Skip to content

Commit

Permalink
Merge pull request #515 from openeemeter/fix/dst-date-range
Browse files Browse the repository at this point in the history
Handle ambiguous and nonexistent local times
  • Loading branch information
travis-recurve authored Oct 24, 2024
2 parents b952818 + 7528d92 commit 0db27b2
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Changelog
Development
-----------

* Placeholder
* Handle ambiguous and nonexistent local times when creating daily dataclass

4.0.6
-----
Expand Down
2 changes: 2 additions & 0 deletions eemeter/eemeter/models/daily/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,8 @@ def _compute_meter_value_df(self, df: pd.DataFrame):
end=end_date,
freq="D",
tz=df.index.tz,
ambiguous=True,
nonexistent="shift_forward",
)
all_days_df = pd.DataFrame(index=all_days_index)
# the following drops common days to handle DST issues with pytz.
Expand Down
21 changes: 21 additions & 0 deletions tests/daily_model/test_daily_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from eemeter.eemeter.samples import load_sample
import numpy as np
import pandas as pd
from pandas import Timestamp, DatetimeIndex, DataFrame
import pytest

TEMPERATURE_SEED = 29
Expand Down Expand Up @@ -831,3 +832,23 @@ def test_offset_aggregations_hourly(il_electricity_cdd_hdd_hourly):
)
assert baseline is not None
assert len(baseline.df) == NUM_DAYS_IN_YEAR


def test_dst_handling():
    """Check that daily baseline data is built across DST transitions.

    Two scenarios are exercised in the America/New_York zone, each using a
    two-point series spanning 2020-03-07 through 2021-03-06:

    * a 02:00 time of day, which does not exist on 2020-03-08;
    * a 01:00 time of day, which is ambiguous on 2020-11-01.

    In both cases the resulting ``baseline.df`` is expected to hold 365 rows.
    """
    tz = "America/New_York"

    def build_baseline(start, end):
        # Only the two endpoints are supplied as observations; the assertions
        # below expect DailyBaselineData to expose one row per day in between.
        idx = DatetimeIndex([Timestamp(start, tz=tz), Timestamp(end, tz=tz)])
        df = DataFrame({"observed": [1] * 2, "temperature": [50] * 2}, index=idx)
        return DailyBaselineData(df, is_electricity_data=True)

    # 2020-03-08 02:00 is nonexistent, should push to 03:00
    baseline = build_baseline("2020-03-07 02", "2021-03-06 02")
    assert len(baseline.df) == 365
    hours, counts = np.unique(baseline.df.index.hour, return_counts=True)
    # array_equal compares shape as well as values, so an unexpected extra
    # hour bucket fails the assertion cleanly instead of erroring inside `==`.
    assert np.array_equal(hours, [2, 3])
    assert np.array_equal(counts, [364, 1])

    # 2020-11-01 01:00 is ambiguous, single index should be chosen
    baseline = build_baseline("2020-03-07 01", "2021-03-06 01")
    assert len(baseline.df) == 365
    assert (baseline.df.index.hour == 1).all()

0 comments on commit 0db27b2

Please sign in to comment.