Skip to content

Commit

Permalink
Merge pull request #305 from Aarhus-Psychiatry-Research/mb/add_error_…
Browse files Browse the repository at this point in the history
…if_incident

fix: add meaningful error if entity id is not unique in incident outcome
  • Loading branch information
HLasse authored Oct 20, 2023
2 parents 6a54809 + 65b7597 commit fc00c57
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/timeseriesflattener/flattened_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,11 @@ def _add_incident_outcome(
prediction_timestamp_col_name = f"{self.timestamp_col_name}_prediction"
outcome_timestamp_col_name = f"{self.timestamp_col_name}_outcome"

if not outcome_spec.timeseries_df[self.entity_id_col_name].is_unique:
raise ValueError(
f"""Since incident = True, we expect only one outcome timestamp per entity id. This is not the case in {outcome_spec.feature_base_name}""",
)

df = pd.merge(
self._df,
outcome_spec.timeseries_df,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,42 @@ def test_add_age_error():
)


def test_incident_addition_with_multiple_timestamps_raises_meaningful_error():
    """Check that a non-unique entity id in an incident outcome is rejected.

    The outcome frame holds two rows for entity 1 while the spec is declared
    with ``incident=True``. Building the flattened dataset is expected to
    raise a ``ValueError`` whose message starts with "Since incident = True".
    """
    prediction_times_str = """entity_id,timestamp,
1,2021-12-31 00:00:00
"""

    # Two rows for the same entity id: this is the condition under test.
    event_times_str = """entity_id,timestamp,value,
1,2021-12-31 00:00:01, 1
1,2021-12-31 00:00:01, 1
"""

    prediction_times_df = str_to_df(prediction_times_str)
    event_times_df = str_to_df(event_times_str)

    flattened_dataset = TimeseriesFlattener(
        prediction_times_df=prediction_times_df,
        timestamp_col_name="timestamp",
        entity_id_col_name="entity_id",
        n_workers=4,
        drop_pred_times_with_insufficient_look_distance=False,
    )

    flattened_dataset.add_spec(
        spec=OutcomeSpec(
            timeseries_df=event_times_df,
            lookahead_days=2,
            incident=True,
            # `np.NaN` was removed in NumPy 2.0; `np.nan` is the spelling
            # available on every NumPy version.
            fallback=np.nan,
            feature_base_name="value",
            aggregation_fn=maximum,
        ),
    )

    # Keep only the call expected to raise inside the context manager so an
    # unrelated ValueError from later processing cannot satisfy the test.
    with pytest.raises(ValueError, match="Since incident = True"):
        flattened_dataset.get_df()


def test_incident_outcome_removing_prediction_times():
prediction_times_str = """entity_id,timestamp,
1,2021-12-31 00:00:00
Expand Down

0 comments on commit fc00c57

Please sign in to comment.