diff --git a/CHANGELOG.md b/CHANGELOG.md index 8439668f75..500c10a09f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co - Added `multivariate_plot` parameter in `show_anomalies()` to separately plot each component in multivariate series. [#2544](https://github.com/unit8co/darts/pull/2544) by [He Weilin](https://github.com/cnhwl). **Fixed** +- Fixed a bug when performing optimized historical forecasts with `stride=1` using a `RegressionModel` with `output_chunk_shift>=1` and `output_chunk_length=1`, where the forecast time index was not properly shifted. [#2634](https://github.com/unit8co/darts/pull/2634) by [Mattias De Charleroy](https://github.com/MattiasDC). **Dependencies** diff --git a/darts/tests/utils/historical_forecasts/test_historical_forecasts.py b/darts/tests/utils/historical_forecasts/test_historical_forecasts.py index 1f739f9599..f696724cb2 100644 --- a/darts/tests/utils/historical_forecasts/test_historical_forecasts.py +++ b/darts/tests/utils/historical_forecasts/test_historical_forecasts.py @@ -59,7 +59,20 @@ from darts.utils.likelihood_models import GaussianLikelihood, QuantileRegression models = [LinearRegressionModel, NaiveDrift] -models_reg_no_cov_cls_kwargs = [(LinearRegressionModel, {"lags": 8}, {}, (8, 1))] +models_reg_no_cov_cls_kwargs = [ + (LinearRegressionModel, {"lags": 8}, {}, (8, 1)), + # output_chunk_length only + (LinearRegressionModel, {"lags": 5, "output_chunk_length": 2}, {}, (5, 2)), + # output_chunk_shift only + (LinearRegressionModel, {"lags": 5, "output_chunk_shift": 1}, {}, (5, 2)), + # output_chunk_shift + output_chunk_length only + ( + LinearRegressionModel, + {"lags": 5, "output_chunk_shift": 1, "output_chunk_length": 2}, + {}, + (5, 3), + ), +] if not isinstance(CatBoostModel, NotImportedModule): models_reg_no_cov_cls_kwargs.append(( CatBoostModel, @@ -650,13 +663,42 @@ def test_historical_forecasts_negative_rangeindex(self): @pytest.mark.parametrize("config", models_reg_no_cov_cls_kwargs) def test_historical_forecasts(self, config): - train_length = 10 + """Tests historical forecasts with retraining for expected forecast lengths and times""" forecast_horizon = 8 # if no fit and retrain=false, should fit at fist iteration model_cls, kwargs, model_kwarg, bounds = config model = model_cls(**kwargs, **model_kwarg) + # set train length to be the minimum required training length + # +1 as sklearn models require min 2 train samples + train_length = bounds[0] + bounds[1] + 1 + + if model.output_chunk_shift > 0: + with pytest.raises(ValueError) as err: + forecasts = model.historical_forecasts( + series=self.ts_pass_val, + forecast_horizon=forecast_horizon, + stride=1, + train_length=train_length, + retrain=True, + overlap_end=False, + ) + assert str(err.value).startswith( + "Cannot perform auto-regression `(n > output_chunk_length)`" + ) + # continue the test without auto-regression if we are using shifts + forecast_horizon = model.output_chunk_length - # time index + # time index without train length + forecasts_no_train_length = model.historical_forecasts( + series=self.ts_pass_val, + forecast_horizon=forecast_horizon, + stride=1, + train_length=None, + retrain=True, + overlap_end=False, + ) + + # time index with minimum train length forecasts = model.historical_forecasts( series=self.ts_pass_val, forecast_horizon=forecast_horizon, @@ -666,23 +708,21 @@ def test_historical_forecasts(self, config): overlap_end=False, ) + assert len(forecasts_no_train_length) == len(forecasts) theorical_forecast_length = ( self.ts_val_length - - max([ - ( - bounds[0] + bounds[1] + 1 - ), # +1 as sklearn models require min 2 train samples - train_length, - ]) # because we train + - train_length # because we train - forecast_horizon # because we have overlap_end = False + 1 # because we include the first element ) - assert len(forecasts) == theorical_forecast_length, ( f"Model {model_cls.__name__} does not return the right number of historical forecasts in the case " f"of retrain=True and overlap_end=False, and a time index of type DateTimeIndex. " f"Expected {theorical_forecast_length}, got {len(forecasts)}" ) + assert forecasts.time_index.equals( + self.ts_pass_val.time_index[-theorical_forecast_length:] + ) # range index forecasts = model.historical_forecasts( @@ -699,6 +739,10 @@ def test_historical_forecasts(self, config): f"of retrain=True, overlap_end=False, and a time index of type RangeIndex." f"Expected {theorical_forecast_length}, got {len(forecasts)}" ) + assert forecasts.time_index.equals( + self.ts_pass_val_range.time_index[-theorical_forecast_length:] + ) + start_idx = self.ts_pass_val_range.get_index_at_point(forecasts.start_time()) # stride 2 forecasts = model.historical_forecasts( @@ -710,30 +754,30 @@ def test_historical_forecasts(self, config): overlap_end=False, ) - theorical_forecast_length = np.floor( - ( + theorical_forecast_length = int( + np.floor( ( - self.ts_val_length - - max([ - ( - bounds[0] + bounds[1] + 1 - ), # +1 as sklearn models require min 2 train samples - train_length, - ]) # because we train - - forecast_horizon # because we have overlap_end = False - + 1 # because we include the first element + ( + self.ts_val_length + - train_length # because we train + - forecast_horizon # because we have overlap_end = False + + 1 # because we include the first element + ) + - 1 ) - - 1 - ) - / 2 - + 1 # because of stride - ) # if odd number of elements, we keep the floor + / 2 + + 1 # because of stride + ) # if odd number of elements, we keep the floor + ) assert len(forecasts) == theorical_forecast_length, ( f"Model {model_cls.__name__} does not return the right number of historical forecasts in the case " f"of retrain=True and overlap_end=False and stride=2. " f"Expected {theorical_forecast_length}, got {len(forecasts)}" ) + assert forecasts.time_index.equals( + self.ts_pass_val_range.time_index[start_idx::2] + ) # stride 3 forecasts = model.historical_forecasts( @@ -749,12 +793,7 @@ def test_historical_forecasts(self, config): ( ( self.ts_val_length - - max([ - ( - bounds[0] + bounds[1] + 1 - ), # +1 as sklearn models require min 2 train samples - train_length, - ]) # because we train + - train_length # because we train - forecast_horizon # because we have overlap_end = False + 1 # because we include the first element ) @@ -770,6 +809,9 @@ def test_historical_forecasts(self, config): f"of retrain=True and overlap_end=False and stride=3. " f"Expected {theorical_forecast_length}, got {len(forecasts)}" ) + assert forecasts.time_index.equals( + self.ts_pass_val_range.time_index[start_idx::3] + ) # last points only False forecasts = model.historical_forecasts( @@ -784,12 +826,7 @@ def test_historical_forecasts(self, config): theorical_forecast_length = ( self.ts_val_length - - max([ - ( - bounds[0] + bounds[1] + 1 - ), # +1 as sklearn models require min 2 train samples - train_length, - ]) # because we train + - train_length # because we train - forecast_horizon # because we have overlap_end = False + 1 # because we include the first element ) @@ -804,6 +841,11 @@ def test_historical_forecasts(self, config): f"Model {model_cls} does not return forecast_horizon points per historical forecast in the case of " f"retrain=True and overlap_end=False, and last_points_only=False" ) + last_points_times = np.array([fc.end_time() for fc in forecasts]) + np.testing.assert_equal( + last_points_times, + self.ts_pass_val_range.time_index[-theorical_forecast_length:].values, + ) if not model.supports_past_covariates: with pytest.raises(ValueError) as msg: @@ -1144,15 +1186,32 @@ def test_historical_forecasts_start_too_early(self, caplog, config): @pytest.mark.parametrize("config", models_reg_no_cov_cls_kwargs) def test_regression_auto_start_multiple_no_cov(self, config): - train_length = 15 + # minimum required train length (+1 since sklearn models require 2 samples) forecast_horizon = 10 model_cls, kwargs, model_kwargs, bounds = config + train_length = bounds[0] + bounds[1] + 1 model = model_cls( **kwargs, **model_kwargs, ) model.fit(self.ts_pass_train) + if model.output_chunk_shift > 0: + with pytest.raises(ValueError) as err: + forecasts = model.historical_forecasts( + series=[self.ts_pass_val, self.ts_pass_val], + forecast_horizon=forecast_horizon, + train_length=train_length, + stride=1, + retrain=True, + overlap_end=False, + ) + assert str(err.value).startswith( + "Cannot perform auto-regression `(n > output_chunk_length)`" + ) + # continue the test without autogregression if we are using shifts + forecast_horizon = model.output_chunk_length + forecasts = model.historical_forecasts( series=[self.ts_pass_val, self.ts_pass_val], forecast_horizon=forecast_horizon, @@ -1168,12 +1227,7 @@ def test_regression_auto_start_multiple_no_cov(self, config): theorical_forecast_length = ( self.ts_val_length - - max([ - ( - bounds[0] + bounds[1] + 1 - ), # +1 as sklearn models require min 2 train samples - train_length, - ]) # because we train + - train_length - forecast_horizon # because we have overlap_end = False + 1 # because we include the first element ) @@ -1183,6 +1237,9 @@ def test_regression_auto_start_multiple_no_cov(self, config): f"of retrain=True and overlap_end=False, and a time index of type DateTimeIndex. " f"Expected {theorical_forecast_length}, got {len(forecasts[0])} and {len(forecasts[1])}" ) + assert forecasts[0].time_index.equals(forecasts[1].time_index) and forecasts[ + 0 + ].time_index.equals(self.ts_pass_val.time_index[-theorical_forecast_length:]) @pytest.mark.slow @pytest.mark.parametrize( diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py index 934294d926..a8a1444731 100644 --- a/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py +++ b/darts/utils/historical_forecasts/optimized_historical_forecasts_regression.py @@ -162,19 +162,24 @@ def _optimized_historical_forecasts_last_points_only( else: forecast = forecast[:, 0] + if ( + stride == 1 + and model.output_chunk_length == 1 + and model.output_chunk_shift == 0 + ): + times = times[0] + else: + times = generate_index( + start=hist_fct_start + + (forecast_horizon + model.output_chunk_shift - 1) * freq, + length=forecast.shape[0], + freq=freq * stride, + name=series_.time_index.name, + ) + forecasts_list.append( TimeSeries.from_times_and_values( - times=( - times[0] - if stride == 1 and model.output_chunk_length == 1 - else generate_index( - start=hist_fct_start - + (forecast_horizon + model.output_chunk_shift - 1) * freq, - length=forecast.shape[0], - freq=freq * stride, - name=series_.time_index.name, - ) - ), + times=times, values=forecast, columns=forecast_components, static_covariates=series_.static_covariates,