Skip to content

Commit

Permalink
Merge branch 'main' into fix/hive_datasource
Browse files Browse the repository at this point in the history
  • Loading branch information
m1n0 authored Jan 24, 2024
2 parents a82edd6 + 64bc338 commit 447ef66
Show file tree
Hide file tree
Showing 34 changed files with 203 additions and 55 deletions.
2 changes: 1 addition & 1 deletion soda/athena/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-athena"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core Athena Package"

requires = [
Expand Down
2 changes: 1 addition & 1 deletion soda/bigquery/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-bigquery"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core Bigquery Package"

requires = [
Expand Down
2 changes: 1 addition & 1 deletion soda/contracts/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-contracts"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core Contracts Package"

requires = [
Expand Down
2 changes: 1 addition & 1 deletion soda/core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

package_name = "soda-core"
# Managed by tbump - do not change manually
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core"

requires = [
Expand Down
2 changes: 1 addition & 1 deletion soda/core/soda/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
SODA_CORE_VERSION = "3.1.3"
SODA_CORE_VERSION = "3.1.4"
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ class HyperparameterConfigs(ADBaseModel):

class ModelConfigs(ADBaseModel):
    # Settings read from the `model` section of an anomaly detection check.

    # Model type identifier; defaults to "prophet".
    type: str = "prophet"
    # Optional country code for built-in holidays. None (the default) means no
    # country holidays are added. When set, the Prophet detector passes it to
    # `Prophet.add_country_holidays(country_name=...)`.
    holidays_country_code: Optional[str] = None
    # Hyperparameter settings; defaults to `HyperparameterConfigs()` built with
    # no arguments.
    hyperparameters: HyperparameterConfigs = HyperparameterConfigs()


Expand Down
58 changes: 58 additions & 0 deletions soda/core/tests/data_source/test_anomaly_detection_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,64 @@ def test_anomaly_detection_static_hyperparameters(data_source_fixture: DataSourc
scan.assert_all_checks_pass()


@pytest.mark.skipif(
    condition=os.getenv("SCIENTIFIC_TESTS") == "SKIP",
    reason="Environment variable SCIENTIFIC_TESTS is set to SKIP which skips tests depending on the scientific package",
)
def test_anomaly_detection_static_hyperparameters_built_in_holidays(data_source_fixture: DataSourceFixture) -> None:
    """Run an anomaly detection check configured with a built-in holidays country code.

    The check is declared with ``holidays_country_code: TR`` under ``model``,
    historic ``row_count`` measurements are mocked, and the scan is expected
    to finish with every check passing.

    Args:
        data_source_fixture: Fixture used to create the test table and the scan.
    """
    table_name = data_source_fixture.ensure_test_table(customers_test_table)

    scan = data_source_fixture.create_test_scan()

    # YAML nesting is significant: `model` belongs to the check and
    # `type` / `holidays_country_code` belong to `model`. Without the nesting
    # they would be parsed as unrelated top-level keys.
    scan.add_sodacl_yaml_str(
        f"""
            checks for {table_name}:
              - anomaly detection for row_count:
                  model:
                    type: prophet
                    holidays_country_code: TR
        """
    )
    # Mocked history for the row_count metric that the check reads.
    metric_values = [10, 10, 10, 9, 8, 0, 0, 0, 0]
    scan.mock_historic_values(
        metric_identity=f"metric-{scan._scan_definition_name}-{scan._data_source_name}-{table_name}-row_count",
        metric_values=metric_values,
        time_generator=TimeGenerator(),
    )
    scan.execute()
    scan.assert_all_checks_pass()


@pytest.mark.skipif(
    condition=os.getenv("SCIENTIFIC_TESTS") == "SKIP",
    reason="Environment variable SCIENTIFIC_TESTS is set to SKIP which skips tests depending on the scientific package",
)
def test_anomaly_detection_static_hyperparameters_wrong_built_in_holidays(
    data_source_fixture: DataSourceFixture,
) -> None:
    """Run an anomaly detection check configured with an invalid holidays country code.

    The check is declared with ``holidays_country_code: invalid_country_code``.
    The scan is executed with errors/warnings allowed, and every check is
    expected to end up skipped rather than passing or failing.

    Args:
        data_source_fixture: Fixture used to create the test table and the scan.
    """
    table_name = data_source_fixture.ensure_test_table(customers_test_table)

    scan = data_source_fixture.create_test_scan()

    # YAML nesting is significant: `model` belongs to the check and
    # `type` / `holidays_country_code` belong to `model`. Without the nesting
    # the invalid country code would never reach the model configuration.
    scan.add_sodacl_yaml_str(
        f"""
            checks for {table_name}:
              - anomaly detection for row_count:
                  model:
                    type: prophet
                    holidays_country_code: invalid_country_code
        """
    )
    # Mocked history for the row_count metric that the check reads.
    metric_values = [10, 10, 10, 9, 8, 0, 0, 0, 0]
    scan.mock_historic_values(
        metric_identity=f"metric-{scan._scan_definition_name}-{scan._data_source_name}-{table_name}-row_count",
        metric_values=metric_values,
        time_generator=TimeGenerator(),
    )
    # Errors and warnings are allowed here so that execute() does not abort the test.
    scan.execute(allow_error_warning=True)
    scan.assert_all_checks_skipped()


@pytest.mark.skipif(
condition=os.getenv("SCIENTIFIC_TESTS") == "SKIP",
reason="Environment variable SCIENTIFIC_TESTS is set to SKIP which skips tests depending on the scientific package",
Expand Down
2 changes: 1 addition & 1 deletion soda/dask/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-pandas-dask"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core Dask Package"

requires = [f"soda-core=={package_version}", "dask>=2022.10.0", "dask-sql>=2022.12.0,<2023.6.0"]
Expand Down
2 changes: 1 addition & 1 deletion soda/db2/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-db2"
package_version = "3.1.3"
package_version = "3.1.4"
# TODO Add proper description
description = "Soda Core IBM DB2 Package"

Expand Down
2 changes: 1 addition & 1 deletion soda/dbt/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-dbt"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core dbt Package"

requires = [
Expand Down
2 changes: 1 addition & 1 deletion soda/denodo/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-denodo"
package_version = "3.1.3"
package_version = "3.1.4"
# TODO Add proper description
description = "Soda Core Denodo Package"

Expand Down
2 changes: 1 addition & 1 deletion soda/dremio/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-dremio"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core Dremio Package"

requires = [f"soda-core=={package_version}", "pyodbc", "pyarrow"]
Expand Down
2 changes: 1 addition & 1 deletion soda/duckdb/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-duckdb"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core Duckdb Package"

requires = [f"soda-core=={package_version}", "duckdb"]
Expand Down
2 changes: 1 addition & 1 deletion soda/mysql/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-mysql"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core MySQL Package"

requires = [
Expand Down
2 changes: 1 addition & 1 deletion soda/oracle/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-oracle"
package_version = "3.1.3"
package_version = "3.1.4"
# TODO Add proper description
description = "Soda Core Oracle Package"

Expand Down
2 changes: 1 addition & 1 deletion soda/postgres/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-postgres"
package_version = "3.1.3"
package_version = "3.1.4"
# TODO Add proper description
description = "Soda Core Postgres Package"

Expand Down
2 changes: 1 addition & 1 deletion soda/redshift/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-redshift"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core Redshift Package"

requires = [f"soda-core=={package_version}", "boto3", "psycopg2-binary>=2.8.5, <3.0"]
Expand Down
2 changes: 1 addition & 1 deletion soda/scientific/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from setuptools import find_namespace_packages, setup

package_name = "soda-core-scientific"
package_version = "3.1.3"
package_version = "3.1.4"
description = "Soda Core Scientific Package"
requires = [
f"soda-core=={package_version}",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def evaluate(self) -> Tuple[str, Dict[str, Any]]:
logs=self._logs,
params=self.params,
time_series_df=feedback_processed_df,
hyperparamaters_cfg=self.model_cfg.hyperparameters,
model_cfg=self.model_cfg,
training_dataset_params=self.training_dataset_params,
has_exogenous_regressor=has_exogenous_regressor,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,10 @@ class FreqDetectionResultError(Exception):
To be raised and passed as a result error message down the line.
"""


class NotSupportedHolidayCountryError(Exception):
    """Raised when the configured holidays country code is not supported.

    To be raised and passed as a result error message down the line.
    """
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from soda.common.logs import Logs
from soda.sodacl.anomaly_detection_metric_check_cfg import (
HyperparameterConfigs,
ModelConfigs,
ProphetDefaultHyperparameters,
TrainingDatasetParameters,
)
Expand All @@ -22,6 +23,7 @@
from soda.scientific.anomaly_detection_v2.exceptions import (
AggregationValueError,
FreqDetectionResultError,
NotSupportedHolidayCountryError,
)
from soda.scientific.anomaly_detection_v2.frequency_detector import FrequencyDetector
from soda.scientific.anomaly_detection_v2.globals import (
Expand All @@ -47,7 +49,7 @@ def __init__(
logs: Logs,
params: Dict[str, Any],
time_series_df: pd.DataFrame,
hyperparamaters_cfg: HyperparameterConfigs,
model_cfg: ModelConfigs,
training_dataset_params: TrainingDatasetParameters,
has_exogenous_regressor: bool = False,
) -> None:
Expand All @@ -57,7 +59,7 @@ def __init__(
params (Dict[str, Any]): config class parsed from detector_config.yml.
time_series_df (pd.DataFrame): time series data to be used for training and prediction.
logs (Logs): logging object.
hyperparamaters_cfg (HyperparameterConfigs): hyperparameter configs.
model_cfg (ModelConfigs): model configs (model type, holidays country code and hyperparameters).
training_dataset_params (TrainingDatasetParameters): training dataset configs.
has_exogenous_regressor (bool, optional): whether the time series data has an exogenous regressor. Defaults to False.
Expand All @@ -77,7 +79,8 @@ def __init__(
self.logs = logs
self.params = params
self.raw_time_series_df = time_series_df
self.hyperparamaters_cfg = hyperparamaters_cfg
self.model_cfg = model_cfg
self.hyperparamaters_cfg = model_cfg.hyperparameters
self.training_dataset_params = training_dataset_params
self.has_exogenous_regressor = has_exogenous_regressor

Expand Down Expand Up @@ -265,6 +268,17 @@ def setup_fit_predict(
f"Anomaly Detection: Fitting prophet model with the following parameters:\n{model_hyperparameters.model_dump_json(indent=4)}"
)
model = Prophet(**model_hyperparameters.model_dump())
holidays_country_code = self.model_cfg.holidays_country_code
# Add country specific holidays
if holidays_country_code is not None:
try:
model = model.add_country_holidays(country_name=holidays_country_code)
except AttributeError:
raise NotSupportedHolidayCountryError(
f"Anomaly Detection Error: Country '{holidays_country_code}' is not supported. "
"The list of supported countries can be found here: "
"https://github.com/vacanza/python-holidays/"
)
if "external_regressor" in time_series_df:
self.logs.info(
"Anomaly Detection: Found a custom external_regressor derived from user feedback and adding it to Prophet model"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
test_feedback_processor_prophet_model_skip_measurements_expectation,
test_feedback_processor_seasonality_skip_measurements,
test_feedback_processor_seasonality_skip_measurements_expectation,
test_prophet_model_skip_measurements_previousAndThis,
test_prophet_model_skip_measurements_previousAndThis_expectation,
test_prophet_model_skip_measurements_this_exclusive_previous,
test_prophet_model_skip_measurements_this_exclusive_previous_expectation,
)
from soda.common.logs import Logs

Expand Down
5 changes: 2 additions & 3 deletions soda/scientific/tests/anomaly_detection_v2/base_model_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
)
from soda.common.logs import Logs
from soda.sodacl.anomaly_detection_metric_check_cfg import (
HyperparameterConfigs,
ModelConfigs,
TrainingDatasetParameters,
)
Expand Down Expand Up @@ -67,7 +66,7 @@ def test_base_model_preprocess(time_series_df: pd.DataFrame, expected_time_serie
logs=LOGS,
params=PARAMS,
time_series_df=time_series_df,
hyperparamaters_cfg=HyperparameterConfigs(),
model_cfg=ModelConfigs(),
training_dataset_params=TrainingDatasetParameters(),
)
df_preprocessed = detector.preprocess(time_series_df=time_series_df)
Expand Down Expand Up @@ -121,7 +120,7 @@ def test_base_model_remove_big_gaps(size: int, n_rows_to_convert_none: int, expe
logs=LOGS,
params=PARAMS,
time_series_df=time_series_df,
hyperparamaters_cfg=HyperparameterConfigs(),
model_cfg=ModelConfigs(),
training_dataset_params=TrainingDatasetParameters(),
)
df_preprocessed = detector.remove_big_gaps_from_time_series(
Expand Down
Loading

0 comments on commit 447ef66

Please sign in to comment.