From 34a05ffcb8824122facd112f916bab09adf39727 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaros=C5=82aw=20Za=C5=82ucki?= Date: Wed, 17 Jul 2024 16:44:00 +0200 Subject: [PATCH] Ensure check identity is part of query name for failed rows queries. (#2117) * CLOUD-7669 Ensure check identity is part of query name for failed rows queries. * Use query counter instead of check identity as Check:metric is 1:n, metric:query is also 1:n * Partition name is optional, include always table name in such case. * Add UT to cover failing/passing queries uniqueness. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- soda/core/soda/execution/query/query.py | 11 ++++++++- soda/core/tests/data_source/test_invalid.py | 23 +++++++++++++++++++ .../data_source/test_metric_check_filter.py | 8 +++---- soda/core/tests/helpers/mock_soda_cloud.py | 7 ++++-- 4 files changed, 42 insertions(+), 7 deletions(-) diff --git a/soda/core/soda/execution/query/query.py b/soda/core/soda/execution/query/query.py index a5c247f5c..c5b379836 100644 --- a/soda/core/soda/execution/query/query.py +++ b/soda/core/soda/execution/query/query.py @@ -13,6 +13,13 @@ class Query: + _counter = 0 + + @classmethod + def generate_id(cls): + cls._counter += 1 + return cls._counter + def __init__( self, data_source_scan: DataSourceScan, @@ -91,9 +98,11 @@ def get_dict(self, name_suffix: str | None = None, sql: str | None = None) -> di @staticmethod def build_query_name(data_source_scan, table, partition, column, unqualified_query_name): - full_query_pieces = [data_source_scan.data_source.data_source_name] + full_query_pieces = [str(Query.generate_id()), data_source_scan.data_source.data_source_name] if partition is not None and partition.partition_name is not None: full_query_pieces.append(f"{partition.table.table_name}[{partition.partition_name}]") + elif partition is not None and partition.partition_name is None: + full_query_pieces.append(f"{partition.table.table_name}") elif table is not None: full_query_pieces.append(f"{table.table_name}") if column is not None: diff --git a/soda/core/tests/data_source/test_invalid.py b/soda/core/tests/data_source/test_invalid.py index 81648d304..9029ccefe 100644 --- a/soda/core/tests/data_source/test_invalid.py +++ b/soda/core/tests/data_source/test_invalid.py @@ -45,6 +45,29 @@ def test_column_configured_invalid_values(data_source_fixture: DataSourceFixture scan.assert_all_checks_pass() +def test_valid_failed_passing_rows_queries_uniqueness(data_source_fixture: DataSourceFixture): + table_name = data_source_fixture.ensure_test_table(customers_test_table) + + scan = data_source_fixture.create_test_scan() + mock_soda_cloud = scan.enable_mock_soda_cloud() + scan.add_sodacl_yaml_str( + f""" + checks for {table_name}: + - invalid_count(cst_size) = 3: + valid min: 0 + - invalid_count(cst_size) = 4: + valid max: 0 + """ + ) + scan.execute() + scan.assert_all_checks_pass() + + first_check_diagnostic_block = mock_soda_cloud.find_failed_rows_diagnostics_block(0) + second_check_diagnostic_block = mock_soda_cloud.find_failed_rows_diagnostics_block(1) + assert first_check_diagnostic_block["failingRowsQueryName"] != second_check_diagnostic_block["failingRowsQueryName"] + assert first_check_diagnostic_block["passingRowsQueryName"] != second_check_diagnostic_block["passingRowsQueryName"] + + def test_valid_min_max(data_source_fixture: DataSourceFixture): table_name = data_source_fixture.ensure_test_table(customers_test_table) diff --git a/soda/core/tests/data_source/test_metric_check_filter.py b/soda/core/tests/data_source/test_metric_check_filter.py index 74c12fe9a..a3a9400c1 100644 --- a/soda/core/tests/data_source/test_metric_check_filter.py +++ b/soda/core/tests/data_source/test_metric_check_filter.py @@ -57,11 +57,11 @@ def test_missing_filtered_sample_query(data_source_fixture: DataSourceFixture): scan.assert_all_checks_fail() - failing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "failingRowsQueryName") - assert "(pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in failing_rows_query_condition + failing_rows_query_sql = mock_soda_cloud.find_failed_rows_query_sql(0, "failingRowsQueryName") + assert "(pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in failing_rows_query_sql - passing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "passingRowsQueryName") - assert "not (pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in passing_rows_query_condition + passing_rows_query_sql = mock_soda_cloud.find_failed_rows_query_sql(0, "passingRowsQueryName") + assert "not (pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in passing_rows_query_sql @pytest.mark.skipif( diff --git a/soda/core/tests/helpers/mock_soda_cloud.py b/soda/core/tests/helpers/mock_soda_cloud.py index 345367821..993a0da6c 100644 --- a/soda/core/tests/helpers/mock_soda_cloud.py +++ b/soda/core/tests/helpers/mock_soda_cloud.py @@ -185,10 +185,13 @@ def find_failed_rows_line_count(self, check_index: int) -> int: file_contents = self.find_failed_rows_content(check_index) return file_contents.count("\n") - def find_failed_rows_sample_query(self, check_index: int, query_type: str = "failingRowsQueryName"): + def find_failed_rows_query(self, check_index: int, query_type: str = "failingRowsQueryName"): block = self.find_failed_rows_diagnostics_block(check_index) assert block[query_type] - sample_query = self.find_queries(block[query_type]) + return self.find_queries(block[query_type]) + + def find_failed_rows_query_sql(self, check_index: int, query_type: str = "failingRowsQueryName"): + sample_query = self.find_failed_rows_query(check_index, query_type) assert sample_query["sql"] return sample_query["sql"].lower()