Skip to content

Commit

Permalink
Ensure check identity is part of query name for failed rows queries. (#…
Browse files Browse the repository at this point in the history
…2117)

* CLOUD-7669 Ensure check identity is part of query name for failed rows queries.

* Use query counter instead of check identity as Check:metric is 1:n, metric:query is also 1:n

* Partition name is optional, include always table name in such case.

* Add UT to cover failing/passing queries uniqueness.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
jzalucki and pre-commit-ci[bot] authored Jul 17, 2024
1 parent f13d268 commit 34a05ff
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 7 deletions.
11 changes: 10 additions & 1 deletion soda/core/soda/execution/query/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@


class Query:
_counter = 0

@classmethod
def generate_id(cls):
cls._counter += 1
return cls._counter

def __init__(
self,
data_source_scan: DataSourceScan,
Expand Down Expand Up @@ -91,9 +98,11 @@ def get_dict(self, name_suffix: str | None = None, sql: str | None = None) -> di

@staticmethod
def build_query_name(data_source_scan, table, partition, column, unqualified_query_name):
full_query_pieces = [data_source_scan.data_source.data_source_name]
full_query_pieces = [str(Query.generate_id()), data_source_scan.data_source.data_source_name]
if partition is not None and partition.partition_name is not None:
full_query_pieces.append(f"{partition.table.table_name}[{partition.partition_name}]")
elif partition is not None and partition.partition_name is None:
full_query_pieces.append(f"{partition.table.table_name}")
elif table is not None:
full_query_pieces.append(f"{table.table_name}")
if column is not None:
Expand Down
23 changes: 23 additions & 0 deletions soda/core/tests/data_source/test_invalid.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,29 @@ def test_column_configured_invalid_values(data_source_fixture: DataSourceFixture
scan.assert_all_checks_pass()


def test_valid_failed_passing_rows_queries_uniqueness(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)

scan = data_source_fixture.create_test_scan()
mock_soda_cloud = scan.enable_mock_soda_cloud()
scan.add_sodacl_yaml_str(
f"""
checks for {table_name}:
- invalid_count(cst_size) = 3:
valid min: 0
- invalid_count(cst_size) = 4:
valid max: 0
"""
)
scan.execute()
scan.assert_all_checks_pass()

first_check_diagnostic_block = mock_soda_cloud.find_failed_rows_diagnostics_block(0)
second_check_diagnostic_block = mock_soda_cloud.find_failed_rows_diagnostics_block(1)
assert first_check_diagnostic_block["failingRowsQueryName"] != second_check_diagnostic_block["failingRowsQueryName"]
assert first_check_diagnostic_block["passingRowsQueryName"] != second_check_diagnostic_block["passingRowsQueryName"]


def test_valid_min_max(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)

Expand Down
8 changes: 4 additions & 4 deletions soda/core/tests/data_source/test_metric_check_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ def test_missing_filtered_sample_query(data_source_fixture: DataSourceFixture):

scan.assert_all_checks_fail()

failing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "failingRowsQueryName")
assert "(pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in failing_rows_query_condition
failing_rows_query_sql = mock_soda_cloud.find_failed_rows_query_sql(0, "failingRowsQueryName")
assert "(pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in failing_rows_query_sql

passing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "passingRowsQueryName")
assert "not (pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in passing_rows_query_condition
passing_rows_query_sql = mock_soda_cloud.find_failed_rows_query_sql(0, "passingRowsQueryName")
assert "not (pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in passing_rows_query_sql


@pytest.mark.skipif(
Expand Down
7 changes: 5 additions & 2 deletions soda/core/tests/helpers/mock_soda_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,13 @@ def find_failed_rows_line_count(self, check_index: int) -> int:
file_contents = self.find_failed_rows_content(check_index)
return file_contents.count("\n")

def find_failed_rows_sample_query(self, check_index: int, query_type: str = "failingRowsQueryName"):
def find_failed_rows_query(self, check_index: int, query_type: str = "failingRowsQueryName"):
block = self.find_failed_rows_diagnostics_block(check_index)
assert block[query_type]
sample_query = self.find_queries(block[query_type])
return self.find_queries(block[query_type])

def find_failed_rows_query_sql(self, check_index: int, query_type: str = "failingRowsQueryName"):
sample_query = self.find_failed_rows_query(check_index, query_type)
assert sample_query["sql"]
return sample_query["sql"].lower()

Expand Down

0 comments on commit 34a05ff

Please sign in to comment.