Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure check identity is part of query name for failed rows queries. #2117

Merged
merged 8 commits into from
Jul 17, 2024
11 changes: 10 additions & 1 deletion soda/core/soda/execution/query/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@


class Query:
_counter = 0

@classmethod
def generate_id(cls):
cls._counter += 1
return cls._counter

def __init__(
self,
data_source_scan: DataSourceScan,
Expand Down Expand Up @@ -91,9 +98,11 @@ def get_dict(self, name_suffix: str | None = None, sql: str | None = None) -> di

@staticmethod
def build_query_name(data_source_scan, table, partition, column, unqualified_query_name):
full_query_pieces = [data_source_scan.data_source.data_source_name]
full_query_pieces = [str(Query.generate_id()), data_source_scan.data_source.data_source_name]
if partition is not None and partition.partition_name is not None:
full_query_pieces.append(f"{partition.table.table_name}[{partition.partition_name}]")
elif partition is not None and partition.partition_name is None:
full_query_pieces.append(f"{partition.table.table_name}")
elif table is not None:
full_query_pieces.append(f"{table.table_name}")
if column is not None:
Expand Down
23 changes: 23 additions & 0 deletions soda/core/tests/data_source/test_invalid.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,29 @@ def test_column_configured_invalid_values(data_source_fixture: DataSourceFixture
scan.assert_all_checks_pass()


def test_valid_failed_passing_rows_queries_uniqueness(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)

scan = data_source_fixture.create_test_scan()
mock_soda_cloud = scan.enable_mock_soda_cloud()
scan.add_sodacl_yaml_str(
f"""
checks for {table_name}:
- invalid_count(cst_size) = 3:
valid min: 0
- invalid_count(cst_size) = 4:
valid max: 0
"""
)
scan.execute()
scan.assert_all_checks_pass()

first_check_diagnostic_block = mock_soda_cloud.find_failed_rows_diagnostics_block(0)
second_check_diagnostic_block = mock_soda_cloud.find_failed_rows_diagnostics_block(1)
assert first_check_diagnostic_block["failingRowsQueryName"] != second_check_diagnostic_block["failingRowsQueryName"]
assert first_check_diagnostic_block["passingRowsQueryName"] != second_check_diagnostic_block["passingRowsQueryName"]


def test_valid_min_max(data_source_fixture: DataSourceFixture):
table_name = data_source_fixture.ensure_test_table(customers_test_table)

Expand Down
8 changes: 4 additions & 4 deletions soda/core/tests/data_source/test_metric_check_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ def test_missing_filtered_sample_query(data_source_fixture: DataSourceFixture):

scan.assert_all_checks_fail()

failing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "failingRowsQueryName")
assert "(pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in failing_rows_query_condition
failing_rows_query_sql = mock_soda_cloud.find_failed_rows_query_sql(0, "failingRowsQueryName")
assert "(pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in failing_rows_query_sql

passing_rows_query_condition = mock_soda_cloud.find_failed_rows_sample_query(0, "passingRowsQueryName")
assert "not (pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in passing_rows_query_condition
passing_rows_query_sql = mock_soda_cloud.find_failed_rows_query_sql(0, "passingRowsQueryName")
assert "not (pct is null or pct in ('no value','n/a','error')) and (country = 'nl')" in passing_rows_query_sql


@pytest.mark.skipif(
Expand Down
7 changes: 5 additions & 2 deletions soda/core/tests/helpers/mock_soda_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,10 +185,13 @@ def find_failed_rows_line_count(self, check_index: int) -> int:
file_contents = self.find_failed_rows_content(check_index)
return file_contents.count("\n")

def find_failed_rows_sample_query(self, check_index: int, query_type: str = "failingRowsQueryName"):
def find_failed_rows_query(self, check_index: int, query_type: str = "failingRowsQueryName"):
block = self.find_failed_rows_diagnostics_block(check_index)
assert block[query_type]
sample_query = self.find_queries(block[query_type])
return self.find_queries(block[query_type])

def find_failed_rows_query_sql(self, check_index: int, query_type: str = "failingRowsQueryName"):
sample_query = self.find_failed_rows_query(check_index, query_type)
assert sample_query["sql"]
return sample_query["sql"].lower()

Expand Down
Loading