From 2019c3421e617d3c3a1434b29458ded5cce3f161 Mon Sep 17 00:00:00 2001 From: ChaosHour Date: Wed, 5 Feb 2025 20:01:10 -0800 Subject: [PATCH] Updated diff_tables.py and hashdiff_tables.py --- data_diff/diff_tables.py | 7 +++++-- data_diff/hashdiff_tables.py | 22 ++++++++++++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/data_diff/diff_tables.py b/data_diff/diff_tables.py index c06b079e..bf4d6ed1 100644 --- a/data_diff/diff_tables.py +++ b/data_diff/diff_tables.py @@ -302,7 +302,9 @@ def _diff_tables_root( table2 = table2.new_key_bounds(min_key=min_key1, max_key=max_key1, key_types=key_types2) max_rows = max(table1.approximate_size(), table2.approximate_size()) - diff_res = self._diff_segments(ti, table1, table2, info_tree, max_rows=max_rows, segment_index=0, segment_count=1) # Pass initial values + diff_res = self._diff_segments( + ti, table1, table2, info_tree, max_rows=max_rows, segment_index=0, segment_count=1 + ) # Pass initial values return ti @abstractmethod @@ -316,7 +318,8 @@ def _diff_segments( level=0, segment_index=None, segment_count=None, - ): ... + ): + ... def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, info_tree): if len(table1.key_columns) != len(table2.key_columns): diff --git a/data_diff/hashdiff_tables.py b/data_diff/hashdiff_tables.py index 33d717c7..6de41a5b 100644 --- a/data_diff/hashdiff_tables.py +++ b/data_diff/hashdiff_tables.py @@ -214,9 +214,21 @@ def _diff_segments( return # Submit the bisect and diff segments task to the ThreadedYielder - ti.submit(self._bisect_and_diff_segments, ti, table1, table2, info_tree, level=level, max_rows=max(count1, count2), segment_index=segment_index, segment_count=segment_count) - - def _bisect_and_diff_segments(self, ti, table1, table2, info_tree, level=0, max_rows=None, segment_index=None, segment_count=None): + ti.submit( + self._bisect_and_diff_segments, + ti, + table1, + table2, + info_tree, + level=level, + max_rows=max(count1, count2), + segment_index=segment_index, + segment_count=segment_count, + ) + + def _bisect_and_diff_segments( + self, ti, table1, table2, info_tree, level=0, max_rows=None, segment_index=None, segment_count=None + ): max_space_size = max(table1.approximate_size(), table2.approximate_size()) if max_rows is None: max_rows = max_space_size @@ -254,6 +266,8 @@ def _bisect_and_diff_segments(self, ti, table1, table2, info_tree, level=0, max_ return diff if segment_index is not None: - return super()._bisect_and_diff_segments(ti, table1, table2, info_tree, level, max_rows, segment_index=segment_index, segment_count=segment_count) + return super()._bisect_and_diff_segments( + ti, table1, table2, info_tree, level, max_rows, segment_index=segment_index, segment_count=segment_count + ) else: return super()._bisect_and_diff_segments(ti, table1, table2, info_tree, level, max_rows)