Skip to content

Commit

Permalink
Updated diff_tables.py and hashdiff_tables.py
Browse files: browse the repository at this point in the history
  • Loading branch information
ChaosHour committed Feb 6, 2025
1 parent 1477656 commit 2019c34
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 6 deletions.
7 changes: 5 additions & 2 deletions data_diff/diff_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,9 @@ def _diff_tables_root(
table2 = table2.new_key_bounds(min_key=min_key1, max_key=max_key1, key_types=key_types2)

max_rows = max(table1.approximate_size(), table2.approximate_size())
diff_res = self._diff_segments(ti, table1, table2, info_tree, max_rows=max_rows, segment_index=0, segment_count=1) # Pass initial values
diff_res = self._diff_segments(
ti, table1, table2, info_tree, max_rows=max_rows, segment_index=0, segment_count=1
) # Pass initial values
return ti

@abstractmethod
Expand All @@ -316,7 +318,8 @@ def _diff_segments(
level=0,
segment_index=None,
segment_count=None,
): ...
):
...

def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, info_tree):
if len(table1.key_columns) != len(table2.key_columns):
Expand Down
22 changes: 18 additions & 4 deletions data_diff/hashdiff_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,21 @@ def _diff_segments(
return

# Submit the bisect and diff segments task to the ThreadedYielder
ti.submit(self._bisect_and_diff_segments, ti, table1, table2, info_tree, level=level, max_rows=max(count1, count2), segment_index=segment_index, segment_count=segment_count)

def _bisect_and_diff_segments(self, ti, table1, table2, info_tree, level=0, max_rows=None, segment_index=None, segment_count=None):
ti.submit(
self._bisect_and_diff_segments,
ti,
table1,
table2,
info_tree,
level=level,
max_rows=max(count1, count2),
segment_index=segment_index,
segment_count=segment_count,
)

def _bisect_and_diff_segments(
self, ti, table1, table2, info_tree, level=0, max_rows=None, segment_index=None, segment_count=None
):
max_space_size = max(table1.approximate_size(), table2.approximate_size())
if max_rows is None:
max_rows = max_space_size
Expand Down Expand Up @@ -254,6 +266,8 @@ def _bisect_and_diff_segments(self, ti, table1, table2, info_tree, level=0, max_
return diff

if segment_index is not None:
return super()._bisect_and_diff_segments(ti, table1, table2, info_tree, level, max_rows, segment_index=segment_index, segment_count=segment_count)
return super()._bisect_and_diff_segments(
ti, table1, table2, info_tree, level, max_rows, segment_index=segment_index, segment_count=segment_count
)
else:
return super()._bisect_and_diff_segments(ti, table1, table2, info_tree, level, max_rows)

0 comments on commit 2019c34

Please sign in to comment.