Skip to content

Commit

Permalink
Fix divide-by-zero error when the pyarrow table size comes out as 0 (#368)
Browse files Browse the repository at this point in the history
* Add a case when in-memory-size is also 0

* Add unit tests (UTs)

* bump version
  • Loading branch information
raghumdani authored Oct 25, 2024
1 parent 88ccf0c commit 1655368
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 2 deletions.
2 changes: 1 addition & 1 deletion deltacat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@

deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))

__version__ = "1.1.26"
__version__ = "1.1.27"


__all__ = [
Expand Down
2 changes: 1 addition & 1 deletion deltacat/compute/resource_estimation/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
sampled_on_disk_size += delta.manifest.entries[entry_index].meta.content_length
sampled_num_rows += len(tbl)

if not sampled_on_disk_size:
if not sampled_on_disk_size or not sampled_in_memory_size:
return EstimatedResources.of(
memory_bytes=0,
statistics=Statistics.of(
Expand Down
37 changes: 37 additions & 0 deletions deltacat/tests/compute/resource_estimation/test_delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,43 @@ def test_delta_manifest_parquet_when_file_sampling(
== parquet_delta_with_manifest.meta.content_length
)

def test_parquet_delta_when_file_sampling_and_arrow_size_zero(
    self,
    local_deltacat_storage_kwargs,
    parquet_delta_with_manifest: Delta,
    monkeypatch,
):
    """Check FILE_SAMPLING estimation when every sampled table is empty.

    ``download_delta_manifest_entry`` on the storage module is patched to
    hand back an object whose ``nbytes`` is 0 and whose ``len()`` is 0, so
    the sampled in-memory size adds up to zero. The estimate is then
    expected to report ``memory_bytes == 0`` while still carrying the
    delta's ``meta.content_length`` as the on-disk size.
    """

    class _EmptyTable:
        # Only the two members the test stubs out: a byte size and a row count.
        nbytes = 0

        def __len__(self):
            return 0

    def _fake_download(*args, **kwargs):
        # Accept and ignore whatever arguments the estimator passes.
        return _EmptyTable()

    sampling_params = EstimateResourcesParams.of(
        resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING,
        max_files_to_sample=2,
    )

    monkeypatch.setattr(ds, "download_delta_manifest_entry", _fake_download)

    estimate = estimate_resources_required_to_process_delta(
        delta=parquet_delta_with_manifest,
        operation_type=OperationType.PYARROW_DOWNLOAD,
        deltacat_storage=ds,
        deltacat_storage_kwargs=local_deltacat_storage_kwargs,
        estimate_resources_params=sampling_params,
    )

    assert parquet_delta_with_manifest.manifest is not None
    assert estimate.memory_bytes == 0

    reported_on_disk_size = estimate.statistics.on_disk_size_bytes
    expected_on_disk_size = parquet_delta_with_manifest.meta.content_length
    assert reported_on_disk_size == expected_on_disk_size

def test_delta_manifest_utsv_when_file_sampling(
self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
):
Expand Down

0 comments on commit 1655368

Please sign in to comment.