Skip to content

Commit

Permalink
[minor] make parquet prune tests more readable (apache#10112)
Browse files Browse the repository at this point in the history
* [minor] make parquet prune tests more readable

* typo
  • Loading branch information
Ted-Jiang authored Apr 17, 2024
1 parent 4ad4f90 commit dea4a1b
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 22 deletions.
26 changes: 14 additions & 12 deletions datafusion/core/tests/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,10 @@ enum Scenario {
}

enum Unit {
RowGroup,
Page,
// max number of rows per row group, passed to the parquet writer
RowGroup(usize),
// max number of rows per page, passed to the parquet writer
Page(usize),
}

/// Test fixture that has an execution context that has an external
Expand Down Expand Up @@ -185,13 +187,13 @@ impl ContextWithParquet {
mut config: SessionConfig,
) -> Self {
let file = match unit {
Unit::RowGroup => {
Unit::RowGroup(row_per_group) => {
config = config.with_parquet_bloom_filter_pruning(true);
make_test_file_rg(scenario).await
make_test_file_rg(scenario, row_per_group).await
}
Unit::Page => {
Unit::Page(row_per_page) => {
config = config.with_parquet_page_index_pruning(true);
make_test_file_page(scenario).await
make_test_file_page(scenario, row_per_page).await
}
};
let parquet_path = file.path().to_string_lossy();
Expand Down Expand Up @@ -880,15 +882,15 @@ fn create_data_batch(scenario: Scenario) -> Vec<RecordBatch> {
}

/// Create a test parquet file with various data types
async fn make_test_file_rg(scenario: Scenario) -> NamedTempFile {
async fn make_test_file_rg(scenario: Scenario, row_per_group: usize) -> NamedTempFile {
let mut output_file = tempfile::Builder::new()
.prefix("parquet_pruning")
.suffix(".parquet")
.tempfile()
.expect("tempfile creation");

let props = WriterProperties::builder()
.set_max_row_group_size(5)
.set_max_row_group_size(row_per_group)
.set_bloom_filter_enabled(true)
.build();

Expand All @@ -906,17 +908,17 @@ async fn make_test_file_rg(scenario: Scenario) -> NamedTempFile {
output_file
}

async fn make_test_file_page(scenario: Scenario) -> NamedTempFile {
async fn make_test_file_page(scenario: Scenario, row_per_page: usize) -> NamedTempFile {
let mut output_file = tempfile::Builder::new()
.prefix("parquet_page_pruning")
.suffix(".parquet")
.tempfile()
.expect("tempfile creation");

// set row count to 5, should get same result as rowGroup
// limit each data page to `row_per_page` rows; should give the same result as the row group case
let props = WriterProperties::builder()
.set_data_page_row_count_limit(5)
.set_write_batch_size(5)
.set_data_page_row_count_limit(row_per_page)
.set_write_batch_size(row_per_page)
.build();

let batches = create_data_batch(scenario);
Expand Down
Loading

0 comments on commit dea4a1b

Please sign in to comment.