Skip to content

Commit

Permalink
Add options of parquet bloom filter and page index in Session config (a…
Browse files Browse the repository at this point in the history
  • Loading branch information
Ted-Jiang authored Jan 15, 2024
1 parent 6b8c6ad commit d07d126
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
6 changes: 2 additions & 4 deletions datafusion/core/tests/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,11 @@ impl ContextWithParquet {
) -> Self {
let file = match unit {
Unit::RowGroup => {
let config = config.options_mut();
config.execution.parquet.bloom_filter_enabled = true;
config = config.with_parquet_bloom_filter_pruning(true);
make_test_file_rg(scenario).await
}
Unit::Page => {
let config = config.options_mut();
config.execution.parquet.enable_page_index = true;
config = config.with_parquet_page_index_pruning(true);
make_test_file_page(scenario).await
}
};
Expand Down
22 changes: 22 additions & 0 deletions datafusion/execution/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,28 @@ impl SessionConfig {
self.options.execution.parquet.pruning
}

/// Reports whether parquet readers should consult bloom filters to skip row groups.
///
/// This is the current value of the `execution.parquet.bloom_filter_enabled` option.
pub fn parquet_bloom_filter_pruning(&self) -> bool {
    let parquet_options = &self.options.execution.parquet;
    parquet_options.bloom_filter_enabled
}

/// Turns bloom-filter based row group skipping for parquet readers on or off.
///
/// Stores `enabled` in the `execution.parquet.bloom_filter_enabled` option and
/// returns the updated configuration so calls can be chained.
pub fn with_parquet_bloom_filter_pruning(mut self, enabled: bool) -> Self {
    let parquet_options = &mut self.options.execution.parquet;
    parquet_options.bloom_filter_enabled = enabled;
    self
}

/// Reports whether parquet readers should use the page index to skip data pages.
///
/// This is the current value of the `execution.parquet.enable_page_index` option.
pub fn parquet_page_index_pruning(&self) -> bool {
    let parquet_options = &self.options.execution.parquet;
    parquet_options.enable_page_index
}

/// Turns page-index based data page skipping for parquet readers on or off.
///
/// Stores `enabled` in the `execution.parquet.enable_page_index` option and
/// returns the updated configuration so calls can be chained.
pub fn with_parquet_page_index_pruning(mut self, enabled: bool) -> Self {
    let parquet_options = &mut self.options.execution.parquet;
    parquet_options.enable_page_index = enabled;
    self
}

/// Enables or disables the collection of statistics after listing files
pub fn with_collect_statistics(mut self, enabled: bool) -> Self {
self.options.execution.collect_statistics = enabled;
Expand Down

0 comments on commit d07d126

Please sign in to comment.