Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for GROUPING SETS/CUBE/ROLLUP #2716

Merged
merged 26 commits into from
Jun 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
95d9eb5
WIP
thinkharderdev Jun 8, 2022
60facd5
Implement for non-row based accumulators
thinkharderdev Jun 8, 2022
9e1bf30
Non-row aggregations
thinkharderdev Jun 9, 2022
5a8d4ba
Map logical plan and add some basic tests
thinkharderdev Jun 9, 2022
c20c3c0
Handle grouping sets in various optimize passes.
thinkharderdev Jun 9, 2022
bcfe2bb
Implemented create_cube_expr and create_rollup_expr functions
Tomczik76 Jun 10, 2022
fee7536
Cleanup and ignore SingleDistinctToGroupBy when using grouping sets f…
thinkharderdev Jun 9, 2022
6c6c0d3
Handle grouping sets in SingleDistinctToGroupBy
thinkharderdev Jun 9, 2022
5658da3
Add more tests and burn the boats
thinkharderdev Jun 9, 2022
44b8dfa
Fix(ish) partitioning
thinkharderdev Jun 9, 2022
31a05ec
Serialization for grouping set exprs
thinkharderdev Jun 10, 2022
3fbd4e3
fixed bug with create_cube_expr function
Tomczik76 Jun 10, 2022
2dfda2a
fixed bug with create_cube_expr function
Tomczik76 Jun 10, 2022
d05a649
Fixed bug in row-based-aggregation
thinkharderdev Jun 10, 2022
8fb2461
Added unit tests for test_create_rollup_expr and test_create_cube_expr
Tomczik76 Jun 10, 2022
0532f03
Formatting
thinkharderdev Jun 11, 2022
5eb1881
Tests, linter fixes and docs
thinkharderdev Jun 11, 2022
4084611
Linting
thinkharderdev Jun 11, 2022
a1654a4
Better encoding which avoids evaluating grouping expressions redundantly
thinkharderdev Jun 11, 2022
a049675
Remove commented code
thinkharderdev Jun 11, 2022
aabd639
Apply suggestions from code review
thinkharderdev Jun 12, 2022
e5f3df5
PR Comments: Rename PhysicalGroupingSet -> PhysicalGroupBy and clarif…
thinkharderdev Jun 12, 2022
c0ed1b9
Disable single_distinct_to_groupby for grouping sets for now and add …
thinkharderdev Jun 12, 2022
2bc6976
PR comments
thinkharderdev Jun 13, 2022
27dcdb1
Remove old comment
thinkharderdev Jun 13, 2022
a2cb52d
Return PhysicalGroupBy from AggregateExec::group_expr
thinkharderdev Jun 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions datafusion/core/src/physical_optimizer/aggregate_statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ mod tests {

use crate::error::Result;
use crate::logical_plan::Operator;
use crate::physical_plan::aggregates::AggregateExec;
use crate::physical_plan::aggregates::{AggregateExec, PhysicalGroupBy};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love the new name PhysicalGroupBy

use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec;
use crate::physical_plan::common;
use crate::physical_plan::expressions::Count;
Expand Down Expand Up @@ -407,15 +407,15 @@ mod tests {

let partial_agg = AggregateExec::try_new(
AggregateMode::Partial,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
source,
Arc::clone(&schema),
)?;

let final_agg = AggregateExec::try_new(
AggregateMode::Final,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
Arc::new(partial_agg),
Arc::clone(&schema),
Expand All @@ -435,15 +435,15 @@ mod tests {

let partial_agg = AggregateExec::try_new(
AggregateMode::Partial,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
source,
Arc::clone(&schema),
)?;

let final_agg = AggregateExec::try_new(
AggregateMode::Final,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
Arc::new(partial_agg),
Arc::clone(&schema),
Expand All @@ -462,7 +462,7 @@ mod tests {

let partial_agg = AggregateExec::try_new(
AggregateMode::Partial,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
source,
Arc::clone(&schema),
Expand All @@ -473,7 +473,7 @@ mod tests {

let final_agg = AggregateExec::try_new(
AggregateMode::Final,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
Arc::new(coalesce),
Arc::clone(&schema),
Expand All @@ -492,7 +492,7 @@ mod tests {

let partial_agg = AggregateExec::try_new(
AggregateMode::Partial,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
source,
Arc::clone(&schema),
Expand All @@ -503,7 +503,7 @@ mod tests {

let final_agg = AggregateExec::try_new(
AggregateMode::Final,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
Arc::new(coalesce),
Arc::clone(&schema),
Expand Down Expand Up @@ -533,15 +533,15 @@ mod tests {

let partial_agg = AggregateExec::try_new(
AggregateMode::Partial,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
filter,
Arc::clone(&schema),
)?;

let final_agg = AggregateExec::try_new(
AggregateMode::Final,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
Arc::new(partial_agg),
Arc::clone(&schema),
Expand Down Expand Up @@ -576,15 +576,15 @@ mod tests {

let partial_agg = AggregateExec::try_new(
AggregateMode::Partial,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
filter,
Arc::clone(&schema),
)?;

let final_agg = AggregateExec::try_new(
AggregateMode::Final,
vec![],
PhysicalGroupBy::default(),
vec![agg.count_expr()],
Arc::new(partial_agg),
Arc::clone(&schema),
Expand Down
8 changes: 5 additions & 3 deletions datafusion/core/src/physical_optimizer/repartition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,9 @@ mod tests {
use super::*;
use crate::datasource::listing::PartitionedFile;
use crate::datasource::object_store::ObjectStoreUrl;
use crate::physical_plan::aggregates::{AggregateExec, AggregateMode};
use crate::physical_plan::aggregates::{
AggregateExec, AggregateMode, PhysicalGroupBy,
};
use crate::physical_plan::expressions::{col, PhysicalSortExpr};
use crate::physical_plan::file_format::{FileScanConfig, ParquetExec};
use crate::physical_plan::filter::FilterExec;
Expand Down Expand Up @@ -305,12 +307,12 @@ mod tests {
Arc::new(
AggregateExec::try_new(
AggregateMode::Final,
vec![],
PhysicalGroupBy::default(),
vec![],
Arc::new(
AggregateExec::try_new(
AggregateMode::Partial,
vec![],
PhysicalGroupBy::default(),
vec![],
input,
schema.clone(),
Expand Down
Loading