diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index e9cdd754e1365..31678ad165e32 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -636,16 +636,8 @@ pub trait AggregateUDFImpl: Debug + Send + Sync { None } - /// Indicates whether the aggregation function is monotonic as a set function. A set - /// function is monotonically increasing if its value increases as its argument grows - /// (as a set). Formally, `f` is a monotonically increasing set function if `f(S) >= f(T)` - /// whenever `S` is a superset of `T`. - /// - /// For example `count` and `max` are monotonically increasing as their values always - /// increase (or stay the same) as new values are seen. - /// - /// `min` is monotonically decreasing as its value always decreases or stays - /// the same as new values are seen. + /// Indicates whether the aggregation function is monotonic as a set + /// function. See [`AggregateExprSetMonotonicity`] for details. fn set_monotonicity(&self, _data_type: &DataType) -> AggregateExprSetMonotonicity { AggregateExprSetMonotonicity::NotMonotonic } @@ -832,26 +824,27 @@ pub mod aggregate_doc_sections { }; } -/// Status of an Aggregate Expression's Set Monotonicity -#[derive(Debug, Clone)] +/// Indicates whether an aggregation function is monotonic as a set +/// function. A set function is monotonically increasing if its value +/// increases as its argument grows (as a set). Formally, `f` is a +/// monotonically increasing set function if `f(S) >= f(T)` whenever `S` +/// is a superset of `T`. +/// +/// For example `COUNT` and `MAX` are monotonically increasing as their +/// values always increase (or stay the same) as new values are seen. On +/// the other hand, `MIN` is monotonically decreasing as its value always +/// decreases or stays the same as new values are seen. +#[derive(Debug, Clone, PartialEq)] pub enum AggregateExprSetMonotonicity { - /// Ordering exists as ascending + /// Aggregate value increases or stays the same as the input set grows. Increasing, - /// Ordering exists as descending + /// Aggregate value decreases or stays the same as the input set grows. Decreasing, - /// No ordering + /// Aggregate value may increase, decrease, or stay the same as the input + /// set grows. NotMonotonic, } -impl AggregateExprSetMonotonicity { - pub fn is_decreasing(&self) -> bool { - matches!(self, Self::Decreasing) - } - pub fn is_monotonic(&self) -> bool { - !matches!(self, Self::NotMonotonic) - } -} - #[cfg(test)] mod test { use crate::{AggregateUDF, AggregateUDFImpl}; diff --git a/datafusion/functions-aggregate/src/count.rs b/datafusion/functions-aggregate/src/count.rs index 48eabe7b9cce9..992004c669813 100644 --- a/datafusion/functions-aggregate/src/count.rs +++ b/datafusion/functions-aggregate/src/count.rs @@ -354,6 +354,8 @@ impl AggregateUDFImpl for Count { } fn set_monotonicity(&self, _data_type: &DataType) -> AggregateExprSetMonotonicity { + // `COUNT` is monotonically increasing as it always increases or stays + // the same as new values are seen. AggregateExprSetMonotonicity::Increasing } } diff --git a/datafusion/functions-aggregate/src/min_max.rs b/datafusion/functions-aggregate/src/min_max.rs index a36a2f1845938..6ef97f8d61a63 100644 --- a/datafusion/functions-aggregate/src/min_max.rs +++ b/datafusion/functions-aggregate/src/min_max.rs @@ -363,8 +363,8 @@ impl AggregateUDFImpl for Max { } fn set_monotonicity(&self, _data_type: &DataType) -> AggregateExprSetMonotonicity { - // max is monotonically increasing as it always increases or - // stays the same as new values are seen + // `MAX` is monotonically increasing as it always increases or stays + // the same as new values are seen. AggregateExprSetMonotonicity::Increasing } } @@ -1191,8 +1191,8 @@ impl AggregateUDFImpl for Min { } fn set_monotonicity(&self, _data_type: &DataType) -> AggregateExprSetMonotonicity { - // min is monotonically decreasing as it always decreases or - // stays the same as new values are seen + // `MIN` is monotonically decreasing as it always decreases or stays + // the same as new values are seen. AggregateExprSetMonotonicity::Decreasing } } diff --git a/datafusion/functions-aggregate/src/sum.rs b/datafusion/functions-aggregate/src/sum.rs index 1bb54734fb69b..e5d7ef8555ebb 100644 --- a/datafusion/functions-aggregate/src/sum.rs +++ b/datafusion/functions-aggregate/src/sum.rs @@ -256,8 +256,8 @@ impl AggregateUDFImpl for Sum { } fn set_monotonicity(&self, data_type: &DataType) -> AggregateExprSetMonotonicity { - // Sum is only monotonic if its input is unsigned - // TODO: Expand these utilizing statistics + // `SUM` is only monotonically increasing when its input is unsigned. + // TODO: Expand these utilizing statistics. match data_type { DataType::UInt8 => AggregateExprSetMonotonicity::Increasing, DataType::UInt16 => AggregateExprSetMonotonicity::Increasing, diff --git a/datafusion/physical-expr/src/aggregate.rs b/datafusion/physical-expr/src/aggregate.rs index ac86939f8cbb5..3087bfbf9a4be 100644 --- a/datafusion/physical-expr/src/aggregate.rs +++ b/datafusion/physical-expr/src/aggregate.rs @@ -47,8 +47,9 @@ use datafusion_expr::{AggregateExprSetMonotonicity, AggregateUDF, ReversedUDAF}; use datafusion_expr_common::accumulator::Accumulator; use datafusion_expr_common::groups_accumulator::GroupsAccumulator; use datafusion_expr_common::type_coercion::aggregates::check_arg_count; -use datafusion_functions_aggregate_common::accumulator::AccumulatorArgs; -use datafusion_functions_aggregate_common::accumulator::StateFieldsArgs; +use datafusion_functions_aggregate_common::accumulator::{ + AccumulatorArgs, StateFieldsArgs, +}; use datafusion_functions_aggregate_common::order::AggregateOrderSensitivity; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; @@ -535,10 +536,8 @@ impl AggregateFunctionExpr { self.fun.default_value(data_type) } - /// Indicates whether the aggregation function is monotonic as a set function. A set - /// function is monotonically increasing if its value increases as its argument grows - /// (as a set). Formally, `f` is a monotonically increasing set function if `f(S) >= f(T)` - /// whenever `S` is a superset of `T`. + /// Indicates whether the aggregation function is monotonic as a set + /// function. See [`AggregateExprSetMonotonicity`] for details. pub fn set_monotonicity(&self) -> AggregateExprSetMonotonicity { let field = self.field(); let data_type = field.data_type(); @@ -550,11 +549,14 @@ impl AggregateFunctionExpr { // If the aggregate expressions are set-monotonic, the output data is // naturally ordered with it per group or partition. let monotonicity = self.set_monotonicity(); - if !monotonicity.is_monotonic() { + if monotonicity == AggregateExprSetMonotonicity::NotMonotonic { return None; } let expr = Arc::new(Column::new(self.name(), aggr_func_idx)); - let options = SortOptions::new(monotonicity.is_decreasing(), false); + let options = SortOptions::new( + monotonicity == AggregateExprSetMonotonicity::Decreasing, + false, + ); Some(PhysicalSortExpr { expr, options }) } } diff --git a/parquet-testing b/parquet-testing index f4d7ed772a62a..e45cd23f784aa 160000 --- a/parquet-testing +++ b/parquet-testing @@ -1 +1 @@ -Subproject commit f4d7ed772a62a95111db50fbcad2460833e8c882 +Subproject commit e45cd23f784aab3d6bf0701f8f4e621469ed3be7 diff --git a/testing b/testing index d2a1371230349..98fceecd024dc 160000 --- a/testing +++ b/testing @@ -1 +1 @@ -Subproject commit d2a13712303498963395318a4eb42872e66aead7 +Subproject commit 98fceecd024dccd2f8a00e32fc144975f218acf4