From 9b2274549864457074925743b683df881fbd8595 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Thu, 29 Jun 2023 09:42:13 -0400
Subject: [PATCH 01/89] POC: Demonstrate new GroupHashAggregate stream approach

---
 .../core/src/physical_plan/aggregates/mod.rs  |  18 +-
 .../src/physical_plan/aggregates/row_hash.rs  |   2 +
 .../src/physical_plan/aggregates/row_hash2.rs | 449 ++++++++++++++++++
 datafusion/physical-expr/Cargo.toml           |   1 +
 .../physical-expr/src/aggregate/average.rs    | 218 ++++++++-
 .../src/aggregate/groups_accumulator.rs       | 100 ++++
 datafusion/physical-expr/src/aggregate/mod.rs |  15 +
 datafusion/physical-expr/src/lib.rs           |   2 +
 8 files changed, 801 insertions(+), 4 deletions(-)
 create mode 100644 datafusion/core/src/physical_plan/aggregates/row_hash2.rs
 create mode 100644 datafusion/physical-expr/src/aggregate/groups_accumulator.rs
diff --git a/datafusion/core/src/physical_plan/aggregates/mod.rs b/datafusion/core/src/physical_plan/aggregates/mod.rs
index 343f7628b71e..e086b545b885 100644
--- a/datafusion/core/src/physical_plan/aggregates/mod.rs
+++ b/datafusion/core/src/physical_plan/aggregates/mod.rs
@@ -49,6 +49,7 @@ use std::sync::Arc;
 mod bounded_aggregate_stream;
 mod no_grouping;
 mod row_hash;
+mod row_hash2;
 mod utils;
 
 pub use datafusion_expr::AggregateFunction;
@@ -58,6 +59,8 @@ use datafusion_physical_expr::utils::{
     get_finer_ordering, ordering_satisfy_requirement_concrete,
 };
 
+use self::row_hash2::GroupedHashAggregateStream2;
+
 /// Hash aggregate modes
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub enum AggregateMode {
@@ -196,6 +199,7 @@ impl PartialEq for PhysicalGroupBy {
 enum StreamType {
     AggregateStream(AggregateStream),
     GroupedHashAggregateStream(GroupedHashAggregateStream),
+    GroupedHashAggregateStream2(GroupedHashAggregateStream2),
     BoundedAggregate(BoundedAggregateStream),
 }
 
@@ -204,6 +208,7 @@ impl From<StreamType> for SendableRecordBatchStream {
         match stream {
             StreamType::AggregateStream(stream) => Box::pin(stream),
             StreamType::GroupedHashAggregateStream(stream) => Box::pin(stream),
+            StreamType::GroupedHashAggregateStream2(stream) => Box::pin(stream),
             StreamType::BoundedAggregate(stream) => Box::pin(stream),
         }
     }
@@ -711,12 +716,23 @@ impl AggregateExec {
                 partition,
                 aggregation_ordering,
             )?))
+        } else if self.use_poc_group_by() {
+            Ok(StreamType::GroupedHashAggregateStream2(
+                GroupedHashAggregateStream2::new(self, context, partition)?,
+            ))
         } else {
             Ok(StreamType::GroupedHashAggregateStream(
                 GroupedHashAggregateStream::new(self, context, partition)?,
             ))
         }
     }
+
+    /// Returns true if we should use the POC group by stream (`GroupedHashAggregateStream2`)
+    /// TODO: check for actually supported aggregates, etc
+    fn use_poc_group_by(&self) -> bool {
+        // always true for now: no check of the aggregate expressions is made yet
+        true
+    }
 }
 
 impl ExecutionPlan for AggregateExec {
@@ -980,7 +996,7 @@ fn group_schema(schema: &Schema, group_count: usize) -> SchemaRef {
     Arc::new(Schema::new(group_fields))
 }
 
-/// returns physical expressions to evaluate against a batch
+/// returns physical expressions for arguments to evaluate against a batch
 /// The expressions are different depending on `mode`:
 /// * Partial: AggregateExpr::expressions
 /// * Final: columns of `AggregateExpr::state_fields()`
diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash.rs b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
index beb70f1b4c55..46f460d5a6d8 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
@@ -17,6 +17,7 @@
 
 //! Hash aggregation through row format
 
+use log::info;
 use std::cmp::min;
 use std::ops::Range;
 use std::sync::Arc;
@@ -119,6 +120,7 @@ impl GroupedHashAggregateStream {
         context: Arc<TaskContext>,
         partition: usize,
     ) -> Result<Self> {
+        info!("Creating GroupedHashAggregateStream");
         let agg_schema = Arc::clone(&agg.schema);
         let agg_group_by = agg.group_by.clone();
         let agg_filter_expr = agg.filter_expr.clone();
diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
new file mode 100644
index 000000000000..90e7cd0724b2
--- /dev/null
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -0,0 +1,449 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Hash aggregation through row format
+//!
+//! POC demonstration of GroupByHashApproach
+
+use datafusion_physical_expr::GroupsAccumulator;
+use log::info;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use std::vec;
+
+use ahash::RandomState;
+use arrow::row::{OwnedRow, RowConverter, SortField};
+use datafusion_physical_expr::hash_utils::create_hashes;
+use futures::ready;
+use futures::stream::{Stream, StreamExt};
+
+use crate::physical_plan::aggregates::{
+    evaluate_group_by, evaluate_many, evaluate_optional, group_schema, AggregateMode,
+    PhysicalGroupBy,
+};
+use crate::physical_plan::metrics::{BaselineMetrics, RecordOutput};
+use crate::physical_plan::{aggregates, AggregateExpr, PhysicalExpr};
+use crate::physical_plan::{RecordBatchStream, SendableRecordBatchStream};
+use arrow::array::*;
+use arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
+use datafusion_common::Result;
+use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt};
+use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation};
+use datafusion_execution::TaskContext;
+use hashbrown::raw::RawTable;
+
+#[derive(Debug, Clone)]
+/// Tracks which phase the aggregation stream is in: reading input, producing output, or done
+pub(crate) enum ExecutionState {
+    ReadingInput, // still polling the input stream and aggregating each batch
+    /// When producing output, the remaining rows to output are stored
+    /// here and are sliced off as needed in batch_size chunks
+    ProducingOutput(RecordBatch),
+    Done, // all output has been returned; the stream yields `None`
+}
+
+use super::AggregateExec;
+
+/// Grouping aggregate
+///
+/// For each aggregation entry, we use:
+/// - [Arrow-row] represents grouping keys for fast hash computation and comparison directly on raw bytes.
+/// - [GroupsAccumulator] to store per group aggregates
+///
+/// The architecture is the following:
+///
+/// TODO
+///
+/// [Arrow-row]: arrow::row
+pub(crate) struct GroupedHashAggregateStream2 {
+    schema: SchemaRef,
+    input: SendableRecordBatchStream,
+    mode: AggregateMode,
+
+    /// Accumulators, one for each `AggregateExpr` in the query
+    accumulators: Vec<Box<dyn GroupsAccumulator>>,
+    /// Argument expressions for each accumulator
+    aggregate_arguments: Vec<Vec<Arc<dyn PhysicalExpr>>>,
+    /// Filter expression to evaluate for each aggregate
+    filter_expressions: Vec<Option<Arc<dyn PhysicalExpr>>>,
+
+    /// Converter for each row
+    row_converter: RowConverter,
+    group_by: PhysicalGroupBy,
+
+    /// The memory reservation for this grouping
+    reservation: MemoryReservation,
+
+    /// Logically maps group values to a group_index into `group_by_values`
+    ///
+    /// Uses the raw API of hashbrown to avoid actually storing the
+    /// keys in the table
+    ///
+    /// keys: u64 hashes of the GroupValue
+    /// values: (hash, index into `group_by_values`)
+    map: RawTable<(u64, usize)>,
+
+    /// The actual group by values, stored in arrow Row format;
+    /// the position of a row in this Vec is its group_index. See
+    /// https://github.com/apache/arrow-rs/issues/4466
+    group_by_values: Vec<OwnedRow>,
+
+    /// scratch space for the current Batch / Aggregate being
+    /// processed. Saved here to avoid reallocations
+    current_group_indices: Vec<usize>,
+
+    /// Current phase of the stream: reading input, producing output, or done
+    exec_state: ExecutionState,
+
+    baseline_metrics: BaselineMetrics,
+
+    random_state: RandomState,
+    /// size to be used for resulting RecordBatches
+    batch_size: usize,
+}
+
+impl GroupedHashAggregateStream2 {
+    /// Create a new GroupedHashAggregateStream2 that aggregates `partition` of `agg`'s input
+    pub fn new(
+        agg: &AggregateExec,
+        context: Arc<TaskContext>,
+        partition: usize,
+    ) -> Result<Self> {
+        info!("Creating GroupedHashAggregateStream2");
+        let agg_schema = Arc::clone(&agg.schema);
+        let agg_group_by = agg.group_by.clone();
+        let agg_filter_expr = agg.filter_expr.clone();
+
+        let batch_size = context.session_config().batch_size();
+        let input = agg.input.execute(partition, Arc::clone(&context))?;
+        let baseline_metrics = BaselineMetrics::new(&agg.metrics, partition);
+
+        let timer = baseline_metrics.elapsed_compute().timer();
+
+        let mut aggregate_exprs = vec![];
+        let mut aggregate_arguments = vec![];
+
+        // The expressions to evaluate the batch, one vec of expressions per aggregation.
+        // Assuming create_schema() always puts group columns in front of aggregation columns, we
+        // pass the group expression count as the third argument (presumably `col_idx_base`).
+
+        let all_aggregate_expressions = aggregates::aggregate_expressions(
+            &agg.aggr_expr,
+            &agg.mode,
+            agg_group_by.expr.len(),
+        )?;
+        let filter_expressions = match agg.mode {
+            AggregateMode::Partial | AggregateMode::Single => agg_filter_expr,
+            AggregateMode::Final | AggregateMode::FinalPartitioned => {
+                vec![None; agg.aggr_expr.len()]
+            }
+        };
+
+        for (agg_expr, agg_args) in agg
+            .aggr_expr
+            .iter()
+            .zip(all_aggregate_expressions.into_iter())
+        {
+            aggregate_exprs.push(agg_expr.clone());
+            aggregate_arguments.push(agg_args);
+        }
+
+        let accumulators = create_accumulators(aggregate_exprs)?;
+
+        let group_schema = group_schema(&agg_schema, agg_group_by.expr.len());
+        let row_converter = RowConverter::new(
+            group_schema
+                .fields()
+                .iter()
+                .map(|f| SortField::new(f.data_type().clone()))
+                .collect(),
+        )?;
+
+        let name = format!("GroupedHashAggregateStream2[{partition}]");
+        let reservation = MemoryConsumer::new(name).register(context.memory_pool());
+        let map = RawTable::with_capacity(0);
+        let group_by_values = vec![];
+        let current_group_indices = vec![];
+
+        timer.done();
+
+        let exec_state = ExecutionState::ReadingInput;
+
+        Ok(GroupedHashAggregateStream2 {
+            schema: agg_schema,
+            input,
+            mode: agg.mode,
+            accumulators,
+            aggregate_arguments,
+            filter_expressions,
+            row_converter,
+            group_by: agg_group_by,
+            reservation,
+            map,
+            group_by_values,
+            current_group_indices,
+            exec_state,
+            baseline_metrics,
+            random_state: Default::default(),
+            batch_size,
+        })
+    }
+}
+
+/// Create a `GroupsAccumulator` for each of the aggregate_exprs to hold the aggregation state
+fn create_accumulators(
+    aggregate_exprs: Vec<Arc<dyn AggregateExpr>>,
+) -> Result<Vec<Box<dyn GroupsAccumulator>>> {
+    info!("Creating accumulator for {aggregate_exprs:#?}");
+    aggregate_exprs
+        .into_iter()
+        .map(|agg_expr| agg_expr.create_groups_accumulator())
+        .collect()
+}
+
+impl Stream for GroupedHashAggregateStream2 {
+    type Item = Result<RecordBatch>;
+
+    fn poll_next(
+        mut self: std::pin::Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<Option<Self::Item>> {
+        let elapsed_compute = self.baseline_metrics.elapsed_compute().clone();
+
+        loop {
+            let exec_state = self.exec_state.clone();
+            match exec_state {
+                ExecutionState::ReadingInput => {
+                    match ready!(self.input.poll_next_unpin(cx)) {
+                        // new batch to aggregate
+                        Some(Ok(batch)) => {
+                            let timer = elapsed_compute.timer();
+                            let result = self.group_aggregate_batch(batch);
+                            timer.done();
+
+                            // grow the reservation by what aggregating this batch allocated
+                            // This happens AFTER we actually used the memory, but simplifies the whole accounting and we are OK with
+                            // overshooting a bit. Also this means we either store the whole record batch or not.
+                            let result = result.and_then(|allocated| {
+                                self.reservation.try_grow(allocated)
+                            });
+
+                            if let Err(e) = result {
+                                return Poll::Ready(Some(Err(e)));
+                            }
+                        }
+                        // inner had error, return to caller
+                        Some(Err(e)) => return Poll::Ready(Some(Err(e))),
+                        // inner is done: build the output batch and switch to producing output
+                        None => {
+                            let timer = elapsed_compute.timer();
+                            match self.create_batch_from_map() {
+                                Ok(batch) => {
+                                    self.exec_state =
+                                        ExecutionState::ProducingOutput(batch)
+                                }
+                                Err(e) => return Poll::Ready(Some(Err(e))),
+                            }
+                            timer.done();
+                        }
+                    }
+                }
+
+                ExecutionState::ProducingOutput(batch) => {
+                    // slice off a part of the batch, if needed
+                    let output_batch = if batch.num_rows() <= self.batch_size {
+                        self.exec_state = ExecutionState::Done;
+                        batch
+                    } else {
+                        // output first batch_size rows
+                        let num_remaining = batch.num_rows() - self.batch_size;
+                        let remaining = batch.slice(self.batch_size, num_remaining);
+                        self.exec_state = ExecutionState::ProducingOutput(remaining);
+                        batch.slice(0, self.batch_size)
+                    };
+                    return Poll::Ready(Some(Ok(
+                        output_batch.record_output(&self.baseline_metrics)
+                    )));
+                }
+
+                ExecutionState::Done => return Poll::Ready(None),
+            }
+        }
+    }
+}
+
+impl RecordBatchStream for GroupedHashAggregateStream2 {
+    fn schema(&self) -> SchemaRef {
+        Arc::clone(&self.schema)
+    }
+}
+
+impl GroupedHashAggregateStream2 {
+    /// Update `map` and `group_by_values` based on the group_by values (result of evaluating the group_by_expressions)
+    ///
+    /// At the return of this function,
+    /// `self.current_group_indices` has the correct
+    /// group_index for each row in the group_values
+    fn update_group_state(
+        &mut self,
+        group_values: &[ArrayRef],
+        allocated: &mut usize,
+    ) -> Result<()> {
+        // Convert the group keys into the row format
+        let group_rows = self.row_converter.convert_columns(group_values)?;
+        let n_rows = group_rows.num_rows();
+        // 1.1 construct the key from the group values
+        // 1.2 construct the mapping key if it does not exist
+
+        // tracks to which group each of the input rows belongs
+        let group_indices = &mut self.current_group_indices;
+        group_indices.clear();
+
+        // 1.1 Calculate the group keys for the group values
+        let mut batch_hashes = vec![0; n_rows];
+        create_hashes(group_values, &self.random_state, &mut batch_hashes)?;
+
+        for (row, hash) in batch_hashes.into_iter().enumerate() {
+            let entry = self.map.get_mut(hash, |(_hash, group_idx)| {
+                // verify that a group that we are inserting with hash is
+                // actually the same key value as the group in
+                // group_idx  (aka group_values @ row)
+
+                // TODO update *allocated based on size of the row
+                // that was just pushed into
+                // self.group_by_values
+                group_rows.row(row) == self.group_by_values[*group_idx].row()
+            });
+
+            let group_idx = match entry {
+                // Existing group_index for this group value
+                Some((_hash, group_idx)) => *group_idx,
+                //  1.2 Need to create new entry for the group
+                None => {
+                    // Add new entry to group_by_values and save newly created index
+                    let group_idx = self.group_by_values.len();
+                    self.group_by_values.push(group_rows.row(row).owned());
+
+                    // for hasher function, use precomputed hash value
+                    self.map.insert_accounted(
+                        (hash, group_idx),
+                        |(hash, _group_index)| *hash,
+                        allocated,
+                    );
+                    group_idx
+                }
+            };
+            group_indices.push_accounted(group_idx, allocated);
+        }
+        Ok(())
+    }
+
+    /// Perform group-by aggregation for the given [`RecordBatch`].
+    ///
+    /// If successful, returns the additional amount of memory, in
+    /// bytes, that were allocated during this process.
+    ///
+    fn group_aggregate_batch(&mut self, batch: RecordBatch) -> Result<usize> {
+        // Evaluate the grouping expressions:
+        let group_by_values = evaluate_group_by(&self.group_by, &batch)?;
+
+        // Keep track of memory allocated:
+        let mut allocated = 0usize;
+
+        // Evaluate the aggregation expressions.
+        let input_values = evaluate_many(&self.aggregate_arguments, &batch)?;
+        // Evaluate the filter expressions, if any, against the inputs
+        let filter_values = evaluate_optional(&self.filter_expressions, &batch)?;
+
+        let row_converter_size_pre = self.row_converter.size();
+        for group_values in &group_by_values {
+            // calculate the group indices for each input row
+            self.update_group_state(group_values, &mut allocated)?;
+            let group_indices = &self.current_group_indices;
+
+            // Gather the inputs to call the actual aggregation
+            let t = self
+                .accumulators
+                .iter_mut()
+                .zip(input_values.iter())
+                .zip(filter_values.iter());
+
+            let total_num_groups = self.group_by_values.len();
+
+            for ((acc, values), opt_filter) in t {
+                let acc_size_pre = acc.size();
+                let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean());
+
+                match self.mode {
+                    AggregateMode::Partial | AggregateMode::Single => {
+                        acc.update_batch(
+                            values,
+                            &group_indices,
+                            opt_filter,
+                            total_num_groups,
+                        )?;
+                    }
+                    AggregateMode::FinalPartitioned | AggregateMode::Final => {
+                        // if aggregation is over intermediate states,
+                        // use merge
+                        acc.merge_batch(
+                            values,
+                            &group_indices,
+                            opt_filter,
+                            total_num_groups,
+                        )?;
+                    }
+                }
+
+                allocated += acc.size().saturating_sub(acc_size_pre);
+            }
+        }
+        allocated += self
+            .row_converter
+            .size()
+            .saturating_sub(row_converter_size_pre);
+
+        Ok(allocated)
+    }
+}
+
+impl GroupedHashAggregateStream2 {
+    /// Create an output RecordBatch with all group keys and accumulator states/values
+    fn create_batch_from_map(&mut self) -> Result<RecordBatch> {
+        if self.group_by_values.is_empty() {
+            let schema = self.schema.clone();
+            return Ok(RecordBatch::new_empty(schema));
+        }
+
+        // First output columns are the group keys, converted back from the row format
+        let groups_rows = self.group_by_values.iter().map(|owned_row| owned_row.row());
+
+        let mut output: Vec<ArrayRef> = self.row_converter.convert_rows(groups_rows)?;
+
+        // Next output the accumulators: intermediate state for Partial, final values otherwise
+        for acc in self.accumulators.iter_mut() {
+            match self.mode {
+                AggregateMode::Partial => output.extend(acc.state()?),
+                AggregateMode::Final
+                | AggregateMode::FinalPartitioned
+                | AggregateMode::Single => output.push(acc.evaluate()?),
+            }
+        }
+
+        Ok(RecordBatch::try_new(self.schema.clone(), output)?)
+    }
+}
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index 04ba2b9e3872..a8f82e60e473 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -59,6 +59,7 @@ indexmap = "2.0.0"
 itertools = { version = "0.11", features = ["use_std"] }
 lazy_static = { version = "^1.4.0" }
 libc = "0.2.140"
+log = "^0.4"
 md-5 = { version = "^0.10.0", optional = true }
 paste = "^1.0"
 petgraph = "0.6.2"
diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 3c76da51a9d4..f81c704d8b7f 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -17,6 +17,9 @@
 
 //! Defines physical expressions that can evaluated at runtime during query execution
 
+use arrow::array::AsArray;
+use log::info;
+
 use std::any::Any;
 use std::convert::TryFrom;
 use std::sync::Arc;
@@ -29,14 +32,14 @@ use crate::aggregate::sum::sum_batch;
 use crate::aggregate::utils::calculate_result_decimal_for_avg;
 use crate::aggregate::utils::down_cast_any_ref;
 use crate::expressions::format_state_name;
-use crate::{AggregateExpr, PhysicalExpr};
+use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::compute;
-use arrow::datatypes::DataType;
+use arrow::datatypes::{DataType, Decimal128Type, UInt64Type};
 use arrow::{
     array::{ArrayRef, UInt64Array},
     datatypes::Field,
 };
-use arrow_array::Array;
+use arrow_array::{Array, ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
 use datafusion_common::{downcast_value, ScalarValue};
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::Accumulator;
@@ -155,6 +158,22 @@ impl AggregateExpr for Avg {
             &self.rt_data_type,
         )?))
     }
+
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        // instantiate specialized accumulator (only Decimal128 sums are supported so far)
+        match self.sum_data_type {
+            DataType::Decimal128(_, _) => {
+                Ok(Box::new(AvgGroupsAccumulator::<Decimal128Type>::new(
+                    &self.sum_data_type,
+                    &self.rt_data_type,
+                )))
+            }
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "AvgGroupsAccumulator for {}",
+                self.sum_data_type
+            ))),
+        }
+    }
 }
 
 impl PartialEq<dyn Any> for Avg {
@@ -383,6 +402,199 @@ impl RowAccumulator for AvgRowAccumulator {
     }
 }
 
+/// An accumulator to compute the average of PrimitiveArray<T>.
+/// Stores values as native types, indexed by group index
+#[derive(Debug)]
+struct AvgGroupsAccumulator<T: ArrowNumericType + Send> {
+    /// The type of the internal sum
+    sum_data_type: DataType,
+
+    /// The type of the returned value (the average)
+    return_data_type: DataType,
+
+    /// Count per group (use u64 to make UInt64Array)
+    counts: Vec<u64>,
+
+    /// Sums per group, stored as the native type
+    sums: Vec<T::Native>,
+}
+
+impl<T: ArrowNumericType + Send> AvgGroupsAccumulator<T> {
+    /// Creates an accumulator that tracks no groups yet; both types are cloned and stored
+    pub fn new(sum_data_type: &DataType, return_data_type: &DataType) -> Self {
+        info!(
+            "AvgGroupsAccumulator ({}, sum type: {sum_data_type:?}) --> {return_data_type:?}",
+            std::any::type_name::<T>()
+        );
+        Self {
+            return_data_type: return_data_type.clone(),
+            sum_data_type: sum_data_type.clone(),
+            counts: vec![],
+            sums: vec![],
+        }
+    }
+
+    /// Adds each non-null value in `values` to the sum of its group.
+    ///
+    /// `group_indicies[i]` is the group of `values[i]`; `self.sums` is first resized
+    /// to `total_num_groups` entries. Sums wrap on overflow; `opt_filter` is ignored.
+    fn update_sums(
+        &mut self,
+        values: &PrimitiveArray<T>,
+        group_indicies: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        self.sums.resize(total_num_groups, T::default_value());
+
+        // TODO combine the null mask from values and opt_filter
+        let valids = values.nulls();
+
+        // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
+        let data: &[T::Native] = values.values();
+
+        match valids {
+            // no null buffer: every value is valid and is added to its group
+            None => {
+                for (&group_index, &new_value) in group_indicies.iter().zip(data) {
+                    // `add_wrapping` returns the sum, so it must be stored back
+                    let sum = &mut self.sums[group_index];
+                    *sum = sum.add_wrapping(new_value);
+                }
+            }
+            // some values may be null: walk the validity bitmap 64 rows at a time
+            Some(valids) => {
+                let group_indices_chunks = group_indicies.chunks_exact(64);
+                let data_chunks = data.chunks_exact(64);
+                let bit_chunks = valids.inner().bit_chunks();
+
+                let group_indices_remainder = group_indices_chunks.remainder();
+                let data_remainder = data_chunks.remainder();
+
+                let chunks = group_indices_chunks.zip(data_chunks).zip(bit_chunks.iter());
+                for ((group_index_chunk, data_chunk), mask) in chunks {
+                    // index_mask has value 1 << i in the loop
+                    let mut index_mask = 1;
+                    let pairs = group_index_chunk.iter().zip(data_chunk);
+                    for (&group_index, &new_value) in pairs {
+                        if (mask & index_mask) != 0 {
+                            let sum = &mut self.sums[group_index];
+                            *sum = sum.add_wrapping(new_value);
+                        }
+                        index_mask <<= 1;
+                    }
+                }
+
+                // rows left over after the last full chunk of 64
+                let remainder_bits = bit_chunks.remainder_bits();
+                let remainder = group_indices_remainder.iter().zip(data_remainder);
+                for (i, (&group_index, &new_value)) in remainder.enumerate() {
+                    if remainder_bits & (1 << i) != 0 {
+                        let sum = &mut self.sums[group_index];
+                        *sum = sum.add_wrapping(new_value);
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+impl<T: ArrowNumericType + Send> GroupsAccumulator for AvgGroupsAccumulator<T> {
+    /// Folds raw input values into the per-group counts and sums. Expects exactly one
+    /// array in `values`; null values add to neither the count nor the sum of their
+    /// group. TODO: `opt_filter` is not applied yet.
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indicies: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "single argument to update_batch");
+        let values = values.get(0).unwrap().as_primitive::<T>();
+
+        // count only non-null values (TODO account for opt_filter)
+        self.counts.resize(total_num_groups, 0);
+        group_indicies.iter().enumerate().for_each(|(row, &group_idx)| {
+            self.counts[group_idx] += u64::from(values.is_valid(row));
+        });
+
+        // update values
+        self.update_sums(values, group_indicies, opt_filter, total_num_groups)
+    }
+
+    /// Merges partial state into the per-group counts and sums. Expects two arrays
+    /// in `values`: partial counts (`UInt64`) followed by partial sums.
+    /// TODO: `opt_filter` is not applied yet.
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indicies: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 2, "two arguments to merge_batch");
+        // first array is counts, second is partial sums
+        let counts = values.get(0).unwrap().as_primitive::<UInt64Type>();
+        let partial_sums = values.get(1).unwrap().as_primitive::<T>();
+
+        // update counts by summing the partial counts (TODO account for opt_filter)
+        self.counts.resize(total_num_groups, 0);
+        for (&group_idx, &count) in group_indicies.iter().zip(counts.values().iter()) {
+            self.counts[group_idx] += count;
+        }
+
+        // update values
+        self.update_sums(partial_sums, group_indicies, opt_filter, total_num_groups)
+    }
+
+    /// Not implemented yet: returns a `NotImplemented` error instead of panicking
+    fn evaluate(&mut self) -> Result<ArrayRef> {
+        Err(DataFusionError::NotImplemented("AvgGroupsAccumulator::evaluate".into()))
+    }
+
+    /// Returns the intermediate state as `[counts, sums]`, leaving this accumulator empty
+    fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        // create array from vec is zero copy
+        let counts = UInt64Array::from(std::mem::take(&mut self.counts));
+
+        // TODO figure out how to do this without the iter / copy
+        let sums = std::mem::take(&mut self.sums);
+        let sums: PrimitiveArray<T> = PrimitiveArray::from_iter_values(sums);
+
+        // fix up decimal precision and scale
+        let sums = set_decimal_precision(&self.sum_data_type, Arc::new(sums))?;
+
+        // `sums` is already an `ArrayRef`, so it is not wrapped in another `Arc`
+        Ok(vec![Arc::new(counts) as ArrayRef, sums])
+    }
+
+    /// Bytes held by the per-group `counts` and `sums` vectors
+    fn size(&self) -> usize {
+        self.counts.capacity() * std::mem::size_of::<u64>()
+            + self.sums.capacity() * std::mem::size_of::<T::Native>()
+    }
+}
+
+/// Adjust array type metadata if needed
+///
+/// Decimal128Arrays are created from Vec<NativeType> with default
+/// precision and scale. This function sets them to those of `sum_data_type`.
+fn set_decimal_precision(sum_data_type: &DataType, array: ArrayRef) -> Result<ArrayRef> {
+    let array = match sum_data_type {
+        DataType::Decimal128(p, s) => Arc::new(
+            array
+                .as_primitive::<Decimal128Type>()
+                .clone()
+                .with_precision_and_scale(*p, *s)?,
+        ),
+        // no adjustment needed for other arrays
+        _ => array,
+    };
+    Ok(array)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator.rs
new file mode 100644
index 000000000000..82cfbfaa31c8
--- /dev/null
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator.rs
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Vectorized [`GroupsAccumulator`]
+
+use arrow_array::{ArrayRef, BooleanArray};
+use datafusion_common::Result;
+
+/// An implementation of GroupsAccumulator is for a single aggregate
+/// (e.g. AVG) and stores the state for *all* groups internally
+///
+/// The logical model is that each group is given a `group_index`
+/// assigned and maintained by the hash table.
+///
+/// group_indexes are contiguous (there aren't gaps), and thus it is
+/// expected that each GroupsAccumulator will use something like `Vec<..>`
+/// to store the group states.
+pub trait GroupsAccumulator: Send {
+    /// updates the accumulator's state from a vector of arrays:
+    ///
+    /// * `values`: the input arguments to the accumulator
+    /// * `group_indicies`: the group index to which each row in `values` belongs
+    /// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
+    /// * `total_num_groups`: the number of groups (the largest group_index is total_num_groups - 1)
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indicies: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()>;
+
+    /// Returns the final aggregate value for each group as a single
+    /// array
+    ///
+    /// OPEN QUESTION: Should this method take a "batch_size: usize"
+    /// and produce a Vec<RecordBatch> as output to avoid 1) requiring
+    /// one giant intermediate buffer?
+    ///
+    /// For example, the `SUM` accumulator maintains a running sum,
+    /// and `evaluate` will produce that running sum as its output for
+    /// all groups, in group_index order
+    ///
+    /// This call should be treated as consuming (it would take `self`,
+    /// but cannot while keeping the trait object safe): the accumulator
+    /// is free to release / reset its internal state after this call
+    /// and error on any subsequent call.
+    fn evaluate(&mut self) -> Result<ArrayRef>;
+
+    /// Returns any intermediate aggregate state used for multi-phase grouping
+    ///
+    /// For example, AVG returns two arrays:  `SUM` and `COUNT`.
+    ///
+    /// This call should be treated as consuming (it would take `self`,
+    /// but cannot while keeping the trait object safe): the accumulator
+    /// is free to release / reset its internal state after this call
+    /// and error on any subsequent call.
+    ///
+    /// TODO: consider returning a single Array (which could be a
+    /// StructArray) instead
+    fn state(&mut self) -> Result<Vec<ArrayRef>>;
+
+    /// merges intermediate state (from `state()`) into this accumulators values
+    ///
+    /// For some aggregates (such as `SUM`), merge_batch is the same
+    /// as `update_batch`, but for some aggregates (such as `COUNT`)
+    /// the operations differ. See [`Self::state`] for more details on how
+    /// state is used and merged.
+    ///
+    /// * `values`: arrays produced from calling `state` previously to the accumulator
+    /// * `group_indicies`: the group index to which each row in `values` belongs
+    /// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
+    /// * `total_num_groups`: the number of groups (the largest group_index is total_num_groups - 1)
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indicies: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()>;
+
+    /// Amount of memory used to store the state of this
+    /// accumulator. This function is called once per batch, so it
+    /// should be O(n) to compute
+    fn size(&self) -> usize;
+}
diff --git a/datafusion/physical-expr/src/aggregate/mod.rs b/datafusion/physical-expr/src/aggregate/mod.rs
index 9be6d5e1ba12..4b613c8e9b0e 100644
--- a/datafusion/physical-expr/src/aggregate/mod.rs
+++ b/datafusion/physical-expr/src/aggregate/mod.rs
@@ -25,6 +25,8 @@ use std::any::Any;
 use std::fmt::Debug;
 use std::sync::Arc;
 
+use self::groups_accumulator::GroupsAccumulator;
+
 pub(crate) mod approx_distinct;
 pub(crate) mod approx_median;
 pub(crate) mod approx_percentile_cont;
@@ -45,6 +47,7 @@ pub(crate) mod median;
 #[macro_use]
 pub(crate) mod min_max;
 pub mod build_in;
+pub(crate) mod groups_accumulator;
 mod hyperloglog;
 pub mod moving_min_max;
 pub mod row_accumulator;
@@ -118,6 +121,18 @@ pub trait AggregateExpr: Send + Sync + Debug + PartialEq<dyn Any> {
         )))
     }
 
+    /// Return a specialized [`GroupsAccumulator`] that manages state for all groups
+    ///
+    /// For maximum performance, [`GroupsAccumulator`] should be
+    /// implemented rather than [`Accumulator`].
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        // TODO: The default should implement a wrapper over
+        // self.create_accumulator
+        Err(DataFusionError::NotImplemented(format!(
+            "GroupsAccumulator hasn't been implemented for {self:?} yet"
+        )))
+    }
+
     /// Construct an expression that calculates the aggregate in reverse.
     /// Typically the "reverse" expression is itself (e.g. SUM, COUNT).
     /// For aggregates that do not support calculation in reverse,
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
index 0a2e0e58df7a..6ea8dc94879f 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -45,7 +45,9 @@ pub mod var_provider;
 pub mod window;
 
 // reexport this to maintain compatibility with anything that used from_slice previously
+pub use aggregate::groups_accumulator::GroupsAccumulator;
 pub use aggregate::AggregateExpr;
+
 pub use equivalence::{
     project_equivalence_properties, project_ordering_equivalence_properties,
     EquivalenceProperties, EquivalentClass, OrderingEquivalenceProperties,

From 4ce66716416e9907a76a3e470f4b6fb431247698 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 30 Jun 2023 11:28:48 -0400
Subject: [PATCH 02/89] complete accumulator

---
 .../physical-expr/src/aggregate/average.rs    | 76 +++++++++++++++----
 1 file changed, 61 insertions(+), 15 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index f81c704d8b7f..b23b555805e3 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -18,7 +18,7 @@
 //! Defines physical expressions that can evaluated at runtime during query execution
 
 use arrow::array::AsArray;
-use log::info;
+use log::debug;
 
 use std::any::Any;
 use std::convert::TryFrom;
@@ -45,6 +45,8 @@ use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::Accumulator;
 use datafusion_row::accessor::RowAccessor;
 
+use super::utils::Decimal128Averager;
+
 /// AVG aggregate expression
 #[derive(Debug, Clone)]
 pub struct Avg {
@@ -161,16 +163,29 @@ impl AggregateExpr for Avg {
 
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         // instantiate specialized accumulator
-        match self.sum_data_type {
-            DataType::Decimal128(_, _) => {
-                Ok(Box::new(AvgGroupsAccumulator::<Decimal128Type>::new(
+        match (&self.sum_data_type, &self.rt_data_type) {
+            (
+                DataType::Decimal128(_sum_precision, sum_scale),
+                DataType::Decimal128(target_precision, target_scale),
+            ) => {
+                let decimal_averager = Decimal128Averager::try_new(
+                    *sum_scale,
+                    *target_precision,
+                    *target_scale,
+                )?;
+
+                let avg_fn =
+                    move |sum: i128, count: u64| decimal_averager.avg(sum, count as i128);
+
+                Ok(Box::new(AvgGroupsAccumulator::<Decimal128Type, _>::new(
                     &self.sum_data_type,
                     &self.rt_data_type,
+                    avg_fn,
                 )))
             }
             _ => Err(DataFusionError::NotImplemented(format!(
-                "AvgGroupsAccumulator for {}",
-                self.sum_data_type
+                "AvgGroupsAccumulator for ({} --> {})",
+                self.sum_data_type, self.rt_data_type,
             ))),
         }
     }
@@ -403,9 +418,13 @@ impl RowAccumulator for AvgRowAccumulator {
 }
 
 /// An accumulator to compute the average of PrimitiveArray<T>.
-/// Stores values as native types
+/// Stores values as native types, and does overflow checking
 #[derive(Debug)]
-struct AvgGroupsAccumulator<T: ArrowNumericType + Send> {
+struct AvgGroupsAccumulator<T, F>
+where
+    T: ArrowNumericType + Send,
+    F: Fn(T::Native, u64) -> Result<T::Native> + Send,
+{
     /// The type of the internal sum
     sum_data_type: DataType,
 
@@ -415,13 +434,20 @@ struct AvgGroupsAccumulator<T: ArrowNumericType + Send> {
     /// Count per group (use u64 to make UInt64Array)
     counts: Vec<u64>,
 
-    // Sums per group, stored as the native type
+    /// Sums per group, stored as the native type
     sums: Vec<T::Native>,
+
+    /// Function that computes the average (value / count)
+    avg_fn: F,
 }
 
-impl<T: ArrowNumericType + Send> AvgGroupsAccumulator<T> {
-    pub fn new(sum_data_type: &DataType, return_data_type: &DataType) -> Self {
-        info!(
+impl<T, F> AvgGroupsAccumulator<T, F>
+where
+    T: ArrowNumericType + Send,
+    F: Fn(T::Native, u64) -> Result<T::Native> + Send,
+{
+    pub fn new(sum_data_type: &DataType, return_data_type: &DataType, avg_fn: F) -> Self {
+        debug!(
             "AvgGroupsAccumulator ({}, sum type: {sum_data_type:?}) --> {return_data_type:?}",
             std::any::type_name::<T>()
         );
@@ -430,6 +456,7 @@ impl<T: ArrowNumericType + Send> AvgGroupsAccumulator<T> {
             sum_data_type: sum_data_type.clone(),
             counts: vec![],
             sums: vec![],
+            avg_fn,
         }
     }
 
@@ -500,7 +527,11 @@ impl<T: ArrowNumericType + Send> AvgGroupsAccumulator<T> {
     }
 }
 
-impl<T: ArrowNumericType + Send> GroupsAccumulator for AvgGroupsAccumulator<T> {
+impl<T, F> GroupsAccumulator for AvgGroupsAccumulator<T, F>
+where
+    T: ArrowNumericType + Send,
+    F: Fn(T::Native, u64) -> Result<T::Native> + Send,
+{
     fn update_batch(
         &mut self,
         values: &[ArrayRef],
@@ -549,7 +580,22 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for AvgGroupsAccumulator<T> {
     }
 
     fn evaluate(&mut self) -> Result<ArrayRef> {
-        todo!()
+        let counts = std::mem::take(&mut self.counts);
+        let sums = std::mem::take(&mut self.sums);
+
+        let averages: Vec<T::Native> = sums
+            .into_iter()
+            .zip(counts.into_iter())
+            .map(|(sum, count)| (self.avg_fn)(sum, count))
+            .collect::<Result<Vec<_>>>()?;
+
+        // TODO figure out how to do this without the iter / copy
+        let array = PrimitiveArray::<T>::from_iter_values(averages);
+
+        // fix up decimal precision and scale for decimals
+        let array = set_decimal_precision(&self.return_data_type, Arc::new(array))?;
+
+        Ok(array)
     }
 
     // return arrays for sums and counts
@@ -563,7 +609,7 @@ impl<T: ArrowNumericType + Send> GroupsAccumulator for AvgGroupsAccumulator<T> {
         // TODO figure out how to do this without the iter / copy
         let sums: PrimitiveArray<T> = PrimitiveArray::from_iter_values(sums);
 
-        // fix up decimal precision and scale
+        // fix up decimal precision and scale for decimals
         let sums = set_decimal_precision(&self.sum_data_type, Arc::new(sums))?;
 
         Ok(vec![

From 5694190dbae4be1701efe5c19af3b1d868f537e1 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 30 Jun 2023 12:56:51 -0400
Subject: [PATCH 03/89] touchups

---
 .../core/src/physical_plan/aggregates/row_hash2.rs     |  6 +++---
 datafusion/physical-expr/src/aggregate/average.rs      | 10 +++++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 90e7cd0724b2..2eb058d8c519 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -20,7 +20,7 @@
 //! POC demonstration of GroupByHashApproach
 
 use datafusion_physical_expr::GroupsAccumulator;
-use log::info;
+use log::debug;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 use std::vec;
@@ -123,7 +123,7 @@ impl GroupedHashAggregateStream2 {
         context: Arc<TaskContext>,
         partition: usize,
     ) -> Result<Self> {
-        info!("Creating GroupedHashAggregateStream2");
+        debug!("Creating GroupedHashAggregateStream2");
         let agg_schema = Arc::clone(&agg.schema);
         let agg_group_by = agg.group_by.clone();
         let agg_filter_expr = agg.filter_expr.clone();
@@ -208,7 +208,7 @@ impl GroupedHashAggregateStream2 {
 fn create_accumulators(
     aggregate_exprs: Vec<Arc<dyn AggregateExpr>>,
 ) -> Result<Vec<Box<dyn GroupsAccumulator>>> {
-    info!("Creating accumulator for {aggregate_exprs:#?}");
+    debug!("Creating accumulator for {aggregate_exprs:#?}");
     aggregate_exprs
         .into_iter()
         .map(|agg_expr| agg_expr.create_groups_accumulator())
diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index b23b555805e3..7043ed9ce18f 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -483,7 +483,8 @@ where
             None => {
                 let iter = group_indicies.iter().zip(data.iter());
                 for (group_index, new_value) in iter {
-                    self.sums[*group_index].add_wrapping(*new_value);
+                    let sum = &mut self.sums[*group_index];
+                    *sum = sum.add_wrapping(*new_value);
                 }
             }
             //
@@ -504,7 +505,8 @@ where
                         group_index_chunk.iter().zip(data_chunk.iter()).for_each(
                             |(group_index, new_value)| {
                                 if (mask & index_mask) != 0 {
-                                    self.sums[*group_index].add_wrapping(*new_value);
+                                    let sum = &mut self.sums[*group_index];
+                                    *sum = sum.add_wrapping(*new_value);
                                 }
                                 index_mask <<= 1;
                             },
@@ -518,7 +520,8 @@ where
                     .enumerate()
                     .for_each(|(i, (group_index, new_value))| {
                         if remainder_bits & (1 << i) != 0 {
-                            self.sums[*group_index].add_wrapping(*new_value);
+                            let sum = &mut self.sums[*group_index];
+                            *sum = sum.add_wrapping(*new_value);
                         }
                     });
             }
@@ -550,6 +553,7 @@ where
 
         // update values
         self.update_sums(values, group_indicies, opt_filter, total_num_groups)?;
+
         Ok(())
     }
 

From a58b006e9285867d782fd6df79686143fb6c57f1 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 04:59:30 -0400
Subject: [PATCH 04/89] Add comments

---
 .../src/physical_plan/aggregates/row_hash2.rs | 136 +++++++++++++-----
 1 file changed, 103 insertions(+), 33 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 2eb058d8c519..3e9dbfe0cfb9 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -58,66 +58,136 @@ pub(crate) enum ExecutionState {
 
 use super::AggregateExec;
 
-/// Grouping aggregate
+/// Hash based Grouping Aggregator
 ///
-/// For each aggregation entry, we use:
-/// - [Arrow-row] represents grouping keys for fast hash computation and comparison directly on raw bytes.
-/// - [GroupsAccumulator] to store per group aggregates
+/// # Design Goals
 ///
-/// The architecture is the following:
+/// This structure is designed so that as much as possible can be
+/// vectorized (done in a tight loop)
 ///
-/// TODO
+/// # Architecture
 ///
-/// [WordAligned]: datafusion_row::layout
+/// ```text
+///
+/// stores "group       stores group values,       internally stores aggregate
+///    indexes"          in arrow_row format         values, for all groups
+///
+/// ┌─────────────┐      ┌────────────┐    ┌──────────────┐       ┌──────────────┐
+/// │   ┌─────┐   │      │ ┌────────┐ │    │┌────────────┐│       │┌────────────┐│
+/// │   │  5  │   │ ┌────┼▶│  "A"   │ │    ││accumulator ││       ││accumulator ││
+/// │   ├─────┤   │ │    │ ├────────┤ │    ││     0      ││       ││     N      ││
+/// │   │  9  │   │ │    │ │  "Z"   │ │    ││ ┌────────┐ ││       ││ ┌────────┐ ││
+/// │   └─────┘   │ │    │ └────────┘ │    ││ │ state  │ ││       ││ │ state  │ ││
+/// │     ...     │ │    │            │    ││ │┌─────┐ │ ││  ...  ││ │┌─────┐ │ ││
+/// │   ┌─────┐   │ │    │    ...     │    ││ │├─────┤ │ ││       ││ │├─────┤ │ ││
+/// │   │  1  │───┼─┘    │            │    ││ │└─────┘ │ ││       ││ │└─────┘ │ ││
+/// │   ├─────┤   │      │            │    ││ │        │ ││       ││ │        │ ││
+/// │   │ 13  │───┼─┐    │ ┌────────┐ │    ││ │  ...   │ ││       ││ │  ...   │ ││
+/// │   └─────┘   │ └────┼▶│  "Q"   │ │    ││ │        │ ││       ││ │        │ ││
+/// └─────────────┘      │ └────────┘ │    ││ │┌─────┐ │ ││       ││ │┌─────┐ │ ││
+///                      │            │    ││ │└─────┘ │ ││       ││ │└─────┘ │ ││
+///                      └────────────┘    ││ └────────┘ ││       ││ └────────┘ ││
+///                                        │└────────────┘│       │└────────────┘│
+///                                        └──────────────┘       └──────────────┘
+///
+///       map            group_values                   accumulators
+///  (Hash Table)
+///
+///  ```
+///
+/// For example, given a query like `COUNT(x), SUM(y) ... GROUP BY z`,
+/// `group_values` will store the distinct values of `z`. There will
+/// be one accumulator for `COUNT(x)`, specialized for the data type
+/// of `x` and one accumulator for `SUM(y)`, specialized for the data
+/// type of `y`.
+///
+/// # Description
+///
+/// The hash table stores "group indices", one for each (distinct)
+/// group value.
+///
+/// The group values are stored in [`Self::group_values`] at the
+/// corresponding group index.
+///
+/// The accumulator state (e.g partial sums) is managed by and stored
+/// by a [`GroupsAccumulator`] accumulator. There is one accumulator
+/// per aggregate expression (COUNT, AVG, etc) in the
+/// query. Internally, each `GroupsAccumulator` manages the state for
+/// multiple groups, and is passed `group_indexes` during update. Note
+/// that the accumulator state is not managed by this operator (e.g. in
+/// the hash table).
 pub(crate) struct GroupedHashAggregateStream2 {
     schema: SchemaRef,
     input: SendableRecordBatchStream,
     mode: AggregateMode,
 
     /// Accumulators, one for each `AggregateExpr` in the query
+    ///
+    /// For example, if the query has aggregates, `SUM(x)`,
+    /// `COUNT(y)`, there will be two accumulators, each one
+    /// specialized for that particular aggregate and its input types
     accumulators: Vec<Box<dyn GroupsAccumulator>>,
-    /// Arguments expressionf or each accumulator
+
+    /// Arguments for each accumulator.
     aggregate_arguments: Vec<Vec<Arc<dyn PhysicalExpr>>>,
-    /// Filter expression to evaluate for each aggregate
+
+    /// Optional filter expression to evaluate, one for each
+    /// aggregate. If present, only those rows for which the filter
+    /// evaluates to true should be included in the aggregate results.
+    ///
+    /// For example, for an aggregate like `SUM(x FILTER x > 100)`,
+    /// the filter expression is  `x > 100`.
     filter_expressions: Vec<Option<Arc<dyn PhysicalExpr>>>,
 
     /// Converter for each row
     row_converter: RowConverter,
+
+    /// GROUP BY expressions
     group_by: PhysicalGroupBy,
 
     /// The memory reservation for this grouping
     reservation: MemoryReservation,
 
-    /// Logically maps group values to a group_index `group_states`
+    /// Logically maps group values to a group_index in
+    /// [`Self::group_values`] and in each accumulator
     ///
     /// Uses the raw API of hashbrown to avoid actually storing the
-    /// keys in the table
+    /// keys (group values) in the table
     ///
     /// keys: u64 hashes of the GroupValue
-    /// values: (hash, index into `group_states`)
+    /// values: (hash, group_index)
     map: RawTable<(u64, usize)>,
 
-    /// The actual group by values, stored in arrow Row format
-    /// the index of group_by_values is the index
-    /// https://github.com/apache/arrow-rs/issues/4466
-    group_by_values: Vec<OwnedRow>,
+    /// The actual group by values, stored in arrow [`Row`] format. The
+    /// group_values[i] holds the group value for group_index `i`.
+    ///
+    /// The row format is used to compare group keys quickly. This is
+    /// especially important for multi-column group keys.
+    ///
+    /// TODO, make this Rows (rather than Vec<OwnedRow>) to reduce
+    /// allocations once
+    /// https://github.com/apache/arrow-rs/issues/4466 is available
+    group_values: Vec<OwnedRow>,
 
-    /// scratch space for the current Batch / Aggregate being
-    /// processed. Saved here to avoid reallocations
+    /// scratch space for the current input Batch being
+    /// processed. Reused across batches here to avoid reallocations
     current_group_indices: Vec<usize>,
 
-    /// generating input/output?
+    /// Tracks if this stream is generating input/output?
     exec_state: ExecutionState,
 
+    /// Execution metrics
     baseline_metrics: BaselineMetrics,
 
+    /// Random state for creating hashes
     random_state: RandomState,
-    /// size to be used for resulting RecordBatches
+
+    /// max rows in output RecordBatches
     batch_size: usize,
 }
 
 impl GroupedHashAggregateStream2 {
-    /// Create a new GroupedHashAggregateStream
+    /// Create a new GroupedHashAggregateStream2
     pub fn new(
         agg: &AggregateExec,
         context: Arc<TaskContext>,
@@ -137,15 +207,14 @@ impl GroupedHashAggregateStream2 {
         let mut aggregate_exprs = vec![];
         let mut aggregate_arguments = vec![];
 
-        // The expressions to evaluate the batch, one vec of expressions per aggregation.
-        // Assuming create_schema() always puts group columns in front of aggregation columns, we set
-        // col_idx_base to the group expression count.
-
+        // The arguments for each aggregate, one vec of expressions
+        // per aggregation.
         let all_aggregate_expressions = aggregates::aggregate_expressions(
             &agg.aggr_expr,
             &agg.mode,
             agg_group_by.expr.len(),
         )?;
+
         let filter_expressions = match agg.mode {
             AggregateMode::Partial | AggregateMode::Single => agg_filter_expr,
             AggregateMode::Final | AggregateMode::FinalPartitioned => {
@@ -194,7 +263,7 @@ impl GroupedHashAggregateStream2 {
             group_by: agg_group_by,
             reservation,
             map,
-            group_by_values,
+            group_values: group_by_values,
             current_group_indices,
             exec_state,
             baseline_metrics,
@@ -204,7 +273,8 @@ impl GroupedHashAggregateStream2 {
     }
 }
 
-/// Crate a `GroupsAccumulator` for each of the aggregate_exprs to hold the aggregation state
+/// Crate a [`GroupsAccumulator`] for each of the aggregate_exprs to
+/// hold the aggregation state
 fn create_accumulators(
     aggregate_exprs: Vec<Arc<dyn AggregateExpr>>,
 ) -> Result<Vec<Box<dyn GroupsAccumulator>>> {
@@ -326,7 +396,7 @@ impl GroupedHashAggregateStream2 {
                 // TODO update *allocated based on size of the row
                 // that was just pushed into
                 // aggr_state.group_by_values
-                group_rows.row(row) == self.group_by_values[*group_idx].row()
+                group_rows.row(row) == self.group_values[*group_idx].row()
             });
 
             let group_idx = match entry {
@@ -335,8 +405,8 @@ impl GroupedHashAggregateStream2 {
                 //  1.2 Need to create new entry for the group
                 None => {
                     // Add new entry to aggr_state and save newly created index
-                    let group_idx = self.group_by_values.len();
-                    self.group_by_values.push(group_rows.row(row).owned());
+                    let group_idx = self.group_values.len();
+                    self.group_values.push(group_rows.row(row).owned());
 
                     // for hasher function, use precomputed hash value
                     self.map.insert_accounted(
@@ -382,7 +452,7 @@ impl GroupedHashAggregateStream2 {
                 .zip(input_values.iter())
                 .zip(filter_values.iter());
 
-            let total_num_groups = self.group_by_values.len();
+            let total_num_groups = self.group_values.len();
 
             for ((acc, values), opt_filter) in t {
                 let acc_size_pre = acc.size();
@@ -424,13 +494,13 @@ impl GroupedHashAggregateStream2 {
 impl GroupedHashAggregateStream2 {
     /// Create an output RecordBatch with all group keys and accumulator states/values
     fn create_batch_from_map(&mut self) -> Result<RecordBatch> {
-        if self.group_by_values.is_empty() {
+        if self.group_values.is_empty() {
             let schema = self.schema.clone();
             return Ok(RecordBatch::new_empty(schema));
         }
 
         // First output rows are the groups
-        let groups_rows = self.group_by_values.iter().map(|owned_row| owned_row.row());
+        let groups_rows = self.group_values.iter().map(|owned_row| owned_row.row());
 
         let mut output: Vec<ArrayRef> = self.row_converter.convert_rows(groups_rows)?;
 

From 73cb33f02af236946b544cfc7352497c16cb57c0 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 05:07:56 -0400
Subject: [PATCH 05/89] Update comments and simplify code

---
 .../src/physical_plan/aggregates/row_hash2.rs | 54 ++++++++-----------
 1 file changed, 21 insertions(+), 33 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 3e9dbfe0cfb9..792fbb4032bf 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -36,7 +36,7 @@ use crate::physical_plan::aggregates::{
     PhysicalGroupBy,
 };
 use crate::physical_plan::metrics::{BaselineMetrics, RecordOutput};
-use crate::physical_plan::{aggregates, AggregateExpr, PhysicalExpr};
+use crate::physical_plan::{aggregates, PhysicalExpr};
 use crate::physical_plan::{RecordBatchStream, SendableRecordBatchStream};
 use arrow::array::*;
 use arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
@@ -204,12 +204,11 @@ impl GroupedHashAggregateStream2 {
 
         let timer = baseline_metrics.elapsed_compute().timer();
 
-        let mut aggregate_exprs = vec![];
-        let mut aggregate_arguments = vec![];
+        let aggregate_exprs = agg.aggr_expr.clone();
 
-        // The arguments for each aggregate, one vec of expressions
-        // per aggregation.
-        let all_aggregate_expressions = aggregates::aggregate_expressions(
+        // arguments for each aggregate, one vec of expressions per
+        // aggregate
+        let aggregate_arguments = aggregates::aggregate_expressions(
             &agg.aggr_expr,
             &agg.mode,
             agg_group_by.expr.len(),
@@ -222,16 +221,11 @@ impl GroupedHashAggregateStream2 {
             }
         };
 
-        for (agg_expr, agg_args) in agg
-            .aggr_expr
+        // Instantiate the accumulators
+        let accumulators: Vec<_> = aggregate_exprs
             .iter()
-            .zip(all_aggregate_expressions.into_iter())
-        {
-            aggregate_exprs.push(agg_expr.clone());
-            aggregate_arguments.push(agg_args);
-        }
-
-        let accumulators = create_accumulators(aggregate_exprs)?;
+            .map(|agg_expr| agg_expr.create_groups_accumulator())
+            .collect::<Result<_>>()?;
 
         let group_schema = group_schema(&agg_schema, agg_group_by.expr.len());
         let row_converter = RowConverter::new(
@@ -273,18 +267,6 @@ impl GroupedHashAggregateStream2 {
     }
 }
 
-/// Crate a [`GroupsAccumulator`] for each of the aggregate_exprs to
-/// hold the aggregation state
-fn create_accumulators(
-    aggregate_exprs: Vec<Arc<dyn AggregateExpr>>,
-) -> Result<Vec<Box<dyn GroupsAccumulator>>> {
-    debug!("Creating accumulator for {aggregate_exprs:#?}");
-    aggregate_exprs
-        .into_iter()
-        .map(|agg_expr| agg_expr.create_groups_accumulator())
-        .collect()
-}
-
 impl Stream for GroupedHashAggregateStream2 {
     type Item = Result<RecordBatch>;
 
@@ -363,11 +345,13 @@ impl RecordBatchStream for GroupedHashAggregateStream2 {
 }
 
 impl GroupedHashAggregateStream2 {
-    /// Update self.aggr_state based on the group_by values (result of evalauting the group_by_expressions)
+    /// Calculates the group indicies for each input row of
+    /// `group_values`.
     ///
     /// At the return of this function,
-    /// `self.aggr_state.current_group_indices` has the correct
-    /// group_index for each row in the group_values
+    /// [`Self::current_group_indices`] has the same number of
+    /// entries as each array in `group_values` and holds the correct
+    /// group_index for that row.
     fn update_group_state(
         &mut self,
         group_values: &[ArrayRef],
@@ -376,6 +360,7 @@ impl GroupedHashAggregateStream2 {
         // Convert the group keys into the row format
         let group_rows = self.row_converter.convert_columns(group_values)?;
         let n_rows = group_rows.num_rows();
+
         // 1.1 construct the key from the group values
         // 1.2 construct the mapping key if it does not exist
 
@@ -426,9 +411,8 @@ impl GroupedHashAggregateStream2 {
     ///
     /// If successful, returns the additional amount of memory, in
     /// bytes, that were allocated during this process.
-    ///
     fn group_aggregate_batch(&mut self, batch: RecordBatch) -> Result<usize> {
-        // Evaluate the grouping expressions:
+        // Evaluate the grouping expressions
         let group_by_values = evaluate_group_by(&self.group_by, &batch)?;
 
         // Keep track of memory allocated:
@@ -436,10 +420,12 @@ impl GroupedHashAggregateStream2 {
 
         // Evaluate the aggregation expressions.
         let input_values = evaluate_many(&self.aggregate_arguments, &batch)?;
-        // Evalaute the filter expressions, if any, against the inputs
+
+        // Evaluate the filter expressions, if any, against the inputs
         let filter_values = evaluate_optional(&self.filter_expressions, &batch)?;
 
         let row_converter_size_pre = self.row_converter.size();
+
         for group_values in &group_by_values {
             // calculate the group indicies for each input row
             self.update_group_state(group_values, &mut allocated)?;
@@ -458,6 +444,8 @@ impl GroupedHashAggregateStream2 {
                 let acc_size_pre = acc.size();
                 let opt_filter = opt_filter.as_ref().map(|filter| filter.as_boolean());
 
+                // Call the appropriate method on each aggregator with
+                // the entire input row and the relevant group indexes
                 match self.mode {
                     AggregateMode::Partial | AggregateMode::Single => {
                         acc.update_batch(

From 0b5d74fb1a32f484c2245c2bbca693b34b2fd379 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 05:24:40 -0400
Subject: [PATCH 06/89] factor out accumulate

---
 .../physical-expr/src/aggregate/average.rs    | 93 ++++++++++++++++++-
 1 file changed, 90 insertions(+), 3 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 7043ed9ce18f..0dcff7ec9bd9 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -417,8 +417,95 @@ impl RowAccumulator for AvgRowAccumulator {
     }
 }
 
+/// This function is called once per row to update the accumulator,
+/// for a `PrimitiveArray<T>` and is the inner loop for many
+/// GroupsAccumulators and thus performance critical.
+///
+/// * `values`: the input arguments to the accumulator
+/// * `group_indices`:  To which groups do the rows in `values` belong, group id)
+/// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
+///
+/// `F`: The function to invoke for a non null input row to update the
+/// accumulator state. Called like `value_fn(group_index, value)
+///
+/// `FN`: The function to call for each null input row.  Called like
+/// `null_fn(group_index)
+fn accumulate_all<T, F, FN>(
+    values: &PrimitiveArray<T>,
+    group_indicies: &[usize],
+    opt_filter: Option<&arrow_array::BooleanArray>,
+    value_fn: F,
+    null_fn: FN,
+) where
+    T: ArrowNumericType + Send,
+    F: Fn(usize, T::Native) + Send,
+    FN: Fn(usize) + Send,
+{
+    // AAL TODO handle filter values
+    // TODO combine the null mask from values and opt_filter
+    let valids = values.nulls();
+
+    // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
+    let data: &[T::Native] = values.values();
+
+    match valids {
+        // no nulls
+        None => {
+            let iter = group_indicies.iter().zip(data.iter());
+            for (&group_index, &new_value) in iter {
+                value_fn(group_index, new_value)
+            }
+        }
+        // there are nulls, so handle them specially
+        Some(valids) => {
+            let group_indices_chunks = group_indicies.chunks_exact(64);
+            let data_chunks = data.chunks_exact(64);
+            let bit_chunks = valids.inner().bit_chunks();
+
+            let group_indices_remainder = group_indices_chunks.remainder();
+            let data_remainder = data_chunks.remainder();
+
+            group_indices_chunks
+                .zip(data_chunks)
+                .zip(bit_chunks.iter())
+                .for_each(|((group_index_chunk, data_chunk), mask)| {
+                    // index_mask has value 1 << i in the loop
+                    let mut index_mask = 1;
+                    group_index_chunk.iter().zip(data_chunk.iter()).for_each(
+                        |(&group_index, &new_value)| {
+                            // valid bit was set, real vale
+                            if (mask & index_mask) != 0 {
+                                value_fn(group_index, new_value);
+                            } else {
+                                null_fn(group_index)
+                            }
+                            index_mask <<= 1;
+                        },
+                    )
+                });
+
+            // handle any remaining bits (after the intial 64)
+            let remainder_bits = bit_chunks.remainder_bits();
+            group_indices_remainder
+                .iter()
+                .zip(data_remainder.iter())
+                .enumerate()
+                .for_each(|(i, (&group_index, &new_value))| {
+                    if remainder_bits & (1 << i) != 0 {
+                        value_fn(group_index, new_value)
+                    } else {
+                        null_fn(group_index)
+                    }
+                });
+        }
+    }
+}
+
 /// An accumulator to compute the average of PrimitiveArray<T>.
 /// Stores values as native types, and does overflow checking
+///
+/// F: Function that calculates the average value from a sum of
+/// T::Native and a total count
 #[derive(Debug)]
 struct AvgGroupsAccumulator<T, F>
 where
@@ -597,7 +684,7 @@ where
         let array = PrimitiveArray::<T>::from_iter_values(averages);
 
         // fix up decimal precision and scale for decimals
-        let array = set_decimal_precision(&self.return_data_type, Arc::new(array))?;
+        let array = adjust_output_array(&self.return_data_type, Arc::new(array))?;
 
         Ok(array)
     }
@@ -614,7 +701,7 @@ where
         let sums: PrimitiveArray<T> = PrimitiveArray::from_iter_values(sums);
 
         // fix up decimal precision and scale for decimals
-        let sums = set_decimal_precision(&self.sum_data_type, Arc::new(sums))?;
+        let sums = adjust_output_array(&self.sum_data_type, Arc::new(sums))?;
 
         Ok(vec![
             Arc::new(counts) as ArrayRef,
@@ -631,7 +718,7 @@ where
 ///
 /// Decimal128Arrays are are are created from Vec<NativeType> with default
 /// precision and scale. This function adjusts them down.
-fn set_decimal_precision(sum_data_type: &DataType, array: ArrayRef) -> Result<ArrayRef> {
+fn adjust_output_array(sum_data_type: &DataType, array: ArrayRef) -> Result<ArrayRef> {
     let array = match sum_data_type {
         DataType::Decimal128(p, s) => Arc::new(
             array

From c30874db2ee07bc2bd7f7014def0fb832b927d6b Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 05:46:52 -0400
Subject: [PATCH 07/89] split nullable/non nullable handling

---
 .../physical-expr/src/aggregate/average.rs    | 139 ++++++++++--------
 1 file changed, 79 insertions(+), 60 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 0dcff7ec9bd9..20ccadd7e855 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -417,9 +417,13 @@ impl RowAccumulator for AvgRowAccumulator {
     }
 }
 
-/// This function is called once per row to update the accumulator,
-/// for a `PrimitiveArray<T>` and is the inner loop for many
-/// GroupsAccumulators and thus performance critical.
+/// This function is called to update the accumulator state per row,
+/// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
+/// many GroupsAccumulators and thus performance critical.
+///
+/// I couldn't find any way to combine this with
+/// accumulate_all_nullable without having to pass in a is_null on
+/// every row.
 ///
 /// * `values`: the input arguments to the accumulator
 /// * `group_indices`:  To which groups do the rows in `values` belong, group id)
@@ -427,80 +431,95 @@ impl RowAccumulator for AvgRowAccumulator {
 ///
 /// `F`: The function to invoke for a non null input row to update the
 /// accumulator state. Called like `value_fn(group_index, value)
-///
-/// `FN`: The function to call for each null input row.  Called like
-/// `null_fn(group_index)
 fn accumulate_all<T, F, FN>(
     values: &PrimitiveArray<T>,
     group_indicies: &[usize],
     opt_filter: Option<&arrow_array::BooleanArray>,
     value_fn: F,
-    null_fn: FN,
 ) where
     T: ArrowNumericType + Send,
     F: Fn(usize, T::Native) + Send,
-    FN: Fn(usize) + Send,
 {
+    assert_eq!(
+        values.null_count(), 0,
+        "Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
+    );
+
     // AAL TODO handle filter values
+
+    let data: &[T::Native] = values.values();
+    let iter = group_indicies.iter().zip(data.iter());
+    for (&group_index, &new_value) in iter {
+        value_fn(group_index, new_value)
+    }
+}
+
+
+/// This function is called to update the accumulator state per row,
+/// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
+/// many GroupsAccumulators and thus performance critical.
+///
+/// * `values`: the input arguments to the accumulator
+/// * `group_indices`:  To which groups do the rows in `values` belong, group id)
+/// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
+///
+/// `F`: The function to invoke for an input row to update the
+/// accumulator state. Called like `value_fn(group_index, value,
+/// is_valid). NOTE the parameter is true when the value is VALID.
+fn accumulate_all_nullable<T, F, FN>(
+    values: &PrimitiveArray<T>,
+    group_indicies: &[usize],
+    opt_filter: Option<&arrow_array::BooleanArray>,
+    value_fn: F,
+) where
+    T: ArrowNumericType + Send,
+    F: Fn(usize, T::Native, bool) + Send,
+{
+     // AAL TODO handle filter values
     // TODO combine the null mask from values and opt_filter
-    let valids = values.nulls();
+    let valids = values
+        .nulls()
+        .expect("Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)");
 
     // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
     let data: &[T::Native] = values.values();
 
-    match valids {
-        // no nulls
-        None => {
-            let iter = group_indicies.iter().zip(data.iter());
-            for (&group_index, &new_value) in iter {
-                value_fn(group_index, new_value)
-            }
-        }
-        // there are nulls, so handle them specially
-        Some(valids) => {
-            let group_indices_chunks = group_indicies.chunks_exact(64);
-            let data_chunks = data.chunks_exact(64);
-            let bit_chunks = valids.inner().bit_chunks();
-
-            let group_indices_remainder = group_indices_chunks.remainder();
-            let data_remainder = data_chunks.remainder();
-
-            group_indices_chunks
-                .zip(data_chunks)
-                .zip(bit_chunks.iter())
-                .for_each(|((group_index_chunk, data_chunk), mask)| {
-                    // index_mask has value 1 << i in the loop
-                    let mut index_mask = 1;
-                    group_index_chunk.iter().zip(data_chunk.iter()).for_each(
-                        |(&group_index, &new_value)| {
-                            // valid bit was set, real vale
-                            if (mask & index_mask) != 0 {
-                                value_fn(group_index, new_value);
-                            } else {
-                                null_fn(group_index)
-                            }
-                            index_mask <<= 1;
-                        },
-                    )
-                });
-
-            // handle any remaining bits (after the intial 64)
-            let remainder_bits = bit_chunks.remainder_bits();
-            group_indices_remainder
-                .iter()
-                .zip(data_remainder.iter())
-                .enumerate()
-                .for_each(|(i, (&group_index, &new_value))| {
-                    if remainder_bits & (1 << i) != 0 {
-                        value_fn(group_index, new_value)
-                    } else {
-                        null_fn(group_index)
-                    }
-                });
-        }
-    }
+    let group_indices_chunks = group_indicies.chunks_exact(64);
+    let data_chunks = data.chunks_exact(64);
+    let bit_chunks = valids.inner().bit_chunks();
+
+    let group_indices_remainder = group_indices_chunks.remainder();
+    let data_remainder = data_chunks.remainder();
+
+    group_indices_chunks
+        .zip(data_chunks)
+        .zip(bit_chunks.iter())
+        .for_each(|((group_index_chunk, data_chunk), mask)| {
+            // index_mask has value 1 << i in the loop
+            let mut index_mask = 1;
+            group_index_chunk.iter().zip(data_chunk.iter()).for_each(
+                |(&group_index, &new_value)| {
+                    // valid bit was set, real vale
+                    let is_valid = (mask & index_mask) != 0;
+                    value_fn(group_index, new_value, is_valid);
+                    index_mask <<= 1;
+                },
+            )
+        });
+
+    // handle any remaining bits (after the intial 64)
+    let remainder_bits = bit_chunks.remainder_bits();
+    group_indices_remainder
+        .iter()
+        .zip(data_remainder.iter())
+        .enumerate()
+        .for_each(|(i, (&group_index, &new_value))| {
+            let is_valid = remainder_bits & (1 << i) != 0;
+            value_fn(group_index, new_value, is_valid)
+        });
 }
 
+
 /// An accumulator to compute the average of PrimitiveArray<T>.
 /// Stores values as native types, and does overflow checking
 ///

From 2370220541527663ff88e66c90395df5efe0a5d3 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 06:07:44 -0400
Subject: [PATCH 08/89] Refactor out accumulation in average

---
 .../physical-expr/src/aggregate/average.rs    | 189 ++++++++++--------
 1 file changed, 105 insertions(+), 84 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 20ccadd7e855..2d9a627a5fde 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -431,14 +431,14 @@ impl RowAccumulator for AvgRowAccumulator {
 ///
 /// `F`: The function to invoke for a non null input row to update the
 /// accumulator state. Called like `value_fn(group_index, value)
-fn accumulate_all<T, F, FN>(
+fn accumulate_all<T, F>(
     values: &PrimitiveArray<T>,
     group_indicies: &[usize],
     opt_filter: Option<&arrow_array::BooleanArray>,
-    value_fn: F,
+    mut value_fn: F,
 ) where
     T: ArrowNumericType + Send,
-    F: Fn(usize, T::Native) + Send,
+    F: FnMut(usize, T::Native) + Send,
 {
     assert_eq!(
         values.null_count(), 0,
@@ -454,7 +454,6 @@ fn accumulate_all<T, F, FN>(
     }
 }
 
-
 /// This function is called to update the accumulator state per row,
 /// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
 /// many GroupsAccumulators and thus performance critical.
@@ -466,16 +465,16 @@ fn accumulate_all<T, F, FN>(
 /// `F`: The function to invoke for an input row to update the
 /// accumulator state. Called like `value_fn(group_index, value,
 /// is_valid). NOTE the parameter is true when the value is VALID.
-fn accumulate_all_nullable<T, F, FN>(
+fn accumulate_all_nullable<T, F>(
     values: &PrimitiveArray<T>,
     group_indicies: &[usize],
     opt_filter: Option<&arrow_array::BooleanArray>,
-    value_fn: F,
+    mut value_fn: F,
 ) where
     T: ArrowNumericType + Send,
-    F: Fn(usize, T::Native, bool) + Send,
+    F: FnMut(usize, T::Native, bool) + Send,
 {
-     // AAL TODO handle filter values
+    // AAL TODO handle filter values
     // TODO combine the null mask from values and opt_filter
     let valids = values
         .nulls()
@@ -519,7 +518,6 @@ fn accumulate_all_nullable<T, F, FN>(
         });
 }
 
-
 /// An accumulator to compute the average of PrimitiveArray<T>.
 /// Stores values as native types, and does overflow checking
 ///
@@ -566,6 +564,72 @@ where
         }
     }
 
+    /// Adds one to each group's counter
+    fn increment_counts(
+        &mut self,
+        values: &PrimitiveArray<T>,
+        group_indicies: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) {
+        self.counts.resize(total_num_groups, 0);
+
+        if values.null_count() == 0 {
+            accumulate_all(
+                values,
+                group_indicies,
+                opt_filter,
+                |group_index, _new_value| {
+                    self.counts[group_index] += 1;
+                },
+            )
+        } else {
+            accumulate_all_nullable(
+                values,
+                group_indicies,
+                opt_filter,
+                |group_index, _new_value, is_valid| {
+                    if is_valid {
+                        self.counts[group_index] += 1;
+                    }
+                },
+            )
+        }
+    }
+
+    /// Adds the counts with the partial counts
+    fn update_counts_with_partial_counts(
+        &mut self,
+        partial_counts: &UInt64Array,
+        group_indicies: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) {
+        self.counts.resize(total_num_groups, 0);
+
+        if partial_counts.null_count() == 0 {
+            accumulate_all(
+                partial_counts,
+                group_indicies,
+                opt_filter,
+                |group_index, partial_count| {
+                    self.counts[group_index] += partial_count;
+                },
+            )
+        } else {
+            accumulate_all_nullable(
+                partial_counts,
+                group_indicies,
+                opt_filter,
+                |group_index, partial_count, is_valid| {
+                    if is_valid {
+                        self.counts[group_index] += partial_count;
+                    }
+                },
+            )
+        }
+    }
+
     /// Adds the values in `values` to self.sums
     fn update_sums(
         &mut self,
@@ -573,66 +637,33 @@ where
         group_indicies: &[usize],
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
-    ) -> Result<()> {
+    ) {
         self.sums
             .resize_with(total_num_groups, || T::default_value());
 
-        // AAL TODO
-        // TODO combine the null mask from values and opt_filter
-        let valids = values.nulls();
-
-        // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
-        let data: &[T::Native] = values.values();
-
-        match valids {
-            // use all values in group_index
-            None => {
-                let iter = group_indicies.iter().zip(data.iter());
-                for (group_index, new_value) in iter {
-                    let sum = &mut self.sums[*group_index];
-                    *sum = sum.add_wrapping(*new_value);
-                }
-            }
-            //
-            Some(valids) => {
-                let group_indices_chunks = group_indicies.chunks_exact(64);
-                let data_chunks = data.chunks_exact(64);
-                let bit_chunks = valids.inner().bit_chunks();
-
-                let group_indices_remainder = group_indices_chunks.remainder();
-                let data_remainder = data_chunks.remainder();
-
-                group_indices_chunks
-                    .zip(data_chunks)
-                    .zip(bit_chunks.iter())
-                    .for_each(|((group_index_chunk, data_chunk), mask)| {
-                        // index_mask has value 1 << i in the loop
-                        let mut index_mask = 1;
-                        group_index_chunk.iter().zip(data_chunk.iter()).for_each(
-                            |(group_index, new_value)| {
-                                if (mask & index_mask) != 0 {
-                                    let sum = &mut self.sums[*group_index];
-                                    *sum = sum.add_wrapping(*new_value);
-                                }
-                                index_mask <<= 1;
-                            },
-                        )
-                    });
-
-                let remainder_bits = bit_chunks.remainder_bits();
-                group_indices_remainder
-                    .iter()
-                    .zip(data_remainder.iter())
-                    .enumerate()
-                    .for_each(|(i, (group_index, new_value))| {
-                        if remainder_bits & (1 << i) != 0 {
-                            let sum = &mut self.sums[*group_index];
-                            *sum = sum.add_wrapping(*new_value);
-                        }
-                    });
-            }
+        if values.null_count() == 0 {
+            accumulate_all(
+                values,
+                group_indicies,
+                opt_filter,
+                |group_index, new_value| {
+                    let sum = &mut self.sums[group_index];
+                    *sum = sum.add_wrapping(new_value);
+                },
+            )
+        } else {
+            accumulate_all_nullable(
+                values,
+                group_indicies,
+                opt_filter,
+                |group_index, new_value, is_valid| {
+                    if is_valid {
+                        let sum = &mut self.sums[group_index];
+                        *sum = sum.add_wrapping(new_value);
+                    }
+                },
+            )
         }
-        Ok(())
     }
 }
 
@@ -651,14 +682,8 @@ where
         assert_eq!(values.len(), 1, "single argument to update_batch");
         let values = values.get(0).unwrap().as_primitive::<T>();
 
-        // update counts (TOD account for opt_filter)
-        self.counts.resize(total_num_groups, 0);
-        group_indicies.iter().for_each(|&group_idx| {
-            self.counts[group_idx] += 1;
-        });
-
-        // update values
-        self.update_sums(values, group_indicies, opt_filter, total_num_groups)?;
+        self.increment_counts(values, group_indicies, opt_filter, total_num_groups);
+        self.update_sums(values, group_indicies, opt_filter, total_num_groups);
 
         Ok(())
     }
@@ -672,19 +697,15 @@ where
     ) -> Result<()> {
         assert_eq!(values.len(), 2, "two arguments to merge_batch");
         // first batch is counts, second is partial sums
-        let counts = values.get(0).unwrap().as_primitive::<UInt64Type>();
+        let partial_counts = values.get(0).unwrap().as_primitive::<UInt64Type>();
         let partial_sums = values.get(1).unwrap().as_primitive::<T>();
-
-        // update counts by summing the partial sums (TODO account for opt_filter)
-        self.counts.resize(total_num_groups, 0);
-        group_indicies.iter().zip(counts.values().iter()).for_each(
-            |(&group_idx, &count)| {
-                self.counts[group_idx] += count;
-            },
+        self.update_counts_with_partial_counts(
+            partial_counts,
+            group_indicies,
+            opt_filter,
+            total_num_groups,
         );
-
-        // update values
-        self.update_sums(partial_sums, group_indicies, opt_filter, total_num_groups)?;
+        self.update_sums(partial_sums, group_indicies, opt_filter, total_num_groups);
 
         Ok(())
     }

From 26570f9554e26e11a6313adf59e713544688748d Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 06:27:34 -0400
Subject: [PATCH 09/89] Move accumulate functions to their own module

---
 .../physical-expr/src/aggregate/average.rs    | 102 +--------------
 .../groups_accumulator/accumulate.rs          | 121 ++++++++++++++++++
 .../mod.rs}                                   |   2 +
 3 files changed, 124 insertions(+), 101 deletions(-)
 create mode 100644 datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
 rename datafusion/physical-expr/src/aggregate/{groups_accumulator.rs => groups_accumulator/mod.rs} (99%)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 2d9a627a5fde..3f3c7820be12 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -45,6 +45,7 @@ use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::Accumulator;
 use datafusion_row::accessor::RowAccessor;
 
+use super::groups_accumulator::accumulate::{accumulate_all, accumulate_all_nullable};
 use super::utils::Decimal128Averager;
 
 /// AVG aggregate expression
@@ -417,107 +418,6 @@ impl RowAccumulator for AvgRowAccumulator {
     }
 }
 
-/// This function is called to update the accumulator state per row,
-/// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
-/// many GroupsAccumulators and thus performance critical.
-///
-/// I couldn't find any way to combine this with
-/// accumulate_all_nullable without having to pass in a is_null on
-/// every row.
-///
-/// * `values`: the input arguments to the accumulator
-/// * `group_indices`:  To which groups do the rows in `values` belong, group id)
-/// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
-///
-/// `F`: The function to invoke for a non null input row to update the
-/// accumulator state. Called like `value_fn(group_index, value)
-fn accumulate_all<T, F>(
-    values: &PrimitiveArray<T>,
-    group_indicies: &[usize],
-    opt_filter: Option<&arrow_array::BooleanArray>,
-    mut value_fn: F,
-) where
-    T: ArrowNumericType + Send,
-    F: FnMut(usize, T::Native) + Send,
-{
-    assert_eq!(
-        values.null_count(), 0,
-        "Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
-    );
-
-    // AAL TODO handle filter values
-
-    let data: &[T::Native] = values.values();
-    let iter = group_indicies.iter().zip(data.iter());
-    for (&group_index, &new_value) in iter {
-        value_fn(group_index, new_value)
-    }
-}
-
-/// This function is called to update the accumulator state per row,
-/// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
-/// many GroupsAccumulators and thus performance critical.
-///
-/// * `values`: the input arguments to the accumulator
-/// * `group_indices`:  To which groups do the rows in `values` belong, group id)
-/// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
-///
-/// `F`: The function to invoke for an input row to update the
-/// accumulator state. Called like `value_fn(group_index, value,
-/// is_valid). NOTE the parameter is true when the value is VALID.
-fn accumulate_all_nullable<T, F>(
-    values: &PrimitiveArray<T>,
-    group_indicies: &[usize],
-    opt_filter: Option<&arrow_array::BooleanArray>,
-    mut value_fn: F,
-) where
-    T: ArrowNumericType + Send,
-    F: FnMut(usize, T::Native, bool) + Send,
-{
-    // AAL TODO handle filter values
-    // TODO combine the null mask from values and opt_filter
-    let valids = values
-        .nulls()
-        .expect("Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)");
-
-    // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
-    let data: &[T::Native] = values.values();
-
-    let group_indices_chunks = group_indicies.chunks_exact(64);
-    let data_chunks = data.chunks_exact(64);
-    let bit_chunks = valids.inner().bit_chunks();
-
-    let group_indices_remainder = group_indices_chunks.remainder();
-    let data_remainder = data_chunks.remainder();
-
-    group_indices_chunks
-        .zip(data_chunks)
-        .zip(bit_chunks.iter())
-        .for_each(|((group_index_chunk, data_chunk), mask)| {
-            // index_mask has value 1 << i in the loop
-            let mut index_mask = 1;
-            group_index_chunk.iter().zip(data_chunk.iter()).for_each(
-                |(&group_index, &new_value)| {
-                    // valid bit was set, real vale
-                    let is_valid = (mask & index_mask) != 0;
-                    value_fn(group_index, new_value, is_valid);
-                    index_mask <<= 1;
-                },
-            )
-        });
-
-    // handle any remaining bits (after the intial 64)
-    let remainder_bits = bit_chunks.remainder_bits();
-    group_indices_remainder
-        .iter()
-        .zip(data_remainder.iter())
-        .enumerate()
-        .for_each(|(i, (&group_index, &new_value))| {
-            let is_valid = remainder_bits & (1 << i) != 0;
-            value_fn(group_index, new_value, is_valid)
-        });
-}
-
 /// An accumulator to compute the average of PrimitiveArray<T>.
 /// Stores values as native types, and does overflow checking
 ///
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
new file mode 100644
index 000000000000..5d72328763ae
--- /dev/null
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Vectorized [`accumulate`] and [`accumulate_nullable`] functions
+
+use arrow_array::{Array, ArrowNumericType, PrimitiveArray};
+
+/// This function is called to update the accumulator state per row,
+/// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
+/// many GroupsAccumulators and thus performance critical.
+///
+/// I couldn't find any way to combine this with
+/// accumulate_all_nullable without having to pass in a is_null on
+/// every row.
+///
+/// * `values`: the input arguments to the accumulator
+/// * `group_indices`:  To which groups do the rows in `values` belong, group id)
+/// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
+///
+/// `F`: The function to invoke for a non null input row to update the
+/// accumulator state. Called like `value_fn(group_index, value)
+pub fn accumulate_all<T, F>(
+    values: &PrimitiveArray<T>,
+    group_indicies: &[usize],
+    opt_filter: Option<&arrow_array::BooleanArray>,
+    mut value_fn: F,
+) where
+    T: ArrowNumericType + Send,
+    F: FnMut(usize, T::Native) + Send,
+{
+    assert_eq!(
+        values.null_count(), 0,
+        "Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
+    );
+
+    // AAL TODO handle filter values
+
+    let data: &[T::Native] = values.values();
+    let iter = group_indicies.iter().zip(data.iter());
+    for (&group_index, &new_value) in iter {
+        value_fn(group_index, new_value)
+    }
+}
+
+/// This function is called to update the accumulator state per row,
+/// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
+/// many GroupsAccumulators and thus performance critical.
+///
+/// * `values`: the input arguments to the accumulator
+/// * `group_indices`:  To which groups do the rows in `values` belong, group id)
+/// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
+///
+/// `F`: The function to invoke for an input row to update the
+/// accumulator state. Called like `value_fn(group_index, value,
+/// is_valid). NOTE the parameter is true when the value is VALID.
+pub fn accumulate_all_nullable<T, F>(
+    values: &PrimitiveArray<T>,
+    group_indicies: &[usize],
+    opt_filter: Option<&arrow_array::BooleanArray>,
+    mut value_fn: F,
+) where
+    T: ArrowNumericType + Send,
+    F: FnMut(usize, T::Native, bool) + Send,
+{
+    // AAL TODO handle filter values
+    // TODO combine the null mask from values and opt_filter
+    let valids = values
+        .nulls()
+        .expect("Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)");
+
+    // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
+    let data: &[T::Native] = values.values();
+
+    let group_indices_chunks = group_indicies.chunks_exact(64);
+    let data_chunks = data.chunks_exact(64);
+    let bit_chunks = valids.inner().bit_chunks();
+
+    let group_indices_remainder = group_indices_chunks.remainder();
+    let data_remainder = data_chunks.remainder();
+
+    group_indices_chunks
+        .zip(data_chunks)
+        .zip(bit_chunks.iter())
+        .for_each(|((group_index_chunk, data_chunk), mask)| {
+            // index_mask has value 1 << i in the loop
+            let mut index_mask = 1;
+            group_index_chunk.iter().zip(data_chunk.iter()).for_each(
+                |(&group_index, &new_value)| {
+                    // valid bit was set, real vale
+                    let is_valid = (mask & index_mask) != 0;
+                    value_fn(group_index, new_value, is_valid);
+                    index_mask <<= 1;
+                },
+            )
+        });
+
+    // handle any remaining bits (after the intial 64)
+    let remainder_bits = bit_chunks.remainder_bits();
+    group_indices_remainder
+        .iter()
+        .zip(data_remainder.iter())
+        .enumerate()
+        .for_each(|(i, (&group_index, &new_value))| {
+            let is_valid = remainder_bits & (1 << i) != 0;
+            value_fn(group_index, new_value, is_valid)
+        });
+}
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
similarity index 99%
rename from datafusion/physical-expr/src/aggregate/groups_accumulator.rs
rename to datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
index 82cfbfaa31c8..680eb927a1a8 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
@@ -17,6 +17,8 @@
 
 //! Vectorized [`GroupsAccumulator`]
 
+pub mod accumulate;
+
 use arrow_array::{ArrayRef, BooleanArray};
 use datafusion_common::Result;
 

From bed990ef5c5f840b18ad31da394a3ef524e3fc66 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 06:45:04 -0400
Subject: [PATCH 10/89] update more comments

---
 .../physical-expr/src/aggregate/average.rs    | 26 +++----
 .../groups_accumulator/accumulate.rs          | 70 ++++++++++++++-----
 2 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 3f3c7820be12..ee249f3bd1ed 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -467,8 +467,8 @@ where
     /// Adds one to each group's counter
     fn increment_counts(
         &mut self,
-        values: &PrimitiveArray<T>,
         group_indicies: &[usize],
+        values: &PrimitiveArray<T>,
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) {
@@ -476,8 +476,8 @@ where
 
         if values.null_count() == 0 {
             accumulate_all(
-                values,
                 group_indicies,
+                values,
                 opt_filter,
                 |group_index, _new_value| {
                     self.counts[group_index] += 1;
@@ -485,8 +485,8 @@ where
             )
         } else {
             accumulate_all_nullable(
-                values,
                 group_indicies,
+                values,
                 opt_filter,
                 |group_index, _new_value, is_valid| {
                     if is_valid {
@@ -500,8 +500,8 @@ where
     /// Adds the counts with the partial counts
     fn update_counts_with_partial_counts(
         &mut self,
-        partial_counts: &UInt64Array,
         group_indicies: &[usize],
+        partial_counts: &UInt64Array,
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) {
@@ -509,8 +509,8 @@ where
 
         if partial_counts.null_count() == 0 {
             accumulate_all(
-                partial_counts,
                 group_indicies,
+                partial_counts,
                 opt_filter,
                 |group_index, partial_count| {
                     self.counts[group_index] += partial_count;
@@ -518,8 +518,8 @@ where
             )
         } else {
             accumulate_all_nullable(
-                partial_counts,
                 group_indicies,
+                partial_counts,
                 opt_filter,
                 |group_index, partial_count, is_valid| {
                     if is_valid {
@@ -533,8 +533,8 @@ where
     /// Adds the values in `values` to self.sums
     fn update_sums(
         &mut self,
-        values: &PrimitiveArray<T>,
         group_indicies: &[usize],
+        values: &PrimitiveArray<T>,
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) {
@@ -543,8 +543,8 @@ where
 
         if values.null_count() == 0 {
             accumulate_all(
-                values,
                 group_indicies,
+                values,
                 opt_filter,
                 |group_index, new_value| {
                     let sum = &mut self.sums[group_index];
@@ -553,8 +553,8 @@ where
             )
         } else {
             accumulate_all_nullable(
-                values,
                 group_indicies,
+                values,
                 opt_filter,
                 |group_index, new_value, is_valid| {
                     if is_valid {
@@ -582,8 +582,8 @@ where
         assert_eq!(values.len(), 1, "single argument to update_batch");
         let values = values.get(0).unwrap().as_primitive::<T>();
 
-        self.increment_counts(values, group_indicies, opt_filter, total_num_groups);
-        self.update_sums(values, group_indicies, opt_filter, total_num_groups);
+        self.increment_counts(group_indicies, values, opt_filter, total_num_groups);
+        self.update_sums(group_indicies, values, opt_filter, total_num_groups);
 
         Ok(())
     }
@@ -600,12 +600,12 @@ where
         let partial_counts = values.get(0).unwrap().as_primitive::<UInt64Type>();
         let partial_sums = values.get(1).unwrap().as_primitive::<T>();
         self.update_counts_with_partial_counts(
-            partial_counts,
             group_indicies,
+            partial_counts,
             opt_filter,
             total_num_groups,
         );
-        self.update_sums(partial_sums, group_indicies, opt_filter, total_num_groups);
+        self.update_sums(group_indicies, partial_sums, opt_filter, total_num_groups);
 
         Ok(())
     }
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 5d72328763ae..f8a6791def1d 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -19,23 +19,55 @@
 
 use arrow_array::{Array, ArrowNumericType, PrimitiveArray};
 
-/// This function is called to update the accumulator state per row,
+/// This function is used to update the accumulator state per row,
 /// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
 /// many GroupsAccumulators and thus performance critical.
 ///
-/// I couldn't find any way to combine this with
-/// accumulate_all_nullable without having to pass in a is_null on
-/// every row.
+/// # Arguments:
 ///
 /// * `values`: the input arguments to the accumulator
 /// * `group_indices`:  To which groups do the rows in `values` belong, group id)
-/// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
+/// * `opt_filter`: if present, invoke value_fn if opt_filter[i] is true
+/// * `value_fn`: function invoked for each (group_index, value) pair.
+///
+/// `F`: Invoked for each input row like `value_fn(group_index, value)
+///
+/// # Example
+///
+/// ```
+///  ┌─────────┐   ┌─────────┐   ┌ ─ ─ ─ ─ ┐
+///  │ ┌─────┐ │   │ ┌─────┐ │     ┌─────┐
+///  │ │  2  │ │   │ │ 200 │ │   │ │  t  │ │
+///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
+///  │ │  2  │ │   │ │ 100 │ │   │ │  f  │ │
+///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
+///  │ │  0  │ │   │ │ 200 │ │   │ │  t  │ │
+///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
+///  │ │  1  │ │   │ │ 200 │ │   │ │NULL │ │
+///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
+///  │ │  0  │ │   │ │ 300 │ │   │ │  t  │ │
+///  │ └─────┘ │   │ └─────┘ │     └─────┘
+///  └─────────┘   └─────────┘   └ ─ ─ ─ ─ ┘
+///
+/// group_indices   values        opt_filter
+/// ```
+///
+/// In the example above, `value_fn` is invoked for each (group_index,
+/// value) pair where `opt_filter[i]` is true
+///
+/// ```text
+/// value_fn(2, 200)
+/// value_fn(0, 200)
+/// value_fn(0, 300)
+/// ```
+///
+/// I couldn't find any way to combine this with
+/// accumulate_all_nullable without having to pass in a is_null on
+/// every row.
 ///
-/// `F`: The function to invoke for a non null input row to update the
-/// accumulator state. Called like `value_fn(group_index, value)
 pub fn accumulate_all<T, F>(
-    values: &PrimitiveArray<T>,
     group_indicies: &[usize],
+    values: &PrimitiveArray<T>,
     opt_filter: Option<&arrow_array::BooleanArray>,
     mut value_fn: F,
 ) where
@@ -57,19 +89,16 @@ pub fn accumulate_all<T, F>(
 }
 
 /// This function is called to update the accumulator state per row,
-/// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
-/// many GroupsAccumulators and thus performance critical.
+/// for a `PrimitiveArray<T>` that can have nulls. See
+/// [`accumulate_all`] for more detail and example
 ///
-/// * `values`: the input arguments to the accumulator
-/// * `group_indices`:  To which groups do the rows in `values` belong, group id)
-/// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
+/// `F`: Invoked like `value_fn(group_index, value, is_valid).
 ///
-/// `F`: The function to invoke for an input row to update the
-/// accumulator state. Called like `value_fn(group_index, value,
-/// is_valid). NOTE the parameter is true when the value is VALID.
+/// NOTE the parameter is true when the value is VALID (not when it is
+/// NULL).
 pub fn accumulate_all_nullable<T, F>(
-    values: &PrimitiveArray<T>,
     group_indicies: &[usize],
+    values: &PrimitiveArray<T>,
     opt_filter: Option<&arrow_array::BooleanArray>,
     mut value_fn: F,
 ) where
@@ -119,3 +148,10 @@ pub fn accumulate_all_nullable<T, F>(
             value_fn(group_index, new_value, is_valid)
         });
 }
+
+#[cfg(test)]
+mod test {
+
+    #[test]
+    fn basic() {}
+}

From 25787a081e6495b73589bc649621376a4f390a63 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 07:04:49 -0400
Subject: [PATCH 11/89] Begin writing tests for accumulate

---
 .../groups_accumulator/accumulate.rs          | 94 ++++++++++++++++++-
 1 file changed, 92 insertions(+), 2 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index f8a6791def1d..f30bed47a6d2 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -34,7 +34,7 @@ use arrow_array::{Array, ArrowNumericType, PrimitiveArray};
 ///
 /// # Example
 ///
-/// ```
+/// ```text
 ///  ┌─────────┐   ┌─────────┐   ┌ ─ ─ ─ ─ ┐
 ///  │ ┌─────┐ │   │ ┌─────┐ │     ┌─────┐
 ///  │ │  2  │ │   │ │ 200 │ │   │ │  t  │ │
@@ -151,7 +151,97 @@ pub fn accumulate_all_nullable<T, F>(
 
 #[cfg(test)]
 mod test {
+    use super::*;
+
+    use arrow_array::UInt32Array;
 
     #[test]
-    fn basic() {}
+    fn no_nulls_no_filter() {
+        let fixture = Fixture::new();
+        let opt_filter = None;
+        let mut accumulated = vec![];
+
+        accumulate_all(
+            &fixture.group_indices,
+            &fixture.values_array(),
+            opt_filter,
+            |group_index, value| accumulated.push((group_index, value)),
+        );
+
+        // Should have see all indexes and values in order
+        accumulated
+            .into_iter()
+            .enumerate()
+            .for_each(|(i, (group_index, value))| {
+                assert_eq!(group_index, fixture.group_indices[i]);
+                assert_eq!(value, fixture.values[i]);
+            })
+    }
+
+    #[test]
+    fn nulls_no_filter() {
+        let fixture = Fixture::new();
+        let opt_filter = None;
+        let mut accumulated = vec![];
+
+        accumulate_all_nullable(
+            &fixture.group_indices,
+            &fixture.values_with_nulls_array(),
+            opt_filter,
+            |group_index, value, is_valid| {
+                let value = if is_valid { Some(value) } else { None };
+                accumulated.push((group_index, value));
+            },
+        );
+
+        // Should have see all indexes and values in order
+        accumulated
+            .into_iter()
+            .enumerate()
+            .for_each(|(i, (group_index, value))| {
+                assert_eq!(group_index, fixture.group_indices[i]);
+                assert_eq!(value, fixture.values_with_nulls[i]);
+            })
+    }
+
+    // TODO: filter testing with/without null
+
+    // TODO: calling nulls/nonulls with wrong one panics
+
+    // fuzz testing
+
+    /// Values for testing (there are enough values to exercise the 64 bit chunks
+    struct Fixture {
+        /// 100..0
+        group_indices: Vec<usize>,
+
+        /// 10, 20, ... 1010
+        values: Vec<u32>,
+
+        /// same as values, but every third is null:
+        /// None, Some(20), Some(30), None ...
+        values_with_nulls: Vec<Option<u32>>,
+    }
+
+    impl Fixture {
+        fn new() -> Self {
+            Self {
+                group_indices: (0..100).collect(),
+                values: (0..100).map(|i| (i + 1) * 10).collect(),
+                values_with_nulls: (0..100)
+                    .map(|i| if i % 3 == 0 { None } else { Some((i + 1) * 10) })
+                    .collect(),
+            }
+        }
+
+        /// returns `Self::values` an Array
+        fn values_array(&self) -> UInt32Array {
+            UInt32Array::from(self.values.clone())
+        }
+
+        /// returns `Self::values_with_nulls` as an Array
+        fn values_with_nulls_array(&self) -> UInt32Array {
+            UInt32Array::from(self.values_with_nulls.clone())
+        }
+    }
 }

From 8433d6fc88a13806b2fff4b149ac72bd17b9f69e Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 07:12:59 -0400
Subject: [PATCH 12/89] more tests

---
 .../groups_accumulator/accumulate.rs          | 56 +++++++++++++++----
 1 file changed, 45 insertions(+), 11 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index f30bed47a6d2..26baad723a27 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -17,7 +17,7 @@
 
 //! Vectorized [`accumulate`] and [`accumulate_nullable`] functions
 
-use arrow_array::{Array, ArrowNumericType, PrimitiveArray};
+use arrow_array::{Array, ArrowNumericType, BooleanArray, PrimitiveArray};
 
 /// This function is used to update the accumulator state per row,
 /// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
@@ -68,7 +68,7 @@ use arrow_array::{Array, ArrowNumericType, PrimitiveArray};
 pub fn accumulate_all<T, F>(
     group_indicies: &[usize],
     values: &PrimitiveArray<T>,
-    opt_filter: Option<&arrow_array::BooleanArray>,
+    opt_filter: Option<&BooleanArray>,
     mut value_fn: F,
 ) where
     T: ArrowNumericType + Send,
@@ -99,7 +99,7 @@ pub fn accumulate_all<T, F>(
 pub fn accumulate_all_nullable<T, F>(
     group_indicies: &[usize],
     values: &PrimitiveArray<T>,
-    opt_filter: Option<&arrow_array::BooleanArray>,
+    opt_filter: Option<&BooleanArray>,
     mut value_fn: F,
 ) where
     T: ArrowNumericType + Send,
@@ -156,15 +156,14 @@ mod test {
     use arrow_array::UInt32Array;
 
     #[test]
-    fn no_nulls_no_filter() {
+    fn accumulate_no_filter() {
         let fixture = Fixture::new();
-        let opt_filter = None;
         let mut accumulated = vec![];
 
         accumulate_all(
             &fixture.group_indices,
             &fixture.values_array(),
-            opt_filter,
+            fixture.opt_filter(),
             |group_index, value| accumulated.push((group_index, value)),
         );
 
@@ -179,15 +178,29 @@ mod test {
     }
 
     #[test]
-    fn nulls_no_filter() {
+    #[should_panic(
+        expected = "assertion failed: `(left == right)`\n  left: `34`,\n right: `0`: Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
+    )]
+    fn accumulate_with_nullable_panics() {
+        let fixture = Fixture::new();
+        // call with an array that has nulls should panic
+        accumulate_all(
+            &fixture.group_indices,
+            &fixture.values_with_nulls_array(),
+            fixture.opt_filter(),
+            |_, _| {},
+        );
+    }
+
+    #[test]
+    fn accumulate_nullable_no_filter() {
         let fixture = Fixture::new();
-        let opt_filter = None;
         let mut accumulated = vec![];
 
         accumulate_all_nullable(
             &fixture.group_indices,
             &fixture.values_with_nulls_array(),
-            opt_filter,
+            fixture.opt_filter(),
             |group_index, value, is_valid| {
                 let value = if is_valid { Some(value) } else { None };
                 accumulated.push((group_index, value));
@@ -204,9 +217,22 @@ mod test {
             })
     }
 
-    // TODO: filter testing with/without null
+    #[test]
+    #[should_panic(
+        expected = "Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)"
+    )]
+    fn accumulate_nullable_with_non_nullable_panics() {
+        let fixture = Fixture::new();
+        // call with an array that has nulls should panic
+        accumulate_all_nullable(
+            &fixture.group_indices,
+            &fixture.values_array(),
+            fixture.opt_filter(),
+            |_, _, _| {},
+        );
+    }
 
-    // TODO: calling nulls/nonulls with wrong one panics
+    // TODO: filter testing with/without null
 
     // fuzz testing
 
@@ -221,6 +247,9 @@ mod test {
         /// same as values, but every third is null:
         /// None, Some(20), Some(30), None ...
         values_with_nulls: Vec<Option<u32>>,
+
+        /// Optional filter (defaults to None)
+        opt_filter: Option<BooleanArray>,
     }
 
     impl Fixture {
@@ -231,6 +260,7 @@ mod test {
                 values_with_nulls: (0..100)
                     .map(|i| if i % 3 == 0 { None } else { Some((i + 1) * 10) })
                     .collect(),
+                opt_filter: None,
             }
         }
 
@@ -243,5 +273,9 @@ mod test {
         fn values_with_nulls_array(&self) -> UInt32Array {
             UInt32Array::from(self.values_with_nulls.clone())
         }
+
+        fn opt_filter(&self) -> Option<&BooleanArray> {
+            self.opt_filter.as_ref()
+        }
     }
 }

From 7e9b92e5badabe4396c13d06468d7335da5438f3 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 07:14:54 -0400
Subject: [PATCH 13/89] more tests

---
 .../src/aggregate/groups_accumulator/accumulate.rs             | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 26baad723a27..24e4b0466188 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -82,6 +82,8 @@ pub fn accumulate_all<T, F>(
     // AAL TODO handle filter values
 
     let data: &[T::Native] = values.values();
+    assert_eq!(data.len(), group_indicies.len());
+
     let iter = group_indicies.iter().zip(data.iter());
     for (&group_index, &new_value) in iter {
         value_fn(group_index, new_value)
@@ -113,6 +115,7 @@ pub fn accumulate_all_nullable<T, F>(
 
     // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
     let data: &[T::Native] = values.values();
+    assert_eq!(data.len(), group_indicies.len());
 
     let group_indices_chunks = group_indicies.chunks_exact(64);
     let data_chunks = data.chunks_exact(64);

From bb37e77a62e9c7982252b0cf11fe16046b15baf1 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 07:16:26 -0400
Subject: [PATCH 14/89] comments

---
 .../src/aggregate/groups_accumulator/accumulate.rs            | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 24e4b0466188..8ea22acd8a0d 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -74,6 +74,7 @@ pub fn accumulate_all<T, F>(
     T: ArrowNumericType + Send,
     F: FnMut(usize, T::Native) + Send,
 {
+    // Given performance is critical, assert if the wrong flavor is called
     assert_eq!(
         values.null_count(), 0,
         "Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
@@ -108,7 +109,8 @@ pub fn accumulate_all_nullable<T, F>(
     F: FnMut(usize, T::Native, bool) + Send,
 {
     // AAL TODO handle filter values
-    // TODO combine the null mask from values and opt_filter
+
+    // Given performance is critical, assert if the wrong flavor is called
     let valids = values
         .nulls()
         .expect("Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)");

From add7b36805bbdebf5c9baade7468df3c4a8ba1fe Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 1 Jul 2023 08:07:16 -0400
Subject: [PATCH 15/89] Implement fuzz testing

---
 .../groups_accumulator/accumulate.rs          | 142 +++++++++++++-----
 1 file changed, 102 insertions(+), 40 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 8ea22acd8a0d..879c1a3a66b6 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -159,27 +159,11 @@ mod test {
     use super::*;
 
     use arrow_array::UInt32Array;
+    use rand::{rngs::ThreadRng, Rng};
 
     #[test]
     fn accumulate_no_filter() {
-        let fixture = Fixture::new();
-        let mut accumulated = vec![];
-
-        accumulate_all(
-            &fixture.group_indices,
-            &fixture.values_array(),
-            fixture.opt_filter(),
-            |group_index, value| accumulated.push((group_index, value)),
-        );
-
-        // Should have see all indexes and values in order
-        accumulated
-            .into_iter()
-            .enumerate()
-            .for_each(|(i, (group_index, value))| {
-                assert_eq!(group_index, fixture.group_indices[i]);
-                assert_eq!(value, fixture.values[i]);
-            })
+        Fixture::new().accumulate_all_test()
     }
 
     #[test]
@@ -199,27 +183,7 @@ mod test {
 
     #[test]
     fn accumulate_nullable_no_filter() {
-        let fixture = Fixture::new();
-        let mut accumulated = vec![];
-
-        accumulate_all_nullable(
-            &fixture.group_indices,
-            &fixture.values_with_nulls_array(),
-            fixture.opt_filter(),
-            |group_index, value, is_valid| {
-                let value = if is_valid { Some(value) } else { None };
-                accumulated.push((group_index, value));
-            },
-        );
-
-        // Should have see all indexes and values in order
-        accumulated
-            .into_iter()
-            .enumerate()
-            .for_each(|(i, (group_index, value))| {
-                assert_eq!(group_index, fixture.group_indices[i]);
-                assert_eq!(value, fixture.values_with_nulls[i]);
-            })
+        Fixture::new().accumulate_all_nullable_test()
     }
 
     #[test]
@@ -239,7 +203,31 @@ mod test {
 
     // TODO: filter testing with/without null
 
-    // fuzz testing
+    #[test]
+    fn accumulate_fuzz() {
+        let mut rng = rand::thread_rng();
+        for _ in 0..100 {
+            Fixture::new_random(&mut rng).accumulate_all_test();
+        }
+    }
+
+    #[test]
+    fn accumulate_nullable_fuzz() {
+        let mut rng = rand::thread_rng();
+        let mut nullable_called = false;
+        for _ in 0..100 {
+            let fixture = Fixture::new_random(&mut rng);
+            // sometimes the random generator will create an array
+            // with no nulls so avoid panic'ing in tests
+            if fixture.values_with_nulls.iter().any(|v| v.is_none()) {
+                nullable_called = true;
+                fixture.accumulate_all_nullable_test();
+            } else {
+                fixture.accumulate_all_test();
+            }
+            assert!(nullable_called);
+        }
+    }
 
     /// Values for testing (there are enough values to exercise the 64 bit chunks
     struct Fixture {
@@ -269,6 +257,34 @@ mod test {
             }
         }
 
+        fn new_random(rng: &mut ThreadRng) -> Self {
+            let num_groups: usize = rng.gen_range(0..1000);
+            let group_indices: Vec<usize> = (0..num_groups).map(|_| rng.gen()).collect();
+
+            let values: Vec<u32> = (0..num_groups).map(|_| rng.gen()).collect();
+
+            // random values with random number and location of nulls
+            // random null percentage
+            let null_pct: f32 = rng.gen_range(0.0..1.0);
+            let values_with_nulls: Vec<Option<u32>> = (0..num_groups)
+                .map(|_| {
+                    let is_null = null_pct < rng.gen_range(0.0..1.0);
+                    if is_null {
+                        None
+                    } else {
+                        Some(rng.gen())
+                    }
+                })
+                .collect();
+
+            Self {
+                group_indices,
+                values,
+                values_with_nulls,
+                opt_filter: None,
+            }
+        }
+
         /// returns `Self::values` an Array
         fn values_array(&self) -> UInt32Array {
             UInt32Array::from(self.values.clone())
@@ -282,5 +298,51 @@ mod test {
         fn opt_filter(&self) -> Option<&BooleanArray> {
             self.opt_filter.as_ref()
         }
+
+        // Calls `accumulate_all` with group_indices, values, and
+        // opt_filter and ensures it calls the right values
+        fn accumulate_all_test(&self) {
+            let mut accumulated = vec![];
+            accumulate_all(
+                &self.group_indices,
+                &self.values_array(),
+                self.opt_filter(),
+                |group_index, value| accumulated.push((group_index, value)),
+            );
+
+            // Should have see all indexes and values in order
+            accumulated
+                .into_iter()
+                .enumerate()
+                .for_each(|(i, (group_index, value))| {
+                    assert_eq!(group_index, self.group_indices[i]);
+                    assert_eq!(value, self.values[i]);
+                })
+        }
+
+        // Calls `accumulate_all_nullable` with group_indices, values,
+        // and opt_filter and ensures it calls the right values
+        fn accumulate_all_nullable_test(&self) {
+            let mut accumulated = vec![];
+
+            accumulate_all_nullable(
+                &self.group_indices,
+                &self.values_with_nulls_array(),
+                self.opt_filter(),
+                |group_index, value, is_valid| {
+                    let value = if is_valid { Some(value) } else { None };
+                    accumulated.push((group_index, value));
+                },
+            );
+
+            // Should have see all indexes and values in order
+            accumulated
+                .into_iter()
+                .enumerate()
+                .for_each(|(i, (group_index, value))| {
+                    assert_eq!(group_index, self.group_indices[i]);
+                    assert_eq!(value, self.values_with_nulls[i]);
+                })
+        }
     }
 }

From 53aa18bac17d3faf5dbbb88455c24945d7f6dce1 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 2 Jul 2023 16:19:51 -0400
Subject: [PATCH 16/89] Clarify the required order from GroupsAccumulator

---
 .../src/aggregate/groups_accumulator/mod.rs         | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
index 680eb927a1a8..2c8a9f1b7d83 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
@@ -47,7 +47,11 @@ pub trait GroupsAccumulator: Send {
     ) -> Result<()>;
 
     /// Returns the final aggregate value for each group as a single
-    /// `RecordBatch`
+    /// `RecordBatch`.
+    ///
+    /// The rows returned *must* be in group_index order: The value
+    /// for group_index 0, followed by 1, etc.  Any group_index that
+    /// did not have values, should be null.
     ///
     /// OPEN QUESTION: Should this method take a "batch_size: usize"
     /// and produce a Vec<RecordBatch> as output to avoid 1) requiring
@@ -63,7 +67,12 @@ pub trait GroupsAccumulator: Send {
     /// and error on any subsequent call.
     fn evaluate(&mut self) -> Result<ArrayRef>;
 
-    /// Returns any intermediate aggregate state used for multi-phase grouping
+    /// Returns any intermediate aggregate state, used for multi-phase
+    /// grouping.
+    ///
+    /// The rows returned *must* be in group_index order: The value
+    /// for group_index 0, followed by 1, etc.  Any group_index that
+    /// did not have values, should be null.
     ///
     /// For example, AVG returns two arrays:  `SUM` and `COUNT`.
     ///

From 00aac24434df021b874f7553790640ebd2fe8810 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 2 Jul 2023 16:23:05 -0400
Subject: [PATCH 17/89] Zero copy into array

---
 datafusion/physical-expr/src/aggregate/average.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index ee249f3bd1ed..8de2460a1767 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -620,8 +620,9 @@ where
             .map(|(sum, count)| (self.avg_fn)(sum, count))
             .collect::<Result<Vec<_>>>()?;
 
-        // TODO figure out how to do this without the iter / copy
-        let array = PrimitiveArray::<T>::from_iter_values(averages);
+        // Create a primitive array (without a copy)
+        let nulls = None; // TODO implement null handling
+        let array = PrimitiveArray::<T>::new(averages.into(), nulls);
 
         // fix up decimal precision and scale for decimals
         let array = adjust_output_array(&self.return_data_type, Arc::new(array))?;
@@ -637,8 +638,8 @@ where
 
         let sums = std::mem::take(&mut self.sums);
         // create array from vec is zero copy
-        // TODO figure out how to do this without the iter / copy
-        let sums: PrimitiveArray<T> = PrimitiveArray::from_iter_values(sums);
+        let nulls = None; // TODO implement null handling
+        let sums = PrimitiveArray::<T>::new(sums.into(), nulls);
 
         // fix up decimal precision and scale for decimals
         let sums = adjust_output_array(&self.sum_data_type, Arc::new(sums))?;

From d760a5f115688be7b731a782b5b9cd10f3c8df32 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 2 Jul 2023 16:25:29 -0400
Subject: [PATCH 18/89] fix spelling of indices

---
 .../src/physical_plan/aggregates/row_hash2.rs |  2 +-
 .../physical-expr/src/aggregate/average.rs    | 30 +++++++++----------
 .../groups_accumulator/accumulate.rs          | 12 ++++----
 .../src/aggregate/groups_accumulator/mod.rs   |  4 +--
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 792fbb4032bf..c248af8f44f2 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -349,7 +349,7 @@ impl GroupedHashAggregateStream2 {
     /// `group_values`.
     ///
     /// At the return of this function,
-    /// [`Self::current_group_indicies`] has the same number of
+    /// [`Self::current_group_indices`] has the same number of
     /// entries as each array in `group_values` and holds the correct
     /// group_index for that row.
     fn update_group_state(
diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 8de2460a1767..0d87a8f72e24 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -467,7 +467,7 @@ where
     /// Adds one to each group's counter
     fn increment_counts(
         &mut self,
-        group_indicies: &[usize],
+        group_indices: &[usize],
         values: &PrimitiveArray<T>,
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
@@ -476,7 +476,7 @@ where
 
         if values.null_count() == 0 {
             accumulate_all(
-                group_indicies,
+                group_indices,
                 values,
                 opt_filter,
                 |group_index, _new_value| {
@@ -485,7 +485,7 @@ where
             )
         } else {
             accumulate_all_nullable(
-                group_indicies,
+                group_indices,
                 values,
                 opt_filter,
                 |group_index, _new_value, is_valid| {
@@ -500,7 +500,7 @@ where
     /// Adds the counts with the partial counts
     fn update_counts_with_partial_counts(
         &mut self,
-        group_indicies: &[usize],
+        group_indices: &[usize],
         partial_counts: &UInt64Array,
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
@@ -509,7 +509,7 @@ where
 
         if partial_counts.null_count() == 0 {
             accumulate_all(
-                group_indicies,
+                group_indices,
                 partial_counts,
                 opt_filter,
                 |group_index, partial_count| {
@@ -518,7 +518,7 @@ where
             )
         } else {
             accumulate_all_nullable(
-                group_indicies,
+                group_indices,
                 partial_counts,
                 opt_filter,
                 |group_index, partial_count, is_valid| {
@@ -533,7 +533,7 @@ where
     /// Adds the values in `values` to self.sums
     fn update_sums(
         &mut self,
-        group_indicies: &[usize],
+        group_indices: &[usize],
         values: &PrimitiveArray<T>,
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
@@ -543,7 +543,7 @@ where
 
         if values.null_count() == 0 {
             accumulate_all(
-                group_indicies,
+                group_indices,
                 values,
                 opt_filter,
                 |group_index, new_value| {
@@ -553,7 +553,7 @@ where
             )
         } else {
             accumulate_all_nullable(
-                group_indicies,
+                group_indices,
                 values,
                 opt_filter,
                 |group_index, new_value, is_valid| {
@@ -575,15 +575,15 @@ where
     fn update_batch(
         &mut self,
         values: &[ArrayRef],
-        group_indicies: &[usize],
+        group_indices: &[usize],
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
         assert_eq!(values.len(), 1, "single argument to update_batch");
         let values = values.get(0).unwrap().as_primitive::<T>();
 
-        self.increment_counts(group_indicies, values, opt_filter, total_num_groups);
-        self.update_sums(group_indicies, values, opt_filter, total_num_groups);
+        self.increment_counts(group_indices, values, opt_filter, total_num_groups);
+        self.update_sums(group_indices, values, opt_filter, total_num_groups);
 
         Ok(())
     }
@@ -591,7 +591,7 @@ where
     fn merge_batch(
         &mut self,
         values: &[ArrayRef],
-        group_indicies: &[usize],
+        group_indices: &[usize],
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
@@ -600,12 +600,12 @@ where
         let partial_counts = values.get(0).unwrap().as_primitive::<UInt64Type>();
         let partial_sums = values.get(1).unwrap().as_primitive::<T>();
         self.update_counts_with_partial_counts(
-            group_indicies,
+            group_indices,
             partial_counts,
             opt_filter,
             total_num_groups,
         );
-        self.update_sums(group_indicies, partial_sums, opt_filter, total_num_groups);
+        self.update_sums(group_indices, partial_sums, opt_filter, total_num_groups);
 
         Ok(())
     }
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 879c1a3a66b6..0b222444913e 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -66,7 +66,7 @@ use arrow_array::{Array, ArrowNumericType, BooleanArray, PrimitiveArray};
 /// every row.
 ///
 pub fn accumulate_all<T, F>(
-    group_indicies: &[usize],
+    group_indices: &[usize],
     values: &PrimitiveArray<T>,
     opt_filter: Option<&BooleanArray>,
     mut value_fn: F,
@@ -83,9 +83,9 @@ pub fn accumulate_all<T, F>(
     // AAL TODO handle filter values
 
     let data: &[T::Native] = values.values();
-    assert_eq!(data.len(), group_indicies.len());
+    assert_eq!(data.len(), group_indices.len());
 
-    let iter = group_indicies.iter().zip(data.iter());
+    let iter = group_indices.iter().zip(data.iter());
     for (&group_index, &new_value) in iter {
         value_fn(group_index, new_value)
     }
@@ -100,7 +100,7 @@ pub fn accumulate_all<T, F>(
 /// NOTE the parameter is true when the value is VALID (not when it is
 /// NULL).
 pub fn accumulate_all_nullable<T, F>(
-    group_indicies: &[usize],
+    group_indices: &[usize],
     values: &PrimitiveArray<T>,
     opt_filter: Option<&BooleanArray>,
     mut value_fn: F,
@@ -117,9 +117,9 @@ pub fn accumulate_all_nullable<T, F>(
 
     // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
     let data: &[T::Native] = values.values();
-    assert_eq!(data.len(), group_indicies.len());
+    assert_eq!(data.len(), group_indices.len());
 
-    let group_indices_chunks = group_indicies.chunks_exact(64);
+    let group_indices_chunks = group_indices.chunks_exact(64);
     let data_chunks = data.chunks_exact(64);
     let bit_chunks = valids.inner().bit_chunks();
 
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
index 2c8a9f1b7d83..f2b0b619b409 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
@@ -41,7 +41,7 @@ pub trait GroupsAccumulator: Send {
     fn update_batch(
         &mut self,
         values: &[ArrayRef],
-        group_indicies: &[usize],
+        group_indices: &[usize],
         opt_filter: Option<&BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()>;
@@ -99,7 +99,7 @@ pub trait GroupsAccumulator: Send {
     fn merge_batch(
         &mut self,
         values: &[ArrayRef],
-        group_indicies: &[usize],
+        group_indices: &[usize],
         opt_filter: Option<&BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()>;

From 8811fa65aecad5b1a34eb8fc33f4994b619b438c Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 2 Jul 2023 17:04:59 -0400
Subject: [PATCH 19/89] implement filtering for easy path

---
 .../groups_accumulator/accumulate.rs          | 83 +++++++++++++++----
 1 file changed, 69 insertions(+), 14 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 0b222444913e..6d3951e5b3a5 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -80,14 +80,26 @@ pub fn accumulate_all<T, F>(
         "Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
     );
 
-    // AAL TODO handle filter values
-
     let data: &[T::Native] = values.values();
     assert_eq!(data.len(), group_indices.len());
 
     let iter = group_indices.iter().zip(data.iter());
-    for (&group_index, &new_value) in iter {
-        value_fn(group_index, new_value)
+
+    // handle filter values with a specialized loop
+    if let Some(filter) = opt_filter {
+        assert_eq!(filter.len(), group_indices.len());
+        // The performance with a filtering could be improved by
+        // iterating over the filter in masks
+        let iter = iter.zip(filter.iter());
+        for ((&group_index, &new_value), filter_value) in iter {
+            if let Some(true) = filter_value {
+                value_fn(group_index, new_value)
+            }
+        }
+    } else {
+        for (&group_index, &new_value) in iter {
+            value_fn(group_index, new_value)
+        }
     }
 }
 
@@ -166,6 +178,21 @@ mod test {
         Fixture::new().accumulate_all_test()
     }
 
+    #[test]
+    fn accumulate_with_filter() {
+        Fixture::new()
+            .with_filter(|group_index, _value, _value_opt| {
+                if group_index < 20 {
+                    None
+                } else if group_index < 40 {
+                    Some(false)
+                } else {
+                    Some(true)
+                }
+            })
+            .accumulate_all_test();
+    }
+
     #[test]
     #[should_panic(
         expected = "assertion failed: `(left == right)`\n  left: `34`,\n right: `0`: Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
@@ -201,8 +228,6 @@ mod test {
         );
     }
 
-    // TODO: filter testing with/without null
-
     #[test]
     fn accumulate_fuzz() {
         let mut rng = rand::thread_rng();
@@ -229,6 +254,8 @@ mod test {
         }
     }
 
+    // todo accumulate testing with fuzz
+
     /// Values for testing (there are enough values to exercise the 64 bit chunks
     struct Fixture {
         /// 100..0
@@ -257,6 +284,26 @@ mod test {
             }
         }
 
+        /// Applies `f(group_index, value, value_with_null)` for all
+        /// values in this fixture and set `opt_filter` to the result
+        fn with_filter<F>(mut self, mut f: F) -> Self
+        where
+            F: FnMut(usize, u32, Option<u32>) -> Option<bool>,
+        {
+            let filter: BooleanArray = self
+                .group_indices
+                .iter()
+                .zip(self.values.iter())
+                .zip(self.values_with_nulls.iter())
+                .map(|((&group_index, &value), &value_with_null)| {
+                    f(group_index, value, value_with_null)
+                })
+                .collect();
+
+            self.opt_filter = Some(filter);
+            self
+        }
+
         fn new_random(rng: &mut ThreadRng) -> Self {
             let num_groups: usize = rng.gen_range(0..1000);
             let group_indices: Vec<usize> = (0..num_groups).map(|_| rng.gen()).collect();
@@ -310,14 +357,22 @@ mod test {
                 |group_index, value| accumulated.push((group_index, value)),
             );
 
-            // Should have see all indexes and values in order
-            accumulated
-                .into_iter()
-                .enumerate()
-                .for_each(|(i, (group_index, value))| {
-                    assert_eq!(group_index, self.group_indices[i]);
-                    assert_eq!(value, self.values[i]);
-                })
+            // check_values[i] is true if the value[i] should have been included in the output
+            let check_values = match self.opt_filter.as_ref() {
+                Some(filter) => filter.into_iter().collect::<Vec<_>>(),
+                None => vec![Some(true); self.values.len()],
+            };
+
+            // Should have only checked indexes where the filter was true
+            let mut check_idx = 0;
+            for (i, check_value) in check_values.iter().enumerate() {
+                if let Some(true) = check_value {
+                    let (group_index, value) = &accumulated[check_idx];
+                    check_idx += 1;
+                    assert_eq!(*group_index, self.group_indices[i]);
+                    assert_eq!(*value, self.values[i]);
+                }
+            }
         }
 
         // Calls `accumulate_all_nullable` with group_indices, values,

From 93a4e6f3b96e21c7df15dd777db9aaf1838124cd Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 2 Jul 2023 17:21:24 -0400
Subject: [PATCH 20/89] Implement filtering

---
 .../groups_accumulator/accumulate.rs          | 165 ++++++++++++------
 1 file changed, 116 insertions(+), 49 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 6d3951e5b3a5..8fdd1dad1572 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -88,8 +88,9 @@ pub fn accumulate_all<T, F>(
     // handle filter values with a specialized loop
     if let Some(filter) = opt_filter {
         assert_eq!(filter.len(), group_indices.len());
-        // The performance with a filtering could be improved by
-        // iterating over the filter in masks
+        // The performance with a filter could be improved by
+        // iterating over the filter in chunks, rather than a single
+        // iterator. TODO file a ticket
         let iter = iter.zip(filter.iter());
         for ((&group_index, &new_value), filter_value) in iter {
             if let Some(true) = filter_value {
@@ -120,50 +121,70 @@ pub fn accumulate_all_nullable<T, F>(
     T: ArrowNumericType + Send,
     F: FnMut(usize, T::Native, bool) + Send,
 {
-    // AAL TODO handle filter values
-
     // Given performance is critical, assert if the wrong flavor is called
     let valids = values
         .nulls()
         .expect("Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)");
 
-    // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
-    let data: &[T::Native] = values.values();
-    assert_eq!(data.len(), group_indices.len());
-
-    let group_indices_chunks = group_indices.chunks_exact(64);
-    let data_chunks = data.chunks_exact(64);
-    let bit_chunks = valids.inner().bit_chunks();
-
-    let group_indices_remainder = group_indices_chunks.remainder();
-    let data_remainder = data_chunks.remainder();
-
-    group_indices_chunks
-        .zip(data_chunks)
-        .zip(bit_chunks.iter())
-        .for_each(|((group_index_chunk, data_chunk), mask)| {
-            // index_mask has value 1 << i in the loop
-            let mut index_mask = 1;
-            group_index_chunk.iter().zip(data_chunk.iter()).for_each(
-                |(&group_index, &new_value)| {
-                    // valid bit was set, real vale
-                    let is_valid = (mask & index_mask) != 0;
-                    value_fn(group_index, new_value, is_valid);
-                    index_mask <<= 1;
-                },
-            )
-        });
-
-    // handle any remaining bits (after the intial 64)
-    let remainder_bits = bit_chunks.remainder_bits();
-    group_indices_remainder
-        .iter()
-        .zip(data_remainder.iter())
-        .enumerate()
-        .for_each(|(i, (&group_index, &new_value))| {
-            let is_valid = remainder_bits & (1 << i) != 0;
-            value_fn(group_index, new_value, is_valid)
-        });
+    if let Some(filter) = opt_filter {
+        assert_eq!(filter.len(), values.len());
+        assert_eq!(filter.len(), group_indices.len());
+        // The performance with a filter could be improved by
+        // iterating over the filter in chunks, rather than using
+        // iterators. TODO file a ticket
+        filter
+            .iter()
+            .zip(group_indices.iter())
+            .zip(values.iter())
+            .for_each(|((filter_value, group_index), new_value)| {
+                // did value[i] pass the filter?
+                if let Some(true) = filter_value {
+                    // Is value[i] valid?
+                    match new_value {
+                        Some(new_value) => value_fn(*group_index, new_value, true),
+                        None => value_fn(*group_index, Default::default(), false),
+                    }
+                }
+            })
+    } else {
+        // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
+        // iterate over in chunks of 64 bits for more efficient null checking
+        let data: &[T::Native] = values.values();
+        assert_eq!(data.len(), group_indices.len());
+        let group_indices_chunks = group_indices.chunks_exact(64);
+        let data_chunks = data.chunks_exact(64);
+        let bit_chunks = valids.inner().bit_chunks();
+
+        let group_indices_remainder = group_indices_chunks.remainder();
+        let data_remainder = data_chunks.remainder();
+
+        group_indices_chunks
+            .zip(data_chunks)
+            .zip(bit_chunks.iter())
+            .for_each(|((group_index_chunk, data_chunk), mask)| {
+                // index_mask has value 1 << i in the loop
+                let mut index_mask = 1;
+                group_index_chunk.iter().zip(data_chunk.iter()).for_each(
+                    |(&group_index, &new_value)| {
+                        // valid bit was set, real value
+                        let is_valid = (mask & index_mask) != 0;
+                        value_fn(group_index, new_value, is_valid);
+                        index_mask <<= 1;
+                    },
+                )
+            });
+
+        // handle any remaining bits (after the initial 64)
+        let remainder_bits = bit_chunks.remainder_bits();
+        group_indices_remainder
+            .iter()
+            .zip(data_remainder.iter())
+            .enumerate()
+            .for_each(|(i, (&group_index, &new_value))| {
+                let is_valid = remainder_bits & (1 << i) != 0;
+                value_fn(group_index, new_value, is_valid)
+            });
+    }
 }
 
 #[cfg(test)]
@@ -213,6 +234,21 @@ mod test {
         Fixture::new().accumulate_all_nullable_test()
     }
 
+    #[test]
+    fn accumulate_nullable_with_filter() {
+        Fixture::new()
+            .with_filter(|group_index, _value, _value_opt| {
+                if group_index < 20 {
+                    None
+                } else if group_index < 40 {
+                    Some(false)
+                } else {
+                    Some(true)
+                }
+            })
+            .accumulate_all_nullable_test();
+    }
+
     #[test]
     #[should_panic(
         expected = "Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)"
@@ -310,6 +346,28 @@ mod test {
 
             let values: Vec<u32> = (0..num_groups).map(|_| rng.gen()).collect();
 
+            // with 30 percent probability, add a filter
+            let opt_filter = if 0.3 < rng.gen_range(0.0..1.0) {
+                // 10% chance of false
+                // 10% chance of null
+                // 80% chance of true
+                let filter: BooleanArray = (0..num_groups)
+                    .map(|_| {
+                        let filter_value = rng.gen_range(0.0..1.0);
+                        if filter_value < 0.1 {
+                            Some(false)
+                        } else if filter_value < 0.2 {
+                            None
+                        } else {
+                            Some(true)
+                        }
+                    })
+                    .collect();
+                Some(filter)
+            } else {
+                None
+            };
+
             // random values with random number and location of nulls
             // random null percentage
             let null_pct: f32 = rng.gen_range(0.0..1.0);
@@ -328,7 +386,7 @@ mod test {
                 group_indices,
                 values,
                 values_with_nulls,
-                opt_filter: None,
+                opt_filter,
             }
         }
 
@@ -390,14 +448,23 @@ mod test {
                 },
             );
 
+            // check_values[i] is true if the value[i] should have been included in the output
+            let check_values = match self.opt_filter.as_ref() {
+                Some(filter) => filter.into_iter().collect::<Vec<_>>(),
+                None => vec![Some(true); self.values.len()],
+            };
+
             // Should have see all indexes and values in order
-            accumulated
-                .into_iter()
-                .enumerate()
-                .for_each(|(i, (group_index, value))| {
-                    assert_eq!(group_index, self.group_indices[i]);
-                    assert_eq!(value, self.values_with_nulls[i]);
-                })
+            let mut check_idx = 0;
+            for (i, check_value) in check_values.iter().enumerate() {
+                if let Some(true) = check_value {
+                    let (group_index, value) = &accumulated[check_idx];
+                    check_idx += 1;
+
+                    assert_eq!(*group_index, self.group_indices[i]);
+                    assert_eq!(*value, self.values_with_nulls[i]);
+                }
+            }
         }
     }
 }

From 966d3d09d3b2cd976b8111eef45f09fd71665092 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sun, 2 Jul 2023 17:55:58 -0400
Subject: [PATCH 21/89] Add null handling in avg

---
 .../physical-expr/src/aggregate/average.rs    | 75 +++++++++++++++----
 1 file changed, 59 insertions(+), 16 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 0d87a8f72e24..8940fe708c15 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -17,7 +17,8 @@
 
 //! Defines physical expressions that can evaluated at runtime during query execution
 
-use arrow::array::AsArray;
+use arrow::array::{AsArray, PrimitiveBuilder};
+use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 use log::debug;
 
 use std::any::Any;
@@ -441,6 +442,9 @@ where
     /// Sums per group, stored as the native type
     sums: Vec<T::Native>,
 
+    /// If we have seen a null input value for this group_index
+    null_inputs: BooleanBufferBuilder,
+
     /// Function that computes the average (value / count)
     avg_fn: F,
 }
@@ -455,11 +459,13 @@ where
             "AvgGroupsAccumulator ({}, sum type: {sum_data_type:?}) --> {return_data_type:?}",
             std::any::type_name::<T>()
         );
+
         Self {
             return_data_type: return_data_type.clone(),
             sum_data_type: sum_data_type.clone(),
             counts: vec![],
             sums: vec![],
+            null_inputs: BooleanBufferBuilder::new(0),
             avg_fn,
         }
     }
@@ -538,6 +544,12 @@ where
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) {
+        if self.null_inputs.len() < total_num_groups {
+            let new_groups = total_num_groups - self.null_inputs.len();
+            // All groups start as valid (and are set to null if we
+            // see a null in the input)
+            self.null_inputs.append_n(new_groups, true);
+        }
         self.sums
             .resize_with(total_num_groups, || T::default_value());
 
@@ -547,6 +559,10 @@ where
                 values,
                 opt_filter,
                 |group_index, new_value| {
+                    // note since add_wrapping doesn't error, we
+                    // simply add values in null sum slots rather than
+                    // checking if they are null first. The theory is
+                    // this is faster
                     let sum = &mut self.sums[group_index];
                     *sum = sum.add_wrapping(new_value);
                 },
@@ -560,11 +576,26 @@ where
                     if is_valid {
                         let sum = &mut self.sums[group_index];
                         *sum = sum.add_wrapping(new_value);
+                    } else {
+                        // input null means this group is now null
+                        self.null_inputs.set_bit(group_index, false);
                     }
                 },
             )
         }
     }
+
+    /// Returns a NullBuffer representing which group_indices have
+    /// null values (if they saw a null input)
+    /// Resets `self.null_inputs`;
+    fn build_nulls(&mut self) -> Option<NullBuffer> {
+        let nulls = NullBuffer::new(self.null_inputs.finish());
+        if nulls.null_count() > 0 {
+            Some(nulls)
+        } else {
+            None
+        }
+    }
 }
 
 impl<T, F> GroupsAccumulator for AvgGroupsAccumulator<T, F>
@@ -613,16 +644,32 @@ where
     fn evaluate(&mut self) -> Result<ArrayRef> {
         let counts = std::mem::take(&mut self.counts);
         let sums = std::mem::take(&mut self.sums);
+        let nulls = self.build_nulls();
 
-        let averages: Vec<T::Native> = sums
-            .into_iter()
-            .zip(counts.into_iter())
-            .map(|(sum, count)| (self.avg_fn)(sum, count))
-            .collect::<Result<Vec<_>>>()?;
+        assert_eq!(counts.len(), sums.len());
 
-        // Create a primitive array (without a copy)
-        let nulls = None; // TODO implement null handling
-        let array = PrimitiveArray::<T>::new(averages.into(), nulls);
+        // don't evaluate averages with null inputs to avoid errors on null values
+        let array: PrimitiveArray<T> = if let Some(nulls) = nulls.as_ref() {
+            assert_eq!(nulls.len(), sums.len());
+            let mut builder = PrimitiveBuilder::<T>::with_capacity(nulls.len());
+            let iter = sums.into_iter().zip(counts.into_iter()).zip(nulls.iter());
+
+            for ((sum, count), is_valid) in iter {
+                if is_valid {
+                    builder.append_value((self.avg_fn)(sum, count)?)
+                } else {
+                    builder.append_null();
+                }
+            }
+            builder.finish()
+        } else {
+            let averages: Vec<T::Native> = sums
+                .into_iter()
+                .zip(counts.into_iter())
+                .map(|(sum, count)| (self.avg_fn)(sum, count))
+                .collect::<Result<Vec<_>>>()?;
+            PrimitiveArray::new(averages.into(), nulls) // no copy
+        };
 
         // fix up decimal precision and scale for decimals
         let array = adjust_output_array(&self.return_data_type, Arc::new(array))?;
@@ -632,16 +679,12 @@ where
 
     // return arrays for sums and counts
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        let nulls = self.build_nulls();
         let counts = std::mem::take(&mut self.counts);
-        // create array from vec is zero copy
-        let counts = UInt64Array::from(counts);
+        let counts = UInt64Array::from(counts); // zero copy
 
         let sums = std::mem::take(&mut self.sums);
-        // create array from vec is zero copy
-        let nulls = None; // TODO implement null handling
-        let sums = PrimitiveArray::<T>::new(sums.into(), nulls);
-
-        // fix up decimal precision and scale for decimals
+        let sums = PrimitiveArray::<T>::new(sums.into(), nulls); // zero copy
         let sums = adjust_output_array(&self.sum_data_type, Arc::new(sums))?;
 
         Ok(vec![

From 316c78173aa3d2298422bbd6f075b0d40c82a776 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 10:09:33 +0200
Subject: [PATCH 22/89] WIP count

---
 .../physical-expr/src/aggregate/count.rs      | 238 +++++++++++++++++-
 1 file changed, 236 insertions(+), 2 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index 22cb2512fc42..c3ad7767b1c7 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -19,17 +19,23 @@
 
 use std::any::Any;
 use std::fmt::Debug;
+use std::marker::PhantomData;
 use std::ops::BitAnd;
 use std::sync::Arc;
 
 use crate::aggregate::row_accumulator::RowAccumulator;
 use crate::aggregate::utils::down_cast_any_ref;
-use crate::{AggregateExpr, PhysicalExpr};
+use crate::{AggregateExpr, PhysicalExpr, GroupsAccumulator};
 use arrow::array::{Array, Int64Array};
 use arrow::compute;
+use arrow::compute::kernels::cast;
 use arrow::datatypes::DataType;
 use arrow::{array::ArrayRef, datatypes::Field};
-use arrow_buffer::BooleanBuffer;
+use arrow_array::builder::PrimitiveBuilder;
+use arrow_array::cast::AsArray;
+use arrow_array::types::{UInt64Type, Int64Type, UInt32Type, Int32Type};
+use arrow_array::{PrimitiveArray, UInt64Array, ArrowNumericType};
+use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer};
 use datafusion_common::{downcast_value, ScalarValue};
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::Accumulator;
@@ -37,6 +43,8 @@ use datafusion_row::accessor::RowAccessor;
 
 use crate::expressions::format_state_name;
 
+use super::groups_accumulator::accumulate::{accumulate_all, accumulate_all_nullable};
+
 /// COUNT aggregate expression
 /// Returns the amount of non-null values of the given expression.
 #[derive(Debug, Clone)]
@@ -76,6 +84,200 @@ impl Count {
     }
 }
 
+/// An accumulator to compute the average of PrimitiveArray<T>.
+/// Stores values as native types, and does overflow checking
+///
+/// F: Function that calcuates the average value from a sum of
+/// T::Native and a total count
+#[derive(Debug)]
+struct CountGroupsAccumulator<T>
+where T: ArrowNumericType + Send,
+{
+    /// The type of the returned count
+    return_data_type: DataType,
+
+    /// Count per group (use u64 to make UInt64Array)
+    counts: Vec<u64>,
+
+    /// If we have seen a null input value for this group_index
+    null_inputs: BooleanBufferBuilder,
+
+    // Bind it to struct
+    phantom: PhantomData<T>
+}
+
+
+impl<T> CountGroupsAccumulator<T>
+where T: ArrowNumericType + Send,
+{
+    pub fn new(return_data_type: &DataType) -> Self {
+        Self {
+            return_data_type: return_data_type.clone(),
+            counts: vec![],
+            null_inputs: BooleanBufferBuilder::new(0),
+            phantom: PhantomData {}
+        }
+    }
+
+        /// Adds one to each group's counter
+        fn increment_counts(
+            &mut self,
+            group_indices: &[usize],
+            values: &PrimitiveArray<T>,
+            opt_filter: Option<&arrow_array::BooleanArray>,
+            total_num_groups: usize,
+        ) {
+            self.counts.resize(total_num_groups, 0);
+    
+            if values.null_count() == 0 {
+                accumulate_all(
+                    group_indices,
+                    values,
+                    opt_filter,
+                    |group_index, _new_value| {
+                        self.counts[group_index] += 1;
+                    }
+                )
+            }else {
+                accumulate_all_nullable(
+                    group_indices,
+                    values,
+                    opt_filter,
+                    |group_index, _new_value, is_valid| {
+                        if is_valid {
+                            self.counts[group_index] += 1;
+                        }
+                    },
+                )
+            }
+        }
+
+        /// Adds the counts with the partial counts
+        fn update_counts_with_partial_counts(
+            &mut self,
+            group_indices: &[usize],
+            partial_counts: &UInt64Array,
+            opt_filter: Option<&arrow_array::BooleanArray>,
+            total_num_groups: usize,
+        ) {
+            self.counts.resize(total_num_groups, 0);
+    
+            if partial_counts.null_count() == 0 {
+                accumulate_all(
+                    group_indices,
+                    partial_counts,
+                    opt_filter,
+                    |group_index, partial_count| {
+                        self.counts[group_index] += partial_count;
+                    },
+                )
+            } else {
+                accumulate_all_nullable(
+                    group_indices,
+                    partial_counts,
+                    opt_filter,
+                    |group_index, partial_count, is_valid| {
+                        if is_valid {
+                            self.counts[group_index] += partial_count;
+                        }
+                    },
+                )
+            }
+        }
+
+        /// Returns a NullBuffer representing which group_indices have
+        /// null values (if they saw a null input)
+        /// Resets `self.null_inputs`;
+        fn build_nulls(&mut self) -> Option<NullBuffer> {
+            let nulls = NullBuffer::new(self.null_inputs.finish());
+            if nulls.null_count() > 0 {
+                Some(nulls)
+            } else {
+                None
+            }
+        }
+}
+
+impl <T> GroupsAccumulator for CountGroupsAccumulator<T>
+where T: ArrowNumericType + Send
+{
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "single argument to update_batch");
+        let values = values.get(0).unwrap().as_primitive::<T>();
+
+        self.increment_counts(group_indices, values, opt_filter, total_num_groups);
+
+        Ok(())
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "one argument to merge_batch");
+        // first batch is counts, second is partial sums
+        let partial_counts = values.get(0).unwrap().as_primitive::<UInt64Type>();
+        self.update_counts_with_partial_counts(
+            group_indices,
+            partial_counts,
+            opt_filter,
+            total_num_groups,
+        );
+
+        Ok(())
+    }
+
+    fn evaluate(&mut self) -> Result<ArrayRef> {
+        let counts = std::mem::take(&mut self.counts);
+        let nulls = self.build_nulls();
+
+        // don't evaluate averages with null inputs to avoid errors on null vaues
+        let array: PrimitiveArray<UInt64Type> = if let Some(nulls) = nulls.as_ref() {
+            let mut builder = PrimitiveBuilder::<UInt64Type>::with_capacity(nulls.len());
+            let iter = counts.into_iter().zip(nulls.iter());
+
+            for (count, is_valid) in iter {
+                if is_valid {
+                    builder.append_value(count)
+                } else {
+                    builder.append_null();
+                }
+            }
+            builder.finish()
+        } else {
+            PrimitiveArray::<UInt64Type>::new(counts.into(), nulls) // no copy
+        };
+        // TODO remove cast
+        let array = cast(&array, &self.return_data_type)?;
+
+        Ok(array)
+    }
+
+    // return arrays for sums and counts
+    fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        // TODO nulls
+        let nulls = self.build_nulls();
+        let counts = std::mem::take(&mut self.counts);
+        let counts = UInt64Array::from(counts); // zero copy
+        Ok(vec![
+            Arc::new(counts) as ArrayRef,
+        ])
+    }
+
+    fn size(&self) -> usize {
+        self.counts.capacity() * std::mem::size_of::<usize>()
+    }
+}
+
 /// count null values for multiple columns
 /// for each row if one column value is null, then null_count + 1
 fn null_count_for_multiple_cols(values: &[ArrayRef]) -> usize {
@@ -147,6 +349,38 @@ impl AggregateExpr for Count {
     fn create_sliding_accumulator(&self) -> Result<Box<dyn Accumulator>> {
         Ok(Box::new(CountAccumulator::new()))
     }
+
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        // instantiate specialized accumulator
+        match &self.data_type {
+            DataType::UInt64 => {
+                Ok(Box::new(CountGroupsAccumulator::<UInt64Type>::new(
+                    &self.data_type,
+                )))
+            },
+                DataType::Int64 => {
+                Ok(Box::new(CountGroupsAccumulator::<Int64Type>::new(
+                    &self.data_type,
+                )))
+            },
+                DataType::UInt32 => {
+                Ok(Box::new(CountGroupsAccumulator::<UInt32Type>::new(
+                    &self.data_type,
+                )))
+            },
+                DataType::Int32 => {
+                Ok(Box::new(CountGroupsAccumulator::<Int32Type>::new(
+                    &self.data_type,
+                )))
+            }
+
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "CountGroupsAccumulator not supported for {}",
+                self.data_type
+            ))),
+        }
+
+    }
 }
 
 impl PartialEq<dyn Any> for Count {

From 754a9ffe5bc7cb23871d0b8a78d8cbcd6860ca79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 12:11:06 +0200
Subject: [PATCH 23/89] WIP count

---
 .../physical-expr/src/aggregate/count.rs      | 219 +++++++-----------
 1 file changed, 87 insertions(+), 132 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index c3ad7767b1c7..47b7588ec518 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -25,17 +25,16 @@ use std::sync::Arc;
 
 use crate::aggregate::row_accumulator::RowAccumulator;
 use crate::aggregate::utils::down_cast_any_ref;
-use crate::{AggregateExpr, PhysicalExpr, GroupsAccumulator};
+use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::array::{Array, Int64Array};
 use arrow::compute;
 use arrow::compute::kernels::cast;
 use arrow::datatypes::DataType;
 use arrow::{array::ArrayRef, datatypes::Field};
-use arrow_array::builder::PrimitiveBuilder;
 use arrow_array::cast::AsArray;
-use arrow_array::types::{UInt64Type, Int64Type, UInt32Type, Int32Type};
-use arrow_array::{PrimitiveArray, UInt64Array, ArrowNumericType};
-use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, NullBuffer};
+use arrow_array::types::{Int32Type, Int64Type, UInt32Type, UInt64Type};
+use arrow_array::{ArrowNumericType, PrimitiveArray, UInt64Array};
+use arrow_buffer::BooleanBuffer;
 use datafusion_common::{downcast_value, ScalarValue};
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::Accumulator;
@@ -91,115 +90,100 @@ impl Count {
 /// T::Native and a total count
 #[derive(Debug)]
 struct CountGroupsAccumulator<T>
-where T: ArrowNumericType + Send,
+where
+    T: ArrowNumericType + Send,
 {
     /// The type of the returned count
     return_data_type: DataType,
 
     /// Count per group (use u64 to make UInt64Array)
     counts: Vec<u64>,
-
-    /// If we have seen a null input value for this group_index
-    null_inputs: BooleanBufferBuilder,
-
     // Bind it to struct
-    phantom: PhantomData<T>
+    phantom: PhantomData<T>,
 }
 
-
 impl<T> CountGroupsAccumulator<T>
-where T: ArrowNumericType + Send,
+where
+    T: ArrowNumericType + Send,
 {
     pub fn new(return_data_type: &DataType) -> Self {
         Self {
             return_data_type: return_data_type.clone(),
             counts: vec![],
-            null_inputs: BooleanBufferBuilder::new(0),
-            phantom: PhantomData {}
+            phantom: PhantomData {},
         }
     }
 
-        /// Adds one to each group's counter
-        fn increment_counts(
-            &mut self,
-            group_indices: &[usize],
-            values: &PrimitiveArray<T>,
-            opt_filter: Option<&arrow_array::BooleanArray>,
-            total_num_groups: usize,
-        ) {
-            self.counts.resize(total_num_groups, 0);
-    
-            if values.null_count() == 0 {
-                accumulate_all(
-                    group_indices,
-                    values,
-                    opt_filter,
-                    |group_index, _new_value| {
+    /// Adds one to each group's counter
+    fn increment_counts(
+        &mut self,
+        group_indices: &[usize],
+        values: &PrimitiveArray<T>,
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) {
+        self.counts.resize(total_num_groups, 0);
+
+        if values.null_count() == 0 {
+            accumulate_all(
+                group_indices,
+                values,
+                opt_filter,
+                |group_index, _new_value| {
+                    self.counts[group_index] += 1;
+                },
+            )
+        } else {
+            accumulate_all_nullable(
+                group_indices,
+                values,
+                opt_filter,
+                |group_index, _new_value, is_valid| {
+                    if is_valid {
                         self.counts[group_index] += 1;
                     }
-                )
-            }else {
-                accumulate_all_nullable(
-                    group_indices,
-                    values,
-                    opt_filter,
-                    |group_index, _new_value, is_valid| {
-                        if is_valid {
-                            self.counts[group_index] += 1;
-                        }
-                    },
-                )
-            }
+                },
+            )
         }
+    }
 
-        /// Adds the counts with the partial counts
-        fn update_counts_with_partial_counts(
-            &mut self,
-            group_indices: &[usize],
-            partial_counts: &UInt64Array,
-            opt_filter: Option<&arrow_array::BooleanArray>,
-            total_num_groups: usize,
-        ) {
-            self.counts.resize(total_num_groups, 0);
-    
-            if partial_counts.null_count() == 0 {
-                accumulate_all(
-                    group_indices,
-                    partial_counts,
-                    opt_filter,
-                    |group_index, partial_count| {
+    /// Adds the counts with the partial counts
+    fn update_counts_with_partial_counts(
+        &mut self,
+        group_indices: &[usize],
+        partial_counts: &UInt64Array,
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) {
+        self.counts.resize(total_num_groups, 0);
+
+        if partial_counts.null_count() == 0 {
+            accumulate_all(
+                group_indices,
+                partial_counts,
+                opt_filter,
+                |group_index, partial_count| {
+                    self.counts[group_index] += partial_count;
+                },
+            )
+        } else {
+            accumulate_all_nullable(
+                group_indices,
+                partial_counts,
+                opt_filter,
+                |group_index, partial_count, is_valid| {
+                    if is_valid {
                         self.counts[group_index] += partial_count;
-                    },
-                )
-            } else {
-                accumulate_all_nullable(
-                    group_indices,
-                    partial_counts,
-                    opt_filter,
-                    |group_index, partial_count, is_valid| {
-                        if is_valid {
-                            self.counts[group_index] += partial_count;
-                        }
-                    },
-                )
-            }
-        }
-
-        /// Returns a NullBuffer representing which group_indices have
-        /// null values (if they saw a null input)
-        /// Resets `self.null_inputs`;
-        fn build_nulls(&mut self) -> Option<NullBuffer> {
-            let nulls = NullBuffer::new(self.null_inputs.finish());
-            if nulls.null_count() > 0 {
-                Some(nulls)
-            } else {
-                None
-            }
+                    }
+                },
+            )
         }
+    }
 }
 
-impl <T> GroupsAccumulator for CountGroupsAccumulator<T>
-where T: ArrowNumericType + Send
+impl<T> GroupsAccumulator for CountGroupsAccumulator<T>
+where
+    T: ArrowNumericType + Send,
 {
     fn update_batch(
         &mut self,
@@ -238,24 +222,8 @@ where T: ArrowNumericType + Send
 
     fn evaluate(&mut self) -> Result<ArrayRef> {
         let counts = std::mem::take(&mut self.counts);
-        let nulls = self.build_nulls();
-
-        // don't evaluate averages with null inputs to avoid errors on null vaues
-        let array: PrimitiveArray<UInt64Type> = if let Some(nulls) = nulls.as_ref() {
-            let mut builder = PrimitiveBuilder::<UInt64Type>::with_capacity(nulls.len());
-            let iter = counts.into_iter().zip(nulls.iter());
-
-            for (count, is_valid) in iter {
-                if is_valid {
-                    builder.append_value(count)
-                } else {
-                    builder.append_null();
-                }
-            }
-            builder.finish()
-        } else {
-            PrimitiveArray::<UInt64Type>::new(counts.into(), nulls) // no copy
-        };
+
+        let array = PrimitiveArray::<UInt64Type>::new(counts.into(), None);
         // TODO remove cast
         let array = cast(&array, &self.return_data_type)?;
 
@@ -264,13 +232,9 @@ where T: ArrowNumericType + Send
 
     // return arrays for sums and counts
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
-        // TODO nulls
-        let nulls = self.build_nulls();
         let counts = std::mem::take(&mut self.counts);
         let counts = UInt64Array::from(counts); // zero copy
-        Ok(vec![
-            Arc::new(counts) as ArrayRef,
-        ])
+        Ok(vec![Arc::new(counts) as ArrayRef])
     }
 
     fn size(&self) -> usize {
@@ -353,33 +317,24 @@ impl AggregateExpr for Count {
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         // instantiate specialized accumulator
         match &self.data_type {
-            DataType::UInt64 => {
-                Ok(Box::new(CountGroupsAccumulator::<UInt64Type>::new(
-                    &self.data_type,
-                )))
-            },
-                DataType::Int64 => {
-                Ok(Box::new(CountGroupsAccumulator::<Int64Type>::new(
-                    &self.data_type,
-                )))
-            },
-                DataType::UInt32 => {
-                Ok(Box::new(CountGroupsAccumulator::<UInt32Type>::new(
-                    &self.data_type,
-                )))
-            },
-                DataType::Int32 => {
-                Ok(Box::new(CountGroupsAccumulator::<Int32Type>::new(
-                    &self.data_type,
-                )))
-            }
+            DataType::UInt64 => Ok(Box::new(CountGroupsAccumulator::<UInt64Type>::new(
+                &self.data_type,
+            ))),
+            DataType::Int64 => Ok(Box::new(CountGroupsAccumulator::<Int64Type>::new(
+                &self.data_type,
+            ))),
+            DataType::UInt32 => Ok(Box::new(CountGroupsAccumulator::<UInt32Type>::new(
+                &self.data_type,
+            ))),
+            DataType::Int32 => Ok(Box::new(CountGroupsAccumulator::<Int32Type>::new(
+                &self.data_type,
+            ))),
 
             _ => Err(DataFusionError::NotImplemented(format!(
                 "CountGroupsAccumulator not supported for {}",
                 self.data_type
             ))),
         }
-
     }
 }
 

From e708723ca5a1d92c2480fc79efde2e129b9c213e Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Mon, 3 Jul 2023 07:53:25 -0400
Subject: [PATCH 24/89] Sketch out the adapter interface

---
 .../src/physical_plan/aggregates/row_hash2.rs | 21 +++++++++++++++++--
 .../physical-expr/src/aggregate/average.rs    |  6 +++++-
 .../src/aggregate/groups_accumulator/mod.rs   | 14 ++++++++-----
 datafusion/physical-expr/src/aggregate/mod.rs |  9 +++++++-
 datafusion/physical-expr/src/lib.rs           |  2 +-
 5 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index c248af8f44f2..2d2e86bfb712 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -19,7 +19,9 @@
 //!
 //! POC demonstration of GroupByHashApproach
 
-use datafusion_physical_expr::GroupsAccumulator;
+use datafusion_physical_expr::{
+    AggregateExpr, GroupsAccumulator, GroupsAccumulatorAdapter,
+};
 use log::debug;
 use std::sync::Arc;
 use std::task::{Context, Poll};
@@ -224,7 +226,7 @@ impl GroupedHashAggregateStream2 {
         // Instantiate the accumulators
         let accumulators: Vec<_> = aggregate_exprs
             .iter()
-            .map(|agg_expr| agg_expr.create_groups_accumulator())
+            .map(|agg_expr| create_group_accumulator(agg_expr.as_ref()))
             .collect::<Result<_>>()?;
 
         let group_schema = group_schema(&agg_schema, agg_group_by.expr.len());
@@ -267,6 +269,21 @@ impl GroupedHashAggregateStream2 {
     }
 }
 
+/// Create an accumulator for `agg_expr` -- a [`GroupsAccumulator`] if
+/// that is supported by the aggrgate, or a
+/// [`GroupsAccumulatorAdapter`] if not.
+fn create_group_accumulator(
+    agg_expr: &dyn AggregateExpr,
+) -> Result<Box<dyn GroupsAccumulator>> {
+    if agg_expr.groups_accumulator_supported() {
+        agg_expr.create_groups_accumulator()
+    } else {
+        // Adapt the basic accumulator
+        let accumulator = agg_expr.create_accumulator()?;
+        Ok(Box::new(GroupsAccumulatorAdapter::new(accumulator)))
+    }
+}
+
 impl Stream for GroupedHashAggregateStream2 {
     type Item = Result<RecordBatch>;
 
diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 8940fe708c15..54e964fcef6b 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -46,7 +46,7 @@ use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::Accumulator;
 use datafusion_row::accessor::RowAccessor;
 
-use super::groups_accumulator::accumulate::{accumulate_all, accumulate_all_nullable};
+use super::groups_accumulator::{accumulate_all, accumulate_all_nullable};
 use super::utils::Decimal128Averager;
 
 /// AVG aggregate expression
@@ -163,6 +163,10 @@ impl AggregateExpr for Avg {
         )?))
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        true
+    }
+
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         // instantiate specialized accumulator
         match (&self.sum_data_type, &self.rt_data_type) {
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
index f2b0b619b409..512e7c65cbf0 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
@@ -17,16 +17,20 @@
 
 //! Vectorized [`GroupsAccumulator`]
 
-pub mod accumulate;
+pub(crate) mod accumulate;
+mod adapter;
+
+pub(crate) use accumulate::{accumulate_all, accumulate_all_nullable};
+pub use adapter::GroupsAccumulatorAdapter;
 
 use arrow_array::{ArrayRef, BooleanArray};
 use datafusion_common::Result;
 
-/// An implementation of GroupAccumulator is for a single aggregate
-/// (e.g. AVG) and stores the state for *all* groups internally
+/// `GroupAccumulator` implements a single aggregate (e.g. AVG) and
+/// stores the state for *all* groups internally.
 ///
-/// The logical model is that each group is given a `group_index`
-/// assigned and maintained by the hash table.
+/// Each group is assigned a `group_index` by the hash table and each
+/// accumulator manages the specific state, one per group_index.
 ///
 /// group_indexes are contiguous (there aren't gaps), and thus it is
 /// expected that each GroupAccumulator will use something like `Vec<..>`
diff --git a/datafusion/physical-expr/src/aggregate/mod.rs b/datafusion/physical-expr/src/aggregate/mod.rs
index 4b613c8e9b0e..a21cddd62c63 100644
--- a/datafusion/physical-expr/src/aggregate/mod.rs
+++ b/datafusion/physical-expr/src/aggregate/mod.rs
@@ -121,10 +121,17 @@ pub trait AggregateExpr: Send + Sync + Debug + PartialEq<dyn Any> {
         )))
     }
 
+    /// If the aggregate expression has a specialized
+    /// [`GroupsAccumulator`] implementation. If this returns true,
+    /// `[Self::create_groups_accumulator`] will be called.
+    fn groups_accumulator_supported(&self) -> bool {
+        false
+    }
+
     /// Return a specialized [`GroupsAccumulator`] that manages state for all groups
     ///
     /// For maximum performance, [`GroupsAccumulator`] should be
-    /// implemented rather than [`Accumulator`].
+    /// implemented in addition to [`Accumulator`].
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         // TODO: The default should implement a wrapper over
         // sef.create_accumulator
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
index 6ea8dc94879f..4811b3a19f29 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -45,7 +45,7 @@ pub mod var_provider;
 pub mod window;
 
 // reexport this to maintain compatibility with anything that used from_slice previously
-pub use aggregate::groups_accumulator::GroupsAccumulator;
+pub use aggregate::groups_accumulator::{GroupsAccumulator, GroupsAccumulatorAdapter};
 pub use aggregate::AggregateExpr;
 
 pub use equivalence::{

From 677160ec525a9dfe407e4c81b96e22e7454d93a5 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Mon, 3 Jul 2023 08:22:29 -0400
Subject: [PATCH 25/89] More new adapter interface

---
 .../src/physical_plan/aggregates/row_hash2.rs | 10 +--
 .../aggregate/groups_accumulator/adapter.rs   | 86 +++++++++++++++++++
 2 files changed, 91 insertions(+), 5 deletions(-)
 create mode 100644 datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 2d2e86bfb712..a1d416881976 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -226,7 +226,7 @@ impl GroupedHashAggregateStream2 {
         // Instantiate the accumulators
         let accumulators: Vec<_> = aggregate_exprs
             .iter()
-            .map(|agg_expr| create_group_accumulator(agg_expr.as_ref()))
+            .map(create_group_accumulator)
             .collect::<Result<_>>()?;
 
         let group_schema = group_schema(&agg_schema, agg_group_by.expr.len());
@@ -273,14 +273,14 @@ impl GroupedHashAggregateStream2 {
 /// that is supported by the aggrgate, or a
 /// [`GroupsAccumulatorAdapter`] if not.
 fn create_group_accumulator(
-    agg_expr: &dyn AggregateExpr,
+    agg_expr: &Arc<dyn AggregateExpr>,
 ) -> Result<Box<dyn GroupsAccumulator>> {
     if agg_expr.groups_accumulator_supported() {
         agg_expr.create_groups_accumulator()
     } else {
-        // Adapt the basic accumulator
-        let accumulator = agg_expr.create_accumulator()?;
-        Ok(Box::new(GroupsAccumulatorAdapter::new(accumulator)))
+        let agg_expr_captured = agg_expr.clone();
+        let factory = move || agg_expr_captured.create_accumulator();
+        Ok(Box::new(GroupsAccumulatorAdapter::new(factory)))
     }
 }
 
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
new file mode 100644
index 000000000000..c2988a7428bf
--- /dev/null
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Adapter that makes [`GroupsAccumulator`] out of [`Accumulator`]
+
+use super::GroupsAccumulator;
+use arrow_array::{ArrayRef, BooleanArray};
+use datafusion_common::Result;
+use datafusion_expr::Accumulator;
+
+/// An adpater that implements [`GroupsAccumulator`] for any [`Accumulator`]
+///
+/// While [`Accumulator`] are simpler to implement and can support
+/// more general calculations (like retractable), but are not as fast
+/// as `GroupsAccumulator`. This interface bridges the gap.
+pub struct GroupsAccumulatorAdapter {
+    factory: Box<dyn Fn() -> Result<Box<dyn Accumulator>> + Send>,
+
+    /// [`Accumulators`] for each group, stored in group_index order
+    accumulators: Vec<Box<dyn Accumulator>>,
+}
+
+impl GroupsAccumulatorAdapter {
+    /// Create a new adapter that will create a new [`Accumulator`]
+    /// for each group, using the specified factory function
+    pub fn new<F>(factory: F) -> Self
+    where
+        F: Fn() -> Result<Box<dyn Accumulator>> + Send + 'static,
+    {
+        Self {
+            factory: Box::new(factory),
+            accumulators: vec![],
+        }
+    }
+}
+
+impl GroupsAccumulator for GroupsAccumulatorAdapter {
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        todo!()
+    }
+
+    fn evaluate(&mut self) -> Result<ArrayRef> {
+        todo!()
+    }
+
+    fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        todo!()
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        todo!()
+    }
+
+    fn size(&self) -> usize {
+        self.accumulators.iter().map(|a| a.size()).sum::<usize>()
+            //include the size of self and self.accumulators itself
+            + self.accumulators.len() * std::mem::size_of::<Box<dyn Accumulator>>()
+            + std::mem::size_of_val(&self.factory)
+    }
+}

From 689e51b461e8779856f94717223bbaaab4734e09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 16:08:55 +0200
Subject: [PATCH 26/89] WIP sum

---
 datafusion/physical-expr/src/aggregate/sum.rs | 198 +++++++++++++++++-
 1 file changed, 197 insertions(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index efa55f060264..425901f91a90 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -21,7 +21,7 @@ use std::any::Any;
 use std::convert::TryFrom;
 use std::sync::Arc;
 
-use crate::{AggregateExpr, PhysicalExpr};
+use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::compute;
 use arrow::datatypes::DataType;
 use arrow::{
@@ -31,8 +31,13 @@ use arrow::{
     },
     datatypes::Field,
 };
+use arrow_array::cast::AsArray;
+use arrow_array::types::{UInt64Type, Int64Type, UInt32Type, Int32Type, Decimal128Type};
+use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
+use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
+use log::debug;
 
 use crate::aggregate::row_accumulator::{
     is_row_accumulator_support_dtype, RowAccumulator,
@@ -44,6 +49,8 @@ use arrow::array::Decimal128Array;
 use arrow::compute::cast;
 use datafusion_row::accessor::RowAccessor;
 
+use super::groups_accumulator::accumulate::{accumulate_all, accumulate_all_nullable};
+
 /// SUM aggregate expression
 #[derive(Debug, Clone)]
 pub struct Sum {
@@ -141,6 +148,34 @@ impl AggregateExpr for Sum {
         )))
     }
 
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        // instantiate specialized accumulator
+        match self.data_type {
+            DataType::UInt64 => Ok(Box::new(SumGroupsAccumulator::<UInt64Type>::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Int64 => Ok(Box::new(SumGroupsAccumulator::<Int64Type>::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::UInt32 => Ok(Box::new(SumGroupsAccumulator::<UInt32Type>::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Int32 => Ok(Box::new(SumGroupsAccumulator::<Int32Type>::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Decimal128(_target_precision, _target_scale) => {
+                Ok(Box::new(SumGroupsAccumulator::<Decimal128Type>::new(
+                    &self.data_type, &self.data_type
+                )))
+            }
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "SumGroupsAccumulator not supported for {}",
+                self.data_type
+            ))),
+        }
+    }
+
+
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }
@@ -424,6 +459,167 @@ impl RowAccumulator for SumRowAccumulator {
     }
 }
 
+/// An accumulator to compute the sum of PrimitiveArray<T> per group.
+/// Stores sums as native types; additions use `add_wrapping`, so overflow wraps
+///
+/// T: Arrow numeric type of the input values; each group's sum is
+/// stored as T::Native
+#[derive(Debug)]
+struct SumGroupsAccumulator<T>
+where
+    T: ArrowNumericType + Send,
+{
+    /// The type of the internal sum
+    sum_data_type: DataType,
+
+    /// The type of the returned sum
+    return_data_type: DataType,
+
+    /// Sums per group, stored as the native type
+    sums: Vec<T::Native>,
+
+    /// Validity bit per group_index: true until a null input value is seen
+    null_inputs: BooleanBufferBuilder,
+}
+
+impl<T> SumGroupsAccumulator<T>
+where
+    T: ArrowNumericType + Send,
+{
+    pub fn new(sum_data_type: &DataType, return_data_type: &DataType) -> Self {
+        debug!(
+            "SumGroupsAccumulator ({}, sum type: {sum_data_type:?}) --> {return_data_type:?}",
+            std::any::type_name::<T>()
+        );
+
+        Self {
+            return_data_type: return_data_type.clone(),
+            sum_data_type: sum_data_type.clone(),
+            sums: vec![],
+            null_inputs: BooleanBufferBuilder::new(0),
+        }
+    }
+
+    /// Adds the values in `values` to self.sums
+    fn update_sums(
+        &mut self,
+        group_indices: &[usize],
+        values: &PrimitiveArray<T>,
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) {
+        if self.null_inputs.len() < total_num_groups {
+            let new_groups = total_num_groups - self.null_inputs.len();
+            // All groups start as valid (and are set to null if we
+            // see a null in the input)
+            self.null_inputs.append_n(new_groups, true);
+        }
+        self.sums
+            .resize_with(total_num_groups, || T::default_value());
+
+        if values.null_count() == 0 {
+            accumulate_all(
+                group_indices,
+                values,
+                opt_filter,
+                |group_index, new_value| {
+                    // note since add_wrapping doesn't error, we
+                    // simply add values in null sum slots rather than
+                    // checking if they are null first. The theory is
+                    // this is faster
+                    let sum = &mut self.sums[group_index];
+                    *sum = sum.add_wrapping(new_value);
+                },
+            )
+        } else {
+            accumulate_all_nullable(
+                group_indices,
+                values,
+                opt_filter,
+                |group_index, new_value, is_valid| {
+                    if is_valid {
+                        let sum = &mut self.sums[group_index];
+                        *sum = sum.add_wrapping(new_value);
+                    } else {
+                        // input null means this group is now null
+                        self.null_inputs.set_bit(group_index, false);
+                    }
+                },
+            )
+        }
+    }
+
+    /// Returns a NullBuffer representing which group_indices have
+    /// null values (if they saw a null input)
+    /// Resets `self.null_inputs`;
+    fn build_nulls(&mut self) -> Option<NullBuffer> {
+        let nulls = NullBuffer::new(self.null_inputs.finish());
+        if nulls.null_count() > 0 {
+            Some(nulls)
+        } else {
+            None
+        }
+    }
+}
+
+impl<T> GroupsAccumulator for SumGroupsAccumulator<T>
+where
+    T: ArrowNumericType + Send,
+{
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "single argument to update_batch");
+        let values = values.get(0).unwrap().as_primitive::<T>();
+
+        self.update_sums(group_indices, values, opt_filter, total_num_groups);
+
+        Ok(())
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "two arguments to merge_batch");
+        // first batch is partial sums
+        let partial_sums: &PrimitiveArray<T> = values.get(1).unwrap().as_primitive::<T>();
+        self.update_sums(group_indices, partial_sums, opt_filter, total_num_groups);
+
+        Ok(())
+    }
+
+    fn evaluate(&mut self) -> Result<ArrayRef> {
+        let sums = std::mem::take(&mut self.sums);
+        let nulls = self.build_nulls();
+
+        let array = PrimitiveArray::<T>::new(sums.into(), nulls); // no copy
+
+        Ok(Arc::new(array))
+    }
+
+    // return the array of per-group sums (the only state kept here)
+    fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        let nulls = self.build_nulls();
+
+        let sums = std::mem::take(&mut self.sums);
+        let sums = PrimitiveArray::<T>::new(sums.into(), nulls); // zero copy
+
+        Ok(vec![Arc::new(sums) as ArrayRef])
+    }
+
+    fn size(&self) -> usize {
+        self.sums.capacity() * std::mem::size_of::<usize>()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From 7b2015584c471ddd6d0f35fb1c7224615c203fec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 16:17:16 +0200
Subject: [PATCH 27/89] WIP sum

---
 datafusion/physical-expr/src/aggregate/sum.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 425901f91a90..d17f7ec97630 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -590,7 +590,7 @@ where
     ) -> Result<()> {
         assert_eq!(values.len(), 1, "two arguments to merge_batch");
         // first batch is partial sums
-        let partial_sums: &PrimitiveArray<T> = values.get(1).unwrap().as_primitive::<T>();
+        let partial_sums: &PrimitiveArray<T> = values.get(0).unwrap().as_primitive::<T>();
         self.update_sums(group_indices, partial_sums, opt_filter, total_num_groups);
 
         Ok(())

From 6275a9faaae5c36df76941d94b2ba9e92b075bae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 17:20:07 +0200
Subject: [PATCH 28/89] Use `Rows` API

---
 Cargo.toml                                    |  9 ++++++++
 .../src/physical_plan/aggregates/row_hash2.rs | 22 ++++++++-----------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index b5d0a34e7e4d..3d6f5aed88b6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -70,3 +70,12 @@ opt-level = 3
 overflow-checks = false
 panic = 'unwind'
 rpath = false
+
+# TODO remove after 43 release
+[patch.crates-io]
+arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
+arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
+arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
+arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
+arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
+parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index c248af8f44f2..0a4f1d2ba881 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -26,7 +26,7 @@ use std::task::{Context, Poll};
 use std::vec;
 
 use ahash::RandomState;
-use arrow::row::{OwnedRow, RowConverter, SortField};
+use arrow::row::{RowConverter, SortField, Rows};
 use datafusion_physical_expr::hash_utils::create_hashes;
 use futures::ready;
 use futures::stream::{Stream, StreamExt};
@@ -163,11 +163,7 @@ pub(crate) struct GroupedHashAggregateStream2 {
     ///
     /// The row format is used to compare group keys quickly. This is
     /// especially important for multi-column group keys.
-    ///
-    /// TODO, make this Rows (rather than Vec<OwnedRow> to reduce
-    /// allocations once
-    /// https://github.com/apache/arrow-rs/issues/4466 is available
-    group_values: Vec<OwnedRow>,
+    group_values: Rows,
 
     /// scratch space for the current input Batch being
     /// processed. Reused across batches here to avoid reallocations
@@ -239,7 +235,7 @@ impl GroupedHashAggregateStream2 {
         let name = format!("GroupedHashAggregateStream2[{partition}]");
         let reservation = MemoryConsumer::new(name).register(context.memory_pool());
         let map = RawTable::with_capacity(0);
-        let group_by_values = vec![];
+        let group_by_values = row_converter.empty_rows(0, 0);
         let current_group_indices = vec![];
 
         timer.done();
@@ -381,7 +377,7 @@ impl GroupedHashAggregateStream2 {
                 // TODO update *allocated based on size of the row
                 // that was just pushed into
                 // aggr_state.group_by_values
-                group_rows.row(row) == self.group_values[*group_idx].row()
+                group_rows.row(row) == self.group_values.row(*group_idx)
             });
 
             let group_idx = match entry {
@@ -390,8 +386,8 @@ impl GroupedHashAggregateStream2 {
                 //  1.2 Need to create new entry for the group
                 None => {
                     // Add new entry to aggr_state and save newly created index
-                    let group_idx = self.group_values.len();
-                    self.group_values.push(group_rows.row(row).owned());
+                    let group_idx = self.group_values.num_rows();
+                    self.group_values.push(group_rows.row(row));
 
                     // for hasher function, use precomputed hash value
                     self.map.insert_accounted(
@@ -438,7 +434,7 @@ impl GroupedHashAggregateStream2 {
                 .zip(input_values.iter())
                 .zip(filter_values.iter());
 
-            let total_num_groups = self.group_values.len();
+            let total_num_groups = self.group_values.num_rows();
 
             for ((acc, values), opt_filter) in t {
                 let acc_size_pre = acc.size();
@@ -482,13 +478,13 @@ impl GroupedHashAggregateStream2 {
 impl GroupedHashAggregateStream2 {
     /// Create an output RecordBatch with all group keys and accumulator states/values
     fn create_batch_from_map(&mut self) -> Result<RecordBatch> {
-        if self.group_values.is_empty() {
+        if self.group_values.num_rows() == 0 {
             let schema = self.schema.clone();
             return Ok(RecordBatch::new_empty(schema));
         }
 
         // First output rows are the groups
-        let groups_rows = self.group_values.iter().map(|owned_row| owned_row.row());
+        let groups_rows = self.group_values.iter().map(|owned_row| owned_row);
 
         let mut output: Vec<ArrayRef> = self.row_converter.convert_rows(groups_rows)?;
 

From 8902c910189b7692dadd11dc6c981d74baafc822 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Mon, 3 Jul 2023 12:50:10 -0400
Subject: [PATCH 29/89] Update adapter

---
 .../aggregate/groups_accumulator/adapter.rs   | 161 +++++++++++++++++-
 1 file changed, 153 insertions(+), 8 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
index c2988a7428bf..70ec83b4d63a 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -18,8 +18,13 @@
 //! Adapter that makes [`GroupsAccumulator`] out of [`Accumulator`]
 
 use super::GroupsAccumulator;
-use arrow_array::{ArrayRef, BooleanArray};
-use datafusion_common::Result;
+use arrow::{
+    array::{AsArray, UInt32Builder},
+    compute,
+    datatypes::UInt32Type,
+};
+use arrow_array::{ArrayRef, BooleanArray, PrimitiveArray};
+use datafusion_common::{utils::get_arrayref_at_indices, DataFusionError, Result};
 use datafusion_expr::Accumulator;
 
 /// An adpater that implements [`GroupsAccumulator`] for any [`Accumulator`]
@@ -31,7 +36,30 @@ pub struct GroupsAccumulatorAdapter {
     factory: Box<dyn Fn() -> Result<Box<dyn Accumulator>> + Send>,
 
     /// [`Accumulators`] for each group, stored in group_index order
-    accumulators: Vec<Box<dyn Accumulator>>,
+    states: Vec<AccumulatorState>,
+}
+
+struct AccumulatorState {
+    /// [`Accumulators`]
+    accumulator: Box<dyn Accumulator>,
+
+    // scratch space for holding the indexes in the input array that
+    // will be fed to this accumulator. Use u32 to match take kernel
+    // input
+    indices: Vec<u32>,
+}
+
+impl AccumulatorState {
+    fn new(accumulator: Box<dyn Accumulator>) -> Self {
+        Self {
+            accumulator,
+            indices: vec![],
+        }
+    }
+
+    fn size(&self) -> usize {
+        todo!()
+    }
 }
 
 impl GroupsAccumulatorAdapter {
@@ -43,9 +71,21 @@ impl GroupsAccumulatorAdapter {
     {
         Self {
             factory: Box::new(factory),
-            accumulators: vec![],
+            states: vec![],
         }
     }
+
+    /// Ensure that self.states has total_num_groups entries
+    fn make_accumulators_if_needed(&mut self, total_num_groups: usize) -> Result<()> {
+        // can't shrink
+        assert!(total_num_groups >= self.states.len());
+        let new_accumulators = total_num_groups - self.states.len();
+        for _ in 0..new_accumulators {
+            let accumulator = (self.factory)()?;
+            self.states.push(AccumulatorState::new(accumulator));
+        }
+        Ok(())
+    }
 }
 
 impl GroupsAccumulator for GroupsAccumulatorAdapter {
@@ -56,9 +96,72 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
         opt_filter: Option<&BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
-        todo!()
-    }
+        self.make_accumulators_if_needed(total_num_groups)?;
+
+        // This logic:
+        // reorderes the input and filter so that values for group_indexes are contiguous.
+        // Then it invokes Accumulator::update / merge for each of those contiguous ranges
+
+        assert_eq!(values[0].len(), group_indices.len());
+
+        // figure out which input rows correspond to which groups
+        for (idx, group_index) in group_indices.iter().enumerate() {
+            self.states[*group_index].indices.push(idx as u32);
+        }
 
+        // groups_per_rows holds a list of group indexes that have
+        // rows that need to be accumulated
+        let mut groups_with_rows = vec![];
+
+        // batch_indices holds indices in values, each group contiguously
+        let mut batch_indices = UInt32Builder::with_capacity(0);
+
+        // offsets[i] is the index into batch_indices where the rows for
+        // groups_with_rows[i] start
+        let mut offsets = vec![0];
+
+        let mut offset_so_far = 0;
+        for (group_index, state) in self.states.iter_mut().enumerate() {
+            let indices = &state.indices;
+            if indices.is_empty() {
+                continue;
+            }
+
+            groups_with_rows.push(group_index);
+            batch_indices.append_slice(indices);
+            offset_so_far += indices.len();
+            offsets.push(offset_so_far);
+        }
+        let batch_indices = batch_indices.finish();
+
+        // reorder the values and opt_filter by batch_indices so that
+        // all values for each group are contiguous, then invoke the
+        // accumulator once per group with values
+        let values = get_arrayref_at_indices(values, &batch_indices)?;
+        let opt_filter = get_filter_at_indices(opt_filter, &batch_indices)?;
+
+        // invoke each accumulator with the appropriate rows, first
+        // pulling the input arguments for this group into their own
+        // RecordBatch(es)
+        let iter = groups_with_rows.iter().zip(offsets.windows(2));
+
+        // TODO memory accounting
+        let mut allocated = 0;
+        for (group_idx, offsets) in iter {
+            let state = &mut self.states[*group_idx as usize];
+
+            //let size_pre = accumulator.size();
+
+            let values_to_accumulate =
+                slice_and_maybe_filter(&values, opt_filter.as_ref(), &offsets)?;
+            state.accumulator.update_batch(&values_to_accumulate)?;
+            state.indices.clear();
+
+            //let size_post = accumulator.size();
+            //*allocated += size_post.saturating_sub(size_pre);
+        }
+        Ok(())
+    }
     fn evaluate(&mut self) -> Result<ArrayRef> {
         todo!()
     }
@@ -78,9 +181,51 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
     }
 
     fn size(&self) -> usize {
-        self.accumulators.iter().map(|a| a.size()).sum::<usize>()
+        // TODO should calculate size incrementally during update and just return value here
+        self.states.iter().map(|a| a.size()).sum::<usize>()
             //include the size of self and self.accumulators itself
-            + self.accumulators.len() * std::mem::size_of::<Box<dyn Accumulator>>()
+            + self.states.len() * std::mem::size_of::<AccumulatorState>()
             + std::mem::size_of_val(&self.factory)
     }
 }
+
+fn get_filter_at_indices(
+    opt_filter: Option<&BooleanArray>,
+    indices: &PrimitiveArray<UInt32Type>,
+) -> Result<Option<ArrayRef>> {
+    opt_filter
+        .map(|filter| {
+            compute::take(
+                &filter, indices, None, // None: no index check
+            )
+        })
+        .transpose()
+        .map_err(DataFusionError::ArrowError)
+}
+
+// Copied from physical-plan
+pub(crate) fn slice_and_maybe_filter(
+    aggr_array: &[ArrayRef],
+    filter_opt: Option<&ArrayRef>,
+    offsets: &[usize],
+) -> Result<Vec<ArrayRef>> {
+    let (offset, length) = (offsets[0], offsets[1] - offsets[0]);
+    let sliced_arrays: Vec<ArrayRef> = aggr_array
+        .iter()
+        .map(|array| array.slice(offset, length))
+        .collect();
+
+    if let Some(f) = filter_opt {
+        let filter_array = f.slice(offset, length);
+        let filter_array = filter_array.as_boolean();
+
+        sliced_arrays
+            .iter()
+            .map(|array| {
+                compute::filter(array, &filter_array).map_err(DataFusionError::ArrowError)
+            })
+            .collect()
+    } else {
+        Ok(sliced_arrays)
+    }
+}

From 6cab205d58fb3481ecf23020ae8762da7e6dd704 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Mon, 3 Jul 2023 13:18:25 -0400
Subject: [PATCH 30/89] Add docs, refactor

---
 .../aggregate/groups_accumulator/adapter.rs   | 82 ++++++++++++++++---
 1 file changed, 72 insertions(+), 10 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
index 70ec83b4d63a..2dbd8b851a1c 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -58,7 +58,9 @@ impl AccumulatorState {
     }
 
     fn size(&self) -> usize {
-        todo!()
+        self.accumulator.size()
+            + std::mem::size_of_val(self)
+            + std::mem::size_of::<u32>() * self.indices.capacity()
     }
 }
 
@@ -82,26 +84,51 @@ impl GroupsAccumulatorAdapter {
         let new_accumulators = total_num_groups - self.states.len();
         for _ in 0..new_accumulators {
             let accumulator = (self.factory)()?;
+            // todo update allocation
             self.states.push(AccumulatorState::new(accumulator));
         }
         Ok(())
     }
-}
 
-impl GroupsAccumulator for GroupsAccumulatorAdapter {
-    fn update_batch(
+    /// invokes f(accumulator, values) for the correct slices of the
+    /// input values of this array.
+    ///
+    /// This first reorders the input and filter so that values for group_indexes
+    /// are contiguous and then invokes f on the contiguous ranges
+    ///
+    /// ```text
+    /// ┌─────────┐   ┌─────────┐   ┌ ─ ─ ─ ─ ┐                       ┌─────────┐   ┌ ─ ─ ─ ─ ┐
+    /// │ ┌─────┐ │   │ ┌─────┐ │     ┌─────┐              ┏━━━━━┓    │ ┌─────┐ │     ┌─────┐
+    /// │ │  2  │ │   │ │ 200 │ │   │ │  t  │ │            ┃  0  ┃    │ │ 200 │ │   │ │  t  │ │
+    /// │ ├─────┤ │   │ ├─────┤ │     ├─────┤              ┣━━━━━┫    │ ├─────┤ │     ├─────┤
+    /// │ │  2  │ │   │ │ 100 │ │   │ │  f  │ │            ┃  0  ┃    │ │ 300 │ │   │ │  t  │ │
+    /// │ ├─────┤ │   │ ├─────┤ │     ├─────┤              ┣━━━━━┫    │ ├─────┤ │     ├─────┤
+    /// │ │  0  │ │   │ │ 200 │ │   │ │  t  │ │            ┃  1  ┃    │ │ 200 │ │   │ │NULL │ │
+    /// │ ├─────┤ │   │ ├─────┤ │     ├─────┤   ────────▶  ┣━━━━━┫    │ ├─────┤ │     ├─────┤
+    /// │ │  1  │ │   │ │ 200 │ │   │ │NULL │ │            ┃  2  ┃    │ │ 200 │ │   │ │  t  │ │
+    /// │ ├─────┤ │   │ ├─────┤ │     ├─────┤              ┣━━━━━┫    │ ├─────┤ │     ├─────┤
+    /// │ │  0  │ │   │ │ 300 │ │   │ │  t  │ │            ┃  2  ┃    │ │ 100 │ │   │ │  f  │ │
+    /// │ └─────┘ │   │ └─────┘ │     └─────┘              ┗━━━━━┛    │ └─────┘ │     └─────┘
+    /// └─────────┘   └─────────┘   └ ─ ─ ─ ─ ┘                       └─────────┘   └ ─ ─ ─ ─ ┘
+    ///
+    ///   values        opt_filter         logical group  values        opt_filter
+    ///                                                 index
+    /// ```
+    fn invoke_per_accumulator<F>(
         &mut self,
         values: &[ArrayRef],
         group_indices: &[usize],
         opt_filter: Option<&BooleanArray>,
         total_num_groups: usize,
-    ) -> Result<()> {
+        mut f: F,
+    ) -> Result<()>
+    where
+        F: Fn(&mut dyn Accumulator, &[ArrayRef]) -> Result<()>,
+    {
         self.make_accumulators_if_needed(total_num_groups)?;
 
-        // This logic:
         // reorderes the input and filter so that values for group_indexes are contiguous.
         // Then it invokes Accumulator::update / merge for each of those contiguous ranges
-
         assert_eq!(values[0].len(), group_indices.len());
 
         // figure out which input rows correspond to which groups
@@ -110,7 +137,8 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
         }
 
         // groups_per_rows holds a list of group indexes that have
-        // rows that need to be accumulated
+        // any rows that need to be accumulated, stored in order of group_index
+
         let mut groups_with_rows = vec![];
 
         // batch_indices holds indices in values, each group contiguously
@@ -154,7 +182,10 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
 
             let values_to_accumulate =
                 slice_and_maybe_filter(&values, opt_filter.as_ref(), &offsets)?;
-            state.accumulator.update_batch(&values_to_accumulate)?;
+
+            (f)(state.accumulator.as_mut(), &values_to_accumulate)?;
+
+            // clear out the state
             state.indices.clear();
 
             //let size_post = accumulator.size();
@@ -162,6 +193,28 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
         }
         Ok(())
     }
+}
+
+impl GroupsAccumulator for GroupsAccumulatorAdapter {
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        self.invoke_per_accumulator(
+            values,
+            group_indices,
+            opt_filter,
+            total_num_groups,
+            |accumulator, values_to_accumulate| {
+                accumulator.update_batch(values_to_accumulate)
+            },
+        )?;
+        Ok(())
+    }
+
     fn evaluate(&mut self) -> Result<ArrayRef> {
         todo!()
     }
@@ -177,7 +230,16 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
         opt_filter: Option<&BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
-        todo!()
+        self.invoke_per_accumulator(
+            values,
+            group_indices,
+            opt_filter,
+            total_num_groups,
+            |accumulator, values_to_accumulate| {
+                accumulator.merge_batch(values_to_accumulate)
+            },
+        )?;
+        Ok(())
     }
 
     fn size(&self) -> usize {

From 52c62ece7a843abd5930637a7eccdbaabecf791b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 20:53:33 +0200
Subject: [PATCH 31/89] Merge

---
 Cargo.toml | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 6855a258284c..45b1bf46183f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -69,13 +69,4 @@ lto = false
 opt-level = 3
 overflow-checks = false
 panic = 'unwind'
-rpath = false
-
-# TODO remove after 43 release
-[patch.crates-io]
-arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
-arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
-arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
-arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
-arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
-parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "d7fa775cf76c7cd54c6d2a86542115599d8f53ee" }
+rpath = false
\ No newline at end of file

From 1684916d8e94c6257400406ce42db927d716314c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 21:17:05 +0200
Subject: [PATCH 32/89] WIP count

---
 .../src/physical_plan/aggregates/row_hash2.rs |  2 +-
 .../physical-expr/src/aggregate/count.rs      | 28 +++---
 .../groups_accumulator/accumulate.rs          | 91 +++++++++++++++++++
 .../aggregate/groups_accumulator/adapter.rs   |  2 +-
 datafusion/physical-expr/src/aggregate/sum.rs | 18 ++--
 5 files changed, 116 insertions(+), 25 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 1afff302e757..0b6862faace7 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -28,7 +28,7 @@ use std::task::{Context, Poll};
 use std::vec;
 
 use ahash::RandomState;
-use arrow::row::{RowConverter, SortField, Rows};
+use arrow::row::{RowConverter, Rows, SortField};
 use datafusion_physical_expr::hash_utils::create_hashes;
 use futures::ready;
 use futures::stream::{Stream, StreamExt};
diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index 47b7588ec518..e9afcc2e2da8 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -42,7 +42,10 @@ use datafusion_row::accessor::RowAccessor;
 
 use crate::expressions::format_state_name;
 
-use super::groups_accumulator::accumulate::{accumulate_all, accumulate_all_nullable};
+use super::groups_accumulator::accumulate::{
+    accumulate_all, accumulate_indices_nullable, accumulate_indices,
+};
+use super::groups_accumulator::accumulate_all_nullable;
 
 /// COUNT aggregate expression
 /// Returns the amount of non-null values of the given expression.
@@ -118,30 +121,23 @@ where
     fn increment_counts(
         &mut self,
         group_indices: &[usize],
-        values: &PrimitiveArray<T>,
+        values: &dyn Array,
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) {
         self.counts.resize(total_num_groups, 0);
 
         if values.null_count() == 0 {
-            accumulate_all(
-                group_indices,
-                values,
-                opt_filter,
-                |group_index, _new_value| {
-                    self.counts[group_index] += 1;
-                },
-            )
+            accumulate_indices(group_indices, opt_filter, |group_index| {
+                self.counts[group_index] += 1;
+            })
         } else {
-            accumulate_all_nullable(
+            accumulate_indices_nullable(
                 group_indices,
                 values,
                 opt_filter,
-                |group_index, _new_value, is_valid| {
-                    if is_valid {
-                        self.counts[group_index] += 1;
-                    }
+                |group_index| {
+                    self.counts[group_index] += 1;
                 },
             )
         }
@@ -193,7 +189,7 @@ where
         total_num_groups: usize,
     ) -> Result<()> {
         assert_eq!(values.len(), 1, "single argument to update_batch");
-        let values = values.get(0).unwrap().as_primitive::<T>();
+        let values = values.get(0).unwrap();
 
         self.increment_counts(group_indices, values, opt_filter, total_num_groups);
 
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 8fdd1dad1572..3eb4e6e259a0 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -104,6 +104,33 @@ pub fn accumulate_all<T, F>(
     }
 }
 
+pub fn accumulate_indices<F>(
+    group_indices: &[usize],
+    opt_filter: Option<&BooleanArray>,
+    mut index_fn: F,
+) where
+    F: FnMut(usize) + Send,
+{
+    let iter = group_indices.iter();
+    // handle filter values with a specialized loop
+    if let Some(filter) = opt_filter {
+        assert_eq!(filter.len(), group_indices.len());
+        // The performance with a filter could be improved by
+        // iterating over the filter in chunks, rather than a single
+        // iterator. TODO file a ticket
+        let iter = iter.zip(filter.iter());
+        for (&group_index, filter_value) in iter {
+            if let Some(true) = filter_value {
+                index_fn(group_index)
+            }
+        }
+    } else {
+        for &group_index in iter {
+            index_fn(group_index)
+        }
+    }
+}
+
 /// This function is called to update the accumulator state per row,
 /// for a `PrimitiveArray<T>` that can have nulls. See
 /// [`accumulate_all`] for more detail and example
@@ -187,6 +214,70 @@ pub fn accumulate_all_nullable<T, F>(
     }
 }
 
+pub fn accumulate_indices_nullable<F>(
+    group_indices: &[usize],
+    array: &dyn Array,
+    opt_filter: Option<&BooleanArray>,
+    mut index_fn: F,
+) where
+    F: FnMut(usize) + Send,
+{
+    // Given performance is critical, assert if the wrong flavor is called
+    let valids = array
+        .nulls()
+        .expect("Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)");
+
+    if let Some(filter) = opt_filter {
+        assert_eq!(filter.len(), group_indices.len());
+        // The performance with a filter could be improved by
+        // iterating over the filter in chunks, rather than using
+        // iterators. TODO file a ticket
+        filter.iter().zip(group_indices.iter()).for_each(
+            |(filter_value, &group_index)| {
+                // did value[i] pass the filter?
+                if let Some(true) = filter_value {
+                    // Is value[i] valid?
+                    index_fn(group_index)
+                }
+            },
+        )
+    } else {
+        // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
+        // iterate over in chunks of 64 bits for more efficient null checking
+        let group_indices_chunks = group_indices.chunks_exact(64);
+        let bit_chunks = valids.inner().bit_chunks();
+
+        let group_indices_remainder = group_indices_chunks.remainder();
+
+        group_indices_chunks.zip(bit_chunks.iter()).for_each(
+            |(group_index_chunk, mask)| {
+                // index_mask has value 1 << i in the loop
+                let mut index_mask = 1;
+                group_index_chunk.iter().for_each(|&group_index| {
+                    // valid bit was set, real value
+                    let is_valid = (mask & index_mask) != 0;
+                    if is_valid {
+                        index_fn(group_index);
+                    }
+                    index_mask <<= 1;
+                })
+            },
+        );
+
+        // handle any remaining bits (after the initial 64)
+        let remainder_bits = bit_chunks.remainder_bits();
+        group_indices_remainder
+            .iter()
+            .enumerate()
+            .for_each(|(i, &group_index)| {
+                let is_valid = remainder_bits & (1 << i) != 0;
+                if is_valid {
+                    index_fn(group_index)
+                }
+            });
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
index 2dbd8b851a1c..d7ac717bf206 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -120,7 +120,7 @@ impl GroupsAccumulatorAdapter {
         group_indices: &[usize],
         opt_filter: Option<&BooleanArray>,
         total_num_groups: usize,
-        mut f: F,
+        f: F,
     ) -> Result<()>
     where
         F: Fn(&mut dyn Accumulator, &[ArrayRef]) -> Result<()>,
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index d17f7ec97630..5bdf0403aca0 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -32,7 +32,7 @@ use arrow::{
     datatypes::Field,
 };
 use arrow_array::cast::AsArray;
-use arrow_array::types::{UInt64Type, Int64Type, UInt32Type, Int32Type, Decimal128Type};
+use arrow_array::types::{Decimal128Type, Int32Type, Int64Type, UInt32Type, UInt64Type};
 use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
 use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
@@ -152,20 +152,25 @@ impl AggregateExpr for Sum {
         // instantiate specialized accumulator
         match self.data_type {
             DataType::UInt64 => Ok(Box::new(SumGroupsAccumulator::<UInt64Type>::new(
-                &self.data_type, &self.data_type
+                &self.data_type,
+                &self.data_type,
             ))),
             DataType::Int64 => Ok(Box::new(SumGroupsAccumulator::<Int64Type>::new(
-                &self.data_type, &self.data_type
+                &self.data_type,
+                &self.data_type,
             ))),
             DataType::UInt32 => Ok(Box::new(SumGroupsAccumulator::<UInt32Type>::new(
-                &self.data_type, &self.data_type
+                &self.data_type,
+                &self.data_type,
             ))),
             DataType::Int32 => Ok(Box::new(SumGroupsAccumulator::<Int32Type>::new(
-                &self.data_type, &self.data_type
+                &self.data_type,
+                &self.data_type,
             ))),
             DataType::Decimal128(_target_precision, _target_scale) => {
                 Ok(Box::new(SumGroupsAccumulator::<Decimal128Type>::new(
-                    &self.data_type, &self.data_type
+                    &self.data_type,
+                    &self.data_type,
                 )))
             }
             _ => Err(DataFusionError::NotImplemented(format!(
@@ -175,7 +180,6 @@ impl AggregateExpr for Sum {
         }
     }
 
-
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }

From a94c346acfef2438f0a3f906fa9196529a7abf17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 21:26:37 +0200
Subject: [PATCH 33/89] WIP count

---
 datafusion/physical-expr/src/aggregate/count.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index e9afcc2e2da8..5423a6b674eb 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -181,6 +181,7 @@ impl<T> GroupsAccumulator for CountGroupsAccumulator<T>
 where
     T: ArrowNumericType + Send,
 {
+
     fn update_batch(
         &mut self,
         values: &[ArrayRef],
@@ -295,6 +296,10 @@ impl AggregateExpr for Count {
         true
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        true
+    }
+
     fn create_row_accumulator(
         &self,
         start_index: usize,

From 1ba625a263d84d34d6f76df07c5f3c532b1e8d27 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 21:29:55 +0200
Subject: [PATCH 34/89] WIP count

---
 datafusion/physical-expr/src/aggregate/count.rs | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index 5423a6b674eb..ce42167a50a6 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -33,7 +33,7 @@ use arrow::datatypes::DataType;
 use arrow::{array::ArrayRef, datatypes::Field};
 use arrow_array::cast::AsArray;
 use arrow_array::types::{Int32Type, Int64Type, UInt32Type, UInt64Type};
-use arrow_array::{ArrowNumericType, PrimitiveArray, UInt64Array};
+use arrow_array::{ArrowNumericType, PrimitiveArray};
 use arrow_buffer::BooleanBuffer;
 use datafusion_common::{downcast_value, ScalarValue};
 use datafusion_common::{DataFusionError, Result};
@@ -43,7 +43,7 @@ use datafusion_row::accessor::RowAccessor;
 use crate::expressions::format_state_name;
 
 use super::groups_accumulator::accumulate::{
-    accumulate_all, accumulate_indices_nullable, accumulate_indices,
+    accumulate_all, accumulate_indices, accumulate_indices_nullable,
 };
 use super::groups_accumulator::accumulate_all_nullable;
 
@@ -99,8 +99,8 @@ where
     /// The type of the returned count
     return_data_type: DataType,
 
-    /// Count per group (use u64 to make UInt64Array)
-    counts: Vec<u64>,
+    /// Count per group (use u64 to make Int64Array)
+    counts: Vec<i64>,
     // Bind it to struct
     phantom: PhantomData<T>,
 }
@@ -147,7 +147,7 @@ where
     fn update_counts_with_partial_counts(
         &mut self,
         group_indices: &[usize],
-        partial_counts: &UInt64Array,
+        partial_counts: &Int64Array,
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) {
@@ -181,7 +181,6 @@ impl<T> GroupsAccumulator for CountGroupsAccumulator<T>
 where
     T: ArrowNumericType + Send,
 {
-
     fn update_batch(
         &mut self,
         values: &[ArrayRef],
@@ -206,7 +205,7 @@ where
     ) -> Result<()> {
         assert_eq!(values.len(), 1, "one argument to merge_batch");
         // first batch is counts, second is partial sums
-        let partial_counts = values.get(0).unwrap().as_primitive::<UInt64Type>();
+        let partial_counts = values.get(0).unwrap().as_primitive::<Int64Type>();
         self.update_counts_with_partial_counts(
             group_indices,
             partial_counts,
@@ -220,7 +219,7 @@ where
     fn evaluate(&mut self) -> Result<ArrayRef> {
         let counts = std::mem::take(&mut self.counts);
 
-        let array = PrimitiveArray::<UInt64Type>::new(counts.into(), None);
+        let array = PrimitiveArray::<Int64Type>::new(counts.into(), None);
         // TODO remove cast
         let array = cast(&array, &self.return_data_type)?;
 
@@ -230,7 +229,7 @@ where
     // return arrays for sums and counts
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
         let counts = std::mem::take(&mut self.counts);
-        let counts = UInt64Array::from(counts); // zero copy
+        let counts: PrimitiveArray<Int64Type> = Int64Array::from(counts); // zero copy
         Ok(vec![Arc::new(counts) as ArrayRef])
     }
 

From c2f955d8b643069892c134de9ac9bf751a095e14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Mon, 3 Jul 2023 21:45:39 +0200
Subject: [PATCH 35/89] WIP count

---
 .../physical-expr/src/aggregate/count.rs      | 51 +++----------------
 1 file changed, 8 insertions(+), 43 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index ce42167a50a6..c6182ef980f3 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -19,7 +19,6 @@
 
 use std::any::Any;
 use std::fmt::Debug;
-use std::marker::PhantomData;
 use std::ops::BitAnd;
 use std::sync::Arc;
 
@@ -28,12 +27,11 @@ use crate::aggregate::utils::down_cast_any_ref;
 use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::array::{Array, Int64Array};
 use arrow::compute;
-use arrow::compute::kernels::cast;
 use arrow::datatypes::DataType;
 use arrow::{array::ArrayRef, datatypes::Field};
 use arrow_array::cast::AsArray;
-use arrow_array::types::{Int32Type, Int64Type, UInt32Type, UInt64Type};
-use arrow_array::{ArrowNumericType, PrimitiveArray};
+use arrow_array::types::Int64Type;
+use arrow_array::PrimitiveArray;
 use arrow_buffer::BooleanBuffer;
 use datafusion_common::{downcast_value, ScalarValue};
 use datafusion_common::{DataFusionError, Result};
@@ -92,28 +90,17 @@ impl Count {
 /// F: Function that calcuates the average value from a sum of
 /// T::Native and a total count
 #[derive(Debug)]
-struct CountGroupsAccumulator<T>
-where
-    T: ArrowNumericType + Send,
+struct CountGroupsAccumulator
 {
-    /// The type of the returned count
-    return_data_type: DataType,
-
     /// Count per group (use u64 to make Int64Array)
     counts: Vec<i64>,
-    // Bind it to struct
-    phantom: PhantomData<T>,
 }
 
-impl<T> CountGroupsAccumulator<T>
-where
-    T: ArrowNumericType + Send,
+impl CountGroupsAccumulator
 {
-    pub fn new(return_data_type: &DataType) -> Self {
+    pub fn new() -> Self {
         Self {
-            return_data_type: return_data_type.clone(),
             counts: vec![],
-            phantom: PhantomData {},
         }
     }
 
@@ -177,9 +164,7 @@ where
     }
 }
 
-impl<T> GroupsAccumulator for CountGroupsAccumulator<T>
-where
-    T: ArrowNumericType + Send,
+impl GroupsAccumulator for CountGroupsAccumulator
 {
     fn update_batch(
         &mut self,
@@ -220,10 +205,8 @@ where
         let counts = std::mem::take(&mut self.counts);
 
         let array = PrimitiveArray::<Int64Type>::new(counts.into(), None);
-        // TODO remove cast
-        let array = cast(&array, &self.return_data_type)?;
 
-        Ok(array)
+        Ok(Arc::new(array))
     }
 
     // return arrays for sums and counts
@@ -316,25 +299,7 @@ impl AggregateExpr for Count {
 
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         // instantiate specialized accumulator
-        match &self.data_type {
-            DataType::UInt64 => Ok(Box::new(CountGroupsAccumulator::<UInt64Type>::new(
-                &self.data_type,
-            ))),
-            DataType::Int64 => Ok(Box::new(CountGroupsAccumulator::<Int64Type>::new(
-                &self.data_type,
-            ))),
-            DataType::UInt32 => Ok(Box::new(CountGroupsAccumulator::<UInt32Type>::new(
-                &self.data_type,
-            ))),
-            DataType::Int32 => Ok(Box::new(CountGroupsAccumulator::<Int32Type>::new(
-                &self.data_type,
-            ))),
-
-            _ => Err(DataFusionError::NotImplemented(format!(
-                "CountGroupsAccumulator not supported for {}",
-                self.data_type
-            ))),
-        }
+        Ok(Box::new(CountGroupsAccumulator::new()))
     }
 }
 

From 9ff91cb5ebbbae5f98ac2c445f6a1c5a47b73d7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Tue, 4 Jul 2023 08:34:11 +0200
Subject: [PATCH 36/89] Support sum

---
 .../physical-expr/src/aggregate/average.rs    | 20 +-----------
 .../physical-expr/src/aggregate/count.rs      | 13 +++-----
 datafusion/physical-expr/src/aggregate/sum.rs | 32 ++++++++++++++-----
 .../physical-expr/src/aggregate/utils.rs      | 23 +++++++++++++
 4 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 54e964fcef6b..d0df0f25d046 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -47,7 +47,7 @@ use datafusion_expr::Accumulator;
 use datafusion_row::accessor::RowAccessor;
 
 use super::groups_accumulator::{accumulate_all, accumulate_all_nullable};
-use super::utils::Decimal128Averager;
+use super::utils::{adjust_output_array, Decimal128Averager};
 
 /// AVG aggregate expression
 #[derive(Debug, Clone)]
@@ -702,24 +702,6 @@ where
     }
 }
 
-/// Adjust array type metadata if needed
-///
-/// Decimal128Arrays are are are created from Vec<NativeType> with default
-/// precision and scale. This function adjusts them down.
-fn adjust_output_array(sum_data_type: &DataType, array: ArrayRef) -> Result<ArrayRef> {
-    let array = match sum_data_type {
-        DataType::Decimal128(p, s) => Arc::new(
-            array
-                .as_primitive::<Decimal128Type>()
-                .clone()
-                .with_precision_and_scale(*p, *s)?,
-        ),
-        // no adjustment needed for other arrays
-        _ => array,
-    };
-    Ok(array)
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index c6182ef980f3..ff156a3476f0 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -90,18 +90,14 @@ impl Count {
 /// F: Function that calcuates the average value from a sum of
 /// T::Native and a total count
 #[derive(Debug)]
-struct CountGroupsAccumulator
-{
+struct CountGroupsAccumulator {
     /// Count per group (use u64 to make Int64Array)
     counts: Vec<i64>,
 }
 
-impl CountGroupsAccumulator
-{
+impl CountGroupsAccumulator {
     pub fn new() -> Self {
-        Self {
-            counts: vec![],
-        }
+        Self { counts: vec![] }
     }
 
     /// Adds one to each group's counter
@@ -164,8 +160,7 @@ impl CountGroupsAccumulator
     }
 }
 
-impl GroupsAccumulator for CountGroupsAccumulator
-{
+impl GroupsAccumulator for CountGroupsAccumulator {
     fn update_batch(
         &mut self,
         values: &[ArrayRef],
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 5bdf0403aca0..4f12e01061dc 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -23,6 +23,7 @@ use std::sync::Arc;
 
 use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::compute;
+use arrow::compute::kernels::cast;
 use arrow::datatypes::DataType;
 use arrow::{
     array::{
@@ -46,10 +47,10 @@ use crate::aggregate::utils::down_cast_any_ref;
 use crate::expressions::format_state_name;
 use arrow::array::Array;
 use arrow::array::Decimal128Array;
-use arrow::compute::cast;
 use datafusion_row::accessor::RowAccessor;
 
 use super::groups_accumulator::accumulate::{accumulate_all, accumulate_all_nullable};
+use super::utils::adjust_output_array;
 
 /// SUM aggregate expression
 #[derive(Debug, Clone)]
@@ -138,6 +139,10 @@ impl AggregateExpr for Sum {
         is_row_accumulator_support_dtype(&self.data_type)
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        true
+    }
+
     fn create_row_accumulator(
         &self,
         start_index: usize,
@@ -473,7 +478,7 @@ struct SumGroupsAccumulator<T>
 where
     T: ArrowNumericType + Send,
 {
-    /// The type of the internal sum
+    /// The type of the computed sum
     sum_data_type: DataType,
 
     /// The type of the returned sum
@@ -497,7 +502,7 @@ where
         );
 
         Self {
-            return_data_type: return_data_type.clone(),
+            return_data_type: sum_data_type.clone(),
             sum_data_type: sum_data_type.clone(),
             sums: vec![],
             null_inputs: BooleanBufferBuilder::new(0),
@@ -592,7 +597,7 @@ where
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
-        assert_eq!(values.len(), 1, "two arguments to merge_batch");
+        assert_eq!(values.len(), 2, "two arguments to merge_batch");
         // first batch is partial sums
         let partial_sums: &PrimitiveArray<T> = values.get(0).unwrap().as_primitive::<T>();
         self.update_sums(group_indices, partial_sums, opt_filter, total_num_groups);
@@ -604,9 +609,10 @@ where
         let sums = std::mem::take(&mut self.sums);
         let nulls = self.build_nulls();
 
-        let array = PrimitiveArray::<T>::new(sums.into(), nulls); // no copy
+        let sums = PrimitiveArray::<T>::new(sums.into(), nulls); // no copy
+        let sums = adjust_output_array(&self.return_data_type, Arc::new(sums))?;
 
-        Ok(Arc::new(array))
+        Ok(Arc::new(sums))
     }
 
     // return arrays for sums and counts
@@ -614,9 +620,19 @@ where
         let nulls = self.build_nulls();
 
         let sums = std::mem::take(&mut self.sums);
-        let sums = PrimitiveArray::<T>::new(sums.into(), nulls); // zero copy
 
-        Ok(vec![Arc::new(sums) as ArrayRef])
+        let sums = Arc::new(PrimitiveArray::<T>::new(sums.into(), nulls.clone())); // zero copy
+
+        let sums = adjust_output_array(&self.sum_data_type, sums)?;
+
+        let counts = vec![0 as u64; sums.len()];
+        let counts = Arc::new(PrimitiveArray::<UInt64Type>::new(
+            counts.into(),
+            nulls.clone(),
+        ));
+
+        // TODO: Sum expects sum/count array, but count is not needed
+        Ok(vec![sums.clone() as ArrayRef, counts as ArrayRef])
     }
 
     fn size(&self) -> usize {
diff --git a/datafusion/physical-expr/src/aggregate/utils.rs b/datafusion/physical-expr/src/aggregate/utils.rs
index dbbe0c3f92c0..67ddfa247afc 100644
--- a/datafusion/physical-expr/src/aggregate/utils.rs
+++ b/datafusion/physical-expr/src/aggregate/utils.rs
@@ -20,6 +20,8 @@
 use crate::{AggregateExpr, PhysicalSortExpr};
 use arrow::array::ArrayRef;
 use arrow::datatypes::{MAX_DECIMAL_FOR_EACH_PRECISION, MIN_DECIMAL_FOR_EACH_PRECISION};
+use arrow_array::cast::AsArray;
+use arrow_array::types::Decimal128Type;
 use arrow_schema::{DataType, Field};
 use datafusion_common::{DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
@@ -145,6 +147,27 @@ pub fn calculate_result_decimal_for_avg(
     }
 }
 
+/// Adjust array type metadata if needed
+///
+/// Decimal128Arrays are created from Vec<NativeType> with default
+/// precision and scale. This function adjusts them down.
+pub fn adjust_output_array(
+    sum_data_type: &DataType,
+    array: ArrayRef,
+) -> Result<ArrayRef, DataFusionError> {
+    let array = match sum_data_type {
+        DataType::Decimal128(p, s) => Arc::new(
+            array
+                .as_primitive::<Decimal128Type>()
+                .clone()
+                .with_precision_and_scale(*p, *s)?,
+        ),
+        // no adjustment needed for other arrays
+        _ => array,
+    };
+    Ok(array)
+}
+
 /// Downcast a `Box<dyn AggregateExpr>` or `Arc<dyn AggregateExpr>`
 /// and return the inner trait object as [`Any`](std::any::Any) so
 /// that it can be downcast to a specific implementation.

From 180903b6e9592685e34bcf088d80ba5acf027c53 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Tue, 4 Jul 2023 10:15:56 -0400
Subject: [PATCH 37/89] Complete adapter

---
 .../aggregate/groups_accumulator/adapter.rs   | 56 ++++++++++++++++---
 1 file changed, 49 insertions(+), 7 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
index d7ac717bf206..9b1d9bf7adb1 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -24,7 +24,9 @@ use arrow::{
     datatypes::UInt32Type,
 };
 use arrow_array::{ArrayRef, BooleanArray, PrimitiveArray};
-use datafusion_common::{utils::get_arrayref_at_indices, DataFusionError, Result};
+use datafusion_common::{
+    utils::get_arrayref_at_indices, DataFusionError, Result, ScalarValue,
+};
 use datafusion_expr::Accumulator;
 
 /// An adpater that implements [`GroupsAccumulator`] for any [`Accumulator`]
@@ -90,11 +92,12 @@ impl GroupsAccumulatorAdapter {
         Ok(())
     }
 
-    /// invokes f(accumulator, values) for the correct slices of the
-    /// input values of this array.
+    /// invokes f(accumulator, values) for each group that has values
+    /// in group_indices.
     ///
-    /// This first reorders the input and filter so that values for group_indexes
-    /// are contiguous and then invokes f on the contiguous ranges
+    /// This function first reorders the input and filter so that
+    /// values for each group_index are contiguous and then invokes f
+    /// on the contiguous ranges, to minimize per-row overhead
     ///
     /// ```text
     /// ┌─────────┐   ┌─────────┐   ┌ ─ ─ ─ ─ ┐                       ┌─────────┐   ┌ ─ ─ ─ ─ ┐
@@ -216,11 +219,50 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
     }
 
     fn evaluate(&mut self) -> Result<ArrayRef> {
-        todo!()
+        let states = std::mem::take(&mut self.states);
+
+        // todo update memory usage
+
+        let results: Vec<ScalarValue> = states
+            .into_iter()
+            .map(|state| state.accumulator.evaluate())
+            .collect::<Result<_>>()?;
+
+        ScalarValue::iter_to_array(results)
     }
 
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
-        todo!()
+        let states = std::mem::take(&mut self.states);
+
+        // todo update memory usage
+
+        // each accumulator produces a potential vector of values
+        // which we need to form into columns
+        let mut results: Vec<Vec<ScalarValue>> = vec![];
+
+        for state in states {
+            let accumulator_state = state.accumulator.state()?;
+            results.resize_with(accumulator_state.len(), || vec![]);
+            for (idx, state_val) in accumulator_state.into_iter().enumerate() {
+                results[idx].push(state_val);
+            }
+        }
+
+        // create an array for each intermediate column
+        let arrays = results
+            .into_iter()
+            .map(|state| ScalarValue::iter_to_array(state))
+            .collect::<Result<Vec<_>>>()?;
+
+        // double check each array has the same length (aka the
+        // accumulator was written correctly
+        if let Some(first_col) = arrays.get(0) {
+            for arr in &arrays {
+                assert_eq!(arr.len(), first_col.len())
+            }
+        }
+
+        Ok(arrays)
     }
 
     fn merge_batch(

From 5d8bb352bc668b5dcd2df21081dc25e9c8bbf3c7 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Tue, 4 Jul 2023 10:35:53 -0400
Subject: [PATCH 38/89] Instantiate all types

---
 .../physical-expr/src/aggregate/average.rs    | 55 +++++++++++++++++--
 1 file changed, 49 insertions(+), 6 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index d0df0f25d046..e2f18860bf00 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -35,12 +35,17 @@ use crate::aggregate::utils::down_cast_any_ref;
 use crate::expressions::format_state_name;
 use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::compute;
-use arrow::datatypes::{DataType, Decimal128Type, UInt64Type};
+use arrow::datatypes::{
+    BooleanType, DataType, Decimal128Type, Float32Type, Float64Type, Int16Type,
+    Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+};
 use arrow::{
     array::{ArrayRef, UInt64Array},
     datatypes::Field,
 };
-use arrow_array::{Array, ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
+use arrow_array::{
+    Array, ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType, PrimitiveArray,
+};
 use datafusion_common::{downcast_value, ScalarValue};
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::Accumulator;
@@ -96,6 +101,17 @@ impl Avg {
     }
 }
 
+// Instantiates a [`AvgGroupsAccumulator`] for a given [`ArrowNativeType`]
+macro_rules! instantiate_accumulator {
+    ($SELF:expr, $NUMERICTYPE:ident) => {{
+        Ok(Box::new(AvgGroupsAccumulator::<$NUMERICTYPE, _>::new(
+            &$SELF.sum_data_type,
+            &$SELF.rt_data_type,
+            |sum, count| Ok(sum / count as <$NUMERICTYPE as ArrowPrimitiveType>::Native),
+        )))
+    }};
+}
+
 impl AggregateExpr for Avg {
     /// Return a reference to Any that can be used for downcasting
     fn as_any(&self) -> &dyn Any {
@@ -164,15 +180,41 @@ impl AggregateExpr for Avg {
     }
 
     fn groups_accumulator_supported(&self) -> bool {
-        true
+        use DataType::*;
+
+        match &self.sum_data_type {
+            Int8
+            | Int16
+            | Int32
+            | Int64
+            | UInt8
+            | UInt16
+            | UInt32
+            | UInt64
+            | Float32
+            | Float64
+            | Decimal128(_, _) => true,
+            _ => false,
+        }
     }
 
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
-        // instantiate specialized accumulator
+        use DataType::*;
+        // instantiate specialized accumulator based on the type
         match (&self.sum_data_type, &self.rt_data_type) {
+            (Int8, Int8) => instantiate_accumulator!(self, Int8Type),
+            (Int16, Int16) => instantiate_accumulator!(self, Int16Type),
+            (Int32, Int32) => instantiate_accumulator!(self, Int32Type),
+            (Int64, Int64) => instantiate_accumulator!(self, Int64Type),
+            (UInt8, UInt8) => instantiate_accumulator!(self, UInt8Type),
+            (UInt16, UInt16) => instantiate_accumulator!(self, UInt16Type),
+            (UInt32, UInt32) => instantiate_accumulator!(self, UInt32Type),
+            (UInt64, UInt64) => instantiate_accumulator!(self, UInt64Type),
+            (Float32, Float32) => instantiate_accumulator!(self, Float32Type),
+            (Float64, Float64) => instantiate_accumulator!(self, Float64Type),
             (
-                DataType::Decimal128(_sum_precision, sum_scale),
-                DataType::Decimal128(target_precision, target_scale),
+                Decimal128(_sum_precision, sum_scale),
+                Decimal128(target_precision, target_scale),
             ) => {
                 let decimal_averager = Decimal128Averager::try_new(
                     *sum_scale,
@@ -189,6 +231,7 @@ impl AggregateExpr for Avg {
                     avg_fn,
                 )))
             }
+
             _ => Err(DataFusionError::NotImplemented(format!(
                 "AvgGroupsAccumulator for ({} --> {})",
                 self.sum_data_type, self.rt_data_type,

From 51b0243c2ecee3e05da710c78e05c3ebd656b86e Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Tue, 4 Jul 2023 10:51:08 -0400
Subject: [PATCH 39/89] Implement memory accounting

---
 .../physical-expr/src/aggregate/average.rs    |  5 ++-
 .../aggregate/groups_accumulator/adapter.rs   | 41 ++++++++++++-------
 2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index e2f18860bf00..f11aad4bf64c 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -36,8 +36,8 @@ use crate::expressions::format_state_name;
 use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::compute;
 use arrow::datatypes::{
-    BooleanType, DataType, Decimal128Type, Float32Type, Float64Type, Int16Type,
-    Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
+    DataType, Decimal128Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type,
+    Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
 use arrow::{
     array::{ArrayRef, UInt64Array},
@@ -107,6 +107,7 @@ macro_rules! instantiate_accumulator {
         Ok(Box::new(AvgGroupsAccumulator::<$NUMERICTYPE, _>::new(
             &$SELF.sum_data_type,
             &$SELF.rt_data_type,
+            // TODO handle overflow (e.g. count as u8 can overflow for 400)
             |sum, count| Ok(sum / count as <$NUMERICTYPE as ArrowPrimitiveType>::Native),
         )))
     }};
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
index 9b1d9bf7adb1..361cea6c1f1a 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -39,6 +39,12 @@ pub struct GroupsAccumulatorAdapter {
 
     /// [`Accumulators`] for each group, stored in group_index order
     states: Vec<AccumulatorState>,
+
+    /// Current memory usage, in bytes.
+    ///
+    /// Note this is incrementally updated to avoid size() being a
+    /// bottleneck, which we saw in earlier implementations.
+    allocation_bytes: usize,
 }
 
 struct AccumulatorState {
@@ -59,6 +65,7 @@ impl AccumulatorState {
         }
     }
 
+    /// Returns the amount of memory taken by this structure and its accumulator
     fn size(&self) -> usize {
         self.accumulator.size()
             + std::mem::size_of_val(self)
@@ -76,6 +83,7 @@ impl GroupsAccumulatorAdapter {
         Self {
             factory: Box::new(factory),
             states: vec![],
+            allocation_bytes: std::mem::size_of::<GroupsAccumulatorAdapter>(),
         }
     }
 
@@ -83,12 +91,21 @@ impl GroupsAccumulatorAdapter {
     fn make_accumulators_if_needed(&mut self, total_num_groups: usize) -> Result<()> {
         // can't shrink
         assert!(total_num_groups >= self.states.len());
+        let vec_size_pre =
+            std::mem::size_of::<AccumulatorState>() * self.states.capacity();
+
+        // instantiate new accumulators
         let new_accumulators = total_num_groups - self.states.len();
         for _ in 0..new_accumulators {
             let accumulator = (self.factory)()?;
-            // todo update allocation
-            self.states.push(AccumulatorState::new(accumulator));
+            let state = AccumulatorState::new(accumulator);
+            self.allocation_bytes += state.size();
+            self.states.push(state);
         }
+        let vec_size_post =
+            std::mem::size_of::<AccumulatorState>() * self.states.capacity();
+
+        self.allocation_bytes += vec_size_post.saturating_sub(vec_size_pre);
         Ok(())
     }
 
@@ -134,7 +151,9 @@ impl GroupsAccumulatorAdapter {
         // Then it invokes Accumulator::update / merge for each of those contiguous ranges
         assert_eq!(values[0].len(), group_indices.len());
 
-        // figure out which input rows correspond to which groups
+        // figure out which input rows correspond to which groups. Note
+        // that state.indices is empty for every group at this point (it
+        // is cleared out below)
         for (idx, group_index) in group_indices.iter().enumerate() {
             self.states[*group_index].indices.push(idx as u32);
         }
@@ -176,23 +195,19 @@ impl GroupsAccumulatorAdapter {
         // RecordBatch(es)
         let iter = groups_with_rows.iter().zip(offsets.windows(2));
 
-        // TODO memory accounting
-        let mut allocated = 0;
         for (group_idx, offsets) in iter {
             let state = &mut self.states[*group_idx as usize];
-
-            //let size_pre = accumulator.size();
+            let size_pre = state.size();
 
             let values_to_accumulate =
                 slice_and_maybe_filter(&values, opt_filter.as_ref(), &offsets)?;
-
             (f)(state.accumulator.as_mut(), &values_to_accumulate)?;
 
             // clear out the state
             state.indices.clear();
 
-            //let size_post = accumulator.size();
-            //*allocated += size_post.saturating_sub(size_pre);
+            let size_post = state.size();
+            self.allocation_bytes += size_post.saturating_sub(size_pre);
         }
         Ok(())
     }
@@ -285,11 +300,7 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
     }
 
     fn size(&self) -> usize {
-        // TODO should calculate size incrementally during update and just return value here
-        self.states.iter().map(|a| a.size()).sum::<usize>()
-            //include the size of self and self.accumulators itself
-            + self.states.len() * std::mem::size_of::<AccumulatorState>()
-            + std::mem::size_of_val(&self.factory)
+        self.allocation_bytes
     }
 }
 

From 68f62d1b7284768c318de6bec56b484a7b91e8f5 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Tue, 4 Jul 2023 10:55:22 -0400
Subject: [PATCH 40/89] cleanup memory accounting

---
 .../aggregate/groups_accumulator/adapter.rs   | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
index 361cea6c1f1a..5961256be924 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -80,11 +80,19 @@ impl GroupsAccumulatorAdapter {
     where
         F: Fn() -> Result<Box<dyn Accumulator>> + Send + 'static,
     {
-        Self {
+        let mut new_self = Self {
             factory: Box::new(factory),
             states: vec![],
-            allocation_bytes: std::mem::size_of::<GroupsAccumulatorAdapter>(),
-        }
+            allocation_bytes: 0,
+        };
+        new_self.reset_allocation();
+        new_self
+    }
+
+    // Reset the allocation bytes to empty state
+    fn reset_allocation(&mut self) {
+        assert!(self.states.is_empty());
+        self.allocation_bytes = std::mem::size_of::<GroupsAccumulatorAdapter>();
     }
 
     /// Ensure that self.accumulators has total_num_groups
@@ -243,7 +251,9 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
             .map(|state| state.accumulator.evaluate())
             .collect::<Result<_>>()?;
 
-        ScalarValue::iter_to_array(results)
+        let result = ScalarValue::iter_to_array(results);
+        self.reset_allocation();
+        result
     }
 
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
@@ -277,6 +287,7 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
             }
         }
 
+        self.reset_allocation();
         Ok(arrays)
     }
 

From ad6d4f31e7bc2c1fb936c0b5da308bf6d5b9979a Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Tue, 4 Jul 2023 14:13:35 -0400
Subject: [PATCH 41/89] Fix sum accumulator with filtering, consolidate null
 handling

---
 .../physical-expr/src/aggregate/average.rs    | 190 +++--------
 .../physical-expr/src/aggregate/count.rs      |   4 +-
 .../groups_accumulator/accumulate.rs          | 296 +++++++++++++++++-
 .../src/aggregate/groups_accumulator/mod.rs   |   1 -
 datafusion/physical-expr/src/aggregate/sum.rs | 107 +++----
 5 files changed, 380 insertions(+), 218 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index f11aad4bf64c..bf553f4c6507 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -18,13 +18,13 @@
 //! Defines physical expressions that can evaluated at runtime during query execution
 
 use arrow::array::{AsArray, PrimitiveBuilder};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 use log::debug;
 
 use std::any::Any;
 use std::convert::TryFrom;
 use std::sync::Arc;
 
+use crate::aggregate::groups_accumulator::accumulate::NullState;
 use crate::aggregate::row_accumulator::{
     is_row_accumulator_support_dtype, RowAccumulator,
 };
@@ -51,7 +51,6 @@ use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::Accumulator;
 use datafusion_row::accessor::RowAccessor;
 
-use super::groups_accumulator::{accumulate_all, accumulate_all_nullable};
 use super::utils::{adjust_output_array, Decimal128Averager};
 
 /// AVG aggregate expression
@@ -490,10 +489,10 @@ where
     /// Sums per group, stored as the native type
     sums: Vec<T::Native>,
 
-    /// If we have seen a null input value for this group_index
-    null_inputs: BooleanBufferBuilder,
+    /// Track nulls in the input / filters
+    null_state: NullState,
 
-    /// Function that computes the average (value / count)
+    /// Function that computes the final average (value / count)
     avg_fn: F,
 }
 
@@ -513,137 +512,10 @@ where
             sum_data_type: sum_data_type.clone(),
             counts: vec![],
             sums: vec![],
-            null_inputs: BooleanBufferBuilder::new(0),
+            null_state: NullState::new(),
             avg_fn,
         }
     }
-
-    /// Adds one to each group's counter
-    fn increment_counts(
-        &mut self,
-        group_indices: &[usize],
-        values: &PrimitiveArray<T>,
-        opt_filter: Option<&arrow_array::BooleanArray>,
-        total_num_groups: usize,
-    ) {
-        self.counts.resize(total_num_groups, 0);
-
-        if values.null_count() == 0 {
-            accumulate_all(
-                group_indices,
-                values,
-                opt_filter,
-                |group_index, _new_value| {
-                    self.counts[group_index] += 1;
-                },
-            )
-        } else {
-            accumulate_all_nullable(
-                group_indices,
-                values,
-                opt_filter,
-                |group_index, _new_value, is_valid| {
-                    if is_valid {
-                        self.counts[group_index] += 1;
-                    }
-                },
-            )
-        }
-    }
-
-    /// Adds the counts with the partial counts
-    fn update_counts_with_partial_counts(
-        &mut self,
-        group_indices: &[usize],
-        partial_counts: &UInt64Array,
-        opt_filter: Option<&arrow_array::BooleanArray>,
-        total_num_groups: usize,
-    ) {
-        self.counts.resize(total_num_groups, 0);
-
-        if partial_counts.null_count() == 0 {
-            accumulate_all(
-                group_indices,
-                partial_counts,
-                opt_filter,
-                |group_index, partial_count| {
-                    self.counts[group_index] += partial_count;
-                },
-            )
-        } else {
-            accumulate_all_nullable(
-                group_indices,
-                partial_counts,
-                opt_filter,
-                |group_index, partial_count, is_valid| {
-                    if is_valid {
-                        self.counts[group_index] += partial_count;
-                    }
-                },
-            )
-        }
-    }
-
-    /// Adds the values in `values` to self.sums
-    fn update_sums(
-        &mut self,
-        group_indices: &[usize],
-        values: &PrimitiveArray<T>,
-        opt_filter: Option<&arrow_array::BooleanArray>,
-        total_num_groups: usize,
-    ) {
-        if self.null_inputs.len() < total_num_groups {
-            let new_groups = total_num_groups - self.null_inputs.len();
-            // All groups start as valid (and are set to null if we
-            // see a null in the input)
-            self.null_inputs.append_n(new_groups, true);
-        }
-        self.sums
-            .resize_with(total_num_groups, || T::default_value());
-
-        if values.null_count() == 0 {
-            accumulate_all(
-                group_indices,
-                values,
-                opt_filter,
-                |group_index, new_value| {
-                    // note since add_wrapping doesn't error, we
-                    // simply add values in null sum slots rather than
-                    // checking if they are null first. The theory is
-                    // this is faster
-                    let sum = &mut self.sums[group_index];
-                    *sum = sum.add_wrapping(new_value);
-                },
-            )
-        } else {
-            accumulate_all_nullable(
-                group_indices,
-                values,
-                opt_filter,
-                |group_index, new_value, is_valid| {
-                    if is_valid {
-                        let sum = &mut self.sums[group_index];
-                        *sum = sum.add_wrapping(new_value);
-                    } else {
-                        // input null means this group is now null
-                        self.null_inputs.set_bit(group_index, false);
-                    }
-                },
-            )
-        }
-    }
-
-    /// Returns a NullBuffer representing which group_indices have
-    /// null values (if they saw a null input)
-    /// Resets `self.null_inputs`;
-    fn build_nulls(&mut self) -> Option<NullBuffer> {
-        let nulls = NullBuffer::new(self.null_inputs.finish());
-        if nulls.null_count() > 0 {
-            Some(nulls)
-        } else {
-            None
-        }
-    }
 }
 
 impl<T, F> GroupsAccumulator for AvgGroupsAccumulator<T, F>
@@ -661,8 +533,30 @@ where
         assert_eq!(values.len(), 1, "single argument to update_batch");
         let values = values.get(0).unwrap().as_primitive::<T>();
 
-        self.increment_counts(group_indices, values, opt_filter, total_num_groups);
-        self.update_sums(group_indices, values, opt_filter, total_num_groups);
+        // increment counts
+        self.counts.resize(total_num_groups, 0);
+        self.null_state.accumulate(
+            group_indices,
+            values,
+            opt_filter,
+            total_num_groups,
+            |group_index, _new_value| {
+                self.counts[group_index] += 1;
+            },
+        );
+
+        // update sums
+        self.sums.resize(total_num_groups, T::default_value());
+        self.null_state.accumulate(
+            group_indices,
+            values,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let sum = &mut self.sums[group_index];
+                *sum = sum.add_wrapping(new_value);
+            },
+        );
 
         Ok(())
     }
@@ -678,13 +572,31 @@ where
         // first batch is counts, second is partial sums
         let partial_counts = values.get(0).unwrap().as_primitive::<UInt64Type>();
         let partial_sums = values.get(1).unwrap().as_primitive::<T>();
-        self.update_counts_with_partial_counts(
+        // update counts with partial counts
+        self.counts.resize(total_num_groups, 0);
+        self.null_state.accumulate(
             group_indices,
             partial_counts,
             opt_filter,
             total_num_groups,
+            |group_index, partial_count| {
+                self.counts[group_index] += partial_count;
+            },
+        );
+
+        // update sums
+        self.sums
+            .resize_with(total_num_groups, || T::default_value());
+        self.null_state.accumulate(
+            group_indices,
+            partial_sums,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let sum = &mut self.sums[group_index];
+                *sum = sum.add_wrapping(new_value);
+            },
         );
-        self.update_sums(group_indices, partial_sums, opt_filter, total_num_groups);
 
         Ok(())
     }
@@ -692,7 +604,7 @@ where
     fn evaluate(&mut self) -> Result<ArrayRef> {
         let counts = std::mem::take(&mut self.counts);
         let sums = std::mem::take(&mut self.sums);
-        let nulls = self.build_nulls();
+        let nulls = self.null_state.build();
 
         assert_eq!(counts.len(), sums.len());
 
@@ -727,7 +639,7 @@ where
 
     // return arrays for sums and counts
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
-        let nulls = self.build_nulls();
+        let nulls = self.null_state.build();
         let counts = std::mem::take(&mut self.counts);
         let counts = UInt64Array::from(counts); // zero copy
 
diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index ff156a3476f0..cc96dc90c359 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -41,9 +41,9 @@ use datafusion_row::accessor::RowAccessor;
 use crate::expressions::format_state_name;
 
 use super::groups_accumulator::accumulate::{
-    accumulate_all, accumulate_indices, accumulate_indices_nullable,
+    accumulate_all, accumulate_all_nullable, accumulate_indices,
+    accumulate_indices_nullable,
 };
-use super::groups_accumulator::accumulate_all_nullable;
 
 /// COUNT aggregate expression
 /// Returns the amount of non-null values of the given expression.
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 3eb4e6e259a0..aca61eb780fb 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -15,9 +15,274 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Vectorized [`accumulate`] and [`accumulate_nullable`] functions
+//! Vectorized [`accumulate`] and [`accumulate_nullable`] functions.
+//!
+//! These functions are designed to be the performance critical inner
+//! loops of accumulators and thus there are multiple versions, to be
+//! invoked depending on the input.
+//!
+//! There are typically 4 potential combinations of input values that
+//! accumulators need to special case for performance,
+//!
+//! With / Without filter
+//! With / Without nulls
+//!
+//! If there are filters present, the accumulator typically needs to
+//! track if it has seen *any* value for that group (as some values
+//! may be filtered out). Without a filter, the accumulator is only
+//! invoked for groups that actually had a value to accumulate so they
+//! do not need to track if they have seen values for a particular
+//! group.
+//!
+//! If the input has nulls, then the accumulator must also potentially
+//! handle each input null value specially (e.g. for `SUM` to mark the
+//! corresponding sum as null)
 
 use arrow_array::{Array, ArrowNumericType, BooleanArray, PrimitiveArray};
+use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
+
+/// This structure is used to update the accumulator state per row for
+/// a `PrimitiveArray<T>`, and track if values or nulls have been seen
+/// for each group. Since it is the inner loop for many
+/// GroupsAccumulators, the  performance is critical.
+///
+#[derive(Debug)]
+pub struct NullState {
+    /// If we have seen a null input value for `group_index`
+    null_inputs: Option<BooleanBufferBuilder>,
+
+    /// If there has been a filter value, has it seen any non-filtered
+    /// input values for `group_index`?
+    seen_values: Option<BooleanBufferBuilder>,
+}
+
+impl NullState {
+    pub fn new() -> Self {
+        Self {
+            null_inputs: None,
+            seen_values: None,
+        }
+    }
+
+    /// Invokes `value_fn(group_index, value)` for each non null, non
+    /// filtered value, while tracking which groups have seen null
+    /// inputs and which groups have seen any inputs
+    ///
+    /// # Arguments:
+    ///
+    /// * `values`: the input arguments to the accumulator
+    /// * `group_indices`:  To which groups do the rows in `values` belong, (aka group_index)
+    /// * `opt_filter`: if present, only rows for which the filter is Some(true) are included
+    /// * `value_fn`: function invoked for  (group_index, value) where value is non null
+    ///
+    /// `F`: Invoked for each input row like `value_fn(group_index,
+    /// value)` for each non null, non filtered value.
+    ///
+    /// # Example
+    ///
+    /// ```text
+    ///  ┌─────────┐   ┌─────────┐   ┌ ─ ─ ─ ─ ┐
+    ///  │ ┌─────┐ │   │ ┌─────┐ │     ┌─────┐
+    ///  │ │  2  │ │   │ │ 200 │ │   │ │  t  │ │
+    ///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
+    ///  │ │  2  │ │   │ │ 100 │ │   │ │  f  │ │
+    ///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
+    ///  │ │  0  │ │   │ │ 200 │ │   │ │  t  │ │
+    ///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
+    ///  │ │  1  │ │   │ │ 200 │ │   │ │NULL │ │
+    ///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
+    ///  │ │  0  │ │   │ │ 300 │ │   │ │  t  │ │
+    ///  │ └─────┘ │   │ └─────┘ │     └─────┘
+    ///  └─────────┘   └─────────┘   └ ─ ─ ─ ─ ┘
+    ///
+    /// group_indices   values        opt_filter
+    /// ```
+    ///
+    /// In the example above, `value_fn` is invoked for each (group_index,
+    /// value) pair where `opt_filter[i]` is true
+    ///
+    /// ```text
+    /// value_fn(2, 200)
+    /// value_fn(0, 200)
+    /// value_fn(0, 300)
+    /// ```
+    ///
+    /// It also sets
+    ///
+    /// 1. `self.seen_values[group_index]` to true for all rows that had a value if there is a filter
+    ///
+    /// 2. `self.null_inputs[group_index]` to false for each group that had a null in the input
+    pub fn accumulate<T, F>(
+        &mut self,
+        group_indices: &[usize],
+        values: &PrimitiveArray<T>,
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+        mut value_fn: F,
+    ) where
+        T: ArrowNumericType + Send,
+        F: FnMut(usize, T::Native) + Send,
+    {
+        let data: &[T::Native] = values.values();
+        assert_eq!(data.len(), group_indices.len());
+
+        match (values.nulls(), opt_filter) {
+            (Some(nulls), None) if nulls.null_count() > 0 => {
+                // All groups start as valid (true), and are set to
+                // null if we see a null in the input)
+                let null_inputs =
+                    initialize_builder(&mut self.null_inputs, total_num_groups, true);
+
+                // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
+                // iterate over in chunks of 64 bits for more efficient null checking
+                let data: &[T::Native] = values.values();
+                assert_eq!(data.len(), group_indices.len());
+                let group_indices_chunks = group_indices.chunks_exact(64);
+                let data_chunks = data.chunks_exact(64);
+                let bit_chunks = nulls.inner().bit_chunks();
+
+                let group_indices_remainder = group_indices_chunks.remainder();
+                let data_remainder = data_chunks.remainder();
+
+                group_indices_chunks
+                    .zip(data_chunks)
+                    .zip(bit_chunks.iter())
+                    .for_each(|((group_index_chunk, data_chunk), mask)| {
+                        // index_mask has value 1 << i in the loop
+                        let mut index_mask = 1;
+                        group_index_chunk.iter().zip(data_chunk.iter()).for_each(
+                            |(&group_index, &new_value)| {
+                                // valid bit was set, real value
+                                let is_valid = (mask & index_mask) != 0;
+                                value_fn(group_index, new_value);
+                                if !is_valid {
+                                    // input null means this group is now null
+                                    null_inputs.set_bit(group_index, false);
+                                }
+                                index_mask <<= 1;
+                            },
+                        )
+                    });
+
+                // handle any remaining bits (after the initial 64)
+                let remainder_bits = bit_chunks.remainder_bits();
+                group_indices_remainder
+                    .iter()
+                    .zip(data_remainder.iter())
+                    .enumerate()
+                    .for_each(|(i, (&group_index, &new_value))| {
+                        let is_valid = remainder_bits & (1 << i) != 0;
+                        value_fn(group_index, new_value);
+                        if !is_valid {
+                            // input null means this group is now null
+                            null_inputs.set_bit(group_index, false);
+                        }
+                    });
+            }
+            // no filter, no nulls
+            (_, None) => {
+                // if we have previously seen nulls, ensure the null
+                // buffer is big enough (start everything at valid)
+                if self.null_inputs.is_some() {
+                    initialize_builder(&mut self.null_inputs, total_num_groups, true);
+                }
+                let iter = group_indices.iter().zip(data.iter());
+                for (&group_index, &new_value) in iter {
+                    value_fn(group_index, new_value)
+                }
+            }
+            // no nulls, but a filter
+            (None, Some(filter)) => {
+                assert_eq!(filter.len(), group_indices.len());
+
+                // default seen to false (we fill it in as we go)
+                let seen_values =
+                    initialize_builder(&mut self.seen_values, total_num_groups, false);
+                // The performance with a filter could be improved by
+                // iterating over the filter in chunks, rather than a single
+                // iterator. TODO file a ticket
+                let iter = group_indices.iter().zip(data.iter());
+                let iter = iter.zip(filter.iter());
+                for ((&group_index, &new_value), filter_value) in iter {
+                    if let Some(true) = filter_value {
+                        value_fn(group_index, new_value);
+                        // remember we have seen a value for this index
+                        seen_values.set_bit(group_index, true);
+                    }
+                }
+            }
+            // both null values and filters
+            (
+                Some(_value_nulls /* nulls obtained via values.iters() */),
+                Some(filter),
+            ) => {
+                let null_inputs =
+                    initialize_builder(&mut self.null_inputs, total_num_groups, true);
+                let seen_values =
+                    initialize_builder(&mut self.seen_values, total_num_groups, false);
+
+                assert_eq!(filter.len(), values.len());
+                assert_eq!(filter.len(), group_indices.len());
+                // The performance with a filter could be improved by
+                // iterating over the filter in chunks, rather than using
+                // iterators. TODO file a ticket
+                filter
+                    .iter()
+                    .zip(group_indices.iter())
+                    .zip(values.iter())
+                    .for_each(|((filter_value, group_index), new_value)| {
+                        if let Some(true) = filter_value {
+                            if let Some(new_value) = new_value {
+                                value_fn(*group_index, new_value)
+                            } else {
+                                // input null means this group is now null
+                                null_inputs.set_bit(*group_index, false);
+                            }
+                            // remember we have seen a value for this index
+                            seen_values.set_bit(*group_index, true);
+                        }
+                    })
+            }
+        }
+    }
+
+    /// Creates the final NullBuffer representing which group_indices have
+    /// null values (if they saw a null input, or because they never saw any values)
+    ///
+    /// resets the internal state to empty
+    ///
+    /// nulls (validity) set false for any group that saw a null
+    /// seen_values (validity) set true for any group that saw a value
+    pub fn build(&mut self) -> Option<NullBuffer> {
+        let nulls = self
+            .null_inputs
+            .as_mut()
+            .map(|null_inputs| NullBuffer::new(null_inputs.finish()))
+            .and_then(|nulls| {
+                if nulls.null_count() > 0 {
+                    Some(nulls)
+                } else {
+                    None
+                }
+            });
+
+        // if we had filters, some groups may never have seen a value
+        // so they are only non-null if we have seen values
+        let seen_values = self
+            .seen_values
+            .as_mut()
+            .map(|seen_values| NullBuffer::new(seen_values.finish()));
+
+        match (nulls, seen_values) {
+            (None, None) => None,
+            (Some(nulls), None) => Some(nulls),
+            (None, Some(seen_values)) => Some(seen_values),
+            (Some(seen_values), Some(nulls)) => {
+                NullBuffer::union(Some(&seen_values), Some(&nulls))
+            }
+        }
+    }
+}
 
 /// This function is used to update the accumulator state per row,
 /// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
@@ -61,10 +326,6 @@ use arrow_array::{Array, ArrowNumericType, BooleanArray, PrimitiveArray};
 /// value_fn(0, 300)
 /// ```
 ///
-/// I couldn't find any way to combine this with
-/// accumulate_all_nullable without having to pass in a is_null on
-/// every row.
-///
 pub fn accumulate_all<T, F>(
     group_indices: &[usize],
     values: &PrimitiveArray<T>,
@@ -104,6 +365,10 @@ pub fn accumulate_all<T, F>(
     }
 }
 
+/// This function is called to update the accumulator state per row
+/// when the value is not needed (e.g. COUNT)
+///
+/// `F`: Invoked like `value_fn(group_index)`.
 pub fn accumulate_indices<F>(
     group_indices: &[usize],
     opt_filter: Option<&BooleanArray>,
@@ -278,6 +543,27 @@ pub fn accumulate_indices_nullable<F>(
     }
 }
 
+/// Ensures that `builder` contains a `BooleanBufferBuilder` with at
+/// least `total_num_groups` entries.
+///
+/// All new entries are initialized to `default_value`
+fn initialize_builder(
+    builder: &mut Option<BooleanBufferBuilder>,
+    total_num_groups: usize,
+    default_value: bool,
+) -> &mut BooleanBufferBuilder {
+    if builder.is_none() {
+        *builder = Some(BooleanBufferBuilder::new(total_num_groups));
+    }
+    let builder = builder.as_mut().unwrap();
+
+    if builder.len() < total_num_groups {
+        let new_groups = total_num_groups - builder.len();
+        builder.append_n(new_groups, default_value);
+    }
+    builder
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
index 512e7c65cbf0..9535d7d49c29 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
@@ -20,7 +20,6 @@
 pub(crate) mod accumulate;
 mod adapter;
 
-pub(crate) use accumulate::{accumulate_all, accumulate_all_nullable};
 pub use adapter::GroupsAccumulatorAdapter;
 
 use arrow_array::{ArrayRef, BooleanArray};
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 4f12e01061dc..690415c5e27e 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -35,7 +35,6 @@ use arrow::{
 use arrow_array::cast::AsArray;
 use arrow_array::types::{Decimal128Type, Int32Type, Int64Type, UInt32Type, UInt64Type};
 use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
 use log::debug;
@@ -49,7 +48,7 @@ use arrow::array::Array;
 use arrow::array::Decimal128Array;
 use datafusion_row::accessor::RowAccessor;
 
-use super::groups_accumulator::accumulate::{accumulate_all, accumulate_all_nullable};
+use super::groups_accumulator::accumulate::NullState;
 use super::utils::adjust_output_array;
 
 /// SUM aggregate expression
@@ -487,8 +486,8 @@ where
     /// Sums per group, stored as the native type
     sums: Vec<T::Native>,
 
-    /// If we have seen a null input value for this group_index
-    null_inputs: BooleanBufferBuilder,
+    /// Track nulls in the input / filters
+    null_state: NullState,
 }
 
 impl<T> SumGroupsAccumulator<T>
@@ -505,68 +504,7 @@ where
             return_data_type: sum_data_type.clone(),
             sum_data_type: sum_data_type.clone(),
             sums: vec![],
-            null_inputs: BooleanBufferBuilder::new(0),
-        }
-    }
-
-    /// Adds the values in `values` to self.sums
-    fn update_sums(
-        &mut self,
-        group_indices: &[usize],
-        values: &PrimitiveArray<T>,
-        opt_filter: Option<&arrow_array::BooleanArray>,
-        total_num_groups: usize,
-    ) {
-        if self.null_inputs.len() < total_num_groups {
-            let new_groups = total_num_groups - self.null_inputs.len();
-            // All groups start as valid (and are set to null if we
-            // see a null in the input)
-            self.null_inputs.append_n(new_groups, true);
-        }
-        self.sums
-            .resize_with(total_num_groups, || T::default_value());
-
-        if values.null_count() == 0 {
-            accumulate_all(
-                group_indices,
-                values,
-                opt_filter,
-                |group_index, new_value| {
-                    // note since add_wrapping doesn't error, we
-                    // simply add values in null sum slots rather than
-                    // checking if they are null first. The theory is
-                    // this is faster
-                    let sum = &mut self.sums[group_index];
-                    *sum = sum.add_wrapping(new_value);
-                },
-            )
-        } else {
-            accumulate_all_nullable(
-                group_indices,
-                values,
-                opt_filter,
-                |group_index, new_value, is_valid| {
-                    if is_valid {
-                        let sum = &mut self.sums[group_index];
-                        *sum = sum.add_wrapping(new_value);
-                    } else {
-                        // input null means this group is now null
-                        self.null_inputs.set_bit(group_index, false);
-                    }
-                },
-            )
-        }
-    }
-
-    /// Returns a NullBuffer representing which group_indices have
-    /// null values (if they saw a null input)
-    /// Resets `self.null_inputs`;
-    fn build_nulls(&mut self) -> Option<NullBuffer> {
-        let nulls = NullBuffer::new(self.null_inputs.finish());
-        if nulls.null_count() > 0 {
-            Some(nulls)
-        } else {
-            None
+            null_state: NullState::new(),
         }
     }
 }
@@ -585,7 +523,21 @@ where
         assert_eq!(values.len(), 1, "single argument to update_batch");
         let values = values.get(0).unwrap().as_primitive::<T>();
 
-        self.update_sums(group_indices, values, opt_filter, total_num_groups);
+        // update sums
+        self.sums
+            .resize_with(total_num_groups, || T::default_value());
+
+        // NullState dispatches / handles tracking nulls and groups that saw no values
+        self.null_state.accumulate(
+            group_indices,
+            values,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let sum = &mut self.sums[group_index];
+                *sum = sum.add_wrapping(new_value);
+            },
+        );
 
         Ok(())
     }
@@ -600,14 +552,28 @@ where
         assert_eq!(values.len(), 2, "two arguments to merge_batch");
         // first batch is partial sums
         let partial_sums: &PrimitiveArray<T> = values.get(0).unwrap().as_primitive::<T>();
-        self.update_sums(group_indices, partial_sums, opt_filter, total_num_groups);
+
+        // Sum partial sums
+        self.sums
+            .resize_with(total_num_groups, || T::default_value());
+
+        self.null_state.accumulate(
+            group_indices,
+            partial_sums,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let sum = &mut self.sums[group_index];
+                *sum = sum.add_wrapping(new_value);
+            },
+        );
 
         Ok(())
     }
 
     fn evaluate(&mut self) -> Result<ArrayRef> {
         let sums = std::mem::take(&mut self.sums);
-        let nulls = self.build_nulls();
+        let nulls = self.null_state.build();
 
         let sums = PrimitiveArray::<T>::new(sums.into(), nulls); // no copy
         let sums = adjust_output_array(&self.return_data_type, Arc::new(sums))?;
@@ -617,10 +583,9 @@ where
 
     // return arrays for sums and counts
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
-        let nulls = self.build_nulls();
+        let nulls = self.null_state.build();
 
         let sums = std::mem::take(&mut self.sums);
-
         let sums = Arc::new(PrimitiveArray::<T>::new(sums.into(), nulls.clone())); // zero copy
 
         let sums = adjust_output_array(&self.sum_data_type, sums)?;

From 87b54c9aab097d8662d88d521eb991b4251c0cfb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 10:25:57 +0200
Subject: [PATCH 42/89] Add float support for sum

---
 datafusion/physical-expr/src/aggregate/sum.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 690415c5e27e..7b093d17d15f 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -33,7 +33,10 @@ use arrow::{
     datatypes::Field,
 };
 use arrow_array::cast::AsArray;
-use arrow_array::types::{Decimal128Type, Int32Type, Int64Type, UInt32Type, UInt64Type};
+use arrow_array::types::{
+    Decimal128Type, Float32Type, Float64Type, Int32Type, Int64Type, UInt32Type,
+    UInt64Type,
+};
 use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
 use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
@@ -171,6 +174,14 @@ impl AggregateExpr for Sum {
                 &self.data_type,
                 &self.data_type,
             ))),
+            DataType::Float32 => Ok(Box::new(SumGroupsAccumulator::<Float32Type>::new(
+                &self.data_type,
+                &self.data_type,
+            ))),
+            DataType::Float64 => Ok(Box::new(SumGroupsAccumulator::<Float64Type>::new(
+                &self.data_type,
+                &self.data_type,
+            ))),
             DataType::Decimal128(_target_precision, _target_scale) => {
                 Ok(Box::new(SumGroupsAccumulator::<Decimal128Type>::new(
                     &self.data_type,

From 917c050cf711f4147717a1061b4a83d3fe0865d6 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 5 Jul 2023 07:49:35 -0400
Subject: [PATCH 43/89] Simplify count aggregate, clean up aggregates cleanup,
 fuzz almost passes

---
 .../physical-expr/src/aggregate/count.rs      | 116 +--
 .../groups_accumulator/accumulate.rs          | 808 ++++++++----------
 2 files changed, 420 insertions(+), 504 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index cc96dc90c359..5d5428a07780 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -40,10 +40,7 @@ use datafusion_row::accessor::RowAccessor;
 
 use crate::expressions::format_state_name;
 
-use super::groups_accumulator::accumulate::{
-    accumulate_all, accumulate_all_nullable, accumulate_indices,
-    accumulate_indices_nullable,
-};
+use super::groups_accumulator::accumulate::accumulate_indices;
 
 /// COUNT aggregate expression
 /// Returns the amount of non-null values of the given expression.
@@ -87,8 +84,9 @@ impl Count {
 /// An accumulator to compute the average of PrimitiveArray<T>.
 /// Stores values as native types, and does overflow checking
 ///
-/// F: Function that calcuates the average value from a sum of
-/// T::Native and a total count
+/// Unlike most other accumulators, COUNT never produces NULLs. If no
+/// non-null values are seen in any group the output is 0. Thus, this
+/// accumulator has no additional null or seen filter tracking.
 #[derive(Debug)]
 struct CountGroupsAccumulator {
     /// Count per group (use u64 to make Int64Array)
@@ -99,65 +97,6 @@ impl CountGroupsAccumulator {
     pub fn new() -> Self {
         Self { counts: vec![] }
     }
-
-    /// Adds one to each group's counter
-    fn increment_counts(
-        &mut self,
-        group_indices: &[usize],
-        values: &dyn Array,
-        opt_filter: Option<&arrow_array::BooleanArray>,
-        total_num_groups: usize,
-    ) {
-        self.counts.resize(total_num_groups, 0);
-
-        if values.null_count() == 0 {
-            accumulate_indices(group_indices, opt_filter, |group_index| {
-                self.counts[group_index] += 1;
-            })
-        } else {
-            accumulate_indices_nullable(
-                group_indices,
-                values,
-                opt_filter,
-                |group_index| {
-                    self.counts[group_index] += 1;
-                },
-            )
-        }
-    }
-
-    /// Adds the counts with the partial counts
-    fn update_counts_with_partial_counts(
-        &mut self,
-        group_indices: &[usize],
-        partial_counts: &Int64Array,
-        opt_filter: Option<&arrow_array::BooleanArray>,
-        total_num_groups: usize,
-    ) {
-        self.counts.resize(total_num_groups, 0);
-
-        if partial_counts.null_count() == 0 {
-            accumulate_all(
-                group_indices,
-                partial_counts,
-                opt_filter,
-                |group_index, partial_count| {
-                    self.counts[group_index] += partial_count;
-                },
-            )
-        } else {
-            accumulate_all_nullable(
-                group_indices,
-                partial_counts,
-                opt_filter,
-                |group_index, partial_count, is_valid| {
-                    if is_valid {
-                        self.counts[group_index] += partial_count;
-                    }
-                },
-            )
-        }
-    }
 }
 
 impl GroupsAccumulator for CountGroupsAccumulator {
@@ -171,7 +110,17 @@ impl GroupsAccumulator for CountGroupsAccumulator {
         assert_eq!(values.len(), 1, "single argument to update_batch");
         let values = values.get(0).unwrap();
 
-        self.increment_counts(group_indices, values, opt_filter, total_num_groups);
+        // Add one to each group's counter for each non null, non
+        // filtered value
+        self.counts.resize(total_num_groups, 0);
+        accumulate_indices(
+            group_indices,
+            values.nulls(), // ignore values
+            opt_filter,
+            |group_index| {
+                self.counts[group_index] += 1;
+            },
+        );
 
         Ok(())
     }
@@ -186,12 +135,29 @@ impl GroupsAccumulator for CountGroupsAccumulator {
         assert_eq!(values.len(), 1, "one argument to merge_batch");
         // first batch is counts, second is partial sums
         let partial_counts = values.get(0).unwrap().as_primitive::<Int64Type>();
-        self.update_counts_with_partial_counts(
-            group_indices,
-            partial_counts,
-            opt_filter,
-            total_num_groups,
-        );
+
+        // intermediate counts are always created as non null
+        assert_eq!(partial_counts.null_count(), 0);
+        let partial_counts = partial_counts.values();
+
+        // Adds the counts with the partial counts
+        self.counts.resize(total_num_groups, 0);
+        match opt_filter {
+            Some(filter) => filter
+                .iter()
+                .zip(group_indices.iter())
+                .zip(partial_counts.iter())
+                .for_each(|((filter_value, &group_index), partial_count)| {
+                    if let Some(true) = filter_value {
+                        self.counts[group_index] += partial_count;
+                    }
+                }),
+            None => group_indices.iter().zip(partial_counts.iter()).for_each(
+                |(&group_index, partial_count)| {
+                    self.counts[group_index] += partial_count;
+                },
+            ),
+        }
 
         Ok(())
     }
@@ -199,7 +165,9 @@ impl GroupsAccumulator for CountGroupsAccumulator {
     fn evaluate(&mut self) -> Result<ArrayRef> {
         let counts = std::mem::take(&mut self.counts);
 
-        let array = PrimitiveArray::<Int64Type>::new(counts.into(), None);
+        // Count is always non null (null inputs just don't contribute to the overall values)
+        let nulls = None;
+        let array = PrimitiveArray::<Int64Type>::new(counts.into(), nulls);
 
         Ok(Arc::new(array))
     }
@@ -207,7 +175,7 @@ impl GroupsAccumulator for CountGroupsAccumulator {
     // return arrays for sums and counts
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
         let counts = std::mem::take(&mut self.counts);
-        let counts: PrimitiveArray<Int64Type> = Int64Array::from(counts); // zero copy
+        let counts: PrimitiveArray<Int64Type> = Int64Array::from(counts); // zero copy, no nulls
         Ok(vec![Arc::new(counts) as ArrayRef])
     }
 
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index aca61eb780fb..8cd53da933e5 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -48,11 +48,24 @@ use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 ///
 #[derive(Debug)]
 pub struct NullState {
-    /// If we have seen a null input value for `group_index`
+    /// Tracks validity (whether we have seen a null input value for
+    /// `group_index`)
+    ///
+    /// If null_inputs[i] is true, it means we haven't seen any null values for
+    /// that group (including not having seen any values at all)
+    ///
+    /// If null_inputs[i] is false, it means we saw at least one null value for
+    /// that group
     null_inputs: Option<BooleanBufferBuilder>,
 
     /// If there has been a filter value, has it seen any non-filtered
     /// input values for `group_index`?
+    ///
+    /// If seen_values[i] is true, it means we have seen at least one
+    /// non null value for this group
+    ///
+    /// If seen_values[i] is false, it means we have not seen any
+    /// values that pass the filter yet for the group
     seen_values: Option<BooleanBufferBuilder>,
 }
 
@@ -154,8 +167,9 @@ impl NullState {
                             |(&group_index, &new_value)| {
                                 // valid bit was set, real vale
                                 let is_valid = (mask & index_mask) != 0;
-                                value_fn(group_index, new_value);
-                                if !is_valid {
+                                if is_valid {
+                                    value_fn(group_index, new_value);
+                                } else {
                                     // input null means this group is now null
                                     null_inputs.set_bit(group_index, false);
                                 }
@@ -172,8 +186,9 @@ impl NullState {
                     .enumerate()
                     .for_each(|(i, (&group_index, &new_value))| {
                         let is_valid = remainder_bits & (1 << i) != 0;
-                        value_fn(group_index, new_value);
-                        if !is_valid {
+                        if is_valid {
+                            value_fn(group_index, new_value);
+                        } else {
                             // input null means this group is now null
                             null_inputs.set_bit(group_index, false);
                         }
@@ -284,262 +299,91 @@ impl NullState {
     }
 }
 
-/// This function is used to update the accumulator state per row,
-/// for a `PrimitiveArray<T>` with no nulls. It is the inner loop for
-/// many GroupsAccumulators and thus performance critical.
-///
-/// # Arguments:
-///
-/// * `values`: the input arguments to the accumulator
-/// * `group_indices`:  To which groups do the rows in `values` belong, group id)
-/// * `opt_filter`: if present, invoke value_fn if opt_filter[i] is true
-/// * `value_fn`: function invoked for each (group_index, value) pair.
-///
-/// `F`: Invoked for each input row like `value_fn(group_index, value)
-///
-/// # Example
-///
-/// ```text
-///  ┌─────────┐   ┌─────────┐   ┌ ─ ─ ─ ─ ┐
-///  │ ┌─────┐ │   │ ┌─────┐ │     ┌─────┐
-///  │ │  2  │ │   │ │ 200 │ │   │ │  t  │ │
-///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
-///  │ │  2  │ │   │ │ 100 │ │   │ │  f  │ │
-///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
-///  │ │  0  │ │   │ │ 200 │ │   │ │  t  │ │
-///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
-///  │ │  1  │ │   │ │ 200 │ │   │ │NULL │ │
-///  │ ├─────┤ │   │ ├─────┤ │     ├─────┤
-///  │ │  0  │ │   │ │ 300 │ │   │ │  t  │ │
-///  │ └─────┘ │   │ └─────┘ │     └─────┘
-///  └─────────┘   └─────────┘   └ ─ ─ ─ ─ ┘
-///
-/// group_indices   values        opt_filter
-/// ```
-///
-/// In the example above, `value_fn` is invoked for each (group_index,
-/// value) pair where `opt_filter[i]` is true
-///
-/// ```text
-/// value_fn(2, 200)
-/// value_fn(0, 200)
-/// value_fn(0, 300)
-/// ```
-///
-pub fn accumulate_all<T, F>(
-    group_indices: &[usize],
-    values: &PrimitiveArray<T>,
-    opt_filter: Option<&BooleanArray>,
-    mut value_fn: F,
-) where
-    T: ArrowNumericType + Send,
-    F: FnMut(usize, T::Native) + Send,
-{
-    // Given performance is critical, assert if the wrong flavor is called
-    assert_eq!(
-        values.null_count(), 0,
-        "Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
-    );
-
-    let data: &[T::Native] = values.values();
-    assert_eq!(data.len(), group_indices.len());
-
-    let iter = group_indices.iter().zip(data.iter());
-
-    // handle filter values with a specialized loop
-    if let Some(filter) = opt_filter {
-        assert_eq!(filter.len(), group_indices.len());
-        // The performance with a filter could be improved by
-        // iterating over the filter in chunks, rather than a single
-        // iterator. TODO file a ticket
-        let iter = iter.zip(filter.iter());
-        for ((&group_index, &new_value), filter_value) in iter {
-            if let Some(true) = filter_value {
-                value_fn(group_index, new_value)
-            }
-        }
-    } else {
-        for (&group_index, &new_value) in iter {
-            value_fn(group_index, new_value)
-        }
-    }
-}
-
 /// This function is called to update the accumulator state per row
 /// when the value is not needed (e.g. COUNT)
 ///
-/// `F`: Invoked like `value_fn(group_index).
+/// `F`: Invoked like `index_fn(group_index)` for all non null values
+/// passing the filter. Note that no tracking is done for null inputs
+/// or which groups have seen any values
 pub fn accumulate_indices<F>(
     group_indices: &[usize],
+    nulls: Option<&NullBuffer>,
     opt_filter: Option<&BooleanArray>,
     mut index_fn: F,
 ) where
     F: FnMut(usize) + Send,
 {
-    let iter = group_indices.iter();
-    // handle filter values with a specialized loop
-    if let Some(filter) = opt_filter {
-        assert_eq!(filter.len(), group_indices.len());
-        // The performance with a filter could be improved by
-        // iterating over the filter in chunks, rather than a single
-        // iterator. TODO file a ticket
-        let iter = iter.zip(filter.iter());
-        for (&group_index, filter_value) in iter {
-            if let Some(true) = filter_value {
+    match (nulls, opt_filter) {
+        (None, None) => {
+            for &group_index in group_indices.iter() {
                 index_fn(group_index)
             }
         }
-    } else {
-        for &group_index in iter {
-            index_fn(group_index)
-        }
-    }
-}
-
-/// This function is called to update the accumulator state per row,
-/// for a `PrimitiveArray<T>` that can have nulls. See
-/// [`accumulate_all`] for more detail and example
-///
-/// `F`: Invoked like `value_fn(group_index, value, is_valid).
-///
-/// NOTE the parameter is true when the value is VALID (not when it is
-/// NULL).
-pub fn accumulate_all_nullable<T, F>(
-    group_indices: &[usize],
-    values: &PrimitiveArray<T>,
-    opt_filter: Option<&BooleanArray>,
-    mut value_fn: F,
-) where
-    T: ArrowNumericType + Send,
-    F: FnMut(usize, T::Native, bool) + Send,
-{
-    // Given performance is critical, assert if the wrong flavor is called
-    let valids = values
-        .nulls()
-        .expect("Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)");
-
-    if let Some(filter) = opt_filter {
-        assert_eq!(filter.len(), values.len());
-        assert_eq!(filter.len(), group_indices.len());
-        // The performance with a filter could be improved by
-        // iterating over the filter in chunks, rather than using
-        // iterators. TODO file a ticket
-        filter
-            .iter()
-            .zip(group_indices.iter())
-            .zip(values.iter())
-            .for_each(|((filter_value, group_index), new_value)| {
-                // did value[i] pass the filter?
+        (None, Some(filter)) => {
+            assert_eq!(filter.len(), group_indices.len());
+            // The performance with a filter could be improved by
+            // iterating over the filter in chunks, rather than a single
+            // iterator. TODO file a ticket
+            let iter = group_indices.iter().zip(filter.iter());
+            for (&group_index, filter_value) in iter {
                 if let Some(true) = filter_value {
-                    // Is value[i] valid?
-                    match new_value {
-                        Some(new_value) => value_fn(*group_index, new_value, true),
-                        None => value_fn(*group_index, Default::default(), false),
-                    }
+                    index_fn(group_index)
                 }
-            })
-    } else {
-        // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
-        // iterate over in chunks of 64 bits for more efficient null checking
-        let data: &[T::Native] = values.values();
-        assert_eq!(data.len(), group_indices.len());
-        let group_indices_chunks = group_indices.chunks_exact(64);
-        let data_chunks = data.chunks_exact(64);
-        let bit_chunks = valids.inner().bit_chunks();
-
-        let group_indices_remainder = group_indices_chunks.remainder();
-        let data_remainder = data_chunks.remainder();
-
-        group_indices_chunks
-            .zip(data_chunks)
-            .zip(bit_chunks.iter())
-            .for_each(|((group_index_chunk, data_chunk), mask)| {
-                // index_mask has value 1 << i in the loop
-                let mut index_mask = 1;
-                group_index_chunk.iter().zip(data_chunk.iter()).for_each(
-                    |(&group_index, &new_value)| {
+            }
+        }
+        (Some(valids), None) => {
+            assert_eq!(valids.len(), group_indices.len());
+            // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
+            // iterate over in chunks of 64 bits for more efficient null checking
+            let group_indices_chunks = group_indices.chunks_exact(64);
+            let bit_chunks = valids.inner().bit_chunks();
+
+            let group_indices_remainder = group_indices_chunks.remainder();
+
+            group_indices_chunks.zip(bit_chunks.iter()).for_each(
+                |(group_index_chunk, mask)| {
+                    // index_mask has value 1 << i in the loop
+                    let mut index_mask = 1;
+                    group_index_chunk.iter().for_each(|&group_index| {
                         // valid bit was set, real vale
                         let is_valid = (mask & index_mask) != 0;
-                        value_fn(group_index, new_value, is_valid);
+                        if is_valid {
+                            index_fn(group_index);
+                        }
                         index_mask <<= 1;
-                    },
-                )
-            });
-
-        // handle any remaining bits (after the intial 64)
-        let remainder_bits = bit_chunks.remainder_bits();
-        group_indices_remainder
-            .iter()
-            .zip(data_remainder.iter())
-            .enumerate()
-            .for_each(|(i, (&group_index, &new_value))| {
-                let is_valid = remainder_bits & (1 << i) != 0;
-                value_fn(group_index, new_value, is_valid)
-            });
-    }
-}
+                    })
+                },
+            );
 
-pub fn accumulate_indices_nullable<F>(
-    group_indices: &[usize],
-    array: &dyn Array,
-    opt_filter: Option<&BooleanArray>,
-    mut index_fn: F,
-) where
-    F: FnMut(usize) + Send,
-{
-    // Given performance is critical, assert if the wrong flavor is called
-    let valids = array
-        .nulls()
-        .expect("Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)");
-
-    if let Some(filter) = opt_filter {
-        assert_eq!(filter.len(), group_indices.len());
-        // The performance with a filter could be improved by
-        // iterating over the filter in chunks, rather than using
-        // iterators. TODO file a ticket
-        filter.iter().zip(group_indices.iter()).for_each(
-            |(filter_value, &group_index)| {
-                // did value[i] pass the filter?
-                if let Some(true) = filter_value {
-                    // Is value[i] valid?
-                    index_fn(group_index)
-                }
-            },
-        )
-    } else {
-        // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
-        // iterate over in chunks of 64 bits for more efficient null checking
-        let group_indices_chunks = group_indices.chunks_exact(64);
-        let bit_chunks = valids.inner().bit_chunks();
-
-        let group_indices_remainder = group_indices_chunks.remainder();
-
-        group_indices_chunks.zip(bit_chunks.iter()).for_each(
-            |(group_index_chunk, mask)| {
-                // index_mask has value 1 << i in the loop
-                let mut index_mask = 1;
-                group_index_chunk.iter().for_each(|&group_index| {
-                    // valid bit was set, real vale
-                    let is_valid = (mask & index_mask) != 0;
+            // handle any remaining bits (after the initial 64)
+            let remainder_bits = bit_chunks.remainder_bits();
+            group_indices_remainder
+                .iter()
+                .enumerate()
+                .for_each(|(i, &group_index)| {
+                    let is_valid = remainder_bits & (1 << i) != 0;
                     if is_valid {
-                        index_fn(group_index);
+                        index_fn(group_index)
+                    }
+                });
+        }
+
+        (Some(valids), Some(filter)) => {
+            assert_eq!(filter.len(), group_indices.len());
+            assert_eq!(valids.len(), group_indices.len());
+            // The performance with a filter could likely be improved by
+            // iterating over the filter in chunks, rather than using
+            // iterators. TODO file a ticket
+            filter
+                .iter()
+                .zip(group_indices.iter())
+                .zip(valids.iter())
+                .for_each(|((filter_value, &group_index), is_valid)| {
+                    if let (Some(true), true) = (filter_value, is_valid) {
+                        index_fn(group_index)
                     }
-                    index_mask <<= 1;
                 })
-            },
-        );
-
-        // handle any remaining bits (after the intial 64)
-        let remainder_bits = bit_chunks.remainder_bits();
-        group_indices_remainder
-            .iter()
-            .enumerate()
-            .for_each(|(i, &group_index)| {
-                let is_valid = remainder_bits & (1 << i) != 0;
-                if is_valid {
-                    index_fn(group_index)
-                }
-            });
+        }
     }
 }
 
@@ -569,106 +413,50 @@ mod test {
     use super::*;
 
     use arrow_array::UInt32Array;
+    use hashbrown::HashSet;
     use rand::{rngs::ThreadRng, Rng};
 
     #[test]
-    fn accumulate_no_filter() {
-        Fixture::new().accumulate_all_test()
-    }
-
-    #[test]
-    fn accumulate_with_filter() {
-        Fixture::new()
-            .with_filter(|group_index, _value, _value_opt| {
-                if group_index < 20 {
+    fn accumulate() {
+        let group_indices = (0..100).collect();
+        let values = (0..100).map(|i| (i + 1) * 10).collect();
+        let values_with_nulls = (0..100)
+            .map(|i| if i % 3 == 0 { None } else { Some((i + 1) * 10) })
+            .collect();
+
+        // default to every fifth value being false, every even
+        // being null
+        let filter: BooleanArray = (0..100)
+            .map(|i| {
+                let is_even = i % 2 == 0;
+                let is_fifth = i % 5 == 0;
+                if is_even {
                     None
-                } else if group_index < 40 {
+                } else if is_fifth {
                     Some(false)
                 } else {
                     Some(true)
                 }
             })
-            .accumulate_all_test();
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "assertion failed: `(left == right)`\n  left: `34`,\n right: `0`: Called accumulate_all with nullable array (call accumulate_all_nullable instead)"
-    )]
-    fn accumulate_with_nullable_panics() {
-        let fixture = Fixture::new();
-        // call with an array that has nulls should panic
-        accumulate_all(
-            &fixture.group_indices,
-            &fixture.values_with_nulls_array(),
-            fixture.opt_filter(),
-            |_, _| {},
-        );
-    }
+            .collect();
 
-    #[test]
-    fn accumulate_nullable_no_filter() {
-        Fixture::new().accumulate_all_nullable_test()
-    }
-
-    #[test]
-    fn accumulate_nullable_with_filter() {
-        Fixture::new()
-            .with_filter(|group_index, _value, _value_opt| {
-                if group_index < 20 {
-                    None
-                } else if group_index < 40 {
-                    Some(false)
-                } else {
-                    Some(true)
-                }
-            })
-            .accumulate_all_nullable_test();
-    }
-
-    #[test]
-    #[should_panic(
-        expected = "Called accumulate_all_nullable with non-nullable array (call accumulate_all instead)"
-    )]
-    fn accumulate_nullable_with_non_nullable_panics() {
-        let fixture = Fixture::new();
-        // call with an array that has nulls should panic
-        accumulate_all_nullable(
-            &fixture.group_indices,
-            &fixture.values_array(),
-            fixture.opt_filter(),
-            |_, _, _| {},
-        );
-    }
-
-    #[test]
-    fn accumulate_fuzz() {
-        let mut rng = rand::thread_rng();
-        for _ in 0..100 {
-            Fixture::new_random(&mut rng).accumulate_all_test();
+        Fixture {
+            group_indices,
+            values,
+            values_with_nulls,
+            filter,
         }
+        .run()
     }
 
     #[test]
-    fn accumulate_nullable_fuzz() {
+    fn accumulate_fuzz() {
         let mut rng = rand::thread_rng();
-        let mut nullable_called = false;
         for _ in 0..100 {
-            let fixture = Fixture::new_random(&mut rng);
-            // sometimes the random generator will create an array
-            // with no nulls so avoid panic'ing in tests
-            if fixture.values_with_nulls.iter().any(|v| v.is_none()) {
-                nullable_called = true;
-                fixture.accumulate_all_nullable_test();
-            } else {
-                fixture.accumulate_all_test();
-            }
-            assert!(nullable_called);
+            Fixture::new_random(&mut rng).run();
         }
     }
 
-    // todo accumulate testing with fuzz
-
     /// Values for testing (there are enough values to exercise the 64 bit chunks
     struct Fixture {
         /// 100..0
@@ -681,74 +469,44 @@ mod test {
         /// None, Some(20), Some(30), None ...
         values_with_nulls: Vec<Option<u32>>,
 
-        /// Optional filter (defaults to None)
-        opt_filter: Option<BooleanArray>,
+        /// filter values (`run` tests both with and without this filter)
+        filter: BooleanArray,
     }
 
     impl Fixture {
-        fn new() -> Self {
-            Self {
-                group_indices: (0..100).collect(),
-                values: (0..100).map(|i| (i + 1) * 10).collect(),
-                values_with_nulls: (0..100)
-                    .map(|i| if i % 3 == 0 { None } else { Some((i + 1) * 10) })
-                    .collect(),
-                opt_filter: None,
-            }
-        }
-
-        /// Applies `f(group_index, value, value_with_null)` for all
-        /// values in this fixture and set `opt_filter` to the result
-        fn with_filter<F>(mut self, mut f: F) -> Self
-        where
-            F: FnMut(usize, u32, Option<u32>) -> Option<bool>,
-        {
-            let filter: BooleanArray = self
-                .group_indices
-                .iter()
-                .zip(self.values.iter())
-                .zip(self.values_with_nulls.iter())
-                .map(|((&group_index, &value), &value_with_null)| {
-                    f(group_index, value, value_with_null)
-                })
+        fn new_random(rng: &mut ThreadRng) -> Self {
+            // Number of input values in a batch
+            let num_values: usize = rng.gen_range(1..200);
+            // number of distinct groups
+            let num_groups: usize = rng.gen_range(2..1000);
+            let max_group = num_groups - 1;
+
+            let group_indices: Vec<usize> = (0..num_values)
+                .map(|_| rng.gen_range(0..max_group))
                 .collect();
 
-            self.opt_filter = Some(filter);
-            self
-        }
+            let values: Vec<u32> = (0..num_values).map(|_| rng.gen()).collect();
 
-        fn new_random(rng: &mut ThreadRng) -> Self {
-            let num_groups: usize = rng.gen_range(0..1000);
-            let group_indices: Vec<usize> = (0..num_groups).map(|_| rng.gen()).collect();
-
-            let values: Vec<u32> = (0..num_groups).map(|_| rng.gen()).collect();
-
-            // with 30 percent probability, add a filter
-            let opt_filter = if 0.3 < rng.gen_range(0.0..1.0) {
-                // 10% chance of false
-                // 10% change of null
-                // 80% chance of true
-                let filter: BooleanArray = (0..num_groups)
-                    .map(|_| {
-                        let filter_value = rng.gen_range(0.0..1.0);
-                        if filter_value < 0.1 {
-                            Some(false)
-                        } else if filter_value < 0.2 {
-                            None
-                        } else {
-                            Some(true)
-                        }
-                    })
-                    .collect();
-                Some(filter)
-            } else {
-                None
-            };
+            // 10% chance of false
+            // 10% chance of null
+            // 80% chance of true
+            let filter: BooleanArray = (0..num_values)
+                .map(|_| {
+                    let filter_value = rng.gen_range(0.0..1.0);
+                    if filter_value < 0.1 {
+                        Some(false)
+                    } else if filter_value < 0.2 {
+                        None
+                    } else {
+                        Some(true)
+                    }
+                })
+                .collect();
 
             // random values with random number and location of nulls
             // random null percentage
             let null_pct: f32 = rng.gen_range(0.0..1.0);
-            let values_with_nulls: Vec<Option<u32>> = (0..num_groups)
+            let values_with_nulls: Vec<Option<u32>> = (0..num_values)
                 .map(|_| {
                     let is_null = null_pct < rng.gen_range(0.0..1.0);
                     if is_null {
@@ -763,7 +521,7 @@ mod test {
                 group_indices,
                 values,
                 values_with_nulls,
-                opt_filter,
+                filter,
             }
         }
 
@@ -777,71 +535,261 @@ mod test {
             UInt32Array::from(self.values_with_nulls.clone())
         }
 
-        fn opt_filter(&self) -> Option<&BooleanArray> {
-            self.opt_filter.as_ref()
-        }
+        /// Calls `NullState::accumulate` and `accumulate_indices`
+        /// with all combinations of nulls and filter values
+        fn run(&self) {
+            let total_num_groups = *self.group_indices.iter().max().unwrap() + 1;
+
+            let group_indices = &self.group_indices;
+            let values_array = self.values_array();
+            let values_with_nulls_array = self.values_with_nulls_array();
+            let filter = &self.filter;
 
-        // Calls `accumulate_all` with group_indices, values, and
-        // opt_filter and ensures it calls the right values
-        fn accumulate_all_test(&self) {
-            let mut accumulated = vec![];
-            accumulate_all(
-                &self.group_indices,
-                &self.values_array(),
-                self.opt_filter(),
-                |group_index, value| accumulated.push((group_index, value)),
+            // no null, no filters
+            Self::accumulate_test(group_indices, &values_array, None, total_num_groups);
+
+            // nulls, no filters
+            Self::accumulate_test(
+                group_indices,
+                &values_with_nulls_array,
+                None,
+                total_num_groups,
             );
 
-            // check_values[i] is true if the value[i] should have been included in the output
-            let check_values = match self.opt_filter.as_ref() {
-                Some(filter) => filter.into_iter().collect::<Vec<_>>(),
-                None => vec![Some(true); self.values.len()],
-            };
+            // no nulls, filters
+            Self::accumulate_test(
+                group_indices,
+                &values_array,
+                Some(filter),
+                total_num_groups,
+            );
 
-            // Should have only checked indexes where the filter was true
-            let mut check_idx = 0;
-            for (i, check_value) in check_values.iter().enumerate() {
-                if let Some(true) = check_value {
-                    let (group_index, value) = &accumulated[check_idx];
-                    check_idx += 1;
-                    assert_eq!(*group_index, self.group_indices[i]);
-                    assert_eq!(*value, self.values[i]);
-                }
-            }
+            // nulls, filters
+            Self::accumulate_test(
+                group_indices,
+                &values_with_nulls_array,
+                Some(filter),
+                total_num_groups,
+            );
         }
 
-        // Calls `accumulate_all_nullable` with group_indices, values,
-        // and opt_filter and ensures it calls the right values
-        fn accumulate_all_nullable_test(&self) {
-            let mut accumulated = vec![];
-
-            accumulate_all_nullable(
-                &self.group_indices,
-                &self.values_with_nulls_array(),
-                self.opt_filter(),
-                |group_index, value, is_valid| {
-                    let value = if is_valid { Some(value) } else { None };
-                    accumulated.push((group_index, value));
+        /// Calls `NullState::accumulate` and `accumulate_indices` to
+        /// ensure it generates the correct values.
+        ///
+        fn accumulate_test(
+            group_indices: &[usize],
+            values: &UInt32Array,
+            opt_filter: Option<&BooleanArray>,
+            total_num_groups: usize,
+        ) {
+            println!("group_indices: {group_indices:?}");
+            println!("values: {values:?}");
+            println!("opt_filter: {opt_filter:?}");
+            println!("total_num_groups: {total_num_groups}");
+            Self::accumulate_values_test(
+                group_indices,
+                values,
+                opt_filter,
+                total_num_groups,
+            );
+            Self::accumulate_indices_test(group_indices, values.nulls(), opt_filter);
+        }
+
+        /// This is effectively a different implementation of
+        /// accumulate that we compare with the above implementation
+        fn accumulate_values_test(
+            group_indices: &[usize],
+            values: &UInt32Array,
+            opt_filter: Option<&BooleanArray>,
+            total_num_groups: usize,
+        ) {
+            let mut accumulated_values = vec![];
+            let mut null_state = NullState::new();
+
+            null_state.accumulate(
+                group_indices,
+                values,
+                opt_filter,
+                total_num_groups,
+                |group_index, value| {
+                    accumulated_values.push((group_index, value));
                 },
             );
 
-            // check_values[i] is true if the value[i] should have been included in the output
-            let check_values = match self.opt_filter.as_ref() {
-                Some(filter) => filter.into_iter().collect::<Vec<_>>(),
-                None => vec![Some(true); self.values.len()],
+            // Figure out the expected values
+            let mut expected_values = vec![];
+            let mut expected_null_input = HashSet::new();
+            let mut expected_seen_values = HashSet::new();
+
+            match opt_filter {
+                None => group_indices.iter().zip(values.iter()).for_each(
+                    |(&group_index, value)| {
+                        expected_seen_values.insert(group_index);
+                        if let Some(value) = value {
+                            expected_values.push((group_index, value));
+                        } else {
+                            expected_null_input.insert(group_index);
+                        }
+                    },
+                ),
+                Some(filter) => {
+                    group_indices
+                        .iter()
+                        .zip(values.iter())
+                        .zip(filter.iter())
+                        .for_each(|((&group_index, value), is_included)| {
+                            // if value passed filter
+                            if let Some(true) = is_included {
+                                expected_seen_values.insert(group_index);
+                                if let Some(value) = value {
+                                    expected_values.push((group_index, value));
+                                } else {
+                                    expected_null_input.insert(group_index);
+                                }
+                            }
+                        });
+                }
+            }
+
+            assert_eq!(accumulated_values, expected_values,
+                       "\n\naccumulated_values:{accumulated_values:#?}\n\nexpected_values:{expected_values:#?}");
+
+            // validate null state
+            if values.null_count() > 0 {
+                let null_inputs =
+                    null_state.null_inputs.as_ref().unwrap().finish_cloned();
+                for (group_index, is_valid) in null_inputs.iter().enumerate() {
+                    let expected_valid = !expected_null_input.contains(&group_index);
+                    assert_eq!(
+                        expected_valid, is_valid,
+                        "mismatch at for group {group_index}"
+                    );
+                }
+            }
+
+            // validate seen_values
+
+            if opt_filter.is_some() {
+                let seen_values =
+                    null_state.seen_values.as_ref().unwrap().finish_cloned();
+                for (group_index, is_seen) in seen_values.iter().enumerate() {
+                    let expected_seen = expected_seen_values.contains(&group_index);
+                    assert_eq!(
+                        expected_seen, is_seen,
+                        "mismatch at for group {group_index}"
+                    );
+                }
+            }
+
+            // Validate the final buffer (one value per group)
+            let expected_null_buffer = match (values.null_count() > 0, opt_filter.is_some()) {
+                (false, false) => None,
+                // only nulls
+                (true, false) => {
+                    let null_buffer: NullBuffer = (0..total_num_groups)
+                        .map(|group_index| {
+                            // there was and no null inputs
+                            !expected_null_input.contains(&group_index)
+                        })
+                        .collect();
+                    Some(null_buffer)
+                },
+                // only filter
+                (false, true) => {
+                    let null_buffer: NullBuffer = (0..total_num_groups)
+                        .map(|group_index| {
+                            // we saw a value
+                            expected_seen_values.contains(&group_index)
+                        })
+                        .collect();
+                    Some(null_buffer)
+                }
+                // nulls and filter
+                (true, true) => {
+                    let null_buffer: NullBuffer = (0..total_num_groups)
+                        .map(|group_index| {
+                            // output is valid if there was at least one
+                            // input value and no null inputs
+                            expected_seen_values.contains(&group_index)
+                                && !expected_null_input.contains(&group_index)
+                        })
+                        .collect();
+                    Some(null_buffer)
+                }
             };
 
-            // Should have see all indexes and values in order
-            let mut check_idx = 0;
-            for (i, check_value) in check_values.iter().enumerate() {
-                if let Some(true) = check_value {
-                    let (group_index, value) = &accumulated[check_idx];
-                    check_idx += 1;
+            let null_buffer = null_state.build();
+
+            if null_buffer != expected_null_buffer {
+                if let (Some(null_buffer), Some(expected_null_buffer)) = (null_buffer.as_ref(), expected_null_buffer.as_ref()) {
+                    null_buffer.iter()
+                        .zip(expected_null_buffer.iter())
+                        .enumerate()
+                        .for_each(|(i, (valid, expected_valid))| {
+                            println!("nulls[{i}]: valid: {valid}, expected: {expected_valid}");
+                            println!("  expected_seen_values: {} expected_null_input: {}",
+                                     expected_seen_values.contains(&i),
+                                     expected_null_input.contains(&i)
+                            );
+
+                            assert_eq!(valid, expected_valid, "Index {i}");
+                        })
+                };
+            }
 
-                    assert_eq!(*group_index, self.group_indices[i]);
-                    assert_eq!(*value, self.values_with_nulls[i]);
+            assert_eq!(null_buffer, expected_null_buffer);
+        }
+
+        // Calls `accumulate_indices` with group_indices, nulls, and
+        // opt_filter and ensures it is called with the right group indices
+        fn accumulate_indices_test(
+            group_indices: &[usize],
+            nulls: Option<&NullBuffer>,
+            opt_filter: Option<&BooleanArray>,
+        ) {
+            let mut accumulated_values = vec![];
+
+            accumulate_indices(group_indices, nulls, opt_filter, |group_index| {
+                accumulated_values.push(group_index);
+            });
+
+            // Figure out the expected values
+            let mut expected_values = vec![];
+
+            match (nulls, opt_filter) {
+                (None, None) => group_indices.iter().for_each(|&group_index| {
+                    expected_values.push(group_index);
+                }),
+                (Some(nulls), None) => group_indices.iter().zip(nulls.iter()).for_each(
+                    |(&group_index, is_valid)| {
+                        if is_valid {
+                            expected_values.push(group_index);
+                        }
+                    },
+                ),
+                (None, Some(filter)) => group_indices.iter().zip(filter.iter()).for_each(
+                    |(&group_index, is_included)| {
+                        if let Some(true) = is_included {
+                            expected_values.push(group_index);
+                        }
+                    },
+                ),
+                (Some(nulls), Some(filter)) => {
+                    group_indices
+                        .iter()
+                        .zip(nulls.iter())
+                        .zip(filter.iter())
+                        .for_each(|((&group_index, is_valid), is_included)| {
+                            // if value passed filter
+                            if let (true, Some(true)) = (is_valid, is_included) {
+                                expected_values.push(group_index);
+                            }
+                        });
                 }
             }
+
+            assert_eq!(accumulated_values, expected_values,
+                       "\n\naccumulated_values:{accumulated_values:#?}\n\nexpected_values:{expected_values:#?}");
         }
     }
 }

From c041eccddb93df6fb5ecb0ee6ba160be6617c441 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 5 Jul 2023 12:07:55 -0400
Subject: [PATCH 44/89] fix fmt

---
 .../groups_accumulator/accumulate.rs          | 92 ++++++++-----------
 1 file changed, 36 insertions(+), 56 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 8cd53da933e5..d04cda7c8a66 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -582,10 +582,6 @@ mod test {
             opt_filter: Option<&BooleanArray>,
             total_num_groups: usize,
         ) {
-            println!("group_indices: {group_indices:?}");
-            println!("values: {values:?}");
-            println!("opt_filter: {opt_filter:?}");
-            println!("total_num_groups: {total_num_groups}");
             Self::accumulate_values_test(
                 group_indices,
                 values,
@@ -682,61 +678,45 @@ mod test {
             }
 
             // Validate the final buffer (one value per group)
-            let expected_null_buffer = match (values.null_count() > 0, opt_filter.is_some()) {
-                (false, false) => None,
-                // only nulls
-                (true, false) => {
-                    let null_buffer: NullBuffer = (0..total_num_groups)
-                        .map(|group_index| {
-                            // there was and no null inputs
-                            !expected_null_input.contains(&group_index)
-                        })
-                        .collect();
-                    Some(null_buffer)
-                },
-                // only filter
-                (false, true) => {
-                    let null_buffer: NullBuffer = (0..total_num_groups)
-                        .map(|group_index| {
-                            // we saw a value
-                            expected_seen_values.contains(&group_index)
-                        })
-                        .collect();
-                    Some(null_buffer)
-                }
-                // nulls and filter
-                (true, true) => {
-                    let null_buffer: NullBuffer = (0..total_num_groups)
-                        .map(|group_index| {
-                            // output is valid if there was at least one
-                            // input value and no null inputs
-                            expected_seen_values.contains(&group_index)
-                                && !expected_null_input.contains(&group_index)
-                        })
-                        .collect();
-                    Some(null_buffer)
-                }
-            };
+            let expected_null_buffer =
+                match (values.null_count() > 0, opt_filter.is_some()) {
+                    (false, false) => None,
+                    // only nulls
+                    (true, false) => {
+                        let null_buffer: NullBuffer = (0..total_num_groups)
+                            .map(|group_index| {
+                                // valid only if there were no null inputs
+                                !expected_null_input.contains(&group_index)
+                            })
+                            .collect();
+                        Some(null_buffer)
+                    }
+                    // only filter
+                    (false, true) => {
+                        let null_buffer: NullBuffer = (0..total_num_groups)
+                            .map(|group_index| {
+                                // we saw a value
+                                expected_seen_values.contains(&group_index)
+                            })
+                            .collect();
+                        Some(null_buffer)
+                    }
+                    // nulls and filter
+                    (true, true) => {
+                        let null_buffer: NullBuffer = (0..total_num_groups)
+                            .map(|group_index| {
+                                // output is valid if there was at least one
+                                // input value and no null inputs
+                                expected_seen_values.contains(&group_index)
+                                    && !expected_null_input.contains(&group_index)
+                            })
+                            .collect();
+                        Some(null_buffer)
+                    }
+                };
 
             let null_buffer = null_state.build();
 
-            if null_buffer != expected_null_buffer {
-                if let (Some(null_buffer), Some(expected_null_buffer)) = (null_buffer.as_ref(), expected_null_buffer.as_ref()) {
-                    null_buffer.iter()
-                        .zip(expected_null_buffer.iter())
-                        .enumerate()
-                        .for_each(|(i, (valid, expected_valid))| {
-                            println!("nulls[{i}]: valid: {valid}, expected: {expected_valid}");
-                            println!("  expected_seen_values: {} expected_null_input: {}",
-                                     expected_seen_values.contains(&i),
-                                     expected_null_input.contains(&i)
-                            );
-
-                            assert_eq!(valid, expected_valid, "Index {i}");
-                        })
-                };
-            }
-
             assert_eq!(null_buffer, expected_null_buffer);
         }
 

From f973a6535a067e4a852784e4ec7dfd605bfd5055 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 5 Jul 2023 12:35:01 -0400
Subject: [PATCH 45/89] Fix clippy

---
 .../src/physical_plan/aggregates/row_hash2.rs |  6 ++---
 .../physical-expr/src/aggregate/average.rs    | 27 +++++++++----------
 .../aggregate/groups_accumulator/adapter.rs   | 12 ++++-----
 datafusion/physical-expr/src/aggregate/sum.rs |  7 ++---
 4 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 0b6862faace7..345d0c2be507 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -463,7 +463,7 @@ impl GroupedHashAggregateStream2 {
                     AggregateMode::Partial | AggregateMode::Single => {
                         acc.update_batch(
                             values,
-                            &group_indices,
+                            group_indices,
                             opt_filter,
                             total_num_groups,
                         )?;
@@ -473,7 +473,7 @@ impl GroupedHashAggregateStream2 {
                         // use merge
                         acc.merge_batch(
                             values,
-                            &group_indices,
+                            group_indices,
                             opt_filter,
                             total_num_groups,
                         )?;
@@ -501,7 +501,7 @@ impl GroupedHashAggregateStream2 {
         }
 
         // First output rows are the groups
-        let groups_rows = self.group_values.iter().map(|owned_row| owned_row);
+        let groups_rows = self.group_values.iter();
 
         let mut output: Vec<ArrayRef> = self.row_converter.convert_rows(groups_rows)?;
 
diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index bf553f4c6507..3e335e5c032c 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -182,20 +182,19 @@ impl AggregateExpr for Avg {
     fn groups_accumulator_supported(&self) -> bool {
         use DataType::*;
 
-        match &self.sum_data_type {
-            Int8
-            | Int16
-            | Int32
-            | Int64
-            | UInt8
-            | UInt16
-            | UInt32
-            | UInt64
-            | Float32
-            | Float64
-            | Decimal128(_, _) => true,
-            _ => false,
-        }
+        matches!(
+            &self.sum_data_type,
+            Int8 | Int16
+                | Int32
+                | Int64
+                | UInt8
+                | UInt16
+                | UInt32
+                | UInt64
+                | Float32
+                | Float64
+                | Decimal128(_, _)
+        )
     }
 
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
index 5961256be924..aae2a91a7d04 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -203,12 +203,12 @@ impl GroupsAccumulatorAdapter {
         // RecordBatch(es)
         let iter = groups_with_rows.iter().zip(offsets.windows(2));
 
-        for (group_idx, offsets) in iter {
-            let state = &mut self.states[*group_idx as usize];
+        for (&group_idx, offsets) in iter {
+            let state = &mut self.states[group_idx];
             let size_pre = state.size();
 
             let values_to_accumulate =
-                slice_and_maybe_filter(&values, opt_filter.as_ref(), &offsets)?;
+                slice_and_maybe_filter(&values, opt_filter.as_ref(), offsets)?;
             (f)(state.accumulator.as_mut(), &values_to_accumulate)?;
 
             // clear out the state
@@ -267,7 +267,7 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
 
         for state in states {
             let accumulator_state = state.accumulator.state()?;
-            results.resize_with(accumulator_state.len(), || vec![]);
+            results.resize_with(accumulator_state.len(), Vec::new);
             for (idx, state_val) in accumulator_state.into_iter().enumerate() {
                 results[idx].push(state_val);
             }
@@ -276,7 +276,7 @@ impl GroupsAccumulator for GroupsAccumulatorAdapter {
         // create an array for each intermediate column
         let arrays = results
             .into_iter()
-            .map(|state| ScalarValue::iter_to_array(state))
+            .map(ScalarValue::iter_to_array)
             .collect::<Result<Vec<_>>>()?;
 
         // double check each array has the same length (aka the
@@ -348,7 +348,7 @@ pub(crate) fn slice_and_maybe_filter(
         sliced_arrays
             .iter()
             .map(|array| {
-                compute::filter(array, &filter_array).map_err(DataFusionError::ArrowError)
+                compute::filter(array, filter_array).map_err(DataFusionError::ArrowError)
             })
             .collect()
     } else {
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 7b093d17d15f..e89eca2e65b8 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -601,11 +601,8 @@ where
 
         let sums = adjust_output_array(&self.sum_data_type, sums)?;
 
-        let counts = vec![0 as u64; sums.len()];
-        let counts = Arc::new(PrimitiveArray::<UInt64Type>::new(
-            counts.into(),
-            nulls.clone(),
-        ));
+        let counts = vec![0_u64; sums.len()];
+        let counts = Arc::new(PrimitiveArray::<UInt64Type>::new(counts.into(), nulls));
 
         // TODO: Sum expects sum/count array, but count is not needed
         Ok(vec![sums.clone() as ArrayRef, counts as ArrayRef])

From 24abb1441753d6bc53712628a7d2dd82a428abf7 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 5 Jul 2023 12:55:05 -0400
Subject: [PATCH 46/89] Fix docs

---
 datafusion-cli/Cargo.lock                     |  1 +
 .../src/physical_plan/aggregates/row_hash2.rs | 12 +++--
 .../physical-expr/src/aggregate/average.rs    |  2 +-
 .../physical-expr/src/aggregate/count.rs      |  2 +-
 .../groups_accumulator/accumulate.rs          | 16 +++---
 .../aggregate/groups_accumulator/adapter.rs   | 10 ++--
 .../src/aggregate/groups_accumulator/mod.rs   | 54 ++++++++++---------
 datafusion/physical-expr/src/aggregate/sum.rs |  6 +--
 .../physical-expr/src/aggregate/utils.rs      |  5 +-
 9 files changed, 57 insertions(+), 51 deletions(-)

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index b04b81cbec6f..45b2b80f7e57 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -1166,6 +1166,7 @@ dependencies = [
  "itertools 0.11.0",
  "lazy_static",
  "libc",
+ "log",
  "md-5",
  "paste",
  "petgraph",
diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 345d0c2be507..41e713672fce 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -130,11 +130,11 @@ pub(crate) struct GroupedHashAggregateStream2 {
     /// specialized for that partcular aggregate and its input types
     accumulators: Vec<Box<dyn GroupsAccumulator>>,
 
-    /// Arguments or each accumulator.
+    /// Arguments to pass to each accumulator.
     aggregate_arguments: Vec<Vec<Arc<dyn PhysicalExpr>>>,
 
     /// Optional filter expression to evaluate, one for each for
-    /// aggregate. If present, only those rows for which the filter
+    /// accumulator. If present, only those rows for which the filter
     /// evaluate to true should be included in the aggregate results.
     ///
     /// For example, for an aggregate like `SUM(x FILTER x > 100)`,
@@ -161,17 +161,19 @@ pub(crate) struct GroupedHashAggregateStream2 {
     map: RawTable<(u64, usize)>,
 
     /// The actual group by values, stored in arrow [`Row`] format. The
-    /// group_values[i] holds the group value for group_index `i`.
+    /// `group_values[i]` holds the group value for group_index `i`.
     ///
     /// The row format is used to compare group keys quickly. This is
     /// especially important for multi-column group keys.
+    ///
+    /// [`Row`]: arrow::row::Row
     group_values: Rows,
 
-    /// scratch space for the current input Batch being
+    /// scratch space for the current input [`RecordBatch`] being
     /// processed. Reused across batches here to avoid reallocations
     current_group_indices: Vec<usize>,
 
-    /// Tracks if this stream is generating input/output?
+    /// Tracks if this stream is generating input or output
     exec_state: ExecutionState,
 
     /// Execution metrics
diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 3e335e5c032c..2957a0136a9e 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -465,7 +465,7 @@ impl RowAccumulator for AvgRowAccumulator {
     }
 }
 
-/// An accumulator to compute the average of PrimitiveArray<T>.
+/// An accumulator to compute the average of [`PrimitiveArray<T>`].
 /// Stores values as native types, and does overflow checking
 ///
 /// F: Function that calcuates the average value from a sum of
diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index 5d5428a07780..1b1c12190746 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -81,7 +81,7 @@ impl Count {
     }
 }
 
-/// An accumulator to compute the average of PrimitiveArray<T>.
+/// An accumulator to compute the counts of [`PrimitiveArray<T>`].
 /// Stores values as native types, and does overflow checking
 ///
 /// Unlike most other accumulators, COUNT never produces NULLs. If no
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index d04cda7c8a66..f19576ee67fd 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Vectorized [`accumulate`] and [`accumulate_nullable`] functions.
+//! Vectorized accumulate helpers: [`NullState`] and [`accumulate_indices`]
 //!
 //! These functions are designed to be the performance critical inner
 //! loops of accumlators and thus there are multiple versions, to be
@@ -51,21 +51,21 @@ pub struct NullState {
     /// Tracks validity (if we we have seen a null input value for
     /// `group_index`)
     ///
-    /// If null_inputs[i] is true, it means we haven't seen any null values for
-    /// that group (including not having seen any)
+    /// If `null_inputs[i]` is true, have not seen any null values for
+    /// that group (also true for no values)
     ///
-    /// If null_inputs[i] is false, it means we saw at least one null value for
+    /// If `null_inputs[i]` is false, saw at least one null value for
     /// that group
     null_inputs: Option<BooleanBufferBuilder>,
 
     /// If there has been a filter value, has it seen any non-filtered
     /// input values for `group_index`?
     ///
-    /// If seen_values[i] is true, it means we have seen at least one
-    /// non null value for this group
+    /// If `seen_values[i]` is true, have seen at least one non null
+    /// value for this group
     ///
-    /// If seen_values[i] is false, it means we have not seen any
-    /// values that pass the filter yet for the group
+    /// If `seen_values[i]` is false, have not seen any values that
+    /// pass the filter yet for the group
     seen_values: Option<BooleanBufferBuilder>,
 }
 
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
index aae2a91a7d04..a403a6d584c0 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/adapter.rs
@@ -37,7 +37,7 @@ use datafusion_expr::Accumulator;
 pub struct GroupsAccumulatorAdapter {
     factory: Box<dyn Fn() -> Result<Box<dyn Accumulator>> + Send>,
 
-    /// [`Accumulators`] for each group, stored in group_index order
+    /// state for each group, stored in group_index order
     states: Vec<AccumulatorState>,
 
     /// Current memory usage, in bytes.
@@ -48,12 +48,12 @@ pub struct GroupsAccumulatorAdapter {
 }
 
 struct AccumulatorState {
-    /// [`Accumulators`]
+    /// [`Accumulator`] that stores the per-group state
     accumulator: Box<dyn Accumulator>,
 
-    // scratch space for holding the indexes in the input array that
-    // will be fed to this accumulator. Use u32 to match take kernel
-    // input
+    // scratch space: indexes in the input array that will be fed to
+    // this accumulator. Stores indexes as `u32` to match the arrow
+    // `take` kernel input.
     indices: Vec<u32>,
 }
 
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
index 9535d7d49c29..8679b8d7f12e 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
@@ -35,12 +35,19 @@ use datafusion_common::Result;
 /// expected that each GroupAccumulator will use something like `Vec<..>`
 /// to store the group states.
 pub trait GroupsAccumulator: Send {
-    /// updates the accumulator's state from a vector of arrays:
+    /// Updates the accumulator's state from its arguments, encoded as
+    /// a vector of arrow [`ArrayRef`]s.
     ///
     /// * `values`: the input arguments to the accumulator
-    /// * `group_indices`:  To which groups do the rows in `values` belong, group id)
-    /// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
-    /// * `total_num_groups`: the number of groups (the largest group_index is total_num_groups - 1)
+    ///
+    /// * `group_indices`: To which groups do the rows in `values`
+    /// belong (group id)
+    ///
+    /// * `opt_filter`: if present, only update aggregate state using
+    /// `values[i]` if `opt_filter[i]` is true
+    ///
+    /// * `total_num_groups`: the number of groups (the largest
+    /// group_index is thus `total_num_groups - 1`)
     fn update_batch(
         &mut self,
         values: &[ArrayRef],
@@ -53,25 +60,26 @@ pub trait GroupsAccumulator: Send {
     /// `RecordBatch`.
     ///
     /// The rows returned *must* be in group_index order: The value
-    /// for group_index 0, followed by 1, etc.  Any group_index that
-    /// did not have values, should be null.
+    /// for group_index 0, followed by 1, etc.
     ///
     /// OPEN QUESTION: Should this method take a "batch_size: usize"
-    /// and produce a Vec<RecordBatch> as output to avoid 1) requiring
-    /// one giant intermediate buffer?
+    /// and produce a `Vec<RecordBatch>` as output to avoid requiring
+    /// a contiguous intermediate buffer?
     ///
     /// For example, the `SUM` accumulator maintains a running sum,
     /// and `evaluate` will produce that running sum as its output for
     /// all groups, in group_index order
     ///
-    /// This call should be treated as consuming (takes `self`, but it
-    /// can not be due to keeping it object save) the accumulator is
-    /// free to release / reset it is internal state after this call
-    /// and error on any subsequent call.
+    /// This call should be treated as consuming (takes `self`) as no
+    /// other functions will be called after this (it can not
+    /// actually take `self`, otherwise the trait would not be object
+    /// safe). The accumulator is free to release / reset its
+    /// internal state after this call and error on any subsequent
+    /// call.
     fn evaluate(&mut self) -> Result<ArrayRef>;
 
-    /// Returns any intermediate aggregate state, used for multi-phase
-    /// grouping.
+    /// Returns the intermediate aggregate state for this accumulator,
+    /// used for multi-phase grouping.
     ///
     /// The rows returned *must* be in group_index order: The value
     /// for group_index 0, followed by 1, etc.  Any group_index that
@@ -79,16 +87,15 @@ pub trait GroupsAccumulator: Send {
     ///
     /// For example, AVG returns two arrays:  `SUM` and `COUNT`.
     ///
-    /// This call should be treated as consuming (takes `self`, but it
-    /// can not be due to keeping it object save) the accumulator is
-    /// free to release / reset it is internal state after this call
-    /// and error on any subsequent call.
+    /// Note more sophisticated internal state can be passed as
+    /// single `StructArray` rather than multiple arrays.
     ///
-    /// TODO: consider returning a single Array (which could be a
-    /// StructArray) instead
+    /// This call should be treated as consuming, as described in the
+    /// comments of [`Self::evaluate`].
     fn state(&mut self) -> Result<Vec<ArrayRef>>;
 
-    /// merges intermediate state (from `state()`) into this accumulators values
+    /// Merges intermediate state (from [`Self::state`]) into this
+    /// accumulator's values.
     ///
     /// For some aggregates (such as `SUM`), merge_batch is the same
     /// as `update_batch`, but for some aggregrates (such as `COUNT`)
@@ -96,9 +103,8 @@ pub trait GroupsAccumulator: Send {
     /// state is used and merged.
     ///
     /// * `values`: arrays produced from calling `state` previously to the accumulator
-    /// * `group_indices`:  To which groups do the rows in `values` belong, group id)
-    /// * `opt_filter`: if present, only update aggregate state using values[i] if opt_filter[i] is true
-    /// * `total_num_groups`: the number of groups (the largest group_index is total_num_groups - 1)
+    ///
+    /// Other arguments are the same as for [`Self::update_batch`].
     fn merge_batch(
         &mut self,
         values: &[ArrayRef],
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index e89eca2e65b8..91e4211bbf2f 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -478,11 +478,7 @@ impl RowAccumulator for SumRowAccumulator {
     }
 }
 
-/// An accumulator to compute the average of PrimitiveArray<T>.
-/// Stores values as native types, and does overflow checking
-///
-/// F: Function that calcuates the average value from a sum of
-/// T::Native and a total count
+/// An accumulator to compute the sum of values in [`PrimitiveArray<T>`]
 #[derive(Debug)]
 struct SumGroupsAccumulator<T>
 where
diff --git a/datafusion/physical-expr/src/aggregate/utils.rs b/datafusion/physical-expr/src/aggregate/utils.rs
index 67ddfa247afc..0cd0821e08b0 100644
--- a/datafusion/physical-expr/src/aggregate/utils.rs
+++ b/datafusion/physical-expr/src/aggregate/utils.rs
@@ -149,8 +149,9 @@ pub fn calculate_result_decimal_for_avg(
 
 /// Adjust array type metadata if needed
 ///
-/// Decimal128Arrays are are are created from Vec<NativeType> with default
-/// precision and scale. This function adjusts them down.
+/// Since `Decimal128Arrays` created from `Vec<NativeType>` have
+/// default precision and scale, this function adjusts the output to
+/// match `sum_data_type`.
 pub fn adjust_output_array(
     sum_data_type: &DataType,
     array: ArrayRef,

From 6e740a488dd8668566b2270867ebdc5b12c1ebe5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 20:08:11 +0200
Subject: [PATCH 47/89] Min/Max for primitives

---
 .../groups_accumulator/accumulate.rs          |  23 ++
 .../physical-expr/src/aggregate/min_max.rs    | 252 +++++++++++++++++-
 datafusion/physical-expr/src/aggregate/sum.rs |   3 +-
 3 files changed, 276 insertions(+), 2 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index f19576ee67fd..66531ccc0adf 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -717,6 +717,29 @@ mod test {
 
             let null_buffer = null_state.build();
 
+            if null_buffer != expected_null_buffer {
+                if let (Some(null_buffer), Some(expected_null_buffer)) =
+                    (null_buffer.as_ref(), expected_null_buffer.as_ref())
+                {
+                    null_buffer
+                        .iter()
+                        .zip(expected_null_buffer.iter())
+                        .enumerate()
+                        .for_each(|(i, (valid, expected_valid))| {
+                            println!(
+                                "nulls[{i}]: valid: {valid}, expected: {expected_valid}"
+                            );
+                            println!(
+                                "  expected_seen_values: {} expected_null_input: {}",
+                                expected_seen_values.contains(&i),
+                                expected_null_input.contains(&i)
+                            );
+
+                            assert_eq!(valid, expected_valid, "Index {i}");
+                        })
+                };
+            }
+
             assert_eq!(null_buffer, expected_null_buffer);
         }
 
diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index e3c061dc1354..17819d4fa2e3 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -21,7 +21,7 @@ use std::any::Any;
 use std::convert::TryFrom;
 use std::sync::Arc;
 
-use crate::{AggregateExpr, PhysicalExpr};
+use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::compute;
 use arrow::datatypes::{DataType, TimeUnit};
 use arrow::{
@@ -35,9 +35,15 @@ use arrow::{
     },
     datatypes::Field,
 };
+use arrow_array::cast::AsArray;
+use arrow_array::types::{
+    ArrowPrimitiveType, Decimal128Type, Float32Type, Float64Type, UInt32Type, UInt64Type,
+};
+use arrow_array::{ArrowNumericType, PrimitiveArray};
 use datafusion_common::ScalarValue;
 use datafusion_common::{downcast_value, DataFusionError, Result};
 use datafusion_expr::Accumulator;
+use log::debug;
 
 use crate::aggregate::row_accumulator::{
     is_row_accumulator_support_dtype, RowAccumulator,
@@ -48,7 +54,9 @@ use arrow::array::Array;
 use arrow::array::Decimal128Array;
 use datafusion_row::accessor::RowAccessor;
 
+use super::groups_accumulator::accumulate::NullState;
 use super::moving_min_max;
+use super::utils::adjust_output_array;
 
 // Min/max aggregation can take Dictionary encode input but always produces unpacked
 // (aka non Dictionary) output. We need to adjust the output data type to reflect this.
@@ -125,6 +133,10 @@ impl AggregateExpr for Max {
         is_row_accumulator_support_dtype(&self.data_type)
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        self.data_type.is_primitive()
+    }
+
     fn create_row_accumulator(
         &self,
         start_index: usize,
@@ -135,6 +147,47 @@ impl AggregateExpr for Max {
         )))
     }
 
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        match self.data_type {
+            DataType::UInt32 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                UInt32Type,
+                false,
+            >::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::UInt64 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                UInt64Type,
+                false,
+            >::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Float32 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                Float32Type,
+                false,
+            >::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Float64 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                Float64Type,
+                false,
+            >::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Decimal128(_, _) => {
+                Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                    Decimal128Type,
+                    false,
+                >::new(
+                    &self.data_type, &self.data_type
+                )))
+            }
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "MinMaxGroupsPrimitiveAccumulator not supported for {}",
+                self.data_type
+            ))),
+        }
+    }
+
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }
@@ -835,6 +888,55 @@ impl AggregateExpr for Min {
         )))
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        Max::groups_accumulator_supported(&Max::new(
+            self.expr.clone(),
+            self.name.clone(),
+            self.data_type.clone(),
+        ))
+    }
+
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        match self.data_type {
+            DataType::UInt32 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                UInt32Type,
+                true,
+            >::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::UInt64 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                UInt64Type,
+                true,
+            >::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Float32 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                Float32Type,
+                true,
+            >::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Float64 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                Float64Type,
+                true,
+            >::new(
+                &self.data_type, &self.data_type
+            ))),
+            DataType::Decimal128(_, _) => {
+                Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+                    Decimal128Type,
+                    true,
+                >::new(
+                    &self.data_type, &self.data_type
+                )))
+            }
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "MinMaxGroupsPrimitiveAccumulator not supported for {}",
+                self.data_type
+            ))),
+        }
+    }
+
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }
@@ -1022,6 +1124,154 @@ impl RowAccumulator for MinRowAccumulator {
     }
 }
 
+/// An accumulator to compute the min or max of PrimitiveArray<T>.
+/// Stores values as native types, and does overflow checking
+#[derive(Debug)]
+struct MinMaxGroupsPrimitiveAccumulator<T, const MIN: bool>
+where
+    T: ArrowNumericType + Send,
+{
+    /// The type of the computed sum
+    min_max_data_type: DataType,
+
+    /// The type of the returned sum
+    return_data_type: DataType,
+
+    /// Min/max per group, stored as the native type
+    min_max: Vec<T::Native>,
+
+    /// Track nulls in the input / filters
+    null_state: NullState,
+}
+
+impl<T, const MIN: bool> MinMaxGroupsPrimitiveAccumulator<T, MIN>
+where
+    T: ArrowNumericType + Send,
+{
+    pub fn new(min_max_data_type: &DataType, return_data_type: &DataType) -> Self {
+        debug!(
+            "MinMaxGroupsPrimitiveAccumulator ({}, sum type: {min_max_data_type:?}) --> {return_data_type:?}",
+            std::any::type_name::<T>()
+        );
+
+        Self {
+            return_data_type: return_data_type.clone(),
+            min_max_data_type: min_max_data_type.clone(),
+            min_max: vec![],
+            null_state: NullState::new(),
+        }
+    }
+}
+
+impl<T, const MIN: bool> GroupsAccumulator for MinMaxGroupsPrimitiveAccumulator<T, MIN>
+where
+    T: ArrowNumericType + Send,
+{
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "single argument to update_batch");
+        let values = values.get(0).unwrap().as_primitive::<T>();
+
+        // update sums
+        self.min_max
+            .resize_with(total_num_groups, || T::default_value());
+
+        // NullState dispatches / handles tracking nulls and groups that saw no values
+        self.null_state.accumulate(
+            group_indices,
+            values,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let val: &mut <T as ArrowPrimitiveType>::Native =
+                    &mut self.min_max[group_index];
+                if MIN {
+                    if new_value < *val {
+                        *val = new_value;
+                    }
+                } else {
+                    if new_value > *val {
+                        *val = new_value;
+                    }
+                }
+            },
+        );
+
+        Ok(())
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "one argument to merge_batch");
+        // first batch is partial sums
+        let partial_min_max: &PrimitiveArray<T> =
+            values.get(0).unwrap().as_primitive::<T>();
+
+        // Sum partial sums
+        self.min_max
+            .resize_with(total_num_groups, || T::default_value());
+
+        self.null_state.accumulate(
+            group_indices,
+            partial_min_max,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let val = &mut self.min_max[group_index];
+                // TODO: support min and max
+                if MIN {
+                    if new_value < *val {
+                        *val = new_value;
+                    }
+                } else {
+                    if new_value > *val {
+                        *val = new_value;
+                    }
+                }
+            },
+        );
+
+        Ok(())
+    }
+
+    fn evaluate(&mut self) -> Result<ArrayRef> {
+        let min_max = std::mem::take(&mut self.min_max);
+        let nulls = self.null_state.build();
+
+        let min_max = PrimitiveArray::<T>::new(min_max.into(), nulls); // no copy
+        let min_max = adjust_output_array(&self.return_data_type, Arc::new(min_max))?;
+
+        Ok(Arc::new(min_max))
+    }
+
+    // return the per-group min/max values as the intermediate state
+    fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        let nulls = self.null_state.build();
+
+        let min_max = std::mem::take(&mut self.min_max);
+        let min_max = Arc::new(PrimitiveArray::<T>::new(min_max.into(), nulls.clone())); // zero copy
+
+        let sums = adjust_output_array(&self.min_max_data_type, min_max)?;
+
+        // TODO: Sum expects sum/count array, but count is not needed
+        Ok(vec![sums.clone() as ArrayRef])
+    }
+
+    fn size(&self) -> usize {
+        self.min_max.capacity() * std::mem::size_of::<usize>()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 91e4211bbf2f..c72a9005f0f6 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -478,7 +478,8 @@ impl RowAccumulator for SumRowAccumulator {
     }
 }
 
-/// An accumulator to compute the sum of values in [`PrimitiveArray<T>`]
+/// An accumulator to compute the sum of PrimitiveArray<T>.
+/// Stores values as native types, and does overflow checking
 #[derive(Debug)]
 struct SumGroupsAccumulator<T>
 where

From 9d2c7bf37b0569697b6be643abc72ceaadd236d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 20:09:47 +0200
Subject: [PATCH 48/89] Min/Max for primitives

---
 .../groups_accumulator/accumulate.rs          | 23 -------------------
 datafusion/physical-expr/src/aggregate/sum.rs |  3 +--
 2 files changed, 1 insertion(+), 25 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 66531ccc0adf..f19576ee67fd 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -717,29 +717,6 @@ mod test {
 
             let null_buffer = null_state.build();
 
-            if null_buffer != expected_null_buffer {
-                if let (Some(null_buffer), Some(expected_null_buffer)) =
-                    (null_buffer.as_ref(), expected_null_buffer.as_ref())
-                {
-                    null_buffer
-                        .iter()
-                        .zip(expected_null_buffer.iter())
-                        .enumerate()
-                        .for_each(|(i, (valid, expected_valid))| {
-                            println!(
-                                "nulls[{i}]: valid: {valid}, expected: {expected_valid}"
-                            );
-                            println!(
-                                "  expected_seen_values: {} expected_null_input: {}",
-                                expected_seen_values.contains(&i),
-                                expected_null_input.contains(&i)
-                            );
-
-                            assert_eq!(valid, expected_valid, "Index {i}");
-                        })
-                };
-            }
-
             assert_eq!(null_buffer, expected_null_buffer);
         }
 
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index c72a9005f0f6..91e4211bbf2f 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -478,8 +478,7 @@ impl RowAccumulator for SumRowAccumulator {
     }
 }
 
-/// An accumulator to compute the sum of PrimitiveArray<T>.
-/// Stores values as native types, and does overflow checking
+/// An accumulator to compute the sum of values in [`PrimitiveArray<T>`]
 #[derive(Debug)]
 struct SumGroupsAccumulator<T>
 where

From ecc980dd9edd718243f922a9d716cc572c977ded Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 21:58:05 +0200
Subject: [PATCH 49/89] Min/Max initialization

---
 .../physical-expr/src/aggregate/min_max.rs    | 75 ++++++++++++++++++-
 1 file changed, 72 insertions(+), 3 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index 17819d4fa2e3..4f697457fd91 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1124,12 +1124,75 @@ impl RowAccumulator for MinRowAccumulator {
     }
 }
 
+trait MinMax {
+    fn min() -> Self;
+    fn max() -> Self;
+}
+
+impl MinMax for u32 {
+    fn min() -> Self {
+        u32::MIN
+    }
+    fn max() -> Self {
+        u32::MAX
+    }
+}
+impl MinMax for i32 {
+    fn min() -> Self {
+        i32::MIN
+    }
+    fn max() -> Self {
+        i32::MAX
+    }
+}
+impl MinMax for i64 {
+    fn min() -> Self {
+        i64::MIN
+    }
+    fn max() -> Self {
+        i64::MAX
+    }
+}
+impl MinMax for u64 {
+    fn min() -> Self {
+        u64::MIN
+    }
+    fn max() -> Self {
+        u64::MAX
+    }
+}
+impl MinMax for f32 {
+    fn min() -> Self {
+        f32::MIN
+    }
+    fn max() -> Self {
+        f32::MAX
+    }
+}
+impl MinMax for f64 {
+    fn min() -> Self {
+        f64::MIN
+    }
+    fn max() -> Self {
+        f64::MAX
+    }
+}
+impl MinMax for i128 {
+    fn min() -> Self {
+        i128::MIN
+    }
+    fn max() -> Self {
+        i128::MAX
+    }
+}
+
 /// An accumulator to compute the min or max of PrimitiveArray<T>.
 /// Stores values as native types, and does overflow checking
 #[derive(Debug)]
 struct MinMaxGroupsPrimitiveAccumulator<T, const MIN: bool>
 where
     T: ArrowNumericType + Send,
+    T::Native: MinMax,
 {
     /// The type of the computed sum
     min_max_data_type: DataType,
@@ -1147,6 +1210,7 @@ where
 impl<T, const MIN: bool> MinMaxGroupsPrimitiveAccumulator<T, MIN>
 where
     T: ArrowNumericType + Send,
+    T::Native: MinMax,
 {
     pub fn new(min_max_data_type: &DataType, return_data_type: &DataType) -> Self {
         debug!(
@@ -1166,6 +1230,7 @@ where
 impl<T, const MIN: bool> GroupsAccumulator for MinMaxGroupsPrimitiveAccumulator<T, MIN>
 where
     T: ArrowNumericType + Send,
+    T::Native: MinMax,
 {
     fn update_batch(
         &mut self,
@@ -1218,8 +1283,13 @@ where
             values.get(0).unwrap().as_primitive::<T>();
 
         // Sum partial sums
-        self.min_max
-            .resize_with(total_num_groups, || T::default_value());
+        self.min_max.resize_with(total_num_groups, || {
+            if MIN {
+                T::Native::min()
+            } else {
+                T::Native::max()
+            }
+        });
 
         self.null_state.accumulate(
             group_indices,
@@ -1228,7 +1298,6 @@ where
             total_num_groups,
             |group_index, new_value| {
                 let val = &mut self.min_max[group_index];
-                // TODO: support min and max
                 if MIN {
                     if new_value < *val {
                         *val = new_value;

From fede0323c074fb0b6ad519f4a5e0792373f2a988 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 21:58:53 +0200
Subject: [PATCH 50/89] Min/Max initialization

---
 datafusion/physical-expr/src/aggregate/min_max.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index 4f697457fd91..929082706ec6 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1285,9 +1285,9 @@ where
         // Sum partial sums
         self.min_max.resize_with(total_num_groups, || {
             if MIN {
-                T::Native::min()
-            } else {
                 T::Native::max()
+            } else {
+                T::Native::min()
             }
         });
 

From 5076245b67e31365af2998e895df2a06ee8d8a17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 22:24:57 +0200
Subject: [PATCH 51/89] Initial min/max support for primitive

---
 datafusion/physical-expr/src/aggregate/min_max.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index 929082706ec6..ad7c80df7c39 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1242,9 +1242,13 @@ where
         assert_eq!(values.len(), 1, "single argument to update_batch");
         let values = values.get(0).unwrap().as_primitive::<T>();
 
-        // update sums
-        self.min_max
-            .resize_with(total_num_groups, || T::default_value());
+        self.min_max.resize_with(total_num_groups, || {
+            if MIN {
+                T::Native::max()
+            } else {
+                T::Native::min()
+            }
+        });
 
         // NullState dispatches / handles tracking nulls and groups that saw no values
         self.null_state.accumulate(

From 8de4ada7e97dce0f736004dfc7e8f0b3288f1ab2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 22:27:54 +0200
Subject: [PATCH 52/89] Refactor

---
 .../physical-expr/src/aggregate/min_max.rs    | 35 +------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index ad7c80df7c39..ccb2f9f386c1 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1281,40 +1281,7 @@ where
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
-        assert_eq!(values.len(), 1, "one argument to merge_batch");
-        // first batch is partial sums
-        let partial_min_max: &PrimitiveArray<T> =
-            values.get(0).unwrap().as_primitive::<T>();
-
-        // Sum partial sums
-        self.min_max.resize_with(total_num_groups, || {
-            if MIN {
-                T::Native::max()
-            } else {
-                T::Native::min()
-            }
-        });
-
-        self.null_state.accumulate(
-            group_indices,
-            partial_min_max,
-            opt_filter,
-            total_num_groups,
-            |group_index, new_value| {
-                let val = &mut self.min_max[group_index];
-                if MIN {
-                    if new_value < *val {
-                        *val = new_value;
-                    }
-                } else {
-                    if new_value > *val {
-                        *val = new_value;
-                    }
-                }
-            },
-        );
-
-        Ok(())
+        Self::update_batch(self, values, group_indices, opt_filter, total_num_groups)
     }
 
     fn evaluate(&mut self) -> Result<ArrayRef> {

From 09b93294187e75d84c6e8a31e5d43467378ed164 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 22:29:36 +0200
Subject: [PATCH 53/89] Clippy

---
 datafusion/physical-expr/src/aggregate/min_max.rs | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index ccb2f9f386c1..c4810c1eb846 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1259,13 +1259,16 @@ where
             |group_index, new_value| {
                 let val: &mut <T as ArrowPrimitiveType>::Native =
                     &mut self.min_max[group_index];
-                if MIN {
-                    if new_value < *val {
-                        *val = new_value;
+                match MIN {
+                    true => {
+                        if new_value < *val {
+                            *val = new_value;
+                        }
                     }
-                } else {
-                    if new_value > *val {
-                        *val = new_value;
+                    false => {
+                        if new_value > *val {
+                            *val = new_value;
+                        }
                     }
                 }
             },

From ea0ce25ffb184b86502cc44f7bc3b5c898808692 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 22:30:13 +0200
Subject: [PATCH 54/89] Clippy

---
 datafusion/physical-expr/src/aggregate/min_max.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index c4810c1eb846..6bfda51e9ac8 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1302,7 +1302,7 @@ where
         let nulls = self.null_state.build();
 
         let min_max = std::mem::take(&mut self.min_max);
-        let min_max = Arc::new(PrimitiveArray::<T>::new(min_max.into(), nulls.clone())); // zero copy
+        let min_max = Arc::new(PrimitiveArray::<T>::new(min_max.into(), nulls)); // zero copy
 
         let sums = adjust_output_array(&self.min_max_data_type, min_max)?;
 

From be8a1e2fabbe25772eafd7cefe9419c6f9da6a13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 22:33:03 +0200
Subject: [PATCH 55/89] Cleanup

---
 .../physical-expr/src/aggregate/min_max.rs      | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index 6bfda51e9ac8..c36652ae8b1c 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1187,17 +1187,17 @@ impl MinMax for i128 {
 }
 
 /// An accumulator to compute the min or max of PrimitiveArray<T>.
-/// Stores values as native types, and does overflow checking
+/// Stores values as native/primitive type
 #[derive(Debug)]
 struct MinMaxGroupsPrimitiveAccumulator<T, const MIN: bool>
 where
     T: ArrowNumericType + Send,
     T::Native: MinMax,
 {
-    /// The type of the computed sum
+    /// The type of the computed min/max
     min_max_data_type: DataType,
 
-    /// The type of the returned sum
+    /// The type of the returned min/max
     return_data_type: DataType,
 
     /// Min/max per group, stored as the native type
@@ -1214,7 +1214,7 @@ where
 {
     pub fn new(min_max_data_type: &DataType, return_data_type: &DataType) -> Self {
         debug!(
-            "MinMaxGroupsPrimitiveAccumulator ({}, sum type: {min_max_data_type:?}) --> {return_data_type:?}",
+            "MinMaxGroupsPrimitiveAccumulator ({}, min/max type: {min_max_data_type:?}) --> {return_data_type:?}",
             std::any::type_name::<T>()
         );
 
@@ -1257,7 +1257,7 @@ where
             opt_filter,
             total_num_groups,
             |group_index, new_value| {
-                let val: &mut <T as ArrowPrimitiveType>::Native =
+                let val =
                     &mut self.min_max[group_index];
                 match MIN {
                     true => {
@@ -1297,17 +1297,16 @@ where
         Ok(Arc::new(min_max))
     }
 
-    // return arrays for sums and counts
+    // return arrays for min/max values
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
         let nulls = self.null_state.build();
 
         let min_max = std::mem::take(&mut self.min_max);
         let min_max = Arc::new(PrimitiveArray::<T>::new(min_max.into(), nulls)); // zero copy
 
-        let sums = adjust_output_array(&self.min_max_data_type, min_max)?;
+        let min_max = adjust_output_array(&self.min_max_data_type, min_max)?;
 
-        // TODO: Sum expects sum/count array, but count is not needed
-        Ok(vec![sums.clone() as ArrayRef])
+        Ok(vec![min_max])
     }
 
     fn size(&self) -> usize {

From 890b51708889beebcfa34fa7136238b9fedc7e19 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Wed, 5 Jul 2023 22:53:34 +0200
Subject: [PATCH 56/89] Fmt

---
 datafusion/physical-expr/src/aggregate/min_max.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index c36652ae8b1c..d4af866516e3 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1257,8 +1257,7 @@ where
             opt_filter,
             total_num_groups,
             |group_index, new_value| {
-                let val =
-                    &mut self.min_max[group_index];
+                let val = &mut self.min_max[group_index];
                 match MIN {
                     true => {
                         if new_value < *val {

From 6846970fa052ff653a471532a1a6c9b32579ee55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Thu, 6 Jul 2023 00:13:24 +0200
Subject: [PATCH 57/89] Speed up avg

---
 datafusion/physical-expr/src/aggregate/average.rs | 15 +++------------
 datafusion/physical-expr/src/aggregate/min_max.rs |  2 +-
 2 files changed, 4 insertions(+), 13 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 2957a0136a9e..71ac157ee6f1 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -532,19 +532,8 @@ where
         assert_eq!(values.len(), 1, "single argument to update_batch");
         let values = values.get(0).unwrap().as_primitive::<T>();
 
-        // increment counts
+        // increment counts, update sums
         self.counts.resize(total_num_groups, 0);
-        self.null_state.accumulate(
-            group_indices,
-            values,
-            opt_filter,
-            total_num_groups,
-            |group_index, _new_value| {
-                self.counts[group_index] += 1;
-            },
-        );
-
-        // update sums
         self.sums.resize(total_num_groups, T::default_value());
         self.null_state.accumulate(
             group_indices,
@@ -554,6 +543,8 @@ where
             |group_index, new_value| {
                 let sum = &mut self.sums[group_index];
                 *sum = sum.add_wrapping(new_value);
+
+                self.counts[group_index] += 1;
             },
         );
 
diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index d4af866516e3..b37c659c21c7 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -37,7 +37,7 @@ use arrow::{
 };
 use arrow_array::cast::AsArray;
 use arrow_array::types::{
-    ArrowPrimitiveType, Decimal128Type, Float32Type, Float64Type, UInt32Type, UInt64Type,
+    Decimal128Type, Float32Type, Float64Type, UInt32Type, UInt64Type,
 };
 use arrow_array::{ArrowNumericType, PrimitiveArray};
 use datafusion_common::ScalarValue;

From 2f4907abdab3e7400274456b5cc26a3d1c1dfc3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Thu, 6 Jul 2023 00:20:03 +0200
Subject: [PATCH 58/89] Fmt

---
 datafusion/physical-expr/src/aggregate/average.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index 71ac157ee6f1..c4bfbc145101 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -582,7 +582,7 @@ where
             partial_sums,
             opt_filter,
             total_num_groups,
-            |group_index, new_value| {
+            |group_index, new_value: <T as ArrowPrimitiveType>::Native| {
                 let sum = &mut self.sums[group_index];
                 *sum = sum.add_wrapping(new_value);
             },

From 7ecf148b2f319324cdc0b993d917257e54cda317 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Wed, 5 Jul 2023 10:55:08 -0400
Subject: [PATCH 59/89] Add clickbench queries to sqllogictest coverage (#6836)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add clickbench queries to sqllogictest coverage

* rowsort

* Update datafusion/core/tests/sqllogictests/test_files/clickbench.slt

Co-authored-by: Daniël Heres <danielheres@gmail.com>

* fix typo -- :facepalm:

* Update queries now that they pass

---------

Co-authored-by: Daniël Heres <danielheres@gmail.com>
---
 .../tests/data/clickbench_hits_10.parquet     | Bin 0 -> 13124 bytes
 .../sqllogictests/test_files/clickbench.slt   | 275 ++++++++++++++++++
 2 files changed, 275 insertions(+)
 create mode 100644 datafusion/core/tests/data/clickbench_hits_10.parquet
 create mode 100644 datafusion/core/tests/sqllogictests/test_files/clickbench.slt

diff --git a/datafusion/core/tests/data/clickbench_hits_10.parquet b/datafusion/core/tests/data/clickbench_hits_10.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c57421d5834b382b6a2b2d393e2b5b39ffbb4852
GIT binary patch
literal 13124
zcmeHO4RBP|6@Hs+lHL4~K;E*^sNpA<AA$VxlQqHolcga^N%)x#MIXD5>?50f@$DuU
zwbWAE@rPj?J4MDS(=rTWtwWJgr4*E5sM;BHQmoZc%cxlVSB5g^AY#wC@4oxq%Wjed
ztK)RK<39GDd%kndJ@?%6?mahl)-Nv?1@Uo*xX&?1EP$WsV!9x#JhLK25QG9pVD|jd
z;wk-`etn_kwU>TTaZ7m0d)w}Kw5~O6-*flZ4sThN|Nhy#n@QWypH6vtYd?FU|8)7K
z9a-wfjYt1H_hg>fc+*EGew@1{Z}OVgOPe>pvGn!ZLBSf)Ytb~%t1A*7atNYRbcxw*
zVu}d5o!RgZfPbRsLH<15GIz1g?<l~&HR82aS<_h`0Jl1^AI5dJZCrIa$Exg8-$(IV
z>)uIoIZrjaUCz}l@Ys;)E_S{@6Mnti=Jq;Qceq_OpVhoWdCxD?F{7%zX0f4mjW{!b
zS^1!FlT*kpfPblw_5@6Op1tj<dBAk=KL5>*yqdqRz5R#yeWb^;0=p<~X#(7mR6zio
zbB5r~tN9CP!0*4{iygWrHd$aRt8_=aQvAR#eG6Xs=USJr=9ULPUOG9rC@ts3??r#~
zn@`h7q~zY%)CC#&u9}}l9~>Hb_uuOcc523`(}?rZU{S{B*yj)&9*47L2H1t)Iq<~~
z71uHf!kYmcFo-nQ9QMn=Y)=!nI4!8-DMm1XqUYxVTPPOm_m!50)UX;<!V#r+o;Fab
z4Md}gR9ZHF!$7#Ath|!+sX;khTh=Ow<uj%AN;D+R>ywnQqoOV##iX#>Q(9SGRk~5_
zlOqoCRyh(Fsx2q|Qjc6&dZVmq>Yx;x+pk1Gys}sj+q~=4NWZ4s>Zr_CB0)LLeSbiS
z$bC|&q{T|himMkbD6XogDz2!kEdJY1hW|MH)bP{8hke412BEvYtgLd;!a0=-s~66?
zp;#N3Mb@>I3*zFNW9onwEp6_VVsekFDMM08MGJ{)wV_!>i=tt$K$9cNkc^(Wl2tV3
zR9<<fEl@3Uv*z(?K`5)G(~48eKk?yhMR|blRro%^zW5tI<45C_58=v?SCGi2S_0X=
zVA>%w;*zQwg+s{j<q1W)E0{Azb$kx6O@!|R_H{ED1WmCxy$${WLSBs%o}WH*xC8%f
zf*+v&Dm~S*G@MhW=Z_SZkm*Pn+dfA=BqqCkSLW6K=DXtxX2|AL_Nw{)Th%n}ZV5u#
zy=U4?0l!Z^e2st|bk-8RpwO-Q3+li7GW-VII;PJ@>--8|=Jf45+Y+<JJBM6+%K;7$
zF1dt!7xmG%ZTA&S{<m!+^`7-5y3)vmw>gBml83`z{Pz07Uf-^dR(*IhzfS<-cPQ_F
z1I}`anPM*68m8r8{LF{Y8EbC{d%wGEd%qezWwpGd@8jOx58J#)4!T{!mpLxylEdyi
zr*r~HZ#)B!*+*gLH?&9R{N0C^o!_Wg_`0|(Jbp?apJOz2&fPv=zTm}m1RuJJ#ut`o
z!@0J}e+)OA)gM&0!-jM6&oWlv_s-PXUhEomeX=#4IDw7d=@fItiEKVIytoI@Uo)f6
zs1uF=1q|UTVsEJIF@xYT*ZF0#kCy%DQRaWB^RnFnE6@7Y<C*J{Lx7F><>smQNNH~^
z8gtsm^!;Uy=Kt)YoxZ<Qo*8q>N8O36zJBwT884su!q|z#l?&J@`Gq?wFAT4ncORUR
z4{e>8slNT`S^3L5bKacx_8RZ%xuHLNnZCMs@W`O!g(LSo7uvo*U#$LBlYtpEfzmXQ
zQh0I0e8HO&90>FV*WDBigy5hkp1@U7lH$!+CB*_Ee^beHK}>;m=2B@;YK}<j!g5Km
zAZ7z8ty_sf3tj=p;9jDt!RA3Z62pEJ;ij!_rB615=#(ZnNtSpSs!<(?#AFQ=P1Px`
z#;^iH{GFVzT<%fSh|Ym>(^o_(fLWH!L^Ot_XcTp$H7Olk#!ol0*Y_~P(D2k1%iGXw
z{GG8}UN391#)sjL0!5RfAp@V!I2!c>@DLcnluTefRs&Ia4#<z^&I0O4PLL2)!virG
z-zp^-<2v(9daJDTgs4wZv<1*4_nUys78reKnY2OG4C@$4iRdI8Y?jW2qzYreu68+Q
z!%>2PLY=wXft(M8S;&+d9E#j99cr&4o1uclJdB=bKvVi-_70anx=dZC=s|^?S$eZG
zq(*q4<ymOUuwY_LUV5jbNqtdcC|Uka7|cdh>ovnEw_Ag{N}vmh7y;Rwy4)^V(gLC8
zNDtIUbTW=9TPJshG%3m>JF7l8Xl0~k`lIz)OaaMK*l>9MDyX@r8}<0LP3J^wk9|UO
zb#`1OKVU<~7@we%<5tk5E+srDYhjFum9iFvz$rKMC+Mpfqp#9fU<q(lh@%nH3ALui
zkfcTB7<Y3c%mGtw$Vm4`+muMJp)S)OZBjQz!m1QBNLl`9yBceN<sP6DOOs5?iimOx
zxF41>)z+aYFd<TyEl~7yYO6V!+5TupILOw!AvnVy<!Oga4Gjw!(u%>8)1*dX6xXW6
zc#?3fQevT2SU1Rq`d!f52n9Qu4bZ^cHWel~ZWdA@ofx81yX0Gq-|7BH07`X6)9uI9
zCPz(;?&az_H5QT0MCDp8_o*@2Om~@jth7rYho!{Kj(#~JYgSTDs}hZ=+EAMu>Cv+Q
z+CQN|Q#V4kYm*{912A#MI>}{p>H%%YAm?<c0YwV4HDge-b!vS8vyw6A?$++^&Soe$
za+hXy$&nz0trF=m>~uG^ciAa9jcOz!2V!>WB*<?4Y9uOm#h@-}?}Kcs_cf1{p4B45
z8_CWs>bB~u+g$r>I`gc(7|1vpoYqVlAf1{5p~ylno02-TAbqs}ANn(g5*r6JC~*d;
z+c3Z;*r;amn1s_5jY+XCSsMh=biSCRiAG<e)YmU5J@`6fUD-xgeb6N48AP7A7`J;x
z_cEPk;E}RS4k`nE1|<t9h8}~Ki8NRQ_!?xQGJw=AYaEu*qN#n|QV;gyI9;$bmNd5M
z14OrFvG*NaGT=A5gkh-~Z3H<fV5;CLC@d<xs1Tt@bQX$2A>UAlDV_q)35VxZ2XWPS
zUB)es*Ja$_c!Ze=sBlU{St(SOWivv>ZSwfYn~oZ<6Az^N6Qo#L44qwABIFgajSEGt
z^sFVjQ^-RpMGFKE9@0%E=7xsps3V`JfDSAG=ugyPwis9+t-1Fx#=FNs4y3|>S9j=l
zI$qa?O#N(+1CFQKZJ{X$H%{D^)6SX&pdW93y!2@Yuq5ekVt}P&cN!Rv7eHPoUI(G^
zX3GZVTQG+0Cub0g_d;GLUIii6Y}v8K7KdTO<jr)z;#H6rZisZfW{Y8Ur?HJ4MGiYW
zJ00YB26)W40rI+xJ0SE}Jk@50ZCA)_@sUp;<Sb!<;{X&`hEMpIvkx0ch6<8L-GIUi
zBQvzjmJMa@H5oWWwqybrZ;wo{*<#>q8r8ThrLd>dJo}vF901GNEz+oWF#08v<r;<v
zGcO;+&7jM}*b-A)aDnWbKz2_^ut4}$nGM!Q?#fHnzwx_jF68N?g^b*Qa}l2>EyBK6
zX9B`xUp@>3uZhfAGh4PPw{Gh>e8)t9;~kL+H(NHiWkXN5jC=_9PD<95afbyi<5<!%
zEN0Mi1s8ECX%Utq2uB|%NOlmG(+WqQO#;nQy7gp3@ff!UMDn;8AGiF(!xS}<dp%$#
zW`A?K%$9As_~D3QSCSVIYp!TaDQ3%twH%o6oW>-C$*##D0dK}k?PiNfV8gVCz%v&W
zG3ePu_It_w-XgCPFV4J9yf*U)9;dZ!v&42r<0IKx2zKc2GvKB#2(x9w#~tUG3?F$1
zWgyxdi`lZt;3q$h-Emd2SnDB?!(UAT&fFMxprk@DWOiIl9=O^XG7P~^nw;<A)#Qwg
zPv4kWPoT`OFnO^E#)X%1UMF79VT711n@>g@R+)r4a_}0EfNs^FD8X#mCD_lkOoESG
zKncJ(*1WKx+Rngyz(=kn`>(Y+h7H(qHs;D6xNdx9hQG{XG>0Eg8XkY*=0bj6oU9EA
z4(D9Tk))-JILC7d7f`}i@X?<*b!LlAoxb}SdE}=eU>np<_D_Lbka2Zs{sLJEsJ3uO
zaI`Pyrfr^@tcwzeso-J`eH}5CT+I2eBc_Ur**k6gL9sy3veY&x9uvU%Y2<@x3F6#R
z^WeIkpFX}WJ<eHqgd;bXB#X3EQaJih(r9bZg~QJ$0cU46R-d6z2*veeFM&`ru715P
z%qaN5P>iQS2X3ehcUy`(R3N&0=M1uKM!dVR=)>(jHzQeR+3G_azh`E$c%ziWq3<OP
z1y9qm09KtieCMoWHQNhRT)-Pi3*faZj@>qUeC2Fvc-@PFS9@+ICufu4*+pK*eE1Kx
z5-IvGHlccOjHr!KD4gpbbCA8)Ly#CZ*It)#myJOZPh~Rob%KR8Hh`7nwK*_=>!9D<
zOPDP?3~F!;)<pKs1(>;g!;EXTY%o@?<H&PKBhAVlxAwq1P`le_wbpFe)S5*=u6b)|
zvNrR2A;b2Q6Ntq-bFUL`&2hYD%cj^;T4ZP+Ia&s2^NoaPvt>tPNlLFma`eTd(e{ER
z7w}~H`081Zjx26c8EZU2vblnsDKGM-!mgJ-XuR?k8v_$Eq7q7FtcASxUIqDkMG@UF
zXX{TY1vk{JZI@#&ol-HF&YrC#2P>`VjH$KNR5|yqDzd%G%B?Q~v(`#EvAA`<3fu%1
zSeAm>VwUM(OIenq_s&liZB}qO_=BXu_F^v=uxmlG+7s1<xtNnli-|8Gb0OOoCToke
z+RWi6l7=TPOmjh77L9KQ3!S(kHWh-wc485Ec9AuF^t@~-Zc|oxZ#R%#)sT`gWbkAD
z!j!DjtW=;_-rKgyI9GnQnw+k-Dre@}OXidv*XTcd<d_fEDE$2dedue{XfcI)1dlAQ
zQyI7j9=#!HrzI-IbJIV$fxLY~0@D-J=sD(i4LMSi0AnlObMD7#$&T6t+;LTXj<|a<
zxtIuHmisyD>BZ!=#R-I)_W+#r*p1}CjR{z-7X+MnXC2vAmw?%FbHI7;tS9FZ@fueO
zobhBmIZ>ZLsBzc884op(y$x1IJs9{M1ZBi1*xERL6zB`VdR;-`^f2{2;9bUFUzope
zCB#!%V6xpXyXv5_V5b8W{cTMk%=ohmugmzu3ltVlwM99-C*gxS);#v0V7U|!moPKE
T3I154sX;hAMG!jS|DX8}WWJ$-

literal 0
HcmV?d00001

diff --git a/datafusion/core/tests/sqllogictests/test_files/clickbench.slt b/datafusion/core/tests/sqllogictests/test_files/clickbench.slt
new file mode 100644
index 000000000000..bf785545d849
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/clickbench.slt
@@ -0,0 +1,275 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+# This file contains the clickbench schema and queries
+# and the first 10 rows of data. Since ClickBench contains case-sensitive queries,
+# this is also a good test of that use case.
+
+# create.sql came from
+# https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/create.sql
+# Data file made with DuckDB:
+# COPY (SELECT * FROM 'hits.parquet' LIMIT 10) TO 'clickbench_hits_10.parquet' (FORMAT PARQUET);
+
+statement ok
+CREATE EXTERNAL TABLE hits
+STORED AS PARQUET
+LOCATION 'tests/data/clickbench_hits_10.parquet';
+
+
+# queries.sql came from
+# https://github.com/ClickHouse/ClickBench/blob/8b9e3aa05ea18afa427f14909ddc678b8ef0d5e6/datafusion/queries.sql
+
+query I
+SELECT COUNT(*) FROM hits;
+----
+10
+
+query I
+SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0;
+----
+0
+
+query IIR
+SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits;
+----
+0 10 0
+
+query R
+SELECT AVG("UserID") FROM hits;
+----
+-304548765855551600
+
+query I
+SELECT COUNT(DISTINCT "UserID") FROM hits;
+----
+5
+
+query I
+SELECT COUNT(DISTINCT "SearchPhrase") FROM hits;
+----
+1
+
+query DD
+SELECT MIN("EventDate"::INT::DATE), MAX("EventDate"::INT::DATE) FROM hits;
+----
+2013-07-15 2013-07-15
+
+query II
+SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC;
+----
+
+query II rowsort
+SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10;
+----
+197 1
+229 1
+39 1
+839 2
+
+query IIIRI rowsort
+SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10;
+----
+197 0 2 0 1
+229 0 1 0 1
+39 0 1 0 1
+839 0 6 0 2
+
+query TI
+SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
+----
+
+query ITI
+SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10;
+----
+
+query TI
+SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
+----
+
+query TI
+SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10;
+----
+
+query ITI
+SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10;
+----
+
+query II rowsort
+SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10;
+----
+-2461439046089301801 5
+376160620089546609 1
+427738049800818189 1
+519640690937130534 2
+7418527520126366595 1
+
+query ITI rowsort
+SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
+----
+-2461439046089301801 (empty) 5
+376160620089546609 (empty) 1
+427738049800818189 (empty) 1
+519640690937130534 (empty) 2
+7418527520126366595 (empty) 1
+
+query ITI rowsort
+SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10;
+----
+-2461439046089301801 (empty) 5
+376160620089546609 (empty) 1
+427738049800818189 (empty) 1
+519640690937130534 (empty) 2
+7418527520126366595 (empty) 1
+
+query IRTI rowsort
+SELECT "UserID", extract(minute FROM to_timestamp_seconds("EventTime")) AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10;
+----
+-2461439046089301801 18 (empty) 1
+-2461439046089301801 33 (empty) 1
+-2461439046089301801 38 (empty) 1
+-2461439046089301801 56 (empty) 1
+-2461439046089301801 58 (empty) 1
+376160620089546609 30 (empty) 1
+427738049800818189 40 (empty) 1
+519640690937130534 26 (empty) 1
+519640690937130534 36 (empty) 1
+7418527520126366595 18 (empty) 1
+
+query I
+SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449;
+----
+
+query I
+SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%';
+----
+0
+
+query TTI
+SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
+----
+
+query TTTII
+SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10;
+----
+
+query IITIIIIIIIIIITTIIIIIIIIIITIIITIIIITTIIITIIIIIIIIIITIIIIITIIIIIITIIIIIIIIIITTTTIIIIIIIITITTITTTTTTTTTTIIII
+SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
+----
+
+query T
+SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime") LIMIT 10;
+----
+
+query T
+SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10;
+----
+
+query T
+SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY to_timestamp_seconds("EventTime"), "SearchPhrase" LIMIT 10;
+----
+
+query IRI
+SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
+----
+
+query TRIT
+SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25;
+----
+
+query IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), 
SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits;
+----
+0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890
+
+query IIIIR
+SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10;
+----
+
+query IIIIR
+SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
+----
+
+query IIIIR rowsort
+SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10;
+----
+4894690465724379622 1568366281 1 0 0
+5206346422301499756 -1216690514 1 0 0
+6308646140879811077 -1216690514 1 0 0
+6635790769678439148 1427531677 1 0 0
+6864353419233967042 1568366281 1 0 0
+8120543446287442873 -1216690514 1 0 0
+8156744413230856864 -1216690514 1 0 0
+8740403056911509777 1615432634 1 0 0
+8924809397503602651 -1216690514 1 0 0
+9110818468285196899 -1216690514 1 0 0
+
+query TI rowsort
+SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10;
+----
+(empty) 5
+http://afisha.mail.ru/catalog/314/women.ru/ency=1&page3/?errovat-pinniki 1
+http://bonprix.ru/index.ru/cinema/art/0 986 424 233 сезон 1
+http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 1
+http://holodilnik.ru/russia/05jul2013&model=0 1
+http://tours/Ekategoriya%2F&sr=http://slovareniye 1
+
+query ITI rowsort
+SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10;
+----
+1 (empty) 5
+1 http://afisha.mail.ru/catalog/314/women.ru/ency=1&page3/?errovat-pinniki 1
+1 http://bonprix.ru/index.ru/cinema/art/0 986 424 233 сезон 1
+1 http://bonprix.ru/index.ru/cinema/art/A00387,3797); ru)&bL 1
+1 http://holodilnik.ru/russia/05jul2013&model=0 1
+1 http://tours/Ekategoriya%2F&sr=http://slovareniye 1
+
+query IIIII rowsort
+SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10;
+----
+-1216690514 -1216690515 -1216690516 -1216690517 6
+1427531677 1427531676 1427531675 1427531674 1
+1568366281 1568366280 1568366279 1568366278 2
+1615432634 1615432633 1615432632 1615432631 1
+
+query TI
+SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10;
+----
+
+query TI
+SELECT "Title", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY PageViews DESC LIMIT 10;
+----
+
+query TI
+SELECT "URL", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+----
+
+query IIITTI
+SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS Src, "URL" AS Dst, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000;
+----
+
+query IDI
+SELECT "URLHash", "EventDate"::INT::DATE, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate"::INT::DATE ORDER BY PageViews DESC LIMIT 10 OFFSET 100;
+----
+
+query III
+SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-01' AND "EventDate"::INT::DATE <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY PageViews DESC LIMIT 10 OFFSET 10000;
+----
+
+query PI
+SELECT DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) AS M, COUNT(*) AS PageViews FROM hits WHERE "CounterID" = 62 AND "EventDate"::INT::DATE >= '2013-07-14' AND "EventDate"::INT::DATE <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', to_timestamp_seconds("EventTime")) ORDER BY DATE_TRUNC('minute', M) LIMIT 10 OFFSET 1000;
+----

From 9adcf9786010a67d132f04ba9bc900c4a33ca211 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=96zg=C3=BCr=20Akkurt?= <oezgurmakkurt@gmail.com>
Date: Wed, 5 Jul 2023 23:20:48 +0600
Subject: [PATCH 60/89] feat: implement postgres style `encode`/`decode` (#6821)

* feat: add encode, decode functions

* add test

* add licenses

* fix return types

* delete files

* toml fmt

* add logical expr

* fix NULL case, add test for NULL and empty

* add sqllogic tests

* update error msgs, run cargo update in cli dir

* update sqllogictest

* add more tests
---
 datafusion-cli/Cargo.lock                     | 158 +++++---
 datafusion/core/Cargo.toml                    |   3 +-
 datafusion/core/tests/sql/expr.rs             |  60 ++++
 .../sqllogictests/test_files/encoding.slt     |  50 +++
 datafusion/expr/src/built_in_function.rs      |  54 +++
 datafusion/expr/src/expr_fn.rs                |  30 ++
 datafusion/physical-expr/Cargo.toml           |   5 +-
 .../physical-expr/src/encoding_expressions.rs | 340 ++++++++++++++++++
 datafusion/physical-expr/src/functions.rs     |  26 ++
 datafusion/physical-expr/src/lib.rs           |   2 +
 datafusion/proto/proto/datafusion.proto       |   2 +
 datafusion/proto/src/generated/pbjson.rs      |   6 +
 datafusion/proto/src/generated/prost.rs       |   6 +
 .../proto/src/logical_plan/from_proto.rs      |   2 +
 datafusion/proto/src/logical_plan/to_proto.rs |   2 +
 15 files changed, 697 insertions(+), 49 deletions(-)
 create mode 100644 datafusion/core/tests/sqllogictests/test_files/encoding.slt
 create mode 100644 datafusion/physical-expr/src/encoding_expressions.rs

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 45b2b80f7e57..415c39b6d51c 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -2,6 +2,15 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "addr2line"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
+dependencies = [
+ "gimli",
+]
+
 [[package]]
 name = "adler"
 version = "1.0.2"
@@ -647,6 +656,21 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "backtrace"
+version = "0.3.68"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
+dependencies = [
+ "addr2line",
+ "cc",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+]
+
 [[package]]
 name = "base64"
 version = "0.21.2"
@@ -669,6 +693,12 @@ version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
+[[package]]
+name = "bitflags"
+version = "2.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
+
 [[package]]
 name = "blake2"
 version = "0.10.6"
@@ -813,9 +843,9 @@ dependencies = [
 
 [[package]]
 name = "chrono-tz"
-version = "0.8.2"
+version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cf9cc2b23599e6d7479755f3594285efb3f74a1bdca7a7374948bc831e23a552"
+checksum = "f1369bc6b9e9a7dfdae2055f6ec151fe9c554a9d23d357c0237cee2e25eaabb7"
 dependencies = [
  "chrono",
  "chrono-tz-build",
@@ -824,9 +854,9 @@ dependencies = [
 
 [[package]]
 name = "chrono-tz-build"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9998fb9f7e9b2111641485bf8beb32f92945f97f92a3d061f744cfef335f751"
+checksum = "e2f5ebdc942f57ed96d560a6d1a459bae5851102a25d5bf89dc04ae453e31ecf"
 dependencies = [
  "parse-zoneinfo",
  "phf",
@@ -840,7 +870,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123"
 dependencies = [
  "atty",
- "bitflags",
+ "bitflags 1.3.2",
  "clap_derive",
  "clap_lex",
  "indexmap 1.9.3",
@@ -995,9 +1025,9 @@ dependencies = [
 
 [[package]]
 name = "ctor"
-version = "0.2.2"
+version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1586fa608b1dab41f667475b4a41faec5ba680aee428bfa5de4ea520fdc6e901"
+checksum = "eed5fff0d93c7559121e9c72bf9c242295869396255071ff2cb1617147b608c5"
 dependencies = [
  "quote",
  "syn 2.0.22",
@@ -1154,6 +1184,7 @@ dependencies = [
  "arrow-array",
  "arrow-buffer",
  "arrow-schema",
+ "base64",
  "blake2",
  "blake3",
  "chrono",
@@ -1162,6 +1193,7 @@ dependencies = [
  "datafusion-row",
  "half",
  "hashbrown 0.14.0",
+ "hex",
  "indexmap 2.0.0",
  "itertools 0.11.0",
  "lazy_static",
@@ -1345,12 +1377,12 @@ dependencies = [
 
 [[package]]
 name = "fd-lock"
-version = "3.0.12"
+version = "3.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39ae6b3d9530211fb3b12a95374b8b0823be812f53d09e18c5675c0146b09642"
+checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5"
 dependencies = [
  "cfg-if",
- "rustix",
+ "rustix 0.38.1",
  "windows-sys 0.48.0",
 ]
 
@@ -1366,7 +1398,7 @@ version = "23.5.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "rustc_version",
 ]
 
@@ -1520,6 +1552,12 @@ dependencies = [
  "wasi",
 ]
 
+[[package]]
+name = "gimli"
+version = "0.27.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
+
 [[package]]
 name = "glob"
 version = "0.3.1"
@@ -1587,15 +1625,6 @@ dependencies = [
  "libc",
 ]
 
-[[package]]
-name = "hermit-abi"
-version = "0.2.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "hermit-abi"
 version = "0.3.1"
@@ -1934,6 +1963,12 @@ version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
+
 [[package]]
 name = "lock_api"
 version = "0.4.10"
@@ -2046,7 +2081,7 @@ version = "0.26.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "cfg-if",
  "libc",
  "static_assertions",
@@ -2137,14 +2172,23 @@ dependencies = [
 
 [[package]]
 name = "num_cpus"
-version = "1.15.0"
+version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b"
+checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
 dependencies = [
- "hermit-abi 0.2.6",
+ "hermit-abi 0.3.1",
  "libc",
 ]
 
+[[package]]
+name = "object"
+version = "0.31.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "object_store"
 version = "0.6.1"
@@ -2336,18 +2380,18 @@ dependencies = [
 
 [[package]]
 name = "pin-project"
-version = "1.1.0"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead"
+checksum = "030ad2bc4db10a8944cb0d837f158bdfec4d4a4873ab701a95046770d11f8842"
 dependencies = [
  "pin-project-internal",
 ]
 
 [[package]]
 name = "pin-project-internal"
-version = "1.1.0"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
+checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2356,9 +2400,9 @@ dependencies = [
 
 [[package]]
 name = "pin-project-lite"
-version = "0.2.9"
+version = "0.2.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116"
+checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57"
 
 [[package]]
 name = "pin-utils"
@@ -2460,9 +2504,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.28"
+version = "1.0.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488"
+checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
 dependencies = [
  "proc-macro2",
 ]
@@ -2513,7 +2557,7 @@ version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
 ]
 
 [[package]]
@@ -2522,7 +2566,7 @@ version = "0.3.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
 ]
 
 [[package]]
@@ -2641,6 +2685,12 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "rustc-demangle"
+version = "0.1.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
+
 [[package]]
 name = "rustc_version"
 version = "0.4.0"
@@ -2652,15 +2702,28 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.37.20"
+version = "0.37.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0"
+checksum = "62f25693a73057a1b4cb56179dd3c7ea21a7c6c5ee7d85781f5749b46f34b79c"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "errno",
  "io-lifetimes",
  "libc",
- "linux-raw-sys",
+ "linux-raw-sys 0.3.8",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "rustix"
+version = "0.38.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fbc6396159432b5c8490d4e301d8c705f61860b8b6c863bf79942ce5401968f3"
+dependencies = [
+ "bitflags 2.3.3",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.4.3",
  "windows-sys 0.48.0",
 ]
 
@@ -2702,9 +2765,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-pemfile"
-version = "1.0.2"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b"
+checksum = "2d3987094b1d07b653b7dfdc3f70ce9a1da9c51ac18c1b06b662e4f9a0e9f4b2"
 dependencies = [
  "base64",
 ]
@@ -2731,7 +2794,7 @@ version = "11.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5dfc8644681285d1fb67a467fb3021bfea306b99b4146b166a1fe3ada965eece"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "cfg-if",
  "clipboard-win",
  "dirs-next",
@@ -2794,7 +2857,7 @@ version = "2.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8"
 dependencies = [
- "bitflags",
+ "bitflags 1.3.2",
  "core-foundation",
  "core-foundation-sys",
  "libc",
@@ -3060,7 +3123,7 @@ dependencies = [
  "cfg-if",
  "fastrand",
  "redox_syscall 0.3.5",
- "rustix",
+ "rustix 0.37.21",
  "windows-sys 0.48.0",
 ]
 
@@ -3168,11 +3231,12 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.28.2"
+version = "1.29.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2"
+checksum = "532826ff75199d5833b9d2c5fe410f29235e25704ee5f0ef599fb51c21f4a4da"
 dependencies = [
  "autocfg",
+ "backtrace",
  "bytes",
  "libc",
  "mio",
@@ -3615,9 +3679,9 @@ dependencies = [
 
 [[package]]
 name = "windows-targets"
-version = "0.48.0"
+version = "0.48.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5"
+checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
 dependencies = [
  "windows_aarch64_gnullvm 0.48.0",
  "windows_aarch64_msvc 0.48.0",
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 4682194703a4..4e0e2fde5f06 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -38,10 +38,11 @@ path = "src/lib.rs"
 avro = ["apache-avro", "num-traits", "datafusion-common/avro"]
 compression = ["xz2", "bzip2", "flate2", "zstd", "async-compression"]
 crypto_expressions = ["datafusion-physical-expr/crypto_expressions", "datafusion-optimizer/crypto_expressions"]
-default = ["crypto_expressions", "regex_expressions", "unicode_expressions", "compression"]
+default = ["crypto_expressions", "encoding__expressions", "regex_expressions", "unicode_expressions", "compression"]
 # Enables support for non-scalar, binary operations on dictionaries
 # Note: this results in significant additional codegen
 dictionary_expressions = ["datafusion-physical-expr/dictionary_expressions", "datafusion-optimizer/dictionary_expressions"]
+encoding__expressions = ["datafusion-physical-expr/encoding_expressions"]
 # Used for testing ONLY: causes all values to hash to the same value (test for collisions)
 force_hash_collisions = []
 pyarrow = ["datafusion-common/pyarrow"]
diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs
index 711f10cef253..5444b3a88f05 100644
--- a/datafusion/core/tests/sql/expr.rs
+++ b/datafusion/core/tests/sql/expr.rs
@@ -63,6 +63,66 @@ async fn test_mathematical_expressions_with_null() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+#[cfg_attr(not(feature = "encoding__expressions"), ignore)]
+async fn test_encoding_expressions() -> Result<()> {
+    // Input Utf8
+    test_expression!("encode('tom','base64')", "dG9t");
+    test_expression!("arrow_cast(decode('dG9t','base64'), 'Utf8')", "tom");
+    test_expression!("encode('tom','hex')", "746f6d");
+    test_expression!("arrow_cast(decode('746f6d','hex'), 'Utf8')", "tom");
+
+    // Input LargeUtf8
+    test_expression!("encode(arrow_cast('tom', 'LargeUtf8'),'base64')", "dG9t");
+    test_expression!(
+        "arrow_cast(decode(arrow_cast('dG9t', 'LargeUtf8'),'base64'), 'Utf8')",
+        "tom"
+    );
+    test_expression!("encode(arrow_cast('tom', 'LargeUtf8'),'hex')", "746f6d");
+    test_expression!(
+        "arrow_cast(decode(arrow_cast('746f6d', 'LargeUtf8'),'hex'), 'Utf8')",
+        "tom"
+    );
+
+    // Input Binary
+    test_expression!("encode(arrow_cast('tom', 'Binary'),'base64')", "dG9t");
+    test_expression!(
+        "arrow_cast(decode(arrow_cast('dG9t', 'Binary'),'base64'), 'Utf8')",
+        "tom"
+    );
+    test_expression!("encode(arrow_cast('tom', 'Binary'),'hex')", "746f6d");
+    test_expression!(
+        "arrow_cast(decode(arrow_cast('746f6d', 'Binary'),'hex'), 'Utf8')",
+        "tom"
+    );
+
+    // Input LargeBinary
+    test_expression!("encode(arrow_cast('tom', 'LargeBinary'),'base64')", "dG9t");
+    test_expression!(
+        "arrow_cast(decode(arrow_cast('dG9t', 'LargeBinary'),'base64'), 'Utf8')",
+        "tom"
+    );
+    test_expression!("encode(arrow_cast('tom', 'LargeBinary'),'hex')", "746f6d");
+    test_expression!(
+        "arrow_cast(decode(arrow_cast('746f6d', 'LargeBinary'),'hex'), 'Utf8')",
+        "tom"
+    );
+
+    // NULL
+    test_expression!("encode(NULL,'base64')", "NULL");
+    test_expression!("decode(NULL,'base64')", "NULL");
+    test_expression!("encode(NULL,'hex')", "NULL");
+    test_expression!("decode(NULL,'hex')", "NULL");
+
+    // Empty string
+    test_expression!("encode('','base64')", "");
+    test_expression!("decode('','base64')", "");
+    test_expression!("encode('','hex')", "");
+    test_expression!("decode('','hex')", "");
+
+    Ok(())
+}
+
 #[tokio::test]
 #[cfg_attr(not(feature = "crypto_expressions"), ignore)]
 async fn test_crypto_expressions() -> Result<()> {
diff --git a/datafusion/core/tests/sqllogictests/test_files/encoding.slt b/datafusion/core/tests/sqllogictests/test_files/encoding.slt
new file mode 100644
index 000000000000..b16ceebd6deb
--- /dev/null
+++ b/datafusion/core/tests/sqllogictests/test_files/encoding.slt
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+statement ok
+CREATE TABLE test(
+  num INT,
+  bin_field  BYTEA,
+  base64_field TEXT,
+  hex_field TEXT,
+) as VALUES
+  (0, 'abc',  encode('abc', 'base64'), encode('abc', 'hex')),
+  (1, 'qweqwe',    encode('qweqwe', 'base64'), encode('qweqwe', 'hex')),
+  (2, NULL, NULL, NULL)
+;
+
+# Arrays tests
+query T
+SELECT encode(bin_field, 'hex') FROM test ORDER BY num;
+----
+616263
+717765717765
+NULL
+
+query T
+SELECT arrow_cast(decode(base64_field, 'base64'), 'Utf8') FROM test ORDER BY num;
+----
+abc
+qweqwe
+NULL
+
+query T
+SELECT arrow_cast(decode(hex_field, 'hex'), 'Utf8') FROM test ORDER BY num;
+----
+abc
+qweqwe
+NULL
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index 2eaa2792b9db..69054622757d 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -63,10 +63,14 @@ pub enum BuiltinScalarFunction {
     Cos,
     /// cos
     Cosh,
+    /// Decode
+    Decode,
     /// degrees
     Degrees,
     /// Digest
     Digest,
+    /// Encode
+    Encode,
     /// exp
     Exp,
     /// factorial
@@ -298,7 +302,9 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Coalesce => Volatility::Immutable,
             BuiltinScalarFunction::Cos => Volatility::Immutable,
             BuiltinScalarFunction::Cosh => Volatility::Immutable,
+            BuiltinScalarFunction::Decode => Volatility::Immutable,
             BuiltinScalarFunction::Degrees => Volatility::Immutable,
+            BuiltinScalarFunction::Encode => Volatility::Immutable,
             BuiltinScalarFunction::Exp => Volatility::Immutable,
             BuiltinScalarFunction::Factorial => Volatility::Immutable,
             BuiltinScalarFunction::Floor => Volatility::Immutable,
@@ -626,6 +632,32 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Digest => {
                 utf8_or_binary_to_binary_type(&input_expr_types[0], "digest")
             }
+            BuiltinScalarFunction::Encode => Ok(match input_expr_types[0] {
+                Utf8 => Utf8,
+                LargeUtf8 => LargeUtf8,
+                Binary => Utf8,
+                LargeBinary => LargeUtf8,
+                Null => Null,
+                _ => {
+                    // this error is internal as `data_types` should have captured this.
+                    return Err(DataFusionError::Internal(
+                        "The encode function can only accept utf8 or binary.".to_string(),
+                    ));
+                }
+            }),
+            BuiltinScalarFunction::Decode => Ok(match input_expr_types[0] {
+                Utf8 => Binary,
+                LargeUtf8 => LargeBinary,
+                Binary => Binary,
+                LargeBinary => LargeBinary,
+                Null => Null,
+                _ => {
+                    // this error is internal as `data_types` should have captured this.
+                    return Err(DataFusionError::Internal(
+                        "The decode function can only accept utf8 or binary.".to_string(),
+                    ));
+                }
+            }),
             BuiltinScalarFunction::SplitPart => {
                 utf8_to_str_type(&input_expr_types[0], "split_part")
             }
@@ -895,6 +927,24 @@ impl BuiltinScalarFunction {
                 ],
                 self.volatility(),
             ),
+            BuiltinScalarFunction::Encode => Signature::one_of(
+                vec![
+                    Exact(vec![Utf8, Utf8]),
+                    Exact(vec![LargeUtf8, Utf8]),
+                    Exact(vec![Binary, Utf8]),
+                    Exact(vec![LargeBinary, Utf8]),
+                ],
+                self.volatility(),
+            ),
+            BuiltinScalarFunction::Decode => Signature::one_of(
+                vec![
+                    Exact(vec![Utf8, Utf8]),
+                    Exact(vec![LargeUtf8, Utf8]),
+                    Exact(vec![Binary, Utf8]),
+                    Exact(vec![LargeBinary, Utf8]),
+                ],
+                self.volatility(),
+            ),
             BuiltinScalarFunction::DateTrunc => Signature::one_of(
                 vec![
                     Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
@@ -1175,6 +1225,10 @@ fn aliases(func: &BuiltinScalarFunction) -> &'static [&'static str] {
         BuiltinScalarFunction::SHA384 => &["sha384"],
         BuiltinScalarFunction::SHA512 => &["sha512"],
 
+        // encode/decode
+        BuiltinScalarFunction::Encode => &["encode"],
+        BuiltinScalarFunction::Decode => &["decode"],
+
         // other functions
         BuiltinScalarFunction::Struct => &["struct"],
         BuiltinScalarFunction::ArrowTypeof => &["arrow_typeof"],
diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs
index a45cf0febaa0..480ea5d60890 100644
--- a/datafusion/expr/src/expr_fn.rs
+++ b/datafusion/expr/src/expr_fn.rs
@@ -636,6 +636,8 @@ scalar_expr!(
     "converts the Unicode code point to a UTF8 character"
 );
 scalar_expr!(Digest, digest, input algorithm, "compute the binary hash of `input`, using the `algorithm`");
+scalar_expr!(Encode, encode, input encoding, "encode the `input`, using the `encoding`. encoding can be base64 or hex");
+scalar_expr!(Decode, decode, input encoding, "decode the `input`, using the `encoding`. encoding can be base64 or hex");
 scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase");
 scalar_expr!(Left, left, string n, "returns the first `n` characters in the `string`");
 scalar_expr!(Lower, lower, string, "convert the string to lower case");
@@ -942,6 +944,8 @@ mod test {
         test_scalar_expr!(CharacterLength, character_length, string);
         test_scalar_expr!(Chr, chr, string);
         test_scalar_expr!(Digest, digest, string, algorithm);
+        test_scalar_expr!(Encode, encode, string, encoding);
+        test_scalar_expr!(Decode, decode, string, encoding);
         test_scalar_expr!(Gcd, gcd, arg_1, arg_2);
         test_scalar_expr!(Lcm, lcm, arg_1, arg_2);
         test_scalar_expr!(InitCap, initcap, string);
@@ -1036,4 +1040,30 @@ mod test {
             unreachable!();
         }
     }
+
+    #[test]
+    fn encode_function_definitions() {
+        if let Expr::ScalarFunction(ScalarFunction { fun, args }) =
+            encode(col("tableA.a"), lit("base64"))
+        {
+            let name = BuiltinScalarFunction::Encode;
+            assert_eq!(name, fun);
+            assert_eq!(2, args.len());
+        } else {
+            unreachable!();
+        }
+    }
+
+    #[test]
+    fn decode_function_definitions() {
+        if let Expr::ScalarFunction(ScalarFunction { fun, args }) =
+            decode(col("tableA.a"), lit("hex"))
+        {
+            let name = BuiltinScalarFunction::Decode;
+            assert_eq!(name, fun);
+            assert_eq!(2, args.len());
+        } else {
+            unreachable!();
+        }
+    }
 }
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index a8f82e60e473..d77ce8e2973c 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -34,10 +34,11 @@ path = "src/lib.rs"
 
 [features]
 crypto_expressions = ["md-5", "sha2", "blake2", "blake3"]
-default = ["crypto_expressions", "regex_expressions", "unicode_expressions"]
+default = ["crypto_expressions", "regex_expressions", "unicode_expressions", "encoding_expressions"]
 # Enables support for non-scalar, binary operations on dictionaries
 # Note: this results in significant additional codegen
 dictionary_expressions = ["arrow/dyn_cmp_dict"]
+encoding_expressions = ["base64", "hex"]
 regex_expressions = ["regex"]
 unicode_expressions = ["unicode-segmentation"]
 
@@ -47,6 +48,7 @@ arrow = { workspace = true }
 arrow-array = { workspace = true }
 arrow-buffer = { workspace = true }
 arrow-schema = { workspace = true }
+base64 = { version = "0.21", optional = true }
 blake2 = { version = "^0.10.2", optional = true }
 blake3 = { version = "1.0", optional = true }
 chrono = { version = "0.4.23", default-features = false }
@@ -55,6 +57,7 @@ datafusion-expr = { path = "../expr", version = "27.0.0" }
 datafusion-row = { path = "../row", version = "27.0.0" }
 half = { version = "2.1", default-features = false }
 hashbrown = { version = "0.14", features = ["raw"] }
+hex = { version = "0.4", optional = true }
 indexmap = "2.0.0"
 itertools = { version = "0.11", features = ["use_std"] }
 lazy_static = { version = "^1.4.0" }
diff --git a/datafusion/physical-expr/src/encoding_expressions.rs b/datafusion/physical-expr/src/encoding_expressions.rs
new file mode 100644
index 000000000000..e8b4331e9298
--- /dev/null
+++ b/datafusion/physical-expr/src/encoding_expressions.rs
@@ -0,0 +1,340 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Encoding expressions
+
+use arrow::{
+    array::{Array, ArrayRef, BinaryArray, OffsetSizeTrait, StringArray},
+    datatypes::DataType,
+};
+use base64::{engine::general_purpose, Engine as _};
+use datafusion_common::cast::{as_generic_binary_array, as_generic_string_array};
+use datafusion_common::ScalarValue;
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::ColumnarValue;
+use std::sync::Arc;
+use std::{fmt, str::FromStr};
+
+#[derive(Debug, Copy, Clone)]
+enum Encoding {
+    Base64,
+    Hex,
+}
+
+fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result<ColumnarValue> {
+    match value {
+        ColumnarValue::Array(a) => match a.data_type() {
+            DataType::Utf8 => encoding.encode_utf8_array::<i32>(a.as_ref()),
+            DataType::LargeUtf8 => encoding.encode_utf8_array::<i64>(a.as_ref()),
+            DataType::Binary => encoding.encode_binary_array::<i32>(a.as_ref()),
+            DataType::LargeBinary => encoding.encode_binary_array::<i64>(a.as_ref()),
+            other => Err(DataFusionError::Internal(format!(
+                "Unsupported data type {other:?} for function encode({encoding})",
+            ))),
+        },
+        ColumnarValue::Scalar(scalar) => {
+            match scalar {
+                ScalarValue::Utf8(a) => {
+                    Ok(encoding.encode_scalar(a.as_ref().map(|s: &String| s.as_bytes())))
+                }
+                ScalarValue::LargeUtf8(a) => Ok(encoding
+                    .encode_large_scalar(a.as_ref().map(|s: &String| s.as_bytes()))),
+                ScalarValue::Binary(a) => Ok(
+                    encoding.encode_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))
+                ),
+                ScalarValue::LargeBinary(a) => Ok(encoding
+                    .encode_large_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))),
+                other => Err(DataFusionError::Internal(format!(
+                    "Unsupported data type {other:?} for function encode({encoding})",
+                ))),
+            }
+        }
+    }
+}
+
+fn decode_process(value: &ColumnarValue, encoding: Encoding) -> Result<ColumnarValue> {
+    match value {
+        ColumnarValue::Array(a) => match a.data_type() {
+            DataType::Utf8 => encoding.decode_utf8_array::<i32>(a.as_ref()),
+            DataType::LargeUtf8 => encoding.decode_utf8_array::<i64>(a.as_ref()),
+            DataType::Binary => encoding.decode_binary_array::<i32>(a.as_ref()),
+            DataType::LargeBinary => encoding.decode_binary_array::<i64>(a.as_ref()),
+            other => Err(DataFusionError::Internal(format!(
+                "Unsupported data type {other:?} for function decode({encoding})",
+            ))),
+        },
+        ColumnarValue::Scalar(scalar) => {
+            match scalar {
+                ScalarValue::Utf8(a) => {
+                    encoding.decode_scalar(a.as_ref().map(|s: &String| s.as_bytes()))
+                }
+                ScalarValue::LargeUtf8(a) => encoding
+                    .decode_large_scalar(a.as_ref().map(|s: &String| s.as_bytes())),
+                ScalarValue::Binary(a) => {
+                    encoding.decode_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice()))
+                }
+                ScalarValue::LargeBinary(a) => encoding
+                    .decode_large_scalar(a.as_ref().map(|v: &Vec<u8>| v.as_slice())),
+                other => Err(DataFusionError::Internal(format!(
+                    "Unsupported data type {other:?} for function decode({encoding})",
+                ))),
+            }
+        }
+    }
+}
+
+fn hex_encode(input: &[u8]) -> String {
+    hex::encode(input)
+}
+
+fn base64_encode(input: &[u8]) -> String {
+    general_purpose::STANDARD_NO_PAD.encode(input)
+}
+
+fn hex_decode(input: &[u8]) -> Result<Vec<u8>> {
+    hex::decode(input).map_err(|e| {
+        DataFusionError::Internal(format!("Failed to decode from hex: {}", e))
+    })
+}
+
+fn base64_decode(input: &[u8]) -> Result<Vec<u8>> {
+    general_purpose::STANDARD_NO_PAD.decode(input).map_err(|e| {
+        DataFusionError::Internal(format!("Failed to decode from base64: {}", e))
+    })
+}
+
+macro_rules! encode_to_array {
+    ($METHOD: ident, $INPUT:expr) => {{
+        let utf8_array: StringArray = $INPUT
+            .iter()
+            .map(|x| x.map(|x| $METHOD(x.as_ref())))
+            .collect();
+        Arc::new(utf8_array)
+    }};
+}
+
+macro_rules! decode_to_array {
+    ($METHOD: ident, $INPUT:expr) => {{
+        let binary_array: BinaryArray = $INPUT
+            .iter()
+            .map(|x| x.map(|x| $METHOD(x.as_ref())).transpose())
+            .collect::<Result<_>>()?;
+        Arc::new(binary_array)
+    }};
+}
+
+impl Encoding {
+    /// Renders `value` as text in this encoding (unpadded standard base64, or hex).
+    ///
+    /// Shared by the scalar encoders so the per-encoding dispatch is written once.
+    fn encode_bytes(self, value: &[u8]) -> String {
+        match self {
+            Self::Base64 => general_purpose::STANDARD_NO_PAD.encode(value),
+            Self::Hex => hex::encode(value),
+        }
+    }
+
+    /// Parses `value` as text in this encoding and returns the decoded bytes.
+    ///
+    /// Shared by the scalar decoders so both report failures identically.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`DataFusionError::Internal`] carrying the underlying decoder's message
+    /// when decoding `value` fails.
+    fn decode_bytes(self, value: &[u8]) -> Result<Vec<u8>> {
+        match self {
+            Self::Base64 => general_purpose::STANDARD_NO_PAD
+                .decode(value)
+                .map_err(|e| {
+                    DataFusionError::Internal(format!(
+                        "Failed to decode value using base64: {}",
+                        e
+                    ))
+                }),
+            Self::Hex => hex::decode(value).map_err(|e| {
+                DataFusionError::Internal(format!(
+                    "Failed to decode value using hex: {}",
+                    e
+                ))
+            }),
+        }
+    }
+
+    /// Encodes an optional byte slice into a `Utf8` scalar; `None` stays a NULL scalar.
+    fn encode_scalar(self, value: Option<&[u8]>) -> ColumnarValue {
+        let text = value.map(|bytes| self.encode_bytes(bytes));
+        ColumnarValue::Scalar(ScalarValue::Utf8(text))
+    }
+
+    /// Same as `encode_scalar`, but wraps the text in a `LargeUtf8` scalar.
+    fn encode_large_scalar(self, value: Option<&[u8]>) -> ColumnarValue {
+        let text = value.map(|bytes| self.encode_bytes(bytes));
+        ColumnarValue::Scalar(ScalarValue::LargeUtf8(text))
+    }
+
+    /// Encodes every element of a binary array whose offset type is `T`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by `as_generic_binary_array` for `value`.
+    fn encode_binary_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
+    where
+        T: OffsetSizeTrait,
+    {
+        let input = as_generic_binary_array::<T>(value)?;
+        let encoded: ArrayRef = match self {
+            Self::Base64 => encode_to_array!(base64_encode, input),
+            Self::Hex => encode_to_array!(hex_encode, input),
+        };
+        Ok(ColumnarValue::Array(encoded))
+    }
+
+    /// Encodes every element of a string array whose offset type is `T`.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error returned by `as_generic_string_array` for `value`.
+    fn encode_utf8_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
+    where
+        T: OffsetSizeTrait,
+    {
+        let input = as_generic_string_array::<T>(value)?;
+        let encoded: ArrayRef = match self {
+            Self::Base64 => encode_to_array!(base64_encode, input),
+            Self::Hex => encode_to_array!(hex_encode, input),
+        };
+        Ok(ColumnarValue::Array(encoded))
+    }
+
+    /// Decodes an optional encoded byte slice into a `Binary` scalar; `None` stays NULL.
+    fn decode_scalar(self, value: Option<&[u8]>) -> Result<ColumnarValue> {
+        // `transpose` turns Option<Result<_>> into Result<Option<_>> so `?` can apply.
+        let bytes = value.map(|encoded| self.decode_bytes(encoded)).transpose()?;
+        Ok(ColumnarValue::Scalar(ScalarValue::Binary(bytes)))
+    }
+
+    /// Same as `decode_scalar`, but wraps the bytes in a `LargeBinary` scalar.
+    fn decode_large_scalar(self, value: Option<&[u8]>) -> Result<ColumnarValue> {
+        let bytes = value.map(|encoded| self.decode_bytes(encoded)).transpose()?;
+        Ok(ColumnarValue::Scalar(ScalarValue::LargeBinary(bytes)))
+    }
+
+    /// Decodes every element of a binary array whose offset type is `T`.
+    fn decode_binary_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
+    where
+        T: OffsetSizeTrait,
+    {
+        let input = as_generic_binary_array::<T>(value)?;
+        let decoded: ArrayRef = match self {
+            Self::Base64 => decode_to_array!(base64_decode, input),
+            Self::Hex => decode_to_array!(hex_decode, input),
+        };
+        Ok(ColumnarValue::Array(decoded))
+    }
+
+    /// Decodes every element of a string array whose offset type is `T`.
+    fn decode_utf8_array<T>(self, value: &dyn Array) -> Result<ColumnarValue>
+    where
+        T: OffsetSizeTrait,
+    {
+        let input = as_generic_string_array::<T>(value)?;
+        let decoded: ArrayRef = match self {
+            Self::Base64 => decode_to_array!(base64_decode, input),
+            Self::Hex => decode_to_array!(hex_decode, input),
+        };
+        Ok(ColumnarValue::Array(decoded))
+    }
+}
+
+impl fmt::Display for Encoding {
+    /// Writes the variant's `Debug` rendering converted to lower case.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(&format!("{self:?}").to_lowercase())
+    }
+}
+
+impl FromStr for Encoding {
+    type Err = DataFusionError;
+    /// Parses an encoding name; only the exact strings `base64` and `hex` are accepted.
+    /// Any other name yields a [`DataFusionError::Plan`] listing the supported encodings.
+    fn from_str(name: &str) -> Result<Encoding> {
+        match name {
+            "base64" => Ok(Self::Base64),
+            "hex" => Ok(Self::Hex),
+            _ => {
+                let names = [Self::Base64, Self::Hex].map(|enc| enc.to_string());
+                let options = names.join(", ");
+                Err(DataFusionError::Plan(format!(
+                    "There is no built-in encoding named '{name}', currently supported encodings are: {options}",
+                )))
+            }
+        }
+    }
+}
+
+/// Encodes the first argument using the encoding named by the second argument.
+///
+/// Accepts Binary, LargeBinary, Utf8 or LargeUtf8 data and returns a [`ColumnarValue`].
+/// The encoding must be a non-null string constant; standard ones are base64 and hex.
+pub fn encode(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.len() != 2 {
+        return Err(DataFusionError::Internal(format!(
+            "{:?} args were supplied but encode takes exactly two arguments",
+            args.len(),
+        )));
+    }
+    // Only a constant string names the method: a NULL string, any other scalar type
+    // and any array are all rejected with the same `NotImplemented` error.
+    let encoding = match &args[1] {
+        ColumnarValue::Scalar(
+            ScalarValue::Utf8(Some(method)) | ScalarValue::LargeUtf8(Some(method)),
+        ) => method.parse::<Encoding>()?,
+        _ => {
+            return Err(DataFusionError::NotImplemented(
+                "Second argument to encode must be a constant: Encode using dynamically decided method is not yet supported".into(),
+            ))
+        }
+    };
+    encode_process(&args[0], encoding)
+}
+
+/// Decodes the first argument using the encoding named by the second argument.
+///
+/// Accepts Binary, LargeBinary, Utf8 or LargeUtf8 data and returns a [`ColumnarValue`].
+/// The encoding must be a non-null string constant; standard ones are base64 and hex.
+pub fn decode(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.len() != 2 {
+        return Err(DataFusionError::Internal(format!(
+            "{:?} args were supplied but decode takes exactly two arguments",
+            args.len(),
+        )));
+    }
+    // Only a constant string names the method: a NULL string, any other scalar type
+    // and any array are all rejected with the same `NotImplemented` error.
+    let encoding = match &args[1] {
+        ColumnarValue::Scalar(
+            ScalarValue::Utf8(Some(method)) | ScalarValue::LargeUtf8(Some(method)),
+        ) => method.parse::<Encoding>()?,
+        _ => {
+            return Err(DataFusionError::NotImplemented(
+                "Second argument to decode must be a utf8 constant: Decode using dynamically decided method is not yet supported".into(),
+            ))
+        }
+    };
+    decode_process(&args[0], encoding)
+}
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 3221b6f2932c..08916d89c986 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -181,6 +181,26 @@ pub fn create_physical_expr(
     )))
 }
 
+/// Expands to `crate::encoding_expressions::$FUNC` when the `encoding_expressions`
+/// feature is enabled; `$NAME` is accepted only to mirror the fallback definition.
+#[cfg(feature = "encoding_expressions")]
+macro_rules! invoke_if_encoding_expressions_feature_flag {
+    ($FUNC:ident, $NAME:expr) => { crate::encoding_expressions::$FUNC };
+}
+
+/// Without the feature, expands to a closure that always errors, naming `$NAME`.
+#[cfg(not(feature = "encoding_expressions"))]
+macro_rules! invoke_if_encoding_expressions_feature_flag {
+    ($FUNC:ident, $NAME:expr) => {
+        |_args: &[ColumnarValue]| -> Result<ColumnarValue> {
+            Err(DataFusionError::Internal(format!(
+                "function {} requires compilation with feature flag: encoding_expressions.",
+                $NAME
+            )))
+        }
+    };
+}
+
 #[cfg(feature = "crypto_expressions")]
 macro_rules! invoke_if_crypto_expressions_feature_flag {
     ($FUNC:ident, $NAME:expr) => {{
@@ -565,6 +585,12 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::Digest => {
             Arc::new(invoke_if_crypto_expressions_feature_flag!(digest, "digest"))
         }
+        BuiltinScalarFunction::Decode => Arc::new(
+            invoke_if_encoding_expressions_feature_flag!(decode, "decode"),
+        ),
+        BuiltinScalarFunction::Encode => Arc::new(
+            invoke_if_encoding_expressions_feature_flag!(encode, "encode"),
+        ),
         BuiltinScalarFunction::NullIf => Arc::new(nullif_func),
         BuiltinScalarFunction::OctetLength => Arc::new(|args| match &args[0] {
             ColumnarValue::Array(v) => Ok(ColumnarValue::Array(length(v.as_ref())?)),
diff --git a/datafusion/physical-expr/src/lib.rs b/datafusion/physical-expr/src/lib.rs
index 4811b3a19f29..b695ee169eed 100644
--- a/datafusion/physical-expr/src/lib.rs
+++ b/datafusion/physical-expr/src/lib.rs
@@ -21,6 +21,8 @@ pub mod conditional_expressions;
 #[cfg(feature = "crypto_expressions")]
 pub mod crypto_expressions;
 pub mod datetime_expressions;
+#[cfg(feature = "encoding_expressions")]
+pub mod encoding_expressions;
 pub mod equivalence;
 pub mod execution_props;
 pub mod expressions;
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto
index 81a8bc6b2342..00fa28906cce 100644
--- a/datafusion/proto/proto/datafusion.proto
+++ b/datafusion/proto/proto/datafusion.proto
@@ -564,6 +564,8 @@ enum ScalarFunction {
   Cardinality = 98;
   TrimArray = 99;
   ArrayContains = 100;
+  Encode = 101;
+  Decode = 102;
 }
 
 message ScalarFunctionNode {
diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs
index 3c7763a15463..63303fc32c1a 100644
--- a/datafusion/proto/src/generated/pbjson.rs
+++ b/datafusion/proto/src/generated/pbjson.rs
@@ -17884,6 +17884,8 @@ impl serde::Serialize for ScalarFunction {
             Self::Cardinality => "Cardinality",
             Self::TrimArray => "TrimArray",
             Self::ArrayContains => "ArrayContains",
+            Self::Encode => "Encode",
+            Self::Decode => "Decode",
         };
         serializer.serialize_str(variant)
     }
@@ -17996,6 +17998,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
             "Cardinality",
             "TrimArray",
             "ArrayContains",
+            "Encode",
+            "Decode",
         ];
 
         struct GeneratedVisitor;
@@ -18139,6 +18143,8 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction {
                     "Cardinality" => Ok(ScalarFunction::Cardinality),
                     "TrimArray" => Ok(ScalarFunction::TrimArray),
                     "ArrayContains" => Ok(ScalarFunction::ArrayContains),
+                    "Encode" => Ok(ScalarFunction::Encode),
+                    "Decode" => Ok(ScalarFunction::Decode),
                     _ => Err(serde::de::Error::unknown_variant(value, FIELDS)),
                 }
             }
diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs
index aca90c5f57b8..00eea4d6ed1f 100644
--- a/datafusion/proto/src/generated/prost.rs
+++ b/datafusion/proto/src/generated/prost.rs
@@ -2239,6 +2239,8 @@ pub enum ScalarFunction {
     Cardinality = 98,
     TrimArray = 99,
     ArrayContains = 100,
+    Encode = 101,
+    Decode = 102,
 }
 impl ScalarFunction {
     /// String value of the enum field names used in the ProtoBuf definition.
@@ -2348,6 +2350,8 @@ impl ScalarFunction {
             ScalarFunction::Cardinality => "Cardinality",
             ScalarFunction::TrimArray => "TrimArray",
             ScalarFunction::ArrayContains => "ArrayContains",
+            ScalarFunction::Encode => "Encode",
+            ScalarFunction::Decode => "Decode",
         }
     }
     /// Creates an enum from field names used in the ProtoBuf definition.
@@ -2454,6 +2458,8 @@ impl ScalarFunction {
             "Cardinality" => Some(Self::Cardinality),
             "TrimArray" => Some(Self::TrimArray),
             "ArrayContains" => Some(Self::ArrayContains),
+            "Encode" => Some(Self::Encode),
+            "Decode" => Some(Self::Decode),
             _ => None,
         }
     }
diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index c4dc8eb9b256..6977cc39328a 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -474,6 +474,8 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
             ScalarFunction::Sha384 => Self::SHA384,
             ScalarFunction::Sha512 => Self::SHA512,
             ScalarFunction::Digest => Self::Digest,
+            ScalarFunction::Encode => Self::Encode,
+            ScalarFunction::Decode => Self::Decode,
             ScalarFunction::ToTimestampMillis => Self::ToTimestampMillis,
             ScalarFunction::Log2 => Self::Log2,
             ScalarFunction::Signum => Self::Signum,
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index d81e92c3f3d3..a046be35d455 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1394,6 +1394,8 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
             BuiltinScalarFunction::SHA384 => Self::Sha384,
             BuiltinScalarFunction::SHA512 => Self::Sha512,
             BuiltinScalarFunction::Digest => Self::Digest,
+            BuiltinScalarFunction::Decode => Self::Decode,
+            BuiltinScalarFunction::Encode => Self::Encode,
             BuiltinScalarFunction::ToTimestampMillis => Self::ToTimestampMillis,
             BuiltinScalarFunction::Log2 => Self::Log2,
             BuiltinScalarFunction::Signum => Self::Signum,

From 4aa1656820852e060c0f1cc3923292e865173339 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 5 Jul 2023 13:27:27 -0400
Subject: [PATCH 61/89] chore(deps): update rstest requirement from 0.17.0 to
 0.18.0 (#6847)

Updates the requirements on [rstest](https://github.com/la10736/rstest) to permit the latest version.
- [Release notes](https://github.com/la10736/rstest/releases)
- [Changelog](https://github.com/la10736/rstest/blob/master/CHANGELOG.md)
- [Commits](https://github.com/la10736/rstest/compare/0.17.0...v0.18.0)

---
updated-dependencies:
- dependency-name: rstest
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 datafusion/core/Cargo.toml          | 2 +-
 datafusion/physical-expr/Cargo.toml | 2 +-
 datafusion/sql/Cargo.toml           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 4e0e2fde5f06..1cde72911218 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -107,7 +107,7 @@ env_logger = "0.10"
 half = "2.2.1"
 postgres-protocol = "0.6.4"
 postgres-types = { version = "0.2.4", features = ["derive", "with-chrono-0_4"] }
-rstest = "0.17.0"
+rstest = "0.18.0"
 rust_decimal = { version = "1.27.0", features = ["tokio-pg"] }
 sqllogictest = "0.14.0"
 test-utils = { path = "../../test-utils" }
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index d77ce8e2973c..b7ffa1810cce 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -75,7 +75,7 @@ uuid = { version = "^1.2", features = ["v4"] }
 [dev-dependencies]
 criterion = "0.5"
 rand = "0.8"
-rstest = "0.17.0"
+rstest = "0.18.0"
 
 [[bench]]
 harness = false
diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml
index 91a98809f4f8..58cc6f6f1e0a 100644
--- a/datafusion/sql/Cargo.toml
+++ b/datafusion/sql/Cargo.toml
@@ -48,4 +48,4 @@ sqlparser = { workspace = true }
 ctor = "0.2.0"
 env_logger = "0.10"
 paste = "^1.0"
-rstest = "0.17"
+rstest = "0.18"

From c02d4e494fc7d9fb91b89ab780bef17aa8bbe5b7 Mon Sep 17 00:00:00 2001
From: Kun Liu <liukun@apache.org>
Date: Thu, 6 Jul 2023 01:29:26 +0800
Subject: [PATCH 62/89] [minor] support serde for some functions (#6846)

* fill some scalar function for serde

* fix fmt
---
 datafusion/proto/src/logical_plan/from_proto.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs
index 6977cc39328a..4e2f59a118ea 100644
--- a/datafusion/proto/src/logical_plan/from_proto.rs
+++ b/datafusion/proto/src/logical_plan/from_proto.rs
@@ -40,8 +40,8 @@ use datafusion_expr::{
     array_fill, array_length, array_ndims, array_position, array_positions,
     array_prepend, array_remove, array_replace, array_to_string, ascii, asin, asinh,
     atan, atan2, atanh, bit_length, btrim, cardinality, cbrt, ceil, character_length,
-    chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, date_bin, date_part,
-    date_trunc, degrees, digest, exp,
+    chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, current_date, current_time,
+    date_bin, date_part, date_trunc, degrees, digest, exp,
     expr::{self, InList, Sort, WindowFunction},
     factorial, floor, from_unixtime, gcd, lcm, left, ln, log, log10, log2,
     logical_plan::{PlanType, StringifiedPlan},
@@ -1481,6 +1481,8 @@ pub fn parse_expr(
                     parse_expr(&args[0], registry)?,
                     parse_expr(&args[1], registry)?,
                 )),
+                ScalarFunction::CurrentDate => Ok(current_date()),
+                ScalarFunction::CurrentTime => Ok(current_time()),
                 _ => Err(proto_error(
                     "Protobuf deserialization error: Unsupported scalar function",
                 )),

From e044b5ca9418d7a85226b7dae3a58ce0cdcd1e0d Mon Sep 17 00:00:00 2001
From: Jay Zhan <jayzhan211@gmail.com>
Date: Thu, 6 Jul 2023 01:35:20 +0800
Subject: [PATCH 63/89] Support fixed_size_list for make_array (#6759)

* support make_array for fixed_size_list

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* add arrow-typeof in test

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* fix schema mismatch

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* cleanup code

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

* create array data with correct len

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>

---------

Signed-off-by: jayzhan211 <jayzhan211@gmail.com>
---
 datafusion/common/src/scalar.rs               |  62 +++++++------
 .../tests/data/fixed_size_list_array.parquet  | Bin 0 -> 718 bytes
 .../tests/sqllogictests/test_files/array.slt  |  39 +++++++-
 .../optimizer/src/analyzer/type_coercion.rs   |  86 ++++++++++++++++--
 .../physical-expr/src/array_expressions.rs    |   4 +-
 datafusion/proto/src/logical_plan/to_proto.rs |   4 +
 datafusion/sql/src/expr/arrow_cast.rs         |  19 +++-
 7 files changed, 174 insertions(+), 40 deletions(-)
 create mode 100644 datafusion/core/tests/data/fixed_size_list_array.parquet

diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index 4fef60020f77..b0769df1e9db 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -101,7 +101,9 @@ pub enum ScalarValue {
     FixedSizeBinary(i32, Option<Vec<u8>>),
     /// large binary
     LargeBinary(Option<Vec<u8>>),
-    /// list of nested ScalarValue
+    /// Fixed size list of nested ScalarValue
+    Fixedsizelist(Option<Vec<ScalarValue>>, FieldRef, i32),
+    /// List of nested ScalarValue
     List(Option<Vec<ScalarValue>>, FieldRef),
     /// Date stored as a signed 32bit int days since UNIX epoch 1970-01-01
     Date32(Option<i32>),
@@ -196,6 +198,10 @@ impl PartialEq for ScalarValue {
             (FixedSizeBinary(_, _), _) => false,
             (LargeBinary(v1), LargeBinary(v2)) => v1.eq(v2),
             (LargeBinary(_), _) => false,
+            (Fixedsizelist(v1, t1, l1), Fixedsizelist(v2, t2, l2)) => {
+                v1.eq(v2) && t1.eq(t2) && l1.eq(l2)
+            }
+            (Fixedsizelist(_, _, _), _) => false,
             (List(v1, t1), List(v2, t2)) => v1.eq(v2) && t1.eq(t2),
             (List(_, _), _) => false,
             (Date32(v1), Date32(v2)) => v1.eq(v2),
@@ -315,6 +321,14 @@ impl PartialOrd for ScalarValue {
             (FixedSizeBinary(_, _), _) => None,
             (LargeBinary(v1), LargeBinary(v2)) => v1.partial_cmp(v2),
             (LargeBinary(_), _) => None,
+            (Fixedsizelist(v1, t1, l1), Fixedsizelist(v2, t2, l2)) => {
+                if t1.eq(t2) && l1.eq(l2) {
+                    v1.partial_cmp(v2)
+                } else {
+                    None
+                }
+            }
+            (Fixedsizelist(_, _, _), _) => None,
             (List(v1, t1), List(v2, t2)) => {
                 if t1.eq(t2) {
                     v1.partial_cmp(v2)
@@ -1518,6 +1532,11 @@ impl std::hash::Hash for ScalarValue {
             Binary(v) => v.hash(state),
             FixedSizeBinary(_, v) => v.hash(state),
             LargeBinary(v) => v.hash(state),
+            Fixedsizelist(v, t, l) => {
+                v.hash(state);
+                t.hash(state);
+                l.hash(state);
+            }
             List(v, t) => {
                 v.hash(state);
                 t.hash(state);
@@ -1994,6 +2013,10 @@ impl ScalarValue {
             ScalarValue::Binary(_) => DataType::Binary,
             ScalarValue::FixedSizeBinary(sz, _) => DataType::FixedSizeBinary(*sz),
             ScalarValue::LargeBinary(_) => DataType::LargeBinary,
+            ScalarValue::Fixedsizelist(_, field, length) => DataType::FixedSizeList(
+                Arc::new(Field::new("item", field.data_type().clone(), true)),
+                *length,
+            ),
             ScalarValue::List(_, field) => DataType::List(Arc::new(Field::new(
                 "item",
                 field.data_type().clone(),
@@ -2142,6 +2165,7 @@ impl ScalarValue {
             ScalarValue::Binary(v) => v.is_none(),
             ScalarValue::FixedSizeBinary(_, v) => v.is_none(),
             ScalarValue::LargeBinary(v) => v.is_none(),
+            ScalarValue::Fixedsizelist(v, ..) => v.is_none(),
             ScalarValue::List(v, _) => v.is_none(),
             ScalarValue::Date32(v) => v.is_none(),
             ScalarValue::Date64(v) => v.is_none(),
@@ -2847,6 +2871,9 @@ impl ScalarValue {
                         .collect::<LargeBinaryArray>(),
                 ),
             },
+            ScalarValue::Fixedsizelist(..) => {
+                unimplemented!("FixedSizeList is not supported yet")
+            }
             ScalarValue::List(values, field) => Arc::new(match field.data_type() {
                 DataType::Boolean => build_list!(BooleanBuilder, Boolean, values, size),
                 DataType::Int8 => build_list!(Int8Builder, Int8, values, size),
@@ -3294,6 +3321,7 @@ impl ScalarValue {
             ScalarValue::LargeBinary(val) => {
                 eq_array_primitive!(array, index, LargeBinaryArray, val)
             }
+            ScalarValue::Fixedsizelist(..) => unimplemented!(),
             ScalarValue::List(_, _) => unimplemented!(),
             ScalarValue::Date32(val) => {
                 eq_array_primitive!(array, index, Date32Array, val)
@@ -3414,7 +3442,8 @@ impl ScalarValue {
                 | ScalarValue::LargeBinary(b) => {
                     b.as_ref().map(|b| b.capacity()).unwrap_or_default()
                 }
-                ScalarValue::List(vals, field) => {
+                ScalarValue::Fixedsizelist(vals, field, _)
+                | ScalarValue::List(vals, field) => {
                     vals.as_ref()
                         .map(|vals| Self::size_of_vec(vals) - std::mem::size_of_val(vals))
                         .unwrap_or_default()
@@ -3732,29 +3761,9 @@ impl fmt::Display for ScalarValue {
             ScalarValue::TimestampNanosecond(e, _) => format_option!(f, e)?,
             ScalarValue::Utf8(e) => format_option!(f, e)?,
             ScalarValue::LargeUtf8(e) => format_option!(f, e)?,
-            ScalarValue::Binary(e) => match e {
-                Some(l) => write!(
-                    f,
-                    "{}",
-                    l.iter()
-                        .map(|v| format!("{v}"))
-                        .collect::<Vec<_>>()
-                        .join(",")
-                )?,
-                None => write!(f, "NULL")?,
-            },
-            ScalarValue::FixedSizeBinary(_, e) => match e {
-                Some(l) => write!(
-                    f,
-                    "{}",
-                    l.iter()
-                        .map(|v| format!("{v}"))
-                        .collect::<Vec<_>>()
-                        .join(",")
-                )?,
-                None => write!(f, "NULL")?,
-            },
-            ScalarValue::LargeBinary(e) => match e {
+            ScalarValue::Binary(e)
+            | ScalarValue::FixedSizeBinary(_, e)
+            | ScalarValue::LargeBinary(e) => match e {
                 Some(l) => write!(
                     f,
                     "{}",
@@ -3765,7 +3774,7 @@ impl fmt::Display for ScalarValue {
                 )?,
                 None => write!(f, "NULL")?,
             },
-            ScalarValue::List(e, _) => match e {
+            ScalarValue::Fixedsizelist(e, ..) | ScalarValue::List(e, _) => match e {
                 Some(l) => write!(
                     f,
                     "{}",
@@ -3849,6 +3858,7 @@ impl fmt::Debug for ScalarValue {
             }
             ScalarValue::LargeBinary(None) => write!(f, "LargeBinary({self})"),
             ScalarValue::LargeBinary(Some(_)) => write!(f, "LargeBinary(\"{self}\")"),
+            ScalarValue::Fixedsizelist(..) => write!(f, "FixedSizeList([{self}])"),
             ScalarValue::List(_, _) => write!(f, "List([{self}])"),
             ScalarValue::Date32(_) => write!(f, "Date32(\"{self}\")"),
             ScalarValue::Date64(_) => write!(f, "Date64(\"{self}\")"),
diff --git a/datafusion/core/tests/data/fixed_size_list_array.parquet b/datafusion/core/tests/data/fixed_size_list_array.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..aafc5ce62f52a37e3110e64277ca5f165acc71d5
GIT binary patch
literal 718
zcmcgqO-sW-5S?t6n2LuWZrDH$IcV_E)>bGMX`z$i7gnqRQNc?swJo%^rqzIdSAT&A
z5l_x;T2EfQIEOcxnK$!h$hI*q3oaU>+7h;ab%1~ykTZbgRvKVJW_;lVh(K)SiRlPS
zSiZEE6=@p_jtkqrWU1)X7@@hU&^DR*2H}_C3+`1Mu)Hd}FY(cEHs`~6Z!DN=yB~)8
zZiKg?{TCfCPv?MOK;V!AAca;1q&4dfdgBhIDom1?V8UwEs9xbx3#%T!#iU*UT<-av
z|34e5OL=J1wHpAQFV`@R&#(5gh2^KQqiheAU1&r+MNBd3;~`=nn|+;)BE&QG)kHi|
zp5mpa=s=}3ytHXvN>PlLDyd1?Xlg-hDbU$ytOp_<DIF@3CsTS9kT|v8V3cOMG<>;V
zgo7WZqApGAcdp~nUB;yTRPB454b`+LE7j{DgJ~yueCo{$-RZQ@Ns>t#Jol2>aPkn8
UcZ;QB$@6{sg|GDZ+VqmY0X-RgtN;K2

literal 0
HcmV?d00001

diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt
index 0d99e6cbb3a1..1f43c5f8e154 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -417,8 +417,6 @@ select make_array(x, y) from foo2;
 
 # array_contains
 
-
-
 # array_contains scalar function #1
 query BBB rowsort
 select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)), array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]);
@@ -531,3 +529,40 @@ SELECT
 FROM t
 ----
 true true
+
+statement ok
+CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION 'tests/data/fixed_size_list_array.parquet';
+
+query T
+select arrow_typeof(f0) from fixed_size_list_array;
+----
+FixedSizeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2)
+FixedSizeList(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 2)
+
+query ?
+select * from fixed_size_list_array;
+----
+[1, 2]
+[3, 4]
+
+query ?
+select f0 from fixed_size_list_array;
+----
+[1, 2]
+[3, 4]
+
+query ?
+select arrow_cast(f0, 'List(Int64)') from fixed_size_list_array;
+----
+[1, 2]
+[3, 4]
+
+query ?
+select make_array(arrow_cast(f0, 'List(Int64)')) from fixed_size_list_array
+----
+[[1, 2], [3, 4]]
+
+query ?
+select make_array(f0) from fixed_size_list_array
+----
+[[1, 2], [3, 4]]
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 5d1fef53520b..7cf4a233f7e0 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -330,8 +330,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter {
                     &self.schema,
                     &fun.signature,
                 )?;
-                let expr = Expr::ScalarUDF(ScalarUDF::new(fun, new_expr));
-                Ok(expr)
+                Ok(Expr::ScalarUDF(ScalarUDF::new(fun, new_expr)))
             }
             Expr::ScalarFunction(ScalarFunction { fun, args }) => {
                 let new_args = coerce_arguments_for_signature(
@@ -520,7 +519,7 @@ fn coerce_window_frame(
 fn get_casted_expr_for_bool_op(expr: &Expr, schema: &DFSchemaRef) -> Result<Expr> {
     let left_type = expr.get_type(schema)?;
     get_input_types(&left_type, &Operator::IsDistinctFrom, &DataType::Boolean)?;
-    expr.clone().cast_to(&DataType::Boolean, schema)
+    cast_expr(expr, &DataType::Boolean, schema)
 }
 
 /// Returns `expressions` coerced to types compatible with
@@ -559,6 +558,25 @@ fn coerce_arguments_for_fun(
         return Ok(vec![]);
     }
 
+    let mut expressions: Vec<Expr> = expressions.to_vec();
+
+    // Cast Fixedsizelist to List for array functions
+    if *fun == BuiltinScalarFunction::MakeArray {
+        expressions = expressions
+            .into_iter()
+            .map(|expr| {
+                // Surface a type-resolution failure as an error rather than a panic.
+                match expr.get_type(schema)? {
+                    // The element field is reused unchanged for the `List` type.
+                    DataType::FixedSizeList(field, _) => {
+                        expr.cast_to(&DataType::List(field), schema)
+                    }
+                    _ => Ok(expr),
+                }
+            })
+            .collect::<Result<Vec<_>>>()?;
+    }
+
     if *fun == BuiltinScalarFunction::MakeArray {
         // Find the final data type for the function arguments
         let current_types = expressions
@@ -579,8 +597,7 @@ fn coerce_arguments_for_fun(
             .map(|(expr, from_type)| cast_array_expr(expr, &from_type, &new_type, schema))
             .collect();
     }
-
-    Ok(expressions.to_vec())
+    Ok(expressions)
 }
 
 /// Cast `expr` to the specified type, if possible
@@ -598,7 +615,7 @@ fn cast_array_expr(
     if from_type.equals_datatype(&DataType::Null) {
         Ok(expr.clone())
     } else {
-        expr.clone().cast_to(to_type, schema)
+        cast_expr(expr, to_type, schema)
     }
 }
 
@@ -625,7 +642,7 @@ fn coerce_agg_exprs_for_signature(
     input_exprs
         .iter()
         .enumerate()
-        .map(|(i, expr)| expr.clone().cast_to(&coerced_types[i], schema))
+        .map(|(i, expr)| cast_expr(expr, &coerced_types[i], schema))
         .collect::<Result<Vec<_>>>()
 }
 
@@ -746,6 +763,7 @@ mod test {
 
     use arrow::datatypes::{DataType, TimeUnit};
 
+    use arrow::datatypes::Field;
     use datafusion_common::tree_node::TreeNode;
     use datafusion_common::{DFField, DFSchema, DFSchemaRef, Result, ScalarValue};
     use datafusion_expr::expr::{self, InSubquery, Like, ScalarFunction};
@@ -763,7 +781,7 @@ mod test {
     use datafusion_physical_expr::expressions::AvgAccumulator;
 
     use crate::analyzer::type_coercion::{
-        coerce_case_expression, TypeCoercion, TypeCoercionRewriter,
+        cast_expr, coerce_case_expression, TypeCoercion, TypeCoercionRewriter,
     };
     use crate::test::assert_analyzed_plan_eq;
 
@@ -1220,6 +1238,58 @@ mod test {
         Ok(())
     }
 
+    #[test]
+    fn test_casting_for_fixed_size_list() -> Result<()> {
+        let val = lit(ScalarValue::Fixedsizelist(
+            Some(vec![
+                ScalarValue::from(1i32),
+                ScalarValue::from(2i32),
+                ScalarValue::from(3i32),
+            ]),
+            Arc::new(Field::new("item", DataType::Int32, true)),
+            3,
+        ));
+        let expr = Expr::ScalarFunction(ScalarFunction {
+            fun: BuiltinScalarFunction::MakeArray,
+            args: vec![val.clone()],
+        });
+        let schema = Arc::new(DFSchema::new_with_metadata(
+            vec![DFField::new_unqualified(
+                "item",
+                DataType::FixedSizeList(
+                    Arc::new(Field::new("a", DataType::Int32, true)),
+                    3,
+                ),
+                true,
+            )],
+            std::collections::HashMap::new(),
+        )?);
+        let mut rewriter = TypeCoercionRewriter { schema };
+        let result = expr.rewrite(&mut rewriter)?;
+        // Build the expectation by hand: cast the same literal to a `List` of Int32.
+        let schema = Arc::new(DFSchema::new_with_metadata(
+            vec![DFField::new_unqualified(
+                "item",
+                DataType::List(Arc::new(Field::new("a", DataType::Int32, true))),
+                true,
+            )],
+            std::collections::HashMap::new(),
+        )?);
+        let expected_casted_expr = cast_expr(
+            &val,
+            &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))),
+            &schema,
+        )?;
+        // The cast literal is wrapped in the same `MakeArray` call as the input.
+        let expected = Expr::ScalarFunction(ScalarFunction {
+            fun: BuiltinScalarFunction::MakeArray,
+            args: vec![expected_casted_expr],
+        });
+        // The rewriter's output must equal the hand-built expression.
+        assert_eq!(result, expected);
+        Ok(())
+    }
+
     #[test]
     fn test_type_coercion_rewrite() -> Result<()> {
         // gt
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 911c94b06d76..bddeef526a4d 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -111,7 +111,7 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
         DataType::List(..) => {
             let arrays =
                 downcast_vec!(args, ListArray).collect::<Result<Vec<&ListArray>>>()?;
-            let len: i32 = arrays.len() as i32;
+            let len = arrays.iter().map(|arr| arr.len() as i32).sum();
             let capacity =
                 Capacities::Array(arrays.iter().map(|a| a.get_array_memory_size()).sum());
             let array_data: Vec<_> =
@@ -125,7 +125,7 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
             }
 
             let list_data_type =
-                DataType::List(Arc::new(Field::new("item", data_type, false)));
+                DataType::List(Arc::new(Field::new("item", data_type, true)));
 
             let list_data = ArrayData::builder(list_data_type)
                 .len(1)
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index a046be35d455..4a4b16db809a 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1068,6 +1068,10 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue {
                     Value::LargeUtf8Value(s.to_owned())
                 })
             }
+            ScalarValue::Fixedsizelist(..) => Err(Error::General(
+                "Proto serialization error: ScalarValue::Fixedsizelist not supported"
+                    .to_string(),
+            )),
             ScalarValue::List(values, boxed_field) => {
                 let is_null = values.is_none();
 
diff --git a/datafusion/sql/src/expr/arrow_cast.rs b/datafusion/sql/src/expr/arrow_cast.rs
index 91a42f473642..46957a9cdd86 100644
--- a/datafusion/sql/src/expr/arrow_cast.rs
+++ b/datafusion/sql/src/expr/arrow_cast.rs
@@ -18,9 +18,9 @@
 //! Implementation of the `arrow_cast` function that allows
 //! casting to arbitrary arrow types (rather than SQL types)
 
-use std::{fmt::Display, iter::Peekable, str::Chars};
+use std::{fmt::Display, iter::Peekable, str::Chars, sync::Arc};
 
-use arrow_schema::{DataType, IntervalUnit, TimeUnit};
+use arrow_schema::{DataType, Field, IntervalUnit, TimeUnit};
 use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue};
 
 use datafusion_expr::{Expr, ExprSchemable};
@@ -150,6 +150,7 @@ impl<'a> Parser<'a> {
             Token::Decimal128 => self.parse_decimal_128(),
             Token::Decimal256 => self.parse_decimal_256(),
             Token::Dictionary => self.parse_dictionary(),
+            Token::List => self.parse_list(),
             tok => Err(make_error(
                 self.val,
                 &format!("finding next type, got unexpected '{tok}'"),
@@ -157,6 +158,16 @@ impl<'a> Parser<'a> {
         }
     }
 
+    /// Parses the parenthesized element type that follows a `List` token,
+    /// producing a `DataType::List` with a nullable element field named "item"
+    fn parse_list(&mut self) -> Result<DataType> {
+        self.expect_token(Token::LParen)?;
+        let data_type = self.parse_next_type()?;
+        self.expect_token(Token::RParen)?;
+        let field = Field::new("item", data_type, true);
+        Ok(DataType::List(Arc::new(field)))
+    }
+
     /// Parses the next timeunit
     fn parse_time_unit(&mut self, context: &str) -> Result<TimeUnit> {
         match self.next_token()? {
@@ -486,6 +497,8 @@ impl<'a> Tokenizer<'a> {
             "Date32" => Token::SimpleType(DataType::Date32),
             "Date64" => Token::SimpleType(DataType::Date64),
 
+            "List" => Token::List,
+
             "Second" => Token::TimeUnit(TimeUnit::Second),
             "Millisecond" => Token::TimeUnit(TimeUnit::Millisecond),
             "Microsecond" => Token::TimeUnit(TimeUnit::Microsecond),
@@ -573,12 +586,14 @@ enum Token {
     None,
     Integer(i64),
     DoubleQuotedString(String),
+    List,
 }
 
 impl Display for Token {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Token::SimpleType(t) => write!(f, "{t}"),
+            Token::List => write!(f, "List"),
             Token::Timestamp => write!(f, "Timestamp"),
             Token::Time32 => write!(f, "Time32"),
             Token::Time64 => write!(f, "Time64"),

From e8d5c1776f46ab31143bbacefbe0e38e05a376d3 Mon Sep 17 00:00:00 2001
From: vincev <vince.vasta@gmail.com>
Date: Wed, 5 Jul 2023 20:48:36 +0200
Subject: [PATCH 64/89] Improve median performance. (#6837)

* Improve median performance.

* Fix formatting.

* Review feedback

* Renamed arrays size.
---
 .../physical-expr/src/aggregate/median.rs     | 48 +++++++++++++++----
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/median.rs b/datafusion/physical-expr/src/aggregate/median.rs
index 6f79c98a6c3a..28f1fc31995a 100644
--- a/datafusion/physical-expr/src/aggregate/median.rs
+++ b/datafusion/physical-expr/src/aggregate/median.rs
@@ -66,6 +66,7 @@ impl AggregateExpr for Median {
     fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
         Ok(Box::new(MedianAccumulator {
             data_type: self.data_type.clone(),
+            arrays: vec![],
             all_values: vec![],
         }))
     }
@@ -108,16 +109,21 @@ impl PartialEq<dyn Any> for Median {
 /// The median accumulator accumulates the raw input values
 /// as `ScalarValue`s
 ///
-/// The intermediate state is represented as a List of those scalars
+/// Values are held in two places: `all_values`, a list of scalar values updated
+/// by `merge_batch`, and `arrays`, the `ArrayRef`s stored as-is by `update_batch`.
+/// The arrays are only converted to scalar values in `state` and `evaluate`, so
+/// `update_batch` avoids expensive per-value conversions and allocations.
 struct MedianAccumulator {
     data_type: DataType,
+    arrays: Vec<ArrayRef>,
     all_values: Vec<ScalarValue>,
 }
 
 impl Accumulator for MedianAccumulator {
     fn state(&self) -> Result<Vec<ScalarValue>> {
-        let state =
-            ScalarValue::new_list(Some(self.all_values.clone()), self.data_type.clone());
+        // Include values already merged via `merge_batch`, not only buffered arrays.
+        let values = [self.all_values.clone(), to_scalar_values(&self.arrays)?].concat();
+        let state = ScalarValue::new_list(Some(values), self.data_type.clone());
         Ok(vec![state])
     }
 
@@ -125,12 +131,9 @@ impl Accumulator for MedianAccumulator {
         assert_eq!(values.len(), 1);
         let array = &values[0];
 
+        // Defer conversions to scalar values to final evaluation.
         assert_eq!(array.data_type(), &self.data_type);
-        self.all_values.reserve(array.len());
-        for index in 0..array.len() {
-            self.all_values
-                .push(ScalarValue::try_from_array(array, index)?);
-        }
+        self.arrays.push(array.clone());
 
         Ok(())
     }
@@ -157,7 +160,14 @@ impl Accumulator for MedianAccumulator {
     }
 
     fn evaluate(&self) -> Result<ScalarValue> {
-        if !self.all_values.iter().any(|v| !v.is_null()) {
+        let batch_values = to_scalar_values(&self.arrays)?;
+
+        if !self
+            .all_values
+            .iter()
+            .chain(batch_values.iter())
+            .any(|v| !v.is_null())
+        {
             return ScalarValue::try_from(&self.data_type);
         }
 
@@ -166,6 +176,7 @@ impl Accumulator for MedianAccumulator {
         let array = ScalarValue::iter_to_array(
             self.all_values
                 .iter()
+                .chain(batch_values.iter())
                 // ignore null values
                 .filter(|v| !v.is_null())
                 .cloned(),
@@ -214,13 +225,30 @@ impl Accumulator for MedianAccumulator {
     }
 
     fn size(&self) -> usize {
-        std::mem::size_of_val(self) + ScalarValue::size_of_vec(&self.all_values)
+        // Buffered arrays are accounted for in bytes, not in number of elements.
+        let array_sizes = self.arrays.iter().map(|a| a.get_array_memory_size());
+        std::mem::size_of_val(self)
+            + ScalarValue::size_of_vec(&self.all_values)
+            + array_sizes.sum::<usize>()
             - std::mem::size_of_val(&self.all_values)
             + self.data_type.size()
             - std::mem::size_of_val(&self.data_type)
     }
 }
 
+/// Converts every element of every array in `arrays` into a `ScalarValue`,
+/// keeping the arrays' order and each array's element order.
+fn to_scalar_values(arrays: &[ArrayRef]) -> Result<Vec<ScalarValue>> {
+    let num_values: usize = arrays.iter().map(|a| a.len()).sum();
+    let mut all_values = Vec::with_capacity(num_values);
+    for array in arrays {
+        for index in 0..array.len() {
+            all_values.push(ScalarValue::try_from_array(array, index)?);
+        }
+    }
+    Ok(all_values)
+}
+
 /// Given a returns `array[indicies[indicie_index]]` as a `ScalarValue`
 fn scalar_at_index(
     array: &dyn Array,

From cf72ea065d1b77fb4f8c63cd97cb8d6b56d9b20b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Berkay=20=C5=9Eahin?=
 <124376117+berkaysynnada@users.noreply.github.com>
Date: Wed, 5 Jul 2023 21:50:51 +0300
Subject: [PATCH 65/89] Mismatch in MemTable of Select Into when projecting on
 aggregate window functions (#6566)

* Schema check of partitions and input plan is removed for newly registered tables.

* minor changes

* In Select Into queries, aggregate windows are re-aliased with physical_name()

* debugging

* display_name() output is simplified for window functions

* Windows are displayed in long format

* Window names in tests are edited

* Create table as test is added

---------

Co-authored-by: Mustafa Akur <mustafa.akur@synnada.ai>
---
 datafusion/core/src/physical_planner.rs       |   2 +-
 .../tests/sqllogictests/test_files/ddl.slt    |  30 ++-
 .../tests/sqllogictests/test_files/insert.slt |  10 +-
 .../tests/sqllogictests/test_files/window.slt | 194 ++++++++---------
 .../user_defined_window_functions.rs          | 196 +++++++++---------
 5 files changed, 230 insertions(+), 202 deletions(-)

diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
index a4aab95635c8..f00f5e0d5e4b 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -1597,7 +1597,7 @@ pub fn create_window_expr(
     // unpack aliased logical expressions, e.g. "sum(col) over () as total"
     let (name, e) = match e {
         Expr::Alias(Alias { expr, name, .. }) => (name.clone(), expr.as_ref()),
-        _ => (physical_name(e)?, e),
+        _ => (e.display_name()?, e),
     };
     create_window_expr_with_name(
         e,
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
index 1cf67be3a218..954bc4d991bc 100644
--- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -348,6 +348,9 @@ SELECT * FROM new_table;
 statement ok
 DROP TABLE new_table
 
+statement ok
+DROP TABLE my_table;
+
 # create_table_with_schema_as_multiple_values
 statement ok
 CREATE TABLE test_table(c1 int, c2 float, c3 varchar) AS VALUES(1, 2, 'hello'),(2, 1, 'there'),(3, 0, '!');
@@ -362,7 +365,32 @@ SELECT * FROM new_table
 2 1 there
 
 statement ok
-DROP TABLE my_table;
+DROP TABLE new_table;
+
+# Select into without alias names of window aggregates
+statement ok
+SELECT SUM(c1) OVER(ORDER BY c2), c2, c3 INTO new_table FROM test_table
+
+query IRT
+SELECT * FROM new_table
+----
+3 0 !
+5 1 there
+6 2 hello
+
+statement ok
+DROP TABLE new_table;
+
+# Create table as without alias names of window aggregates
+statement ok
+CREATE TABLE new_table AS SELECT SUM(c1) OVER(ORDER BY c2), c2, c3 FROM test_table
+
+query IRT
+SELECT * FROM new_table
+----
+3 0 !
+5 1 there
+6 2 hello
 
 statement ok
 DROP TABLE new_table;
diff --git a/datafusion/core/tests/sqllogictests/test_files/insert.slt b/datafusion/core/tests/sqllogictests/test_files/insert.slt
index c710859a7b72..faa519834c6f 100644
--- a/datafusion/core/tests/sqllogictests/test_files/insert.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/insert.slt
@@ -67,8 +67,8 @@ physical_plan
 InsertExec: sink=MemoryTable (partitions=1)
 --ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2]
 ----SortPreservingMergeExec: [c1@2 ASC NULLS LAST]
-------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4), COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(UInt8(1)), c1@0 as c1]
---------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4): Ok(Field { name: "SUM(aggregate_test_100.c4)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1]
+--------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
 ----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST]
 ------------CoalesceBatchesExec: target_batch_size=8192
 --------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8
@@ -128,7 +128,7 @@ physical_plan
 InsertExec: sink=MemoryTable (partitions=1)
 --CoalescePartitionsExec
 ----ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as field1, COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as field2]
-------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4): Ok(Field { name: "SUM(aggregate_test_100.c4)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
 --------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST]
 ----------CoalesceBatchesExec: target_batch_size=8192
 ------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8
@@ -179,8 +179,8 @@ InsertExec: sink=MemoryTable (partitions=8)
 --ProjectionExec: expr=[a1@0 as a1, a2@1 as a2]
 ----SortPreservingMergeExec: [c1@2 ASC NULLS LAST]
 ------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1]
---------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4): Ok(Field { name: "SUM(aggregate_test_100.c4)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
-----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST]
+ --------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+ ----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST]
 ------------CoalesceBatchesExec: target_batch_size=8192
 --------------RepartitionExec: partitioning=Hash([c1@0], 8), input_partitions=8
 ----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
diff --git a/datafusion/core/tests/sqllogictests/test_files/window.slt b/datafusion/core/tests/sqllogictests/test_files/window.slt
index d77df127a80a..09339d7e499f 100644
--- a/datafusion/core/tests/sqllogictests/test_files/window.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/window.slt
@@ -359,7 +359,7 @@ SortPreservingMergeExec: [b@0 ASC NULLS LAST]
 --ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a, MAX(d.seq)@2 as MAX(d.seq)]
 ----AggregateExec: mode=Single, gby=[b@2 as b], aggr=[MAX(d.a), MAX(d.seq)], ordering_mode=FullyOrdered
 ------ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b]
---------BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ----------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST]
 ------------CoalesceBatchesExec: target_batch_size=8192
 --------------RepartitionExec: partitioning=Hash([b@1], 4), input_partitions=4
@@ -1213,9 +1213,9 @@ Projection: aggregate_test_100.c9, SUM(aggregate_test_100.c9) ORDER BY [aggregat
 --------TableScan: aggregate_test_100 projection=[c8, c9]
 physical_plan
 ProjectionExec: expr=[c9@0 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum2]
---BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
-----ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as SUM(aggregate_test_100.c9)]
-------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+--BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]
+------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 --------SortExec: expr=[c9@1 ASC NULLS LAST,c8@0 ASC NULLS LAST]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c8, c9], has_header=true
 
@@ -1233,10 +1233,10 @@ Projection: aggregate_test_100.c2, MAX(aggregate_test_100.c9) ORDER BY [aggregat
 ------WindowAggr: windowExpr=[[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
 --------TableScan: aggregate_test_100 projection=[c2, c9]
 physical_plan
-ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9), SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9), MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9)]
---WindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }]
-----BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c9): Ok(Field { name: "MAX(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted]
-------BoundedWindowAggExec: wdw=[MIN(aggregate_test_100.c9): Ok(Field { name: "MIN(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]
+--WindowAggExec: wdw=[SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }]
+----BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 --------SortExec: expr=[c2@0 ASC NULLS LAST,c9@1 ASC NULLS LAST]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], has_header=true
 
@@ -1257,11 +1257,11 @@ Sort: aggregate_test_100.c2 ASC NULLS LAST
 ----------TableScan: aggregate_test_100 projection=[c2, c9]
 physical_plan
 SortExec: expr=[c2@0 ASC NULLS LAST]
---ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9), SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9), MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9)]
-----WindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }]
-------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c9): Ok(Field { name: "MAX(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+--ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]
+----WindowAggExec: wdw=[SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }]
+------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 --------SortExec: expr=[c9@1 ASC NULLS LAST,c2@0 ASC NULLS LAST]
-----------BoundedWindowAggExec: wdw=[MIN(aggregate_test_100.c9): Ok(Field { name: "MIN(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----------BoundedWindowAggExec: wdw=[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ------------SortExec: expr=[c2@0 ASC NULLS LAST,c9@1 ASC NULLS LAST]
 --------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c9], has_header=true
 
@@ -1282,13 +1282,13 @@ Projection: SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggr
 ------WindowAggr: windowExpr=[[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING]]
 --------TableScan: aggregate_test_100 projection=[c1, c2, c4]
 physical_plan
-ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@2 as SUM(aggregate_test_100.c4), COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as COUNT(UInt8(1))]
---BoundedWindowAggExec: wdw=[COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@2 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING]
+--BoundedWindowAggExec: wdw=[COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
 ----SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST]
 ------CoalesceBatchesExec: target_batch_size=4096
 --------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2
-----------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4)]
-------------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4): Ok(Field { name: "SUM(aggregate_test_100.c4)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+----------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING]
+------------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
 --------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST]
 ----------------CoalesceBatchesExec: target_batch_size=4096
 ------------------RepartitionExec: partitioning=Hash([c1@0, c2@1], 2), input_partitions=2
@@ -1315,8 +1315,8 @@ Projection: aggregate_test_100.c9, SUM(aggregate_test_100.c9) ORDER BY [aggregat
 physical_plan
 ProjectionExec: expr=[c9@0 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as sum2]
 --GlobalLimitExec: skip=0, fetch=5
-----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
-------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 --------SortExec: expr=[c9@0 DESC]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
 
@@ -1356,8 +1356,8 @@ Projection: aggregate_test_100.c9, FIRST_VALUE(aggregate_test_100.c9) ORDER BY [
 physical_plan
 ProjectionExec: expr=[c9@0 as c9, FIRST_VALUE(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as fv1, FIRST_VALUE(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as fv2, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as lag1, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as lag2, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@6 as lead1, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as lead2]
 --GlobalLimitExec: skip=0, fetch=5
-----BoundedWindowAggExec: wdw=[FIRST_VALUE(aggregate_test_100.c9): Ok(Field { name: "FIRST_VALUE(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)) }, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)): Ok(Field { name: "LAG(aggregate_test_100.c9,Int64(2),Int64(10101))", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)) }, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)): Ok(Field { name: "LEAD(aggregate_test_100.c9,Int64(2),Int64(10101))", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)) }], mode=[Sorted]
-------BoundedWindowAggExec: wdw=[FIRST_VALUE(aggregate_test_100.c9): Ok(Field { name: "FIRST_VALUE(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)): Ok(Field { name: "LAG(aggregate_test_100.c9,Int64(2),Int64(10101))", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)): Ok(Field { name: "LEAD(aggregate_test_100.c9,Int64(2),Int64(10101))", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[FIRST_VALUE(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "FIRST_VALUE(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)) }, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)) }, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[FIRST_VALUE(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "FIRST_VALUE(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }, LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(aggregate_test_100.c9,Int64(2),Int64(10101)) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
 --------SortExec: expr=[c9@0 DESC]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
 
@@ -1399,9 +1399,9 @@ Projection: aggregate_test_100.c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9
 physical_plan
 ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as rn1, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@1 as rn2]
 --GlobalLimitExec: skip=0, fetch=5
-----BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 ------SortExec: expr=[c9@0 ASC NULLS LAST]
---------BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 ----------SortExec: expr=[c9@0 DESC]
 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
 
@@ -1441,10 +1441,10 @@ Projection: aggregate_test_100.c9, SUM(aggregate_test_100.c9) ORDER BY [aggregat
 physical_plan
 ProjectionExec: expr=[c9@2 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as rn2]
 --GlobalLimitExec: skip=0, fetch=5
-----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 ------SortExec: expr=[c9@2 ASC NULLS LAST,c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST]
---------BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
-----------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c1 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 ------------SortExec: expr=[c9@2 DESC,c1@0 DESC]
 --------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9], has_header=true
 
@@ -1523,19 +1523,19 @@ Projection: SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BET
 physical_plan
 ProjectionExec: expr=[SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@18 as a, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@18 as b, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@3 as c, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@11 as d, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@7 as e, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@3 as f, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@11 as g, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@19 as h, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as i, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@12 as j, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as k, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@8 as l, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@17 as m, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@15 as n, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as o, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@16 as p, 
SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@20 as a1, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@20 as b1, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@5 as c1, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@13 as d1, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@9 as e1, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@5 as f1, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@13 as g1, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@19 as h1, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@19 as j1, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as k1, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@12 as l1, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@8 as m1, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as n1, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@12 as o1, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@21 as h11, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@21 as j11, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 
CURRENT ROW AND UNBOUNDED FOLLOWING@6 as k11, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@14 as l11, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@10 as m11, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@6 as n11, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@14 as o11]
 --GlobalLimitExec: skip=0, fetch=5
-----WindowAggExec: wdw=[SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)) }]
-------ProjectionExec: expr=[c1@0 as c1, c3@2 as c3, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@4 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@6 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@7 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@8 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@9 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@10 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@11 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@12 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@13 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@14 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@15 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@16 as 
SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@17 as SUM(null_cases.c1), SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@18 as SUM(null_cases.c1)]
---------BoundedWindowAggExec: wdw=[SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----WindowAggExec: wdw=[SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)) }]
+------ProjectionExec: expr=[c1@0 as c1, c3@2 as c3, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@4 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@5 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@6 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@7 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@8 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@9 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@10 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW 
AND UNBOUNDED FOLLOWING@11 as SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING@12 as SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@13 as SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING@14 as SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING@15 as SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@16 as SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@17 as SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@18 as SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]
+--------BoundedWindowAggExec: wdw=[SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ----------SortExec: expr=[c3@2 ASC NULLS LAST,c2@1 ASC NULLS LAST]
-------------BoundedWindowAggExec: wdw=[SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+------------BoundedWindowAggExec: wdw=[SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 --------------SortExec: expr=[c3@2 ASC NULLS LAST,c1@0 ASC]
-----------------BoundedWindowAggExec: wdw=[SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----------------BoundedWindowAggExec: wdw=[SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS LAST, null_cases.c1 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ------------------SortExec: expr=[c3@2 ASC NULLS LAST,c1@0 DESC]
---------------------WindowAggExec: wdw=[SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(10)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(NULL)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }]
-----------------------WindowAggExec: wdw=[SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)) }]
+--------------------WindowAggExec: wdw=[SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(10)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(11)), end_bound: Following(Int64(NULL)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 ASC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }]
+----------------------WindowAggExec: wdw=[SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS LAST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)) }]
 ------------------------SortExec: expr=[c3@2 DESC NULLS LAST]
---------------------------WindowAggExec: wdw=[SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)) }]
-----------------------------BoundedWindowAggExec: wdw=[SUM(null_cases.c1): Ok(Field { name: "SUM(null_cases.c1)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+--------------------------WindowAggExec: wdw=[SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(10)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND 11 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: Following(Int64(11)) }, SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST] RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int64(NULL)) }]
+----------------------------BoundedWindowAggExec: wdw=[SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(null_cases.c1) ORDER BY [null_cases.c3 DESC NULLS FIRST, null_cases.c1 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ------------------------------SortExec: expr=[c3@2 DESC,c1@0 ASC NULLS LAST]
 --------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/null_cases.csv]]}, projection=[c1, c2, c3], has_header=true
 
@@ -1609,8 +1609,8 @@ Projection: aggregate_test_100.c9, SUM(aggregate_test_100.c9) ORDER BY [aggregat
 physical_plan
 ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as sum1, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2]
 --GlobalLimitExec: skip=0, fetch=5
-----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
-------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 --------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 DESC]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true
 
@@ -1653,8 +1653,8 @@ Projection: aggregate_test_100.c9, SUM(aggregate_test_100.c9) PARTITION BY [aggr
 physical_plan
 ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum1, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@2 as sum2]
 --GlobalLimitExec: skip=0, fetch=5
-----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
-------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 --------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 DESC]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c9], has_header=true
 
@@ -1698,9 +1698,9 @@ Projection: aggregate_test_100.c3, SUM(aggregate_test_100.c9) ORDER BY [aggregat
 physical_plan
 ProjectionExec: expr=[c3@0 as c3, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as sum2]
 --GlobalLimitExec: skip=0, fetch=5
-----WindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int16(NULL)) }]
-------ProjectionExec: expr=[c3@1 as c3, c4@2 as c4, c9@3 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as SUM(aggregate_test_100.c9)]
---------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----WindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(Int16(NULL)) }]
+------ProjectionExec: expr=[c3@1 as c3, c4@2 as c4, c9@3 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@4 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]
+--------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 + aggregate_test_100.c4 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ----------SortExec: expr=[c3@1 + c4@2 DESC,c9@3 DESC,c2@0 ASC NULLS LAST]
 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c4, c9], has_header=true
 
@@ -1794,13 +1794,13 @@ physical_plan
 GlobalLimitExec: skip=0, fetch=5
 --SortPreservingMergeExec: [c3@0 ASC NULLS LAST], fetch=5
 ----ProjectionExec: expr=[c3@0 as c3, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as sum2]
-------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c3] ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 --------SortExec: expr=[c3@0 ASC NULLS LAST,c9@1 DESC]
 ----------CoalesceBatchesExec: target_batch_size=4096
 ------------RepartitionExec: partitioning=Hash([c3@0], 2), input_partitions=2
 --------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-----------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as SUM(aggregate_test_100.c9)]
-------------------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----------------ProjectionExec: expr=[c3@1 as c3, c9@2 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]
+------------------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c3 DESC NULLS FIRST, aggregate_test_100.c9 DESC NULLS FIRST, aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int16(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 --------------------SortExec: expr=[c3@1 DESC,c9@2 DESC,c2@0 ASC NULLS LAST]
 ----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c2, c3, c9], has_header=true
 
@@ -1836,7 +1836,7 @@ Sort: aggregate_test_100.c1 ASC NULLS LAST
 physical_plan
 SortPreservingMergeExec: [c1@0 ASC NULLS LAST]
 --ProjectionExec: expr=[c1@0 as c1, ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1]
-----BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted]
 ------SortExec: expr=[c1@0 ASC NULLS LAST]
 --------CoalesceBatchesExec: target_batch_size=4096
 ----------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2
@@ -1963,7 +1963,7 @@ physical_plan
 SortExec: expr=[c1@0 ASC NULLS LAST]
 --CoalescePartitionsExec
 ----ProjectionExec: expr=[c1@0 as c1, ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@1 as rn1]
-------BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() PARTITION BY [aggregate_test_100.c1] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted]
 --------SortExec: expr=[c1@0 ASC NULLS LAST]
 ----------CoalesceBatchesExec: target_batch_size=4096
 ------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2
@@ -1989,10 +1989,10 @@ Sort: aggregate_test_100.c1 ASC NULLS LAST
 physical_plan
 SortExec: expr=[c1@0 ASC NULLS LAST]
 --ProjectionExec: expr=[c1@0 as c1, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING@2 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@3 as sum2]
-----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 ------SortPreservingMergeExec: [c9@1 ASC NULLS LAST]
 --------SortExec: expr=[c9@1 ASC NULLS LAST]
-----------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(3)) }], mode=[Sorted]
+----------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 3 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(3)) }], mode=[Sorted]
 ------------SortExec: expr=[c1@0 ASC NULLS LAST,c9@1 ASC NULLS LAST]
 --------------CoalesceBatchesExec: target_batch_size=4096
 ----------------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2
@@ -2081,11 +2081,11 @@ physical_plan
 GlobalLimitExec: skip=0, fetch=5
 --SortExec: fetch=5, expr=[c9@0 ASC NULLS LAST]
 ----ProjectionExec: expr=[c9@2 as c9, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sum1, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum2, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum3, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum4]
-------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
---------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c9@3 as c9, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9), SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as SUM(aggregate_test_100.c9), SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as SUM(aggregate_test_100.c9)]
-----------WindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }]
-------------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
---------------WindowAggExec: wdw=[SUM(aggregate_test_100.c9): Ok(Field { name: "SUM(aggregate_test_100.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }]
+------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+--------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c9@3 as c9, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]
+----------WindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c2, aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }]
+------------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+--------------WindowAggExec: wdw=[SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) PARTITION BY [aggregate_test_100.c1, aggregate_test_100.c2] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }]
 ----------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST]
 ------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true
 
@@ -2137,13 +2137,13 @@ Projection: t1.c9, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NU
 physical_plan
 ProjectionExec: expr=[c9@1 as c9, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sum1, SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as sum2, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@3 as sum3, SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as sum4]
 --GlobalLimitExec: skip=0, fetch=5
-----BoundedWindowAggExec: wdw=[SUM(t1.c9): Ok(Field { name: "SUM(t1.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
-------ProjectionExec: expr=[c2@0 as c2, c9@2 as c9, c1_alias@3 as c1_alias, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(t1.c9), SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as SUM(t1.c9), SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as SUM(t1.c9)]
---------WindowAggExec: wdw=[SUM(t1.c9): Ok(Field { name: "SUM(t1.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }]
+----BoundedWindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+------ProjectionExec: expr=[c2@0 as c2, c9@2 as c9, c1_alias@3 as c1_alias, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@6 as SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING]
+--------WindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c2, t1.c1_alias] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }]
 ----------SortExec: expr=[c2@0 ASC NULLS LAST,c1_alias@3 ASC NULLS LAST,c9@2 ASC NULLS LAST,c8@1 ASC NULLS LAST]
-------------ProjectionExec: expr=[c2@1 as c2, c8@2 as c8, c9@3 as c9, c1_alias@4 as c1_alias, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as SUM(t1.c9), SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as SUM(t1.c9)]
---------------BoundedWindowAggExec: wdw=[SUM(t1.c9): Ok(Field { name: "SUM(t1.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
-----------------WindowAggExec: wdw=[SUM(t1.c9): Ok(Field { name: "SUM(t1.c9)", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }]
+------------ProjectionExec: expr=[c2@1 as c2, c8@2 as c8, c9@3 as c9, c1_alias@4 as c1_alias, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING@5 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING, SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@6 as SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING]
+--------------BoundedWindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----------------WindowAggExec: wdw=[SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(t1.c9) PARTITION BY [t1.c1, t1.c2] ORDER BY [t1.c9 ASC NULLS LAST, t1.c8 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(NULL)) }]
 ------------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST,c9@3 ASC NULLS LAST,c8@2 ASC NULLS LAST]
 --------------------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c8@2 as c8, c9@3 as c9, c1@0 as c1_alias]
 ----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c8, c9], has_header=true
@@ -2188,9 +2188,9 @@ ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2]
 --GlobalLimitExec: skip=0, fetch=5
 ----SortExec: fetch=5, expr=[c9@2 ASC NULLS LAST]
 ------ProjectionExec: expr=[SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as sum1, SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING@4 as sum2, c9@1 as c9]
---------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c12): Ok(Field { name: "SUM(aggregate_test_100.c12)", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(3)) }], mode=[Sorted]
-----------ProjectionExec: expr=[c1@0 as c1, c9@2 as c9, c12@3 as c12, SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as SUM(aggregate_test_100.c12)]
-------------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c12): Ok(Field { name: "SUM(aggregate_test_100.c12)", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING: Ok(Field { name: "SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST] GROUPS BETWEEN 5 PRECEDING AND 3 PRECEDING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(3)) }], mode=[Sorted]
+----------ProjectionExec: expr=[c1@0 as c1, c9@2 as c9, c12@3 as c12, SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING]
+------------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c1 ASC NULLS LAST, aggregate_test_100.c2 ASC NULLS LAST] GROUPS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Groups, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
 --------------SortExec: expr=[c1@0 ASC NULLS LAST,c2@1 ASC NULLS LAST]
 ----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c9, c12], has_header=true
 
@@ -2226,7 +2226,7 @@ Limit: skip=0, fetch=5
 physical_plan
 GlobalLimitExec: skip=0, fetch=5
 --ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1]
-----BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ------SortExec: expr=[c9@0 ASC NULLS LAST]
 --------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
 
@@ -2265,7 +2265,7 @@ Limit: skip=0, fetch=5
 physical_plan
 GlobalLimitExec: skip=0, fetch=5
 --ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1]
-----BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ------SortExec: expr=[c9@0 DESC]
 --------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
 
@@ -2305,7 +2305,7 @@ physical_plan
 GlobalLimitExec: skip=0, fetch=5
 --SortExec: fetch=5, expr=[rn1@1 DESC]
 ----ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1]
-------BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 --------SortExec: expr=[c9@0 DESC]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
 
@@ -2348,7 +2348,7 @@ physical_plan
 GlobalLimitExec: skip=0, fetch=5
 --SortExec: fetch=5, expr=[rn1@1 ASC NULLS LAST,c9@0 ASC NULLS LAST]
 ----ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1]
-------BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 --------SortExec: expr=[c9@0 DESC]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
 
@@ -2401,7 +2401,7 @@ Limit: skip=0, fetch=5
 physical_plan
 GlobalLimitExec: skip=0, fetch=5
 --ProjectionExec: expr=[c9@0 as c9, ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as rn1]
-----BoundedWindowAggExec: wdw=[ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() ORDER BY [aggregate_test_100.c9 DESC NULLS FIRST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ------SortExec: expr=[c9@0 DESC]
 --------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c9], has_header=true
 
@@ -2532,10 +2532,10 @@ ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, sum3@2 as sum3, min1@3 as
 --GlobalLimitExec: skip=0, fetch=5
 ----SortExec: fetch=5, expr=[inc_col@24 DESC]
 ------ProjectionExec: expr=[SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as sum1, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@14 as sum2, SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@15 as sum3, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@16 as min1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@17 as min2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as min3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as max1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@20 as max2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@21 as max3, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@22 as cnt1, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@23 as cnt2, SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@2 as sumr1, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@3 as sumr2, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@4 as sumr3, MIN(annotated_data_finite.inc_col) 
ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as minr1, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@6 as minr2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@7 as minr3, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@8 as maxr1, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@9 as maxr2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@10 as maxr3, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@11 as cntr1, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@12 as cntr2, SUM(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as sum4, COUNT(UInt8(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@25 as cnt3, inc_col@0 as inc_col]
---------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.desc_col): Ok(Field { name: "SUM(annotated_data_finite.desc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
-----------ProjectionExec: expr=[inc_col@1 as inc_col, desc_col@2 as desc_col, SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@3 as SUM(annotated_data_finite.inc_col), SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@4 as SUM(annotated_data_finite.desc_col), SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as SUM(annotated_data_finite.desc_col), MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@6 as MIN(annotated_data_finite.inc_col), MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@7 as MIN(annotated_data_finite.desc_col), MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as MIN(annotated_data_finite.inc_col), MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as MAX(annotated_data_finite.inc_col), MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@10 as MAX(annotated_data_finite.desc_col), MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@11 as MAX(annotated_data_finite.inc_col), COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@12 as COUNT(UInt8(1)), COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@13 as COUNT(UInt8(1)), SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 
PRECEDING AND 1 FOLLOWING@14 as SUM(annotated_data_finite.inc_col), SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@15 as SUM(annotated_data_finite.desc_col), SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as SUM(annotated_data_finite.inc_col), MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as MIN(annotated_data_finite.inc_col), MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@18 as MIN(annotated_data_finite.desc_col), MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@19 as MIN(annotated_data_finite.inc_col), MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@20 as MAX(annotated_data_finite.inc_col), MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@21 as MAX(annotated_data_finite.desc_col), MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as MAX(annotated_data_finite.inc_col), COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@23 as COUNT(UInt8(1)), COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as COUNT(UInt8(1))]
-------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.inc_col): Ok(Field { name: "SUM(annotated_data_finite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, SUM(annotated_data_finite.desc_col): Ok(Field { name: "SUM(annotated_data_finite.desc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)) }, SUM(annotated_data_finite.inc_col): Ok(Field { name: "SUM(annotated_data_finite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, MIN(annotated_data_finite.inc_col): Ok(Field { name: "MIN(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, MIN(annotated_data_finite.desc_col): Ok(Field { name: "MIN(annotated_data_finite.desc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)) }, MIN(annotated_data_finite.inc_col): Ok(Field { name: "MIN(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, MAX(annotated_data_finite.inc_col): Ok(Field { name: "MAX(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: 
Following(Int32(1)) }, MAX(annotated_data_finite.desc_col): Ok(Field { name: "MAX(annotated_data_finite.desc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)) }, MAX(annotated_data_finite.inc_col): Ok(Field { name: "MAX(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)) }, COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
---------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.inc_col): Ok(Field { name: "SUM(annotated_data_finite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)) }, SUM(annotated_data_finite.desc_col): Ok(Field { name: "SUM(annotated_data_finite.desc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)) }, SUM(annotated_data_finite.desc_col): Ok(Field { name: "SUM(annotated_data_finite.desc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)) }, MIN(annotated_data_finite.inc_col): Ok(Field { name: "MIN(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, MIN(annotated_data_finite.desc_col): Ok(Field { name: "MIN(annotated_data_finite.desc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)) }, MIN(annotated_data_finite.inc_col): Ok(Field { name: "MIN(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, MAX(annotated_data_finite.inc_col): Ok(Field { name: "MAX(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: 
Following(Int32(10)) }, MAX(annotated_data_finite.desc_col): Ok(Field { name: "MAX(annotated_data_finite.desc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)) }, MAX(annotated_data_finite.inc_col): Ok(Field { name: "MAX(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)) }, COUNT(UInt8(1)): Ok(Field { name: "COUNT(UInt8(1))", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.desc_col) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+----------ProjectionExec: expr=[inc_col@1 as inc_col, desc_col@2 as desc_col, SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING@3 as SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING@4 as SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING@5 as SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@6 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@7 as MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@10 as MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@11 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@12 as COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@13 as COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@15 as SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 
FOLLOWING, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@18 as MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@19 as MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@20 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING@21 as MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING@23 as COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING@24 as COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING]
+------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)) }, SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS 
LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)) }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(5)), end_bound: Following(Int32(1)) }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 4 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(8)) }, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(8)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+--------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 4 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(4)), end_bound: Following(Int32(1)) }, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 8 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(8)), end_bound: Following(Int32(1)) }, SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(1)) }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, MIN(annotated_data_finite.desc_col) ORDER BY 
[annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)) }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.desc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 5 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(5)) }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: 
"MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(2)), end_bound: Following(Int32(6)) }, COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(UInt8(1)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 8 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(8)) }], mode=[Sorted]
 ----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col, desc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true
 
 query IIIIIIIIIIIIIIIIIIIIIIII
@@ -2620,8 +2620,8 @@ ProjectionExec: expr=[fv1@0 as fv1, fv2@1 as fv2, lv1@2 as lv1, lv2@3 as lv2, nv
 --GlobalLimitExec: skip=0, fetch=5
 ----SortExec: fetch=5, expr=[ts@24 DESC]
 ------ProjectionExec: expr=[FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@10 as fv1, FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@11 as fv2, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@12 as lv1, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@13 as lv2, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@14 as nv1, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@15 as nv2, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@16 as rn1, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@17 as rn2, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@18 as rank1, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@19 as rank2, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@20 as dense_rank1, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@21 as dense_rank2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@22 as lag1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@23 as lag2, 
LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@24 as lead1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@25 as lead2, FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@2 as fvr1, FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@3 as fvr2, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING@4 as lvr1, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@5 as lvr2, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@6 as lagr1, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@7 as lagr2, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING@8 as leadr1, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING@9 as leadr2, ts@0 as ts]
---------BoundedWindowAggExec: wdw=[FIRST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, FIRST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, LAST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, LAST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)): Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)): Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: 
WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, ROW_NUMBER(): Ok(Field { name: "ROW_NUMBER()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, RANK(): Ok(Field { name: "RANK()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, RANK(): Ok(Field { name: "RANK()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, DENSE_RANK(): Ok(Field { name: "DENSE_RANK()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, DENSE_RANK(): Ok(Field { name: "DENSE_RANK()", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)): Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)): Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)): Ok(Field { 
name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)): Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
-----------BoundedWindowAggExec: wdw=[FIRST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, FIRST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, LAST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, LAST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)): Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)): Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)): Ok(Field { name: 
"LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)): Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, 
NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(5)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "ROW_NUMBER() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "RANK() ORDER BY 
[annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "DENSE_RANK() ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, 
start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(10)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+----------BoundedWindowAggExec: wdw=[FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(1)), end_bound: Following(Int32(10)) }, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, 
LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAG(annotated_data_finite.inc_col,Int64(2),Int64(1002)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }, LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(-1),Int64(1001)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 1 PRECEDING AND 10 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(10)), end_bound: Following(Int32(1)) }, LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LEAD(annotated_data_finite.inc_col,Int64(4),Int64(1004)) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 10 PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, 
start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(10)) }], mode=[Sorted]
 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true
 
 query IIIIIIIIIIIIIIIIIIIIIIII
@@ -2692,8 +2692,8 @@ ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, min1@2 as min1, min2@3 as
 --GlobalLimitExec: skip=0, fetch=5
 ----SortExec: fetch=5, expr=[inc_col@10 ASC NULLS LAST]
 ------ProjectionExec: expr=[SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@7 as sum1, SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@2 as sum2, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@8 as min1, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as min2, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@9 as max1, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@4 as max2, COUNT(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@10 as count1, COUNT(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@5 as count2, AVG(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING@11 as avg1, AVG(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@6 as avg2, inc_col@1 as inc_col]
---------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.inc_col): Ok(Field { name: "SUM(annotated_data_finite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }, MIN(annotated_data_finite.inc_col): Ok(Field { name: "MIN(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }, MAX(annotated_data_finite.inc_col): Ok(Field { name: "MAX(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }, COUNT(annotated_data_finite.inc_col): Ok(Field { name: "COUNT(annotated_data_finite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }, AVG(annotated_data_finite.inc_col): Ok(Field { name: "AVG(annotated_data_finite.inc_col)", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }], mode=[Sorted]
-----------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.inc_col): Ok(Field { name: "SUM(annotated_data_finite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }, MIN(annotated_data_finite.inc_col): Ok(Field { name: "MIN(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }, MAX(annotated_data_finite.inc_col): Ok(Field { name: "MAX(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }, COUNT(annotated_data_finite.inc_col): Ok(Field { name: "COUNT(annotated_data_finite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }, AVG(annotated_data_finite.inc_col): Ok(Field { name: "AVG(annotated_data_finite.inc_col)", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }, COUNT(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "COUNT(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }, 
AVG(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING: Ok(Field { name: "AVG(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND 5 FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(5)) }], mode=[Sorted]
+----------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }, MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MIN(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }, MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MAX(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }, COUNT(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "COUNT(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) 
}, AVG(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "AVG(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] RANGE BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: Following(Int32(3)) }], mode=[Sorted]
 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true
 
 query IIIIIIIIRR
@@ -2745,8 +2745,8 @@ ProjectionExec: expr=[first_value1@0 as first_value1, first_value2@1 as first_va
 --GlobalLimitExec: skip=0, fetch=5
 ----SortExec: fetch=5, expr=[inc_col@5 ASC NULLS LAST]
 ------ProjectionExec: expr=[FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@4 as first_value1, FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@2 as first_value2, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as last_value1, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as last_value2, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@6 as nth_value1, inc_col@1 as inc_col]
---------BoundedWindowAggExec: wdw=[FIRST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }, LAST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)): Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(2))", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
-----------BoundedWindowAggExec: wdw=[FIRST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }, LAST_VALUE(annotated_data_finite.inc_col): Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col)", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }, NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "NTH_VALUE(annotated_data_finite.inc_col,Int64(2)) ORDER BY [annotated_data_finite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+----------BoundedWindowAggExec: wdw=[FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "FIRST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }, LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "LAST_VALUE(annotated_data_finite.inc_col) ORDER BY [annotated_data_finite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }], mode=[Sorted]
 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], output_ordering=[ts@0 ASC NULLS LAST], has_header=true
 
 query IIIII
@@ -2790,8 +2790,8 @@ physical_plan
 ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, count1@2 as count1, count2@3 as count2]
 --GlobalLimitExec: skip=0, fetch=5
 ----ProjectionExec: expr=[SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@4 as sum1, SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@2 as sum2, COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as count1, COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as count2, ts@0 as ts]
-------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite.inc_col): Ok(Field { name: "SUM(annotated_data_infinite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }, COUNT(annotated_data_infinite.inc_col): Ok(Field { name: "COUNT(annotated_data_infinite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
---------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite.inc_col): Ok(Field { name: "SUM(annotated_data_infinite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }, COUNT(annotated_data_infinite.inc_col): Ok(Field { name: "COUNT(annotated_data_infinite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }, COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }, COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }], mode=[Sorted]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], infinite_source=true, output_ordering=[ts@0 ASC NULLS LAST], has_header=true
 
 
@@ -2836,8 +2836,8 @@ physical_plan
 ProjectionExec: expr=[sum1@0 as sum1, sum2@1 as sum2, count1@2 as count1, count2@3 as count2]
 --GlobalLimitExec: skip=0, fetch=5
 ----ProjectionExec: expr=[SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@4 as sum1, SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@2 as sum2, COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING@5 as count1, COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING@3 as count2, ts@0 as ts]
-------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite.inc_col): Ok(Field { name: "SUM(annotated_data_infinite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }, COUNT(annotated_data_infinite.inc_col): Ok(Field { name: "COUNT(annotated_data_infinite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
---------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite.inc_col): Ok(Field { name: "SUM(annotated_data_infinite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }, COUNT(annotated_data_infinite.inc_col): Ok(Field { name: "COUNT(annotated_data_infinite.inc_col)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }, COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts ASC NULLS LAST] ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(1)) }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }, COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "COUNT(annotated_data_infinite.inc_col) ORDER BY [annotated_data_infinite.ts DESC NULLS FIRST] ROWS BETWEEN 3 PRECEDING AND UNBOUNDED FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(3)) }], mode=[Sorted]
 ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_1.csv]]}, projection=[ts, inc_col], infinite_source=true, output_ordering=[ts@0 ASC NULLS LAST], has_header=true
 
 
@@ -2934,12 +2934,12 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_da
 physical_plan
 ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@8 as sum1, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@9 as sum2, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@14 as sum3, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@15 as sum4, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@4 as sum5, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@5 as sum6, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@10 as sum7, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@11 as sum8, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, 
annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@6 as sum9, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@7 as sum10, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@12 as sum11, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@13 as sum12]
 --GlobalLimitExec: skip=0, fetch=5
-----BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)) }], mode=[Linear]
-------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)) }], mode=[PartiallySorted([1, 0])]
---------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
-----------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[PartiallySorted([0])]
-------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow }], mode=[PartiallySorted([0, 1])]
---------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c): Ok(Field { name: "SUM(annotated_data_infinite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.a ASC NULLS LAST, annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)) }], mode=[Linear]
+------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)) }], mode=[PartiallySorted([1, 0])]
+--------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.b, annotated_data_infinite2.a] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+----------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.b ASC NULLS LAST, annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[PartiallySorted([0])]
+------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b, annotated_data_infinite2.d] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow }], mode=[PartiallySorted([0, 1])]
+--------------BoundedWindowAggExec: wdw=[SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_infinite2.c) PARTITION BY [annotated_data_infinite2.a, annotated_data_infinite2.b] ORDER BY [annotated_data_infinite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 ----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
 
 
@@ -3003,17 +3003,17 @@ physical_plan
 GlobalLimitExec: skip=0, fetch=5
 --SortExec: fetch=5, expr=[c@2 ASC NULLS LAST]
 ----ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@8 as sum1, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING@9 as sum2, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@14 as sum3, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING@15 as sum4, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@4 as sum5, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@5 as sum6, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@10 as sum7, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING@11 as sum8, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY 
[annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@6 as sum9, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW@7 as sum10, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING@12 as sum11, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING@13 as sum12]
-------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)) }], mode=[Sorted]
+------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.d] ORDER BY [annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Preceding(UInt64(1)) }], mode=[Sorted]
 --------SortExec: expr=[d@3 ASC NULLS LAST,a@0 ASC NULLS LAST,b@1 ASC NULLS LAST,c@2 ASC NULLS LAST]
-----------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)) }], mode=[Sorted]
+----------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(1)) }], mode=[Sorted]
 ------------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST,d@3 ASC NULLS LAST,c@2 ASC NULLS LAST]
---------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+--------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.b, annotated_data_finite2.a] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 ----------------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST,c@2 ASC NULLS LAST]
-------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 1 FOLLOWING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Following(UInt64(1)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 --------------------SortExec: expr=[a@0 ASC NULLS LAST,d@3 ASC NULLS LAST,b@1 ASC NULLS LAST,c@2 ASC NULLS LAST]
-----------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow }], mode=[Sorted]
+----------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b, annotated_data_finite2.d] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND CURRENT ROW", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: CurrentRow }], mode=[Sorted]
 ------------------------SortExec: expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST,d@3 ASC NULLS LAST,c@2 ASC NULLS LAST]
---------------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c): Ok(Field { name: "SUM(annotated_data_finite2.c)", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
+--------------------------BoundedWindowAggExec: wdw=[SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(2)), end_bound: Following(UInt64(1)) }, SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING: Ok(Field { name: "SUM(annotated_data_finite2.c) PARTITION BY [annotated_data_finite2.a, annotated_data_finite2.b] ORDER BY [annotated_data_finite2.c ASC NULLS LAST] ROWS BETWEEN 5 PRECEDING AND 5 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(5)), end_bound: Following(UInt64(5)) }], mode=[Sorted]
 ----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
 
 
@@ -3105,10 +3105,10 @@ ProjectionExec: expr=[min1@0 as min1, max1@1 as max1]
 --GlobalLimitExec: skip=0, fetch=5
 ----SortExec: fetch=5, expr=[c3@2 ASC NULLS LAST]
 ------ProjectionExec: expr=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as min1, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@2 as max1, c3@0 as c3]
---------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12): Ok(Field { name: "MAX(aggregate_test_100.c12)", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ----------SortExec: expr=[c12@1 ASC NULLS LAST]
-------------ProjectionExec: expr=[c3@0 as c3, c12@2 as c12, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@3 as MIN(aggregate_test_100.c12)]
---------------WindowAggExec: wdw=[MIN(aggregate_test_100.c12): Ok(Field { name: "MIN(aggregate_test_100.c12)", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }]
+------------ProjectionExec: expr=[c3@0 as c3, c12@2 as c12, MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@3 as MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]
+--------------WindowAggExec: wdw=[MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "MIN(aggregate_test_100.c12) PARTITION BY [aggregate_test_100.c11] ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }]
 ----------------SortExec: expr=[c11@1 ASC NULLS LAST]
 ------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c11, c12], has_header=true
 
@@ -3150,7 +3150,7 @@ ProjectionExec: expr=[min1@0 as min1, max1@1 as max1]
 --GlobalLimitExec: skip=0, fetch=5
 ----SortExec: fetch=5, expr=[c3@2 ASC NULLS LAST]
 ------ProjectionExec: expr=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as min1, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as max1, c3@0 as c3]
---------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12): Ok(Field { name: "MAX(aggregate_test_100.c12)", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow }, MIN(aggregate_test_100.c12): Ok(Field { name: "MIN(aggregate_test_100.c12)", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
+--------BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow }, MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c12) ORDER BY [aggregate_test_100.c12 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Float64(NULL)), end_bound: CurrentRow }], mode=[Sorted]
 ----------SortExec: expr=[c12@1 ASC NULLS LAST]
 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c3, c12], has_header=true
 
diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs
index 946aa31172f8..dfa178128549 100644
--- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs
@@ -80,20 +80,20 @@ async fn test_udwf() {
     let TestContext { ctx, test_state } = TestContext::new(test_state);
 
     let expected = vec![
-        "+---+---+-----+--------------------+",
-        "| x | y | val | odd_counter(t.val) |",
-        "+---+---+-----+--------------------+",
-        "| 1 | a | 0   | 1                  |",
-        "| 1 | b | 1   | 1                  |",
-        "| 1 | c | 2   | 1                  |",
-        "| 2 | d | 3   | 2                  |",
-        "| 2 | e | 4   | 2                  |",
-        "| 2 | f | 5   | 2                  |",
-        "| 2 | g | 6   | 2                  |",
-        "| 2 | h | 6   | 2                  |",
-        "| 2 | i | 6   | 2                  |",
-        "| 2 | j | 6   | 2                  |",
-        "+---+---+-----+--------------------+",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
+    "| x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
+    "| 1 | a | 0   | 1                                                                                                                     |",
+    "| 1 | b | 1   | 1                                                                                                                     |",
+    "| 1 | c | 2   | 1                                                                                                                     |",
+    "| 2 | d | 3   | 2                                                                                                                     |",
+    "| 2 | e | 4   | 2                                                                                                                     |",
+    "| 2 | f | 5   | 2                                                                                                                     |",
+    "| 2 | g | 6   | 2                                                                                                                     |",
+    "| 2 | h | 6   | 2                                                                                                                     |",
+    "| 2 | i | 6   | 2                                                                                                                     |",
+    "| 2 | j | 6   | 2                                                                                                                     |",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
     ];
     assert_batches_eq!(
         expected,
@@ -111,20 +111,20 @@ async fn test_udwf_bounded_window_ignores_frame() {
 
     // Since the UDWF doesn't say it needs the window frame, the frame is ignored
     let expected = vec![
-        "+---+---+-----+--------------------+",
-        "| x | y | val | odd_counter(t.val) |",
-        "+---+---+-----+--------------------+",
-        "| 1 | a | 0   | 1                  |",
-        "| 1 | b | 1   | 1                  |",
-        "| 1 | c | 2   | 1                  |",
-        "| 2 | d | 3   | 2                  |",
-        "| 2 | e | 4   | 2                  |",
-        "| 2 | f | 5   | 2                  |",
-        "| 2 | g | 6   | 2                  |",
-        "| 2 | h | 6   | 2                  |",
-        "| 2 | i | 6   | 2                  |",
-        "| 2 | j | 6   | 2                  |",
-        "+---+---+-----+--------------------+",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
+    "| x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
+    "| 1 | a | 0   | 1                                                                                                            |",
+    "| 1 | b | 1   | 1                                                                                                            |",
+    "| 1 | c | 2   | 1                                                                                                            |",
+    "| 2 | d | 3   | 2                                                                                                            |",
+    "| 2 | e | 4   | 2                                                                                                            |",
+    "| 2 | f | 5   | 2                                                                                                            |",
+    "| 2 | g | 6   | 2                                                                                                            |",
+    "| 2 | h | 6   | 2                                                                                                            |",
+    "| 2 | i | 6   | 2                                                                                                            |",
+    "| 2 | j | 6   | 2                                                                                                            |",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
     ];
     assert_batches_eq!(
         expected,
@@ -142,20 +142,20 @@ async fn test_udwf_bounded_window() {
     let TestContext { ctx, test_state } = TestContext::new(test_state);
 
     let expected = vec![
-        "+---+---+-----+--------------------+",
-        "| x | y | val | odd_counter(t.val) |",
-        "+---+---+-----+--------------------+",
-        "| 1 | a | 0   | 1                  |",
-        "| 1 | b | 1   | 1                  |",
-        "| 1 | c | 2   | 1                  |",
-        "| 2 | d | 3   | 1                  |",
-        "| 2 | e | 4   | 2                  |",
-        "| 2 | f | 5   | 1                  |",
-        "| 2 | g | 6   | 1                  |",
-        "| 2 | h | 6   | 0                  |",
-        "| 2 | i | 6   | 0                  |",
-        "| 2 | j | 6   | 0                  |",
-        "+---+---+-----+--------------------+",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
+    "| x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
+    "| 1 | a | 0   | 1                                                                                                            |",
+    "| 1 | b | 1   | 1                                                                                                            |",
+    "| 1 | c | 2   | 1                                                                                                            |",
+    "| 2 | d | 3   | 1                                                                                                            |",
+    "| 2 | e | 4   | 2                                                                                                            |",
+    "| 2 | f | 5   | 1                                                                                                            |",
+    "| 2 | g | 6   | 1                                                                                                            |",
+    "| 2 | h | 6   | 0                                                                                                            |",
+    "| 2 | i | 6   | 0                                                                                                            |",
+    "| 2 | j | 6   | 0                                                                                                            |",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
     ];
     assert_batches_eq!(
         expected,
@@ -175,20 +175,20 @@ async fn test_stateful_udwf() {
     let TestContext { ctx, test_state } = TestContext::new(test_state);
 
     let expected = vec![
-        "+---+---+-----+--------------------+",
-        "| x | y | val | odd_counter(t.val) |",
-        "+---+---+-----+--------------------+",
-        "| 1 | a | 0   | 0                  |",
-        "| 1 | b | 1   | 1                  |",
-        "| 1 | c | 2   | 1                  |",
-        "| 2 | d | 3   | 1                  |",
-        "| 2 | e | 4   | 1                  |",
-        "| 2 | f | 5   | 2                  |",
-        "| 2 | g | 6   | 2                  |",
-        "| 2 | h | 6   | 2                  |",
-        "| 2 | i | 6   | 2                  |",
-        "| 2 | j | 6   | 2                  |",
-        "+---+---+-----+--------------------+",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
+    "| x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
+    "| 1 | a | 0   | 0                                                                                                                     |",
+    "| 1 | b | 1   | 1                                                                                                                     |",
+    "| 1 | c | 2   | 1                                                                                                                     |",
+    "| 2 | d | 3   | 1                                                                                                                     |",
+    "| 2 | e | 4   | 1                                                                                                                     |",
+    "| 2 | f | 5   | 2                                                                                                                     |",
+    "| 2 | g | 6   | 2                                                                                                                     |",
+    "| 2 | h | 6   | 2                                                                                                                     |",
+    "| 2 | i | 6   | 2                                                                                                                     |",
+    "| 2 | j | 6   | 2                                                                                                                     |",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
     ];
     assert_batches_eq!(
         expected,
@@ -208,20 +208,20 @@ async fn test_stateful_udwf_bounded_window() {
     let TestContext { ctx, test_state } = TestContext::new(test_state);
 
     let expected = vec![
-        "+---+---+-----+--------------------+",
-        "| x | y | val | odd_counter(t.val) |",
-        "+---+---+-----+--------------------+",
-        "| 1 | a | 0   | 1                  |",
-        "| 1 | b | 1   | 1                  |",
-        "| 1 | c | 2   | 1                  |",
-        "| 2 | d | 3   | 1                  |",
-        "| 2 | e | 4   | 2                  |",
-        "| 2 | f | 5   | 1                  |",
-        "| 2 | g | 6   | 1                  |",
-        "| 2 | h | 6   | 0                  |",
-        "| 2 | i | 6   | 0                  |",
-        "| 2 | j | 6   | 0                  |",
-        "+---+---+-----+--------------------+",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
+    "| x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
+    "| 1 | a | 0   | 1                                                                                                            |",
+    "| 1 | b | 1   | 1                                                                                                            |",
+    "| 1 | c | 2   | 1                                                                                                            |",
+    "| 2 | d | 3   | 1                                                                                                            |",
+    "| 2 | e | 4   | 2                                                                                                            |",
+    "| 2 | f | 5   | 1                                                                                                            |",
+    "| 2 | g | 6   | 1                                                                                                            |",
+    "| 2 | h | 6   | 0                                                                                                            |",
+    "| 2 | i | 6   | 0                                                                                                            |",
+    "| 2 | j | 6   | 0                                                                                                            |",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
     ];
     assert_batches_eq!(
         expected,
@@ -240,20 +240,20 @@ async fn test_udwf_query_include_rank() {
     let TestContext { ctx, test_state } = TestContext::new(test_state);
 
     let expected = vec![
-        "+---+---+-----+--------------------+",
-        "| x | y | val | odd_counter(t.val) |",
-        "+---+---+-----+--------------------+",
-        "| 1 | a | 0   | 3                  |",
-        "| 1 | b | 1   | 2                  |",
-        "| 1 | c | 2   | 1                  |",
-        "| 2 | d | 3   | 7                  |",
-        "| 2 | e | 4   | 6                  |",
-        "| 2 | f | 5   | 5                  |",
-        "| 2 | g | 6   | 4                  |",
-        "| 2 | h | 6   | 3                  |",
-        "| 2 | i | 6   | 2                  |",
-        "| 2 | j | 6   | 1                  |",
-        "+---+---+-----+--------------------+",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
+    "| x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW |",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
+    "| 1 | a | 0   | 3                                                                                                                     |",
+    "| 1 | b | 1   | 2                                                                                                                     |",
+    "| 1 | c | 2   | 1                                                                                                                     |",
+    "| 2 | d | 3   | 7                                                                                                                     |",
+    "| 2 | e | 4   | 6                                                                                                                     |",
+    "| 2 | f | 5   | 5                                                                                                                     |",
+    "| 2 | g | 6   | 4                                                                                                                     |",
+    "| 2 | h | 6   | 3                                                                                                                     |",
+    "| 2 | i | 6   | 2                                                                                                                     |",
+    "| 2 | j | 6   | 1                                                                                                                     |",
+    "+---+---+-----+-----------------------------------------------------------------------------------------------------------------------+",
     ];
     assert_batches_eq!(
         expected,
@@ -272,20 +272,20 @@ async fn test_udwf_bounded_query_include_rank() {
     let TestContext { ctx, test_state } = TestContext::new(test_state);
 
     let expected = vec![
-        "+---+---+-----+--------------------+",
-        "| x | y | val | odd_counter(t.val) |",
-        "+---+---+-----+--------------------+",
-        "| 1 | a | 0   | 3                  |",
-        "| 1 | b | 1   | 2                  |",
-        "| 1 | c | 2   | 1                  |",
-        "| 2 | d | 3   | 7                  |",
-        "| 2 | e | 4   | 6                  |",
-        "| 2 | f | 5   | 5                  |",
-        "| 2 | g | 6   | 4                  |",
-        "| 2 | h | 6   | 3                  |",
-        "| 2 | i | 6   | 2                  |",
-        "| 2 | j | 6   | 1                  |",
-        "+---+---+-----+--------------------+",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
+    "| x | y | val | odd_counter(t.val) PARTITION BY [t.x] ORDER BY [t.y ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING |",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
+    "| 1 | a | 0   | 3                                                                                                            |",
+    "| 1 | b | 1   | 2                                                                                                            |",
+    "| 1 | c | 2   | 1                                                                                                            |",
+    "| 2 | d | 3   | 7                                                                                                            |",
+    "| 2 | e | 4   | 6                                                                                                            |",
+    "| 2 | f | 5   | 5                                                                                                            |",
+    "| 2 | g | 6   | 4                                                                                                            |",
+    "| 2 | h | 6   | 3                                                                                                            |",
+    "| 2 | i | 6   | 2                                                                                                            |",
+    "| 2 | j | 6   | 1                                                                                                            |",
+    "+---+---+-----+--------------------------------------------------------------------------------------------------------------+",
     ];
     assert_batches_eq!(
         expected,

From aab9103a9d52b8a5728f62d21eb8b6252f379cf1 Mon Sep 17 00:00:00 2001
From: Igor Izvekov <izveigor@gmail.com>
Date: Wed, 5 Jul 2023 22:34:18 +0300
Subject: [PATCH 66/89] feat: column support for `array_append`,
 `array_prepend`, `array_position` and `array_positions` (#6805)

* test: sqllogictests with columns for array_append, array_prepend, array_position and array_positions

* feat: column support for array_append and array_prepend

* feat: column support for array_position and array_positions

* fix: error type
---
 .../tests/sqllogictests/test_files/array.slt  | 491 ++++++++++++------
 datafusion/expr/src/built_in_function.rs      |   4 +-
 .../physical-expr/src/array_expressions.rs    | 338 +++++++-----
 datafusion/physical-expr/src/functions.rs     |   8 +-
 4 files changed, 541 insertions(+), 300 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt
index 1f43c5f8e154..7eebb23d9cc9 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -19,108 +19,294 @@
 ## Array expressions Tests
 #############
 
+
+### Tables
+
+
+statement ok
+CREATE TABLE values(
+  a INT,
+  b INT,
+  c INT,
+  d FLOAT,
+  e VARCHAR
+) AS VALUES
+  (1, 1, 2, 1.1, 'Lorem'),
+  (2, 3, 4, 2.2, 'ipsum'),
+  (3, 5, 6, 3.3, 'dolor'),
+  (4, 7, 8, 4.4, 'sit'),
+  (NULL, 9, 10, 5.5, 'amet'),
+  (5, NULL, 12, 6.6, ','),
+  (6, 11, NULL, 7.7, 'consectetur'),
+  (7, 13, 14, NULL, 'adipiscing'),
+  (8, 15, 16, 8.8, NULL)
+;
+
+statement ok
+CREATE TABLE arrays
+AS VALUES
+  (make_array(make_array(NULL, 2),make_array(3, NULL)), make_array(1.1, 2.2, 3.3), make_array('L', 'o', 'r', 'e', 'm')),
+  (make_array(make_array(3, 4),make_array(5, 6)), make_array(NULL, 5.5, 6.6), make_array('i', 'p', NULL, 'u', 'm')),
+  (make_array(make_array(5, 6),make_array(7, 8)), make_array(7.7, 8.8, 9.9), make_array('d', NULL, 'l', 'o', 'r')),
+  (make_array(make_array(7, NULL),make_array(9, 10)), make_array(10.1, NULL, 12.2), make_array('s', 'i', 't')),
+  (NULL, make_array(13.3, 14.4, 15.5), make_array('a', 'm', 'e', 't')),
+  (make_array(make_array(11, 12),make_array(13, 14)), NULL, make_array(',')),
+  (make_array(make_array(15, 16),make_array(NULL, 18)), make_array(16.6, 17.7, 18.8), NULL)
+;
+
+statement ok
+CREATE TABLE arrays_values
+AS VALUES
+  (make_array(NULL, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
+  (make_array(11, 12, 13, 14, 15, 16, 17, 18, NULL, 20), 12, 2, '.'),
+  (make_array(21, 22, 23, NULL, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
+  (make_array(31, 32, 33, 34, 35, NULL, 37, 38, 39, 40), 34, 4, 'ok'),
+  (NULL, 44, 5, '@'),
+  (make_array(41, 42, 43, 44, 45, 46, 47, 48, 49, 50), NULL, 6, '$'),
+  (make_array(51, 52, NULL, 54, 55, 56, 57, 58, 59, 60), 55, NULL, '^'),
+  (make_array(61, 62, 63, 64, 65, 66, 67, 68, 69, 70), 66, 7, NULL)
+;
+
+statement ok
+CREATE TABLE arrays_values_without_nulls
+AS VALUES
+  (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
+  (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'),
+  (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
+  (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok')
+;
+
+# arrays table
+query ???
+select column1, column2, column3 from arrays;
+----
+[[, 2], [3, ]] [1.1, 2.2, 3.3] [L, o, r, e, m]
+[[3, 4], [5, 6]] [, 5.5, 6.6] [i, p, , u, m]
+[[5, 6], [7, 8]] [7.7, 8.8, 9.9] [d, , l, o, r]
+[[7, ], [9, 10]] [10.1, , 12.2] [s, i, t]
+NULL [13.3, 14.4, 15.5] [a, m, e, t]
+[[11, 12], [13, 14]] NULL [,]
+[[15, 16], [, 18]] [16.6, 17.7, 18.8] NULL
+
+# values table
+query IIIRT
+select a, b, c, d, e from values;
+----
+1 1 2 1.1 Lorem
+2 3 4 2.2 ipsum
+3 5 6 3.3 dolor
+4 7 8 4.4 sit
+NULL 9 10 5.5 amet
+5 NULL 12 6.6 ,
+6 11 NULL 7.7 consectetur
+7 13 14 NULL adipiscing
+8 15 16 8.8 NULL
+
+# arrays_values table
+query ?IIT
+select column1, column2, column3, column4 from arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 ,
+[11, 12, 13, 14, 15, 16, 17, 18, , 20] 12 2 .
+[21, 22, 23, , 25, 26, 27, 28, 29, 30] 23 3 -
+[31, 32, 33, 34, 35, , 37, 38, 39, 40] 34 4 ok
+NULL 44 5 @
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50] NULL 6 $
+[51, 52, , 54, 55, 56, 57, 58, 59, 60] 55 NULL ^
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70] 66 7 NULL
+
+# arrays_values_without_nulls table
+query ?II
+select column1, column2, column3 from arrays_values_without_nulls;
+----
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1
+[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2
+[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3
+[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4
+
+### Array function tests
+
+
 ## make_array
 
-# array scalar function #1
-query ??? rowsort
+# make_array scalar function #1
+query ???
 select make_array(1, 2, 3), make_array(1.0, 2.0, 3.0), make_array('h', 'e', 'l', 'l', 'o');
 ----
 [1, 2, 3] [1.0, 2.0, 3.0] [h, e, l, l, o]
 
-# array scalar function #2
-query ??? rowsort
+# make_array scalar function #2
+query ???
 select make_array(1, 2, 3), make_array(make_array(1, 2), make_array(3, 4)), make_array([[[[1], [2]]]]);
 ----
 [1, 2, 3] [[1, 2], [3, 4]] [[[[[1], [2]]]]]
 
-# array scalar function #3
-query ?? rowsort
+# make_array scalar function #3
+query ??
 select make_array([1, 2, 3], [4, 5, 6], [7, 8, 9]), make_array([[1, 2], [3, 4]], [[5, 6], [7, 8]]);
 ----
 [[1, 2, 3], [4, 5, 6], [7, 8, 9]] [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
 
-# array scalar function #4
-query ?? rowsort
+# make_array scalar function #4
+query ??
 select make_array([1.0, 2.0], [3.0, 4.0]), make_array('h', 'e', 'l', 'l', 'o');
 ----
 [[1.0, 2.0], [3.0, 4.0]] [h, e, l, l, o]
 
-# array scalar function #5
-query ? rowsort
+# make_array scalar function #5
+query ?
 select make_array(make_array(make_array(make_array(1, 2, 3), make_array(4, 5, 6)), make_array(make_array(7, 8, 9), make_array(10, 11, 12))))
 ----
 [[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]]
 
-# array scalar function #6
-query ? rowsort
+# make_array scalar function #6
+query ?
 select make_array()
 ----
 []
 
-# array scalar function #7
-query ?? rowsort
+# make_array scalar function #7
+query ??
 select make_array(make_array()), make_array(make_array(make_array()))
 ----
 [[]] [[[]]]
 
-# array scalar function with nulls
-query ??? rowsort
+# make_array scalar function with nulls
+query ???
 select make_array(1, NULL, 3), make_array(NULL, 2.0, NULL), make_array('h', NULL, 'l', NULL, 'o');
 ----
 [1, , 3] [, 2.0, ] [h, , l, , o]
 
-# array scalar function with nulls #2
-query ?? rowsort
+# make_array scalar function with nulls #2
+query ??
 select make_array(1, 2, NULL), make_array(make_array(NULL, 2), make_array(NULL, 3));
 ----
 [1, 2, ] [[, 2], [, 3]]
 
-# array scalar function with nulls #3
-query ??? rowsort
+# make_array scalar function with nulls #3
+query ???
 select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NULL, NULL), make_array(NULL, NULL));
 ----
 [] [] [[], []]
 
+# make_array with columns #1
+query ????
+select make_array(a), make_array(b, c), make_array(d), make_array(e) from values;
+----
+[1] [1, 2] [1.1] [Lorem]
+[2] [3, 4] [2.2] [ipsum]
+[3] [5, 6] [3.3] [dolor]
+[4] [7, 8] [4.4] [sit]
+[0] [9, 10] [5.5] [amet]
+[5] [0, 12] [6.6] [,]
+[6] [11, 0] [7.7] [consectetur]
+[7] [13, 14] [0.0] [adipiscing]
+[8] [15, 16] [8.8] []
+
+# make_array with columns #2
+query ?
+select make_array(a, b, c, d) from values;
+----
+[1.0, 1.0, 2.0, 1.1]
+[2.0, 3.0, 4.0, 2.2]
+[3.0, 5.0, 6.0, 3.3]
+[4.0, 7.0, 8.0, 4.4]
+[0.0, 9.0, 10.0, 5.5]
+[5.0, 0.0, 12.0, 6.6]
+[6.0, 11.0, 0.0, 7.7]
+[7.0, 13.0, 14.0, 0.0]
+[8.0, 15.0, 16.0, 8.8]
+
 ## array_append
 
 # array_append scalar function #2
-query ? rowsort
+query ?
 select array_append(make_array(), 4);
 ----
 [4]
 
 # array_append scalar function #2
-query ?? rowsort
+query ??
 select array_append(make_array(), make_array()), array_append(make_array(), make_array(4));
 ----
 [[]] [[4]]
 
 # array_append scalar function #3
-query ??? rowsort
+query ???
 select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o');
 ----
 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
+# array_append with columns
+query ?
+select array_append(column1, column2) from arrays_values;
+----
+[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1]
+[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12]
+[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23]
+[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34]
+[44]
+[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ]
+[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55]
+[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66]
+
+# array_append with columns and scalars
+query ??
+select array_append(column2, 100.1), array_append(column3, '.') from arrays;
+----
+[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .]
+[, 5.5, 6.6, 100.1] [i, p, , u, m, .]
+[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .]
+[10.1, , 12.2, 100.1] [s, i, t, .]
+[13.3, 14.4, 15.5, 100.1] [a, m, e, t, .]
+[100.1] [,, .]
+[16.6, 17.7, 18.8, 100.1] [.]
+
 ## array_prepend
 
 # array_prepend scalar function #1
-query ? rowsort
+query ?
 select array_prepend(4, make_array());
 ----
 [4]
 
 # array_prepend scalar function #2
-query ?? rowsort
+query ??
 select array_prepend(make_array(), make_array()), array_prepend(make_array(4), make_array());
 ----
 [[]] [[4]]
 
 # array_prepend scalar function #3
-query ??? rowsort
+query ???
 select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, 3.0, 4.0)), array_prepend('h', make_array('e', 'l', 'l', 'o'));
 ----
 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
+# array_prepend with columns
+query ?
+select array_prepend(column2, column1) from arrays_values;
+----
+[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10]
+[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20]
+[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30]
+[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40]
+[44]
+[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
+[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60]
+[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]
+
+# array_prepend with columns and scalars
+query ??
+select array_prepend(100.1, column2), array_prepend('.', column3) from arrays;
+----
+[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m]
+[100.1, , 5.5, 6.6] [., i, p, , u, m]
+[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r]
+[100.1, 10.1, , 12.2] [., s, i, t]
+[100.1, 13.3, 14.4, 15.5] [., a, m, e, t]
+[100.1] [., ,]
+[100.1, 16.6, 17.7, 18.8] [.]
+
 ## array_fill
 
 # array_fill scalar function #1
@@ -144,37 +330,37 @@ select array_fill(1, make_array())
 ## array_concat
 
 # array_concat scalar function #1
-query ?? rowsort
+query ??
 select array_concat(make_array(1, 2, 3), make_array(4, 5, 6), make_array(7, 8, 9)), array_concat(make_array([1], [2]), make_array([3], [4]));
 ----
 [1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
 
 # array_concat scalar function #2
-query ? rowsort
+query ?
 select array_concat(make_array(make_array(1, 2), make_array(3, 4)), make_array(make_array(5, 6), make_array(7, 8)));
 ----
 [[1, 2], [3, 4], [5, 6], [7, 8]]
 
 # array_concat scalar function #3
-query ? rowsort
+query ?
 select array_concat(make_array([1], [2], [3]), make_array([4], [5], [6]), make_array([7], [8], [9]));
 ----
 [[1], [2], [3], [4], [5], [6], [7], [8], [9]]
 
 # array_concat scalar function #4
-query ? rowsort
+query ?
 select array_concat(make_array([[1]]), make_array([[2]]));
 ----
 [[[1]], [[2]]]
 
 # array_concat scalar function #5
-query ? rowsort
+query ?
 select array_concat(make_array(2, 3), make_array());
 ----
 [2, 3]
 
 # array_concat scalar function #6
-query ? rowsort
+query ?
 select array_concat(make_array(), make_array(2, 3));
 ----
 [2, 3]
@@ -193,12 +379,50 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2,
 ----
 4 5 2
 
+# array_position with columns
+query II
+select array_position(column1, column2), array_position(column1, column2, column3) from arrays_values_without_nulls;
+----
+1 1
+2 2
+3 3
+4 4
+
+# array_position with columns and scalars
+query II
+select array_position(column1, 3), array_position(column1, 3, 5) from arrays_values_without_nulls;
+----
+3 NULL
+NULL NULL
+NULL NULL
+NULL NULL
+
+## array_positions
+
 # array_positions scalar function
-query ??? rowsort
+query ???
 select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1);
 ----
 [3, 4] [5] [1, 2, 3]
 
+# array_positions with columns
+query ?
+select array_positions(column1, column2) from arrays_values_without_nulls;
+----
+[1]
+[2]
+[3]
+[4]
+
+# array_positions with columns and scalars
+query ??
+select array_positions(column1, 4), array_positions(array[1, 2, 23, 13, 33, 45], column2) from arrays_values_without_nulls;
+----
+[4] [1]
+[] []
+[] [3]
+[] []
+
 ## array_replace
 
 # array_replace scalar function
@@ -210,7 +434,7 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1,
 ## array_to_string
 
 # array_to_string scalar function
-query TTT rowsort
+query TTT
 select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|');
 ----
 h,e,l,l,o 1-2-3-4-5 1|2|3
@@ -228,13 +452,13 @@ Error during planning: Cannot automatically convert Utf8 to List\(Field \{ name:
 select array_to_string(make_array(), ',')
 
 # array_to_string scalar function with nulls #1
-query TTT rowsort
+query TTT
 select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|');
 ----
 h,l,o 1-3-5 2|3
 
 # array_to_string scalar function with nulls #2
-query TTT rowsort
+query TTT
 select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0');
 ----
 h,-,-,-,o nil-2-nil-4-5 1|0|3
@@ -288,19 +512,19 @@ select trim_array(make_array(), 0), trim_array(make_array(), 1)
 ## array_length
 
 # array_length scalar function
-query III rowsort
+query III
 select array_length(make_array(1, 2, 3, 4, 5)), array_length(make_array(1, 2, 3)), array_length(make_array([1, 2], [3, 4], [5, 6]));
 ----
 5 3 3
 
 # array_length scalar function #2
-query III rowsort
+query III
 select array_length(make_array(1, 2, 3, 4, 5), 1), array_length(make_array(1, 2, 3), 1), array_length(make_array([1, 2], [3, 4], [5, 6]), 1);
 ----
 5 3 3
 
 # array_length scalar function #3
-query III rowsort
+query III
 select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, 3), 2), array_length(make_array([1, 2], [3, 4], [5, 6]), 2);
 ----
 NULL NULL 2
@@ -312,7 +536,7 @@ Error during planning: Cannot automatically convert List\(Field \{ name: "item",
 select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4);
 
 # array_length scalar function #5
-query III rowsort
+query III
 select array_length(make_array()), array_length(make_array(), 1), array_length(make_array(), 2)
 ----
 0 0 NULL
@@ -337,14 +561,14 @@ caused by
 Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8
 select array_dims(make_array()), array_dims(make_array(make_array()))
 
+## array_ndims
+
 # array_ndims scalar function
-query III rowsort
+query III
 select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])), array_ndims(make_array([[[[1], [2]]]]));
 ----
 1 2 5
 
-## array_ndims
-
 # array_ndims scalar function #2
 query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\)
 caused by
@@ -352,183 +576,106 @@ Error during planning: Cannot automatically convert List\(Field \{ name: "item",
 select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]);
 
 # array_ndims scalar function #3
-query II rowsort
+query II
 select array_ndims(make_array()), array_ndims(make_array(make_array()))
 ----
 1 2
 
-## array concatenate opeartor
-
-# array concatenate operator #1 (like array_concat scalar function)
-query ?? rowsort
-select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]);
-----
-[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
-
-# array concatenate operator #2 (like array_append scalar function)
-query ??? rowsort
-select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o';
-----
-[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-
-# array concatenate operator #3 (like array_prepend scalar function)
-query ??? rowsort
-select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o');
-----
-[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-
-# make_array
-
-query ?
-select make_array(1, 2.0)
-----
-[1.0, 2.0]
-
-query ?
-select make_array(null, 1.0)
-----
-[, 1.0]
-
-query ?
-select make_array(1, 2.0, null, 3)
-----
-[1.0, 2.0, , 3.0]
-
-query ?
-select make_array(1.0, '2', null)
-----
-[1.0, 2, ]
-
-statement ok
-create table foo1 (x int, y double) as values (1, 2.0);
-
-query ?
-select make_array(x, y) from foo1;
-----
-[1.0, 2.0]
-
-statement ok
-create table foo2 (x float, y varchar) as values (1.0, '1');
-
-query ?
-select make_array(x, y) from foo2;
-----
-[1.0, 1]
-
-# array_contains
+## array_contains
 
 # array_contains scalar function #1
-query BBB rowsort
+query BBB
 select array_contains(make_array(1, 2, 3), make_array(1, 1, 2, 3)), array_contains([1, 2, 3], [1, 1, 2]), array_contains([1, 2, 3], [2, 1, 3, 1]);
 ----
 true true true
 
 # array_contains scalar function #2
-query BB rowsort
+query BB
 select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 3]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 4]);
 ----
 true true
 
 # array_contains scalar function #3
-query BBB rowsort
+query BBB
 select array_contains(make_array(1, 2, 3), make_array(1, 2, 3, 4)), array_contains([1, 2, 3], [1, 1, 4]), array_contains([1, 2, 3], [2, 1, 3, 4]);
 ----
 false false false
 
 # array_contains scalar function #4
-query BB rowsort
+query BB
 select array_contains([[1, 2], [3, 4]], [[1, 2], [3, 4], [1, 5]]), array_contains([[[1], [2]], [[3], [4]]], [1, 2, 2, 3, 5]);
 ----
 false false
 
 # array_contains scalar function #5
-query BB rowsort
+query BB
 select array_contains([true, true, false, true, false], [true, false, false]), array_contains([true, false, true], [true, true]);
 ----
 true true
 
 # array_contains scalar function #6
-query BB rowsort
+query BB
 select array_contains(make_array(true, true, true), make_array(false, false)), array_contains([false, false, false], [true, true]);
 ----
 false false
 
-## array_contains array
 
+### Array operators tests
 
-statement ok
-CREATE TABLE t
-AS VALUES
-(make_array(1,2,3), make_array(1,2,3)),
-(make_array(1,2,3), make_array(2,3)),
-(make_array(2,3),   make_array(1,2,3)),
-(null,              make_array(1,2,3)),
-(make_array(2,3),   null)
-;
 
+## array concatenate operator
 
+# array concatenate operator with scalars #1 (like array_concat scalar function)
 query ??
-SELECT
-  column1,
-  column2
-FROM t
+select make_array(1, 2, 3) || make_array(4, 5, 6) || make_array(7, 8, 9), make_array([1], [2]) || make_array([3], [4]);
 ----
-[1, 2, 3] [1, 2, 3]
-[1, 2, 3] [2, 3]
-[2, 3] [1, 2, 3]
-NULL [1, 2, 3]
-[2, 3] NULL
-
+[1, 2, 3, 4, 5, 6, 7, 8, 9] [[1], [2], [3], [4]]
 
-# incorrect answer (one row) to https://github.com/apache/arrow-datafusion/issues/6709
+# array concatenate operator with scalars #2 (like array_append scalar function)
+query ???
+select make_array(1, 2, 3) || 4, make_array(1.0, 2.0, 3.0) || 4.0, make_array('h', 'e', 'l', 'l') || 'o';
+----
+[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
-query BB
-SELECT
-  array_contains(column1, column2) as c12,
-  array_contains(column1, column2) as c21
-FROM t
+# array concatenate operator with scalars #3 (like array_prepend scalar function)
+query ???
+select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_array('e', 'l', 'l', 'o');
 ----
-true true
+[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
 
-statement ok
-drop table t
+### Array casting tests
 
 
-## array_contains array (nested)
+## make_array
 
+# make_array scalar function #1
+query ?
+select make_array(1, 2.0)
+----
+[1.0, 2.0]
 
-statement ok
-CREATE TABLE t
-AS VALUES
-(make_array(make_array(2),make_array(3,4)),   make_array(3,4)),
-(make_array(make_array(2),make_array(3,4)),   make_array(4,3)),
-(make_array(make_array(2),make_array(3,4)),   make_array(3)),
-(make_array(make_array(2),make_array(3,4)),   make_array(2))
-;
+# make_array scalar function #2
+query ?
+select make_array(null, 1.0)
+----
+[, 1.0]
 
+# make_array scalar function #3
+query ?
+select make_array(1, 2.0, null, 3)
+----
+[1.0, 2.0, , 3.0]
 
-query ??
-SELECT
-  column1,
-  column2
-FROM t
+# make_array scalar function #4
+query ?
+select make_array(1.0, '2', null)
 ----
-[[2], [3, 4]] [3, 4]
-[[2], [3, 4]] [4, 3]
-[[2], [3, 4]] [3]
-[[2], [3, 4]] [2]
+[1.0, 2, ]
 
 
-# incorrect answer (one row) to https://github.com/apache/arrow-datafusion/issues/6709
+### FixedSizeListArray
 
-query BB
-SELECT
-  array_contains(column1, column2) as c12,
-  array_contains(column1, column2) as c21
-FROM t
-----
-true true
 
 statement ok
 CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION 'tests/data/fixed_size_list_array.parquet';
@@ -566,3 +713,19 @@ query ?
 select make_array(f0) from fixed_size_list_array
 ----
 [[1, 2], [3, 4]]
+
+
+### Delete tables
+
+
+statement ok
+drop table values;
+
+statement ok
+drop table arrays;
+
+statement ok
+drop table arrays_values;
+
+statement ok
+drop table arrays_values_without_nulls;
diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index 69054622757d..103f1047fa48 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -488,9 +488,9 @@ impl BuiltinScalarFunction {
             )))),
             BuiltinScalarFunction::ArrayLength => Ok(UInt8),
             BuiltinScalarFunction::ArrayNdims => Ok(UInt8),
-            BuiltinScalarFunction::ArrayPosition => Ok(UInt8),
+            BuiltinScalarFunction::ArrayPosition => Ok(UInt64),
             BuiltinScalarFunction::ArrayPositions => {
-                Ok(List(Arc::new(Field::new("item", UInt8, true))))
+                Ok(List(Arc::new(Field::new("item", UInt64, true))))
             }
             BuiltinScalarFunction::ArrayPrepend => Ok(List(Arc::new(Field::new(
                 "item",
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index bddeef526a4d..cd174918db37 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -18,7 +18,7 @@
 //! Array expressions
 
 use arrow::array::*;
-use arrow::buffer::Buffer;
+use arrow::buffer::{Buffer, OffsetBuffer};
 use arrow::compute;
 use arrow::datatypes::{DataType, Field};
 use core::any::type_name;
@@ -197,15 +197,53 @@ pub fn make_array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
 
 macro_rules! append {
     ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE);
+        let mut offsets: Vec<i32> = vec![0];
+        let mut values =
+            downcast_arg!(new_empty_array($ELEMENT.data_type()), $ARRAY_TYPE).clone();
+
         let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
-        let cat = compute::concat(&[child_array, element])?;
-        let mut scalars = vec![];
-        for i in 0..cat.len() {
-            scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?));
+        for (arr, el) in $ARRAY.iter().zip(element.iter()) {
+            let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+                DataFusionError::Internal(format!("offsets should not be empty",))
+            })?;
+            match arr {
+                Some(arr) => {
+                    let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+                    values = downcast_arg!(
+                        compute::concat(&[
+                            &values,
+                            child_array,
+                            &$ARRAY_TYPE::from(vec![el])
+                        ])?
+                        .clone(),
+                        $ARRAY_TYPE
+                    )
+                    .clone();
+                    offsets.push(last_offset + child_array.len() as i32 + 1i32);
+                }
+                None => {
+                    values = downcast_arg!(
+                        compute::concat(&[
+                            &values,
+                            &$ARRAY_TYPE::from(vec![el.clone()])
+                        ])?
+                        .clone(),
+                        $ARRAY_TYPE
+                    )
+                    .clone();
+                    offsets.push(last_offset + 1i32);
+                }
+            }
         }
-        scalars
+
+        let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(), true));
+
+        Arc::new(ListArray::try_new(
+            field,
+            OffsetBuffer::new(offsets.into()),
+            Arc::new(values),
+            None,
+        )?)
     }};
 }
 
@@ -221,7 +259,7 @@ pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
     let arr = as_list_array(&args[0])?;
     let element = &args[1];
 
-    let scalars = match (arr.value_type(), element.data_type()) {
+    let res = match (arr.value_type(), element.data_type()) {
                 (DataType::Utf8, DataType::Utf8) => append!(arr, element, StringArray),
                 (DataType::LargeUtf8, DataType::LargeUtf8) => append!(arr, element, LargeStringArray),
                 (DataType::Boolean, DataType::Boolean) => append!(arr, element, BooleanArray),
@@ -243,20 +281,58 @@ pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
                 }
     };
 
-    Ok(array(scalars.as_slice())?.into_array(1))
+    Ok(res)
 }
 
 macro_rules! prepend {
     ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE);
+        let mut offsets: Vec<i32> = vec![0];
+        let mut values =
+            downcast_arg!(new_empty_array($ELEMENT.data_type()), $ARRAY_TYPE).clone();
+
         let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
-        let cat = compute::concat(&[element, child_array])?;
-        let mut scalars = vec![];
-        for i in 0..cat.len() {
-            scalars.push(ColumnarValue::Scalar(ScalarValue::try_from_array(&cat, i)?));
+        for (arr, el) in $ARRAY.iter().zip(element.iter()) {
+            let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+                DataFusionError::Internal(format!("offsets should not be empty",))
+            })?;
+            match arr {
+                Some(arr) => {
+                    let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+                    values = downcast_arg!(
+                        compute::concat(&[
+                            &values,
+                            &$ARRAY_TYPE::from(vec![el]),
+                            child_array
+                        ])?
+                        .clone(),
+                        $ARRAY_TYPE
+                    )
+                    .clone();
+                    offsets.push(last_offset + child_array.len() as i32 + 1i32);
+                }
+                None => {
+                    values = downcast_arg!(
+                        compute::concat(&[
+                            &values,
+                            &$ARRAY_TYPE::from(vec![el.clone()])
+                        ])?
+                        .clone(),
+                        $ARRAY_TYPE
+                    )
+                    .clone();
+                    offsets.push(last_offset + 1i32);
+                }
+            }
         }
-        scalars
+
+        let field = Arc::new(Field::new("item", $ELEMENT.data_type().clone(), true));
+
+        Arc::new(ListArray::try_new(
+            field,
+            OffsetBuffer::new(offsets.into()),
+            Arc::new(values),
+            None,
+        )?)
     }};
 }
 
@@ -272,7 +348,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result<ArrayRef> {
     let element = &args[0];
     let arr = as_list_array(&args[1])?;
 
-    let scalars = match (arr.value_type(), element.data_type()) {
+    let res = match (arr.value_type(), element.data_type()) {
                 (DataType::Utf8, DataType::Utf8) => prepend!(arr, element, StringArray),
                 (DataType::LargeUtf8, DataType::LargeUtf8) => prepend!(arr, element, LargeStringArray),
                 (DataType::Boolean, DataType::Boolean) => prepend!(arr, element, BooleanArray),
@@ -294,7 +370,7 @@ pub fn array_prepend(args: &[ArrayRef]) -> Result<ArrayRef> {
                 }
     };
 
-    Ok(array(scalars.as_slice())?.into_array(1))
+    Ok(res)
 }
 
 /// Array_concat/Array_cat SQL function
@@ -420,74 +496,58 @@ pub fn array_fill(args: &[ColumnarValue]) -> Result<ColumnarValue> {
 
 macro_rules! position {
     ($ARRAY:expr, $ELEMENT:expr, $INDEX:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE);
-        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0);
-
-        match child_array
+        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
+        $ARRAY
             .iter()
-            .skip($INDEX)
-            .position(|x| x == Some(element))
-        {
-            Some(value) => Ok(ColumnarValue::Scalar(ScalarValue::UInt8(Some(
-                (value + $INDEX + 1) as u8,
-            )))),
-            None => Ok(ColumnarValue::Scalar(ScalarValue::Null)),
-        }
+            .zip(element.iter())
+            .zip($INDEX.iter())
+            .map(|((arr, el), i)| {
+                let index = match i {
+                    Some(i) => {
+                        if i <= 0 {
+                            0
+                        } else {
+                            i - 1
+                        }
+                    }
+                    None => {
+                        return Err(DataFusionError::Execution(
+                            "initial position must not be null".to_string(),
+                        ))
+                    }
+                };
+
+                match arr {
+                    Some(arr) => {
+                        let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+
+                        match child_array
+                            .iter()
+                            .skip(index as usize)
+                            .position(|x| x == el)
+                        {
+                            Some(value) => Ok(Some(value as u64 + index as u64 + 1u64)),
+                            None => Ok(None),
+                        }
+                    }
+                    None => Ok(None),
+                }
+            })
+            .collect::<Result<UInt64Array>>()?
     }};
 }
 
 /// Array_position SQL function
-pub fn array_position(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    let arr = match &args[0] {
-        ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
-        ColumnarValue::Array(arr) => arr.clone(),
-    };
-
-    let element = match &args[1] {
-        ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
-        _ => {
-            return Err(DataFusionError::Internal(
-                "Array_position function requires scalar element".to_string(),
-            ))
-        }
-    };
+pub fn array_position(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let arr = as_list_array(&args[0])?;
+    let element = &args[1];
 
-    let mut index: usize = 0;
+    let mut index = Int64Array::from_value(0, arr.len());
     if args.len() == 3 {
-        let scalar =
-            match &args[2] {
-                ColumnarValue::Scalar(scalar) => scalar.clone(),
-                _ => return Err(DataFusionError::Internal(
-                    "Array_position function requires positive integer scalar element"
-                        .to_string(),
-                )),
-            };
-
-        index =
-            match scalar {
-                ScalarValue::Int8(Some(value)) => value as usize,
-                ScalarValue::Int16(Some(value)) => value as usize,
-                ScalarValue::Int32(Some(value)) => value as usize,
-                ScalarValue::Int64(Some(value)) => value as usize,
-                ScalarValue::UInt8(Some(value)) => value as usize,
-                ScalarValue::UInt16(Some(value)) => value as usize,
-                ScalarValue::UInt32(Some(value)) => value as usize,
-                ScalarValue::UInt64(Some(value)) => value as usize,
-                _ => return Err(DataFusionError::Internal(
-                    "Array_position function requires positive integer scalar element"
-                        .to_string(),
-                )),
-            };
-
-        if index == 0 {
-            index = 0;
-        } else {
-            index -= 1;
-        }
+        index = as_int64_array(&args[2])?.clone();
     }
 
-    match arr.data_type() {
+    let res = match arr.data_type() {
         DataType::List(field) => match field.data_type() {
             DataType::Utf8 => position!(arr, element, index, StringArray),
             DataType::LargeUtf8 => position!(arr, element, index, LargeStringArray),
@@ -502,50 +562,75 @@ pub fn array_position(args: &[ColumnarValue]) -> Result<ColumnarValue> {
             DataType::UInt16 => position!(arr, element, index, UInt16Array),
             DataType::UInt32 => position!(arr, element, index, UInt32Array),
             DataType::UInt64 => position!(arr, element, index, UInt64Array),
-            data_type => Err(DataFusionError::NotImplemented(format!(
-                "Array_position is not implemented for types '{data_type:?}'."
-            ))),
+            data_type => {
+                return Err(DataFusionError::NotImplemented(format!(
+                    "Array_position is not implemented for types '{data_type:?}'."
+                )))
+            }
         },
-        data_type => Err(DataFusionError::NotImplemented(format!(
-            "Array is not type '{data_type:?}'."
-        ))),
-    }
+        data_type => {
+            return Err(DataFusionError::NotImplemented(format!(
+                "Array is not type '{data_type:?}'."
+            )))
+        }
+    };
+
+    Ok(Arc::new(res))
 }
 
 macro_rules! positions {
     ($ARRAY:expr, $ELEMENT:expr, $ARRAY_TYPE:ident) => {{
-        let child_array =
-            downcast_arg!(downcast_arg!($ARRAY, ListArray).values(), $ARRAY_TYPE);
-        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE).value(0);
-
-        let mut res = vec![];
-        for (i, x) in child_array.iter().enumerate() {
-            if x == Some(element) {
-                res.push(ColumnarValue::Array(Arc::new(UInt8Array::from(vec![
-                    Some((i + 1) as u8),
-                ]))));
-            }
+        let element = downcast_arg!($ELEMENT, $ARRAY_TYPE);
+        let mut offsets: Vec<i32> = vec![0];
+        let mut values =
+            downcast_arg!(new_empty_array(&DataType::UInt64), UInt64Array).clone();
+        for comp in $ARRAY
+            .iter()
+            .zip(element.iter())
+            .map(|(arr, el)| match arr {
+                Some(arr) => {
+                    let child_array = downcast_arg!(arr, $ARRAY_TYPE);
+                    let res = child_array
+                        .iter()
+                        .enumerate()
+                        .filter(|(_, x)| *x == el)
+                        .flat_map(|(i, _)| Some((i + 1) as u64))
+                        .collect::<UInt64Array>();
+
+                    Ok(res)
+                }
+                None => Ok(downcast_arg!(
+                    new_empty_array(&DataType::UInt64),
+                    UInt64Array
+                )
+                .clone()),
+            })
+            .collect::<Result<Vec<UInt64Array>>>()?
+        {
+            let last_offset: i32 = offsets.last().copied().ok_or_else(|| {
+                DataFusionError::Internal(format!("offsets should not be empty",))
+            })?;
+            values =
+                downcast_arg!(compute::concat(&[&values, &comp,])?.clone(), UInt64Array)
+                    .clone();
+            offsets.push(last_offset + comp.len() as i32);
         }
 
-        res
+        let field = Arc::new(Field::new("item", DataType::UInt64, true));
+
+        Arc::new(ListArray::try_new(
+            field,
+            OffsetBuffer::new(offsets.into()),
+            Arc::new(values),
+            None,
+        )?)
     }};
 }
 
 /// Array_positions SQL function
-pub fn array_positions(args: &[ColumnarValue]) -> Result<ColumnarValue> {
-    let arr = match &args[0] {
-        ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
-        ColumnarValue::Array(arr) => arr.clone(),
-    };
-
-    let element = match &args[1] {
-        ColumnarValue::Scalar(scalar) => scalar.to_array().clone(),
-        _ => {
-            return Err(DataFusionError::Internal(
-                "Array_positions function requires scalar element".to_string(),
-            ))
-        }
-    };
+pub fn array_positions(args: &[ArrayRef]) -> Result<ArrayRef> {
+    let arr = as_list_array(&args[0])?;
+    let element = &args[1];
 
     let res = match arr.data_type() {
         DataType::List(field) => match field.data_type() {
@@ -575,7 +660,7 @@ pub fn array_positions(args: &[ColumnarValue]) -> Result<ColumnarValue> {
         }
     };
 
-    array(res.as_slice())
+    Ok(res)
 }
 
 macro_rules! remove {
@@ -1465,29 +1550,22 @@ mod tests {
     #[test]
     fn test_array_position() {
         // array_position([1, 2, 3, 4], 3) = 3
-        let list_array = return_array();
-        let array = array_position(&[
-            list_array,
-            ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
-        ])
-        .expect("failed to initialize function array_position")
-        .into_array(1);
-        let result =
-            as_uint8_array(&array).expect("failed to initialize function array_position");
+        let list_array = return_array().into_array(1);
+        let array = array_position(&[list_array, Arc::new(Int64Array::from_value(3, 1))])
+            .expect("failed to initialize function array_position");
+        let result = as_uint64_array(&array)
+            .expect("failed to initialize function array_position");
 
-        assert_eq!(result, &UInt8Array::from(vec![3]));
+        assert_eq!(result, &UInt64Array::from(vec![3]));
     }
 
     #[test]
     fn test_array_positions() {
         // array_positions([1, 2, 3, 4], 3) = [3]
-        let list_array = return_array();
-        let array = array_positions(&[
-            list_array,
-            ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
-        ])
-        .expect("failed to initialize function array_position")
-        .into_array(1);
+        let list_array = return_array().into_array(1);
+        let array =
+            array_positions(&[list_array, Arc::new(Int64Array::from_value(3, 1))])
+                .expect("failed to initialize function array_position");
         let result =
             as_list_array(&array).expect("failed to initialize function array_position");
 
@@ -1497,7 +1575,7 @@ mod tests {
             result
                 .value(0)
                 .as_any()
-                .downcast_ref::<UInt8Array>()
+                .downcast_ref::<UInt64Array>()
                 .unwrap()
                 .values()
         );
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 08916d89c986..215582a1a8be 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -423,11 +423,11 @@ pub fn create_physical_fun(
             Arc::new(|args| make_scalar_function(array_expressions::array_ndims)(args))
         }
         BuiltinScalarFunction::ArrayPosition => {
-            Arc::new(array_expressions::array_position)
-        }
-        BuiltinScalarFunction::ArrayPositions => {
-            Arc::new(array_expressions::array_positions)
+            Arc::new(|args| make_scalar_function(array_expressions::array_position)(args))
         }
+        BuiltinScalarFunction::ArrayPositions => Arc::new(|args| {
+            make_scalar_function(array_expressions::array_positions)(args)
+        }),
         BuiltinScalarFunction::ArrayPrepend => {
             Arc::new(|args| make_scalar_function(array_expressions::array_prepend)(args))
         }

From 0fb5de72056d14d331dc92ec2c579119358068fa Mon Sep 17 00:00:00 2001
From: Mustafa Akur <106137913+mustafasrepo@users.noreply.github.com>
Date: Thu, 6 Jul 2023 09:46:28 +0300
Subject: [PATCH 67/89] MINOR: Fix ordering of the aggregate_source_with_order
 table (#6852)

* Fix ordering of the source

* rename file path to be more descriptive
---
 .../aggregate_test_100_order_by_c1_asc.csv    | 101 ++++++++++++++++++
 .../sqllogictests/test_files/explain.slt      |   4 +-
 2 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100644 datafusion/core/tests/data/aggregate_test_100_order_by_c1_asc.csv

diff --git a/datafusion/core/tests/data/aggregate_test_100_order_by_c1_asc.csv b/datafusion/core/tests/data/aggregate_test_100_order_by_c1_asc.csv
new file mode 100644
index 000000000000..9cdf2f845e85
--- /dev/null
+++ b/datafusion/core/tests/data/aggregate_test_100_order_by_c1_asc.csv
@@ -0,0 +1,101 @@
+c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13
+a,1,-85,-15154,1171968280,1919439543497968449,77,52286,774637006,12101411955859039553,0.12285209,0.6864391962767343,0keZ5G8BffGwgF2RwQD59TFzMStxCB
+a,3,13,12613,1299719633,2020498574254265315,191,17835,3998790955,14881411008939145569,0.041445434,0.8813167497816289,Amn2K87Db5Es3dFQO9cw9cvpAM6h35
+a,4,-38,20744,762932956,308913475857409919,7,45465,1787652631,878137512938218976,0.7459874,0.02182578039211991,ydkwycaISlYSlEq3TlkS2m15I2pcp8
+a,4,-54,-2376,434021400,5502271306323260832,113,15777,2502326480,7966148640299601101,0.5720931,0.30585375151301186,KJFcmTVjdkCMv94wYCtfHMFhzyRsmH
+a,5,36,-16974,623103518,6834444206535996609,71,29458,141047417,17448660630302620693,0.17100024,0.04429073092078406,OF7fQ37GzaZ5ikA2oMyvleKtgnLjXh
+a,1,-25,15295,383352709,4980135132406487265,231,102,3276123488,12763583666216333412,0.53796273,0.17592486905979987,XemNcT1xp61xcM1Qz3wZ1VECCnq06O
+a,5,-31,-12907,586844478,-4862189775214031241,170,28086,1013876852,11005002152861474932,0.35319167,0.05573662213439634,MeSTAXq8gVxVjbEjgkvU9YLte0X9uE
+a,2,45,15673,-1899175111,398282800995316041,99,2555,145294611,8554426087132697832,0.17333257,0.6405262429561641,b3b9esRhTzFEawbs6XhpKnD9ojutHB
+a,3,13,32064,912707948,3826618523497875379,42,21463,2214035726,10771380284714693539,0.6133468,0.7325106678655877,i6RQVXKUh7MzuGMDaNclUYnFUAireU
+a,3,17,-22796,1337043149,-1282905594104562444,167,2809,754775609,732272194388185106,0.3884129,0.658671129040488,VDhtJkYjAYPykCgOU9x3v7v3t4SO1a
+a,4,65,-28462,-1813935549,7602389238442209730,18,363,1865307672,11378396836996498283,0.09130204,0.5593249815276734,WHmjWk2AY4c6m7DA4GitUx6nmb1yYS
+a,4,-101,11640,1993193190,2992662416070659899,230,40566,466439833,16778113360088370541,0.3991115,0.574210838214554,NEhyk8uIx4kEULJGa8qIyFjjBcP2G6
+a,2,-48,-18025,439738328,-313657814587041987,222,13763,3717551163,9135746610908713318,0.055064857,0.9800193410444061,ukyD7b0Efj7tNlFSRmzZ0IqkEzg2a8
+a,1,-56,8692,2106705285,-7811675384226570375,231,15573,1454057357,677091006469429514,0.42794758,0.2739938529235548,JN0VclewmjwYlSl8386MlWv5rEhWCz
+a,1,-5,12636,794623392,2909750622865366631,15,24022,2669374863,4776679784701509574,0.29877836,0.2537253407987472,waIGbOGl1PM6gnzZ4uuZt4E2yDWRHs
+a,3,14,28162,397430452,-452851601758273256,57,14722,431948861,8164671015278284913,0.40199697,0.07260475960924484,TtDKUZxzVxsq758G6AWPSYuZgVgbcl
+a,1,83,-14704,2143473091,-4387559599038777245,37,829,4015442341,4602675983996931623,0.89542526,0.9567595541247681,ErJFw6hzZ5fmI5r8bhE4JzlscnhKZU
+a,3,-12,-9168,1489733240,-1569376002217735076,206,33821,3959216334,16060348691054629425,0.9488028,0.9293883502480845,oLZ21P2JEDooxV1pU31cIxQHEeeoLu
+a,3,-72,-11122,-2141451704,-2578916903971263854,83,30296,1995343206,17452974532402389080,0.94209343,0.3231750610081745,e2Gh6Ov8XkXoFdJWhl0EjwEHlMDYyG
+a,2,-43,13080,370975815,5881039805148485053,2,20120,2939920218,906367167997372130,0.42733806,0.16301110515739792,m6jD0LBIQWaMfenwRCTANI9eOdyyto
+a,5,-101,-12484,-842693467,-6140627905445351305,57,57885,2496054700,2243924747182709810,0.59520596,0.9491397432856566,QJYm7YRA3YetcBHI5wkMZeLXVmfuNy
+b,1,29,-18218,994303988,5983957848665088916,204,9489,3275293996,14857091259186476033,0.53840446,0.17909035118828576,AyYVExXK6AR2qUTxNZ7qRHQOVGMLcz
+b,5,-82,22080,1824882165,7373730676428214987,208,34331,3342719438,3330177516592499461,0.82634634,0.40975383525297016,Ig1QcuKsjHXkproePdERo2w0mYzIqd
+b,4,-111,-1967,-4229382,1892872227362838079,67,9832,1243785310,8382489916947120498,0.06563997,0.152498292971736,Sfx0vxv1skzZWT1PqVdoRDdO6Sb6xH
+b,1,54,-18410,1413111008,-7145106120930085900,249,5382,1842680163,17818611040257178339,0.8881188,0.24899794314659673,6FPJlLAcaQ5uokyOWZ9HGdLZObFvOZ
+b,3,17,14457,670497898,-2390782464845307388,255,24770,1538863055,12662506238151717757,0.34077626,0.7614304100703713,6x93sxYioWuq5c9Kkk8oTAAORM7cH0
+b,5,-5,24896,1955646088,2430204191283109071,118,43655,2424630722,11429640193932435507,0.87989986,0.7328050041291218,JafwVLSVk5AVoXFuzclesQ000EE2k1
+b,2,63,21456,-2138770630,-2380041687053733364,181,57594,2705709344,13144161537396946288,0.09683716,0.3051364088814128,nYVJnVicpGRqKZibHyBAmtmzBXAFfT
+b,5,68,21576,1188285940,5717755781990389024,224,27600,974297360,9865419128970328044,0.80895734,0.7973920072996036,ioEncce3mPOXD2hWhpZpCPWGATG6GU
+b,2,31,23127,-800561771,-8706387435232961848,153,27034,1098639440,3343692635488765507,0.35692692,0.5590205548347534,okOkcWflkNXIy4R8LzmySyY1EC3sYd
+b,4,17,-28070,-673237643,1904316899655860234,188,27744,933879086,3732692885824435932,0.41860116,0.40342283197779727,JHNgc2UCaiXOdmkxwDDyGhRlO0mnBQ
+b,2,-60,-21739,-1908480893,-8897292622858103761,59,50009,2525744318,1719090662556698549,0.52930677,0.560333188635217,l7uwDoTepWwnAP0ufqtHJS3CRi7RfP
+b,4,-117,19316,2051224722,-5534418579506232438,133,52046,3023531799,13684453606722360110,0.62608826,0.8506721053047003,mhjME0zBHbrK6NMkytMTQzOssOa1gF
+b,5,62,16337,41423756,-2274773899098124524,121,34206,2307004493,10575647935385523483,0.23794776,0.1754261586710173,qnPOOmslCJaT45buUisMRnM0rc77EK
+b,2,68,15874,49866617,1179733259727844435,121,23948,3455216719,3898128009708892708,0.6306253,0.9185813970744787,802bgTGl6Bk5TlkPYYTxp5JkKyaYUA
+b,1,12,7652,-1448995523,-5332734971209541785,136,49283,4076864659,15449267433866484283,0.6214579,0.05636955101974106,akiiY5N0I44CMwEnBL6RTBk7BRkxEj
+b,4,-59,25286,1423957796,2646602445954944051,0,61069,3570297463,15100310750150419896,0.49619365,0.04893135681998029,fuyvs0w7WsKSlXqJ1e6HFSoLmx03AG
+b,3,-101,-13217,-346989627,5456800329302529236,26,54276,243203849,17929716297117857676,0.05422181,0.09465635123783445,MXhhH1Var3OzzJCtI9VNyYvA0q8UyJ
+b,5,-44,15788,-629486480,5822642169425315613,13,11872,3457053821,2413406423648025909,0.44318348,0.32869374687050157,ALuRhobVWbnQTTWZdSOk0iVe8oYFhW
+b,4,47,20690,-1009656194,-2027442591571700798,200,7781,326151275,2881913079548128905,0.57360977,0.2145232647388039,52mKlRE3aHCBZtjECq6sY9OqVf8Dze
+c,2,1,18109,2033001162,-6513304855495910254,25,43062,1491205016,5863949479783605708,0.110830784,0.9294097332465232,6WfVFBVGJSQb7FhA7E0lBwdvjfZnSW
+c,1,103,-22186,431378678,1346564663822463162,146,12393,3766999078,10901819591635583995,0.064453244,0.7784918983501654,2T3wSlHdEmASmO0xcXHnndkKEt6bz8
+c,2,-29,25305,-537142430,-7683452043175617798,150,31648,598822671,11759014161799384683,0.8315913,0.946325164889271,9UbObCsVkmYpJGcGrgfK90qOnwb2Lj
+c,4,123,16620,852509237,-3087630526856906991,196,33715,3566741189,4546434653720168472,0.07606989,0.819715865079681,8LIh0b6jmDGm87BmIyjdxNIpX4ugjD
+c,2,-60,-16312,-1808210365,-3368300253197863813,71,39635,2844041986,7045482583778080653,0.805363,0.6425694115212065,BJqx5WokrmrrezZA0dUbleMYkG5U2O
+c,1,41,-4667,-644225469,7049620391314639084,196,48099,2125812933,15419512479294091215,0.5780736,0.9255031346434324,mzbkwXKrPeZnxg2Kn1LRF5hYSsmksS
+c,3,73,-9565,-382483011,1765659477910680019,186,1535,1088543984,2906943497598597237,0.680652,0.6009475544728957,Ow5PGpfTm4dXCfTDsXAOTatXRoAydR
+c,3,-2,-18655,-2141999138,-3154042970870838072,251,34970,3862393166,13062025193350212516,0.034291923,0.7697753383420857,IWl0G3ZlMNf7WT8yjIB49cx7MmYOmr
+c,3,22,13741,-2098805236,8604102724776612452,45,2516,1362369177,196777795886465166,0.94669616,0.0494924465469434,6oIXZuIPIqEoPBvFmbt2Nxy3tryGUE
+c,1,-24,-24085,-1882293856,7385529783747709716,41,48048,520189543,2402288956117186783,0.39761502,0.3600766362333053,Fi4rJeTQq4eXj8Lxg3Hja5hBVTVV5u
+c,2,-106,-1114,-1927628110,1080308211931669384,177,20421,141680161,7464432081248293405,0.56749094,0.565352842229935,Vp3gmWunM5A7wOC9YW2JroFqTWjvTi
+c,4,-79,5281,-237425046,373011991904079451,121,55620,2818832252,2464584078983135763,0.49774808,0.9237877978193884,t6fQUjJejPcjc04wHvHTPe55S65B4V
+c,1,70,27752,1325868318,1241882478563331892,63,61637,473294098,4976799313755010034,0.13801557,0.5081765563442366,Ktb7GQ0N1DrxwkCkEUsTaIXk0xYinn
+c,5,-94,-15880,2025611582,-3348824099853919681,5,40622,4268716378,12849419495718510869,0.34163946,0.4830878559436823,RilTlL1tKkPOUFuzmLydHAVZwv1OGl
+c,4,-90,-2935,1579876740,6733733506744649678,254,12876,3593959807,4094315663314091142,0.5708688,0.5603062368164834,Ld2ej8NEv5zNcqU60FwpHeZKBhfpiV
+c,2,-117,-30187,-1222533990,-191957437217035800,136,47061,2293105904,12659011877190539078,0.2047385,0.9706712283358269,pLk3i59bZwd5KBZrI1FiweYTd5hteG
+c,2,29,-3855,1354539333,4742062657200940467,81,53815,3398507249,562977550464243101,0.7124534,0.991517828651004,Oq6J4Rx6nde0YlhOIJkFsX2MsSvAQ0
+c,4,3,-30508,659422734,-6455460736227846736,133,59663,2306130875,8622584762448622224,0.16999894,0.4273123318932347,EcCuckwsF3gV1Ecgmh5v4KM8g1ozif
+c,2,-107,-2904,-1011669561,782342092880993439,18,29527,1157161427,4403623840168496677,0.31988364,0.36936304600612724,QYlaIAnJA6r8rlAb6f59wcxvcPcWFf
+c,5,118,19208,-134213907,-2120241105523909127,86,57751,1229567292,16493024289408725403,0.5536642,0.9723580396501548,TTQUwpMNSXZqVBKAFvXu7OlWvKXJKX
+c,3,97,29106,-903316089,2874859437662206732,207,42171,3473924576,8188072741116415408,0.32792538,0.2667177795079635,HKSMQ9nTnwXCJIte1JrM1dtYnDtJ8g
+d,5,-40,22614,706441268,-7542719935673075327,155,14337,3373581039,11720144131976083864,0.69632107,0.3114712539863804,C2GT5KVyOPZpgKVl110TyZO0NcJ434
+d,1,38,18384,-335410409,-1632237090406591229,26,57510,2712615025,1842662804748246269,0.6064476,0.6404495093354053,4HX6feIvmNXBN7XGqgO4YVBkhu8GDI
+d,1,57,28781,-1143802338,2662536767954229885,202,62167,879082834,4338034436871150616,0.7618384,0.42950521730777025,VY0zXmXeksCT8BzvpzpPLbmU9Kp9Y4
+d,2,113,3917,-108973366,-7220140168410319165,197,24380,63044568,4225581724448081782,0.11867094,0.2944158618048994,90gAtmGEeIqUTbo1ZrxCvWtsseukXC
+d,1,-98,13630,-1991133944,1184110014998006843,220,2986,225513085,9634106610243643486,0.89651865,0.1640882545084913,y7C453hRWd4E7ImjNDWlpexB8nUqjh
+d,3,77,15091,-1302295658,8795481303066536947,154,35477,2093538928,17419098323248948387,0.11952883,0.7035635283169166,O66j6PaYuZhEUtqV6fuU7TyjM2WxC5
+d,1,-99,5613,1213926989,-8863698443222021480,19,18736,4216440507,14933742247195536130,0.6067944,0.33639590659276175,aDxBtor7Icd9C5hnTvvw5NrIre740e
+d,2,93,-12642,2053379412,6468763445799074329,147,50842,1000948272,5536487915963301239,0.4279275,0.28534428578703896,lqhzgLsXZ8JhtpeeUWWNbMz8PHI705
+d,4,102,-24558,1991172974,-7823479531661596016,14,36599,1534194097,2240998421986827216,0.028003037,0.8824879447595726,0og6hSkhbX8AC1ktFS4kounvTzy8Vo
+d,1,-8,27138,-1383162419,7682021027078563072,36,64517,2861376515,9904216782086286050,0.80954456,0.9463098243875633,AFGCj7OWlEB5QfniEFgonMq90Tq5uH
+d,1,125,31106,-1176490478,-4306856842351827308,90,17910,3625286410,17869394731126786457,0.8882508,0.7631239070049998,dVdvo6nUD5FgCgsbOZLds28RyGTpnx
+d,5,-59,2045,-2117946883,1170799768349713170,189,63353,1365198901,2501626630745849169,0.75173044,0.18628859265874176,F7NSTjWvQJyBburN7CXRUlbgp2dIrA
+d,4,55,-1471,1902023838,1252101628560265705,157,3691,811650497,1524771507450695976,0.2968701,0.5437595540422571,f9ALCzwDAKmdu7Rk2msJaB1wxe5IBX
+d,3,-76,8809,141218956,-9110406195556445909,58,5494,1824517658,12046662515387914426,0.8557294,0.6668423897406515,Z2sWcQr0qyCJRMHDpRy3aQr7PkHtkK
+d,2,122,10130,-168758331,-3179091803916845592,30,794,4061635107,15695681119022625322,0.69592506,0.9748360509016578,OPwBqCEK5PWTjWaiOyL45u2NLTaDWv
+d,1,-72,25590,1188089983,3090286296481837049,241,832,3542840110,5885937420286765261,0.41980565,0.21535402343780985,wwXqSGKLyBQyPkonlzBNYUJTCo4LRS
+d,4,5,-7688,702611616,6239356364381313700,4,39363,3126475872,35363005357834672,0.3766935,0.061029375346466685,H5j5ZHy1FGesOAHjkQEDYCucbpKWRu
+d,3,123,29533,240273900,1176001466590906949,117,30972,2592330556,12883447461717956514,0.39075065,0.38870280983958583,1aOcrEGd0cOqZe2I5XBOm0nDcwtBZO
+e,3,104,-25136,1738331255,300633854973581194,139,20807,3577318119,13079037564113702254,0.40154034,0.7764360990307122,DuJNG8tufSqW0ZstHqWj3aGvFLMg4A
+e,3,112,-6823,-421042466,8535335158538929274,129,32712,3759340273,9916295859593918600,0.6424343,0.6316565296547284,BsM5ZAYifRh5Lw3Y8X1r53I0cTJnfE
+e,2,49,24495,-587831330,9178511478067509438,129,12757,1289293657,10948666249269100825,0.5610077,0.5991138115095911,bgK1r6v3BCTh0aejJUhkA1Hn6idXGp
+e,2,97,18167,1593800404,-9112448817105133638,163,45185,3188005828,2792105417953811674,0.38175434,0.4094218353587008,ukOiFGGFnQJDHFgZxHMpvhD3zybF0M
+e,4,-56,-31500,1544188174,3096047390018154410,220,417,557517119,2774306934041974261,0.15459597,0.19113293583306745,IZTkHMLvIKuiLjhDjYMmIHxh166we4
+e,4,-53,13788,2064155045,-691093532952651300,243,35106,2778168728,9463973906560740422,0.34515214,0.27159190516490006,0VVIHzxWtNOFLtnhjHEKjXaJOSLJfm
+e,4,97,-13181,2047637360,6176835796788944083,158,53000,2042457019,9726016502640071617,0.7085086,0.12357539988406441,oHJMNvWuunsIMIWFnYG31RCfkOo2V7
+e,1,36,-21481,-928766616,-3471238138418013024,150,52569,2610290479,7788847578701297242,0.2578469,0.7670021786149205,gpo8K5qtYePve6jyPt6xgJx4YOVjms
+e,2,52,23388,715235348,605432070100399212,165,56980,3314983189,7386391799827871203,0.46076488,0.980809631269599,jQimhdepw3GKmioWUlVSWeBVRKFkY3
+e,4,73,-22501,1282464673,2541794052864382235,67,21119,538589788,9575476605699527641,0.48515016,0.296036538664718,4JznSdBajNWhu4hRQwjV1FjTTxY68i
+e,2,-61,-2888,-1660426473,2553892468492435401,126,35429,4144173353,939909697866979632,0.4405142,0.9231889896940375,BPtQMxnuSPpxMExYV9YkDa6cAN7GP3
+e,4,74,-12612,-1885422396,1702850374057819332,130,3583,3198969145,10767179755613315144,0.5518061,0.5614503754617461,QEHVvcP8gxI6EMJIrvcnIhgzPNjIvv
+e,3,71,194,1436496767,-5639533800082367925,158,44507,3105312559,3998472996619161534,0.930117,0.6108938307533,pTeu0WMjBRTaNRT15rLCuEh3tBJVc5
+e,1,71,-5479,-1339586153,-3920238763788954243,123,53012,4229654142,10297218950720052365,0.73473036,0.5773498217058918,cBGc0kSm32ylBDnxogG727C0uhZEYZ
+e,4,96,-30336,427197269,7506304308750926996,95,48483,3521368277,5437030162957481122,0.58104324,0.42073125331890115,3BEOHQsMEFZ58VcNTOJYShTBpAPzbt
+e,2,52,-12056,-1090239422,9011500141803970147,238,4168,2013662838,12565360638488684051,0.6694766,0.39144436569161134,xipQ93429ksjNcXPX5326VSg1xJZcW
+e,5,64,-26526,1689098844,8950618259486183091,224,45253,662099130,16127995415060805595,0.2897315,0.5759450483859969,56MZa5O1hVtX4c5sbnCfxuX5kDChqI
+e,5,-86,32514,-467659022,-8012578250188146150,254,2684,2861911482,2126626171973341689,0.12559289,0.01479305307777301,gxfHWUF8XgY2KdFxigxvNEXe2V2XMl
+e,1,120,10837,-1331533190,6342019705133850847,245,3975,2830981072,16439861276703750332,0.6623719,0.9965400387585364,LiEBxds3X0Uw0lxiYjDqrkAaAwoiIW
+e,3,-95,13611,2030965207,927403809957470678,119,59134,559847112,10966649192992996919,0.5301289,0.047343434291126085,gTpyQnEODMcpsPnJMZC66gh33i3m0b
+e,4,30,-16110,61035129,-3356533792537910152,159,299,28774375,13526465947516666293,0.6999775,0.03968347085780355,cq4WSAIFwx3wwTUS5bp1wCe71R6U5I
diff --git a/datafusion/core/tests/sqllogictests/test_files/explain.slt b/datafusion/core/tests/sqllogictests/test_files/explain.slt
index d230286adcb9..1032b9c7f992 100644
--- a/datafusion/core/tests/sqllogictests/test_files/explain.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/explain.slt
@@ -70,7 +70,7 @@ CREATE EXTERNAL TABLE aggregate_test_100_with_order (
 STORED AS CSV
 WITH HEADER ROW
 WITH ORDER (c1 ASC)
-LOCATION '../../testing/data/csv/aggregate_test_100.csv';
+LOCATION 'tests/data/aggregate_test_100_order_by_c1_asc.csv';
 
 query TT
 explain SELECT c1 FROM aggregate_test_100_with_order order by c1 ASC limit 10
@@ -81,7 +81,7 @@ Limit: skip=0, fetch=10
 ----TableScan: aggregate_test_100_with_order projection=[c1]
 physical_plan
 GlobalLimitExec: skip=0, fetch=10
---CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
+--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/aggregate_test_100_order_by_c1_asc.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true
 
 
 ## explain_physical_plan_only

From 5705b3ab523fc4f169b17ef11e4da34a5b92c338 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Thu, 6 Jul 2023 00:20:22 -0700
Subject: [PATCH 68/89] Return error when internal multiplication overflowing
 in decimal division kernel (#6833)

---
 .../core/tests/sqllogictests/test_files/decimal.slt      | 9 +++++++++
 .../src/expressions/binary/kernels_arrow.rs              | 8 ++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/decimal.slt b/datafusion/core/tests/sqllogictests/test_files/decimal.slt
index fd4e80e1afe4..f41351774172 100644
--- a/datafusion/core/tests/sqllogictests/test_files/decimal.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/decimal.slt
@@ -603,3 +603,12 @@ query R
 select try_cast(1234567 as decimal(7,3));
 ----
 NULL
+
+statement ok
+create table foo (a DECIMAL(38, 20), b DECIMAL(38, 0));
+
+statement ok
+insert into foo VALUES (1, 5);
+
+query error DataFusion error: Arrow error: Compute error: Overflow happened on: 100000000000000000000 \* 100000000000000000000000000000000000000
+select a / b from foo;
diff --git a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs
index e7d7f62c86d2..9c6645c30371 100644
--- a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs
+++ b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs
@@ -20,8 +20,8 @@
 
 use arrow::compute::{
     add_dyn, add_scalar_dyn, divide_dyn_checked, divide_scalar_dyn, modulus_dyn,
-    modulus_scalar_dyn, multiply_fixed_point, multiply_scalar_dyn, subtract_dyn,
-    subtract_scalar_dyn, try_unary,
+    modulus_scalar_dyn, multiply_fixed_point, multiply_scalar_checked_dyn,
+    multiply_scalar_dyn, subtract_dyn, subtract_scalar_dyn, try_unary,
 };
 use arrow::datatypes::{Date32Type, Date64Type, Decimal128Type};
 use arrow::{array::*, datatypes::ArrowNumericType};
@@ -662,7 +662,7 @@ pub(crate) fn divide_decimal_dyn_scalar(
     let (precision, scale) = get_precision_scale(result_type)?;
 
     let mul = 10_i128.pow(scale as u32);
-    let array = multiply_scalar_dyn::<Decimal128Type>(left, mul)?;
+    let array = multiply_scalar_checked_dyn::<Decimal128Type>(left, mul)?;
 
     let array = divide_scalar_dyn::<Decimal128Type>(&array, right)?;
     decimal_array_with_precision_scale(array, precision, scale)
@@ -719,7 +719,7 @@ pub(crate) fn divide_dyn_checked_decimal(
     let (precision, scale) = get_precision_scale(result_type)?;
 
     let mul = 10_i128.pow(scale as u32);
-    let array = multiply_scalar_dyn::<Decimal128Type>(left, mul)?;
+    let array = multiply_scalar_checked_dyn::<Decimal128Type>(left, mul)?;
 
     // Restore to original precision and scale (metadata only)
     let (org_precision, org_scale) = get_precision_scale(right.data_type())?;

From e324e9f72f138e2cdf27d2762593fe7ef7c16cd7 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
Date: Thu, 6 Jul 2023 14:18:41 +0100
Subject: [PATCH 69/89] Deprecate ScalarValue::and, ScalarValue::or (#6842)
 (#6844)

* Deprecate ScalarValue::and, ScalarValue::or (#6842)

* Review feedback
---
 datafusion/common/src/scalar.rs               |  2 +
 .../src/aggregate/bool_and_or.rs              | 65 ++++++++-----------
 2 files changed, 28 insertions(+), 39 deletions(-)

diff --git a/datafusion/common/src/scalar.rs b/datafusion/common/src/scalar.rs
index b0769df1e9db..2011247346e0 100644
--- a/datafusion/common/src/scalar.rs
+++ b/datafusion/common/src/scalar.rs
@@ -2109,11 +2109,13 @@ impl ScalarValue {
         impl_checked_op!(self, rhs, checked_sub, -)
     }
 
+    #[deprecated(note = "Use arrow kernels or specialization (#6842)")]
     pub fn and<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
         let rhs = other.borrow();
         impl_op!(self, rhs, &&)
     }
 
+    #[deprecated(note = "Use arrow kernels or specialization (#6842)")]
     pub fn or<T: Borrow<ScalarValue>>(&self, other: T) -> Result<ScalarValue> {
         let rhs = other.borrow();
         impl_op!(self, rhs, ||)
diff --git a/datafusion/physical-expr/src/aggregate/bool_and_or.rs b/datafusion/physical-expr/src/aggregate/bool_and_or.rs
index bbab4dfce660..e444dc61ee1b 100644
--- a/datafusion/physical-expr/src/aggregate/bool_and_or.rs
+++ b/datafusion/physical-expr/src/aggregate/bool_and_or.rs
@@ -18,7 +18,6 @@
 //! Defines physical expressions that can evaluated at runtime during query execution
 
 use std::any::Any;
-use std::convert::TryFrom;
 use std::sync::Arc;
 
 use crate::{AggregateExpr, PhysicalExpr};
@@ -161,7 +160,7 @@ impl AggregateExpr for BoolAnd {
     }
 
     fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(BoolAndAccumulator::try_new(&self.data_type)?))
+        Ok(Box::<BoolAndAccumulator>::default())
     }
 
     fn state_fields(&self) -> Result<Vec<Field>> {
@@ -199,7 +198,7 @@ impl AggregateExpr for BoolAnd {
     }
 
     fn create_sliding_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(BoolAndAccumulator::try_new(&self.data_type)?))
+        Ok(Box::<BoolAndAccumulator>::default())
     }
 }
 
@@ -217,25 +216,20 @@ impl PartialEq<dyn Any> for BoolAnd {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Default)]
 struct BoolAndAccumulator {
-    bool_and: ScalarValue,
-}
-
-impl BoolAndAccumulator {
-    /// new bool_and accumulator
-    pub fn try_new(data_type: &DataType) -> Result<Self> {
-        Ok(Self {
-            bool_and: ScalarValue::try_from(data_type)?,
-        })
-    }
+    acc: Option<bool>,
 }
 
 impl Accumulator for BoolAndAccumulator {
     fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
         let values = &values[0];
-        let delta = &bool_and_batch(values)?;
-        self.bool_and = self.bool_and.and(delta)?;
+        self.acc = match (self.acc, bool_and_batch(values)?) {
+            (None, ScalarValue::Boolean(v)) => v,
+            (Some(v), ScalarValue::Boolean(None)) => Some(v),
+            (Some(a), ScalarValue::Boolean(Some(b))) => Some(a && b),
+            _ => unreachable!(),
+        };
         Ok(())
     }
 
@@ -244,16 +238,15 @@ impl Accumulator for BoolAndAccumulator {
     }
 
     fn state(&self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![self.bool_and.clone()])
+        Ok(vec![ScalarValue::Boolean(self.acc)])
     }
 
     fn evaluate(&self) -> Result<ScalarValue> {
-        Ok(self.bool_and.clone())
+        Ok(ScalarValue::Boolean(self.acc))
     }
 
     fn size(&self) -> usize {
-        std::mem::size_of_val(self) - std::mem::size_of_val(&self.bool_and)
-            + self.bool_and.size()
+        std::mem::size_of_val(self)
     }
 }
 
@@ -355,7 +348,7 @@ impl AggregateExpr for BoolOr {
     }
 
     fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(BoolOrAccumulator::try_new(&self.data_type)?))
+        Ok(Box::<BoolOrAccumulator>::default())
     }
 
     fn state_fields(&self) -> Result<Vec<Field>> {
@@ -393,7 +386,7 @@ impl AggregateExpr for BoolOr {
     }
 
     fn create_sliding_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(BoolOrAccumulator::try_new(&self.data_type)?))
+        Ok(Box::<BoolOrAccumulator>::default())
     }
 }
 
@@ -411,29 +404,24 @@ impl PartialEq<dyn Any> for BoolOr {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, Default)]
 struct BoolOrAccumulator {
-    bool_or: ScalarValue,
-}
-
-impl BoolOrAccumulator {
-    /// new bool_or accumulator
-    pub fn try_new(data_type: &DataType) -> Result<Self> {
-        Ok(Self {
-            bool_or: ScalarValue::try_from(data_type)?,
-        })
-    }
+    acc: Option<bool>,
 }
 
 impl Accumulator for BoolOrAccumulator {
     fn state(&self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![self.bool_or.clone()])
+        Ok(vec![ScalarValue::Boolean(self.acc)])
     }
 
     fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
         let values = &values[0];
-        let delta = bool_or_batch(values)?;
-        self.bool_or = self.bool_or.or(&delta)?;
+        self.acc = match (self.acc, bool_or_batch(values)?) {
+            (None, ScalarValue::Boolean(v)) => v,
+            (Some(v), ScalarValue::Boolean(None)) => Some(v),
+            (Some(a), ScalarValue::Boolean(Some(b))) => Some(a || b),
+            _ => unreachable!(),
+        };
         Ok(())
     }
 
@@ -442,12 +430,11 @@ impl Accumulator for BoolOrAccumulator {
     }
 
     fn evaluate(&self) -> Result<ScalarValue> {
-        Ok(self.bool_or.clone())
+        Ok(ScalarValue::Boolean(self.acc))
     }
 
     fn size(&self) -> usize {
-        std::mem::size_of_val(self) - std::mem::size_of_val(&self.bool_or)
-            + self.bool_or.size()
+        std::mem::size_of_val(self)
     }
 }
 

From dec1b971f76aaad109101af52d214ab5abd347a9 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 6 Jul 2023 11:15:08 -0400
Subject: [PATCH 70/89] chore(deps): update bigdecimal requirement from 0.3.0
 to 0.4.0 (#6848)

* chore(deps): update bigdecimal requirement from 0.3.0 to 0.4.0

Updates the requirements on [bigdecimal](https://github.com/akubera/bigdecimal-rs) to permit the latest version.
- [Commits](https://github.com/akubera/bigdecimal-rs/commits)

---
updated-dependencies:
- dependency-name: bigdecimal
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update tests for decimal rounding

* Update datafusion-cli

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
---
 datafusion-cli/Cargo.lock                     | 243 +++++++-----------
 datafusion/core/Cargo.toml                    |   2 +-
 .../tests/sqllogictests/test_files/insert.slt |   2 +-
 .../tests/sqllogictests/test_files/window.slt |   4 +-
 4 files changed, 100 insertions(+), 151 deletions(-)

diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock
index 415c39b6d51c..72406c24c165 100644
--- a/datafusion-cli/Cargo.lock
+++ b/datafusion-cli/Cargo.lock
@@ -340,13 +340,13 @@ dependencies = [
 
 [[package]]
 name = "async-trait"
-version = "0.1.68"
+version = "0.1.71"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
+checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
@@ -754,12 +754,11 @@ dependencies = [
 
 [[package]]
 name = "bstr"
-version = "1.5.0"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5"
+checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
 dependencies = [
  "memchr",
- "once_cell",
  "regex-automata",
  "serde",
 ]
@@ -970,9 +969,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
 
 [[package]]
 name = "cpufeatures"
-version = "0.2.8"
+version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03e69e28e9f7f77debdedbaafa2866e1de9ba56df55a8bd7cfc724c25a09987c"
+checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1"
 dependencies = [
  "libc",
 ]
@@ -1030,7 +1029,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "eed5fff0d93c7559121e9c72bf9c242295869396255071ff2cb1617147b608c5"
 dependencies = [
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
@@ -1156,7 +1155,7 @@ dependencies = [
  "lazy_static",
  "sqlparser",
  "strum 0.25.0",
- "strum_macros 0.25.0",
+ "strum_macros 0.25.1",
 ]
 
 [[package]]
@@ -1343,7 +1342,7 @@ checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a"
 dependencies = [
  "errno-dragonfly",
  "libc",
- "windows-sys 0.48.0",
+ "windows-sys",
 ]
 
 [[package]]
@@ -1382,8 +1381,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5"
 dependencies = [
  "cfg-if",
- "rustix 0.38.1",
- "windows-sys 0.48.0",
+ "rustix 0.38.3",
+ "windows-sys",
 ]
 
 [[package]]
@@ -1492,7 +1491,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
@@ -1627,9 +1626,9 @@ dependencies = [
 
 [[package]]
 name = "hermit-abi"
-version = "0.3.1"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
+checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b"
 
 [[package]]
 name = "hex"
@@ -1727,13 +1726,14 @@ dependencies = [
 
 [[package]]
 name = "hyper-rustls"
-version = "0.24.0"
+version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7"
+checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97"
 dependencies = [
+ "futures-util",
  "http",
  "hyper",
- "rustls 0.21.2",
+ "rustls 0.21.3",
  "tokio",
  "tokio-rustls 0.24.1",
 ]
@@ -1812,9 +1812,9 @@ version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
 dependencies = [
- "hermit-abi 0.3.1",
+ "hermit-abi 0.3.2",
  "libc",
- "windows-sys 0.48.0",
+ "windows-sys",
 ]
 
 [[package]]
@@ -1843,9 +1843,9 @@ dependencies = [
 
 [[package]]
 name = "itoa"
-version = "1.0.6"
+version = "1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"
+checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
 
 [[package]]
 name = "jobserver"
@@ -2063,7 +2063,7 @@ checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2"
 dependencies = [
  "libc",
  "wasi",
- "windows-sys 0.48.0",
+ "windows-sys",
 ]
 
 [[package]]
@@ -2176,7 +2176,7 @@ version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
 dependencies = [
- "hermit-abi 0.3.1",
+ "hermit-abi 0.3.2",
  "libc",
 ]
 
@@ -2320,9 +2320,9 @@ dependencies = [
 
 [[package]]
 name = "paste"
-version = "1.0.12"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9f746c4065a8fa3fe23974dd82f15431cc8d40779821001404d10d2e79ca7d79"
+checksum = "b4b27ab7be369122c218afc2079489cdcb4b517c0a3fc386ff11e1fedfcc2b35"
 
 [[package]]
 name = "percent-encoding"
@@ -2395,7 +2395,7 @@ checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
@@ -2582,26 +2582,32 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.8.4"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0ab3ca65655bb1e41f2a8c8cd662eb4fb035e67c3f78da1d61dffe89d07300f"
+checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
 dependencies = [
  "aho-corasick",
  "memchr",
+ "regex-automata",
  "regex-syntax",
 ]
 
 [[package]]
 name = "regex-automata"
-version = "0.1.10"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
+checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
 
 [[package]]
 name = "regex-syntax"
-version = "0.7.2"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78"
+checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
 
 [[package]]
 name = "reqwest"
@@ -2618,7 +2624,7 @@ dependencies = [
  "http",
  "http-body",
  "hyper",
- "hyper-rustls 0.24.0",
+ "hyper-rustls 0.24.1",
  "ipnet",
  "js-sys",
  "log",
@@ -2626,7 +2632,7 @@ dependencies = [
  "once_cell",
  "percent-encoding",
  "pin-project-lite",
- "rustls 0.21.2",
+ "rustls 0.21.3",
  "rustls-pemfile",
  "serde",
  "serde_json",
@@ -2702,29 +2708,29 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.37.21"
+version = "0.37.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62f25693a73057a1b4cb56179dd3c7ea21a7c6c5ee7d85781f5749b46f34b79c"
+checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06"
 dependencies = [
  "bitflags 1.3.2",
  "errno",
  "io-lifetimes",
  "libc",
  "linux-raw-sys 0.3.8",
- "windows-sys 0.48.0",
+ "windows-sys",
 ]
 
 [[package]]
 name = "rustix"
-version = "0.38.1"
+version = "0.38.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fbc6396159432b5c8490d4e301d8c705f61860b8b6c863bf79942ce5401968f3"
+checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4"
 dependencies = [
  "bitflags 2.3.3",
  "errno",
  "libc",
  "linux-raw-sys 0.4.3",
- "windows-sys 0.48.0",
+ "windows-sys",
 ]
 
 [[package]]
@@ -2741,9 +2747,9 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.21.2"
+version = "0.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e32ca28af694bc1bbf399c33a516dbdf1c90090b8ab23c2bc24f834aa2247f5f"
+checksum = "b19faa85ecb5197342b54f987b142fb3e30d0c90da40f80ef4fa9a726e6676ed"
 dependencies = [
  "log",
  "ring",
@@ -2774,9 +2780,9 @@ dependencies = [
 
 [[package]]
 name = "rustls-webpki"
-version = "0.100.1"
+version = "0.101.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b"
+checksum = "89efed4bd0af2a8de0feb22ba38030244c93db56112b8aa67d27022286852b1c"
 dependencies = [
  "ring",
  "untrusted",
@@ -2784,9 +2790,9 @@ dependencies = [
 
 [[package]]
 name = "rustversion"
-version = "1.0.12"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
+checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f"
 
 [[package]]
 name = "rustyline"
@@ -2813,9 +2819,9 @@ dependencies = [
 
 [[package]]
 name = "ryu"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
+checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
 
 [[package]]
 name = "same-file"
@@ -2828,11 +2834,11 @@ dependencies = [
 
 [[package]]
 name = "schannel"
-version = "0.1.21"
+version = "0.1.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3"
+checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88"
 dependencies = [
- "windows-sys 0.42.0",
+ "windows-sys",
 ]
 
 [[package]]
@@ -2882,35 +2888,35 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
 
 [[package]]
 name = "seq-macro"
-version = "0.3.3"
+version = "0.3.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6b44e8fc93a14e66336d230954dda83d18b4605ccace8fe09bc7514a71ad0bc"
+checksum = "63134939175b3131fe4d2c131b103fd42f25ccca89423d43b5e4f267920ccf03"
 
 [[package]]
 name = "serde"
-version = "1.0.164"
+version = "1.0.166"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"
+checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.164"
+version = "1.0.166"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9735b638ccc51c28bf6914d90a2e9725b377144fc612c49a611fddd1b631d68"
+checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.99"
+version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3"
+checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
 dependencies = [
  "itoa",
  "ryu",
@@ -3056,7 +3062,7 @@ version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
 dependencies = [
- "strum_macros 0.25.0",
+ "strum_macros 0.25.1",
 ]
 
 [[package]]
@@ -3074,15 +3080,15 @@ dependencies = [
 
 [[package]]
 name = "strum_macros"
-version = "0.25.0"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe9f3bd7d2e45dcc5e265fbb88d6513e4747d8ef9444cf01a533119bce28a157"
+checksum = "6069ca09d878a33f883cc06aaa9718ede171841d3832450354410b718b097232"
 dependencies = [
  "heck",
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
@@ -3104,9 +3110,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.22"
+version = "2.0.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2efbeae7acf4eabd6bcdcbd11c92f45231ddda7539edc7806bd1a04a03b24616"
+checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3123,8 +3129,8 @@ dependencies = [
  "cfg-if",
  "fastrand",
  "redox_syscall 0.3.5",
- "rustix 0.37.21",
- "windows-sys 0.48.0",
+ "rustix 0.37.23",
+ "windows-sys",
 ]
 
 [[package]]
@@ -3150,22 +3156,22 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
 
 [[package]]
 name = "thiserror"
-version = "1.0.40"
+version = "1.0.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
+checksum = "c16a64ba9387ef3fdae4f9c1a7f07a0997fce91985c0336f1ddc1822b3b37802"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.40"
+version = "1.0.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
+checksum = "d14928354b01c4d6a4f0e549069adef399a284e7995c7ccca94e8a07a5346c59"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
@@ -3245,7 +3251,7 @@ dependencies = [
  "pin-project-lite",
  "socket2",
  "tokio-macros",
- "windows-sys 0.48.0",
+ "windows-sys",
 ]
 
 [[package]]
@@ -3256,7 +3262,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
@@ -3276,7 +3282,7 @@ version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
 dependencies = [
- "rustls 0.21.2",
+ "rustls 0.21.3",
  "tokio",
 ]
 
@@ -3354,7 +3360,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
 ]
 
 [[package]]
@@ -3396,9 +3402,9 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.9"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0"
+checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
 
 [[package]]
 name = "unicode-normalization"
@@ -3526,7 +3532,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
  "wasm-bindgen-shared",
 ]
 
@@ -3560,7 +3566,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.22",
+ "syn 2.0.23",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -3653,21 +3659,6 @@ dependencies = [
  "windows-targets",
 ]
 
-[[package]]
-name = "windows-sys"
-version = "0.42.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7"
-dependencies = [
- "windows_aarch64_gnullvm 0.42.2",
- "windows_aarch64_msvc 0.42.2",
- "windows_i686_gnu 0.42.2",
- "windows_i686_msvc 0.42.2",
- "windows_x86_64_gnu 0.42.2",
- "windows_x86_64_gnullvm 0.42.2",
- "windows_x86_64_msvc 0.42.2",
-]
-
 [[package]]
 name = "windows-sys"
 version = "0.48.0"
@@ -3683,93 +3674,51 @@ version = "0.48.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f"
 dependencies = [
- "windows_aarch64_gnullvm 0.48.0",
- "windows_aarch64_msvc 0.48.0",
- "windows_i686_gnu 0.48.0",
- "windows_i686_msvc 0.48.0",
- "windows_x86_64_gnu 0.48.0",
- "windows_x86_64_gnullvm 0.48.0",
- "windows_x86_64_msvc 0.48.0",
+ "windows_aarch64_gnullvm",
+ "windows_aarch64_msvc",
+ "windows_i686_gnu",
+ "windows_i686_msvc",
+ "windows_x86_64_gnu",
+ "windows_x86_64_gnullvm",
+ "windows_x86_64_msvc",
 ]
 
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8"
-
 [[package]]
 name = "windows_aarch64_gnullvm"
 version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"
 
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43"
-
 [[package]]
 name = "windows_aarch64_msvc"
 version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"
 
-[[package]]
-name = "windows_i686_gnu"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f"
-
 [[package]]
 name = "windows_i686_gnu"
 version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"
 
-[[package]]
-name = "windows_i686_msvc"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060"
-
 [[package]]
 name = "windows_i686_msvc"
 version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"
 
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36"
-
 [[package]]
 name = "windows_x86_64_gnu"
 version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"
 
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3"
-
 [[package]]
 name = "windows_x86_64_gnullvm"
 version = "0.48.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"
 
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.42.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0"
-
 [[package]]
 name = "windows_x86_64_msvc"
 version = "0.48.0"
diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml
index 1cde72911218..5bcb8bc594ff 100644
--- a/datafusion/core/Cargo.toml
+++ b/datafusion/core/Cargo.toml
@@ -98,7 +98,7 @@ zstd = { version = "0.12", optional = true, default-features = false }
 
 [dev-dependencies]
 async-trait = "0.1.53"
-bigdecimal = "0.3.0"
+bigdecimal = "0.4.0"
 criterion = { version = "0.5", features = ["async_tokio"] }
 csv = "1.1.6"
 ctor = "0.2.0"
diff --git a/datafusion/core/tests/sqllogictests/test_files/insert.slt b/datafusion/core/tests/sqllogictests/test_files/insert.slt
index faa519834c6f..9f4122ac5ba9 100644
--- a/datafusion/core/tests/sqllogictests/test_files/insert.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/insert.slt
@@ -299,4 +299,4 @@ select * from table_without_values;
 2 NULL
 
 statement ok
-drop table table_without_values;
\ No newline at end of file
+drop table table_without_values;
diff --git a/datafusion/core/tests/sqllogictests/test_files/window.slt b/datafusion/core/tests/sqllogictests/test_files/window.slt
index 09339d7e499f..c4d745f8f190 100644
--- a/datafusion/core/tests/sqllogictests/test_files/window.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/window.slt
@@ -448,7 +448,7 @@ ORDER BY c9
 LIMIT 5
 ----
 -48302 -16100.666666666666 3
-11243 3747.666666666667 3
+11243 3747.666666666666 3
 -51311 -17103.666666666668 3
 -2391 -797 3
 46756 15585.333333333334 3
@@ -468,7 +468,7 @@ LIMIT 5
 46721.33333333174 31147.555555554496 216.151181660734 176.486700789477
 2639429.333333332 1759619.5555555548 1624.632060908971 1326.50652299774
 746202.3333333324 497468.2222222216 863.830037295146 705.314271954156
-768422.9999999981 512281.9999999988 876.597399037893 715.738779164577
+768422.9999999981 512281.9999999988 876.597399037892 715.738779164577
 66526.3333333288 44350.88888888587 257.926992254259 210.596507304575
 
 # window_frame_rows_preceding_with_partition_unique_order_by

From 49fc6c1331c5a747c1e1d413f01f36ab47d94649 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Thu, 6 Jul 2023 16:56:00 -0400
Subject: [PATCH 71/89] Update tests, and fix memory accounting

---
 .../core/src/physical_plan/aggregates/mod.rs  |  4 +-
 .../src/physical_plan/aggregates/row_hash2.rs | 38 +++++++++++--------
 .../physical-expr/src/aggregate/count.rs      | 11 +++++-
 3 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/mod.rs b/datafusion/core/src/physical_plan/aggregates/mod.rs
index e086b545b885..e2f37faf396e 100644
--- a/datafusion/core/src/physical_plan/aggregates/mod.rs
+++ b/datafusion/core/src/physical_plan/aggregates/mod.rs
@@ -1785,10 +1785,10 @@ mod tests {
                     assert!(matches!(stream, StreamType::AggregateStream(_)));
                 }
                 1 => {
-                    assert!(matches!(stream, StreamType::GroupedHashAggregateStream(_)));
+                    assert!(matches!(stream, StreamType::GroupedHashAggregateStream2(_)));
                 }
                 2 => {
-                    assert!(matches!(stream, StreamType::GroupedHashAggregateStream(_)));
+                    assert!(matches!(stream, StreamType::GroupedHashAggregateStream2(_)));
                 }
                 _ => panic!("Unknown version: {version}"),
             }
diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 41e713672fce..b34ae7f1a407 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -43,7 +43,7 @@ use crate::physical_plan::{RecordBatchStream, SendableRecordBatchStream};
 use arrow::array::*;
 use arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
 use datafusion_common::Result;
-use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt};
+use datafusion_execution::memory_pool::proxy::RawTableAllocExt;
 use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation};
 use datafusion_execution::TaskContext;
 use hashbrown::raw::RawTable;
@@ -239,7 +239,7 @@ impl GroupedHashAggregateStream2 {
         let name = format!("GroupedHashAggregateStream2[{partition}]");
         let reservation = MemoryConsumer::new(name).register(context.memory_pool());
         let map = RawTable::with_capacity(0);
-        let group_by_values = row_converter.empty_rows(0, 0);
+        let group_values = row_converter.empty_rows(0, 0);
         let current_group_indices = vec![];
 
         timer.done();
@@ -257,7 +257,7 @@ impl GroupedHashAggregateStream2 {
             group_by: agg_group_by,
             reservation,
             map,
-            group_values: group_by_values,
+            group_values,
             current_group_indices,
             exec_state,
             baseline_metrics,
@@ -302,9 +302,13 @@ impl Stream for GroupedHashAggregateStream2 {
                             let result = self.group_aggregate_batch(batch);
                             timer.done();
 
-                            // allocate memory
-                            // This happens AFTER we actually used the memory, but simplifies the whole accounting and we are OK with
-                            // overshooting a bit. Also this means we either store the whole record batch or not.
+                            // allocate memory AFTER we actually used
+                            // the memory, which simplifies the whole
+                            // accounting and we are OK with
+                            // overshooting a bit.
+                            //
+                            // Also this means we either store the
+                            // whole record batch or not.
                             let result = result.and_then(|allocated| {
                                 self.reservation.try_grow(allocated)
                             });
@@ -364,8 +368,8 @@ impl GroupedHashAggregateStream2 {
     /// `group_values`.
     ///
     /// At the return of this function,
-    /// [`Self::current_group_indices`] has the same number of
-    /// entries as each array in `group_values` and holds the correct
+    /// [`Self::current_group_indices`] has the same number of entries
+    /// as each array in `group_values` and holds the correct
     /// group_index for that row.
     fn update_group_state(
         &mut self,
@@ -376,13 +380,12 @@ impl GroupedHashAggregateStream2 {
         let group_rows = self.row_converter.convert_columns(group_values)?;
         let n_rows = group_rows.num_rows();
 
-        // 1.1 construct the key from the group values
-        // 1.2 construct the mapping key if it does not exist
-
         // tracks to which group each of the input rows belongs
         let group_indices = &mut self.current_group_indices;
         group_indices.clear();
 
+        let group_values_size_pre = self.group_values.size();
+
         // 1.1 Calculate the group keys for the group values
         let mut batch_hashes = vec![0; n_rows];
         create_hashes(group_values, &self.random_state, &mut batch_hashes)?;
@@ -392,10 +395,6 @@ impl GroupedHashAggregateStream2 {
                 // verify that a group that we are inserting with hash is
                 // actually the same key value as the group in
                 // existing_idx  (aka group_values @ row)
-
-                // TODO update *allocated based on size of the row
-                // that was just pushed into
-                // aggr_state.group_by_values
                 group_rows.row(row) == self.group_values.row(*group_idx)
             });
 
@@ -417,8 +416,15 @@ impl GroupedHashAggregateStream2 {
                     group_idx
                 }
             };
-            group_indices.push_accounted(group_idx, allocated);
+            group_indices.push(group_idx);
         }
+
+        // account for any memory increase used to store group_values
+        *allocated += self
+            .group_values
+            .size()
+            .saturating_sub(group_values_size_pre);
+
         Ok(())
     }
 
diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index 1b1c12190746..37a756894c72 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -49,6 +49,10 @@ pub struct Count {
     name: String,
     data_type: DataType,
     nullable: bool,
+    /// Input exprs
+    ///
+    /// For `COUNT(c1)` this is `[c1]`
+    /// For `COUNT(c1, c2)` this is `[c1, c2]`
     exprs: Vec<Arc<dyn PhysicalExpr>>,
 }
 
@@ -89,7 +93,7 @@ impl Count {
 /// accumulator has no additional null or seen filter tracking.
 #[derive(Debug)]
 struct CountGroupsAccumulator {
-    /// Count per group (use u64 to make Int64Array)
+    /// Count per group (use i64 to make Int64Array)
     counts: Vec<i64>,
 }
 
@@ -242,7 +246,10 @@ impl AggregateExpr for Count {
     }
 
     fn groups_accumulator_supported(&self) -> bool {
-        true
+        // groups accumulator only supports `COUNT(c1)`, not
+        // `COUNT(c1, c2)`, etc
+        // TODO file a ticket to optimize
+        self.exprs.len() == 1
     }
 
     fn create_row_accumulator(

From b137df600e815f98818ce4c32051faa3b9ec4f4b Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Thu, 6 Jul 2023 17:25:24 -0400
Subject: [PATCH 72/89] fix doc comments

---
 datafusion/physical-expr/src/aggregate/min_max.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index b37c659c21c7..b03c452c0348 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -1186,7 +1186,7 @@ impl MinMax for i128 {
     }
 }
 
-/// An accumulator to compute the min or max of PrimitiveArray<T>.
+/// An accumulator to compute the min or max of [`PrimitiveArray<T>`].
 /// Stores values as native/primitive type
 #[derive(Debug)]
 struct MinMaxGroupsPrimitiveAccumulator<T, const MIN: bool>

From 1d3185c5979fdfb9b23fc9d432d597555045f3e8 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Thu, 6 Jul 2023 17:33:05 -0400
Subject: [PATCH 73/89] add ticket reference

---
 datafusion/core/src/physical_plan/aggregates/row_hash2.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index b34ae7f1a407..404ac3aaf9e7 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -377,6 +377,7 @@ impl GroupedHashAggregateStream2 {
         allocated: &mut usize,
     ) -> Result<()> {
         // Convert the group keys into the row format
+        // Avoid reallocation when https://github.com/apache/arrow-rs/issues/4479 is available
         let group_rows = self.row_converter.convert_columns(group_values)?;
         let n_rows = group_rows.num_rows();
 

From d9cca240555d4bc3dec09945a1ed0c8e12ecdbf6 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 04:51:03 -0400
Subject: [PATCH 74/89] Only make code for average types that can be
 instantiated

---
 .../physical-expr/src/aggregate/average.rs    | 48 ++++---------------
 1 file changed, 9 insertions(+), 39 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs
index c4bfbc145101..6081564ccdf1 100644
--- a/datafusion/physical-expr/src/aggregate/average.rs
+++ b/datafusion/physical-expr/src/aggregate/average.rs
@@ -35,10 +35,7 @@ use crate::aggregate::utils::down_cast_any_ref;
 use crate::expressions::format_state_name;
 use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::compute;
-use arrow::datatypes::{
-    DataType, Decimal128Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type,
-    Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
-};
+use arrow::datatypes::{DataType, Decimal128Type, Float64Type, UInt64Type};
 use arrow::{
     array::{ArrayRef, UInt64Array},
     datatypes::Field,
@@ -100,18 +97,6 @@ impl Avg {
     }
 }
 
-// Instantiates a [`AvgGroupsAccumulator`] for a given [`ArrowNativeType`]
-macro_rules! instantiate_accumulator {
-    ($SELF:expr, $NUMERICTYPE:ident) => {{
-        Ok(Box::new(AvgGroupsAccumulator::<$NUMERICTYPE, _>::new(
-            &$SELF.sum_data_type,
-            &$SELF.rt_data_type,
-            // TODO handle overflow (e.g. count as u8 can overflow for 400)
-            |sum, count| Ok(sum / count as <$NUMERICTYPE as ArrowPrimitiveType>::Native),
-        )))
-    }};
-}
-
 impl AggregateExpr for Avg {
     /// Return a reference to Any that can be used for downcasting
     fn as_any(&self) -> &dyn Any {
@@ -182,35 +167,20 @@ impl AggregateExpr for Avg {
     fn groups_accumulator_supported(&self) -> bool {
         use DataType::*;
 
-        matches!(
-            &self.sum_data_type,
-            Int8 | Int16
-                | Int32
-                | Int64
-                | UInt8
-                | UInt16
-                | UInt32
-                | UInt64
-                | Float32
-                | Float64
-                | Decimal128(_, _)
-        )
+        matches!(&self.rt_data_type, Float64 | Decimal128(_, _))
     }
 
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         use DataType::*;
         // instantiate specialized accumulator based for the type
         match (&self.sum_data_type, &self.rt_data_type) {
-            (Int8, Int8) => instantiate_accumulator!(self, Int8Type),
-            (Int16, Int16) => instantiate_accumulator!(self, Int16Type),
-            (Int32, Int32) => instantiate_accumulator!(self, Int32Type),
-            (Int64, Int64) => instantiate_accumulator!(self, Int64Type),
-            (UInt8, UInt8) => instantiate_accumulator!(self, UInt8Type),
-            (UInt16, UInt16) => instantiate_accumulator!(self, UInt16Type),
-            (UInt32, UInt32) => instantiate_accumulator!(self, UInt32Type),
-            (UInt64, UInt64) => instantiate_accumulator!(self, UInt64Type),
-            (Float32, Float32) => instantiate_accumulator!(self, Float32Type),
-            (Float64, Float64) => instantiate_accumulator!(self, Float64Type),
+            (Float64, Float64) => {
+                Ok(Box::new(AvgGroupsAccumulator::<Float64Type, _>::new(
+                    &self.sum_data_type,
+                    &self.rt_data_type,
+                    |sum: f64, count: u64| Ok(sum / count as f64),
+                )))
+            }
             (
                 Decimal128(_sum_precision, sum_scale),
                 Decimal128(target_precision, target_scale),

From c68c39b817d8d351227ed09bb908102780032d55 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 04:51:35 -0400
Subject: [PATCH 75/89] Improve aggregate_fuzz output

---
 .../core/tests/fuzz_cases/aggregate_fuzz.rs   | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
index 74370049e81f..8b1da3457083 100644
--- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
@@ -28,8 +28,8 @@ use datafusion::physical_plan::aggregates::{
 use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 
-use datafusion::physical_plan::collect;
 use datafusion::physical_plan::memory::MemoryExec;
+use datafusion::physical_plan::{collect, displayable, ExecutionPlan};
 use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_physical_expr::expressions::{col, Sum};
 use datafusion_physical_expr::{AggregateExpr, PhysicalSortExpr};
@@ -118,7 +118,7 @@ async fn run_aggregate_test(input1: Vec<RecordBatch>, group_by_columns: Vec<&str
             schema.clone(),
         )
         .unwrap(),
-    ) as _;
+    ) as Arc<dyn ExecutionPlan>;
 
     let aggregate_exec_usual = Arc::new(
         AggregateExec::try_new(
@@ -131,14 +131,14 @@ async fn run_aggregate_test(input1: Vec<RecordBatch>, group_by_columns: Vec<&str
             schema.clone(),
         )
         .unwrap(),
-    ) as _;
+    ) as Arc<dyn ExecutionPlan>;
 
     let task_ctx = ctx.task_ctx();
-    let collected_usual = collect(aggregate_exec_usual, task_ctx.clone())
+    let collected_usual = collect(aggregate_exec_usual.clone(), task_ctx.clone())
         .await
         .unwrap();
 
-    let collected_running = collect(aggregate_exec_running, task_ctx.clone())
+    let collected_running = collect(aggregate_exec_running.clone(), task_ctx.clone())
         .await
         .unwrap();
     assert!(collected_running.len() > 2);
@@ -162,7 +162,23 @@ async fn run_aggregate_test(input1: Vec<RecordBatch>, group_by_columns: Vec<&str
         .zip(&running_formatted_sorted)
         .enumerate()
     {
-        assert_eq!((i, usual_line), (i, running_line), "Inconsistent result");
+        assert_eq!(
+            (i, usual_line),
+            (i, running_line),
+            "Inconsistent result\n\n\
+             Left Plan:\n{}\n\
+             Right Plan:\n{}\n\
+             schema:\n{schema}\n\
+             Left Ouptut:\n{}\n\
+             Right Output:\n{}\n\
+             input:\n{}\n\
+             ",
+            displayable(aggregate_exec_usual.as_ref()).indent(false),
+            displayable(aggregate_exec_running.as_ref()).indent(false),
+            usual_formatted,
+            running_formatted,
+            pretty_format_batches(&input1).unwrap(),
+        );
     }
 }
 

From 012791704409a349ae2baeaab07ec3d6f9fcecc3 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 05:11:19 -0400
Subject: [PATCH 76/89] Fix fuzz tests by emulating retractable batch

---
 .../core/tests/fuzz_cases/aggregate_fuzz.rs    |  4 ++++
 datafusion/physical-expr/src/aggregate/sum.rs  | 18 +++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
index 8b1da3457083..74dd9ee1d13e 100644
--- a/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
+++ b/datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
@@ -107,6 +107,10 @@ async fn run_aggregate_test(input1: Vec<RecordBatch>, group_by_columns: Vec<&str
         .map(|elem| (col(elem, &schema).unwrap(), elem.to_string()))
         .collect::<Vec<_>>();
     let group_by = PhysicalGroupBy::new_single(expr);
+
+    println!("aggregate_expr: {aggregate_expr:?}");
+    println!("group_by: {group_by:?}");
+
     let aggregate_exec_running = Arc::new(
         AggregateExec::try_new(
             AggregateMode::Partial,
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 91e4211bbf2f..1fe1ab53cff0 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -38,6 +38,7 @@ use arrow_array::types::{
     UInt64Type,
 };
 use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
+use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
 use log::debug;
@@ -516,6 +517,13 @@ where
     }
 }
 
+/// Create a buffer of len elements, representing all NULL values
+fn make_all_nulls(len: usize) -> NullBuffer {
+    let mut nulls = BooleanBufferBuilder::new(len);
+    nulls.append_n(len, false);
+    NullBuffer::new(nulls.finish())
+}
+
 impl<T> GroupsAccumulator for SumGroupsAccumulator<T>
 where
     T: ArrowNumericType + Send,
@@ -593,14 +601,18 @@ where
         let nulls = self.null_state.build();
 
         let sums = std::mem::take(&mut self.sums);
-        let sums = Arc::new(PrimitiveArray::<T>::new(sums.into(), nulls.clone())); // zero copy
+        let sums = Arc::new(PrimitiveArray::<T>::new(sums.into(), nulls));
 
         let sums = adjust_output_array(&self.sum_data_type, sums)?;
 
+        // TODO File a ticket: Sum expects sum/count array, but count
+        // is only needed for retractable aggregates. We could improve
+        // performance by only including it when needed.
         let counts = vec![0_u64; sums.len()];
-        let counts = Arc::new(PrimitiveArray::<UInt64Type>::new(counts.into(), nulls));
+        let all_nulls = Some(make_all_nulls(sums.len()));
+        let counts =
+            Arc::new(PrimitiveArray::<UInt64Type>::new(counts.into(), all_nulls));
 
-        // TODO: Sum expects sum/count array, but count is not needed
         Ok(vec![sums.clone() as ArrayRef, counts as ArrayRef])
     }
 

From e36a972e3ab8b2d074f872d80898afb91b286c0e Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 05:39:34 -0400
Subject: [PATCH 77/89] Fix and simplify min/max

---
 .../physical-expr/src/aggregate/min_max.rs    | 155 ++++++++++--------
 datafusion/physical-expr/src/aggregate/sum.rs |   1 +
 .../physical-expr/src/aggregate/utils.rs      |   6 +-
 3 files changed, 90 insertions(+), 72 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/min_max.rs b/datafusion/physical-expr/src/aggregate/min_max.rs
index b03c452c0348..914299bcedbd 100644
--- a/datafusion/physical-expr/src/aggregate/min_max.rs
+++ b/datafusion/physical-expr/src/aggregate/min_max.rs
@@ -37,7 +37,8 @@ use arrow::{
 };
 use arrow_array::cast::AsArray;
 use arrow_array::types::{
-    Decimal128Type, Float32Type, Float64Type, UInt32Type, UInt64Type,
+    Decimal128Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
+    UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
 use arrow_array::{ArrowNumericType, PrimitiveArray};
 use datafusion_common::ScalarValue;
@@ -95,6 +96,15 @@ impl Max {
     }
 }
 
+macro_rules! instantiate_min_max_accumulator {
+    ($SELF:expr, $NUMERICTYPE:ident, $MIN:expr) => {{
+        Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
+            $NUMERICTYPE,
+            $MIN,
+        >::new(&$SELF.data_type)))
+    }};
+}
+
 impl AggregateExpr for Max {
     /// Return a reference to Any that can be used for downcasting
     fn as_any(&self) -> &dyn Any {
@@ -149,37 +159,26 @@ impl AggregateExpr for Max {
 
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         match self.data_type {
-            DataType::UInt32 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
-                UInt32Type,
-                false,
-            >::new(
-                &self.data_type, &self.data_type
-            ))),
-            DataType::UInt64 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
-                UInt64Type,
-                false,
-            >::new(
-                &self.data_type, &self.data_type
-            ))),
-            DataType::Float32 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
-                Float32Type,
-                false,
-            >::new(
-                &self.data_type, &self.data_type
-            ))),
-            DataType::Float64 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
-                Float64Type,
-                false,
-            >::new(
-                &self.data_type, &self.data_type
-            ))),
+            DataType::Int8 => instantiate_min_max_accumulator!(self, Int8Type, false),
+            DataType::Int16 => instantiate_min_max_accumulator!(self, Int16Type, false),
+            DataType::Int32 => instantiate_min_max_accumulator!(self, Int32Type, false),
+            DataType::Int64 => instantiate_min_max_accumulator!(self, Int64Type, false),
+            DataType::UInt8 => instantiate_min_max_accumulator!(self, UInt8Type, false),
+            DataType::UInt16 => instantiate_min_max_accumulator!(self, UInt16Type, false),
+            DataType::UInt32 => instantiate_min_max_accumulator!(self, UInt32Type, false),
+            DataType::UInt64 => instantiate_min_max_accumulator!(self, UInt64Type, false),
+            DataType::Float32 => {
+                instantiate_min_max_accumulator!(self, Float32Type, false)
+            }
+            DataType::Float64 => {
+                instantiate_min_max_accumulator!(self, Float64Type, false)
+            }
+
             DataType::Decimal128(_, _) => {
                 Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
                     Decimal128Type,
                     false,
-                >::new(
-                    &self.data_type, &self.data_type
-                )))
+                >::new(&self.data_type)))
             }
             _ => Err(DataFusionError::NotImplemented(format!(
                 "MinMaxGroupsPrimitiveAccumulator not supported for {}",
@@ -898,37 +897,26 @@ impl AggregateExpr for Min {
 
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         match self.data_type {
-            DataType::UInt32 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
-                UInt32Type,
-                true,
-            >::new(
-                &self.data_type, &self.data_type
-            ))),
-            DataType::UInt64 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
-                UInt64Type,
-                true,
-            >::new(
-                &self.data_type, &self.data_type
-            ))),
-            DataType::Float32 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
-                Float32Type,
-                true,
-            >::new(
-                &self.data_type, &self.data_type
-            ))),
-            DataType::Float64 => Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
-                Float64Type,
-                true,
-            >::new(
-                &self.data_type, &self.data_type
-            ))),
+            DataType::Int8 => instantiate_min_max_accumulator!(self, Int8Type, true),
+            DataType::Int16 => instantiate_min_max_accumulator!(self, Int16Type, true),
+            DataType::Int32 => instantiate_min_max_accumulator!(self, Int32Type, true),
+            DataType::Int64 => instantiate_min_max_accumulator!(self, Int64Type, true),
+            DataType::UInt8 => instantiate_min_max_accumulator!(self, UInt8Type, true),
+            DataType::UInt16 => instantiate_min_max_accumulator!(self, UInt16Type, true),
+            DataType::UInt32 => instantiate_min_max_accumulator!(self, UInt32Type, true),
+            DataType::UInt64 => instantiate_min_max_accumulator!(self, UInt64Type, true),
+            DataType::Float32 => {
+                instantiate_min_max_accumulator!(self, Float32Type, true)
+            }
+            DataType::Float64 => {
+                instantiate_min_max_accumulator!(self, Float64Type, true)
+            }
+
             DataType::Decimal128(_, _) => {
                 Ok(Box::new(MinMaxGroupsPrimitiveAccumulator::<
                     Decimal128Type,
                     true,
-                >::new(
-                    &self.data_type, &self.data_type
-                )))
+                >::new(&self.data_type)))
             }
             _ => Err(DataFusionError::NotImplemented(format!(
                 "MinMaxGroupsPrimitiveAccumulator not supported for {}",
@@ -1129,6 +1117,38 @@ trait MinMax {
     fn max() -> Self;
 }
 
+impl MinMax for u8 {
+    fn min() -> Self {
+        u8::MIN
+    }
+    fn max() -> Self {
+        u8::MAX
+    }
+}
+impl MinMax for i8 {
+    fn min() -> Self {
+        i8::MIN
+    }
+    fn max() -> Self {
+        i8::MAX
+    }
+}
+impl MinMax for u16 {
+    fn min() -> Self {
+        u16::MIN
+    }
+    fn max() -> Self {
+        u16::MAX
+    }
+}
+impl MinMax for i16 {
+    fn min() -> Self {
+        i16::MIN
+    }
+    fn max() -> Self {
+        i16::MAX
+    }
+}
 impl MinMax for u32 {
     fn min() -> Self {
         u32::MIN
@@ -1194,17 +1214,14 @@ where
     T: ArrowNumericType + Send,
     T::Native: MinMax,
 {
-    /// The type of the computed min/max
-    min_max_data_type: DataType,
-
-    /// The type of the returned min/max
-    return_data_type: DataType,
-
     /// Min/max per group, stored as the native type
     min_max: Vec<T::Native>,
 
     /// Track nulls in the input / filters
     null_state: NullState,
+
+    /// The output datatype (needed for decimal precision/scale)
+    data_type: DataType,
 }
 
 impl<T, const MIN: bool> MinMaxGroupsPrimitiveAccumulator<T, MIN>
@@ -1212,17 +1229,17 @@ where
     T: ArrowNumericType + Send,
     T::Native: MinMax,
 {
-    pub fn new(min_max_data_type: &DataType, return_data_type: &DataType) -> Self {
+    pub fn new(data_type: &DataType) -> Self {
         debug!(
-            "MinMaxGroupsPrimitiveAccumulator ({}, min/max type: {min_max_data_type:?}) --> {return_data_type:?}",
-            std::any::type_name::<T>()
+            "MinMaxGroupsPrimitiveAccumulator ({}, {})",
+            std::any::type_name::<T>(),
+            MIN,
         );
 
         Self {
-            return_data_type: return_data_type.clone(),
-            min_max_data_type: min_max_data_type.clone(),
             min_max: vec![],
             null_state: NullState::new(),
+            data_type: data_type.clone(),
         }
     }
 }
@@ -1291,7 +1308,7 @@ where
         let nulls = self.null_state.build();
 
         let min_max = PrimitiveArray::<T>::new(min_max.into(), nulls); // no copy
-        let min_max = adjust_output_array(&self.return_data_type, Arc::new(min_max))?;
+        let min_max = adjust_output_array(&self.data_type, Arc::new(min_max))?;
 
         Ok(Arc::new(min_max))
     }
@@ -1301,9 +1318,9 @@ where
         let nulls = self.null_state.build();
 
         let min_max = std::mem::take(&mut self.min_max);
-        let min_max = Arc::new(PrimitiveArray::<T>::new(min_max.into(), nulls)); // zero copy
+        let min_max = PrimitiveArray::<T>::new(min_max.into(), nulls); // zero copy
 
-        let min_max = adjust_output_array(&self.min_max_data_type, min_max)?;
+        let min_max = adjust_output_array(&self.data_type, Arc::new(min_max))?;
 
         Ok(vec![min_max])
     }
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 1fe1ab53cff0..a3b576158139 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -608,6 +608,7 @@ where
         // TODO File a ticket: Sum expects sum/count array, but count
         // is only needed for retractable aggregates. We could improve
         // performance by only including it when needed.
+        // https://github.com/apache/arrow-datafusion/issues/6878
         let counts = vec![0_u64; sums.len()];
         let all_nulls = Some(make_all_nulls(sums.len()));
         let counts =
diff --git a/datafusion/physical-expr/src/aggregate/utils.rs b/datafusion/physical-expr/src/aggregate/utils.rs
index 0cd0821e08b0..63587c925b43 100644
--- a/datafusion/physical-expr/src/aggregate/utils.rs
+++ b/datafusion/physical-expr/src/aggregate/utils.rs
@@ -151,12 +151,12 @@ pub fn calculate_result_decimal_for_avg(
 ///
 /// Since `Decimal128Arrays` created from `Vec<NativeType>` have
 /// default precision and scale, this function adjusts the output to
-/// match `sum_data_type`.
+/// match `data_type`.
 pub fn adjust_output_array(
-    sum_data_type: &DataType,
+    data_type: &DataType,
     array: ArrayRef,
 ) -> Result<ArrayRef, DataFusionError> {
-    let array = match sum_data_type {
+    let array = match data_type {
         DataType::Decimal128(p, s) => Arc::new(
             array
                 .as_primitive::<Decimal128Type>()

From b6bde8df419c3838f4e7d6ec0199aeadc7221480 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 11:12:57 -0400
Subject: [PATCH 78/89] Improve memory accounting

---
 .../core/src/physical_plan/aggregates/row_hash2.rs     | 10 +++++++---
 datafusion/execution/src/memory_pool/proxy.rs          |  8 ++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
index 404ac3aaf9e7..335ce40754fa 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash2.rs
@@ -43,7 +43,7 @@ use crate::physical_plan::{RecordBatchStream, SendableRecordBatchStream};
 use arrow::array::*;
 use arrow::{datatypes::SchemaRef, record_batch::RecordBatch};
 use datafusion_common::Result;
-use datafusion_execution::memory_pool::proxy::RawTableAllocExt;
+use datafusion_execution::memory_pool::proxy::{RawTableAllocExt, VecAllocExt};
 use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation};
 use datafusion_execution::TaskContext;
 use hashbrown::raw::RawTable;
@@ -383,11 +383,11 @@ impl GroupedHashAggregateStream2 {
 
         // tracks to which group each of the input rows belongs
         let group_indices = &mut self.current_group_indices;
-        group_indices.clear();
-
+        let group_indices_size_pre = group_indices.allocated_size();
         let group_values_size_pre = self.group_values.size();
 
         // 1.1 Calculate the group keys for the group values
+        group_indices.clear();
         let mut batch_hashes = vec![0; n_rows];
         create_hashes(group_values, &self.random_state, &mut batch_hashes)?;
 
@@ -420,6 +420,10 @@ impl GroupedHashAggregateStream2 {
             group_indices.push(group_idx);
         }
 
+        // memory growth in group_indices
+        *allocated += group_indices.allocated_size();
+        *allocated -= group_indices_size_pre; // subtract after adding to avoid underflow
+
         // account for any memory increase used to store group_values
         *allocated += self
             .group_values
diff --git a/datafusion/execution/src/memory_pool/proxy.rs b/datafusion/execution/src/memory_pool/proxy.rs
index 43532f9a81f1..2bf485c6ee76 100644
--- a/datafusion/execution/src/memory_pool/proxy.rs
+++ b/datafusion/execution/src/memory_pool/proxy.rs
@@ -26,6 +26,11 @@ pub trait VecAllocExt {
 
     /// [Push](Vec::push) new element to vector and store additional allocated bytes in `accounting` (additive).
     fn push_accounted(&mut self, x: Self::T, accounting: &mut usize);
+
+    /// Return the amount of memory allocated by this Vec (not
+    /// recursively counting any heap allocations contained within the
+    /// structure). Does not include the size of `self`
+    fn allocated_size(&self) -> usize;
 }
 
 impl<T> VecAllocExt for Vec<T> {
@@ -44,6 +49,9 @@ impl<T> VecAllocExt for Vec<T> {
 
         self.push(x);
     }
+    fn allocated_size(&self) -> usize {
+        std::mem::size_of::<T>() * self.capacity()
+    }
 }
 
 /// Extension trait for [`RawTable`] to account for allocations.

From cb5b8cbbc77ee4bf8cbd8d10d851735292e19ed1 Mon Sep 17 00:00:00 2001
From: Renjie Liu <liurenjie2008@gmail.com>
Date: Fri, 7 Jul 2023 21:02:16 +0800
Subject: [PATCH 79/89] feat: Add graphviz display format for execution plan.
 (#6726)

* Implement graphviz format for execution plan

* Update cargo.lock

* fix ci

* fix test

* Fix comment

* Resolve conflicts with main
---
 datafusion/common/src/display/graphviz.rs     | 105 +++++++++++++++
 .../common/src/{display.rs => display/mod.rs} |   3 +
 datafusion/core/src/physical_plan/display.rs  | 127 ++++++++++++++++++
 .../core/src/physical_plan/streaming.rs       |   3 +-
 datafusion/core/src/physical_planner.rs       |  30 +++++
 datafusion/core/tests/sql/explain_analyze.rs  |  16 ++-
 datafusion/expr/src/logical_plan/display.rs   |  58 +++-----
 datafusion/expr/src/logical_plan/plan.rs      |  72 +++++-----
 8 files changed, 334 insertions(+), 80 deletions(-)
 create mode 100644 datafusion/common/src/display/graphviz.rs
 rename datafusion/common/src/{display.rs => display/mod.rs} (99%)

diff --git a/datafusion/common/src/display/graphviz.rs b/datafusion/common/src/display/graphviz.rs
new file mode 100644
index 000000000000..f84490cd3ea4
--- /dev/null
+++ b/datafusion/common/src/display/graphviz.rs
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Logic related to creating DOT language graphs.
+
+use std::fmt;
+
+#[derive(Default)]
+pub struct GraphvizBuilder {
+    id_gen: usize,
+}
+
+impl GraphvizBuilder {
+    /// Generates the next id for a graphviz element (1 for the first call on a default builder).
+    pub fn next_id(&mut self) -> usize {
+        self.id_gen += 1;
+        self.id_gen
+    }
+
+    /// Writes out the start of the whole graph: the marker comments followed by `digraph {`.
+    pub fn start_graph(&mut self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(
+            f,
+            r#"
+// Begin DataFusion GraphViz Plan,
+// display it online here: https://dreampuf.github.io/GraphvizOnline
+"#
+        )?;
+        writeln!(f, "digraph {{")
+    }
+
+    pub fn end_graph(&mut self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(f, "}}")?;
+        writeln!(f, "// End DataFusion GraphViz Plan")
+    }
+
+    /// Writes out the start of a subgraph cluster labelled `title`, consuming one id for it.
+    pub fn start_cluster(&mut self, f: &mut fmt::Formatter, title: &str) -> fmt::Result {
+        writeln!(f, "  subgraph cluster_{}", self.next_id())?;
+        writeln!(f, "  {{")?;
+        writeln!(f, "    graph[label={}]", Self::quoted(title))
+    }
+
+    /// Writes out the end of the subgraph cluster.
+    pub fn end_cluster(&mut self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(f, "  }}")
+    }
+
+    /// Makes a quoted string suitable for inclusion in a graphviz chart (`"` becomes `_`).
+    pub fn quoted(label: &str) -> String {
+        let label = label.replace('"', "_");
+        format!("\"{label}\"")
+    }
+
+    pub fn add_node(
+        &self,
+        f: &mut fmt::Formatter,
+        id: usize,
+        label: &str,
+        tooltip: Option<&str>,
+    ) -> fmt::Result {
+        if let Some(tooltip) = tooltip {
+            writeln!(
+                f,
+                "    {}[shape=box label={}, tooltip={}]",
+                id,
+                GraphvizBuilder::quoted(label),
+                GraphvizBuilder::quoted(tooltip),
+            )
+        } else {
+            writeln!(
+                f,
+                "    {}[shape=box label={}]",
+                id,
+                GraphvizBuilder::quoted(label),
+            )
+        }
+    }
+
+    pub fn add_edge(
+        &self,
+        f: &mut fmt::Formatter,
+        from_id: usize,
+        to_id: usize,
+    ) -> fmt::Result {
+        writeln!(
+            f,
+            "    {from_id} -> {to_id} [arrowhead=none, arrowtail=normal, dir=back]"
+        )
+    }
+}
diff --git a/datafusion/common/src/display.rs b/datafusion/common/src/display/mod.rs
similarity index 99%
rename from datafusion/common/src/display.rs
rename to datafusion/common/src/display/mod.rs
index 79de9bc031d6..766b37ce2891 100644
--- a/datafusion/common/src/display.rs
+++ b/datafusion/common/src/display/mod.rs
@@ -17,6 +17,9 @@
 
 //! Types for plan display
 
+mod graphviz;
+pub use graphviz::*;
+
 use std::{
     fmt::{self, Display, Formatter},
     sync::Arc,
diff --git a/datafusion/core/src/physical_plan/display.rs b/datafusion/core/src/physical_plan/display.rs
index 674cbcf2478b..63b4f0c28cd0 100644
--- a/datafusion/core/src/physical_plan/display.rs
+++ b/datafusion/core/src/physical_plan/display.rs
@@ -20,10 +20,12 @@
 //! format
 
 use std::fmt;
+use std::fmt::Formatter;
 
 use datafusion_common::display::StringifiedPlan;
 
 use super::{accept, ExecutionPlan, ExecutionPlanVisitor};
+use datafusion_common::display::GraphvizBuilder;
 
 /// Options for controlling how each [`ExecutionPlan`] should format itself
 #[derive(Debug, Clone, Copy)]
@@ -110,6 +112,49 @@ impl<'a> DisplayableExecutionPlan<'a> {
         }
     }
 
+    /// Returns a `format`able structure that produces graphviz format for execution plan, which can
+    /// be directly visualized [here](https://dreampuf.github.io/GraphvizOnline).
+    ///
+    /// An example of the generated graph (leading/trailing marker comments omitted) is
+    /// ```dot
+    /// digraph {
+    ///     1[shape=box label="ProjectionExec: expr=[id@0 + 2 as employee.id + Int32(2)]", tooltip=""]
+    ///     2[shape=box label="EmptyExec: produce_one_row=false", tooltip=""]
+    ///     1 -> 2 [arrowhead=none, arrowtail=normal, dir=back]
+    /// }
+    /// ```
+    pub fn graphviz(&self) -> impl fmt::Display + 'a {
+        struct Wrapper<'a> {
+            plan: &'a dyn ExecutionPlan,
+            show_metrics: ShowMetrics,
+        }
+        impl<'a> fmt::Display for Wrapper<'a> {
+            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+                let t = DisplayFormatType::Default;
+
+                let mut visitor = GraphvizVisitor {
+                    f,
+                    t,
+                    show_metrics: self.show_metrics,
+                    graphviz_builder: GraphvizBuilder::default(),
+                    parents: Vec::new(),
+                };
+
+                visitor.start_graph()?;
+
+                accept(self.plan, &mut visitor)?;
+
+                visitor.end_graph()?;
+                Ok(())
+            }
+        }
+
+        Wrapper {
+            plan: self.inner,
+            show_metrics: self.show_metrics,
+        }
+    }
+
     /// Return a single-line summary of the root of the plan
     /// Example: `ProjectionExec: expr=[a@0 as a]`.
     pub fn one_line(&self) -> impl fmt::Display + 'a {
@@ -209,6 +254,88 @@ impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> {
     }
 }
 
+struct GraphvizVisitor<'a, 'b> {
+    f: &'a mut Formatter<'b>,
+    /// How to format each node
+    t: DisplayFormatType,
+    /// How to show metrics
+    show_metrics: ShowMetrics,
+    graphviz_builder: GraphvizBuilder,
+    /// Used to record parent node ids when visiting a plan.
+    parents: Vec<usize>,
+}
+
+impl GraphvizVisitor<'_, '_> {
+    fn start_graph(&mut self) -> fmt::Result {
+        self.graphviz_builder.start_graph(self.f)
+    }
+
+    fn end_graph(&mut self) -> fmt::Result {
+        self.graphviz_builder.end_graph(self.f)
+    }
+}
+
+impl ExecutionPlanVisitor for GraphvizVisitor<'_, '_> {
+    type Error = fmt::Error;
+
+    fn pre_visit(
+        &mut self,
+        plan: &dyn ExecutionPlan,
+    ) -> datafusion_common::Result<bool, Self::Error> {
+        let id = self.graphviz_builder.next_id();
+
+        struct Wrapper<'a>(&'a dyn ExecutionPlan, DisplayFormatType);
+
+        impl<'a> std::fmt::Display for Wrapper<'a> {
+            fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+                self.0.fmt_as(self.1, f)
+            }
+        }
+
+        let label = { format!("{}", Wrapper(plan, self.t)) };
+
+        let metrics = match self.show_metrics {
+            ShowMetrics::None => "".to_string(),
+            ShowMetrics::Aggregated => {
+                if let Some(metrics) = plan.metrics() {
+                    let metrics = metrics
+                        .aggregate_by_name()
+                        .sorted_for_display()
+                        .timestamps_removed();
+
+                    format!("metrics=[{metrics}]")
+                } else {
+                    "metrics=[]".to_string()
+                }
+            }
+            ShowMetrics::Full => {
+                if let Some(metrics) = plan.metrics() {
+                    format!("metrics=[{metrics}]")
+                } else {
+                    "metrics=[]".to_string()
+                }
+            }
+        };
+
+        self.graphviz_builder
+            .add_node(self.f, id, &label, Some(&metrics))?;
+
+        if let Some(parent_node_id) = self.parents.last() {
+            self.graphviz_builder
+                .add_edge(self.f, *parent_node_id, id)?;
+        }
+
+        self.parents.push(id);
+
+        Ok(true)
+    }
+
+    fn post_visit(&mut self, _plan: &dyn ExecutionPlan) -> Result<bool, Self::Error> {
+        self.parents.pop();
+        Ok(true)
+    }
+}
+
 /// Trait for types which could have additional details when formatted in `Verbose` mode
 pub trait DisplayAs {
     /// Format according to `DisplayFormatType`, used when verbose representation looks
diff --git a/datafusion/core/src/physical_plan/streaming.rs b/datafusion/core/src/physical_plan/streaming.rs
index 97b244d1acf5..f06f61ca1e52 100644
--- a/datafusion/core/src/physical_plan/streaming.rs
+++ b/datafusion/core/src/physical_plan/streaming.rs
@@ -28,12 +28,11 @@ use datafusion_common::{DataFusionError, Result, Statistics};
 use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};
 use log::debug;
 
-use crate::datasource::physical_plan::{OutputOrderingDisplay, ProjectSchemaDisplay};
 use crate::physical_plan::stream::RecordBatchStreamAdapter;
 use crate::physical_plan::{ExecutionPlan, Partitioning, SendableRecordBatchStream};
 use datafusion_execution::TaskContext;
 
-use super::{DisplayAs, DisplayFormatType};
+use super::DisplayAs;
 
 /// A partition that can be converted into a [`SendableRecordBatchStream`]
 pub trait PartitionStream: Send + Sync {
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs
index 92c7604c3735..de34353d59fa 100644
--- a/datafusion/core/src/physical_planner.rs
+++ b/datafusion/core/src/physical_planner.rs
@@ -2610,4 +2610,34 @@ mod tests {
             ctx.read_csv(path, options).await?.into_optimized_plan()?,
         ))
     }
+
+    #[tokio::test]
+    async fn test_display_plan_in_graphviz_format() {
+        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
+
+        let logical_plan = scan_empty(Some("employee"), &schema, None)
+            .unwrap()
+            .project(vec![col("id") + lit(2)])
+            .unwrap()
+            .build()
+            .unwrap();
+
+        let plan = plan(&logical_plan).await.unwrap();
+
+        let expected_graph = r###"
+// Begin DataFusion GraphViz Plan,
+// display it online here: https://dreampuf.github.io/GraphvizOnline
+
+digraph {
+    1[shape=box label="ProjectionExec: expr=[id@0 + 2 as employee.id + Int32(2)]", tooltip=""]
+    2[shape=box label="EmptyExec: produce_one_row=false", tooltip=""]
+    1 -> 2 [arrowhead=none, arrowtail=normal, dir=back]
+}
+// End DataFusion GraphViz Plan
+"###;
+
+        let generated_graph = format!("{}", displayable(&*plan).graphviz());
+
+        assert_eq!(expected_graph, generated_graph);
+    }
 }
diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs
index e0130cb09c8c..9125dc0ba739 100644
--- a/datafusion/core/tests/sql/explain_analyze.rs
+++ b/datafusion/core/tests/sql/explain_analyze.rs
@@ -210,7 +210,9 @@ async fn csv_explain_plans() {
     //
     // verify the grahviz format of the plan
     let expected = vec![
-        "// Begin DataFusion GraphViz Plan (see https://graphviz.org)",
+        "// Begin DataFusion GraphViz Plan,",
+        "// display it online here: https://dreampuf.github.io/GraphvizOnline",
+        "",
         "digraph {",
         "  subgraph cluster_1",
         "  {",
@@ -282,7 +284,9 @@ async fn csv_explain_plans() {
     //
     // verify the grahviz format of the plan
     let expected = vec![
-        "// Begin DataFusion GraphViz Plan (see https://graphviz.org)",
+        "// Begin DataFusion GraphViz Plan,",
+        "// display it online here: https://dreampuf.github.io/GraphvizOnline",
+        "",
         "digraph {",
         "  subgraph cluster_1",
         "  {",
@@ -427,7 +431,9 @@ async fn csv_explain_verbose_plans() {
     //
     // verify the grahviz format of the plan
     let expected = vec![
-        "// Begin DataFusion GraphViz Plan (see https://graphviz.org)",
+        "// Begin DataFusion GraphViz Plan,",
+        "// display it online here: https://dreampuf.github.io/GraphvizOnline",
+        "",
         "digraph {",
         "  subgraph cluster_1",
         "  {",
@@ -499,7 +505,9 @@ async fn csv_explain_verbose_plans() {
     //
     // verify the grahviz format of the plan
     let expected = vec![
-        "// Begin DataFusion GraphViz Plan (see https://graphviz.org)",
+        "// Begin DataFusion GraphViz Plan,",
+        "// display it online here: https://dreampuf.github.io/GraphvizOnline",
+        "",
         "digraph {",
         "  subgraph cluster_1",
         "  {",
diff --git a/datafusion/expr/src/logical_plan/display.rs b/datafusion/expr/src/logical_plan/display.rs
index c82689b2ccd7..112dbf74dba1 100644
--- a/datafusion/expr/src/logical_plan/display.rs
+++ b/datafusion/expr/src/logical_plan/display.rs
@@ -18,6 +18,7 @@
 
 use crate::LogicalPlan;
 use arrow::datatypes::Schema;
+use datafusion_common::display::GraphvizBuilder;
 use datafusion_common::tree_node::{TreeNodeVisitor, VisitRecursion};
 use datafusion_common::DataFusionError;
 use std::fmt;
@@ -123,37 +124,6 @@ pub fn display_schema(schema: &Schema) -> impl fmt::Display + '_ {
     Wrapper(schema)
 }
 
-/// Logic related to creating DOT language graphs.
-#[derive(Default)]
-struct GraphvizBuilder {
-    id_gen: usize,
-}
-
-impl GraphvizBuilder {
-    fn next_id(&mut self) -> usize {
-        self.id_gen += 1;
-        self.id_gen
-    }
-
-    // write out the start of the subgraph cluster
-    fn start_cluster(&mut self, f: &mut fmt::Formatter, title: &str) -> fmt::Result {
-        writeln!(f, "  subgraph cluster_{}", self.next_id())?;
-        writeln!(f, "  {{")?;
-        writeln!(f, "    graph[label={}]", Self::quoted(title))
-    }
-
-    // write out the end of the subgraph cluster
-    fn end_cluster(&mut self, f: &mut fmt::Formatter) -> fmt::Result {
-        writeln!(f, "  }}")
-    }
-
-    /// makes a quoted string suitable for inclusion in a graphviz chart
-    fn quoted(label: &str) -> String {
-        let label = label.replace('"', "_");
-        format!("\"{label}\"")
-    }
-}
-
 /// Formats plans for graphical display using the `DOT` language. This
 /// format can be visualized using software from
 /// [`graphviz`](https://graphviz.org/)
@@ -190,6 +160,14 @@ impl<'a, 'b> GraphvizVisitor<'a, 'b> {
     pub fn post_visit_plan(&mut self) -> fmt::Result {
         self.graphviz_builder.end_cluster(self.f)
     }
+
+    pub fn start_graph(&mut self) -> fmt::Result {
+        self.graphviz_builder.start_graph(self.f)
+    }
+
+    pub fn end_graph(&mut self) -> fmt::Result {
+        self.graphviz_builder.end_graph(self.f)
+    }
 }
 
 impl<'a, 'b> TreeNodeVisitor for GraphvizVisitor<'a, 'b> {
@@ -213,22 +191,16 @@ impl<'a, 'b> TreeNodeVisitor for GraphvizVisitor<'a, 'b> {
             format!("{}", plan.display())
         };
 
-        writeln!(
-            self.f,
-            "    {}[shape=box label={}]",
-            id,
-            GraphvizBuilder::quoted(&label)
-        )
-        .map_err(|_e| DataFusionError::Internal("Fail to format".to_string()))?;
+        self.graphviz_builder
+            .add_node(self.f, id, &label, None)
+            .map_err(|_e| DataFusionError::Internal("Fail to format".to_string()))?;
 
         // Create an edge to our parent node, if any
         //  parent_id -> id
         if let Some(parent_id) = self.parent_ids.last() {
-            writeln!(
-                self.f,
-                "    {parent_id} -> {id} [arrowhead=none, arrowtail=normal, dir=back]"
-            )
-            .map_err(|_e| DataFusionError::Internal("Fail to format".to_string()))?;
+            self.graphviz_builder
+                .add_edge(self.f, *parent_id, id)
+                .map_err(|_e| DataFusionError::Internal("Fail to format".to_string()))?;
         }
 
         self.parent_ids.push(id);
diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs
index e058708701b9..d9bb2557334a 100644
--- a/datafusion/expr/src/logical_plan/plan.rs
+++ b/datafusion/expr/src/logical_plan/plan.rs
@@ -898,14 +898,10 @@ impl LogicalPlan {
         struct Wrapper<'a>(&'a LogicalPlan);
         impl<'a> Display for Wrapper<'a> {
             fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-                writeln!(
-                    f,
-                    "// Begin DataFusion GraphViz Plan (see https://graphviz.org)"
-                )?;
-                writeln!(f, "digraph {{")?;
-
                 let mut visitor = GraphvizVisitor::new(f);
 
+                visitor.start_graph()?;
+
                 visitor.pre_visit_plan("LogicalPlan")?;
                 self.0.visit(&mut visitor).map_err(|_| fmt::Error)?;
                 visitor.post_visit_plan()?;
@@ -915,8 +911,7 @@ impl LogicalPlan {
                 self.0.visit(&mut visitor).map_err(|_| fmt::Error)?;
                 visitor.post_visit_plan()?;
 
-                writeln!(f, "}}")?;
-                writeln!(f, "// End DataFusion GraphViz Plan")?;
+                visitor.end_graph()?;
                 Ok(())
             }
         }
@@ -1850,31 +1845,46 @@ mod tests {
     fn test_display_graphviz() -> Result<()> {
         let plan = display_plan()?;
 
+        let expected_graphviz = r###"
+// Begin DataFusion GraphViz Plan,
+// display it online here: https://dreampuf.github.io/GraphvizOnline
+
+digraph {
+  subgraph cluster_1
+  {
+    graph[label="LogicalPlan"]
+    2[shape=box label="Projection: employee_csv.id"]
+    3[shape=box label="Filter: employee_csv.state IN (<subquery>)"]
+    2 -> 3 [arrowhead=none, arrowtail=normal, dir=back]
+    4[shape=box label="Subquery:"]
+    3 -> 4 [arrowhead=none, arrowtail=normal, dir=back]
+    5[shape=box label="TableScan: employee_csv projection=[state]"]
+    4 -> 5 [arrowhead=none, arrowtail=normal, dir=back]
+    6[shape=box label="TableScan: employee_csv projection=[id, state]"]
+    3 -> 6 [arrowhead=none, arrowtail=normal, dir=back]
+  }
+  subgraph cluster_7
+  {
+    graph[label="Detailed LogicalPlan"]
+    8[shape=box label="Projection: employee_csv.id\nSchema: [id:Int32]"]
+    9[shape=box label="Filter: employee_csv.state IN (<subquery>)\nSchema: [id:Int32, state:Utf8]"]
+    8 -> 9 [arrowhead=none, arrowtail=normal, dir=back]
+    10[shape=box label="Subquery:\nSchema: [state:Utf8]"]
+    9 -> 10 [arrowhead=none, arrowtail=normal, dir=back]
+    11[shape=box label="TableScan: employee_csv projection=[state]\nSchema: [state:Utf8]"]
+    10 -> 11 [arrowhead=none, arrowtail=normal, dir=back]
+    12[shape=box label="TableScan: employee_csv projection=[id, state]\nSchema: [id:Int32, state:Utf8]"]
+    9 -> 12 [arrowhead=none, arrowtail=normal, dir=back]
+  }
+}
+// End DataFusion GraphViz Plan
+"###;
+
         // just test for a few key lines in the output rather than the
         // whole thing to make test mainteance easier.
         let graphviz = format!("{}", plan.display_graphviz());
 
-        assert!(
-            graphviz.contains(
-                r#"// Begin DataFusion GraphViz Plan (see https://graphviz.org)"#
-            ),
-            "\n{}",
-            plan.display_graphviz()
-        );
-        assert!(
-            graphviz.contains(
-                r#"[shape=box label="TableScan: employee_csv projection=[id, state]"]"#
-            ),
-            "\n{}",
-            plan.display_graphviz()
-        );
-        assert!(graphviz.contains(r#"[shape=box label="TableScan: employee_csv projection=[id, state]\nSchema: [id:Int32, state:Utf8]"]"#),
-                "\n{}", plan.display_graphviz());
-        assert!(
-            graphviz.contains(r#"// End DataFusion GraphViz Plan"#),
-            "\n{}",
-            plan.display_graphviz()
-        );
+        assert_eq!(expected_graphviz, graphviz);
         Ok(())
     }
 
@@ -1895,7 +1905,7 @@ mod tests {
                 _ => {
                     return Err(DataFusionError::NotImplemented(
                         "unknown plan type".to_string(),
-                    ))
+                    ));
                 }
             };
 
@@ -1911,7 +1921,7 @@ mod tests {
                 _ => {
                     return Err(DataFusionError::NotImplemented(
                         "unknown plan type".to_string(),
-                    ))
+                    ));
                 }
             };
 

From 07f8d77afff0ff95a00df5316fb850ee91c47627 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 11:14:39 -0400
Subject: [PATCH 80/89] Fix (another) logical conflict (#6882)

---
 datafusion/core/src/physical_plan/streaming.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/src/physical_plan/streaming.rs b/datafusion/core/src/physical_plan/streaming.rs
index f06f61ca1e52..97b244d1acf5 100644
--- a/datafusion/core/src/physical_plan/streaming.rs
+++ b/datafusion/core/src/physical_plan/streaming.rs
@@ -28,11 +28,12 @@ use datafusion_common::{DataFusionError, Result, Statistics};
 use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};
 use log::debug;
 
+use crate::datasource::physical_plan::{OutputOrderingDisplay, ProjectSchemaDisplay};
 use crate::physical_plan::stream::RecordBatchStreamAdapter;
 use crate::physical_plan::{ExecutionPlan, Partitioning, SendableRecordBatchStream};
 use datafusion_execution::TaskContext;
 
-use super::DisplayAs;
+use super::{DisplayAs, DisplayFormatType};
 
 /// A partition that can be converted into a [`SendableRecordBatchStream`]
 pub trait PartitionStream: Send + Sync {

From 4dcac2a0c90fcee087d14ccc87f31354216e8ca8 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 16:23:28 -0400
Subject: [PATCH 81/89] Implement groups accumulators for bit operations

simplify
---
 .../src/aggregate/bit_and_or_xor.rs           | 238 +++++++++++++++++-
 .../groups_accumulator/accumulate.rs          |   5 +-
 datafusion/physical-expr/src/aggregate/sum.rs |  19 +-
 3 files changed, 246 insertions(+), 16 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
index 4bbe563edce8..4feac9c6988f 100644
--- a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
+++ b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
@@ -1,5 +1,5 @@
 // Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
+// or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
@@ -18,12 +18,16 @@
 //! Defines physical expressions that can evaluated at runtime during query execution
 
 use ahash::RandomState;
+use arrow_array::PrimitiveArray;
 use std::any::Any;
 use std::convert::TryFrom;
 use std::sync::Arc;
 
-use crate::{AggregateExpr, PhysicalExpr};
-use arrow::datatypes::DataType;
+use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
+use arrow::datatypes::{
+    ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
+    UInt32Type, UInt64Type, UInt8Type,
+};
 use arrow::{
     array::{
         ArrayRef, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array,
@@ -40,10 +44,12 @@ use crate::aggregate::row_accumulator::{
 };
 use crate::aggregate::utils::down_cast_any_ref;
 use crate::expressions::format_state_name;
-use arrow::array::Array;
+use arrow::array::{Array, AsArray};
 use arrow::compute::{bit_and, bit_or, bit_xor};
 use datafusion_row::accessor::RowAccessor;
 
+use super::groups_accumulator::accumulate::NullState;
+
 // returns the new value after bit_and/bit_or/bit_xor with the new values, taking nullability into account
 macro_rules! typed_bit_and_or_xor_batch {
     ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{
@@ -206,6 +212,14 @@ impl BitAnd {
     }
 }
 
+macro_rules! instantiate_bitop_accumulator {
+    ($NUMERICTYPE:ident, $FN:expr) => {{
+        Ok(Box::new(BitOpGroupsAccumulator::<$NUMERICTYPE, _>::new(
+            $FN,
+        )))
+    }};
+}
+
 impl AggregateExpr for BitAnd {
     /// Return a reference to Any that can be used for downcasting
     fn as_any(&self) -> &dyn Any {
@@ -254,6 +268,46 @@ impl AggregateExpr for BitAnd {
         )))
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        true
+    }
+
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        use std::ops::BitAndAssign;
+        match self.data_type {
+            DataType::Int8 => {
+                instantiate_bitop_accumulator!(Int8Type, |x, y| x.bitand_assign(y))
+            }
+            DataType::Int16 => {
+                instantiate_bitop_accumulator!(Int16Type, |x, y| x.bitand_assign(y))
+            }
+            DataType::Int32 => {
+                instantiate_bitop_accumulator!(Int32Type, |x, y| x.bitand_assign(y))
+            }
+            DataType::Int64 => {
+                instantiate_bitop_accumulator!(Int64Type, |x, y| x.bitand_assign(y))
+            }
+            DataType::UInt8 => {
+                instantiate_bitop_accumulator!(UInt8Type, |x, y| x.bitand_assign(y))
+            }
+            DataType::UInt16 => {
+                instantiate_bitop_accumulator!(UInt16Type, |x, y| x.bitand_assign(y))
+            }
+            DataType::UInt32 => {
+                instantiate_bitop_accumulator!(UInt32Type, |x, y| x.bitand_assign(y))
+            }
+            DataType::UInt64 => {
+                instantiate_bitop_accumulator!(UInt64Type, |x, y| x.bitand_assign(y))
+            }
+
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "BitOpGroupsAccumulator not supported for {} with {}",
+                self.name(),
+                self.data_type
+            ))),
+        }
+    }
+
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }
@@ -444,6 +498,46 @@ impl AggregateExpr for BitOr {
         )))
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        true
+    }
+
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        use std::ops::BitOrAssign;
+        match self.data_type {
+            DataType::Int8 => {
+                instantiate_bitop_accumulator!(Int8Type, |x, y| x.bitor_assign(y))
+            }
+            DataType::Int16 => {
+                instantiate_bitop_accumulator!(Int16Type, |x, y| x.bitor_assign(y))
+            }
+            DataType::Int32 => {
+                instantiate_bitop_accumulator!(Int32Type, |x, y| x.bitor_assign(y))
+            }
+            DataType::Int64 => {
+                instantiate_bitop_accumulator!(Int64Type, |x, y| x.bitor_assign(y))
+            }
+            DataType::UInt8 => {
+                instantiate_bitop_accumulator!(UInt8Type, |x, y| x.bitor_assign(y))
+            }
+            DataType::UInt16 => {
+                instantiate_bitop_accumulator!(UInt16Type, |x, y| x.bitor_assign(y))
+            }
+            DataType::UInt32 => {
+                instantiate_bitop_accumulator!(UInt32Type, |x, y| x.bitor_assign(y))
+            }
+            DataType::UInt64 => {
+                instantiate_bitop_accumulator!(UInt64Type, |x, y| x.bitor_assign(y))
+            }
+
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "BitOpGroupsAccumulator not supported for {} with {}",
+                self.name(),
+                self.data_type
+            ))),
+        }
+    }
+
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }
@@ -635,6 +729,46 @@ impl AggregateExpr for BitXor {
         )))
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        true
+    }
+
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        use std::ops::BitXorAssign;
+        match self.data_type {
+            DataType::Int8 => {
+                instantiate_bitop_accumulator!(Int8Type, |x, y| x.bitxor_assign(y))
+            }
+            DataType::Int16 => {
+                instantiate_bitop_accumulator!(Int16Type, |x, y| x.bitxor_assign(y))
+            }
+            DataType::Int32 => {
+                instantiate_bitop_accumulator!(Int32Type, |x, y| x.bitxor_assign(y))
+            }
+            DataType::Int64 => {
+                instantiate_bitop_accumulator!(Int64Type, |x, y| x.bitxor_assign(y))
+            }
+            DataType::UInt8 => {
+                instantiate_bitop_accumulator!(UInt8Type, |x, y| x.bitxor_assign(y))
+            }
+            DataType::UInt16 => {
+                instantiate_bitop_accumulator!(UInt16Type, |x, y| x.bitxor_assign(y))
+            }
+            DataType::UInt32 => {
+                instantiate_bitop_accumulator!(UInt32Type, |x, y| x.bitxor_assign(y))
+            }
+            DataType::UInt64 => {
+                instantiate_bitop_accumulator!(UInt64Type, |x, y| x.bitxor_assign(y))
+            }
+
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "BitOpGroupsAccumulator not supported for {} with {}",
+                self.name(),
+                self.data_type
+            ))),
+        }
+    }
+
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }
@@ -917,6 +1051,102 @@ impl Accumulator for DistinctBitXorAccumulator {
     }
 }
 
+/// An accumulator that implements bitwise operations over native types
+///
+/// F: The bitwise function to apply to two elements. The first
+/// argument is the existing value and should be updated with the
+/// second value (e.g. [`std::ops::BitAndAssign`] style).
+#[derive(Debug)]
+struct BitOpGroupsAccumulator<T, F>
+where
+    T: ArrowPrimitiveType + Send,
+    F: Fn(&mut T::Native, T::Native) + Send + Sync,
+{
+    /// values per group, stored as the native type
+    values: Vec<T::Native>,
+
+    /// Track nulls in the input / filters
+    null_state: NullState,
+
+    /// Function that computes the bitwise function
+    bitop_fn: F,
+}
+
+impl<T, F> BitOpGroupsAccumulator<T, F>
+where
+    T: ArrowPrimitiveType + Send,
+    F: Fn(&mut T::Native, T::Native) + Send + Sync,
+{
+    pub fn new(bitop_fn: F) -> Self {
+        Self {
+            values: vec![],
+            null_state: NullState::new(),
+            bitop_fn,
+        }
+    }
+}
+
+impl<T, F> GroupsAccumulator for BitOpGroupsAccumulator<T, F>
+where
+    T: ArrowPrimitiveType + Send,
+    F: Fn(&mut T::Native, T::Native) + Send + Sync,
+{
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "single argument to update_batch");
+        let values = values.get(0).unwrap().as_primitive::<T>();
+
+        // update values
+        self.values
+            .resize_with(total_num_groups, || T::default_value());
+
+        // NullState dispatches / handles tracking nulls and groups that saw no values
+        self.null_state.accumulate(
+            group_indices,
+            values,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let value = &mut self.values[group_index];
+                (self.bitop_fn)(value, new_value);
+            },
+        );
+
+        Ok(())
+    }
+
+    fn evaluate(&mut self) -> Result<ArrayRef> {
+        let values = std::mem::take(&mut self.values);
+        let nulls = self.null_state.build();
+        let values = PrimitiveArray::<T>::new(values.into(), nulls); // no copy
+        Ok(Arc::new(values))
+    }
+
+    fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        self.evaluate().map(|arr| vec![arr])
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&arrow_array::BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        // update / merge are the same
+        self.update_batch(values, group_indices, opt_filter, total_num_groups)
+    }
+
+    fn size(&self) -> usize {
+        self.values.capacity() * std::mem::size_of::<T::Native>()
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index f19576ee67fd..d660ae18362e 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -38,7 +38,8 @@
 //! handle each input null value specially (e.g. for `SUM` to mark the
 //! corresponding sum as null)
 
-use arrow_array::{Array, ArrowNumericType, BooleanArray, PrimitiveArray};
+use arrow::datatypes::ArrowPrimitiveType;
+use arrow_array::{Array, BooleanArray, PrimitiveArray};
 use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 
 /// This structure is used to update the accumulator state per row for
@@ -133,7 +134,7 @@ impl NullState {
         total_num_groups: usize,
         mut value_fn: F,
     ) where
-        T: ArrowNumericType + Send,
+        T: ArrowPrimitiveType + Send,
         F: FnMut(usize, T::Native) + Send,
     {
         let data: &[T::Native] = values.values();
diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index a3b576158139..6e6d77974dab 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -21,7 +21,14 @@ use std::any::Any;
 use std::convert::TryFrom;
 use std::sync::Arc;
 
+use crate::aggregate::row_accumulator::{
+    is_row_accumulator_support_dtype, RowAccumulator,
+};
+use crate::aggregate::utils::down_cast_any_ref;
+use crate::expressions::format_state_name;
 use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
+use arrow::array::Array;
+use arrow::array::Decimal128Array;
 use arrow::compute;
 use arrow::compute::kernels::cast;
 use arrow::datatypes::DataType;
@@ -41,16 +48,8 @@ use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
 use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
-use log::debug;
-
-use crate::aggregate::row_accumulator::{
-    is_row_accumulator_support_dtype, RowAccumulator,
-};
-use crate::aggregate::utils::down_cast_any_ref;
-use crate::expressions::format_state_name;
-use arrow::array::Array;
-use arrow::array::Decimal128Array;
 use datafusion_row::accessor::RowAccessor;
+use log::debug;
 
 use super::groups_accumulator::accumulate::NullState;
 use super::utils::adjust_output_array;
@@ -618,7 +617,7 @@ where
     }
 
     fn size(&self) -> usize {
-        self.sums.capacity() * std::mem::size_of::<usize>()
+        self.sums.capacity() * std::mem::size_of::<T::Native>()
     }
 }
 

From 5d6f815f4234f3ebf9535aa5e088cb441e7d8ed7 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 17:07:48 -0400
Subject: [PATCH 82/89] Almost there

---
 .../src/aggregate/bit_and_or_xor.rs           | 171 ++++--------------
 .../src/aggregate/bool_and_or.rs              |  21 ++-
 .../aggregate/groups_accumulator/bool_op.rs   | 137 ++++++++++++++
 .../src/aggregate/groups_accumulator/mod.rs   |   4 +-
 .../aggregate/groups_accumulator/prim_op.rs   | 138 ++++++++++++++
 5 files changed, 331 insertions(+), 140 deletions(-)
 create mode 100644 datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
 create mode 100644 datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs

diff --git a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
index 4feac9c6988f..af7fc9b3515a 100644
--- a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
+++ b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
@@ -18,15 +18,16 @@
 //! Defines physical expressions that can evaluated at runtime during query execution
 
 use ahash::RandomState;
-use arrow_array::PrimitiveArray;
 use std::any::Any;
 use std::convert::TryFrom;
 use std::sync::Arc;
 
-use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
+use crate::{
+    instantiate_primitive_accumulator, AggregateExpr, GroupsAccumulator, PhysicalExpr,
+};
 use arrow::datatypes::{
-    ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
-    UInt32Type, UInt64Type, UInt8Type,
+    DataType, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
+    UInt64Type, UInt8Type,
 };
 use arrow::{
     array::{
@@ -44,12 +45,10 @@ use crate::aggregate::row_accumulator::{
 };
 use crate::aggregate::utils::down_cast_any_ref;
 use crate::expressions::format_state_name;
-use arrow::array::{Array, AsArray};
+use arrow::array::Array;
 use arrow::compute::{bit_and, bit_or, bit_xor};
 use datafusion_row::accessor::RowAccessor;
 
-use super::groups_accumulator::accumulate::NullState;
-
 // returns the new value after bit_and/bit_or/bit_xor with the new values, taking nullability into account
 macro_rules! typed_bit_and_or_xor_batch {
     ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{
@@ -212,14 +211,6 @@ impl BitAnd {
     }
 }
 
-macro_rules! instantiate_bitop_accumulator {
-    ($NUMERICTYPE:ident, $FN:expr) => {{
-        Ok(Box::new(BitOpGroupsAccumulator::<$NUMERICTYPE, _>::new(
-            $FN,
-        )))
-    }};
-}
-
 impl AggregateExpr for BitAnd {
     /// Return a reference to Any that can be used for downcasting
     fn as_any(&self) -> &dyn Any {
@@ -276,32 +267,32 @@ impl AggregateExpr for BitAnd {
         use std::ops::BitAndAssign;
         match self.data_type {
             DataType::Int8 => {
-                instantiate_bitop_accumulator!(Int8Type, |x, y| x.bitand_assign(y))
+                instantiate_primitive_accumulator!(Int8Type, |x, y| x.bitand_assign(y))
             }
             DataType::Int16 => {
-                instantiate_bitop_accumulator!(Int16Type, |x, y| x.bitand_assign(y))
+                instantiate_primitive_accumulator!(Int16Type, |x, y| x.bitand_assign(y))
             }
             DataType::Int32 => {
-                instantiate_bitop_accumulator!(Int32Type, |x, y| x.bitand_assign(y))
+                instantiate_primitive_accumulator!(Int32Type, |x, y| x.bitand_assign(y))
             }
             DataType::Int64 => {
-                instantiate_bitop_accumulator!(Int64Type, |x, y| x.bitand_assign(y))
+                instantiate_primitive_accumulator!(Int64Type, |x, y| x.bitand_assign(y))
             }
             DataType::UInt8 => {
-                instantiate_bitop_accumulator!(UInt8Type, |x, y| x.bitand_assign(y))
+                instantiate_primitive_accumulator!(UInt8Type, |x, y| x.bitand_assign(y))
             }
             DataType::UInt16 => {
-                instantiate_bitop_accumulator!(UInt16Type, |x, y| x.bitand_assign(y))
+                instantiate_primitive_accumulator!(UInt16Type, |x, y| x.bitand_assign(y))
             }
             DataType::UInt32 => {
-                instantiate_bitop_accumulator!(UInt32Type, |x, y| x.bitand_assign(y))
+                instantiate_primitive_accumulator!(UInt32Type, |x, y| x.bitand_assign(y))
             }
             DataType::UInt64 => {
-                instantiate_bitop_accumulator!(UInt64Type, |x, y| x.bitand_assign(y))
+                instantiate_primitive_accumulator!(UInt64Type, |x, y| x.bitand_assign(y))
             }
 
             _ => Err(DataFusionError::NotImplemented(format!(
-                "BitOpGroupsAccumulator not supported for {} with {}",
+                "GroupsAccumulator not supported for {} with {}",
                 self.name(),
                 self.data_type
             ))),
@@ -506,32 +497,32 @@ impl AggregateExpr for BitOr {
         use std::ops::BitOrAssign;
         match self.data_type {
             DataType::Int8 => {
-                instantiate_bitop_accumulator!(Int8Type, |x, y| x.bitor_assign(y))
+                instantiate_primitive_accumulator!(Int8Type, |x, y| x.bitor_assign(y))
             }
             DataType::Int16 => {
-                instantiate_bitop_accumulator!(Int16Type, |x, y| x.bitor_assign(y))
+                instantiate_primitive_accumulator!(Int16Type, |x, y| x.bitor_assign(y))
             }
             DataType::Int32 => {
-                instantiate_bitop_accumulator!(Int32Type, |x, y| x.bitor_assign(y))
+                instantiate_primitive_accumulator!(Int32Type, |x, y| x.bitor_assign(y))
             }
             DataType::Int64 => {
-                instantiate_bitop_accumulator!(Int64Type, |x, y| x.bitor_assign(y))
+                instantiate_primitive_accumulator!(Int64Type, |x, y| x.bitor_assign(y))
             }
             DataType::UInt8 => {
-                instantiate_bitop_accumulator!(UInt8Type, |x, y| x.bitor_assign(y))
+                instantiate_primitive_accumulator!(UInt8Type, |x, y| x.bitor_assign(y))
             }
             DataType::UInt16 => {
-                instantiate_bitop_accumulator!(UInt16Type, |x, y| x.bitor_assign(y))
+                instantiate_primitive_accumulator!(UInt16Type, |x, y| x.bitor_assign(y))
             }
             DataType::UInt32 => {
-                instantiate_bitop_accumulator!(UInt32Type, |x, y| x.bitor_assign(y))
+                instantiate_primitive_accumulator!(UInt32Type, |x, y| x.bitor_assign(y))
             }
             DataType::UInt64 => {
-                instantiate_bitop_accumulator!(UInt64Type, |x, y| x.bitor_assign(y))
+                instantiate_primitive_accumulator!(UInt64Type, |x, y| x.bitor_assign(y))
             }
 
             _ => Err(DataFusionError::NotImplemented(format!(
-                "BitOpGroupsAccumulator not supported for {} with {}",
+                "GroupsAccumulator not supported for {} with {}",
                 self.name(),
                 self.data_type
             ))),
@@ -737,32 +728,32 @@ impl AggregateExpr for BitXor {
         use std::ops::BitXorAssign;
         match self.data_type {
             DataType::Int8 => {
-                instantiate_bitop_accumulator!(Int8Type, |x, y| x.bitxor_assign(y))
+                instantiate_primitive_accumulator!(Int8Type, |x, y| x.bitxor_assign(y))
             }
             DataType::Int16 => {
-                instantiate_bitop_accumulator!(Int16Type, |x, y| x.bitxor_assign(y))
+                instantiate_primitive_accumulator!(Int16Type, |x, y| x.bitxor_assign(y))
             }
             DataType::Int32 => {
-                instantiate_bitop_accumulator!(Int32Type, |x, y| x.bitxor_assign(y))
+                instantiate_primitive_accumulator!(Int32Type, |x, y| x.bitxor_assign(y))
             }
             DataType::Int64 => {
-                instantiate_bitop_accumulator!(Int64Type, |x, y| x.bitxor_assign(y))
+                instantiate_primitive_accumulator!(Int64Type, |x, y| x.bitxor_assign(y))
             }
             DataType::UInt8 => {
-                instantiate_bitop_accumulator!(UInt8Type, |x, y| x.bitxor_assign(y))
+                instantiate_primitive_accumulator!(UInt8Type, |x, y| x.bitxor_assign(y))
             }
             DataType::UInt16 => {
-                instantiate_bitop_accumulator!(UInt16Type, |x, y| x.bitxor_assign(y))
+                instantiate_primitive_accumulator!(UInt16Type, |x, y| x.bitxor_assign(y))
             }
             DataType::UInt32 => {
-                instantiate_bitop_accumulator!(UInt32Type, |x, y| x.bitxor_assign(y))
+                instantiate_primitive_accumulator!(UInt32Type, |x, y| x.bitxor_assign(y))
             }
             DataType::UInt64 => {
-                instantiate_bitop_accumulator!(UInt64Type, |x, y| x.bitxor_assign(y))
+                instantiate_primitive_accumulator!(UInt64Type, |x, y| x.bitxor_assign(y))
             }
 
             _ => Err(DataFusionError::NotImplemented(format!(
-                "BitOpGroupsAccumulator not supported for {} with {}",
+                "GroupsAccumulator not supported for {} with {}",
                 self.name(),
                 self.data_type
             ))),
@@ -1051,102 +1042,6 @@ impl Accumulator for DistinctBitXorAccumulator {
     }
 }
 
-/// An accumulator that implements bitwise operations over native types
-///
-/// F: The bitwise function to apply to two elements. The first
-/// argument is the existing value and should be updated with the
-/// second value (e.g. [`std::ops::BitAndAssign`] style).
-#[derive(Debug)]
-struct BitOpGroupsAccumulator<T, F>
-where
-    T: ArrowPrimitiveType + Send,
-    F: Fn(&mut T::Native, T::Native) + Send + Sync,
-{
-    /// values per group, stored as the native type
-    values: Vec<T::Native>,
-
-    /// Track nulls in the input / filters
-    null_state: NullState,
-
-    /// Function that computes the bitwise function
-    bitop_fn: F,
-}
-
-impl<T, F> BitOpGroupsAccumulator<T, F>
-where
-    T: ArrowPrimitiveType + Send,
-    F: Fn(&mut T::Native, T::Native) + Send + Sync,
-{
-    pub fn new(bitop_fn: F) -> Self {
-        Self {
-            values: vec![],
-            null_state: NullState::new(),
-            bitop_fn,
-        }
-    }
-}
-
-impl<T, F> GroupsAccumulator for BitOpGroupsAccumulator<T, F>
-where
-    T: ArrowPrimitiveType + Send,
-    F: Fn(&mut T::Native, T::Native) + Send + Sync,
-{
-    fn update_batch(
-        &mut self,
-        values: &[ArrayRef],
-        group_indices: &[usize],
-        opt_filter: Option<&arrow_array::BooleanArray>,
-        total_num_groups: usize,
-    ) -> Result<()> {
-        assert_eq!(values.len(), 1, "single argument to update_batch");
-        let values = values.get(0).unwrap().as_primitive::<T>();
-
-        // update values
-        self.values
-            .resize_with(total_num_groups, || T::default_value());
-
-        // NullState dispatches / handles tracking nulls and groups that saw no values
-        self.null_state.accumulate(
-            group_indices,
-            values,
-            opt_filter,
-            total_num_groups,
-            |group_index, new_value| {
-                let value = &mut self.values[group_index];
-                (self.bitop_fn)(value, new_value);
-            },
-        );
-
-        Ok(())
-    }
-
-    fn evaluate(&mut self) -> Result<ArrayRef> {
-        let values = std::mem::take(&mut self.values);
-        let nulls = self.null_state.build();
-        let values = PrimitiveArray::<T>::new(values.into(), nulls); // no copy
-        Ok(Arc::new(values))
-    }
-
-    fn state(&mut self) -> Result<Vec<ArrayRef>> {
-        self.evaluate().map(|arr| vec![arr])
-    }
-
-    fn merge_batch(
-        &mut self,
-        values: &[ArrayRef],
-        group_indices: &[usize],
-        opt_filter: Option<&arrow_array::BooleanArray>,
-        total_num_groups: usize,
-    ) -> Result<()> {
-        // update / merge are the same
-        self.update_batch(values, group_indices, opt_filter, total_num_groups)
-    }
-
-    fn size(&self) -> usize {
-        self.values.capacity() * std::mem::size_of::<T::Native>()
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/datafusion/physical-expr/src/aggregate/bool_and_or.rs b/datafusion/physical-expr/src/aggregate/bool_and_or.rs
index e444dc61ee1b..d60cef338784 100644
--- a/datafusion/physical-expr/src/aggregate/bool_and_or.rs
+++ b/datafusion/physical-expr/src/aggregate/bool_and_or.rs
@@ -20,7 +20,9 @@
 use std::any::Any;
 use std::sync::Arc;
 
-use crate::{AggregateExpr, PhysicalExpr};
+use crate::{
+    instantiate_boolean_accumulator, AggregateExpr, GroupsAccumulator, PhysicalExpr,
+};
 use arrow::datatypes::DataType;
 use arrow::{
     array::{ArrayRef, BooleanArray},
@@ -193,6 +195,23 @@ impl AggregateExpr for BoolAnd {
         )))
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        true
+    }
+
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        match self.data_type {
+            DataType::Boolean => {
+                instantiate_boolean_accumulator!(|x, y| x && y)
+            }
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "GroupsAccumulator not supported for {} with {}",
+                self.name(),
+                self.data_type
+            ))),
+        }
+    }
+
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
new file mode 100644
index 000000000000..c52a4de5d4f9
--- /dev/null
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow::array::AsArray;
+use arrow_array::{ArrayRef, BooleanArray};
+use arrow_buffer::BooleanBufferBuilder;
+use datafusion_common::Result;
+
+use crate::GroupsAccumulator;
+
+use super::accumulate::NullState;
+
+/// Creates a [`BooleanGroupsAccumulator`] in which `$FN` is
+/// applied to each element
+#[macro_export]
+macro_rules! instantiate_boolean_accumulator {
+    ($FN:expr) => {{
+        use crate::aggregate::groups_accumulator::bool_op::BooleanGroupsAccumulator;
+        Ok(Box::new(BooleanGroupsAccumulator::<_>::new($FN)))
+    }};
+}
+
+/// An accumulator that implements a single operation over
+/// Boolean where the accumulated state is the same as the input
+/// type (such as [`BitAndAssign`])
+///
+/// F: The function to apply to two elements. The first argument is
+/// the existing value, the second is the new value, and the returned
+/// result is stored as the group's new value (e.g. `x && y`).
+///
+/// [`BitAndAssign`]: std::ops::BitAndAssign
+#[derive(Debug)]
+pub struct BooleanGroupsAccumulator<F>
+where
+    F: Fn(bool, bool) -> bool + Send + Sync,
+{
+    /// values per group
+    values: BooleanBufferBuilder,
+
+    /// Track nulls in the input / filters
+    null_state: NullState,
+
+    /// Function that computes the output
+    bool_fn: F,
+}
+
+impl<F> BooleanGroupsAccumulator<F>
+where
+    F: Fn(bool, bool) -> bool + Send + Sync,
+{
+    pub fn new(bitop_fn: F) -> Self {
+        Self {
+            values: BooleanBufferBuilder::new(0),
+            null_state: NullState::new(),
+            bool_fn: bitop_fn,
+        }
+    }
+}
+
+impl<F> GroupsAccumulator for BooleanGroupsAccumulator<F>
+where
+    F: Fn(bool, bool) -> bool + Send + Sync,
+{
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "single argument to update_batch");
+        let values = values.get(0).unwrap().as_boolean();
+
+        if self.values.len() < total_num_groups {
+            let new_groups = total_num_groups - self.values.len();
+            self.values.append_n(new_groups, Default::default());
+        }
+
+        // NullState dispatches / handles tracking nulls and groups that saw no values
+        self.null_state.accumulate(
+            group_indices,
+            values,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let value = self.values.get_bit(group_index);
+                self.values
+                    .set_bit(group_index, (self.bool_fn)(value, new_value));
+            },
+        );
+
+        Ok(())
+    }
+
+    fn evaluate(&mut self) -> Result<ArrayRef> {
+        let values = self.value.finish();
+        let nulls = self.null_state.build();
+        let values = BooleanArray::new(values, nulls);
+        Ok(Arc::new(values))
+    }
+
+    fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        self.evaluate().map(|arr| vec![arr])
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        // update / merge are the same
+        self.update_batch(values, group_indices, opt_filter, total_num_groups)
+    }
+
+    fn size(&self) -> usize {
+        // capacity is in bits, so convert to bytes
+        self.values.capacity() / 8
+    }
+}
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
index 8679b8d7f12e..5741aab7a24d 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/mod.rs
@@ -19,9 +19,11 @@
 
 pub(crate) mod accumulate;
 mod adapter;
-
 pub use adapter::GroupsAccumulatorAdapter;
 
+pub(crate) mod bool_op;
+pub(crate) mod prim_op;
+
 use arrow_array::{ArrayRef, BooleanArray};
 use datafusion_common::Result;
 
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs
new file mode 100644
index 000000000000..d000eab460bf
--- /dev/null
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow::{array::AsArray, datatypes::ArrowPrimitiveType};
+use arrow_array::{ArrayRef, BooleanArray, PrimitiveArray};
+use datafusion_common::Result;
+
+use crate::GroupsAccumulator;
+
+use super::accumulate::NullState;
+
+/// Creates a [`PrimitiveGroupsAccumulator`] with the specified
+/// [`ArrowPrimitiveType`] which applies `$FN` to each element
+#[macro_export]
+macro_rules! instantiate_primitive_accumulator {
+    ($NUMERICTYPE:ident, $FN:expr) => {{
+        use crate::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
+        Ok(Box::new(
+            PrimitiveGroupsAccumulator::<$NUMERICTYPE, _>::new($FN),
+        ))
+    }};
+}
+
+/// An accumulator that implements a single operation over
+/// PrimitiveTypes where the accumulated state is the same as the input
+/// type (such as [`BitAndAssign`])
+///
+/// F: The function to apply to two elements. The first argument is
+/// the existing value and should be updated with the second value
+/// (e.g. [`BitAndAssign`] style).
+///
+/// [`BitAndAssign`]: std::ops::BitAndAssign
+#[derive(Debug)]
+pub struct PrimitiveGroupsAccumulator<T, F>
+where
+    T: ArrowPrimitiveType + Send,
+    F: Fn(&mut T::Native, T::Native) + Send + Sync,
+{
+    /// values per group, stored as the native type
+    values: Vec<T::Native>,
+
+    /// Track nulls in the input / filters
+    null_state: NullState,
+
+    /// Function that updates a group's value in place with a new input value
+    bitop_fn: F,
+}
+
+impl<T, F> PrimitiveGroupsAccumulator<T, F>
+where
+    T: ArrowPrimitiveType + Send,
+    F: Fn(&mut T::Native, T::Native) + Send + Sync,
+{
+    pub fn new(bitop_fn: F) -> Self {
+        Self {
+            values: vec![],
+            null_state: NullState::new(),
+            bitop_fn,
+        }
+    }
+}
+
+impl<T, F> GroupsAccumulator for PrimitiveGroupsAccumulator<T, F>
+where
+    T: ArrowPrimitiveType + Send,
+    F: Fn(&mut T::Native, T::Native) + Send + Sync,
+{
+    fn update_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        assert_eq!(values.len(), 1, "single argument to update_batch");
+        let values = values.get(0).unwrap().as_primitive::<T>();
+
+        // update values
+        self.values
+            .resize_with(total_num_groups, || T::default_value());
+
+        // NullState dispatches / handles tracking nulls and groups that saw no values
+        self.null_state.accumulate(
+            group_indices,
+            values,
+            opt_filter,
+            total_num_groups,
+            |group_index, new_value| {
+                let value = &mut self.values[group_index];
+                (self.bitop_fn)(value, new_value);
+            },
+        );
+
+        Ok(())
+    }
+
+    fn evaluate(&mut self) -> Result<ArrayRef> {
+        let values = std::mem::take(&mut self.values);
+        let nulls = self.null_state.build();
+        let values = PrimitiveArray::<T>::new(values.into(), nulls); // no copy
+        Ok(Arc::new(values))
+    }
+
+    fn state(&mut self) -> Result<Vec<ArrayRef>> {
+        self.evaluate().map(|arr| vec![arr])
+    }
+
+    fn merge_batch(
+        &mut self,
+        values: &[ArrayRef],
+        group_indices: &[usize],
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+    ) -> Result<()> {
+        // update / merge are the same
+        self.update_batch(values, group_indices, opt_filter, total_num_groups)
+    }
+
+    fn size(&self) -> usize {
+        self.values.capacity() * std::mem::size_of::<T::Native>()
+    }
+}

From 60ee2ef4925e86560eb42c21841244f09ca5f395 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Fri, 7 Jul 2023 17:11:15 -0400
Subject: [PATCH 83/89] it compiles

---
 .../aggregate/groups_accumulator/accumulate.rs  | 17 +++++++++++++++++
 .../src/aggregate/groups_accumulator/bool_op.rs | 10 +++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index d660ae18362e..29b21b85db40 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -262,6 +262,23 @@ impl NullState {
         }
     }
 
+    /// Invokes `value_fn(group_index, value)` for each non null, non
+    /// filtered value, while tracking which groups have seen null
+    /// inputs and which groups have seen any inputs, for
+    /// [`BooleanArray`]s. See [`Self::accumulate`] for more details.
+    pub fn accumulate_boolean<F>(
+        &mut self,
+        group_indices: &[usize],
+        values: &BooleanArray,
+        opt_filter: Option<&BooleanArray>,
+        total_num_groups: usize,
+        mut value_fn: F,
+    ) where
+        F: FnMut(usize, bool) + Send,
+    {
+        todo!();
+    }
+
     /// Creates the final NullBuffer representing which group_indices have
     /// null values (if they saw a null input, or because they never saw any values)
     ///
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
index c52a4de5d4f9..b673fe7a7cc8 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
@@ -93,15 +93,15 @@ where
         }
 
         // NullState dispatches / handles tracking nulls and groups that saw no values
-        self.null_state.accumulate(
+        self.null_state.accumulate_boolean(
             group_indices,
             values,
             opt_filter,
             total_num_groups,
             |group_index, new_value| {
-                let value = self.values.get_bit(group_index);
-                self.values
-                    .set_bit(group_index, (self.bool_fn)(value, new_value));
+                let current_value = self.values.get_bit(group_index);
+                let value = (self.bool_fn)(current_value, new_value);
+                self.values.set_bit(group_index, value);
             },
         );
 
@@ -109,7 +109,7 @@ where
     }
 
     fn evaluate(&mut self) -> Result<ArrayRef> {
-        let values = self.value.finish();
+        let values = self.values.finish();
         let nulls = self.null_state.build();
         let values = BooleanArray::new(values, nulls);
         Ok(Arc::new(values))

From f2fc450699d51e2a4ab5b99b72843ba510c1d223 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <daniel.heres@coralogix.com>
Date: Sat, 8 Jul 2023 10:34:37 +0200
Subject: [PATCH 84/89] Reuse hashes buffer

---
 .../core/src/physical_plan/aggregates/row_hash.rs   | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash.rs b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
index 46f460d5a6d8..a3b9af5bae6e 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
@@ -111,6 +111,8 @@ pub(crate) struct GroupedHashAggregateStream {
     /// first element in the array corresponds to normal accumulators
     /// second element in the array corresponds to row accumulators
     indices: [Vec<Range<usize>>; 2],
+    // buffer to be reused to store hashes
+    hashes_buffer: Vec<u64>,
 }
 
 impl GroupedHashAggregateStream {
@@ -231,6 +233,7 @@ impl GroupedHashAggregateStream {
             scalar_update_factor,
             row_group_skip_position: 0,
             indices: [normal_agg_indices, row_agg_indices],
+            hashes_buffer: vec![],
         })
     }
 }
@@ -324,15 +327,17 @@ impl GroupedHashAggregateStream {
         let mut groups_with_rows = vec![];
 
         // 1.1 Calculate the group keys for the group values
-        let mut batch_hashes = vec![0; n_rows];
-        create_hashes(group_values, &self.random_state, &mut batch_hashes)?;
+        let batch_hashes = &mut self.hashes_buffer;
+        batch_hashes.clear();
+        batch_hashes.resize(n_rows, 0);
+        create_hashes(group_values, &self.random_state, batch_hashes)?;
 
         let AggregationState {
             map, group_states, ..
         } = &mut self.aggr_state;
 
         for (row, hash) in batch_hashes.into_iter().enumerate() {
-            let entry = map.get_mut(hash, |(_hash, group_idx)| {
+            let entry = map.get_mut(*hash, |(_hash, group_idx)| {
                 // verify that a group that we are inserting with hash is
                 // actually the same key value as the group in
                 // existing_idx  (aka group_values @ row)
@@ -387,7 +392,7 @@ impl GroupedHashAggregateStream {
 
                     // for hasher function, use precomputed hash value
                     map.insert_accounted(
-                        (hash, group_idx),
+                        (*hash, group_idx),
                         |(hash, _group_index)| *hash,
                         allocated,
                     );

From b7819106a71c47e6ff6cb5c985522dbdd52c145c Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 8 Jul 2023 10:55:38 -0400
Subject: [PATCH 85/89] Complete BoolAnd and BoolOr accumulators

---
 .../src/aggregate/bit_and_or_xor.rs           |  17 +-
 .../src/aggregate/bool_and_or.rs              |  29 ++-
 .../groups_accumulator/accumulate.rs          | 208 +++++++++++++-----
 .../aggregate/groups_accumulator/bool_op.rs   |  10 -
 .../aggregate/groups_accumulator/prim_op.rs   |  12 -
 5 files changed, 182 insertions(+), 94 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
index af7fc9b3515a..7e420c07e1b6 100644
--- a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
+++ b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
@@ -15,16 +15,14 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Defines physical expressions that can evaluated at runtime during query execution
+//! Defines BitAnd, BitOr, and BitXor Aggregate accumulators
 
 use ahash::RandomState;
 use std::any::Any;
 use std::convert::TryFrom;
 use std::sync::Arc;
 
-use crate::{
-    instantiate_primitive_accumulator, AggregateExpr, GroupsAccumulator, PhysicalExpr,
-};
+use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::datatypes::{
     DataType, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type,
     UInt64Type, UInt8Type,
@@ -40,6 +38,7 @@ use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
 use std::collections::HashSet;
 
+use crate::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
 use crate::aggregate::row_accumulator::{
     is_row_accumulator_support_dtype, RowAccumulator,
 };
@@ -49,6 +48,16 @@ use arrow::array::Array;
 use arrow::compute::{bit_and, bit_or, bit_xor};
 use datafusion_row::accessor::RowAccessor;
 
+/// Creates a [`PrimitiveGroupsAccumulator`] with the specified
+/// [`ArrowPrimitiveType`] which applies `$FN` to each element
+macro_rules! instantiate_primitive_accumulator {
+    ($NUMERICTYPE:ident, $FN:expr) => {{
+        Ok(Box::new(
+            PrimitiveGroupsAccumulator::<$NUMERICTYPE, _>::new($FN),
+        ))
+    }};
+}
+
 // returns the new value after bit_and/bit_or/bit_xor with the new values, taking nullability into account
 macro_rules! typed_bit_and_or_xor_batch {
     ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident, $OP:ident) => {{
diff --git a/datafusion/physical-expr/src/aggregate/bool_and_or.rs b/datafusion/physical-expr/src/aggregate/bool_and_or.rs
index d60cef338784..6107b0972c81 100644
--- a/datafusion/physical-expr/src/aggregate/bool_and_or.rs
+++ b/datafusion/physical-expr/src/aggregate/bool_and_or.rs
@@ -17,12 +17,7 @@
 
 //! Defines physical expressions that can evaluated at runtime during query execution
 
-use std::any::Any;
-use std::sync::Arc;
-
-use crate::{
-    instantiate_boolean_accumulator, AggregateExpr, GroupsAccumulator, PhysicalExpr,
-};
+use crate::{AggregateExpr, GroupsAccumulator, PhysicalExpr};
 use arrow::datatypes::DataType;
 use arrow::{
     array::{ArrayRef, BooleanArray},
@@ -30,7 +25,10 @@ use arrow::{
 };
 use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
+use std::any::Any;
+use std::sync::Arc;
 
+use crate::aggregate::groups_accumulator::bool_op::BooleanGroupsAccumulator;
 use crate::aggregate::row_accumulator::{
     is_row_accumulator_support_dtype, RowAccumulator,
 };
@@ -202,7 +200,7 @@ impl AggregateExpr for BoolAnd {
     fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
         match self.data_type {
             DataType::Boolean => {
-                instantiate_boolean_accumulator!(|x, y| x && y)
+                Ok(Box::new(BooleanGroupsAccumulator::new(|x, y| x && y)))
             }
             _ => Err(DataFusionError::NotImplemented(format!(
                 "GroupsAccumulator not supported for {} with {}",
@@ -400,6 +398,23 @@ impl AggregateExpr for BoolOr {
         )))
     }
 
+    fn groups_accumulator_supported(&self) -> bool {
+        true
+    }
+
+    fn create_groups_accumulator(&self) -> Result<Box<dyn GroupsAccumulator>> {
+        match self.data_type {
+            DataType::Boolean => {
+                Ok(Box::new(BooleanGroupsAccumulator::new(|x, y| x || y)))
+            }
+            _ => Err(DataFusionError::NotImplemented(format!(
+                "GroupsAccumulator not supported for {} with {}",
+                self.name(),
+                self.data_type
+            ))),
+        }
+    }
+
     fn reverse_expr(&self) -> Option<Arc<dyn AggregateExpr>> {
         Some(Arc::new(self.clone()))
     }
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
index 29b21b85db40..a9627ece7c43 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/accumulate.rs
@@ -15,45 +15,48 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Vectorized accumulate helpers: [`NullState`] and [`accumulate_indices`]
+//! [`GroupsAccumulator`] helpers: [`NullState`] and [`accumulate_indices`]
 //!
 //! These functions are designed to be the performance critical inner
-//! loops of accumlators and thus there are multiple versions, to be
-//! invoked depending on the input.
+//! loops of [`GroupsAccumulator`], so there are multiple type
+//! specific methods, invoked depending on the input.
 //!
-//! There are typically 4 potential combinations of input values that
-//! accumulators need to special case for performance,
-//!
-//! With / Without filter
-//! With / Without nulls
-//!
-//! If there are filters present, the accumulator typically needs to
-//! to track if it has seen *any* value for that group (as some values
-//! may be filtered out). Without a filter, the accumulator is only
-//! invoked for groups that actually had a value to accumulate so they
-//! do not need to track if they have seen values for a particular
-//! group.
-//!
-//! If the input has nulls, then the accumulator must also potentially
-//! handle each input null value specially (e.g. for `SUM` to mark the
-//! corresponding sum as null)
+//! [`GroupsAccumulator`]: crate::GroupsAccumulator
 
 use arrow::datatypes::ArrowPrimitiveType;
 use arrow_array::{Array, BooleanArray, PrimitiveArray};
 use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 
-/// This structure is used to update the accumulator state per row for
-/// a `PrimitiveArray<T>`, and track if values or nulls have been seen
-/// for each group. Since it is the inner loop for many
-/// GroupsAccumulators, the  performance is critical.
+/// Track the accumulator null state per row: if any values for that
+/// group were null and if any values have been seen at all for that group.
+///
+/// This is part of the inner loop for many GroupsAccumulators, and
+/// thus the performance is critical.
+///
+/// There are typically 4 potential combinations of input values that
+/// accumulators need to special case for performance.
+///
+/// GroupsAccumulators need to handle all four combinations of:
+///
+/// * With / Without filter
+/// * With / Without nulls in the input
 ///
+/// If there are filters present, `NullState` tracks if it has seen
+/// *any* value for that group (as some values may be filtered
+/// out). Without a filter, the accumulator is only passed groups
+/// that actually had a value to accumulate so they do not need to
+/// track if they have seen values for a particular group.
+///
+/// If the input has nulls, then the accumulator must potentially
+/// handle each input null value specially (e.g. for `SUM` to mark the
+/// corresponding sum as null)
 #[derive(Debug)]
 pub struct NullState {
-    /// Tracks validity (if we we have seen a null input value for
-    /// `group_index`)
+    /// Tracks if a null input value has been seen for `group_index`,
+    /// if there were any nulls in the input.
     ///
     /// If `null_inputs[i]` is true, have not seen any null values for
-    /// that group (also true for no values)
+    /// that group, or have not seen any values
     ///
     /// If `null_inputs[i]` is false, saw at least one null value for
     /// that group
@@ -140,8 +143,22 @@ impl NullState {
         let data: &[T::Native] = values.values();
         assert_eq!(data.len(), group_indices.len());
 
-        match (values.nulls(), opt_filter) {
-            (Some(nulls), None) if nulls.null_count() > 0 => {
+        match (values.null_count() > 0, opt_filter) {
+            // no nulls, no filter,
+            (false, None) => {
+                // if we have previously seen nulls, ensure the null
+                // buffer is big enough (start everything at valid)
+                if self.null_inputs.is_some() {
+                    initialize_builder(&mut self.null_inputs, total_num_groups, true);
+                }
+                let iter = group_indices.iter().zip(data.iter());
+                for (&group_index, &new_value) in iter {
+                    value_fn(group_index, new_value)
+                }
+            }
+            // nulls, no filter
+            (true, None) => {
+                let nulls = values.nulls().unwrap();
                 // All groups start as valid (true), and are set to
                 // null if we see a null in the input)
                 let null_inputs =
@@ -149,8 +166,6 @@ impl NullState {
 
                 // This is based on (ahem, COPY/PASTA) arrow::compute::aggregate::sum
                 // iterate over in chunks of 64 bits for more efficient null checking
-                let data: &[T::Native] = values.values();
-                assert_eq!(data.len(), group_indices.len());
                 let group_indices_chunks = group_indices.chunks_exact(64);
                 let data_chunks = data.chunks_exact(64);
                 let bit_chunks = nulls.inner().bit_chunks();
@@ -195,20 +210,8 @@ impl NullState {
                         }
                     });
             }
-            // no filter, no nulls
-            (_, None) => {
-                // if we have previously seen nulls, ensure the null
-                // buffer is big enough (start everything at valid)
-                if self.null_inputs.is_some() {
-                    initialize_builder(&mut self.null_inputs, total_num_groups, true);
-                }
-                let iter = group_indices.iter().zip(data.iter());
-                for (&group_index, &new_value) in iter {
-                    value_fn(group_index, new_value)
-                }
-            }
             // no nulls, but a filter
-            (None, Some(filter)) => {
+            (false, Some(filter)) => {
                 assert_eq!(filter.len(), group_indices.len());
 
                 // default seen to false (we fill it in as we go)
@@ -217,27 +220,25 @@ impl NullState {
                 // The performance with a filter could be improved by
                 // iterating over the filter in chunks, rather than a single
                 // iterator. TODO file a ticket
-                let iter = group_indices.iter().zip(data.iter());
-                let iter = iter.zip(filter.iter());
-                for ((&group_index, &new_value), filter_value) in iter {
-                    if let Some(true) = filter_value {
-                        value_fn(group_index, new_value);
-                        // remember we have seen a value for this index
-                        seen_values.set_bit(group_index, true);
-                    }
-                }
+                group_indices
+                    .iter()
+                    .zip(data.iter())
+                    .zip(filter.iter())
+                    .for_each(|((&group_index, &new_value), filter_value)| {
+                        if let Some(true) = filter_value {
+                            value_fn(group_index, new_value);
+                            // remember we have seen a value for this index
+                            seen_values.set_bit(group_index, true);
+                        }
+                    })
             }
             // both null values and filters
-            (
-                Some(_value_nulls /* nulls obtained via values.iters() */),
-                Some(filter),
-            ) => {
+            (true, Some(filter)) => {
                 let null_inputs =
                     initialize_builder(&mut self.null_inputs, total_num_groups, true);
                 let seen_values =
                     initialize_builder(&mut self.seen_values, total_num_groups, false);
 
-                assert_eq!(filter.len(), values.len());
                 assert_eq!(filter.len(), group_indices.len());
                 // The performance with a filter could be improved by
                 // iterating over the filter in chunks, rather than using
@@ -263,9 +264,12 @@ impl NullState {
     }
 
     /// Invokes `value_fn(group_index, value)` for each non null, non
-    /// filtered value, while tracking which groups have seen null
-    /// inputs and which groups have seen any inputs, for
-    /// [`BooleanArray`]s. See [`Self::accumulate`] for more details.
+    /// filtered value in `values`, while tracking which groups have
+    /// seen null inputs and which groups have seen any inputs, for
+    /// [`BooleanArray`]s.
+    ///
+    /// See [`Self::accumulate`], which handles [`PrimitiveArray`]s,
+    /// for more details.
     pub fn accumulate_boolean<F>(
         &mut self,
         group_indices: &[usize],
@@ -276,7 +280,89 @@ impl NullState {
     ) where
         F: FnMut(usize, bool) + Send,
     {
-        todo!();
+        let data = values.values();
+        assert_eq!(data.len(), group_indices.len());
+
+        // These could be made more performant by iterating in chunks of 64 bits at a time
+        match (values.null_count() > 0, opt_filter) {
+            // no nulls, no filter,
+            (false, None) => {
+                // if we have previously seen nulls, ensure the null
+                // buffer is big enough (start everything at valid)
+                if self.null_inputs.is_some() {
+                    initialize_builder(&mut self.null_inputs, total_num_groups, true);
+                }
+                group_indices.iter().zip(data.iter()).for_each(
+                    |(&group_index, new_value)| value_fn(group_index, new_value),
+                )
+            }
+            // nulls, no filter
+            (true, None) => {
+                let nulls = values.nulls().unwrap();
+                // All groups start as valid (true), and are set to
+                // null if we see a null in the input
+                let null_inputs =
+                    initialize_builder(&mut self.null_inputs, total_num_groups, true);
+
+                group_indices
+                    .iter()
+                    .zip(data.iter())
+                    .zip(nulls.iter())
+                    .for_each(|((&group_index, new_value), is_valid)| {
+                        if is_valid {
+                            value_fn(group_index, new_value);
+                        } else {
+                            // input null means this group is now null
+                            null_inputs.set_bit(group_index, false);
+                        }
+                    })
+            }
+            // no nulls, but a filter
+            (false, Some(filter)) => {
+                assert_eq!(filter.len(), group_indices.len());
+
+                // default seen to false (we fill it in as we go)
+                let seen_values =
+                    initialize_builder(&mut self.seen_values, total_num_groups, false);
+
+                group_indices
+                    .iter()
+                    .zip(data.iter())
+                    .zip(filter.iter())
+                    .for_each(|((&group_index, new_value), filter_value)| {
+                        if let Some(true) = filter_value {
+                            value_fn(group_index, new_value);
+                            // remember we have seen a value for this index
+                            seen_values.set_bit(group_index, true);
+                        }
+                    })
+            }
+            // both null values and filters
+            (true, Some(filter)) => {
+                let null_inputs =
+                    initialize_builder(&mut self.null_inputs, total_num_groups, true);
+                let seen_values =
+                    initialize_builder(&mut self.seen_values, total_num_groups, false);
+
+                assert_eq!(filter.len(), group_indices.len());
+                filter
+                    .iter()
+                    .zip(group_indices.iter())
+                    .zip(values.iter())
+                    .for_each(|((filter_value, group_index), new_value)| {
+                        if let Some(true) = filter_value {
+                            if let Some(new_value) = new_value {
+                                value_fn(*group_index, new_value)
+                            } else {
+                                // input null means this group is now null
+                                null_inputs.set_bit(*group_index, false);
+                            }
+                            // remember we have seen a value for this index
+                            seen_values.set_bit(*group_index, true);
+                        }
+                    })
+            }
+        }
     }
 
     /// Creates the final NullBuffer representing which group_indices have
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
index b673fe7a7cc8..0f6dfdf045f4 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/bool_op.rs
@@ -26,16 +26,6 @@ use crate::GroupsAccumulator;
 
 use super::accumulate::NullState;
 
-/// Creates a [`BooleanGroupsAccumulator`] that applies `$FN` is
-/// applied to each element
-#[macro_export]
-macro_rules! instantiate_boolean_accumulator {
-    ($FN:expr) => {{
-        use crate::aggregate::groups_accumulator::bool_op::BooleanGroupsAccumulator;
-        Ok(Box::new(BooleanGroupsAccumulator::<_>::new($FN)))
-    }};
-}
-
 /// An accumulator that implements a single operation over
 /// Boolean where the accumulated state is the same as the input
 /// type (such as [`BitAndAssign`])
diff --git a/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs b/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs
index d000eab460bf..fe7fc7ecbaf0 100644
--- a/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs
+++ b/datafusion/physical-expr/src/aggregate/groups_accumulator/prim_op.rs
@@ -25,18 +25,6 @@ use crate::GroupsAccumulator;
 
 use super::accumulate::NullState;
 
-/// Creates a [`PrimitiveGroupsAccumulator`] with the specified
-/// [`ArrowPrimitiveType`] which applies `$FN` to each element
-#[macro_export]
-macro_rules! instantiate_primitive_accumulator {
-    ($NUMERICTYPE:ident, $FN:expr) => {{
-        use crate::aggregate::groups_accumulator::prim_op::PrimitiveGroupsAccumulator;
-        Ok(Box::new(
-            PrimitiveGroupsAccumulator::<$NUMERICTYPE, _>::new($FN),
-        ))
-    }};
-}
-
 /// An accumulator that implements a single operation over
 /// PrimtiveTypes where the accumulated state is the same as the input
 /// type (such as [`BitAndAssign`])

From aebe77f439703ee4c80eedd9f34a674dbe76d8b9 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 8 Jul 2023 10:58:32 -0400
Subject: [PATCH 86/89] Fix doc

---
 .../physical-expr/src/aggregate/bit_and_or_xor.rs      | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
index 7e420c07e1b6..9f239a09ddef 100644
--- a/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
+++ b/datafusion/physical-expr/src/aggregate/bit_and_or_xor.rs
@@ -50,11 +50,13 @@ use datafusion_row::accessor::RowAccessor;
 
 /// Creates a [`PrimitiveGroupsAccumulator`] with the specified
 /// [`ArrowPrimitiveType`] which applies `$FN` to each element
+///
+/// [`ArrowPrimitiveType`]: arrow::datatypes::ArrowPrimitiveType
 macro_rules! instantiate_primitive_accumulator {
-    ($NUMERICTYPE:ident, $FN:expr) => {{
-        Ok(Box::new(
-            PrimitiveGroupsAccumulator::<$NUMERICTYPE, _>::new($FN),
-        ))
+    ($PRIMTYPE:ident, $FN:expr) => {{
+        Ok(Box::new(PrimitiveGroupsAccumulator::<$PRIMTYPE, _>::new(
+            $FN,
+        )))
     }};
 }
 

From f684ae83bd50daddbc3de5d7c0d4f6a091329d64 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 8 Jul 2023 11:29:33 -0400
Subject: [PATCH 87/89] clippy

---
 datafusion/core/src/physical_plan/aggregates/row_hash.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/core/src/physical_plan/aggregates/row_hash.rs b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
index a3b9af5bae6e..4741f181f9ea 100644
--- a/datafusion/core/src/physical_plan/aggregates/row_hash.rs
+++ b/datafusion/core/src/physical_plan/aggregates/row_hash.rs
@@ -336,7 +336,7 @@ impl GroupedHashAggregateStream {
             map, group_states, ..
         } = &mut self.aggr_state;
 
-        for (row, hash) in batch_hashes.into_iter().enumerate() {
+        for (row, hash) in batch_hashes.iter_mut().enumerate() {
             let entry = map.get_mut(*hash, |(_hash, group_idx)| {
                 // verify that a group that we are inserting with hash is
                 // actually the same key value as the group in

From e798074b8297ead9c104a6032478e242f1d6413a Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 8 Jul 2023 10:38:25 -0400
Subject: [PATCH 88/89] Performance: Use a specialized sum accumulator for
 retractable aggregates

---
 datafusion/physical-expr/src/aggregate/sum.rs | 73 +++++++++++++++----
 1 file changed, 57 insertions(+), 16 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index 6e6d77974dab..dd00299e9e0c 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Defines physical expressions that can evaluated at runtime during query execution
+//! Defines `SUM` and `SUM DISTINCT` aggregate accumulators
 
 use std::any::Any;
 use std::convert::TryFrom;
@@ -115,18 +115,11 @@ impl AggregateExpr for Sum {
     }
 
     fn state_fields(&self) -> Result<Vec<Field>> {
-        Ok(vec![
-            Field::new(
-                format_state_name(&self.name, "sum"),
-                self.data_type.clone(),
-                self.nullable,
-            ),
-            Field::new(
-                format_state_name(&self.name, "count"),
-                DataType::UInt64,
-                self.nullable,
-            ),
-        ])
+        Ok(vec![Field::new(
+            format_state_name(&self.name, "sum"),
+            self.data_type.clone(),
+            self.nullable,
+        )])
     }
 
     fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>> {
@@ -200,7 +193,7 @@ impl AggregateExpr for Sum {
     }
 
     fn create_sliding_accumulator(&self) -> Result<Box<dyn Accumulator>> {
-        Ok(Box::new(SumAccumulator::try_new(&self.data_type)?))
+        Ok(Box::new(SlidingSumAccumulator::try_new(&self.data_type)?))
     }
 }
 
@@ -218,10 +211,10 @@ impl PartialEq<dyn Any> for Sum {
     }
 }
 
+/// This accumulator computes SUM incrementally
 #[derive(Debug)]
 struct SumAccumulator {
     sum: ScalarValue,
-    count: u64,
 }
 
 impl SumAccumulator {
@@ -229,12 +222,32 @@ impl SumAccumulator {
     pub fn try_new(data_type: &DataType) -> Result<Self> {
         Ok(Self {
             sum: ScalarValue::try_from(data_type)?,
+        })
+    }
+}
+
+/// This accumulator incrementally computes sums over a sliding window
+#[derive(Debug)]
+struct SlidingSumAccumulator {
+    sum: ScalarValue,
+    count: u64,
+}
+
+impl SlidingSumAccumulator {
+    /// new sliding sum accumulator
+    pub fn try_new(data_type: &DataType) -> Result<Self> {
+        Ok(Self {
+            // start at zero
+            sum: ScalarValue::try_from(data_type)?,
             count: 0,
         })
     }
 }
 
-// returns the new value after sum with the new values, taking nullability into account
+/// Sums the contents of the `$VALUES` array using the arrow compute
+/// kernel, and returns a `ScalarValue::$SCALAR`.
+///
+/// Handles nullability
 macro_rules! typed_sum_delta_batch {
     ($VALUES:expr, $ARRAYTYPE:ident, $SCALAR:ident) => {{
         let array = downcast_value!($VALUES, $ARRAYTYPE);
@@ -376,6 +389,34 @@ pub(crate) fn update_avg_to_row(
 }
 
 impl Accumulator for SumAccumulator {
+    fn state(&self) -> Result<Vec<ScalarValue>> {
+        Ok(vec![self.sum.clone()])
+    }
+
+    fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
+        let values = &values[0];
+        let delta = sum_batch(values, &self.sum.get_datatype())?;
+        self.sum = self.sum.add(&delta)?;
+        Ok(())
+    }
+
+    fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> {
+        // sum(sum1, sum2, sum3, ...) = sum1 + sum2 + sum3 + ...
+        self.update_batch(states)
+    }
+
+    fn evaluate(&self) -> Result<ScalarValue> {
+        // TODO: add the checker for overflow
+        // For the decimal(precision,_) data type, the absolute value must be less than 10^precision.
+        Ok(self.sum.clone())
+    }
+
+    fn size(&self) -> usize {
+        std::mem::size_of_val(self) - std::mem::size_of_val(&self.sum) + self.sum.size()
+    }
+}
+
+impl Accumulator for SlidingSumAccumulator {
     fn state(&self) -> Result<Vec<ScalarValue>> {
         Ok(vec![self.sum.clone(), ScalarValue::from(self.count)])
     }

From afcab34181dc4d3800713606b68a058c3bba5be6 Mon Sep 17 00:00:00 2001
From: Andrew Lamb <andrew@nerdnetworks.org>
Date: Sat, 8 Jul 2023 11:46:11 -0400
Subject: [PATCH 89/89] Simplify sum and make it faster

---
 datafusion/physical-expr/src/aggregate/sum.rs | 42 ++-----------------
 1 file changed, 3 insertions(+), 39 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs
index dd00299e9e0c..c8e9a4028f40 100644
--- a/datafusion/physical-expr/src/aggregate/sum.rs
+++ b/datafusion/physical-expr/src/aggregate/sum.rs
@@ -45,7 +45,6 @@ use arrow_array::types::{
     UInt64Type,
 };
 use arrow_array::{ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray};
-use arrow_buffer::{BooleanBufferBuilder, NullBuffer};
 use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
 use datafusion_expr::Accumulator;
 use datafusion_row::accessor::RowAccessor;
@@ -557,13 +556,6 @@ where
     }
 }
 
-/// Create a buffer of len elements, representing all NULL values
-fn make_all_nulls(len: usize) -> NullBuffer {
-    let mut nulls = BooleanBufferBuilder::new(len);
-    nulls.append_n(len, false);
-    NullBuffer::new(nulls.finish())
-}
-
 impl<T> GroupsAccumulator for SumGroupsAccumulator<T>
 where
     T: ArrowNumericType + Send,
@@ -604,26 +596,7 @@ where
         opt_filter: Option<&arrow_array::BooleanArray>,
         total_num_groups: usize,
     ) -> Result<()> {
-        assert_eq!(values.len(), 2, "two arguments to merge_batch");
-        // first batch is partial sums
-        let partial_sums: &PrimitiveArray<T> = values.get(0).unwrap().as_primitive::<T>();
-
-        // Sum partial sums
-        self.sums
-            .resize_with(total_num_groups, || T::default_value());
-
-        self.null_state.accumulate(
-            group_indices,
-            partial_sums,
-            opt_filter,
-            total_num_groups,
-            |group_index, new_value| {
-                let sum = &mut self.sums[group_index];
-                *sum = sum.add_wrapping(new_value);
-            },
-        );
-
-        Ok(())
+        self.update_batch(values, group_indices, opt_filter, total_num_groups)
     }
 
     fn evaluate(&mut self) -> Result<ArrayRef> {
@@ -636,7 +609,7 @@ where
         Ok(Arc::new(sums))
     }
 
-    // return arrays for sums and counts
+    // return arrays for sums
     fn state(&mut self) -> Result<Vec<ArrayRef>> {
         let nulls = self.null_state.build();
 
@@ -645,16 +618,7 @@ where
 
         let sums = adjust_output_array(&self.sum_data_type, sums)?;
 
-        // TODO File a ticket: Sum expects sum/count array, but count
-        // is only needed for retractable aggregates. We could improve
-        // performance by only including it when needed.
-        // https://github.com/apache/arrow-datafusion/issues/6878
-        let counts = vec![0_u64; sums.len()];
-        let all_nulls = Some(make_all_nulls(sums.len()));
-        let counts =
-            Arc::new(PrimitiveArray::<UInt64Type>::new(counts.into(), all_nulls));
-
-        Ok(vec![sums.clone() as ArrayRef, counts as ArrayRef])
+        Ok(vec![sums.clone() as ArrayRef])
     }
 
     fn size(&self) -> usize {