From cf11a700eb6a5385a6ebade2b92c684380940296 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Wed, 21 Feb 2024 10:53:02 -0500 Subject: [PATCH 01/45] Fix toml format script. (#9309) --- ci/scripts/rust_toml_fmt.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/scripts/rust_toml_fmt.sh b/ci/scripts/rust_toml_fmt.sh index 9204a562a3fe..393ad55f4168 100755 --- a/ci/scripts/rust_toml_fmt.sh +++ b/ci/scripts/rust_toml_fmt.sh @@ -21,5 +21,4 @@ # without overwritng the file. If any error occur, you may want to # rerun `taplo format` to fix the formatting automatically. set -ex -taplo format -done +taplo format --check From 91f3eb2e5430d23e2b551e66732bec1a3a575971 Mon Sep 17 00:00:00 2001 From: SteveLauC Date: Thu, 22 Feb 2024 10:53:36 +0800 Subject: [PATCH 02/45] docs: update contributor guide (format toml/inte test) (#9301) --- docs/source/contributor-guide/index.md | 27 +++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/source/contributor-guide/index.md b/docs/source/contributor-guide/index.md index 3b0d4932c2cf..839055f6ed7a 100644 --- a/docs/source/contributor-guide/index.md +++ b/docs/source/contributor-guide/index.md @@ -202,7 +202,7 @@ There are several tests of the public interface of the DataFusion library in the You can run these tests individually using `cargo` as normal command such as ```shell -cargo test -p datafusion --test dataframe +cargo test -p datafusion --test parquet_exec ``` ## Benchmarks @@ -338,3 +338,28 @@ After you've confirmed your prettier version, you can format all the `.md` files ```bash prettier -w {datafusion,datafusion-cli,datafusion-examples,dev,docs}/**/*.md ``` + +## How to format `.toml` files + +We use `taplo` to format `.toml` files. + +For Rust developers, you can install it via: + +```sh +cargo install taplo-cli --locked +``` + +> Refer to the [Installation section][doc] on other ways to install it. 
+> +> [doc]: https://taplo.tamasfe.dev/cli/installation/binary.html + +```bash +$ taplo --version +taplo 0.9.0 +``` + +After you've confirmed your `taplo` version, you can format all the `.toml` files: + +```bash +taplo fmt +``` From 10000fb5bdb88a5e7a5cbfb81f3bd5d028bbeea7 Mon Sep 17 00:00:00 2001 From: metesynnada <100111937+metesynnada@users.noreply.github.com> Date: Thu, 22 Feb 2024 09:56:23 +0300 Subject: [PATCH 03/45] Delete docs.yaml --- .github/workflows/docs.yaml | 64 ------------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 .github/workflows/docs.yaml diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml deleted file mode 100644 index ab6a615ab60b..000000000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,64 +0,0 @@ -on: - push: - branches: - - main - paths: - - .asf.yaml - - .github/workflows/docs.yaml - - docs/** - -name: Deploy DataFusion site - -jobs: - build-docs: - name: Build docs - runs-on: ubuntu-latest - steps: - - name: Checkout docs sources - uses: actions/checkout@v4 - - - name: Checkout asf-site branch - uses: actions/checkout@v4 - with: - ref: asf-site - path: asf-site - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install dependencies - run: | - set -x - python3 -m venv venv - source venv/bin/activate - pip install -r docs/requirements.txt - - - name: Build docs - run: | - set -x - source venv/bin/activate - cd docs - ./build.sh - - - name: Copy & push the generated HTML - run: | - set -x - cd asf-site/ - rsync \ - -a \ - --delete \ - --exclude '/.git/' \ - ../docs/build/html/ \ - ./ - cp ../.asf.yaml . - touch .nojekyll - git status --porcelain - if [ "$(git status --porcelain)" != "" ]; then - git config user.name "github-actions[bot]" - git config user.email "github-actions[bot]@users.noreply.github.com" - git add --all - git commit -m 'Publish built docs triggered by ${{ github.sha }}' - git push || git push --force - fi From 02c948d91b82f3e8988fafd12a072a476500c13d Mon Sep 17 00:00:00 2001 From: Mustafa Akur <106137913+mustafasrepo@users.noreply.github.com> Date: Thu, 22 Feb 2024 20:05:13 +0300 Subject: [PATCH 04/45] [MINOR]: Limit stream replace with slice (#9303) * Initial commit * Minor changes --- datafusion/physical-plan/src/limit.rs | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index c31d5f62c726..417bc4cf977b 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -29,9 +29,8 @@ use crate::{ DisplayFormatType, Distribution, EquivalenceProperties, ExecutionPlan, Partitioning, }; -use arrow::array::ArrayRef; use arrow::datatypes::SchemaRef; -use arrow::record_batch::{RecordBatch, RecordBatchOptions}; +use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::TaskContext; @@ -507,26 +506,15 @@ impl LimitStream { // self.fetch -= batch.num_rows(); Some(batch) - } else { + } else if batch.num_rows() >= self.fetch { let batch_rows = self.fetch; self.fetch = 0; self.input = None; // clear input so it can be dropped early - let limited_columns: Vec = batch - .columns() - .iter() - .map(|col| col.slice(0, col.len().min(batch_rows))) - .collect(); - let options = - RecordBatchOptions::new().with_row_count(Option::from(batch_rows)); - Some( - RecordBatch::try_new_with_options( - 
batch.schema(), - limited_columns, - &options, - ) - .unwrap(), - ) + // It is guaranteed that batch_rows is <= batch.num_rows + Some(batch.slice(0, batch_rows)) + } else { + unreachable!() } } } @@ -575,6 +563,7 @@ mod tests { use crate::{common, test}; use crate::aggregates::{AggregateExec, AggregateMode, PhysicalGroupBy}; + use arrow_array::RecordBatchOptions; use arrow_schema::Schema; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalExpr; From a851ecf1cc24a6b867d40087d8e890b9307137c1 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 22 Feb 2024 19:16:30 -0800 Subject: [PATCH 05/45] Support IGNORE NULLS for LAG window function (#9221) * WIP lag/lead ignore nulls * Support IGNORE NULLS for LAG function * fmt * comments * remove comments * Add new tests, minor changes, trigger evalaute_all * Make algorithm pruning friendly --------- Co-authored-by: Mustafa Akur --- datafusion/core/src/dataframe/mod.rs | 1 + .../core/src/physical_optimizer/test_utils.rs | 1 + datafusion/core/src/physical_planner.rs | 6 + datafusion/core/tests/dataframe/mod.rs | 1 + .../core/tests/fuzz_cases/window_fuzz.rs | 3 + datafusion/expr/src/expr.rs | 18 +++ datafusion/expr/src/tree_node/expr.rs | 2 + datafusion/expr/src/udwf.rs | 1 + datafusion/expr/src/utils.rs | 10 ++ .../src/analyzer/count_wildcard_rule.rs | 3 + .../optimizer/src/analyzer/type_coercion.rs | 2 + .../optimizer/src/push_down_projection.rs | 2 + .../physical-expr/src/window/lead_lag.rs | 88 ++++++++++++-- datafusion/physical-plan/src/windows/mod.rs | 8 +- .../proto/src/logical_plan/from_proto.rs | 6 + datafusion/proto/src/logical_plan/to_proto.rs | 2 + .../proto/src/physical_plan/from_proto.rs | 1 + .../tests/cases/roundtrip_logical_plan.rs | 6 + datafusion/sql/src/expr/function.rs | 16 ++- datafusion/sqllogictest/test_files/window.slt | 107 ++++++++++++++++++ .../substrait/src/logical_plan/consumer.rs | 1 + .../substrait/src/logical_plan/producer.rs | 1 + 22 files changed, 272 insertions(+), 14 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 4ec16ac942b2..e407c477ae4c 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1685,6 +1685,7 @@ mod tests { vec![col("aggregate_test_100.c2")], vec![], WindowFrame::new(None), + None, )); let t2 = t.select(vec![col("c1"), first_row])?; let plan = t2.plan.clone(); diff --git a/datafusion/core/src/physical_optimizer/test_utils.rs b/datafusion/core/src/physical_optimizer/test_utils.rs index ca7fb78d21b1..3898fb6345f0 100644 --- a/datafusion/core/src/physical_optimizer/test_utils.rs +++ b/datafusion/core/src/physical_optimizer/test_utils.rs @@ -245,6 +245,7 @@ pub fn bounded_window_exec( &sort_exprs, Arc::new(WindowFrame::new(Some(false))), schema.as_ref(), + false, ) .unwrap()], input.clone(), diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index dabf0a91b2d3..23ac7e08cad8 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -100,6 +100,7 @@ use futures::future::BoxFuture; use futures::{FutureExt, StreamExt, TryStreamExt}; use itertools::{multiunzip, Itertools}; use log::{debug, trace}; +use sqlparser::ast::NullTreatment; fn create_function_physical_name( fun: &str, @@ -1581,6 +1582,7 @@ pub fn create_window_expr_with_name( partition_by, order_by, window_frame, + null_treatment, }) => { let args = args .iter() @@ -1605,6 +1607,9 @@ pub fn create_window_expr_with_name( } let 
window_frame = Arc::new(window_frame.clone()); + let ignore_nulls = null_treatment + .unwrap_or(sqlparser::ast::NullTreatment::RespectNulls) + == NullTreatment::IgnoreNulls; windows::create_window_expr( fun, name, @@ -1613,6 +1618,7 @@ pub fn create_window_expr_with_name( &order_by, window_frame, physical_input_schema, + ignore_nulls, ) } other => plan_err!("Invalid window expression '{other:?}'"), diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index f650e9e39d88..b08b2b8fc7a2 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -182,6 +182,7 @@ async fn test_count_wildcard_on_window() -> Result<()> { WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), ), + None, ))])? .explain(false, false)? .collect() diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index d22d0c0f2ee0..609d26c9c253 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -281,6 +281,7 @@ async fn bounded_window_causal_non_causal() -> Result<()> { &orderby_exprs, Arc::new(window_frame), schema.as_ref(), + false, )?; let running_window_exec = Arc::new(BoundedWindowAggExec::try_new( vec![window_expr], @@ -642,6 +643,7 @@ async fn run_window_test( &orderby_exprs, Arc::new(window_frame.clone()), schema.as_ref(), + false, ) .unwrap()], exec1, @@ -664,6 +666,7 @@ async fn run_window_test( &orderby_exprs, Arc::new(window_frame.clone()), schema.as_ref(), + false, ) .unwrap()], exec2, diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 09de4b708de9..f40ccb6cdb58 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -30,6 +30,7 @@ use arrow::datatypes::DataType; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{internal_err, DFSchema, OwnedTableReference}; use datafusion_common::{plan_err, Column, DataFusionError, Result, ScalarValue}; +use sqlparser::ast::NullTreatment; use std::collections::HashSet; use std::fmt; use std::fmt::{Display, Formatter, Write}; @@ -646,6 +647,7 @@ pub struct WindowFunction { pub order_by: Vec, /// Window frame pub window_frame: window_frame::WindowFrame, + pub null_treatment: Option, } impl WindowFunction { @@ -656,6 +658,7 @@ impl WindowFunction { partition_by: Vec, order_by: Vec, window_frame: window_frame::WindowFrame, + null_treatment: Option, ) -> Self { Self { fun, @@ -663,6 +666,7 @@ impl WindowFunction { partition_by, order_by, window_frame, + null_treatment, } } } @@ -1440,8 +1444,14 @@ impl fmt::Display for Expr { partition_by, order_by, window_frame, + null_treatment, }) => { fmt_function(f, &fun.to_string(), false, args, true)?; + + if let Some(nt) = null_treatment { + write!(f, "{}", nt)?; + } + if !partition_by.is_empty() { write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?; } @@ -1768,15 +1778,23 @@ fn create_name(e: &Expr) -> Result { window_frame, partition_by, order_by, + null_treatment, }) => { let mut parts: Vec = vec![create_function_name(&fun.to_string(), false, args)?]; + + if let Some(nt) = null_treatment { + parts.push(format!("{}", nt)); + } + if !partition_by.is_empty() { parts.push(format!("PARTITION BY [{}]", expr_vec_fmt!(partition_by))); } + if !order_by.is_empty() { parts.push(format!("ORDER BY [{}]", expr_vec_fmt!(order_by))); } + parts.push(format!("{window_frame}")); Ok(parts.join(" ")) } diff --git 
a/datafusion/expr/src/tree_node/expr.rs b/datafusion/expr/src/tree_node/expr.rs index add15b3d7ad7..def25ed9242f 100644 --- a/datafusion/expr/src/tree_node/expr.rs +++ b/datafusion/expr/src/tree_node/expr.rs @@ -283,12 +283,14 @@ impl TreeNode for Expr { partition_by, order_by, window_frame, + null_treatment, }) => Expr::WindowFunction(WindowFunction::new( fun, transform_vec(args, &mut transform)?, transform_vec(partition_by, &mut transform)?, transform_vec(order_by, &mut transform)?, window_frame, + null_treatment, )), Expr::AggregateFunction(AggregateFunction { args, diff --git a/datafusion/expr/src/udwf.rs b/datafusion/expr/src/udwf.rs index 953483408865..7e3eb6c001a1 100644 --- a/datafusion/expr/src/udwf.rs +++ b/datafusion/expr/src/udwf.rs @@ -130,6 +130,7 @@ impl WindowUDF { partition_by, order_by, window_frame, + null_treatment: None, }) } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index e855554f3687..2fda81d8896f 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1255,6 +1255,7 @@ mod tests { vec![], vec![], WindowFrame::new(None), + None, )); let max2 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), @@ -1262,6 +1263,7 @@ mod tests { vec![], vec![], WindowFrame::new(None), + None, )); let min3 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Min), @@ -1269,6 +1271,7 @@ mod tests { vec![], vec![], WindowFrame::new(None), + None, )); let sum4 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Sum), @@ -1276,6 +1279,7 @@ mod tests { vec![], vec![], WindowFrame::new(None), + None, )); let exprs = &[max1.clone(), max2.clone(), min3.clone(), sum4.clone()]; let result = group_window_expr_by_sort_keys(exprs.to_vec())?; @@ -1298,6 +1302,7 @@ mod tests { vec![], vec![age_asc.clone(), name_desc.clone()], WindowFrame::new(Some(false)), + None, )); let max2 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Max), @@ -1305,6 +1310,7 @@ mod tests { vec![], vec![], WindowFrame::new(None), + None, )); let min3 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Min), @@ -1312,6 +1318,7 @@ mod tests { vec![], vec![age_asc.clone(), name_desc.clone()], WindowFrame::new(Some(false)), + None, )); let sum4 = Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Sum), @@ -1319,6 +1326,7 @@ mod tests { vec![], vec![name_desc.clone(), age_asc.clone(), created_at_desc.clone()], WindowFrame::new(Some(false)), + None, )); // FIXME use as_ref let exprs = &[max1.clone(), max2.clone(), min3.clone(), sum4.clone()]; @@ -1353,6 +1361,7 @@ mod tests { Expr::Sort(expr::Sort::new(Box::new(col("name")), false, true)), ], WindowFrame::new(Some(false)), + None, )), Expr::WindowFunction(expr::WindowFunction::new( WindowFunctionDefinition::AggregateFunction(AggregateFunction::Sum), @@ -1364,6 +1373,7 @@ mod tests { Expr::Sort(expr::Sort::new(Box::new(col("created_at")), false, true)), ], WindowFrame::new(Some(false)), + None, )), ]; let expected = vec![ diff --git a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs index 35a859783239..9242e68562c6 100644 --- a/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs +++ 
b/datafusion/optimizer/src/analyzer/count_wildcard_rule.rs @@ -128,6 +128,7 @@ impl TreeNodeRewriter for CountWildcardRewriter { partition_by, order_by, window_frame, + null_treatment, }) if args.len() == 1 => match args[0] { Expr::Wildcard { qualifier: None } => { Expr::WindowFunction(expr::WindowFunction { @@ -138,6 +139,7 @@ impl TreeNodeRewriter for CountWildcardRewriter { partition_by, order_by, window_frame, + null_treatment, }) } @@ -351,6 +353,7 @@ mod tests { WindowFrameBound::Preceding(ScalarValue::UInt32(Some(6))), WindowFrameBound::Following(ScalarValue::UInt32(Some(2))), ), + None, ))])? .project(vec![count(wildcard())])? .build()?; diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index fba77047dd74..8cdb4d7dbdf6 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -392,6 +392,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter { partition_by, order_by, window_frame, + null_treatment, }) => { let window_frame = coerce_window_frame(window_frame, &self.schema, &order_by)?; @@ -414,6 +415,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter { partition_by, order_by, window_frame, + null_treatment, )); Ok(expr) } diff --git a/datafusion/optimizer/src/push_down_projection.rs b/datafusion/optimizer/src/push_down_projection.rs index 6a003ecb5fa8..8b7a9148b590 100644 --- a/datafusion/optimizer/src/push_down_projection.rs +++ b/datafusion/optimizer/src/push_down_projection.rs @@ -587,6 +587,7 @@ mod tests { vec![col("test.b")], vec![], WindowFrame::new(None), + None, )); let max2 = Expr::WindowFunction(expr::WindowFunction::new( @@ -595,6 +596,7 @@ mod tests { vec![], vec![], WindowFrame::new(None), + None, )); let col1 = col(max1.display_name()?); let col2 = col(max2.display_name()?); diff --git a/datafusion/physical-expr/src/window/lead_lag.rs b/datafusion/physical-expr/src/window/lead_lag.rs index 6a33f26ca126..6e1aad575f6a 100644 --- a/datafusion/physical-expr/src/window/lead_lag.rs +++ b/datafusion/physical-expr/src/window/lead_lag.rs @@ -23,10 +23,14 @@ use crate::PhysicalExpr; use arrow::array::ArrayRef; use arrow::compute::cast; use arrow::datatypes::{DataType, Field}; -use datafusion_common::{arrow_datafusion_err, DataFusionError, Result, ScalarValue}; +use arrow_array::Array; +use datafusion_common::{ + arrow_datafusion_err, exec_datafusion_err, DataFusionError, Result, ScalarValue, +}; use datafusion_expr::PartitionEvaluator; use std::any::Any; use std::cmp::min; +use std::collections::VecDeque; use std::ops::{Neg, Range}; use std::sync::Arc; @@ -39,6 +43,7 @@ pub struct WindowShift { shift_offset: i64, expr: Arc, default_value: Option, + ignore_nulls: bool, } impl WindowShift { @@ -60,6 +65,7 @@ pub fn lead( expr: Arc, shift_offset: Option, default_value: Option, + ignore_nulls: bool, ) -> WindowShift { WindowShift { name, @@ -67,6 +73,7 @@ pub fn lead( shift_offset: shift_offset.map(|v| v.neg()).unwrap_or(-1), expr, default_value, + ignore_nulls, } } @@ -77,6 +84,7 @@ pub fn lag( expr: Arc, shift_offset: Option, default_value: Option, + ignore_nulls: bool, ) -> WindowShift { WindowShift { name, @@ -84,6 +92,7 @@ pub fn lag( shift_offset: shift_offset.unwrap_or(1), expr, default_value, + ignore_nulls, } } @@ -110,6 +119,8 @@ impl BuiltInWindowFunctionExpr for WindowShift { Ok(Box::new(WindowShiftEvaluator { shift_offset: self.shift_offset, default_value: self.default_value.clone(), + ignore_nulls: self.ignore_nulls, + non_null_offsets: 
VecDeque::new(), })) } @@ -120,6 +131,7 @@ impl BuiltInWindowFunctionExpr for WindowShift { shift_offset: -self.shift_offset, expr: self.expr.clone(), default_value: self.default_value.clone(), + ignore_nulls: self.ignore_nulls, })) } } @@ -128,6 +140,16 @@ impl BuiltInWindowFunctionExpr for WindowShift { pub(crate) struct WindowShiftEvaluator { shift_offset: i64, default_value: Option, + ignore_nulls: bool, + // VecDeque contains offset values that between non-null entries + non_null_offsets: VecDeque, +} + +impl WindowShiftEvaluator { + fn is_lag(&self) -> bool { + // Mode is LAG, when shift_offset is positive + self.shift_offset > 0 + } } fn create_empty_array( @@ -182,9 +204,13 @@ fn shift_with_default_value( impl PartitionEvaluator for WindowShiftEvaluator { fn get_range(&self, idx: usize, n_rows: usize) -> Result> { - if self.shift_offset > 0 { - let offset = self.shift_offset as usize; - let start = idx.saturating_sub(offset); + if self.is_lag() { + let start = if self.non_null_offsets.len() == self.shift_offset as usize { + let offset: usize = self.non_null_offsets.iter().sum(); + idx.saturating_sub(offset + 1) + } else { + 0 + }; let end = idx + 1; Ok(Range { start, end }) } else { @@ -196,7 +222,7 @@ impl PartitionEvaluator for WindowShiftEvaluator { fn is_causal(&self) -> bool { // Lagging windows are causal by definition: - self.shift_offset > 0 + self.is_lag() } fn evaluate( @@ -204,17 +230,57 @@ impl PartitionEvaluator for WindowShiftEvaluator { values: &[ArrayRef], range: &Range, ) -> Result { + // TODO: try to get rid of i64 usize conversion + // TODO: do not recalculate default value every call + // TODO: support LEAD mode for IGNORE NULLS let array = &values[0]; let dtype = array.data_type(); + let len = array.len() as i64; // LAG mode - let idx = if self.shift_offset > 0 { + let mut idx = if self.is_lag() { range.end as i64 - self.shift_offset - 1 } else { // LEAD mode range.start as i64 - self.shift_offset }; - if idx < 0 || idx as usize >= array.len() { + // Support LAG only for now, as LEAD requires some brainstorm first + // LAG with IGNORE NULLS calculated as the current row index - offset, but only for non-NULL rows + // If current row index points to NULL value the row is NOT counted + if self.ignore_nulls && self.is_lag() { + // Find the nonNULL row index that shifted by offset comparing to current row index + idx = if self.non_null_offsets.len() == self.shift_offset as usize { + let total_offset: usize = self.non_null_offsets.iter().sum(); + (range.end - 1 - total_offset) as i64 + } else { + -1 + }; + + // Keep track of offset values between non-null entries + if array.is_valid(range.end - 1) { + // Non-null add new offset + self.non_null_offsets.push_back(1); + if self.non_null_offsets.len() > self.shift_offset as usize { + // WE do not need to keep track of more than `lag number of offset` values. + self.non_null_offsets.pop_front(); + } + } else if !self.non_null_offsets.is_empty() { + // Entry is null, increment offset value of the last entry. + let end_idx = self.non_null_offsets.len() - 1; + self.non_null_offsets[end_idx] += 1; + } + } else if self.ignore_nulls && !self.is_lag() { + // IGNORE NULLS and LEAD mode. 
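// LEAD with IGNORE NULLS would have to look *ahead* past NULL rows that
// this streaming evaluator has not received yet, so the incremental
// `non_null_offsets` bookkeeping used for LAG above does not carry over
// (see the TODO above); returning an execution error is safer than
// silently producing wrong results.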
+ return Err(exec_datafusion_err!( + "IGNORE NULLS mode for LEAD is not supported for BoundedWindowAggExec" + )); + } + + // Set the default value if + // - index is out of window bounds + // OR + // - ignore nulls mode and current value is null and is within window bounds + if idx < 0 || idx >= len || (self.ignore_nulls && array.is_null(idx as usize)) { get_default_value(self.default_value.as_ref(), dtype) } else { ScalarValue::try_from_array(array, idx as usize) @@ -226,6 +292,11 @@ impl PartitionEvaluator for WindowShiftEvaluator { values: &[ArrayRef], _num_rows: usize, ) -> Result { + if self.ignore_nulls { + return Err(exec_datafusion_err!( + "IGNORE NULLS mode for LAG and LEAD is not supported for WindowAggExec" + )); + } // LEAD, LAG window functions take single column, values will have size 1 let value = &values[0]; shift_with_default_value(value, self.shift_offset, self.default_value.as_ref()) @@ -279,6 +350,7 @@ mod tests { Arc::new(Column::new("c3", 0)), None, None, + false, ), [ Some(-2), @@ -301,6 +373,7 @@ mod tests { Arc::new(Column::new("c3", 0)), None, None, + false, ), [ None, @@ -323,6 +396,7 @@ mod tests { Arc::new(Column::new("c3", 0)), None, Some(ScalarValue::Int32(Some(100))), + false, ), [ Some(100), diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 693d20e90a66..bf6ed925356c 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -55,6 +55,7 @@ pub use datafusion_physical_expr::window::{ }; /// Create a physical expression for window function +#[allow(clippy::too_many_arguments)] pub fn create_window_expr( fun: &WindowFunctionDefinition, name: String, @@ -63,6 +64,7 @@ pub fn create_window_expr( order_by: &[PhysicalSortExpr], window_frame: Arc, input_schema: &Schema, + ignore_nulls: bool, ) -> Result> { Ok(match fun { WindowFunctionDefinition::AggregateFunction(fun) => { @@ -83,7 +85,7 @@ pub fn create_window_expr( } WindowFunctionDefinition::BuiltInWindowFunction(fun) => { Arc::new(BuiltInWindowExpr::new( - create_built_in_window_expr(fun, args, input_schema, name)?, + create_built_in_window_expr(fun, args, input_schema, name, ignore_nulls)?, partition_by, order_by, window_frame, @@ -159,6 +161,7 @@ fn create_built_in_window_expr( args: &[Arc], input_schema: &Schema, name: String, + ignore_nulls: bool, ) -> Result> { // need to get the types into an owned vec for some reason let input_types: Vec<_> = args @@ -208,6 +211,7 @@ fn create_built_in_window_expr( arg, shift_offset, default_value, + ignore_nulls, )) } BuiltInWindowFunction::Lead => { @@ -222,6 +226,7 @@ fn create_built_in_window_expr( arg, shift_offset, default_value, + ignore_nulls, )) } BuiltInWindowFunction::NthValue => { @@ -671,6 +676,7 @@ mod tests { &[], Arc::new(WindowFrame::new(None)), schema.as_ref(), + false, )?], blocking_exec, vec![], diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index f1ee84a8221d..2554018a9273 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -1100,6 +1100,8 @@ pub fn parse_expr( "missing window frame during deserialization".to_string(), ) })?; + // TODO: support proto for null treatment + let null_treatment = None; regularize_window_order_by(&window_frame, &mut order_by)?; match window_function { @@ -1114,6 +1116,7 @@ pub fn parse_expr( partition_by, order_by, window_frame, + None ))) } 
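// `null_treatment` is currently hardcoded to `None` above because the
// protobuf definition does not carry it yet (see the TODO); only the
// built-in window function arm below threads the variable through, while
// the aggregate, UDAF and UDWF arms pass `None` explicitly.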
window_expr_node::WindowFunction::BuiltInFunction(i) => { @@ -1133,6 +1136,7 @@ pub fn parse_expr( partition_by, order_by, window_frame, + null_treatment ))) } window_expr_node::WindowFunction::Udaf(udaf_name) => { @@ -1148,6 +1152,7 @@ pub fn parse_expr( partition_by, order_by, window_frame, + None, ))) } window_expr_node::WindowFunction::Udwf(udwf_name) => { @@ -1163,6 +1168,7 @@ pub fn parse_expr( partition_by, order_by, window_frame, + None, ))) } } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index a6348e909cb0..ccadbb217a58 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -606,6 +606,8 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { ref partition_by, ref order_by, ref window_frame, + // TODO: support null treatment in proto + null_treatment: _, }) => { let window_function = match fun { WindowFunctionDefinition::AggregateFunction(fun) => { diff --git a/datafusion/proto/src/physical_plan/from_proto.rs b/datafusion/proto/src/physical_plan/from_proto.rs index 628ee5ad9b7a..af0aa485c348 100644 --- a/datafusion/proto/src/physical_plan/from_proto.rs +++ b/datafusion/proto/src/physical_plan/from_proto.rs @@ -176,6 +176,7 @@ pub fn parse_physical_window_expr( &order_by, Arc::new(window_frame), input_schema, + false, ) } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 81f59975476f..6ca757908159 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1718,6 +1718,7 @@ fn roundtrip_window() { vec![col("col1")], vec![col("col2")], WindowFrame::new(Some(false)), + None, )); // 2. with default window_frame @@ -1729,6 +1730,7 @@ fn roundtrip_window() { vec![col("col1")], vec![col("col2")], WindowFrame::new(Some(false)), + None, )); // 3. with window_frame with row numbers @@ -1746,6 +1748,7 @@ fn roundtrip_window() { vec![col("col1")], vec![col("col2")], range_number_frame, + None, )); // 4. test with AggregateFunction @@ -1761,6 +1764,7 @@ fn roundtrip_window() { vec![col("col1")], vec![col("col2")], row_number_frame.clone(), + None, )); // 5. test with AggregateUDF @@ -1812,6 +1816,7 @@ fn roundtrip_window() { vec![col("col1")], vec![col("col2")], row_number_frame.clone(), + None, )); ctx.register_udaf(dummy_agg); @@ -1887,6 +1892,7 @@ fn roundtrip_window() { vec![col("col1")], vec![col("col2")], row_number_frame, + None, )); ctx.register_udwf(dummy_window_udf); diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index 64b8d6957d2b..f56138066cb6 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -52,8 +52,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { order_by, } = function; - if let Some(null_treatment) = null_treatment { - return not_impl_err!("Null treatment in aggregate functions is not supported: {null_treatment}"); + // If function is a window function (it has an OVER clause), + // it shouldn't have ordering requirement as function argument + // required ordering should be defined in OVER clause. 
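+ // IGNORE NULLS / RESPECT NULLS is likewise only meaningful together with
+ // an OVER clause, so the match below rejects it for plain aggregate calls.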
+ let is_function_window = over.is_some(); + + match null_treatment { + Some(null_treatment) if !is_function_window => return not_impl_err!("Null treatment in aggregate functions is not supported: {null_treatment}"), + _ => {} } let name = if name.0.len() > 1 { @@ -120,10 +126,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { return Ok(Expr::ScalarFunction(ScalarFunction::new(fun, args))); }; - // If function is a window function (it has an OVER clause), - // it shouldn't have ordering requirement as function argument - // required ordering should be defined in OVER clause. - let is_function_window = over.is_some(); if !order_by.is_empty() && is_function_window { return plan_err!( "Aggregate ORDER BY is not implemented for window functions" @@ -198,6 +200,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { partition_by, order_by, window_frame, + null_treatment, )) } _ => Expr::WindowFunction(expr::WindowFunction::new( @@ -206,6 +209,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { partition_by, order_by, window_frame, + null_treatment, )), }; return Ok(expr); diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 9276f6e1e325..8d6b314747bb 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -4102,3 +4102,110 @@ ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRE ----------CoalesceBatchesExec: target_batch_size=4096 ------------FilterExec: a@0 = 1 --------------MemoryExec: partitions=1, partition_sizes=[1] + +# LAG window function IGNORE/RESPECT NULLS support with ascending order and default offset 1 +query TTTTTT +select lag(a) ignore nulls over (order by id) as x, + lag(a, 1, null) ignore nulls over (order by id) as x1, + lag(a, 1, 'def') ignore nulls over (order by id) as x2, + lag(a) respect nulls over (order by id) as x3, + lag(a, 1, null) respect nulls over (order by id) as x4, + lag(a, 1, 'def') respect nulls over (order by id) as x5 +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') +---- +NULL NULL def NULL NULL def +NULL NULL def NULL NULL NULL +b b b b b b +b b b NULL NULL NULL + +# LAG window function IGNORE/RESPECT NULLS support with descending order and default offset 1 +query TTTTTT +select lag(a) ignore nulls over (order by id desc) as x, + lag(a, 1, null) ignore nulls over (order by id desc) as x1, + lag(a, 1, 'def') ignore nulls over (order by id desc) as x2, + lag(a) respect nulls over (order by id desc) as x3, + lag(a, 1, null) respect nulls over (order by id desc) as x4, + lag(a, 1, 'def') respect nulls over (order by id desc) as x5 +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') +---- +NULL NULL def NULL NULL def +x x x x x x +x x x NULL NULL NULL +b b b b b b + +# LAG window function IGNORE/RESPECT NULLS support with ascending order and nondefault offset +query TTTT +select lag(a, 2, null) ignore nulls over (order by id) as x1, + lag(a, 2, 'def') ignore nulls over (order by id) as x2, + lag(a, 2, null) respect nulls over (order by id) as x4, + lag(a, 2, 'def') respect nulls over (order by id) as x5 +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') +---- +NULL def NULL def +NULL def NULL def +NULL def NULL NULL +NULL def b b + +# LAG window function IGNORE/RESPECT NULLS support with descending order and nondefault offset +query TTTT 
+select lag(a, 2, null) ignore nulls over (order by id desc) as x1,
+       lag(a, 2, 'def') ignore nulls over (order by id desc) as x2,
+       lag(a, 2, null) respect nulls over (order by id desc) as x4,
+       lag(a, 2, 'def') respect nulls over (order by id desc) as x5
+from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
+----
+NULL def NULL def
+NULL def NULL def
+NULL def x x
+x x NULL NULL
+
+# LAG window function IGNORE/RESPECT NULLS support with descending order and nondefault offset.
+# To trigger WindowAggExec, we add a SUM window function whose frame spans the whole range.
+statement error Execution error: IGNORE NULLS mode for LAG and LEAD is not supported for WindowAggExec
+select lag(a, 2, null) ignore nulls over (order by id desc) as x1,
+       lag(a, 2, 'def') ignore nulls over (order by id desc) as x2,
+       lag(a, 2, null) respect nulls over (order by id desc) as x4,
+       lag(a, 2, 'def') respect nulls over (order by id desc) as x5,
+       sum(id) over (order by id desc ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as sum_id
+from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
+
+# LEAD window function IGNORE/RESPECT NULLS support with descending order and nondefault offset
+statement error Execution error: IGNORE NULLS mode for LEAD is not supported for BoundedWindowAggExec
+select lead(a, 2, null) ignore nulls over (order by id desc) as x1,
+       lead(a, 2, 'def') ignore nulls over (order by id desc) as x2,
+       lead(a, 2, null) respect nulls over (order by id desc) as x4,
+       lead(a, 2, 'def') respect nulls over (order by id desc) as x5
+from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x')
+
+statement ok
+set datafusion.execution.batch_size = 1000;
+
+query I
+SELECT LAG(c1, 2) IGNORE NULLS OVER()
+FROM null_cases
+ORDER BY c2
+LIMIT 5;
+----
+78
+63
+3
+24
+14
+
+# The result should be the same as above when the lag algorithm works with pruned data:
+# decreasing the batch size causes data to be produced in smaller chunks at the source,
+# hence the sliding-window algorithm is used during the calculation.
+statement ok +set datafusion.execution.batch_size = 1; + +query I +SELECT LAG(c1, 2) IGNORE NULLS OVER() +FROM null_cases +ORDER BY c2 +LIMIT 5; +---- +78 +63 +3 +24 +14 diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 58a741c63401..23a7ee05d73e 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -978,6 +978,7 @@ pub async fn from_substrait_rex( from_substrait_bound(&window.lower_bound, true)?, from_substrait_bound(&window.upper_bound, false)?, ), + null_treatment: None, }))) } Some(RexType::Subquery(subquery)) => match &subquery.as_ref().subquery_type { diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index fc9517c90a45..9b29c0c67765 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -1115,6 +1115,7 @@ pub fn to_substrait_rex( partition_by, order_by, window_frame, + null_treatment: _, }) => { // function reference let function_anchor = _register_function(fun.to_string(), extension_info); From ae4113dc4e15fc6aea5eb99c91c63c63b305eb1b Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Thu, 22 Feb 2024 23:15:11 -0600 Subject: [PATCH 06/45] fix: issue #9213 substitute ArrayAgg to NthValue to optimize query plan (#9295) * fix: issue #9213 substitute ArrayAgg to NthValue to optimize query plan * fix format * adding type check * adding test --- datafusion/sql/src/expr/mod.rs | 46 +++++++- .../test_files/agg_func_substitute.slt | 100 ++++++++++++++++++ 2 files changed, 142 insertions(+), 4 deletions(-) create mode 100644 datafusion/sqllogictest/test_files/agg_func_substitute.slt diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index ecf510da7bce..da6c3a6074d4 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -203,9 +203,44 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } SQLExpr::ArrayIndex { obj, indexes } => { + fn is_unsupported(expr: &SQLExpr) -> bool { + matches!(expr, SQLExpr::JsonAccess { .. 
}) + } + fn simplify_array_index_expr(expr: Expr, index: Expr) -> (Expr, bool) { + match &expr { + Expr::AggregateFunction(agg_func) if agg_func.func_def == datafusion_expr::expr::AggregateFunctionDefinition::BuiltIn(AggregateFunction::ArrayAgg) => { + let mut new_args = agg_func.args.clone(); + new_args.push(index.clone()); + (Expr::AggregateFunction(datafusion_expr::expr::AggregateFunction::new( + datafusion_expr::AggregateFunction::NthValue, + new_args, + agg_func.distinct, + agg_func.filter.clone(), + agg_func.order_by.clone(), + )), true) + }, + _ => (expr, false), + } + } let expr = self.sql_expr_to_logical_expr(*obj, schema, planner_context)?; - self.plan_indexed(expr, indexes, schema, planner_context) + if indexes.len() > 1 || is_unsupported(&indexes[0]) { + return self.plan_indexed(expr, indexes, schema, planner_context); + } + let (new_expr, changed) = simplify_array_index_expr( + expr, + self.sql_expr_to_logical_expr( + indexes[0].clone(), + schema, + planner_context, + )?, + ); + + if changed { + Ok(new_expr) + } else { + self.plan_indexed(new_expr, indexes, schema, planner_context) + } } SQLExpr::CompoundIdentifier(ids) => { @@ -557,7 +592,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { limit, within_group, } = array_agg; - let order_by = if let Some(order_by) = order_by { Some(self.order_by_to_sort_expr( &order_by, @@ -581,10 +615,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { vec![self.sql_expr_to_logical_expr(*expr, input_schema, planner_context)?]; // next, aggregate built-ins - let fun = AggregateFunction::ArrayAgg; Ok(Expr::AggregateFunction(expr::AggregateFunction::new( - fun, args, distinct, None, order_by, + AggregateFunction::ArrayAgg, + args, + distinct, + None, + order_by, ))) + // see if we can rewrite it into NTH-VALUE } fn sql_in_list_to_expr( diff --git a/datafusion/sqllogictest/test_files/agg_func_substitute.slt b/datafusion/sqllogictest/test_files/agg_func_substitute.slt new file mode 100644 index 000000000000..650ec1ad8e5c --- /dev/null +++ b/datafusion/sqllogictest/test_files/agg_func_substitute.slt @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
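+
+# These tests exercise the rewrite of ARRAY_AGG(expr ORDER BY ...)[n] into
+# NTH_VALUE(expr, n ORDER BY ...) (added in datafusion/sql/src/expr/mod.rs
+# above), so the plan can evaluate NTH_VALUE directly instead of
+# materializing the whole array first.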
+ +####### +# Setup test data table +####### +statement ok +CREATE EXTERNAL TABLE multiple_ordered_table ( + a0 INTEGER, + a INTEGER, + b INTEGER, + c INTEGER, + d INTEGER +) +STORED AS CSV +WITH HEADER ROW +WITH ORDER (a ASC, b ASC) +WITH ORDER (c ASC) +LOCATION '../../datafusion/core/tests/data/window_2.csv'; + + +query TT +EXPLAIN SELECT a, ARRAY_AGG(c ORDER BY c)[1] as result + FROM multiple_ordered_table + GROUP BY a; +---- +logical_plan +Projection: multiple_ordered_table.a, NTH_VALUE(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] AS result +--Aggregate: groupBy=[[multiple_ordered_table.a]], aggr=[[NTH_VALUE(multiple_ordered_table.c, Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]]] +----TableScan: multiple_ordered_table projection=[a, c] +physical_plan +ProjectionExec: expr=[a@0 as a, NTH_VALUE(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result] +--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[NTH_VALUE(multiple_ordered_table.c,Int64(1))], ordering_mode=Sorted +----SortExec: expr=[a@0 ASC NULLS LAST] +------CoalesceBatchesExec: target_batch_size=8192 +--------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 +----------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[NTH_VALUE(multiple_ordered_table.c,Int64(1))], ordering_mode=Sorted +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], has_header=true + + +query TT +EXPLAIN SELECT a, NTH_VALUE(c, 1 ORDER BY c) as result + FROM multiple_ordered_table + GROUP BY a; +---- +logical_plan +Projection: multiple_ordered_table.a, NTH_VALUE(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] AS result +--Aggregate: groupBy=[[multiple_ordered_table.a]], aggr=[[NTH_VALUE(multiple_ordered_table.c, Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]]] +----TableScan: multiple_ordered_table projection=[a, c] +physical_plan +ProjectionExec: expr=[a@0 as a, NTH_VALUE(multiple_ordered_table.c,Int64(1)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result] +--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[NTH_VALUE(multiple_ordered_table.c,Int64(1))], ordering_mode=Sorted +----SortExec: expr=[a@0 ASC NULLS LAST] +------CoalesceBatchesExec: target_batch_size=8192 +--------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 +----------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[NTH_VALUE(multiple_ordered_table.c,Int64(1))], ordering_mode=Sorted +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], has_header=true + +query TT +EXPLAIN SELECT a, ARRAY_AGG(c ORDER BY c)[1 + 100] as result + FROM multiple_ordered_table + GROUP BY a; +---- +logical_plan +Projection: multiple_ordered_table.a, NTH_VALUE(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] AS result +--Aggregate: groupBy=[[multiple_ordered_table.a]], aggr=[[NTH_VALUE(multiple_ordered_table.c, Int64(101)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST] AS NTH_VALUE(multiple_ordered_table.c,Int64(1) + 
Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]]] +----TableScan: multiple_ordered_table projection=[a, c] +physical_plan +ProjectionExec: expr=[a@0 as a, NTH_VALUE(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]@1 as result] +--AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[NTH_VALUE(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted +----SortExec: expr=[a@0 ASC NULLS LAST] +------CoalesceBatchesExec: target_batch_size=8192 +--------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 +----------AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[NTH_VALUE(multiple_ordered_table.c,Int64(1) + Int64(100)) ORDER BY [multiple_ordered_table.c ASC NULLS LAST]], ordering_mode=Sorted +------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 +--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_orderings=[[a@0 ASC NULLS LAST], [c@1 ASC NULLS LAST]], has_header=true + +query II +SELECT a, ARRAY_AGG(c ORDER BY c)[1] as result + FROM multiple_ordered_table + GROUP BY a; +---- +0 0 +1 50 From fad60615623079ee733c6e48f3ef4749d001ac19 Mon Sep 17 00:00:00 2001 From: comphead Date: Thu, 22 Feb 2024 22:38:00 -0800 Subject: [PATCH 07/45] Minor: Adding missing fields to debug for (#9325) --- datafusion/physical-expr/src/scalar_function.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/physical-expr/src/scalar_function.rs b/datafusion/physical-expr/src/scalar_function.rs index b73626aa4340..bfe0fdb279f5 100644 --- a/datafusion/physical-expr/src/scalar_function.rs +++ b/datafusion/physical-expr/src/scalar_function.rs @@ -69,6 +69,8 @@ impl Debug for ScalarFunctionExpr { .field("name", &self.name) .field("args", &self.args) .field("return_type", &self.return_type) + .field("monotonicity", &self.monotonicity) + .field("supports_zero_argument", &self.supports_zero_argument) .finish() } } From 3b355c798a3258f118016b33f26c5a55fed36220 Mon Sep 17 00:00:00 2001 From: SteveLauC Date: Fri, 23 Feb 2024 17:58:38 +0800 Subject: [PATCH 08/45] docs: document range() alias generate_series() (#9321) --- docs/source/user-guide/sql/scalar_functions.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index a6e872cab4c1..707e8c24b326 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1935,6 +1935,7 @@ from_unixtime(expression) - [array_to_string](#array_to_string) - [cardinality](#cardinality) - [empty](#empty) +- [generate_series](#generate_series) - [list_append](#list_append) - [list_sort](#list_sort) - [list_cat](#list_cat) @@ -2881,6 +2882,10 @@ empty(array) +------------------+ ``` +### `generate_series` + +_Alias of [range](#range)._ + ### `list_append` _Alias of [array_append](#array_append)._ @@ -3090,6 +3095,10 @@ Step can not be 0 (then the range will be nonsense.). 
- **end**: end of the range (not included) - **step**: increase by step (can not be 0) +#### Aliases + +- generate_series + ## Struct Functions - [struct](#struct) From 73c08b5045fc627c3ed81d1bcc5234a7436a8e38 Mon Sep 17 00:00:00 2001 From: universalmind303 Date: Sat, 24 Feb 2024 10:25:04 -0600 Subject: [PATCH 09/45] chore: statically link xz2 (#9311) * chore: statically link xz2 * toml fmt --- datafusion/core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 874e4551323d..506be6667ef2 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -116,7 +116,7 @@ tokio = { workspace = true } tokio-util = { version = "0.7.4", features = ["io"], optional = true } url = { workspace = true } uuid = { version = "1.0", features = ["v4"] } -xz2 = { version = "0.1", optional = true } +xz2 = { version = "0.1", optional = true, features = ["static"] } zstd = { version = "0.13", optional = true, default-features = false } [dev-dependencies] From ff36f6d99285404cf45f870ccc1c650411fdb335 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Sat, 24 Feb 2024 12:20:53 -0600 Subject: [PATCH 10/45] fix: throw error when incursion happen in dataframe api (#9330) * fix: issue #9327 throw error when incursion happen in dataframe api * fix --- datafusion/core/src/dataframe/mod.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index e407c477ae4c..3a60d57f6685 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -49,8 +49,8 @@ use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; use datafusion_common::{ - Column, DFSchema, DataFusionError, FileType, FileTypeWriterOptions, ParamValues, - SchemaError, UnnestOptions, + plan_err, Column, DFSchema, DataFusionError, FileType, FileTypeWriterOptions, + ParamValues, SchemaError, UnnestOptions, }; use datafusion_expr::dml::CopyOptions; use datafusion_expr::{ @@ -1044,6 +1044,9 @@ impl DataFrame { /// # } /// ``` pub fn explain(self, verbose: bool, analyze: bool) -> Result { + if matches!(self.plan, LogicalPlan::Explain(_)) { + return plan_err!("Nested EXPLAINs are not supported"); + } let plan = LogicalPlanBuilder::from(self.plan) .explain(verbose, analyze)? 
.build()?; @@ -2975,4 +2978,15 @@ mod tests { Ok(()) } + #[tokio::test] + async fn nested_explain_should_fail() -> Result<()> { + let ctx = SessionContext::new(); + // must be error + let mut result = ctx.sql("explain select 1").await?.explain(false, false); + assert!(result.is_err()); + // must be error + result = ctx.sql("explain explain select 1").await; + assert!(result.is_err()); + Ok(()) + } } From 148b4d22f1d6de2bc6269ab96ba2e48e4735a9f9 Mon Sep 17 00:00:00 2001 From: Hoang Pham Date: Sun, 25 Feb 2024 20:36:56 +0700 Subject: [PATCH 11/45] Support CopyTo::partition_by in datafusion proto (#9306) * add support for CopyTo::partition_by in proto Signed-off-by: Hoang Pham * simplify partition_by logic Signed-off-by: Hoang Pham --------- Signed-off-by: Hoang Pham --- datafusion/expr/src/logical_plan/dml.rs | 2 +- datafusion/proto/proto/datafusion.proto | 1 + datafusion/proto/src/generated/pbjson.rs | 18 ++++++++++++++++++ datafusion/proto/src/generated/prost.rs | 2 ++ datafusion/proto/src/logical_plan/mod.rs | 6 ++++-- .../tests/cases/roundtrip_logical_plan.rs | 11 +++++++---- 6 files changed, 33 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/logical_plan/dml.rs b/datafusion/expr/src/logical_plan/dml.rs index a55781eda643..7f04bd8973d6 100644 --- a/datafusion/expr/src/logical_plan/dml.rs +++ b/datafusion/expr/src/logical_plan/dml.rs @@ -36,7 +36,7 @@ pub struct CopyTo { pub output_url: String, /// The file format to output (explicitly defined or inferred from file extension) pub file_format: FileType, - /// Detmines which, if any, columns should be used for hive-style partitioned writes + /// Determines which, if any, columns should be used for hive-style partitioned writes pub partition_by: Vec, /// Arbitrary options as tuples pub copy_options: CopyOptions, diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index e779e29cb8da..7673ce86ae1d 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -327,6 +327,7 @@ message CopyToNode { FileTypeWriterOptions writer_options = 5; } string file_type = 6; + repeated string partition_by = 7; } message SQLOptions { diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index f5f15aa3e428..65483f9ac467 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -3795,6 +3795,9 @@ impl serde::Serialize for CopyToNode { if !self.file_type.is_empty() { len += 1; } + if !self.partition_by.is_empty() { + len += 1; + } if self.copy_options.is_some() { len += 1; } @@ -3808,6 +3811,9 @@ impl serde::Serialize for CopyToNode { if !self.file_type.is_empty() { struct_ser.serialize_field("fileType", &self.file_type)?; } + if !self.partition_by.is_empty() { + struct_ser.serialize_field("partitionBy", &self.partition_by)?; + } if let Some(v) = self.copy_options.as_ref() { match v { copy_to_node::CopyOptions::SqlOptions(v) => { @@ -3833,6 +3839,8 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { "outputUrl", "file_type", "fileType", + "partition_by", + "partitionBy", "sql_options", "sqlOptions", "writer_options", @@ -3844,6 +3852,7 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { Input, OutputUrl, FileType, + PartitionBy, SqlOptions, WriterOptions, } @@ -3870,6 +3879,7 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { "input" => Ok(GeneratedField::Input), "outputUrl" | "output_url" => Ok(GeneratedField::OutputUrl), "fileType" | "file_type" => 
Ok(GeneratedField::FileType), + "partitionBy" | "partition_by" => Ok(GeneratedField::PartitionBy), "sqlOptions" | "sql_options" => Ok(GeneratedField::SqlOptions), "writerOptions" | "writer_options" => Ok(GeneratedField::WriterOptions), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), @@ -3894,6 +3904,7 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { let mut input__ = None; let mut output_url__ = None; let mut file_type__ = None; + let mut partition_by__ = None; let mut copy_options__ = None; while let Some(k) = map_.next_key()? { match k { @@ -3915,6 +3926,12 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { } file_type__ = Some(map_.next_value()?); } + GeneratedField::PartitionBy => { + if partition_by__.is_some() { + return Err(serde::de::Error::duplicate_field("partitionBy")); + } + partition_by__ = Some(map_.next_value()?); + } GeneratedField::SqlOptions => { if copy_options__.is_some() { return Err(serde::de::Error::duplicate_field("sqlOptions")); @@ -3935,6 +3952,7 @@ impl<'de> serde::Deserialize<'de> for CopyToNode { input: input__, output_url: output_url__.unwrap_or_default(), file_type: file_type__.unwrap_or_default(), + partition_by: partition_by__.unwrap_or_default(), copy_options: copy_options__, }) } diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 69d035239cb8..a567269e3356 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -511,6 +511,8 @@ pub struct CopyToNode { pub output_url: ::prost::alloc::string::String, #[prost(string, tag = "6")] pub file_type: ::prost::alloc::string::String, + #[prost(string, repeated, tag = "7")] + pub partition_by: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, #[prost(oneof = "copy_to_node::CopyOptions", tags = "4, 5")] pub copy_options: ::core::option::Option, } diff --git a/datafusion/proto/src/logical_plan/mod.rs b/datafusion/proto/src/logical_plan/mod.rs index aaaf165e3276..f107af757a71 100644 --- a/datafusion/proto/src/logical_plan/mod.rs +++ b/datafusion/proto/src/logical_plan/mod.rs @@ -913,12 +913,13 @@ impl AsLogicalPlan for LogicalPlanNode { } None => return Err(proto_error("CopyTo missing CopyOptions")), }; + Ok(datafusion_expr::LogicalPlan::Copy( datafusion_expr::dml::CopyTo { input: Arc::new(input), output_url: copy.output_url.clone(), file_format: FileType::from_str(©.file_type)?, - partition_by: vec![], + partition_by: copy.partition_by.clone(), copy_options, }, )) @@ -1642,7 +1643,7 @@ impl AsLogicalPlan for LogicalPlanNode { output_url, file_format, copy_options, - partition_by: _, + partition_by, }) => { let input = protobuf::LogicalPlanNode::try_from_logical_plan( input, @@ -1726,6 +1727,7 @@ impl AsLogicalPlan for LogicalPlanNode { output_url: output_url.to_string(), file_type: file_format.to_string(), copy_options: copy_options_proto, + partition_by: partition_by.clone(), }, ))), }) diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 6ca757908159..e3bd2cb1dc47 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -324,7 +324,7 @@ async fn roundtrip_logical_plan_copy_to_sql_options() -> Result<()> { input: Arc::new(input), output_url: "test.csv".to_string(), file_format: FileType::CSV, - partition_by: vec![], + partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], copy_options: 
CopyOptions::SQLOptions(StatementOptions::from(&options)), }); @@ -355,7 +355,7 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { input: Arc::new(input), output_url: "test.parquet".to_string(), file_format: FileType::PARQUET, - partition_by: vec![], + partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], copy_options: CopyOptions::WriterOptions(Box::new( FileTypeWriterOptions::Parquet(ParquetWriterOptions::new(writer_properties)), )), @@ -369,6 +369,7 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> { LogicalPlan::Copy(copy_to) => { assert_eq!("test.parquet", copy_to.output_url); assert_eq!(FileType::PARQUET, copy_to.file_format); + assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); match ©_to.copy_options { CopyOptions::WriterOptions(y) => match y.as_ref() { FileTypeWriterOptions::Parquet(p) => { @@ -404,7 +405,7 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { input: Arc::new(input), output_url: "test.arrow".to_string(), file_format: FileType::ARROW, - partition_by: vec![], + partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::Arrow( ArrowWriterOptions::new(), ))), @@ -418,6 +419,7 @@ async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> { LogicalPlan::Copy(copy_to) => { assert_eq!("test.arrow", copy_to.output_url); assert_eq!(FileType::ARROW, copy_to.file_format); + assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); match ©_to.copy_options { CopyOptions::WriterOptions(y) => match y.as_ref() { FileTypeWriterOptions::Arrow(_) => {} @@ -450,7 +452,7 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { input: Arc::new(input), output_url: "test.csv".to_string(), file_format: FileType::CSV, - partition_by: vec![], + partition_by: vec!["a".to_string(), "b".to_string(), "c".to_string()], copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::CSV( CsvWriterOptions::new( writer_properties, @@ -467,6 +469,7 @@ async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> { LogicalPlan::Copy(copy_to) => { assert_eq!("test.csv", copy_to.output_url); assert_eq!(FileType::CSV, copy_to.file_format); + assert_eq!(vec!["a", "b", "c"], copy_to.partition_by); match ©_to.copy_options { CopyOptions::WriterOptions(y) => match y.as_ref() { FileTypeWriterOptions::CSV(p) => { From 10cbb056a31f409fb1303df4601d777cedd3aaed Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 25 Feb 2024 10:18:35 -0700 Subject: [PATCH 12/45] Add test to prevent circular dependencies from being added (#9292) --- datafusion/core/Cargo.toml | 1 + datafusion/core/tests/depcheck.rs | 78 +++++ dev/release/crate-deps.dot | 101 ++++-- dev/release/crate-deps.svg | 524 +++++++++++++++++++++--------- 4 files changed, 524 insertions(+), 180 deletions(-) create mode 100644 datafusion/core/tests/depcheck.rs diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 506be6667ef2..c3bd89037cfe 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -122,6 +122,7 @@ zstd = { version = "0.13", optional = true, default-features = false } [dev-dependencies] async-trait = { workspace = true } bigdecimal = { workspace = true } +cargo = "0.77.0" criterion = { version = "0.5", features = ["async_tokio"] } csv = "1.1.6" ctor = { workspace = true } diff --git a/datafusion/core/tests/depcheck.rs b/datafusion/core/tests/depcheck.rs new file mode 100644 index 000000000000..94448818691e --- /dev/null +++ 
b/datafusion/core/tests/depcheck.rs @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// Check for circular dependencies between DataFusion crates +use std::collections::{HashMap, HashSet}; +use std::env; +use std::path::Path; + +use cargo::util::config::Config; +#[test] +fn test_deps() -> Result<(), Box> { + let config = Config::default()?; + let path = env::var("CARGO_MANIFEST_DIR").unwrap(); + let dir = Path::new(&path); + let root_cargo_toml = dir.join("Cargo.toml"); + let workspace = cargo::core::Workspace::new(&root_cargo_toml, &config)?; + let (_, resolve) = cargo::ops::resolve_ws(&workspace)?; + + let mut package_deps = HashMap::new(); + for package_id in resolve + .iter() + .filter(|id| id.name().starts_with("datafusion")) + { + let deps: Vec = resolve + .deps(package_id) + .filter(|(package_id, _)| package_id.name().starts_with("datafusion")) + .map(|(package_id, _)| package_id.name().to_string()) + .collect(); + package_deps.insert(package_id.name().to_string(), deps); + } + + // check for circular dependencies + for (root_package, deps) in &package_deps { + let mut seen = HashSet::new(); + for dep in deps { + check_circular_deps(root_package, dep, &package_deps, &mut seen); + } + } + + Ok(()) +} + +fn check_circular_deps( + root_package: &str, + current_dep: &str, + package_deps: &HashMap>, + seen: &mut HashSet, +) { + if root_package == current_dep { + panic!( + "circular dependency detected from {root_package} to self via one of {:?}", + seen + ); + } + if seen.contains(current_dep) { + return; + } + seen.insert(current_dep.to_string()); + if let Some(deps) = package_deps.get(current_dep) { + for dep in deps { + check_circular_deps(root_package, dep, package_deps, seen); + } + } +} diff --git a/dev/release/crate-deps.dot b/dev/release/crate-deps.dot index 618eb56afb75..69811c7d6109 100644 --- a/dev/release/crate-deps.dot +++ b/dev/release/crate-deps.dot @@ -16,39 +16,76 @@ // under the License. 
digraph G { - - datafusion_common - - datafusion_expr -> datafusion_common - - datafusion_sql -> datafusion_common - datafusion_sql -> datafusion_expr - - datafusion_optimizer -> datafusion_common - datafusion_optimizer -> datafusion_expr - - datafusion_physical_expr -> datafusion_common - datafusion_physical_expr -> datafusion_expr - - datafusion_execution -> datafusion_common - datafusion_execution -> datafusion_expr - + datafusion_examples + datafusion_examples -> datafusion + datafusion_examples -> datafusion_common + datafusion_examples -> datafusion_expr + datafusion_examples -> datafusion_optimizer + datafusion_examples -> datafusion_physical_expr + datafusion_examples -> datafusion_sql + datafusion_expr + datafusion_expr -> datafusion_common + datafusion_functions + datafusion_functions -> datafusion_common + datafusion_functions -> datafusion_execution + datafusion_functions -> datafusion_expr + datafusion_wasmtest + datafusion_wasmtest -> datafusion + datafusion_wasmtest -> datafusion_common + datafusion_wasmtest -> datafusion_execution + datafusion_wasmtest -> datafusion_expr + datafusion_wasmtest -> datafusion_optimizer + datafusion_wasmtest -> datafusion_physical_expr + datafusion_wasmtest -> datafusion_physical_plan + datafusion_wasmtest -> datafusion_sql + datafusion_common + datafusion_sql + datafusion_sql -> datafusion_common + datafusion_sql -> datafusion_expr + datafusion_physical_plan datafusion_physical_plan -> datafusion_common datafusion_physical_plan -> datafusion_execution datafusion_physical_plan -> datafusion_expr datafusion_physical_plan -> datafusion_physical_expr - - datafusion -> datafusion_common - datafusion -> datafusion_execution - datafusion -> datafusion_expr - datafusion -> datafusion_optimizer - datafusion -> datafusion_physical_expr - datafusion -> datafusion_physical_plan - datafusion -> datafusion_sql - - datafusion_proto -> datafusion - - datafusion_substrait -> datafusion - - datafusion_cli -> datafusion -} + datafusion_benchmarks + datafusion_benchmarks -> datafusion + datafusion_benchmarks -> datafusion_common + datafusion_benchmarks -> datafusion_proto + datafusion_docs_tests + datafusion_docs_tests -> datafusion + datafusion_optimizer + datafusion_optimizer -> datafusion_common + datafusion_optimizer -> datafusion_expr + datafusion_optimizer -> datafusion_physical_expr + datafusion_optimizer -> datafusion_sql + datafusion_proto + datafusion_proto -> datafusion + datafusion_proto -> datafusion_common + datafusion_proto -> datafusion_expr + datafusion_physical_expr + datafusion_physical_expr -> datafusion_common + datafusion_physical_expr -> datafusion_execution + datafusion_physical_expr -> datafusion_expr + datafusion_sqllogictest + datafusion_sqllogictest -> datafusion + datafusion_sqllogictest -> datafusion_common + datafusion + datafusion -> datafusion_common + datafusion -> datafusion_execution + datafusion -> datafusion_expr + datafusion -> datafusion_functions + datafusion -> datafusion_functions_array + datafusion -> datafusion_optimizer + datafusion -> datafusion_physical_expr + datafusion -> datafusion_physical_plan + datafusion -> datafusion_sql + datafusion_functions_array + datafusion_functions_array -> datafusion_common + datafusion_functions_array -> datafusion_execution + datafusion_functions_array -> datafusion_expr + datafusion_execution + datafusion_execution -> datafusion_common + datafusion_execution -> datafusion_expr + datafusion_substrait + datafusion_substrait -> datafusion +} \ No newline at end of file diff --git 
a/dev/release/crate-deps.svg b/dev/release/crate-deps.svg index a7c7b7fe4acd..cf60bf752642 100644 --- a/dev/release/crate-deps.svg +++ b/dev/release/crate-deps.svg @@ -1,217 +1,445 @@ - - - + + G - - + + -datafusion_common - -datafusion_common +datafusion_examples + +datafusion_examples - + -datafusion_expr - -datafusion_expr +datafusion + +datafusion - + -datafusion_expr->datafusion_common - - +datafusion_examples->datafusion + + - + -datafusion_sql - -datafusion_sql +datafusion_common + +datafusion_common - + -datafusion_sql->datafusion_common - - +datafusion_examples->datafusion_common + + - + + +datafusion_expr + +datafusion_expr + + -datafusion_sql->datafusion_expr - - +datafusion_examples->datafusion_expr + + - + datafusion_optimizer - -datafusion_optimizer + +datafusion_optimizer - + +datafusion_examples->datafusion_optimizer + + + + + +datafusion_physical_expr + +datafusion_physical_expr + + + +datafusion_examples->datafusion_physical_expr + + + + + +datafusion_sql + +datafusion_sql + + + +datafusion_examples->datafusion_sql + + + + + +datafusion->datafusion_common + + + + + +datafusion->datafusion_expr + + + + + +datafusion->datafusion_optimizer + + + + + +datafusion->datafusion_physical_expr + + + + + +datafusion->datafusion_sql + + + + + +datafusion_functions + +datafusion_functions + + + +datafusion->datafusion_functions + + + + + +datafusion_execution + +datafusion_execution + + + +datafusion->datafusion_execution + + + + + +datafusion_physical_plan + +datafusion_physical_plan + + + +datafusion->datafusion_physical_plan + + + + + +datafusion_functions_array + +datafusion_functions_array + + + +datafusion->datafusion_functions_array + + + + + +datafusion_expr->datafusion_common + + + + + datafusion_optimizer->datafusion_common - - + + - + datafusion_optimizer->datafusion_expr - - + + - - -datafusion_physical_expr - -datafusion_physical_expr + + +datafusion_optimizer->datafusion_physical_expr + + + + + +datafusion_optimizer->datafusion_sql + + - + datafusion_physical_expr->datafusion_common - - + + - + datafusion_physical_expr->datafusion_expr - - + + - - -datafusion_execution - -datafusion_execution + + +datafusion_physical_expr->datafusion_execution + + - - -datafusion_execution->datafusion_common - - + + +datafusion_sql->datafusion_common + + - - -datafusion_execution->datafusion_expr - - + + +datafusion_sql->datafusion_expr + + - - -datafusion_physical_plan - -datafusion_physical_plan + + +datafusion_functions->datafusion_common + + - + -datafusion_physical_plan->datafusion_common - - +datafusion_functions->datafusion_expr + + - - -datafusion_physical_plan->datafusion_expr - - + + +datafusion_functions->datafusion_execution + + - - -datafusion_physical_plan->datafusion_physical_expr - - + + +datafusion_execution->datafusion_common + + - + + +datafusion_execution->datafusion_expr + + + + + +datafusion_wasmtest + +datafusion_wasmtest + + -datafusion_physical_plan->datafusion_execution - - +datafusion_wasmtest->datafusion + + - - -datafusion - -datafusion + + +datafusion_wasmtest->datafusion_common + + - + -datafusion->datafusion_common - - +datafusion_wasmtest->datafusion_expr + + - + + +datafusion_wasmtest->datafusion_optimizer + + + + -datafusion->datafusion_expr - - +datafusion_wasmtest->datafusion_physical_expr + + - - -datafusion->datafusion_sql - - + + +datafusion_wasmtest->datafusion_sql + + - + + +datafusion_wasmtest->datafusion_execution + + + + -datafusion->datafusion_optimizer - - +datafusion_wasmtest->datafusion_physical_plan + + - - 
-datafusion->datafusion_physical_expr - - + + +datafusion_physical_plan->datafusion_common + + - - -datafusion->datafusion_execution - - + + +datafusion_physical_plan->datafusion_expr + + - - -datafusion->datafusion_physical_plan - - + + +datafusion_physical_plan->datafusion_physical_expr + + + + + +datafusion_physical_plan->datafusion_execution + + + + + +datafusion_benchmarks + +datafusion_benchmarks + + + +datafusion_benchmarks->datafusion + + + + + +datafusion_benchmarks->datafusion_common + + - + datafusion_proto - -datafusion_proto + +datafusion_proto + + + +datafusion_benchmarks->datafusion_proto + + - + datafusion_proto->datafusion - - + + + + + +datafusion_proto->datafusion_common + + + + + +datafusion_proto->datafusion_expr + + + + + +datafusion_docs_tests + +datafusion_docs_tests + + + +datafusion_docs_tests->datafusion + + + + + +datafusion_sqllogictest + +datafusion_sqllogictest + + + +datafusion_sqllogictest->datafusion + + + + + +datafusion_sqllogictest->datafusion_common + + + + + +datafusion_functions_array->datafusion_common + + + + + +datafusion_functions_array->datafusion_expr + + + + + +datafusion_functions_array->datafusion_execution + + - + datafusion_substrait - -datafusion_substrait + +datafusion_substrait - + datafusion_substrait->datafusion - - - - - -datafusion_cli - -datafusion_cli - - - -datafusion_cli->datafusion - - + + From 22585586bff554cbd0c08099d303dcc95ef61cfc Mon Sep 17 00:00:00 2001 From: Jay Zhan Date: Mon, 26 Feb 2024 07:30:24 +0800 Subject: [PATCH 13/45] Substrait: Support ScalarUDF (#9337) * support udf in substrait Signed-off-by: jayzhan211 * clippy Signed-off-by: jayzhan211 --------- Signed-off-by: jayzhan211 --- datafusion/functions/src/math/mod.rs | 2 +- .../substrait/src/logical_plan/consumer.rs | 69 +++++++++++++++---- .../tests/cases/roundtrip_logical_plan.rs | 5 ++ 3 files changed, 60 insertions(+), 16 deletions(-) diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 67d2d957ea1f..873625948a35 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! "core" DataFusion functions +//! 
"math" DataFusion functions mod nans; diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 23a7ee05d73e..095806c538d1 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -28,7 +28,7 @@ use datafusion::logical_expr::{ }; use datafusion::logical_expr::{ expr, Cast, Extension, GroupingSet, Like, LogicalPlanBuilder, Partitioning, - Repartition, Subquery, WindowFrameBound, WindowFrameUnits, + Repartition, ScalarUDF, Subquery, WindowFrameBound, WindowFrameUnits, }; use datafusion::prelude::JoinType; use datafusion::sql::TableReference; @@ -78,6 +78,7 @@ enum ScalarFunctionType { Builtin(BuiltinScalarFunction), Op(Operator), Expr(BuiltinExprBuilder), + Udf(Arc), } pub fn name_to_op(name: &str) -> Result { @@ -113,7 +114,15 @@ pub fn name_to_op(name: &str) -> Result { } } -fn scalar_function_type_from_str(name: &str) -> Result { +fn scalar_function_type_from_str( + ctx: &SessionContext, + name: &str, +) -> Result { + let s = ctx.state(); + if let Some(func) = s.scalar_functions().get(name) { + return Ok(ScalarFunctionType::Udf(func.to_owned())); + } + if let Ok(op) = name_to_op(name) { return Ok(ScalarFunctionType::Op(op)); } @@ -859,21 +868,51 @@ pub async fn from_substrait_rex( f.function_reference )) })?; - let fn_type = scalar_function_type_from_str(fn_name)?; + + // Convert function arguments from Substrait to DataFusion + async fn decode_arguments( + ctx: &SessionContext, + input_schema: &DFSchema, + extensions: &HashMap, + function_args: &[FunctionArgument], + ) -> Result> { + let mut args = Vec::with_capacity(function_args.len()); + for arg in function_args { + let arg_expr = match &arg.arg_type { + Some(ArgType::Value(e)) => { + from_substrait_rex(ctx, e, input_schema, extensions).await + } + _ => not_impl_err!( + "Aggregated function argument non-Value type not supported" + ), + }?; + args.push(arg_expr.as_ref().clone()); + } + Ok(args) + } + + let fn_type = scalar_function_type_from_str(ctx, fn_name)?; match fn_type { + ScalarFunctionType::Udf(fun) => { + let args = decode_arguments( + ctx, + input_schema, + extensions, + f.arguments.as_slice(), + ) + .await?; + Ok(Arc::new(Expr::ScalarFunction( + expr::ScalarFunction::new_udf(fun, args), + ))) + } ScalarFunctionType::Builtin(fun) => { - let mut args = Vec::with_capacity(f.arguments.len()); - for arg in &f.arguments { - let arg_expr = match &arg.arg_type { - Some(ArgType::Value(e)) => { - from_substrait_rex(ctx, e, input_schema, extensions).await - } - _ => not_impl_err!( - "Aggregated function argument non-Value type not supported" - ), - }; - args.push(arg_expr?.as_ref().clone()); - } + let args = decode_arguments( + ctx, + input_schema, + extensions, + f.arguments.as_slice(), + ) + .await?; Ok(Arc::new(Expr::ScalarFunction(expr::ScalarFunction::new( fun, args, )))) diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 79cf76de5985..331d63cc22b2 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -309,6 +309,11 @@ async fn simple_scalar_function_abs() -> Result<()> { roundtrip("SELECT ABS(a) FROM data").await } +#[tokio::test] +async fn simple_scalar_function_isnan() -> Result<()> { + roundtrip("SELECT ISNAN(a) FROM data").await +} + #[tokio::test] async fn simple_scalar_function_pow() -> Result<()> { roundtrip("SELECT POW(a, 2) 
FROM data").await From 3050699580fcebaaf06d27960e59038a99be99d5 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sun, 25 Feb 2024 19:17:17 -0700 Subject: [PATCH 14/45] Make agg_func_substitute test deterministic (#9340) --- datafusion/sqllogictest/test_files/agg_func_substitute.slt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/agg_func_substitute.slt b/datafusion/sqllogictest/test_files/agg_func_substitute.slt index 650ec1ad8e5c..ff485e97ca0c 100644 --- a/datafusion/sqllogictest/test_files/agg_func_substitute.slt +++ b/datafusion/sqllogictest/test_files/agg_func_substitute.slt @@ -94,7 +94,8 @@ ProjectionExec: expr=[a@0 as a, NTH_VALUE(multiple_ordered_table.c,Int64(1) + In query II SELECT a, ARRAY_AGG(c ORDER BY c)[1] as result FROM multiple_ordered_table - GROUP BY a; + GROUP BY a + ORDER BY a; ---- 0 0 1 50 From ace9815ae5d8c99001c84fb71100536989364c34 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Thu, 22 Feb 2024 15:54:39 +0300 Subject: [PATCH 15/45] use create_cache_convention --- .../examples/custom_datasource.rs | 18 +-- .../datasource/physical_plan/arrow_file.rs | 34 ++--- .../core/src/datasource/physical_plan/avro.rs | 25 ++-- .../core/src/datasource/physical_plan/csv.rs | 35 ++--- .../core/src/datasource/physical_plan/json.rs | 35 ++--- .../datasource/physical_plan/parquet/mod.rs | 35 ++--- .../enforce_distribution.rs | 16 +-- .../physical_optimizer/output_requirements.rs | 16 +-- datafusion/core/src/physical_planner.rs | 19 +-- datafusion/core/src/test/mod.rs | 19 ++- datafusion/core/src/test_util/mod.rs | 27 ++-- datafusion/core/tests/custom_sources.rs | 20 +-- .../provider_filter_pushdown.rs | 21 ++- .../tests/custom_sources_cases/statistics.rs | 19 ++- .../tests/user_defined/user_defined_plan.rs | 20 +-- .../physical-plan/src/aggregates/mod.rs | 65 ++++----- datafusion/physical-plan/src/analyze.rs | 21 ++- .../physical-plan/src/coalesce_batches.rs | 17 +-- .../physical-plan/src/coalesce_partitions.rs | 15 +-- datafusion/physical-plan/src/empty.rs | 27 ++-- datafusion/physical-plan/src/explain.rs | 20 ++- datafusion/physical-plan/src/filter.rs | 112 ++++++++-------- datafusion/physical-plan/src/insert.rs | 26 ++-- .../physical-plan/src/joins/cross_join.rs | 36 ++--- .../physical-plan/src/joins/hash_join.rs | 42 +++--- .../src/joins/nested_loop_join.rs | 42 +++--- .../src/joins/sort_merge_join.rs | 42 +++--- .../src/joins/symmetric_hash_join.rs | 42 +++--- datafusion/physical-plan/src/lib.rs | 16 --- datafusion/physical-plan/src/limit.rs | 34 ++--- datafusion/physical-plan/src/memory.rs | 35 ++--- .../physical-plan/src/placeholder_row.rs | 29 ++-- datafusion/physical-plan/src/projection.rs | 31 ++--- .../physical-plan/src/recursive_query.rs | 21 +-- .../physical-plan/src/repartition/mod.rs | 60 ++++++--- .../physical-plan/src/sorts/partial_sort.rs | 48 ++++--- datafusion/physical-plan/src/sorts/sort.rs | 51 ++++--- .../src/sorts/sort_preserving_merge.rs | 17 +-- datafusion/physical-plan/src/streaming.rs | 34 +++-- datafusion/physical-plan/src/test/exec.rs | 124 ++++++++---------- datafusion/physical-plan/src/union.rs | 43 +++--- datafusion/physical-plan/src/unnest.rs | 24 ++-- datafusion/physical-plan/src/values.rs | 19 +-- .../src/windows/bounded_window_agg_exec.rs | 29 ++-- .../src/windows/window_agg_exec.rs | 23 ++-- datafusion/physical-plan/src/work_table.rs | 18 +-- 46 files changed, 787 insertions(+), 735 deletions(-) diff --git a/datafusion-examples/examples/custom_datasource.rs 
b/datafusion-examples/examples/custom_datasource.rs index 9516dc570d6d..d3cd66b2c9bc 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_datasource.rs @@ -35,6 +35,7 @@ use datafusion::physical_plan::{ }; use datafusion::prelude::*; use datafusion_expr::{Expr, LogicalPlanBuilder}; +use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; use tokio::time::timeout; @@ -199,22 +200,21 @@ impl CustomExec { db: CustomDataSource, ) -> Self { let projected_schema = project_schema(&schema, projections).unwrap(); - let cache = PlanPropertiesCache::new_default(projected_schema.clone()); + let cache = Self::create_cache(projected_schema.clone()); Self { db, projected_schema, cache, } - .with_cache() } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - .with_partitioning(Partitioning::UnknownPartitioning(1)) - .with_exec_mode(ExecutionMode::Bounded); - - self + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index 1a27f9315b34..24e825a6920b 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -61,7 +61,11 @@ impl ArrowExec { pub fn new(base_config: FileScanConfig) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = PlanPropertiesCache::new_default(projected_schema.clone()); + let cache = Self::create_cache( + projected_schema.clone(), + &projected_output_ordering, + &base_config, + ); Self { base_config, projected_schema, @@ -70,36 +74,36 @@ impl ArrowExec { metrics: ExecutionPlanMetricsSet::new(), cache, } - .with_cache() } /// Ref to the base configs pub fn base_config(&self) -> &FileScanConfig { &self.base_config } - fn output_partitioning_helper(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.base_config.file_groups.len()) + fn output_partitioning_helper(file_scan_config: &FileScanConfig) -> Partitioning { + Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } - fn with_cache(mut self) -> Self { + fn create_cache( + schema: SchemaRef, + projected_output_ordering: &[LexOrdering], + file_scan_config: &FileScanConfig, + ) -> PlanPropertiesCache { // Equivalence Properties - let eq_properties = EquivalenceProperties::new_with_orderings( - self.schema(), - &self.projected_output_ordering, - ); + let eq_properties = + EquivalenceProperties::new_with_orderings(schema, projected_output_ordering); - self.cache = PlanPropertiesCache::new( + PlanPropertiesCache::new( eq_properties, - self.output_partitioning_helper(), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode - ); - self + Self::output_partitioning_helper(file_scan_config), // Output Partitioning + ExecutionMode::Bounded, // Execution Mode + ) } fn with_file_groups(mut self, file_groups: Vec>) -> Self { self.base_config.file_groups = file_groups; // Changing file groups may invalidate output partitioning. 
Update it also - let output_partitioning = self.output_partitioning_helper(); + let output_partitioning = Self::output_partitioning_helper(&self.base_config); self.cache = self.cache.with_partitioning(output_partitioning); self } diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index fb2cd627a1da..6b6e7bce90c1 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -50,7 +50,11 @@ impl AvroExec { pub fn new(base_config: FileScanConfig) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = PlanPropertiesCache::new_default(projected_schema.clone()); + let cache = Self::create_cache( + projected_schema.clone(), + &projected_output_ordering, + &base_config, + ); Self { base_config, projected_schema, @@ -59,27 +63,26 @@ impl AvroExec { metrics: ExecutionPlanMetricsSet::new(), cache, } - .with_cache() } /// Ref to the base configs pub fn base_config(&self) -> &FileScanConfig { &self.base_config } - fn with_cache(mut self) -> Self { + fn create_cache( + schema: SchemaRef, + orderings: &[LexOrdering], + file_scan_config: &FileScanConfig, + ) -> PlanPropertiesCache { // Equivalence Properties - let eq_properties = EquivalenceProperties::new_with_orderings( - self.schema(), - &self.projected_output_ordering, - ); - let n_partitions = self.base_config.file_groups.len(); + let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); + let n_partitions = file_scan_config.file_groups.len(); - self.cache = PlanPropertiesCache::new( + PlanPropertiesCache::new( eq_properties, Partitioning::UnknownPartitioning(n_partitions), // Output Partitioning ExecutionMode::Bounded, // Execution Mode - ); - self + ) } } diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 55cf62507788..19281bc3c189 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -53,7 +53,6 @@ use tokio::task::JoinSet; pub struct CsvExec { base_config: FileScanConfig, projected_statistics: Statistics, - projected_output_ordering: Vec, has_header: bool, delimiter: u8, quote: u8, @@ -77,11 +76,14 @@ impl CsvExec { ) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = PlanPropertiesCache::new_default(projected_schema); + let cache = Self::create_cache( + projected_schema, + &projected_output_ordering, + &base_config, + ); Self { base_config, projected_statistics, - projected_output_ordering, has_header, delimiter, quote, @@ -90,7 +92,6 @@ impl CsvExec { file_compression_type, cache, } - .with_cache() } /// Ref to the base configs @@ -116,29 +117,29 @@ impl CsvExec { self.escape } - fn output_partitioning_helper(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.base_config.file_groups.len()) + fn output_partitioning_helper(file_scan_config: &FileScanConfig) -> Partitioning { + Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } - fn with_cache(mut self) -> Self { + fn create_cache( + schema: SchemaRef, + orderings: &[LexOrdering], + file_scan_config: &FileScanConfig, + ) -> PlanPropertiesCache { // Equivalence Properties - let eq_properties = EquivalenceProperties::new_with_orderings( - self.schema(), - &self.projected_output_ordering, - ); + let eq_properties = 
EquivalenceProperties::new_with_orderings(schema, orderings); - self.cache = PlanPropertiesCache::new( + PlanPropertiesCache::new( eq_properties, - self.output_partitioning_helper(), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode - ); - self + Self::output_partitioning_helper(file_scan_config), // Output Partitioning + ExecutionMode::Bounded, // Execution Mode + ) } fn with_file_groups(mut self, file_groups: Vec>) -> Self { self.base_config.file_groups = file_groups; // Changing file groups may invalidate output partitioning. Update it also - let output_partitioning = self.output_partitioning_helper(); + let output_partitioning = Self::output_partitioning_helper(&self.base_config); self.cache = self.cache.with_partitioning(output_partitioning); self } diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 231b48f28d96..6e17e58d8444 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -52,7 +52,6 @@ use tokio::task::JoinSet; pub struct NdJsonExec { base_config: FileScanConfig, projected_statistics: Statistics, - projected_output_ordering: Vec, /// Execution metrics metrics: ExecutionPlanMetricsSet, file_compression_type: FileCompressionType, @@ -67,16 +66,18 @@ impl NdJsonExec { ) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = PlanPropertiesCache::new_default(projected_schema); + let cache = Self::create_cache( + projected_schema, + &projected_output_ordering, + &base_config, + ); Self { base_config, projected_statistics, - projected_output_ordering, metrics: ExecutionPlanMetricsSet::new(), file_compression_type, cache, } - .with_cache() } /// Ref to the base configs @@ -84,29 +85,29 @@ impl NdJsonExec { &self.base_config } - fn output_partitioning_helper(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.base_config.file_groups.len()) + fn output_partitioning_helper(file_scan_config: &FileScanConfig) -> Partitioning { + Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } - fn with_cache(mut self) -> Self { + fn create_cache( + schema: SchemaRef, + orderings: &[LexOrdering], + file_scan_config: &FileScanConfig, + ) -> PlanPropertiesCache { // Equivalence Properties - let eq_properties = EquivalenceProperties::new_with_orderings( - self.schema(), - &self.projected_output_ordering, - ); + let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); - self.cache = PlanPropertiesCache::new( + PlanPropertiesCache::new( eq_properties, - self.output_partitioning_helper(), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode - ); - self + Self::output_partitioning_helper(file_scan_config), // Output Partitioning + ExecutionMode::Bounded, // Execution Mode + ) } fn with_file_groups(mut self, file_groups: Vec>) -> Self { self.base_config.file_groups = file_groups; // Changing file groups may invalidate output partitioning. 
Update it also - let output_partitioning = self.output_partitioning_helper(); + let output_partitioning = Self::output_partitioning_helper(&self.base_config); self.cache = self.cache.with_partitioning(output_partitioning); self } diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index e47c8e516385..810a84646c86 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -88,7 +88,6 @@ pub struct ParquetExec { /// Base configuration for this scan base_config: FileScanConfig, projected_statistics: Statistics, - projected_output_ordering: Vec, /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Optional predicate for row filtering during parquet scan @@ -149,7 +148,11 @@ impl ParquetExec { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = PlanPropertiesCache::new_default(projected_schema); + let cache = Self::create_cache( + projected_schema, + &projected_output_ordering, + &base_config, + ); Self { pushdown_filters: None, reorder_filters: None, @@ -157,7 +160,6 @@ impl ParquetExec { enable_bloom_filter: None, base_config, projected_statistics, - projected_output_ordering, metrics, predicate, pruning_predicate, @@ -166,7 +168,6 @@ impl ParquetExec { parquet_file_reader_factory: None, cache, } - .with_cache() } /// Ref to the base configs @@ -261,29 +262,29 @@ impl ParquetExec { .unwrap_or(config_options.execution.parquet.bloom_filter_enabled) } - fn output_partitioning_helper(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.base_config.file_groups.len()) + fn output_partitioning_helper(file_config: &FileScanConfig) -> Partitioning { + Partitioning::UnknownPartitioning(file_config.file_groups.len()) } - fn with_cache(mut self) -> Self { + fn create_cache( + schema: SchemaRef, + orderings: &[LexOrdering], + file_config: &FileScanConfig, + ) -> PlanPropertiesCache { // Equivalence Properties - let eq_properties = EquivalenceProperties::new_with_orderings( - self.schema(), - &self.projected_output_ordering, - ); + let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); - self.cache = PlanPropertiesCache::new( + PlanPropertiesCache::new( eq_properties, - self.output_partitioning_helper(), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode - ); - self + Self::output_partitioning_helper(file_config), // Output Partitioning + ExecutionMode::Bounded, // Execution Mode + ) } fn with_file_groups(mut self, file_groups: Vec>) -> Self { self.base_config.file_groups = file_groups; // Changing file groups may invalidate output partitioning. 
Update it also - let output_partitioning = self.output_partitioning_helper(); + let output_partitioning = Self::output_partitioning_helper(&self.base_config); self.cache = self.cache.with_partitioning(output_partitioning); self } diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index c7bfe4742bdf..a5ad2d546d41 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1352,22 +1352,20 @@ pub(crate) mod tests { input: Arc, requirement: Vec, ) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); + let cache = Self::create_cache(&input); Self { input, expr: requirement, cache, } - .with_cache() } - fn with_cache(mut self) -> Self { - self.cache = PlanPropertiesCache::new( - self.input.equivalence_properties().clone(), // Equivalence Properties - self.input.output_partitioning().clone(), // Output Partitioning - self.input.execution_mode(), // Execution Mode - ); - self + fn create_cache(input: &Arc) -> PlanPropertiesCache { + PlanPropertiesCache::new( + input.equivalence_properties().clone(), // Equivalence Properties + input.output_partitioning().clone(), // Output Partitioning + input.execution_mode(), // Execution Mode + ) } } diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs index 5ddba79515ee..a806580ce716 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/core/src/physical_optimizer/output_requirements.rs @@ -99,27 +99,25 @@ impl OutputRequirementExec { requirements: Option, dist_requirement: Distribution, ) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); + let cache = Self::create_cache(&input); Self { input, order_requirement: requirements, dist_requirement, cache, } - .with_cache() } pub(crate) fn input(&self) -> Arc { self.input.clone() } - fn with_cache(mut self) -> Self { - self.cache = PlanPropertiesCache::new( - self.input.equivalence_properties().clone(), // Equivalence Properties - self.input.output_partitioning().clone(), // Output Partitioning - self.input.execution_mode(), // Execution Mode - ); - self + fn create_cache(input: &Arc) -> PlanPropertiesCache { + PlanPropertiesCache::new( + input.equivalence_properties().clone(), // Equivalence Properties + input.output_partitioning().clone(), // Output Partitioning + input.execution_mode(), // Execution Mode + ) } } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 1a334678b6d7..e571bc76f4d5 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2013,6 +2013,7 @@ mod tests { col, lit, sum, Extension, GroupingSet, LogicalPlanBuilder, UserDefinedLogicalNodeCore, }; + use datafusion_physical_expr::EquivalenceProperties; fn make_session_state() -> SessionState { let runtime = Arc::new(RuntimeEnv::default()); @@ -2579,19 +2580,19 @@ mod tests { impl NoOpExecutionPlan { fn new(schema: SchemaRef) -> Self { - let cache = PlanPropertiesCache::new_default(schema.clone()); - Self { cache }.with_cache() + let cache = Self::create_cache(schema.clone()); + Self { cache } } - fn with_cache(mut self) -> Self { - self.cache = self - .cache + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + 
eq_properties, // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) + Partitioning::UnknownPartitioning(1), // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 277901ff9915..f8eb67cfdaf5 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -42,7 +42,7 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::{DataFusionError, FileType, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; -use datafusion_physical_expr::{Partitioning, PhysicalSortExpr}; +use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalSortExpr}; use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, PlanPropertiesCache, @@ -376,24 +376,23 @@ impl StatisticsExec { stats.column_statistics.len(), schema.fields().len(), "if defined, the column statistics vector length should be the number of fields" ); - let cache = PlanPropertiesCache::new_default(Arc::new(schema.clone())); + let cache = Self::create_cache(Arc::new(schema.clone())); Self { stats, schema: Arc::new(schema), cache, } - .with_cache() } - fn with_cache(mut self) -> Self { - self.cache = self - .cache + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + eq_properties, // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(2)) + Partitioning::UnknownPartitioning(2), // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index bb016f93c351..dda6d730ce84 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -55,6 +55,7 @@ use tempfile::TempDir; #[cfg(feature = "parquet")] pub use datafusion_common::test_util::parquet_test_data; pub use datafusion_common::test_util::{arrow_test_data, get_data_dir}; +use datafusion_physical_expr::EquivalenceProperties; /// Scan an empty data source, mainly used in tests pub fn scan_empty( @@ -226,7 +227,6 @@ impl TableProvider for TestTableProvider { pub struct UnboundedExec { batch_produce: Option, batch: RecordBatch, - partitions: usize, cache: PlanPropertiesCache, } impl UnboundedExec { @@ -238,29 +238,30 @@ impl UnboundedExec { batch: RecordBatch, partitions: usize, ) -> Self { - let cache = PlanPropertiesCache::new_default(batch.schema()); + let cache = Self::create_cache(batch.schema(), batch_produce, partitions); Self { batch_produce, batch, - partitions, cache, } - .with_cache() } - fn with_cache(mut self) -> Self { - let mode = if self.batch_produce.is_none() { + fn create_cache( + schema: SchemaRef, + batch_produce: Option, + n_partitions: usize, + ) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + let mode = if batch_produce.is_none() { ExecutionMode::Unbounded } else { ExecutionMode::Bounded }; - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(self.partitions)) - // Execution Mode - .with_exec_mode(mode); - self + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(n_partitions), + mode, + ) } } 
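Every hunk in this patch applies the same refactor: the mutate-after-construction `with_cache()` builder is replaced by a static `create_cache()` helper that computes the `PlanPropertiesCache` up front, before `Self` is assembled, so `Self` is never constructed with a placeholder default cache. A minimal sketch of the convention for a hypothetical single-partition, bounded operator (`MyExec` and its fields are illustrative only, not part of the patch):

use arrow::datatypes::SchemaRef;
use datafusion_physical_expr::{EquivalenceProperties, Partitioning};
use datafusion_physical_plan::{ExecutionMode, PlanPropertiesCache};

struct MyExec {
    schema: SchemaRef,
    cache: PlanPropertiesCache,
}

impl MyExec {
    fn new(schema: SchemaRef) -> Self {
        // Plan properties are computed once, in the constructor, rather
        // than patched onto a default cache afterwards.
        let cache = Self::create_cache(schema.clone());
        Self { schema, cache }
    }

    fn create_cache(schema: SchemaRef) -> PlanPropertiesCache {
        let eq_properties = EquivalenceProperties::new(schema);
        PlanPropertiesCache::new(
            eq_properties,                        // Equivalence properties
            Partitioning::UnknownPartitioning(1), // Output partitioning
            ExecutionMode::Bounded,               // Execution mode
        )
    }
}

Operators whose partitioning depends on mutable state (e.g. the file-group setters above) recompute only the affected property afterwards via `cache.with_partitioning(...)` instead of rebuilding the whole cache.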
diff --git a/datafusion/core/tests/custom_sources.rs b/datafusion/core/tests/custom_sources.rs index a167258ee1d5..11f29192276c 100644 --- a/datafusion/core/tests/custom_sources.rs +++ b/datafusion/core/tests/custom_sources.rs @@ -38,6 +38,7 @@ use datafusion::scalar::ScalarValue; use datafusion_common::cast::as_primitive_array; use datafusion_common::project_schema; use datafusion_common::stats::Precision; +use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_physical_plan::{ExecutionMode, PlanPropertiesCache}; @@ -81,19 +82,18 @@ impl CustomExecutionPlan { let schema = TEST_CUSTOM_SCHEMA_REF!(); let schema = project_schema(&schema, projection.as_ref()).expect("projected schema"); - let cache = PlanPropertiesCache::new_default(schema); - Self { projection, cache }.with_cache() + let cache = Self::create_cache(schema); + Self { projection, cache } } - fn with_cache(mut self) -> Self { - self.cache = self - .cache + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + eq_properties, // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) - // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs index 9423f0170c7e..da00effa00a8 100644 --- a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs +++ b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs @@ -35,6 +35,7 @@ use datafusion::scalar::ScalarValue; use datafusion_common::cast::as_primitive_array; use datafusion_common::{internal_err, not_impl_err, DataFusionError}; use datafusion_expr::expr::{BinaryExpr, Cast}; +use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; @@ -62,19 +63,17 @@ struct CustomPlan { impl CustomPlan { fn new(schema: SchemaRef, batches: Vec) -> Self { - let cache = PlanPropertiesCache::new_default(schema); - Self { batches, cache }.with_cache() + let cache = Self::create_cache(schema); + Self { batches, cache } } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) - // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs index 315c7cb6dd26..37854908f021 100644 --- a/datafusion/core/tests/custom_sources_cases/statistics.rs +++ b/datafusion/core/tests/custom_sources_cases/statistics.rs @@ -33,6 +33,7 @@ use datafusion::{ scalar::ScalarValue, }; use datafusion_common::{project_schema, stats::Precision}; +use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; @@ -52,24 +53,22 @@ impl StatisticsValidation { schema.fields().len(), "the column statistics vector length should be the number of fields" ); - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(schema.clone()); Self { 
stats, schema, cache, } - .with_cache() } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(2)) - // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); - self + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(2), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index 947376dfb6a0..f2b6f6c93615 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -91,6 +91,7 @@ use datafusion::{ }; use async_trait::async_trait; +use datafusion_physical_expr::EquivalenceProperties; use futures::{Stream, StreamExt}; /// Execute the specified sql and return the resulting record batches @@ -416,19 +417,18 @@ struct TopKExec { impl TopKExec { fn new(input: Arc, k: usize) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); - Self { input, k, cache }.with_cache() + let cache = Self::create_cache(input.schema()); + Self { input, k, cache } } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) - // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); - self + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index ea3fc3a737b2..fa5b65e40123 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -260,9 +260,6 @@ pub struct AggregateExec { /// We need the input schema of partial aggregate to be able to deserialize aggregate /// expressions from protobuf for final aggregate. 
pub input_schema: SchemaRef, - /// The mapping used to normalize expressions like Partitioning and - /// PhysicalSortExpr that maps input to output - projection_mapping: ProjectionMapping, /// Execution metrics metrics: ExecutionPlanMetricsSet, required_input_ordering: Option, @@ -365,8 +362,14 @@ impl AggregateExec { let required_input_ordering = (!new_requirement.is_empty()).then_some(new_requirement); - let cache = PlanPropertiesCache::new_default(schema.clone()); - let aggregate = AggregateExec { + let cache = Self::create_cache( + &input, + schema.clone(), + &projection_mapping, + &mode, + &input_order_mode, + ); + Ok(AggregateExec { mode, group_by, aggr_expr, @@ -374,14 +377,12 @@ impl AggregateExec { input, schema, input_schema, - projection_mapping, metrics: ExecutionPlanMetricsSet::new(), required_input_ordering, limit: None, input_order_mode, cache, - }; - Ok(aggregate.with_cache()) + }) } /// Aggregation mode (full, partial) @@ -505,26 +506,31 @@ impl AggregateExec { true } - fn with_cache(mut self) -> Self { + fn create_cache( + input: &Arc, + schema: SchemaRef, + projection_mapping: &ProjectionMapping, + mode: &AggregateMode, + input_order_mode: &InputOrderMode, + ) -> PlanPropertiesCache { // Construct equivalence properties: - let eq_properties = self - .input + let eq_properties = input .equivalence_properties() - .project(&self.projection_mapping, self.schema()); + .project(projection_mapping, schema); // Get output partitioning: - let mut output_partitioning = self.input.output_partitioning().clone(); - if self.mode.is_first_stage() { + let mut output_partitioning = input.output_partitioning().clone(); + if mode.is_first_stage() { // First stage aggregation will not change the output partitioning, // but needs to respect aliases (e.g. mapping in the GROUP BY // expression). 
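// For instance, under a projection like `SELECT a AS a2 ... GROUP BY a`,
// a first-stage `Hash([a], n)` partitioning is rewritten to
// `Hash([a2], n)`; expressions with no projected counterpart fall back
// to `UnKnownColumn` placeholders. (Illustrative SQL, not from the patch.)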
- let input_eq_properties = self.input.equivalence_properties(); + let input_eq_properties = input.equivalence_properties(); if let Partitioning::Hash(exprs, part) = output_partitioning { let normalized_exprs = exprs .iter() .map(|expr| { input_eq_properties - .project_expr(expr, &self.projection_mapping) + .project_expr(expr, projection_mapping) .unwrap_or_else(|| { Arc::new(UnKnownColumn::new(&expr.to_string())) }) @@ -535,18 +541,15 @@ impl AggregateExec { } // Determine execution mode: - let mut exec_mode = self.input.execution_mode(); + let mut exec_mode = input.execution_mode(); if exec_mode == ExecutionMode::Unbounded - && self.input_order_mode == InputOrderMode::Linear + && *input_order_mode == InputOrderMode::Linear { // Cannot run without breaking the pipeline exec_mode = ExecutionMode::PipelineBreaking; } - self.cache = - PlanPropertiesCache::new(eq_properties, output_partitioning, exec_mode); - - self + PlanPropertiesCache::new(eq_properties, output_partitioning, exec_mode) } pub fn input_order_mode(&self) -> &InputOrderMode { @@ -1622,19 +1625,19 @@ mod tests { impl TestYieldingExec { fn new(yield_first: bool) -> Self { let schema = some_data().0; - let cache = PlanPropertiesCache::new_default(schema); - Self { yield_first, cache }.with_cache() + let cache = Self::create_cache(schema); + Self { yield_first, cache } } - fn with_cache(mut self) -> Self { - self.cache = self - .cache + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + eq_properties, // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) + Partitioning::UnknownPartitioning(1), // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index 99b3add2acd6..731f3e3c7ebf 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -29,6 +29,7 @@ use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; use futures::StreamExt; @@ -55,7 +56,7 @@ impl AnalyzeExec { input: Arc, schema: SchemaRef, ) -> Self { - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(&input, schema.clone()); AnalyzeExec { verbose, show_statistics, @@ -63,7 +64,6 @@ impl AnalyzeExec { schema, cache, } - .with_cache() } /// access to verbose @@ -81,15 +81,14 @@ impl AnalyzeExec { &self.input } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) - // Execution Mode - .with_exec_mode(self.input.execution_mode()); - - self + fn create_cache( + input: &Arc, + schema: SchemaRef, + ) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + let output_partitioning = Partitioning::UnknownPartitioning(1); + let exec_mode = input.execution_mode(); + PlanPropertiesCache::new(eq_properties, output_partitioning, exec_mode) } } diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index e01060f3784d..e83bce0664a3 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ 
b/datafusion/physical-plan/src/coalesce_batches.rs @@ -54,14 +54,13 @@ pub struct CoalesceBatchesExec { impl CoalesceBatchesExec { /// Create a new CoalesceBatchesExec pub fn new(input: Arc, target_batch_size: usize) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); + let cache = Self::create_cache(&input); Self { input, target_batch_size, metrics: ExecutionPlanMetricsSet::new(), cache, } - .with_cache() } /// The input plan @@ -74,16 +73,14 @@ impl CoalesceBatchesExec { self.target_batch_size } - fn with_cache(mut self) -> Self { + fn create_cache(input: &Arc) -> PlanPropertiesCache { // The coalesce batches operator does not make any changes to the // partitioning of its input. - self.cache = PlanPropertiesCache::new( - self.input.equivalence_properties().clone(), // Equivalence Properties - self.input.output_partitioning().clone(), // Output Partitioning - self.input.execution_mode(), // Execution Mode - ); - - self + PlanPropertiesCache::new( + input.equivalence_properties().clone(), // Equivalence Properties + input.output_partitioning().clone(), // Output Partitioning + input.execution_mode(), // Execution Mode + ) } } diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 255e996bd122..27f58c9bfd85 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -44,13 +44,12 @@ pub struct CoalescePartitionsExec { impl CoalescePartitionsExec { /// Create a new CoalescePartitionsExec pub fn new(input: Arc) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); + let cache = Self::create_cache(&input); CoalescePartitionsExec { input, metrics: ExecutionPlanMetricsSet::new(), cache, } - .with_cache() } /// Input execution plan @@ -58,18 +57,16 @@ impl CoalescePartitionsExec { &self.input } - fn with_cache(mut self) -> Self { + fn create_cache(input: &Arc) -> PlanPropertiesCache { // Coalescing partitions loses existing orderings: - let mut eq_properties = self.input.equivalence_properties().clone(); + let mut eq_properties = input.equivalence_properties().clone(); eq_properties.clear_orderings(); - self.cache = PlanPropertiesCache::new( + PlanPropertiesCache::new( eq_properties, // Equivalence Properties Partitioning::UnknownPartitioning(1), // Output Partitioning - self.input.execution_mode(), // Execution Mode - ); - - self + input.execution_mode(), // Execution Mode + ) } } diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index d91395825135..942bee81f472 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -30,6 +30,7 @@ use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; use log::trace; @@ -46,20 +47,19 @@ pub struct EmptyExec { impl EmptyExec { /// Create a new EmptyExec pub fn new(schema: SchemaRef) -> Self { - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(schema.clone(), 1); EmptyExec { schema, partitions: 1, cache, } - .with_cache() } /// Create a new EmptyExec with specified partition number pub fn with_partitions(mut self, partitions: usize) -> Self { self.partitions = partitions; // Changing partitions may invalidate output partitioning, so update it: - let output_partitioning = 
self.output_partitioning_helper(); + let output_partitioning = Self::output_partitioning_helper(self.partitions); self.cache = self.cache.with_partitioning(output_partitioning); self } @@ -68,21 +68,20 @@ impl EmptyExec { Ok(vec![]) } - fn output_partitioning_helper(&self) -> Partitioning { - Partitioning::UnknownPartitioning(self.partitions) + fn output_partitioning_helper(n_partitions: usize) -> Partitioning { + Partitioning::UnknownPartitioning(n_partitions) } - fn with_cache(mut self) -> Self { - let output_partitioning = self.output_partitioning_helper(); - - self.cache = self - .cache + fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + let output_partitioning = Self::output_partitioning_helper(n_partitions); + PlanPropertiesCache::new( + eq_properties, // Output Partitioning - .with_partitioning(output_partitioning) + output_partitioning, // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index 935b37c168da..689ef32aa1a9 100644 --- a/datafusion/physical-plan/src/explain.rs +++ b/datafusion/physical-plan/src/explain.rs @@ -28,6 +28,7 @@ use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatc use datafusion_common::display::StringifiedPlan; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; use log::trace; @@ -52,14 +53,13 @@ impl ExplainExec { stringified_plans: Vec, verbose: bool, ) -> Self { - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(schema.clone()); ExplainExec { schema, stringified_plans, verbose, cache, } - .with_cache() } /// The strings to be printed @@ -72,15 +72,13 @@ impl ExplainExec { self.verbose } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) - // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index cc8fdcbcd0cd..d6942f0d5678 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -73,15 +73,15 @@ impl FilterExec { ) -> Result { match predicate.data_type(input.schema().as_ref())? 
{ DataType::Boolean => { - let cache = PlanPropertiesCache::new_default(input.schema()); + let default_selectivity = 20; + let cache = Self::create_cache(&input, &predicate, default_selectivity)?; Ok(Self { predicate, input: input.clone(), metrics: ExecutionPlanMetricsSet::new(), - default_selectivity: 20, + default_selectivity, cache, - } - .with_cache()) + }) } other => { plan_err!("Filter predicate must return boolean values, not {other:?}") @@ -115,12 +115,58 @@ impl FilterExec { self.default_selectivity } - fn with_cache(mut self) -> Self { + fn statistics_helper( + input: &Arc, + predicate: &Arc, + default_selectivity: u8, + ) -> Result { + let input_stats = input.statistics()?; + let schema = input.schema(); + if !check_support(predicate, &schema) { + let selectivity = default_selectivity as f64 / 100.0; + let mut stats = input_stats.into_inexact(); + stats.num_rows = stats.num_rows.with_estimated_selectivity(selectivity); + stats.total_byte_size = stats + .total_byte_size + .with_estimated_selectivity(selectivity); + return Ok(stats); + } + + let num_rows = input_stats.num_rows; + let total_byte_size = input_stats.total_byte_size; + let input_analysis_ctx = AnalysisContext::try_from_statistics( + &input.schema(), + &input_stats.column_statistics, + )?; + + let analysis_ctx = analyze(predicate, input_analysis_ctx, &schema)?; + + // Estimate (inexact) selectivity of predicate + let selectivity = analysis_ctx.selectivity.unwrap_or(1.0); + let num_rows = num_rows.with_estimated_selectivity(selectivity); + let total_byte_size = total_byte_size.with_estimated_selectivity(selectivity); + + let column_statistics = collect_new_statistics( + &input_stats.column_statistics, + analysis_ctx.boundaries, + ); + Ok(Statistics { + num_rows, + total_byte_size, + column_statistics, + }) + } + + fn create_cache( + input: &Arc, + predicate: &Arc, + default_selectivity: u8, + ) -> Result { // Combine the equal predicates with the input equivalence properties // to construct the equivalence properties: - let stats = self.statistics().unwrap(); - let mut eq_properties = self.input.equivalence_properties().clone(); - let (equal_pairs, _) = collect_columns_from_predicate(&self.predicate); + let stats = Self::statistics_helper(input, predicate, default_selectivity)?; + let mut eq_properties = input.equivalence_properties().clone(); + let (equal_pairs, _) = collect_columns_from_predicate(predicate); for (lhs, rhs) in equal_pairs { let lhs_expr = Arc::new(lhs.clone()) as _; let rhs_expr = Arc::new(rhs.clone()) as _; @@ -128,19 +174,17 @@ impl FilterExec { } // Add the columns that have only one viable value (singleton) after // filtering to constants. - let constants = collect_columns(self.predicate()) + let constants = collect_columns(predicate) .into_iter() .filter(|column| stats.column_statistics[column.index()].is_singleton()) .map(|column| Arc::new(column) as _); eq_properties = eq_properties.add_constants(constants); - self.cache = PlanPropertiesCache::new( + Ok(PlanPropertiesCache::new( eq_properties, - self.input.output_partitioning().clone(), // Output Partitioning - self.input.execution_mode(), // Execution Mode - ); - - self + input.output_partitioning().clone(), // Output Partitioning + input.execution_mode(), // Execution Mode + )) } } @@ -211,43 +255,7 @@ impl ExecutionPlan for FilterExec { /// The output statistics of a filtering operation can be estimated if the /// predicate's selectivity value can be determined for the incoming data. 
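    /// When the predicate is not supported by the analysis framework, the
    /// operator falls back to `default_selectivity` (20% unless overridden);
    /// e.g. an input reporting 1000 rows then yields an inexact estimate of
    /// 200 output rows, with `total_byte_size` scaled by the same factor.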
fn statistics(&self) -> Result { - let predicate = self.predicate(); - - let input_stats = self.input.statistics()?; - let schema = self.schema(); - if !check_support(predicate, &schema) { - let selectivity = self.default_selectivity as f64 / 100.0; - let mut stats = input_stats.into_inexact(); - stats.num_rows = stats.num_rows.with_estimated_selectivity(selectivity); - stats.total_byte_size = stats - .total_byte_size - .with_estimated_selectivity(selectivity); - return Ok(stats); - } - - let num_rows = input_stats.num_rows; - let total_byte_size = input_stats.total_byte_size; - let input_analysis_ctx = AnalysisContext::try_from_statistics( - &self.input.schema(), - &input_stats.column_statistics, - )?; - - let analysis_ctx = analyze(predicate, input_analysis_ctx, &self.schema())?; - - // Estimate (inexact) selectivity of predicate - let selectivity = analysis_ctx.selectivity.unwrap_or(1.0); - let num_rows = num_rows.with_estimated_selectivity(selectivity); - let total_byte_size = total_byte_size.with_estimated_selectivity(selectivity); - - let column_statistics = collect_new_statistics( - &input_stats.column_statistics, - analysis_ctx.boundaries, - ); - Ok(Statistics { - num_rows, - total_byte_size, - column_statistics, - }) + Self::statistics_helper(&self.input, self.predicate(), self.default_selectivity) } } diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index b20e8cac7926..472c65f25b30 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -35,7 +35,9 @@ use arrow_array::{ArrayRef, UInt64Array}; use arrow_schema::{DataType, Field, Schema}; use datafusion_common::{exec_err, internal_err, DataFusionError, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{Distribution, PhysicalSortRequirement}; +use datafusion_physical_expr::{ + Distribution, EquivalenceProperties, PhysicalSortRequirement, +}; use async_trait::async_trait; use futures::StreamExt; @@ -104,7 +106,7 @@ impl FileSinkExec { sort_order: Option>, ) -> Self { let count_schema = make_count_schema(); - let cache = PlanPropertiesCache::new_default(count_schema); + let cache = Self::create_schema(&input, count_schema); Self { input, sink, @@ -113,7 +115,6 @@ impl FileSinkExec { sort_order, cache, } - .with_cache() } fn execute_input_stream( @@ -176,15 +177,16 @@ impl FileSinkExec { self.sink.metrics() } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) - // Execution Mode - .with_exec_mode(self.input.execution_mode()); - - self + fn create_schema( + input: &Arc, + schema: SchemaRef, + ) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + input.execution_mode(), + ) } } diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 8b12b02b4667..42758e635060 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -27,9 +27,9 @@ use crate::coalesce_batches::concat_batches; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ - ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, - ExecutionPlan, PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, - Statistics, + 
exec_mode_flatten, ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, + ExecutionMode, ExecutionPlan, PlanPropertiesCache, RecordBatchStream, + SendableRecordBatchStream, Statistics, }; use arrow::datatypes::{Fields, Schema, SchemaRef}; @@ -77,7 +77,7 @@ impl CrossJoinExec { }; let schema = Arc::new(Schema::new(all_columns)); - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(&left, &right, schema.clone()); CrossJoinExec { left, right, @@ -86,7 +86,6 @@ impl CrossJoinExec { metrics: ExecutionPlanMetricsSet::default(), cache, } - .with_cache() } /// left (build) side which gets loaded in memory @@ -99,15 +98,19 @@ impl CrossJoinExec { &self.right } - fn with_cache(mut self) -> Self { + fn create_cache( + left: &Arc, + right: &Arc, + schema: SchemaRef, + ) -> PlanPropertiesCache { // Calculate equivalence properties // TODO: Check equivalence properties of cross join, it may preserve // ordering in some cases. let eq_properties = join_equivalence_properties( - self.left.equivalence_properties().clone(), - self.right.equivalence_properties().clone(), + left.equivalence_properties().clone(), + right.equivalence_properties().clone(), &JoinType::Full, - self.schema(), + schema, &[false, false], None, &[], @@ -117,19 +120,18 @@ impl CrossJoinExec { // TODO: Optimize the cross join implementation to generate M * N // partitions. let output_partitioning = adjust_right_output_partitioning( - self.right.output_partitioning(), - self.left.schema().fields.len(), + right.output_partitioning(), + left.schema().fields.len(), ); // Determine the execution mode: - let mode = match (self.left.execution_mode(), self.right.execution_mode()) { - (ExecutionMode::Bounded, ExecutionMode::Bounded) => ExecutionMode::Bounded, + let mut mode = exec_mode_flatten([left, right]); + if mode.is_unbounded() { // If any of the inputs is unbounded, cross join breaks the pipeline. 
- (_, _) => ExecutionMode::PipelineBreaking, - }; + mode = ExecutionMode::PipelineBreaking; + } - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 3aae053151cd..2b88ec449a04 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -65,6 +65,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::PhysicalExprRef; +use crate::joins::utils::JoinOnRef; use ahash::RandomState; use futures::{ready, Stream, StreamExt, TryStreamExt}; @@ -327,7 +328,14 @@ impl HashJoinExec { let random_state = RandomState::with_seeds(0, 0, 0, 0); - let cache = PlanPropertiesCache::new_default(Arc::new(schema.clone())); + let cache = Self::create_cache( + &left, + &right, + Arc::new(schema.clone()), + *join_type, + &on, + partition_mode, + ); Ok(HashJoinExec { left, @@ -343,8 +351,7 @@ impl HashJoinExec { column_indices, null_equals_null, cache, - } - .with_cache()) + }) } /// left (build) side which gets hashed @@ -399,25 +406,29 @@ impl HashJoinExec { JoinSide::Right } - fn with_cache(mut self) -> Self { - let left = &self.left; - let right = &self.right; - let schema = self.schema(); + fn create_cache( + left: &Arc, + right: &Arc, + schema: SchemaRef, + join_type: JoinType, + on: JoinOnRef, + mode: PartitionMode, + ) -> PlanPropertiesCache { // Calculate equivalence properties: let eq_properties = join_equivalence_properties( left.equivalence_properties().clone(), right.equivalence_properties().clone(), - &self.join_type, + &join_type, schema, - &Self::maintains_input_order(self.join_type), + &Self::maintains_input_order(join_type), Some(Self::probe_side()), - &self.on, + on, ); // Get output partitioning: let left_columns_len = left.schema().fields.len(); - let output_partitioning = match self.mode { - PartitionMode::CollectLeft => match self.join_type { + let output_partitioning = match mode { + PartitionMode::CollectLeft => match join_type { JoinType::Inner | JoinType::Right => adjust_right_output_partitioning( right.output_partitioning(), left_columns_len, @@ -433,7 +444,7 @@ impl HashJoinExec { ), }, PartitionMode::Partitioned => partitioned_join_output_partitioning( - self.join_type, + join_type, left.output_partitioning(), right.output_partitioning(), left_columns_len, @@ -449,7 +460,7 @@ impl HashJoinExec { let pipeline_breaking = left.execution_mode().is_unbounded() || (right.execution_mode().is_unbounded() && matches!( - self.join_type, + join_type, JoinType::Left | JoinType::Full | JoinType::LeftAnti @@ -462,8 +473,7 @@ impl HashJoinExec { exec_mode_flatten([left, right]) }; - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 1978338d2b6a..89beac14816d 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -109,19 +109,19 @@ impl NestedLoopJoinExec { check_join_is_valid(&left_schema, &right_schema, &[])?; let (schema, column_indices) = build_join_schema(&left_schema, &right_schema, join_type); - let cache = 
PlanPropertiesCache::new_default(Arc::new(schema.clone())); + let schema = Arc::new(schema); + let cache = Self::create_cache(&left, &right, schema.clone(), *join_type); Ok(NestedLoopJoinExec { left, right, filter, join_type: *join_type, - schema: Arc::new(schema), + schema, inner_table: Default::default(), column_indices, metrics: Default::default(), cache, - } - .with_cache()) + }) } /// left side @@ -144,39 +144,43 @@ impl NestedLoopJoinExec { &self.join_type } - fn with_cache(mut self) -> Self { + fn create_cache( + left: &Arc, + right: &Arc, + schema: SchemaRef, + join_type: JoinType, + ) -> PlanPropertiesCache { // Calculate equivalence properties: let eq_properties = join_equivalence_properties( - self.left.equivalence_properties().clone(), - self.right.equivalence_properties().clone(), - &self.join_type, - self.schema(), - &self.maintains_input_order(), + left.equivalence_properties().clone(), + right.equivalence_properties().clone(), + &join_type, + schema, + &[false, false], None, // No on columns in nested loop join &[], ); // Get output partitioning, - let output_partitioning = if self.join_type == JoinType::Full { - self.left.output_partitioning().clone() + let output_partitioning = if join_type == JoinType::Full { + left.output_partitioning().clone() } else { partitioned_join_output_partitioning( - self.join_type, - self.left.output_partitioning(), - self.right.output_partitioning(), - self.left.schema().fields.len(), + join_type, + left.output_partitioning(), + right.output_partitioning(), + left.schema().fields.len(), ) }; // Determine execution mode: - let mut mode = exec_mode_flatten([&self.left, &self.right]); + let mut mode = exec_mode_flatten([left, right]); if mode.is_unbounded() { mode = ExecutionMode::PipelineBreaking; } - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 0d7cd995a5a2..f7d754a99e0e 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -33,7 +33,7 @@ use std::task::{Context, Poll}; use crate::expressions::PhysicalSortExpr; use crate::joins::utils::{ build_join_schema, check_join_is_valid, estimate_join_statistics, - partitioned_join_output_partitioning, JoinFilter, JoinOn, + partitioned_join_output_partitioning, JoinFilter, JoinOn, JoinOnRef, }; use crate::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; use crate::{ @@ -137,7 +137,7 @@ impl SortMergeJoinExec { let schema = Arc::new(build_join_schema(&left_schema, &right_schema, &join_type).0); - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(&left, &right, schema.clone(), join_type, &on); Ok(Self { left, right, @@ -151,8 +151,7 @@ impl SortMergeJoinExec { sort_options, null_equals_null, cache, - } - .with_cache()) + }) } /// Get probe side (e.g streaming side) information for this sort merge join. 
@@ -201,32 +200,37 @@ impl SortMergeJoinExec { self.left.as_ref() } - fn with_cache(mut self) -> Self { + fn create_cache( + left: &Arc, + right: &Arc, + schema: SchemaRef, + join_type: JoinType, + join_on: JoinOnRef, + ) -> PlanPropertiesCache { // Calculate equivalence properties: let eq_properties = join_equivalence_properties( - self.left.equivalence_properties().clone(), - self.right.equivalence_properties().clone(), - &self.join_type, - self.schema(), - &self.maintains_input_order(), - Some(Self::probe_side(&self.join_type)), - self.on(), + left.equivalence_properties().clone(), + right.equivalence_properties().clone(), + &join_type, + schema, + &Self::maintains_input_order(join_type), + Some(Self::probe_side(&join_type)), + join_on, ); // Get output partitioning: - let left_columns_len = self.left.schema().fields.len(); + let left_columns_len = left.schema().fields.len(); let output_partitioning = partitioned_join_output_partitioning( - self.join_type, - self.left.output_partitioning(), - self.right.output_partitioning(), + join_type, + left.output_partitioning(), + right.output_partitioning(), left_columns_len, ); // Determine execution mode: - let mode = exec_mode_flatten([&self.left, &self.right]); + let mode = exec_mode_flatten([left, right]); - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 5fd89e98a58f..4e07b10dd517 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -43,7 +43,7 @@ use crate::joins::stream_join_utils::{ use crate::joins::utils::{ apply_join_filter_to_indices, build_batch_from_indices, build_join_schema, check_join_is_valid, partitioned_join_output_partitioning, ColumnIndex, JoinFilter, - JoinHashMapType, JoinOn, StatefulStreamResult, + JoinHashMapType, JoinOn, JoinOnRef, StatefulStreamResult, }; use crate::{ exec_mode_flatten, @@ -233,8 +233,8 @@ impl SymmetricHashJoinExec { // Initialize the random state for the join operation: let random_state = RandomState::with_seeds(0, 0, 0, 0); - - let cache = PlanPropertiesCache::new_default(Arc::new(schema)); + let schema = Arc::new(schema); + let cache = Self::create_cache(&left, &right, schema.clone(), *join_type, &on); Ok(SymmetricHashJoinExec { left, right, @@ -249,37 +249,41 @@ impl SymmetricHashJoinExec { right_sort_exprs, mode, cache, - } - .with_cache()) + }) } - fn with_cache(mut self) -> Self { + fn create_cache( + left: &Arc, + right: &Arc, + schema: SchemaRef, + join_type: JoinType, + join_on: JoinOnRef, + ) -> PlanPropertiesCache { // Calculate equivalence properties: let eq_properties = join_equivalence_properties( - self.left.equivalence_properties().clone(), - self.right.equivalence_properties().clone(), - &self.join_type, - self.schema(), - &self.maintains_input_order(), + left.equivalence_properties().clone(), + right.equivalence_properties().clone(), + &join_type, + schema, + &[false, false], // Has alternating probe side None, - self.on(), + join_on, ); // Get output partitioning: - let left_columns_len = self.left.schema().fields.len(); + let left_columns_len = left.schema().fields.len(); let output_partitioning = partitioned_join_output_partitioning( - self.join_type, - self.left.output_partitioning(), - self.right.output_partitioning(), + join_type, + 
left.output_partitioning(), + right.output_partitioning(), left_columns_len, ); // Determine execution mode: - let mode = exec_mode_flatten([&self.left, &self.right]); + let mode = exec_mode_flatten([left, right]); - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } /// left stream diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 52bfb0063d40..f90bbf061d38 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -540,22 +540,6 @@ impl PlanPropertiesCache { } } - /// Construct a default `PlanPropertiesCache`, for a given schema. - pub fn new_default(schema: SchemaRef) -> PlanPropertiesCache { - // Default values are the most restrictive possible values. - let eq_properties = EquivalenceProperties::new(schema); - // Please note that this default is not safe, and should be overwritten. - let partitioning = Partitioning::UnknownPartitioning(0); - let exec_mode = ExecutionMode::PipelineBreaking; - let output_ordering = None; - Self { - eq_properties, - partitioning, - exec_mode, - output_ordering, - } - } - /// Overwrite output partitioning with its new value. pub fn with_partitioning(mut self, partitioning: Partitioning) -> Self { self.partitioning = partitioning; diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 59559e84fd75..a4b924d71066 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -57,7 +57,7 @@ pub struct GlobalLimitExec { impl GlobalLimitExec { /// Create a new GlobalLimitExec pub fn new(input: Arc, skip: usize, fetch: Option) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); + let cache = Self::create_cache(&input); GlobalLimitExec { input, skip, @@ -65,7 +65,6 @@ impl GlobalLimitExec { metrics: ExecutionPlanMetricsSet::new(), cache, } - .with_cache() } /// Input execution plan @@ -83,14 +82,12 @@ impl GlobalLimitExec { self.fetch } - fn with_cache(mut self) -> Self { - self.cache = PlanPropertiesCache::new( - self.input.equivalence_properties().clone(), // Equivalence Properties - Partitioning::UnknownPartitioning(1), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode - ); - - self + fn create_cache(input: &Arc) -> PlanPropertiesCache { + PlanPropertiesCache::new( + input.equivalence_properties().clone(), // Equivalence Properties + Partitioning::UnknownPartitioning(1), // Output Partitioning + ExecutionMode::Bounded, // Execution Mode + ) } } @@ -276,14 +273,13 @@ pub struct LocalLimitExec { impl LocalLimitExec { /// Create a new LocalLimitExec partition pub fn new(input: Arc, fetch: usize) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); + let cache = Self::create_cache(&input); Self { input, fetch, metrics: ExecutionPlanMetricsSet::new(), cache, } - .with_cache() } /// Input execution plan @@ -296,14 +292,12 @@ impl LocalLimitExec { self.fetch } - fn with_cache(mut self) -> Self { - self.cache = PlanPropertiesCache::new( - self.input.equivalence_properties().clone(), // Equivalence Properties - self.input.output_partitioning().clone(), // Output Partitioning - ExecutionMode::Bounded, // Execution Mode - ); - - self + fn create_cache(input: &Arc) -> PlanPropertiesCache { + PlanPropertiesCache::new( + input.equivalence_properties().clone(), // Equivalence Properties + input.output_partitioning().clone(), // Output Partitioning + 
ExecutionMode::Bounded, // Execution Mode
+ )
 }
 }

diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs
index 206d22e72226..f6039ee8b3ed 100644
--- a/datafusion/physical-plan/src/memory.rs
+++ b/datafusion/physical-plan/src/memory.rs
@@ -153,7 +153,7 @@ impl MemoryExec {
 projection: Option>,
 ) -> Result {
 let projected_schema = project_schema(&schema, projection.as_ref())?;
- let cache = PlanPropertiesCache::new_default(projected_schema.clone());
+ let cache = Self::create_cache(projected_schema.clone(), &[], partitions);
 Ok(Self {
 partitions: partitions.to_vec(),
 schema,
@@ -161,8 +161,7 @@ impl MemoryExec {
 projection,
 sort_information: vec![],
 cache,
- }
- .with_cache())
+ })
 }

 pub fn partitions(&self) -> &[Vec] {
@@ -193,27 +192,29 @@ impl MemoryExec {
 self.sort_information = sort_information;

 // We need to update equivalence properties when updating sort information.
- let eq_properties = self.equivalent_properties_helper();
+ let eq_properties = EquivalenceProperties::new_with_orderings(
+ self.schema(),
+ &self.sort_information,
+ );
 self.cache = self.cache.with_eq_properties(eq_properties);
- self.with_cache()
+ self
 }

 pub fn original_schema(&self) -> SchemaRef {
 self.schema.clone()
 }

- fn equivalent_properties_helper(&self) -> EquivalenceProperties {
- EquivalenceProperties::new_with_orderings(self.schema(), &self.sort_information)
- }
-
- fn with_cache(mut self) -> Self {
- self.cache = PlanPropertiesCache::new(
- self.equivalent_properties_helper(), // Equivalence Properties
- Partitioning::UnknownPartitioning(self.partitions.len()), // Output Partitioning
- ExecutionMode::Bounded, // Execution Mode
- );
-
- self
+ fn create_cache(
+ schema: SchemaRef,
+ orderings: &[LexOrdering],
+ partitions: &[Vec],
+ ) -> PlanPropertiesCache {
+ let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings);
+ PlanPropertiesCache::new(
+ eq_properties, // Equivalence Properties
+ Partitioning::UnknownPartitioning(partitions.len()), // Output Partitioning
+ ExecutionMode::Bounded, // Execution Mode
+ )
 }
 }

diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs
index a9b27cb13fe1..9d4204ddb589 100644
--- a/datafusion/physical-plan/src/placeholder_row.rs
+++ b/datafusion/physical-plan/src/placeholder_row.rs
@@ -32,6 +32,7 @@ use arrow::record_batch::RecordBatch;
 use arrow_array::RecordBatchOptions;
 use datafusion_common::{internal_err, DataFusionError, Result};
 use datafusion_execution::TaskContext;
+use datafusion_physical_expr::EquivalenceProperties;

 use log::trace;

@@ -48,20 +49,20 @@ pub struct PlaceholderRowExec {
 impl PlaceholderRowExec {
 /// Create a new PlaceholderRowExec
 pub fn new(schema: SchemaRef) -> Self {
- let cache = PlanPropertiesCache::new_default(schema.clone());
+ let partitions = 1;
+ let cache = Self::create_cache(schema.clone(), partitions);
 PlaceholderRowExec {
 schema,
- partitions: 1,
+ partitions,
 cache,
 }
- .with_cache()
 }

 /// Create a new PlaceholderRowExec with specified partition number
 pub fn with_partitions(mut self, partitions: usize) -> Self {
 self.partitions = partitions;
 // Update output partitioning when updating partitions:
- let output_partitioning = self.output_partitioning_helper();
+ let output_partitioning = Self::output_partitioning_helper(self.partitions);
 self.cache = self.cache.with_partitioning(output_partitioning);
 self
 }
@@ -89,20 +90,20 @@ impl PlaceholderRowExec {
 })
 }

- fn output_partitioning_helper(&self) ->
Partitioning { - Partitioning::UnknownPartitioning(self.partitions) + fn output_partitioning_helper(n_partitions: usize) -> Partitioning { + Partitioning::UnknownPartitioning(n_partitions) } - fn with_cache(mut self) -> Self { + fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); // Get output partitioning: - let output_partitioning = self.output_partitioning_helper(); + let output_partitioning = Self::output_partitioning_helper(n_partitions); - self.cache = self - .cache - .with_partitioning(output_partitioning) - .with_exec_mode(ExecutionMode::Bounded); - - self + PlanPropertiesCache::new( + eq_properties, + output_partitioning, + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 4c17aa3d834a..7420cf58b5ce 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -56,9 +56,6 @@ pub struct ProjectionExec { schema: SchemaRef, /// The input plan input: Arc, - /// The mapping used to normalize expressions like Partitioning and - /// PhysicalSortExpr that maps input to output - projection_mapping: ProjectionMapping, /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Cache holding plan properties like equivalences, output partitioning etc. @@ -96,16 +93,14 @@ impl ProjectionExec { // construct a map from the input expressions to the output expression of the Projection let projection_mapping = ProjectionMapping::try_new(&expr, &input_schema)?; - let cache = PlanPropertiesCache::new_default(schema.clone()); - let projection = Self { + let cache = Self::create_cache(&input, &projection_mapping, schema.clone())?; + Ok(Self { expr, schema, input, - projection_mapping, metrics: ExecutionPlanMetricsSet::new(), cache, - }; - projection.with_cache() + }) } /// The projection expressions stored as tuples of (expression, output column name) @@ -118,13 +113,15 @@ impl ProjectionExec { &self.input } - fn with_cache(mut self) -> Result { - let input = &self.input; + fn create_cache( + input: &Arc, + projection_mapping: &ProjectionMapping, + schema: SchemaRef, + ) -> Result { // Calculate equivalence properties: let mut input_eq_properties = input.equivalence_properties().clone(); - input_eq_properties.substitute_oeq_class(&self.projection_mapping)?; - let eq_properties = - input_eq_properties.project(&self.projection_mapping, self.schema.clone()); + input_eq_properties.substitute_oeq_class(projection_mapping)?; + let eq_properties = input_eq_properties.project(projection_mapping, schema); // Calculate output partitioning, which needs to respect aliases: let input_partition = input.output_partitioning(); @@ -134,7 +131,7 @@ impl ProjectionExec { .iter() .map(|expr| { input_eq_properties - .project_expr(expr, &self.projection_mapping) + .project_expr(expr, projection_mapping) .unwrap_or_else(|| { Arc::new(UnKnownColumn::new(&expr.to_string())) }) @@ -145,13 +142,11 @@ impl ProjectionExec { input_partition.clone() }; - self.cache = PlanPropertiesCache::new( + Ok(PlanPropertiesCache::new( eq_properties, output_partitioning, input.execution_mode(), - ); - - Ok(self) + )) } } diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index 97a626c25116..adc675ba2730 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -33,7 +33,8 @@ use 
arrow::record_batch::RecordBatch; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{not_impl_err, DataFusionError, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::Partitioning; +use datafusion_physical_expr::{EquivalenceProperties, Partitioning}; + use futures::{ready, Stream, StreamExt}; /// Recursive query execution plan. @@ -81,7 +82,7 @@ impl RecursiveQueryExec { let work_table = Arc::new(WorkTable::new()); // Use the same work table for both the WorkTableExec and the recursive term let recursive_term = assign_work_table(recursive_term, work_table.clone())?; - let cache = PlanPropertiesCache::new_default(static_term.schema()); + let cache = Self::create_cache(static_term.schema()); Ok(RecursiveQueryExec { name, static_term, @@ -90,17 +91,17 @@ impl RecursiveQueryExec { work_table, metrics: ExecutionPlanMetricsSet::new(), cache, - } - .with_cache()) + }) } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - .with_partitioning(Partitioning::UnknownPartitioning(1)) - .with_exec_mode(ExecutionMode::Bounded); + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); - self + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 6d2835df05f0..dc1e88f52e56 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -44,7 +44,7 @@ use arrow::record_batch::RecordBatch; use datafusion_common::{arrow_datafusion_err, not_impl_err, DataFusionError, Result}; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr, PhysicalSortExpr}; use futures::stream::Stream; use futures::{FutureExt, StreamExt}; @@ -436,12 +436,7 @@ impl ExecutionPlan for RepartitionExec { } fn maintains_input_order(&self) -> Vec { - if self.preserve_order { - vec![true] - } else { - // We preserve ordering when input partitioning is 1 - vec![self.input().output_partitioning().partition_count() <= 1] - } + Self::maintains_input_order_helper(self.input(), self.preserve_order) } fn execute( @@ -602,7 +597,8 @@ impl RepartitionExec { input: Arc, partitioning: Partitioning, ) -> Result { - let cache = PlanPropertiesCache::new_default(input.schema()); + let preserve_order = false; + let cache = Self::create_cache(&input, partitioning.clone(), preserve_order); Ok(RepartitionExec { input, partitioning, @@ -611,27 +607,49 @@ impl RepartitionExec { abort_helper: Arc::new(AbortOnDropMany::<()>(vec![])), })), metrics: ExecutionPlanMetricsSet::new(), - preserve_order: false, + preserve_order, cache, + }) + } + + fn maintains_input_order_helper( + input: &Arc, + preserve_order: bool, + ) -> Vec { + if preserve_order { + vec![true] + } else { + // We preserve ordering when input partitioning is 1 + vec![input.output_partitioning().partition_count() <= 1] } - .with_cache()) } - fn with_cache(mut self) -> Self { + fn eq_properties_helper( + input: &Arc, + preserve_order: bool, + ) -> EquivalenceProperties { // Equivalence Properties - let mut eq_properties = self.input.equivalence_properties().clone(); + let mut eq_properties = input.equivalence_properties().clone(); // If the ordering is lost, reset the 
ordering equivalence class:
- if !self.maintains_input_order()[0] {
+ if !Self::maintains_input_order_helper(input, preserve_order)[0] {
 eq_properties.clear_orderings();
 }
+ eq_properties
+ }

- self.cache = PlanPropertiesCache::new(
- eq_properties, // Equivalence Properties
- self.partitioning.clone(), // Output Partitioning
- self.input.execution_mode(), // Execution Mode
- );
+ fn create_cache(
+ input: &Arc,
+ partitioning: Partitioning,
+ preserve_order: bool,
+ ) -> PlanPropertiesCache {
+ // Equivalence Properties
+ let eq_properties = Self::eq_properties_helper(input, preserve_order);

- self
+ PlanPropertiesCache::new(
+ eq_properties, // Equivalence Properties
+ partitioning, // Output Partitioning
+ input.execution_mode(), // Execution Mode
+ )
 }

 /// Specify if this repartitioning operation should preserve the order of
@@ -648,7 +666,9 @@ impl RepartitionExec {
 // if there is only one input partition, merging is not required
 // to maintain order
 self.input.output_partitioning().partition_count() > 1;
- self.with_cache()
+ let eq_properties = Self::eq_properties_helper(&self.input, self.preserve_order);
+ self.cache = self.cache.with_eq_properties(eq_properties);
+ self
 }

 /// Return the sort expressions that are used to merge
diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs
index 99ead9f904a1..16c4bc8601b6 100644
--- a/datafusion/physical-plan/src/sorts/partial_sort.rs
+++ b/datafusion/physical-plan/src/sorts/partial_sort.rs
@@ -71,6 +71,7 @@ use arrow::record_batch::RecordBatch;
 use datafusion_common::utils::evaluate_partition_ranges;
 use datafusion_common::Result;
 use datafusion_execution::{RecordBatchStream, TaskContext};
+use datafusion_physical_expr::LexOrdering;

 use futures::{ready, Stream, StreamExt};
 use log::trace;

@@ -104,17 +105,17 @@ impl PartialSortExec {
 common_prefix_length: usize,
 ) -> Self {
 assert!(common_prefix_length > 0);
- let cache = PlanPropertiesCache::new_default(input.schema());
+ let preserve_partitioning = false;
+ let cache = Self::create_cache(&input, expr.clone(), preserve_partitioning);
 Self {
 input,
 expr,
 common_prefix_length,
 metrics_set: ExecutionPlanMetricsSet::new(),
- preserve_partitioning: false,
+ preserve_partitioning,
 fetch: None,
 cache,
 }
- .with_cache()
 }

 /// Whether this `PartialSortExec` preserves partitioning of the children
@@ -131,6 +132,12 @@ impl PartialSortExec {
 /// input partitions producing a single, sorted partition.
 pub fn with_preserve_partitioning(mut self, preserve_partitioning: bool) -> Self {
 self.preserve_partitioning = preserve_partitioning;
+ self.cache = self
+ .cache
+ .with_partitioning(Self::output_partitioning_helper(
+ &self.input,
+ self.preserve_partitioning,
+ ));
 self
 }

@@ -161,27 +168,38 @@ impl PartialSortExec {
 self.fetch
 }

- fn with_cache(mut self) -> Self {
+ fn output_partitioning_helper(
+ input: &Arc,
+ preserve_partitioning: bool,
+ ) -> Partitioning {
+ // Get output partitioning:
+ if preserve_partitioning {
+ input.output_partitioning().clone()
+ } else {
+ Partitioning::UnknownPartitioning(1)
+ }
+ }
+
+ fn create_cache(
+ input: &Arc,
+ sort_exprs: LexOrdering,
+ preserve_partitioning: bool,
+ ) -> PlanPropertiesCache {
 // Calculate equivalence properties; i.e.
reset the ordering equivalence // class with the new ordering: - let eq_properties = self - .input + let eq_properties = input .equivalence_properties() .clone() - .with_reorder(self.expr.to_vec()); + .with_reorder(sort_exprs); // Get output partitioning: - let output_partitioning = if self.preserve_partitioning { - self.input.output_partitioning().clone() - } else { - Partitioning::UnknownPartitioning(1) - }; + let output_partitioning = + Self::output_partitioning_helper(input, preserve_partitioning); // Determine execution mode: - let mode = self.input.execution_mode(); + let mode = input.execution_mode(); - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 55a3c9f068f1..a74705dd32ab 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -52,6 +52,7 @@ use datafusion_execution::memory_pool::{ }; use datafusion_execution::runtime_env::RuntimeEnv; use datafusion_execution::TaskContext; +use datafusion_physical_expr::LexOrdering; use futures::{StreamExt, TryStreamExt}; use log::{debug, error, trace}; @@ -694,16 +695,16 @@ impl SortExec { /// Create a new sort execution plan that produces a single, /// sorted output partition. pub fn new(expr: Vec, input: Arc) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); + let preserve_partitioning = false; + let cache = Self::create_cache(&input, expr.clone(), preserve_partitioning); Self { expr, input, metrics_set: ExecutionPlanMetricsSet::new(), - preserve_partitioning: false, + preserve_partitioning, fetch: None, cache, } - .with_cache() } /// Create a new sort execution plan with the option to preserve @@ -737,7 +738,13 @@ impl SortExec { /// input partitions producing a single, sorted partition. pub fn with_preserve_partitioning(mut self, preserve_partitioning: bool) -> Self { self.preserve_partitioning = preserve_partitioning; - self.with_cache() + self.cache = self + .cache + .with_partitioning(Self::output_partitioning_helper( + &self.input, + self.preserve_partitioning, + )); + self } /// Modify how many rows to include in the result @@ -767,33 +774,43 @@ impl SortExec { self.fetch } - fn with_cache(mut self) -> Self { + fn output_partitioning_helper( + input: &Arc, + preserve_partitioning: bool, + ) -> Partitioning { + // Get output partitioning: + if preserve_partitioning { + input.output_partitioning().clone() + } else { + Partitioning::UnknownPartitioning(1) + } + } + + fn create_cache( + input: &Arc, + sort_exprs: LexOrdering, + preserve_partitioning: bool, + ) -> PlanPropertiesCache { // Calculate equivalence properties; i.e. 
reset the ordering equivalence // class with the new ordering: - let eq_properties = self - .input + let eq_properties = input .equivalence_properties() .clone() - .with_reorder(self.expr.to_vec()); + .with_reorder(sort_exprs); // Get output partitioning: - let output_partitioning = if self.preserve_partitioning { - self.input.output_partitioning().clone() - } else { - Partitioning::UnknownPartitioning(1) - }; + let output_partitioning = + Self::output_partitioning_helper(input, preserve_partitioning); // Determine execution mode: - let mode = match self.input.execution_mode() { + let mode = match input.execution_mode() { ExecutionMode::Unbounded | ExecutionMode::PipelineBreaking => { ExecutionMode::PipelineBreaking } ExecutionMode::Bounded => ExecutionMode::Bounded, }; - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index eadd2d0711fe..c07ae72d5492 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -80,7 +80,7 @@ pub struct SortPreservingMergeExec { impl SortPreservingMergeExec { /// Create a new sort execution plan pub fn new(expr: Vec, input: Arc) -> Self { - let cache = PlanPropertiesCache::new_default(input.schema()); + let cache = Self::create_cache(&input); Self { input, expr, @@ -88,7 +88,6 @@ impl SortPreservingMergeExec { fetch: None, cache, } - .with_cache() } /// Sets the number of rows to fetch pub fn with_fetch(mut self, fetch: Option) -> Self { @@ -111,14 +110,12 @@ impl SortPreservingMergeExec { self.fetch } - fn with_cache(mut self) -> Self { - self.cache = PlanPropertiesCache::new( - self.input.equivalence_properties().clone(), // Equivalence Properties - Partitioning::UnknownPartitioning(1), // Output Partitioning - self.input.execution_mode(), // Execution Mode - ); - - self + fn create_cache(input: &Arc) -> PlanPropertiesCache { + PlanPropertiesCache::new( + input.equivalence_properties().clone(), // Equivalence Properties + Partitioning::UnknownPartitioning(1), // Output Partitioning + input.execution_mode(), // Execution Mode + ) } } diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 60b372446805..e95fd37ab1b2 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -85,16 +85,22 @@ impl StreamingTableExec { Some(p) => Arc::new(schema.project(p)?), None => schema, }; - let cache = PlanPropertiesCache::new_default(projected_schema.clone()); + let projected_output_ordering = + projected_output_ordering.into_iter().collect::>(); + let cache = Self::create_cache( + projected_schema.clone(), + &projected_output_ordering, + &partitions, + infinite, + ); Ok(Self { partitions, projected_schema, projection: projection.cloned().map(Into::into), - projected_output_ordering: projected_output_ordering.into_iter().collect(), + projected_output_ordering, infinite, cache, - } - .with_cache()) + }) } pub fn partitions(&self) -> &Vec> { @@ -121,26 +127,26 @@ impl StreamingTableExec { self.infinite } - fn with_cache(mut self) -> Self { + fn create_cache( + schema: SchemaRef, + orderings: &[LexOrdering], + partitions: &[Arc], + is_infinite: bool, + ) -> PlanPropertiesCache { // Calculate equivalence properties: - let eq_properties = 
EquivalenceProperties::new_with_orderings( - self.schema(), - &self.projected_output_ordering, - ); + let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); // Get output partitioning: - let output_partitioning = - Partitioning::UnknownPartitioning(self.partitions.len()); + let output_partitioning = Partitioning::UnknownPartitioning(partitions.len()); // Determine execution mode: - let mode = if self.infinite { + let mode = if is_infinite { ExecutionMode::Unbounded } else { ExecutionMode::Bounded }; - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index 77ff8d27157a..a677907295a7 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -34,6 +34,7 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; use futures::Stream; use tokio::sync::Barrier; @@ -132,14 +133,13 @@ impl MockExec { /// ensure any poll loops are correct. This behavior can be /// changed with `with_use_task` pub fn new(data: Vec>, schema: SchemaRef) -> Self { - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(schema.clone()); Self { data, schema, use_task: true, cache, } - .with_cache() } /// If `use_task` is true (the default) then the batches are sent @@ -150,15 +150,14 @@ impl MockExec { self } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(1)) - // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); - self + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ) } } @@ -290,14 +289,13 @@ impl BarrierExec { pub fn new(data: Vec>, schema: SchemaRef) -> Self { // wait for all streams and the input let barrier = Arc::new(Barrier::new(data.len() + 1)); - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(schema.clone(), &data); Self { data, schema, barrier, cache, } - .with_cache() } /// wait until all the input streams and this function is ready @@ -307,15 +305,13 @@ impl BarrierExec { println!("BarrierExec::wait done waiting"); } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(self.data.len())) - // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + fn create_cache(schema: SchemaRef, data: &[Vec]) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(data.len()), + ExecutionMode::Bounded, + ) } } @@ -412,19 +408,18 @@ impl ErrorExec { DataType::Int64, true, )])); - let cache = PlanPropertiesCache::new_default(schema.clone()); - Self { cache }.with_cache() + let cache = Self::create_cache(schema.clone()); + Self { cache } } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - 
.with_partitioning(Partitioning::UnknownPartitioning(1))
- // Execution Mode
- .with_exec_mode(ExecutionMode::Bounded);
+ fn create_cache(schema: SchemaRef) -> PlanPropertiesCache {
+ let eq_properties = EquivalenceProperties::new(schema);

- self
+ PlanPropertiesCache::new(
+ eq_properties,
+ Partitioning::UnknownPartitioning(1),
+ ExecutionMode::Bounded,
+ )
 }
 }

@@ -486,24 +481,22 @@ impl StatisticsExec {
 .column_statistics.len(), schema.fields().len(),
 "if defined, the column statistics vector length should be the number of fields"
 );
- let cache = PlanPropertiesCache::new_default(Arc::new(schema.clone()));
+ let cache = Self::create_cache(Arc::new(schema.clone()));
 Self {
 stats,
 schema: Arc::new(schema),
 cache,
 }
- .with_cache()
 }

- fn with_cache(mut self) -> Self {
- self.cache = self
- .cache
- // Output Partitioning
- .with_partitioning(Partitioning::UnknownPartitioning(2))
- // Execution Mode
- .with_exec_mode(ExecutionMode::Bounded);
+ fn create_cache(schema: SchemaRef) -> PlanPropertiesCache {
+ let eq_properties = EquivalenceProperties::new(schema);

- self
+ PlanPropertiesCache::new(
+ eq_properties,
+ Partitioning::UnknownPartitioning(2),
+ ExecutionMode::Bounded,
+ )
 }
 }

@@ -567,9 +560,6 @@ pub struct BlockingExec {
 /// Schema that is mocked by this plan.
 schema: SchemaRef,

- /// Number of output partitions.
- n_partitions: usize,
-
 /// Ref-counting helper to check if the plan and the produced stream are still in memory.
 refs: Arc<()>,
 cache: PlanPropertiesCache,
 }

impl BlockingExec {
 /// Create new [`BlockingExec`] with a given schema and number of partitions.
 pub fn new(schema: SchemaRef, n_partitions: usize) -> Self {
- let cache = PlanPropertiesCache::new_default(schema.clone());
+ let cache = Self::create_cache(schema.clone(), n_partitions);
 Self {
 schema,
- n_partitions,
 refs: Default::default(),
 cache,
 }
- .with_cache()
 }

 /// Weak pointer that can be used for ref-counting this execution plan and its streams.
@@ -597,15 +585,14 @@ impl BlockingExec {
 Arc::downgrade(&self.refs)
 }

- fn with_cache(mut self) -> Self {
- self.cache = self
- .cache
- // Output Partitioning
- .with_partitioning(Partitioning::UnknownPartitioning(self.n_partitions))
- // Execution Mode
- .with_exec_mode(ExecutionMode::Bounded);
+ fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache {
+ let eq_properties = EquivalenceProperties::new(schema);

- self
+ PlanPropertiesCache::new(
+ eq_properties,
+ Partitioning::UnknownPartitioning(n_partitions),
+ ExecutionMode::Bounded,
+ )
 }
 }

@@ -719,13 +706,13 @@ impl PanicExec {
 /// Create new [`PanicExec`] with a given schema and number of
 /// partitions, which will each panic immediately.
pub fn new(schema: SchemaRef, n_partitions: usize) -> Self { - let cache = PlanPropertiesCache::new_default(schema.clone()); + let batches_until_panics = vec![0; n_partitions]; + let cache = Self::create_cache(schema.clone(), &batches_until_panics); Self { schema, - batches_until_panics: vec![0; n_partitions], + batches_until_panics, cache, } - .with_cache() } /// Set the number of batches prior to panic for a partition @@ -734,17 +721,18 @@ impl PanicExec { self } - fn with_cache(mut self) -> Self { - let num_partitions = self.batches_until_panics.len(); + fn create_cache( + schema: SchemaRef, + batches_until_panics: &[usize], + ) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + let num_partitions = batches_until_panics.len(); - self.cache = self - .cache - // Output Partitioning - .with_partitioning(Partitioning::UnknownPartitioning(num_partitions)) - // Execution Mode - .with_exec_mode(ExecutionMode::Bounded); - - self + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(num_partitions), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 42e5ce58edb0..06a870123255 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -98,13 +98,12 @@ impl UnionExec { /// Create a new UnionExec pub fn new(inputs: Vec>) -> Self { let schema = union_schema(&inputs); - let cache = PlanPropertiesCache::new_default(schema); + let cache = Self::create_cache(&inputs, schema); UnionExec { inputs, metrics: ExecutionPlanMetricsSet::new(), cache, } - .with_cache() } /// Get inputs of the execution plan @@ -112,16 +111,18 @@ impl UnionExec { &self.inputs } - fn with_cache(mut self) -> Self { + fn create_cache( + inputs: &[Arc], + schema: SchemaRef, + ) -> PlanPropertiesCache { // Calculate equivalence properties: // TODO: In some cases, we should be able to preserve some equivalence // classes and constants. Add support for such cases. - let children_eqs = self - .inputs + let children_eqs = inputs .iter() .map(|child| child.equivalence_properties()) .collect::>(); - let mut eq_properties = EquivalenceProperties::new(self.schema()); + let mut eq_properties = EquivalenceProperties::new(schema); // Use the ordering equivalence class of the first child as the seed: let mut meets = children_eqs[0] .oeq_class() @@ -152,18 +153,16 @@ impl UnionExec { eq_properties.add_new_orderings(meets); // Calculate output partitioning; i.e. sum output partitions of the inputs. 
- let num_partitions = self - .inputs + let num_partitions = inputs .iter() .map(|plan| plan.output_partitioning().partition_count()) .sum(); let output_partitioning = Partitioning::UnknownPartitioning(num_partitions); // Determine execution mode: - let mode = exec_mode_flatten(self.inputs.iter()); + let mode = exec_mode_flatten(inputs.iter()); - self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode); - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } @@ -323,20 +322,17 @@ pub struct InterleaveExec { impl InterleaveExec { /// Create a new InterleaveExec pub fn try_new(inputs: Vec>) -> Result { - let schema = union_schema(&inputs); - if !can_interleave(inputs.iter()) { return internal_err!( "Not all InterleaveExec children have a consistent hash partitioning" ); } - let cache = PlanPropertiesCache::new_default(schema); + let cache = Self::create_cache(&inputs); Ok(InterleaveExec { inputs, metrics: ExecutionPlanMetricsSet::new(), cache, - } - .with_cache()) + }) } /// Get inputs of the execution plan @@ -344,18 +340,15 @@ impl InterleaveExec { &self.inputs } - fn with_cache(mut self) -> Self { + fn create_cache(inputs: &[Arc]) -> PlanPropertiesCache { + let schema = union_schema(inputs); + let eq_properties = EquivalenceProperties::new(schema); // Get output partitioning: - let output_partitioning = self.inputs[0].output_partitioning().clone(); + let output_partitioning = inputs[0].output_partitioning().clone(); // Determine execution mode: - let mode = exec_mode_flatten(self.inputs.iter()); + let mode = exec_mode_flatten(inputs.iter()); - self.cache = self - .cache - .with_partitioning(output_partitioning) - .with_exec_mode(mode); - - self + PlanPropertiesCache::new(eq_properties, output_partitioning, mode) } } diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index b9df57d84f81..ba90e8b4f1fc 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -37,6 +37,7 @@ use arrow::datatypes::{ use arrow::record_batch::RecordBatch; use datafusion_common::{exec_err, DataFusionError, Result, UnnestOptions}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; use futures::{Stream, StreamExt}; @@ -70,7 +71,7 @@ impl UnnestExec { schema: SchemaRef, options: UnnestOptions, ) -> Self { - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(&input, schema.clone()); UnnestExec { input, schema, @@ -79,18 +80,19 @@ impl UnnestExec { metrics: Default::default(), cache, } - .with_cache() } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - // Output Partitioning - .with_partitioning(self.input.output_partitioning().clone()) - // Execution Mode - .with_exec_mode(self.input.execution_mode()); - - self + fn create_cache( + input: &Arc, + schema: SchemaRef, + ) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); + + PlanPropertiesCache::new( + eq_properties, + input.output_partitioning().clone(), + input.execution_mode(), + ) } } diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index 7fc242099379..20c8eddce6bd 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -33,6 +33,7 @@ use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::{RecordBatch, RecordBatchOptions}; use datafusion_common::{internal_err, plan_err, 
DataFusionError, Result, ScalarValue}; use datafusion_execution::TaskContext; +use datafusion_physical_expr::EquivalenceProperties; /// Execution plan for values list based relation (produces constant rows) #[derive(Debug)] @@ -113,13 +114,12 @@ impl ValuesExec { } } - let cache = PlanPropertiesCache::new_default(schema.clone()); + let cache = Self::create_cache(schema.clone()); Ok(ValuesExec { schema, data: batches, cache, - } - .with_cache()) + }) } /// provides the data @@ -127,13 +127,14 @@ impl ValuesExec { self.data.clone() } - fn with_cache(mut self) -> Self { - self.cache = self - .cache - .with_partitioning(Partitioning::UnknownPartitioning(1)) - .with_exec_mode(ExecutionMode::Bounded); + fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + let eq_properties = EquivalenceProperties::new(schema); - self + PlanPropertiesCache::new( + eq_properties, + Partitioning::UnknownPartitioning(1), + ExecutionMode::Bounded, + ) } } diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index cb512302cb6f..a9dfc9bfeedd 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -121,8 +121,8 @@ impl BoundedWindowAggExec { vec![] } }; - let cache = PlanPropertiesCache::new_default(schema.clone()); - let window = Self { + let cache = Self::create_cache(&input, &schema, &window_expr); + Ok(Self { input, window_expr, schema, @@ -131,8 +131,7 @@ impl BoundedWindowAggExec { input_order_mode, ordered_partition_by_indices, cache, - }; - Ok(window.with_cache()) + }) } /// Window expressions @@ -183,23 +182,25 @@ impl BoundedWindowAggExec { }) } - fn with_cache(mut self) -> Self { + fn create_cache( + input: &Arc, + schema: &SchemaRef, + window_expr: &[Arc], + ) -> PlanPropertiesCache { // Calculate equivalence properties: - let eq_properties = - window_equivalence_properties(&self.schema, &self.input, &self.window_expr); + let eq_properties = window_equivalence_properties(schema, input, window_expr); // As we can have repartitioning using the partition keys, this can // be either one or more than one, depending on the presence of // repartitioning. 
- let output_partitioning = self.input.output_partitioning().clone();
+ let output_partitioning = input.output_partitioning().clone();

 // Construct properties cache
- self.cache = PlanPropertiesCache::new(
- eq_properties, // Equivalence Properties
- output_partitioning, // Output Partitioning
- self.input.execution_mode(), // Execution Mode
- );
- self
+ PlanPropertiesCache::new(
+ eq_properties, // Equivalence Properties
+ output_partitioning, // Output Partitioning
+ input.execution_mode(), // Execution Mode
+ )
 }
 }

diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs
index 247588c971a2..852698bafe3a 100644
--- a/datafusion/physical-plan/src/windows/window_agg_exec.rs
+++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs
@@ -80,8 +80,8 @@ impl WindowAggExec {
 let ordered_partition_by_indices =
 get_ordered_partition_by_indices(window_expr[0].partition_by(), &input);
- let cache = PlanPropertiesCache::new_default(schema.clone());
- let window = Self {
+ let cache = Self::create_cache(schema.clone(), &input, &window_expr);
+ Ok(Self {
 input,
 window_expr,
 schema,
@@ -89,8 +89,7 @@ impl WindowAggExec {
 metrics: ExecutionPlanMetricsSet::new(),
 ordered_partition_by_indices,
 cache,
- };
- Ok(window.with_cache())
+ })
 }

 /// Window expressions
@@ -117,18 +116,21 @@ impl WindowAggExec {
 )
 }

- fn with_cache(mut self) -> Self {
+ fn create_cache(
+ schema: SchemaRef,
+ input: &Arc,
+ window_expr: &[Arc],
+ ) -> PlanPropertiesCache {
 // Calculate equivalence properties:
- let eq_properties =
- window_equivalence_properties(&self.schema, &self.input, &self.window_expr);
+ let eq_properties = window_equivalence_properties(&schema, input, window_expr);

 // Get output partitioning:
 // Because we can have repartitioning using the partition keys this
 // would be either 1 or more than 1 depending on the presence of repartitioning.
- let output_partitioning = self.input.output_partitioning().clone();
+ let output_partitioning = input.output_partitioning().clone();

 // Determine execution mode:
- let mode = match self.input.execution_mode() {
+ let mode = match input.execution_mode() {
 ExecutionMode::Bounded => ExecutionMode::Bounded,
 ExecutionMode::Unbounded | ExecutionMode::PipelineBreaking => {
 ExecutionMode::PipelineBreaking
 }
 };

 // Construct properties cache:
- self.cache = PlanPropertiesCache::new(eq_properties, output_partitioning, mode);
- self
+ PlanPropertiesCache::new(eq_properties, output_partitioning, mode)
 }
 }

diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs
index 33c611dd30d8..9a0b5daf27e4 100644
--- a/datafusion/physical-plan/src/work_table.rs
+++ b/datafusion/physical-plan/src/work_table.rs
@@ -33,7 +33,7 @@ use arrow::datatypes::SchemaRef;
 use arrow::record_batch::RecordBatch;
 use datafusion_common::{internal_err, DataFusionError, Result};
 use datafusion_execution::TaskContext;
-use datafusion_physical_expr::Partitioning;
+use datafusion_physical_expr::{EquivalenceProperties, Partitioning};

 /// The name is from PostgreSQL's terminology.
 /// See
@@ -91,7 +91,7 @@ pub struct WorkTableExec {
 impl WorkTableExec {
 /// Create a new execution plan for a worktable exec.
    pub fn new(name: String, schema: SchemaRef) -> Self {
-        let cache = PlanPropertiesCache::new_default(schema.clone());
+        let cache = Self::create_cache(schema.clone());
         Self {
             name,
             schema,
@@ -99,7 +99,6 @@ impl WorkTableExec {
             work_table: Arc::new(WorkTable::new()),
             cache,
         }
-        .with_cache()
     }
 
     pub(super) fn with_work_table(&self, work_table: Arc<WorkTable>) -> Self {
@@ -112,13 +111,14 @@ impl WorkTableExec {
         }
     }
 
-    fn with_cache(mut self) -> Self {
-        self.cache = self
-            .cache
-            .with_partitioning(Partitioning::UnknownPartitioning(1))
-            .with_exec_mode(ExecutionMode::Bounded);
+    fn create_cache(schema: SchemaRef) -> PlanPropertiesCache {
+        let eq_properties = EquivalenceProperties::new(schema);
 
-        self
+        PlanPropertiesCache::new(
+            eq_properties,
+            Partitioning::UnknownPartitioning(1),
+            ExecutionMode::Bounded,
+        )
     }
 }
 

From b728232b91862826061dfa878cefe90d25576f78 Mon Sep 17 00:00:00 2001
From: Alex Huang
Date: Mon, 26 Feb 2024 21:10:13 +0800
Subject: [PATCH 16/45] feat: support `FixedSizeList` Type Coercion (#9108)

* support FixedSizeList Type Coercion

* add allow null type coercion parameter

* support null column in FixedSizeList

* Add test

* Add tests for cardinality with fixed size lists

* chore

* fix ci

* add comment

* Fix array_element function signature

* Remove unused imports and simplify code

* Fix array function signatures and behavior

* fix conflict

* fix conflict

* add tests for FixedSizeList

* remove unreachable null check

* simplify the code

* remove null checking

* reformat output

* simplify code

* add tests for array_dims

* Refactor type coercion functions in datafusion/expr module

---
 datafusion/expr/src/built_in_function.rs     |  23 +-
 datafusion/expr/src/signature.rs             |  17 +-
 .../expr/src/type_coercion/functions.rs      | 109 ++--
 datafusion/sqllogictest/test_files/array.slt | 562 +++++++++++++++++-
 4 files changed, 628 insertions(+), 83 deletions(-)

diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs
index f92ae87d6e6c..8b4e65121c79 100644
--- a/datafusion/expr/src/built_in_function.rs
+++ b/datafusion/expr/src/built_in_function.rs
@@ -31,7 +31,7 @@ use crate::{
 };
 
 use arrow::datatypes::{DataType, Field, Fields, IntervalUnit, TimeUnit};
-use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
+use datafusion_common::{exec_err, plan_err, DataFusionError, Result};
 use strum::IntoEnumIterator;
 use strum_macros::EnumIter;
 
@@ -543,10 +543,11 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::Flatten => {
                 fn get_base_type(data_type: &DataType) -> Result<DataType> {
                     match data_type {
-                        DataType::List(field) if matches!(field.data_type(), DataType::List(_)) => get_base_type(field.data_type()),
+                        DataType::List(field) | DataType::FixedSizeList(field, _) if matches!(field.data_type(), DataType::List(_)|DataType::FixedSizeList(_,_ )) => get_base_type(field.data_type()),
                         DataType::LargeList(field) if matches!(field.data_type(), DataType::LargeList(_)) => get_base_type(field.data_type()),
                         DataType::Null | DataType::List(_) | DataType::LargeList(_) => Ok(data_type.to_owned()),
-                        _ => internal_err!("Not reachable, data_type should be List or LargeList"),
+                        DataType::FixedSizeList(field,_ ) => Ok(DataType::List(field.clone())),
+                        _ => exec_err!("Not reachable, data_type should be List, LargeList or FixedSizeList"),
                     }
                 }
 
@@ -929,18 +930,18 @@ impl BuiltinScalarFunction {
                 // 0 or more arguments of arbitrary type
                 Signature::one_of(vec![VariadicEqual, Any(0)], self.volatility())
             }
-            BuiltinScalarFunction::ArrayPopFront => Signature::any(1,
self.volatility()),
-            BuiltinScalarFunction::ArrayPopBack => Signature::any(1, self.volatility()),
+            BuiltinScalarFunction::ArrayPopFront => Signature::array(self.volatility()),
+            BuiltinScalarFunction::ArrayPopBack => Signature::array(self.volatility()),
             BuiltinScalarFunction::ArrayConcat => {
                 Signature::variadic_any(self.volatility())
             }
-            BuiltinScalarFunction::ArrayDims => Signature::any(1, self.volatility()),
-            BuiltinScalarFunction::ArrayEmpty => Signature::any(1, self.volatility()),
+            BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()),
+            BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()),
             BuiltinScalarFunction::ArrayElement => {
                 Signature::array_and_index(self.volatility())
             }
             BuiltinScalarFunction::ArrayExcept => Signature::any(2, self.volatility()),
-            BuiltinScalarFunction::Flatten => Signature::any(1, self.volatility()),
+            BuiltinScalarFunction::Flatten => Signature::array(self.volatility()),
             BuiltinScalarFunction::ArrayHasAll | BuiltinScalarFunction::ArrayHasAny => {
                 Signature::any(2, self.volatility())
             }
@@ -950,8 +951,8 @@ impl BuiltinScalarFunction {
             BuiltinScalarFunction::ArrayLength => {
                 Signature::variadic_any(self.volatility())
             }
-            BuiltinScalarFunction::ArrayNdims => Signature::any(1, self.volatility()),
-            BuiltinScalarFunction::ArrayDistinct => Signature::any(1, self.volatility()),
+            BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()),
+            BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()),
             BuiltinScalarFunction::ArrayPosition => {
                 Signature::array_and_element_and_optional_index(self.volatility())
             }
@@ -981,7 +982,7 @@ impl BuiltinScalarFunction {
 
             BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()),
             BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()),
-            BuiltinScalarFunction::Cardinality => Signature::any(1, self.volatility()),
+            BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()),
             BuiltinScalarFunction::ArrayResize => {
                 Signature::variadic_any(self.volatility())
             }
diff --git a/datafusion/expr/src/signature.rs b/datafusion/expr/src/signature.rs
index e8d9d8fb3966..663ecf7b1b8e 100644
--- a/datafusion/expr/src/signature.rs
+++ b/datafusion/expr/src/signature.rs
@@ -123,7 +123,7 @@ pub enum TypeSignature {
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum ArrayFunctionSignature {
     /// Specialized Signature for ArrayAppend and similar functions
-    /// The first argument should be List/LargeList, and the second argument should be non-list or list.
+    /// The first argument should be List/LargeList/FixedSizeList, and the second argument should be non-list or list.
     /// The second argument's list dimension should be one dimension less than the first argument's list dimension.
     /// List dimension of the List/LargeList is equivalent to the number of List.
     /// List dimension of the non-list is 0.
@@ -133,9 +133,14 @@ pub enum ArrayFunctionSignature {
     /// The first argument's list dimension should be one dimension less than the second argument's list dimension.
     ElementAndArray,
     /// Specialized Signature for Array functions of the form (List/LargeList, Index)
+    /// The first argument should be List/LargeList/FixedSizeList, and the second argument should be Int64.
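+    /// For example, `array_element(make_array(1, 2, 3), 2)` resolves through
+    /// this signature once its index argument is coerced to `Int64` (an
+    /// illustrative call, not a test added by this patch).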
    ArrayAndIndex,
     /// Specialized Signature for Array functions of the form (List/LargeList, Element, Optional Index)
     ArrayAndElementAndOptionalIndex,
+    /// Specialized Signature for ArrayEmpty and similar functions
+    /// The function takes a single argument that must be a List/LargeList/FixedSizeList
+    /// or something that can be coerced to one of those types.
+    Array,
 }
 
 impl std::fmt::Display for ArrayFunctionSignature {
@@ -153,6 +158,9 @@ impl std::fmt::Display for ArrayFunctionSignature {
             ArrayFunctionSignature::ArrayAndIndex => {
                 write!(f, "array, index")
             }
+            ArrayFunctionSignature::Array => {
+                write!(f, "array")
+            }
         }
     }
 }
@@ -325,6 +333,13 @@ impl Signature {
             volatility,
         }
     }
+    /// Specialized Signature for ArrayEmpty and similar functions
+    pub fn array(volatility: Volatility) -> Self {
+        Signature {
+            type_signature: TypeSignature::ArraySignature(ArrayFunctionSignature::Array),
+            volatility,
+        }
+    }
 }
 
 /// Monotonicity of the `ScalarFunctionExpr` with respect to its arguments.
diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs
index 9cab04bc7605..2022d67879f8 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ b/datafusion/expr/src/type_coercion/functions.rs
@@ -80,6 +80,36 @@ fn get_valid_types(
     signature: &TypeSignature,
     current_types: &[DataType],
 ) -> Result<Vec<Vec<DataType>>> {
+    fn array_element_and_optional_index(
+        current_types: &[DataType],
+    ) -> Result<Vec<Vec<DataType>>> {
+        // make sure there's 2 or 3 arguments
+        if !(current_types.len() == 2 || current_types.len() == 3) {
+            return Ok(vec![vec![]]);
+        }
+
+        let first_two_types = &current_types[0..2];
+        let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?;
+
+        // Early return if there are only 2 arguments
+        if current_types.len() == 2 {
+            return Ok(valid_types);
+        }
+
+        let valid_types_with_index = valid_types
+            .iter()
+            .map(|t| {
+                let mut t = t.clone();
+                t.push(DataType::Int64);
+                t
+            })
+            .collect::<Vec<_>>();
+
+        valid_types.extend(valid_types_with_index);
+
+        Ok(valid_types)
+    }
+
     fn array_append_or_prepend_valid_types(
         current_types: &[DataType],
         is_append: bool,
@@ -111,71 +141,37 @@ fn get_valid_types(
             )
         })?;
 
-        let array_type = datafusion_common::utils::coerced_type_with_base_type_only(
+        let new_array_type = datafusion_common::utils::coerced_type_with_base_type_only(
             array_type,
             &new_base_type,
         );
 
-        match array_type {
+        match new_array_type {
             DataType::List(ref field)
             | DataType::LargeList(ref field)
             | DataType::FixedSizeList(ref field, _) => {
-                let elem_type = field.data_type();
+                let new_elem_type = field.data_type();
                 if is_append {
-                    Ok(vec![vec![array_type.clone(), elem_type.clone()]])
+                    Ok(vec![vec![new_array_type.clone(), new_elem_type.clone()]])
                 } else {
-                    Ok(vec![vec![elem_type.to_owned(), array_type.clone()]])
+                    Ok(vec![vec![new_elem_type.to_owned(), new_array_type.clone()]])
                 }
             }
             _ => Ok(vec![vec![]]),
         }
     }
 
-    fn array_element_and_optional_index(
-        current_types: &[DataType],
-    ) -> Result<Vec<Vec<DataType>>> {
-        // make sure there's 2 or 3 arguments
-        if !(current_types.len() == 2 || current_types.len() == 3) {
-            return Ok(vec![vec![]]);
-        }
-
-        let first_two_types = &current_types[0..2];
-        let mut valid_types = array_append_or_prepend_valid_types(first_two_types, true)?;
-
-        // Early return if there are only 2 arguments
-        if current_types.len() == 2 {
-            return Ok(valid_types);
-        }
-
-        let valid_types_with_index = valid_types
-            .iter()
-            .map(|t| {
-                let mut t = t.clone();
-                t.push(DataType::Int64);
-                t
-            })
-            .collect::<Vec<_>>();
-
-        valid_types.extend(valid_types_with_index);
-
-        Ok(valid_types)
-    }
-
-    fn array_and_index(current_types: &[DataType]) -> Result<Vec<Vec<DataType>>> {
-        if current_types.len() != 2 {
-            return Ok(vec![vec![]]);
-        }
-
-        let array_type = &current_types[0];
-
+    fn array(array_type: &DataType) -> Option<DataType> {
         match array_type {
             DataType::List(_)
             | DataType::LargeList(_)
             | DataType::FixedSizeList(_, _) => {
                 let array_type = coerced_fixed_size_list_to_list(array_type);
-                Ok(vec![vec![array_type, DataType::Int64]])
+                Some(array_type)
             }
-            _ => Ok(vec![vec![]]),
+            _ => None,
         }
     }
 
+
     let valid_types = match signature {
         TypeSignature::Variadic(valid_types) => valid_types
             .iter()
@@ -211,19 +207,32 @@ fn get_valid_types(
 
         TypeSignature::ArraySignature(ref function_signature) => match function_signature {
             ArrayFunctionSignature::ArrayAndElement => {
-                return array_append_or_prepend_valid_types(current_types, true)
+                array_append_or_prepend_valid_types(current_types, true)?
             }
-            ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
-                return array_element_and_optional_index(current_types)
+            ArrayFunctionSignature::ElementAndArray => {
+                array_append_or_prepend_valid_types(current_types, false)?
             }
             ArrayFunctionSignature::ArrayAndIndex => {
-                return array_and_index(current_types)
+                if current_types.len() != 2 {
+                    return Ok(vec![vec![]]);
+                }
+                array(&current_types[0]).map_or_else(
+                    || vec![vec![]],
+                    |array_type| vec![vec![array_type, DataType::Int64]],
+                )
             }
-            ArrayFunctionSignature::ElementAndArray => {
-                return array_append_or_prepend_valid_types(current_types, false)
+            ArrayFunctionSignature::ArrayAndElementAndOptionalIndex => {
+                array_element_and_optional_index(current_types)?
             }
-        },
+            ArrayFunctionSignature::Array => {
+                if current_types.len() != 1 {
+                    return Ok(vec![vec![]]);
+                }
+                array(&current_types[0])
+                    .map_or_else(|| vec![vec![]], |array_type| vec![vec![array_type]])
+            }
+        },
         TypeSignature::Any(number) => {
             if current_types.len() != *number {
                 return plan_err!(
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index 7f263d904819..da02a80a104f 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -123,6 +123,13 @@ AS VALUES
     (make_array(NULL, 10, 11, 12))
 ;
 
+statement ok
+CREATE TABLE large_arrayspop
+AS SELECT
+  arrow_cast(column1, 'LargeList(Int64)') AS column1
+FROM arrayspop
+;
+
 statement ok
 CREATE TABLE nested_arrays
 AS VALUES
@@ -172,6 +179,15 @@ AS SELECT
 FROM arrays_values
 ;
 
+statement ok
+CREATE TABLE fixed_arrays_values
+AS SELECT
+  arrow_cast(column1, 'FixedSizeList(10, Int64)') AS column1,
+  column2,
+  column3,
+  column4
+FROM arrays_values
+;
 
 statement ok
 CREATE TABLE arrays_values_v2
@@ -212,6 +228,22 @@ AS
 FROM flatten_table
 ;
 
+statement ok
+CREATE TABLE fixed_size_flatten_table
+AS VALUES
+  (arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'),
+   arrow_cast(make_array([[1, 2, 3]], [[4, 5]], [[6]]), 'FixedSizeList(3, List(List(Int64)))'),
+   arrow_cast(make_array([[[1]]], [[[2, 3]]]), 'FixedSizeList(2, List(List(List(Int64))))'),
+   arrow_cast(make_array([1.0], [2.1, 2.2], [3.2, 3.3, 3.4]), 'FixedSizeList(3, List(Float64))')
+  ),
+  (
+    arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'FixedSizeList(3, List(Int64))'),
+    arrow_cast(make_array([[8]], [[9, 10]], [[11, 12, 13]]), 'FixedSizeList(3, List(List(Int64)))'),
+    arrow_cast(make_array([[[1,2]]], [[[3]]]), 'FixedSizeList(2, List(List(List(Int64))))'),
+    arrow_cast(make_array([1.0, 2.0], [3.0, 4.0], [5.0, 6.0]), 'FixedSizeList(3, List(Float64))')
+  )
+;
+
 statement ok
 CREATE TABLE
array_has_table_1D AS VALUES @@ -346,10 +378,31 @@ AS VALUES statement ok CREATE TABLE array_distinct_table_1D_large +AS SELECT + arrow_cast(column1, 'LargeList(Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Int64)') AS column1 +FROM array_distinct_table_1D +; + +statement ok +CREATE TABLE array_distinct_table_1D_UTF8_fixed +AS SELECT + arrow_cast(column1, 'FixedSizeList(5, Utf8)') AS column1 +FROM array_distinct_table_1D_UTF8 +; + +statement ok +CREATE TABLE array_distinct_table_2D_fixed AS VALUES - (arrow_cast(make_array(1, 1, 2, 2, 3), 'LargeList(Int64)')), - (arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), - (arrow_cast(make_array(3, 5, 3, 3, 3), 'LargeList(Int64)')) + (arrow_cast(make_array([1,2], [1,2], [3,4], [3,4], [5,6]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([1,2], [3,4], [5,6], [7,8], [9,10]), 'FixedSizeList(5, List(Int64))')), + (arrow_cast(make_array([5,6], [5,6], NULL, NULL, NULL), 'FixedSizeList(5, List(Int64))')) ; statement ok @@ -1103,7 +1156,7 @@ select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), NULL NULL query IT -select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 11); +select array_element(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)'), 7), array_element(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)'), 11); ---- NULL NULL @@ -1303,6 +1356,14 @@ NULL 43 ## array_pop_back (aliases: `list_pop_back`) +# array_pop_back scalar function with null +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +# follow clickhouse and duckdb +#query ? +#select array_pop_back(null); +#---- +#NULL + # array_pop_back scalar function #1 query ?? select array_pop_back(make_array(1, 2, 3, 4, 5)), array_pop_back(make_array('h', 'e', 'l', 'l', 'o')); @@ -1314,6 +1375,11 @@ select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')) ---- [1, 2, 3, 4] [h, e, l, l] +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [h, e, l, l] + # array_pop_back scalar function #2 (after array_pop_back, array is empty) query ? select array_pop_back(make_array(1)); @@ -1325,6 +1391,11 @@ select array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- [] +query ? +select array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + # array_pop_back scalar function #3 (array_pop_back the empty array) query ? select array_pop_back(array_pop_back(make_array(1))); @@ -1336,12 +1407,27 @@ select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64) ---- [] +query ? +select array_pop_back(array_pop_back(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + # array_pop_back scalar function #4 (array_pop_back the arrays which have NULL) query ?? select array_pop_back(make_array(1, 2, 3, 4, NULL)), array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o')); ---- [1, 2, 3, 4] [, e, l, ] +query ?? +select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'LargeList(Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'LargeList(Utf8)')); +---- +[1, 2, 3, 4] [, e, l, ] + +query ?? 
+select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, NULL), 'FixedSizeList(5, Int64)')), array_pop_back(arrow_cast(make_array(NULL, 'e', 'l', NULL, 'o'), 'FixedSizeList(5, Utf8)')); +---- +[1, 2, 3, 4] [, e, l, ] + # array_pop_back scalar function #5 (array_pop_back the nested arrays) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); @@ -1353,6 +1439,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_back scalar function #6 (array_pop_back the nested arrays with NULL) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL)); @@ -1364,6 +1455,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_back scalar function #7 (array_pop_back the nested arrays with NULL) query ? select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4))); @@ -1375,6 +1471,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9 ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], ] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)), 'FixedSizeList(5, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], ] + # array_pop_back scalar function #8 (after array_pop_back, nested array is empty) query ? select array_pop_back(make_array(make_array(1, 2, 3))); @@ -1386,6 +1487,11 @@ select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(Lis ---- [] +query ? +select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + # array_pop_back with columns query ? select array_pop_back(column1) from arrayspop; @@ -1407,8 +1513,36 @@ select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop; [] [, 10, 11] +query ? +select array_pop_back(column1) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, ] +[, ] +[] +[, 10, 11] + +query ? +select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from large_arrayspop; +---- +[1, 2] +[3, 4, 5] +[6, 7, 8, ] +[, ] +[] +[, 10, 11] + ## array_pop_front (aliases: `list_pop_front`) +#TODO:https://github.com/apache/arrow-datafusion/issues/7142 +# array_pop_front scalar function with null +# follow clickhouse and duckdb +#query ? +#select array_pop_front(null); +#---- +#NULL + # array_pop_front scalar function #1 query ?? 
select array_pop_front(make_array(1, 2, 3, 4, 5)), array_pop_front(make_array('h', 'e', 'l', 'l', 'o')); @@ -1420,6 +1554,11 @@ select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)') ---- [2, 3, 4, 5] [e, l, l, o] +query ?? +select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5), 'FixedSizeList(5, Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +[2, 3, 4, 5] [e, l, l, o] + # array_pop_front scalar function #2 (after array_pop_front, array is empty) query ? select array_pop_front(make_array(1)); @@ -1431,6 +1570,11 @@ select array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- [] +query ? +select array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +[] + # array_pop_front scalar function #3 (array_pop_front the empty array) query ? select array_pop_front(array_pop_front(make_array(1))); @@ -1442,6 +1586,11 @@ select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'LargeList(Int6 ---- [] +query ? +select array_pop_front(array_pop_front(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)'))); +---- +[] + # array_pop_front scalar function #5 (array_pop_front the nested arrays) query ? select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6))); @@ -1453,6 +1602,11 @@ select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, ---- [[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5, 6)), 'FixedSizeList(6, List(Int64))')); +---- +[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] + # array_pop_front scalar function #6 (array_pop_front the nested arrays with NULL) query ? select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4))); @@ -1464,6 +1618,11 @@ select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_arr ---- [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] +query ? +select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3), make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)), 'FixedSizeList(6, List(Int64))')); +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]] + # array_pop_front scalar function #8 (after array_pop_front, nested array is empty) query ? select array_pop_front(make_array(make_array(1, 2, 3))); @@ -1475,6 +1634,11 @@ select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'LargeList(Li ---- [] +query ? +select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)), 'FixedSizeList(1, List(Int64))')); +---- +[] + ## array_slice (aliases: list_slice) # array_slice scalar function #1 (with positive indexes) @@ -1874,6 +2038,14 @@ select ---- [4] [] [1, , 3, 4] [, , 1] +query ?? +select + array_append(arrow_cast(make_array(1, null, 3), 'FixedSizeList(3, Int64)'), 4), + array_append(arrow_cast(make_array(null, null), 'FixedSizeList(2, Int64)'), 1) +; +---- +[1, , 3, 4] [, , 1] + # test invalid (non-null) query error select array_append(1, 2); @@ -1898,6 +2070,13 @@ select ---- [[1, , 3], []] [[1, , 3], ] +query ?? 
+select + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), [null]), + array_append(arrow_cast(make_array(make_array(1, null, 3)), 'FixedSizeList(1, List(Int64))'), null); +---- +[[1, , 3], []] [[1, , 3], ] + # array_append scalar function #3 query ??? select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3.0), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); @@ -1905,7 +2084,12 @@ select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] query ??? -select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(make_array('h', 'e', 'l', 'l'), 'o'); +select array_append(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'LargeList(Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'LargeList(Utf8)'), 'o'); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + +query ??? +select array_append(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)'), 4), array_append(arrow_cast(make_array(1.0, 2.0, 3.0), 'FixedSizeList(3, Float64)'), 4.0), array_append(arrow_cast(make_array('h', 'e', 'l', 'l'), 'FixedSizeList(4, Utf8)'), 'o'); ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] @@ -1920,6 +2104,11 @@ select array_append(arrow_cast(make_array([1], [2], [3]), 'LargeList(LargeList(I ---- [[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] +query ??? +select array_append(arrow_cast(make_array([1], [2], [3]), 'FixedSizeList(3, List(Int64))'), [4]), array_append(arrow_cast(make_array([1.0], [2.0], [3.0]), 'FixedSizeList(3, List(Float64))'), [4.0]), array_append(arrow_cast(make_array(['h'], ['e'], ['l'], ['l']), 'FixedSizeList(4, List(Utf8))'), ['o']); +---- +[[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + # list_append scalar function #5 (function alias `array_append`) query ??? select list_append(make_array(1, 2, 3), 4), list_append(make_array(1.0, 2.0, 3.0), 4.0), list_append(make_array('h', 'e', 'l', 'l'), 'o'); @@ -1978,6 +2167,18 @@ select array_append(column1, column2) from large_arrays_values; [51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] +query ? +select array_append(column1, column2) from fixed_arrays_values; +---- +[, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1] +[11, 12, 13, 14, 15, 16, 17, 18, , 20, 12] +[21, 22, 23, , 25, 26, 27, 28, 29, 30, 23] +[31, 32, 33, 34, 35, , 37, 38, 39, 40, 34] +[, , , , , , , , , , 44] +[41, 42, 43, 44, 45, 46, 47, 48, 49, 50, ] +[51, 52, , 54, 55, 56, 57, 58, 59, 60, 55] +[61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 66] + # array_append with columns #2 (element is list) query ? select array_append(column1, column2) from nested_arrays; @@ -1991,6 +2192,12 @@ select array_append(column1, column2) from large_nested_arrays; [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] +query ? +select array_append(column1, column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [10, 11, 12]] + # array_append with columns and scalars #1 query ?? 
select array_append(column2, 100.1), array_append(column3, '.') from arrays; @@ -2014,6 +2221,17 @@ select array_append(column2, 100.1), array_append(column3, '.') from large_array [100.1] [,, .] [16.6, 17.7, 18.8, 100.1] [.] +query ?? +select array_append(column2, 100.1), array_append(column3, '.') from fixed_size_arrays; +---- +[1.1, 2.2, 3.3, 100.1] [L, o, r, e, m, .] +[, 5.5, 6.6, 100.1] [i, p, , u, m, .] +[7.7, 8.8, 9.9, 100.1] [d, , l, o, r, .] +[10.1, , 12.2, 100.1] [s, i, t, a, b, .] +[13.3, 14.4, 15.5, 100.1] [a, m, e, t, x, .] +[, , , 100.1] [,, a, b, c, d, .] +[16.6, 17.7, 18.8, 100.1] [, , , , , .] + # array_append with columns and scalars #2 query ?? select array_append(column1, make_array(1, 11, 111)), array_append(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), column2) from nested_arrays; @@ -2027,6 +2245,12 @@ select array_append(column1, arrow_cast(make_array(1, 11, 111), 'LargeList(Int64 [[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] [[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] +query ?? +select array_append(column1, arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)')), array_append(arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))'), column2) from fixed_size_nested_arrays; +---- +[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [7, 8, 9]] +[[4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7], [1, 11, 111]] [[1, 2, 3], [11, 12, 13], [10, 11, 12]] + ## array_prepend (aliases: `list_prepend`, `array_push_front`, `list_push_front`) # array_prepend with NULLs @@ -2093,6 +2317,11 @@ select array_prepend(1, arrow_cast(make_array(2, 3, 4), 'LargeList(Int64)')), ar ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] +query ??? +select array_prepend(1, arrow_cast([2, 3, 4], 'FixedSizeList(3, Int64)')), array_prepend(1.0, arrow_cast([2.0, 3.0, 4.0], 'FixedSizeList(3, Float64)')), array_prepend('h', arrow_cast(['e', 'l', 'l', 'o'], 'FixedSizeList(4, Utf8)')); +---- +[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] + # array_prepend scalar function #4 (element is list) query ??? select array_prepend(make_array(1), make_array(make_array(2), make_array(3), make_array(4))), array_prepend(make_array(1.0), make_array([2.0], [3.0], [4.0])), array_prepend(make_array('h'), make_array(['e'], ['l'], ['l'], ['o'])); @@ -2106,6 +2335,13 @@ select array_prepend(arrow_cast(make_array(1), 'LargeList(Int64)'), arrow_cast(m ---- [[1], [2], [3], [4]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] +query ??? +select array_prepend(arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([[1], [2], [3]], 'FixedSizeList(3, List(Int64))')), + array_prepend(arrow_cast([1.0], 'FixedSizeList(1, Float64)'), arrow_cast([[2.0], [3.0], [4.0]], 'FixedSizeList(3, List(Float64))')), + array_prepend(arrow_cast(['h'], 'FixedSizeList(1, Utf8)'), arrow_cast([['e'], ['l'], ['l'], ['o']], 'FixedSizeList(4, List(Utf8))')); +---- +[[1], [1], [2], [3]] [[1.0], [2.0], [3.0], [4.0]] [[h], [e], [l], [l], [o]] + # list_prepend scalar function #5 (function alias `array_prepend`) query ??? 
select list_prepend(1, make_array(2, 3, 4)), list_prepend(1.0, make_array(2.0, 3.0, 4.0)), list_prepend('h', make_array('e', 'l', 'l', 'o')); @@ -2164,6 +2400,18 @@ select array_prepend(column2, column1) from large_arrays_values; [55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] [66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] +query ? +select array_prepend(column2, column1) from fixed_arrays_values; +---- +[1, , 2, 3, 4, 5, 6, 7, 8, 9, 10] +[12, 11, 12, 13, 14, 15, 16, 17, 18, , 20] +[23, 21, 22, 23, , 25, 26, 27, 28, 29, 30] +[34, 31, 32, 33, 34, 35, , 37, 38, 39, 40] +[44, , , , , , , , , , ] +[, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50] +[55, 51, 52, , 54, 55, 56, 57, 58, 59, 60] +[66, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + # array_prepend with columns #2 (element is list) query ? select array_prepend(column2, column1) from nested_arrays; @@ -2177,6 +2425,12 @@ select array_prepend(column2, column1) from large_nested_arrays; [[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] +query ? +select array_prepend(column2, column1) from fixed_size_nested_arrays; +---- +[[7, 8, 9], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] +[[10, 11, 12], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] + # array_prepend with columns and scalars #1 query ?? select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; @@ -2200,6 +2454,17 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from large_arr [100.1] [., ,] [100.1, 16.6, 17.7, 18.8] [.] +query ?? +select array_prepend(100.1, column2), array_prepend('.', column3) from fixed_size_arrays; +---- +[100.1, 1.1, 2.2, 3.3] [., L, o, r, e, m] +[100.1, , 5.5, 6.6] [., i, p, , u, m] +[100.1, 7.7, 8.8, 9.9] [., d, , l, o, r] +[100.1, 10.1, , 12.2] [., s, i, t, a, b] +[100.1, 13.3, 14.4, 15.5] [., a, m, e, t, x] +[100.1, , , ] [., ,, a, b, c, d] +[100.1, 16.6, 17.7, 18.8] [., , , , , ] + # array_prepend with columns and scalars #2 (element is list) query ?? select array_prepend(make_array(1, 11, 111), column1), array_prepend(column2, make_array(make_array(1, 2, 3), make_array(11, 12, 13))) from nested_arrays; @@ -2213,6 +2478,12 @@ select array_prepend(arrow_cast(make_array(1, 11, 111), 'LargeList(Int64)'), col [[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] [[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] +query ?? +select array_prepend(arrow_cast(make_array(1, 11, 111), 'FixedSizeList(3, Int64)'), column1), array_prepend(column2, arrow_cast(make_array(make_array(1, 2, 3), make_array(11, 12, 13)), 'FixedSizeList(2, List(Int64))')) from fixed_size_nested_arrays; +---- +[[1, 11, 111], [1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]] [[7, 8, 9], [1, 2, 3], [11, 12, 13]] +[[1, 11, 111], [4, 5, 6], [10, 11, 12], [4, 9, 8], [7, 8, 9], [10, 11, 12], [1, 8, 7]] [[10, 11, 12], [1, 2, 3], [11, 12, 13]] + ## array_repeat (aliases: `list_repeat`) # array_repeat scalar function #1 @@ -2723,12 +2994,18 @@ NULL 1 NULL ## array_positions (aliases: `list_positions`) -# array_position with NULL (follow PostgreSQL) query ? select array_positions([1, 2, 3, 4, 5], null); ---- [] +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +# array_positions with NULL (follow PostgreSQL) +#query ? 
+#select array_positions(null, 1); +#---- +#NULL + # array_positions scalar function #1 query ??? select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); @@ -3748,6 +4025,11 @@ select cardinality(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)')), c ---- 5 3 5 +query III +select cardinality(arrow_cast([1, 2, 3, 4, 5], 'FixedSizeList(5, Int64)')), cardinality(arrow_cast([1, 3, 5], 'FixedSizeList(3, Int64)')), cardinality(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'FixedSizeList(5, Utf8)')); +---- +5 3 5 + # cardinality scalar function #2 query II select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_repeat(array_repeat(array_repeat(3, 3), 2), 3)); @@ -3759,6 +4041,11 @@ select cardinality(arrow_cast(make_array([1, 2], [3, 4], [5, 6]), 'LargeList(Lis ---- 6 +query I +select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(Int64))')); +---- +6 + # cardinality scalar function #3 query II select cardinality(make_array()), cardinality(make_array(make_array())) @@ -3770,6 +4057,13 @@ select cardinality(arrow_cast(make_array(), 'LargeList(Null)')), cardinality(arr ---- NULL 0 +#TODO +#https://github.com/apache/arrow-datafusion/issues/9158 +#query II +#select cardinality(arrow_cast(make_array(), 'FixedSizeList(1, Null)')), cardinality(arrow_cast(make_array(make_array()), 'FixedSizeList(1, List(Null))')) +#---- +#NULL 0 + # cardinality with columns query III select cardinality(column1), cardinality(column2), cardinality(column3) from arrays; @@ -3793,6 +4087,17 @@ NULL 3 4 4 NULL 1 4 3 NULL +query III +select cardinality(column1), cardinality(column2), cardinality(column3) from fixed_size_arrays; +---- +4 3 5 +4 3 5 +4 3 5 +4 3 5 +NULL 3 5 +4 NULL 5 +4 3 NULL + ## array_remove (aliases: `list_remove`) # array_remove scalar function #1 @@ -3801,6 +4106,13 @@ select array_remove(make_array(1, 2, 2, 1, 1), 2), array_remove(make_array(1.0, ---- [1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] +query ??? +select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'LargeList(Float64)'), 1.0), + array_remove(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 'l'); +---- +[1, 2, 1, 1] [2.0, 2.0, 1.0, 1.0] [h, e, l, o] + query ??? select array_remove(arrow_cast(make_array(1, 2, 2, 1, 1), 'FixedSizeList(5, Int64)'), 2), array_remove(arrow_cast(make_array(1.0, 2.0, 2.0, 1.0, 1.0), 'FixedSizeList(5, Float64)'), 1.0), @@ -3816,6 +4128,14 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] +query ??? +select + array_remove(arrow_cast(make_array(1, null, 2, 3), 'LargeList(Int64)'), 2), + array_remove(arrow_cast(make_array(1.1, null, 2.2, 3.3), 'LargeList(Float64)'), 1.1), + array_remove(arrow_cast(make_array('a', null, 'bc'), 'LargeList(Utf8)'), 'a'); +---- +[1, , 3] [, 2.2, 3.3] [, bc] + query ??? select array_remove(arrow_cast(make_array(1, null, 2, 3), 'FixedSizeList(4, Int64)'), 2), @@ -3824,6 +4144,14 @@ select ---- [1, , 3] [, 2.2, 3.3] [, bc] +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +# follow PostgreSQL behavior +#query ? +#select +# array_remove(NULL, 1) +#---- +#NULL + query ?? select array_remove(make_array(1, null, 2), null), @@ -3831,12 +4159,32 @@ select ---- [1, 2] [1, 2, ] +query ?? 
+select + array_remove(arrow_cast(make_array(1, null, 2), 'LargeList(Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'LargeList(Int64)'), null); +---- +[1, 2] [1, 2, ] + +query ?? +select + array_remove(arrow_cast(make_array(1, null, 2), 'FixedSizeList(3, Int64)'), null), + array_remove(arrow_cast(make_array(1, null, 2, null), 'FixedSizeList(4, Int64)'), null); +---- +[1, 2] [1, 2, ] + # array_remove scalar function #2 (element is list) query ?? select array_remove(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), [4, 5, 6]), array_remove(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), [2, 3, 4]); ---- [[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] +query ?? +select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'LargeList(List(Int64))'), [4, 5, 6]), + array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'LargeList(List(Int64))'), [2, 3, 4]); +---- +[[1, 2, 3], [5, 5, 5], [4, 5, 6], [7, 8, 9]] [[1, 3, 2], [2, 3, 4], [5, 3, 1], [1, 3, 2]] + query ?? select array_remove(arrow_cast(make_array([1, 2, 3], [4, 5, 6], [5, 5, 5], [4, 5, 6], [7, 8, 9]), 'FixedSizeList(5, List(Int64))'), [4, 5, 6]), array_remove(arrow_cast(make_array([1, 3, 2], [2, 3, 4], [2, 3, 4], [5, 3, 1], [1, 3, 2]), 'FixedSizeList(5, List(Int64))'), [2, 3, 4]); @@ -3864,6 +4212,14 @@ select array_remove(column1, column2) from arrays_with_repeating_elements; [7, 7, 8, 7, 9, 7, 8, 7, 7] [11, 12, 10, 11, 12, 10, 11, 12, 10] +query ? +select array_remove(column1, column2) from large_arrays_with_repeating_elements; +---- +[1, 1, 3, 2, 2, 1, 3, 2, 3] +[4, 5, 5, 6, 5, 5, 5, 4, 4] +[7, 7, 8, 7, 9, 7, 8, 7, 7] +[11, 12, 10, 11, 12, 10, 11, 12, 10] + query ? select array_remove(column1, column2) from fixed_arrays_with_repeating_elements; ---- @@ -3881,6 +4237,14 @@ select array_remove(column1, column2) from nested_arrays_with_repeating_elements [[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] +query ? +select array_remove(column1, column2) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + query ? select array_remove(column1, column2) from fixed_size_nested_arrays_with_repeating_elements; ---- @@ -3898,6 +4262,14 @@ select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), a [1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] [1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] +query ?? 
+select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from large_arrays_with_repeating_elements; +---- +[1, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8] [2, 1, 3, 2, 2, 1, 3, 2, 3] +[1, 2, 2, 5, 4, 4, 7, 7, 10, 7, 8] [4, 4, 5, 5, 6, 5, 5, 5, 4, 4] +[1, 2, 2, 4, 5, 4, 4, 7, 10, 7, 8] [7, 7, 7, 8, 7, 9, 7, 8, 7, 7] +[1, 2, 2, 4, 5, 4, 4, 7, 7, 7, 8] [10, 11, 12, 10, 11, 12, 10, 11, 12, 10] + query ?? select array_remove(make_array(1, 2, 2, 4, 5, 4, 4, 7, 7, 10, 7, 8), column2), array_remove(column1, 1) from fixed_arrays_with_repeating_elements; ---- @@ -3916,6 +4288,15 @@ select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [1 [[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] [[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] +query ?? +select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), + array_remove(column1, make_array(1, 2, 3)) from large_nested_arrays_with_repeating_elements; +---- +[[1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [4, 5, 6], [1, 2, 3], [7, 8, 9], [4, 5, 6], [7, 8, 9]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[10, 11, 12], [10, 11, 12], [13, 14, 15], [13, 14, 15], [16, 17, 18], [13, 14, 15], [13, 14, 15], [13, 14, 15], [10, 11, 12], [10, 11, 12]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]] [[19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24], [19, 20, 21], [25, 26, 27], [19, 20, 21], [22, 23, 24], [19, 20, 21], [19, 20, 21]] +[[1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [19, 20, 21], [22, 23, 24]] [[28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30], [31, 32, 33], [34, 35, 36], [28, 29, 30]] + query ?? select array_remove(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], [13, 14, 15], [10, 11, 12], [10, 11, 12], [19, 20, 21], [19, 20, 21], [28, 29, 30], [19, 20, 21], [22, 23, 24]), column2), array_remove(column1, make_array(1, 2, 3)) from fixed_size_nested_arrays_with_repeating_elements; @@ -3983,7 +4364,13 @@ select array_remove_n(make_array([1, 2, 3], [4, 5, 6], [4, 5, 6], [10, 11, 12], ## array_remove_all (aliases: `list_removes`) +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # array_remove_all with NULL elements +#query ? +#select array_remove_all(NULL, 1); +#---- +#NULL + query ? 
select array_remove_all(make_array(1, 2, 2, 1, 1), NULL); ---- @@ -4217,7 +4604,7 @@ NULL 10 ## array_dims (aliases: `list_dims`) # array dims error -query error Execution error: array_dims does not support type 'Int64' +query error DataFusion error: Error during planning: No function matches the given name and argument types 'array_dims\(Int64\)'. You might need to add explicit type casts.\n\tCandidate functions:\n\tarray_dims\(array\) select array_dims(1); # array_dims scalar function @@ -4231,12 +4618,27 @@ select array_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), array_di ---- [3] [2, 2] [1, 1, 1, 2, 1] +query ??? +select array_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), array_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), array_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + # array_dims scalar function #2 query ?? select array_dims(array_repeat(array_repeat(array_repeat(2, 3), 2), 1)), array_dims(array_repeat(array_repeat(array_repeat(3, 4), 5), 2)); ---- [1, 2, 3] [2, 5, 4] +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'LargeList(List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'LargeList(List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + +query ?? +select array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(2, 3), 2), 1), 'FixedSizeList(1, List(List(Int64)))')), array_dims(arrow_cast(array_repeat(array_repeat(array_repeat(3, 4), 5), 2), 'FixedSizeList(2, List(List(Int64)))')); +---- +[1, 2, 3] [2, 5, 4] + # array_dims scalar function #3 query ?? select array_dims(make_array()), array_dims(make_array(make_array())) @@ -4259,6 +4661,11 @@ select list_dims(arrow_cast(make_array(1, 2, 3), 'LargeList(Int64)')), list_dims ---- [3] [2, 2] [1, 1, 1, 2, 1] +query ??? +select list_dims(arrow_cast(make_array(1, 2, 3), 'FixedSizeList(3, Int64)')), list_dims(arrow_cast(make_array([1, 2], [3, 4]), 'FixedSizeList(2, List(Int64))')), list_dims(arrow_cast(make_array([[[[1], [2]]]]), 'FixedSizeList(1, List(List(List(List(Int64)))))')); +---- +[3] [2, 2] [1, 1, 1, 2, 1] + # array_dims with columns query ??? select array_dims(column1), array_dims(column2), array_dims(column3) from arrays; @@ -4282,14 +4689,22 @@ NULL [3] [4] [2, 2] NULL [1] [2, 2] [3] NULL +query ??? 
+select array_dims(column1), array_dims(column2), array_dims(column3) from fixed_size_arrays; +---- +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +[2, 2] [3] [5] +NULL [3] [5] +[2, 2] NULL [5] +[2, 2] [3] NULL + ## array_ndims (aliases: `list_ndims`) # array_ndims scalar function #1 -query error -select array_ndims(1); - #follow PostgreSQL query error select @@ -4306,6 +4721,7 @@ CREATE TABLE array_ndims_table AS VALUES ([1], [1, 2, 3], [[7]], [[[[[10]]]]]), ([2], [4, 5], [[8]], [[[[[10]]]]]), + (NUll, [6, 7], [[9]], [[[[[10]]]]]), ([3], [6], [[9]], [[[[[10]]]]]) ; @@ -4318,6 +4734,15 @@ AS SELECT arrow_cast(column4, 'LargeList(List(List(List(List(Int64)))))') as column4 FROM array_ndims_table; +statement ok +CREATE TABLE fixed_array_ndims_table +AS VALUES + (arrow_cast([1], 'FixedSizeList(1, Int64)'), arrow_cast([1, 2, 3], 'FixedSizeList(3, Int64)'), arrow_cast([[7]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([2], 'FixedSizeList(1, Int64)'), arrow_cast([4, 5, 6], 'FixedSizeList(3, Int64)'), arrow_cast([[8]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (null, arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')), + (arrow_cast([3], 'FixedSizeList(1, Int64)'), arrow_cast([6, 7, 8], 'FixedSizeList(3, Int64)'), arrow_cast([[9]], 'FixedSizeList(1, List(Int64))'), arrow_cast([[[[[10]]]]], 'FixedSizeList(1, List(List(List(List(Int64)))))')) +; + query IIII select array_ndims(column1), @@ -4328,6 +4753,7 @@ from array_ndims_table; ---- 1 1 2 5 1 1 2 5 +NULL 1 2 5 1 1 2 5 query IIII @@ -4340,8 +4766,24 @@ from large_array_ndims_table; ---- 1 1 2 5 1 1 2 5 +NULL 1 2 5 +1 1 2 5 + +query IIII +select + array_ndims(column1), + array_ndims(column2), + array_ndims(column3), + array_ndims(column4) +from fixed_array_ndims_table; +---- +1 1 2 5 +1 1 2 5 +NULL 1 2 5 1 1 2 5 + + statement ok drop table array_ndims_table; @@ -4794,10 +5236,11 @@ true false true false false false true true false false true false true ## array_distinct -query ? -select array_distinct(null); ----- -NULL +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +#query ? +#select array_distinct(null); +#---- +#NULL query ? select array_distinct([]); @@ -4841,6 +5284,30 @@ from array_distinct_table_1D_large; [1, 2, 3, 4, 5] [3, 5] +query ? +select array_distinct(column1) +from array_distinct_table_1D_fixed; +---- +[1, 2, 3] +[1, 2, 3, 4, 5] +[3, 5] + +query ? +select array_distinct(column1) +from array_distinct_table_1D_UTF8_fixed; +---- +[a, bc, def] +[a, bc, def, defg] +[defg] + +query ? +select array_distinct(column1) +from array_distinct_table_2D_fixed; +---- +[[1, 2], [3, 4], [5, 6]] +[[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]] +[, [5, 6]] + query ??? select array_intersect(column1, column2), array_intersect(column3, column4), @@ -5382,11 +5849,13 @@ select array_concat(column1, [7]) from arrays_values_v2; [7] # flatten + +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 # follow DuckDB -query ? -select flatten(NULL); ----- -NULL +#query ? +#select flatten(NULL); +#---- +#NULL # flatten with scalar values #1 query ??? @@ -5403,6 +5872,13 @@ select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'LargeList(Int64)')), ---- [1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] +query ??? 
+select flatten(arrow_cast(make_array(1, 2, 1, 3, 2), 'FixedSizeList(5, Int64)')), + flatten(arrow_cast(make_array([1], [2, 3], [null], make_array(4, null, 5)), 'FixedSizeList(4, List(Int64))')), + flatten(arrow_cast(make_array([[1.1], [2.2]], [[3.3], [4.4]]), 'FixedSizeList(2, List(List(Float64)))')); +---- +[1, 2, 1, 3, 2] [1, 2, 3, , 4, , 5] [1.1, 2.2, 3.3, 4.4] + # flatten with column values query ???? select flatten(column1), @@ -5424,6 +5900,16 @@ from large_flatten_table; [1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] [1, 2, 3, 4, 5, 6] [8] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +query ???? +select flatten(column1), + flatten(column2), + flatten(column3), + flatten(column4) +from fixed_size_flatten_table; +---- +[1, 2, 3] [1, 2, 3, 4, 5, 6] [1, 2, 3] [1.0, 2.1, 2.2, 3.2, 3.3, 3.4] +[1, 2, 3, 4, 5, 6] [8, 9, 10, 11, 12, 13] [1, 2, 3] [1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + ## empty # empty scalar function #1 query B @@ -5436,6 +5922,11 @@ select empty(arrow_cast(make_array(1), 'LargeList(Int64)')); ---- false +query B +select empty(arrow_cast(make_array(1), 'FixedSizeList(1, Int64)')); +---- +false + # empty scalar function #2 query B select empty(make_array()); @@ -5447,6 +5938,12 @@ select empty(arrow_cast(make_array(), 'LargeList(Null)')); ---- true +#TODO: https://github.com/apache/arrow-datafusion/issues/9158 +#query B +#select empty(arrow_cast(make_array(), 'FixedSizeList(0, Null)')); +#---- +#true + # empty scalar function #3 query B select empty(make_array(NULL)); @@ -5458,11 +5955,17 @@ select empty(arrow_cast(make_array(NULL), 'LargeList(Null)')); ---- false -# empty scalar function #4 query B -select empty(NULL); +select empty(arrow_cast(make_array(NULL), 'FixedSizeList(1, Null)')); ---- -NULL +false + +#TODO: https://github.com/apache/arrow-datafusion/issues/7142 +# empty scalar function #4 +#query B +#select empty(NULL); +#---- +#NULL # empty scalar function #5 query B @@ -5487,6 +5990,17 @@ NULL false false +query B +select empty(column1) from fixed_size_arrays; +---- +false +false +false +false +NULL +false +false + query ? SELECT string_to_array('abcxxxdef', 'xxx') ---- @@ -5686,6 +6200,9 @@ drop table fixed_slices; statement ok drop table arrayspop; +statement ok +drop table large_arrayspop; + statement ok drop table arrays_values; @@ -5806,6 +6323,9 @@ drop table flatten_table; statement ok drop table large_flatten_table; +statement ok +drop table fixed_size_flatten_table; + statement ok drop table arrays_values_without_nulls; From ec86acbc1fbc0da1e0bec9ad066a5177ec586c96 Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Mon, 26 Feb 2024 21:33:19 +0800 Subject: [PATCH 17/45] feat: expand `unnest` to accept arbitrary single array expression (#9342) * feat: expand `unnest` to accept any single array expression * unnest null * review feedback --- datafusion/sql/src/expr/function.rs | 58 +++++++++---------- datafusion/sqllogictest/test_files/unnest.slt | 48 +++++++++++---- 2 files changed, 64 insertions(+), 42 deletions(-) diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index f56138066cb6..db572a23cf99 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -16,16 +16,17 @@ // under the License. 
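 
 // After this change, `unnest` planning accepts any *single* argument whose
 // type is an array (List, LargeList, or FixedSizeList), so queries such as
 // `SELECT unnest(range(1, 3))` or `SELECT unnest(array_remove(column1, 12))`
 // plan successfully, while struct and null arguments still report
 // "not implemented".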
use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; +use arrow_schema::DataType; use datafusion_common::{ - exec_err, not_impl_err, plan_datafusion_err, plan_err, DFSchema, DataFusionError, - Dependency, Result, + not_impl_err, plan_datafusion_err, plan_err, DFSchema, DataFusionError, Dependency, + Result, }; use datafusion_expr::expr::{ScalarFunction, Unnest}; use datafusion_expr::function::suggest_valid_function; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - expr, AggregateFunction, BuiltinScalarFunction, Expr, ScalarFunctionDefinition, - WindowFrame, WindowFunctionDefinition, + expr, AggregateFunction, BuiltinScalarFunction, Expr, ExprSchemable, WindowFrame, + WindowFunctionDefinition, }; use sqlparser::ast::{ Expr as SQLExpr, Function as SQLFunction, FunctionArg, FunctionArgExpr, WindowType, @@ -80,41 +81,34 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { if name.eq("unnest") { let exprs = self.function_args_to_expr(args.clone(), schema, planner_context)?; - - match exprs.len() { + // Currently only one argument is supported + let arg = match exprs.len() { 0 => { - return exec_err!("unnest() requires at least one argument"); - } - 1 => { - if let Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn( - BuiltinScalarFunction::MakeArray, - ), - .. - }) = exprs[0] - { - // valid - } else if let Expr::Column(_) = exprs[0] { - // valid - } else if let Expr::ScalarFunction(ScalarFunction { - func_def: - ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::Struct), - .. - }) = exprs[0] - { - return not_impl_err!("unnest() does not support struct yet"); - } else { - return plan_err!( - "unnest() can only be applied to array and structs and null" - ); - } + return plan_err!("unnest() requires at least one argument"); } + 1 => &exprs[0], _ => { return not_impl_err!( "unnest() does not support multiple arguments yet" ); } + }; + // Check argument type, array types are supported + match arg.get_type(schema)? { + DataType::List(_) + | DataType::LargeList(_) + | DataType::FixedSizeList(_, _) => {} + DataType::Struct(_) => { + return not_impl_err!("unnest() does not support struct yet"); + } + DataType::Null => { + return not_impl_err!("unnest() does not support null yet"); + } + _ => { + return plan_err!( + "unnest() can only be applied to array, struct and null" + ); + } } return Ok(Expr::Unnest(Unnest { exprs })); diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 7e4ce06be203..9990c00f75d2 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -36,7 +36,7 @@ select unnest([1,2,3]); 2 3 -query error DataFusion error: Error during planning: unnest\(\) can only be applied to array and structs and null +query error DataFusion error: This feature is not implemented: unnest\(\) does not support null yet select unnest(null); ## Unnest empty array @@ -71,27 +71,55 @@ NULL NULL ## Unnest column with scalars -# TODO: This should be an error, but unnest is able to process scalar values now. 
-query I +query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null select unnest(column3) from unnest_table; ----- -1 -2 -3 -NULL ## Unnest multiple columns query error DataFusion error: This feature is not implemented: Only support single unnest expression for now select unnest(column1), unnest(column2) from unnest_table; ## Unnest scalar -query error DataFusion error: Error during planning: unnest\(\) can only be applied to array and structs and null +query error DataFusion error: Error during planning: unnest\(\) can only be applied to array, struct and null select unnest(1); ## Unnest empty expression -query error DataFusion error: Execution error: unnest\(\) requires at least one argument +query error DataFusion error: Error during planning: unnest\(\) requires at least one argument select unnest(); +## Unnest struct expression +query error DataFusion error: This feature is not implemented: unnest\(\) does not support struct yet +select unnest(struct(null)); + + +## Unnest array expression +query I +select unnest(range(1, 3)); +---- +1 +2 + +query I +select unnest(arrow_cast(range(1, 3), 'LargeList(Int64)')); +---- +1 +2 + +query I +select unnest(arrow_cast(range(1, 3), 'FixedSizeList(2, Int64)')); +---- +1 +2 + +query I +select unnest(array_remove(column1, 12)) from unnest_table; +---- +1 +2 +3 +4 +5 +6 + statement ok drop table unnest_table; From c56840734c142fa1766ba5d3b0440733fcfccd05 Mon Sep 17 00:00:00 2001 From: Chris O'Donnell Date: Mon, 26 Feb 2024 12:16:05 -0500 Subject: [PATCH 18/45] fix: flight examples (#9335) * fix: downgrade tonic for arrow compatibility Tonic 0.10 and 0.11 are not API compatible. Arrow 50 depends on tonic 0.10, and datafusion must match that dependency for compatibility reasons. * feat: make nested examples runnable cargo run --example doesn't support nested examples. Nested examples need an explicit block to be runnable. * fix: fix custom catalog typo and formatting * docs: add note about upgrading tonic with arrow * ci: add cargo check for all examples --- ci/scripts/rust_example.sh | 1 + datafusion-examples/Cargo.toml | 27 ++++++++++++++++++- .../examples/external_dependency/catalog.rs | 4 +-- .../examples/flight/flight_sql_server.rs | 4 +-- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/ci/scripts/rust_example.sh b/ci/scripts/rust_example.sh index fe3696f20865..18a7306b520d 100755 --- a/ci/scripts/rust_example.sh +++ b/ci/scripts/rust_example.sh @@ -20,6 +20,7 @@ set -ex cd datafusion-examples/examples/ cargo fmt --all -- --check +cargo check --examples files=$(ls .) 
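 # `ls .` does not descend into subdirectories such as `flight/` or
 # `external_dependency/`, so the loop below only runs top-level examples;
 # the `cargo check --examples` step added above is what covers the nested
 # examples declared through `[[example]]` blocks in Cargo.toml.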
for filename in $files diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 0fb49a20a8f1..b1a9cbcad5f7 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -29,6 +29,30 @@ license = { workspace = true } authors = { workspace = true } rust-version = { workspace = true } +[[example]] +name = "flight_sql_server" +path = "examples/flight/flight_sql_server.rs" + +[[example]] +name = "flight_server" +path = "examples/flight/flight_server.rs" + +[[example]] +name = "flight_client" +path = "examples/flight/flight_client.rs" + +[[example]] +name = "catalog" +path = "examples/external_dependency/catalog.rs" + +[[example]] +name = "dataframe_to_s3" +path = "examples/external_dependency/dataframe-to-s3.rs" + +[[example]] +name = "query_aws_s3" +path = "examples/external_dependency/query-aws-s3.rs" + [dev-dependencies] arrow = { workspace = true } arrow-flight = { workspace = true } @@ -54,6 +78,7 @@ serde = { version = "1.0.136", features = ["derive"] } serde_json = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } -tonic = "0.11" +# 0.10 and 0.11 are incompatible. Need to upgrade tonic to 0.11 when upgrading to arrow 51 +tonic = "0.10" url = { workspace = true } uuid = "1.2" diff --git a/datafusion-examples/examples/external_dependency/catalog.rs b/datafusion-examples/examples/external_dependency/catalog.rs index 29e505fb1dcb..a623eafdf3d7 100644 --- a/datafusion-examples/examples/external_dependency/catalog.rs +++ b/datafusion-examples/examples/external_dependency/catalog.rs @@ -24,7 +24,7 @@ use datafusion::{ arrow::util::pretty, catalog::{ schema::SchemaProvider, - {CatalogProviderList, CatalogProvider}, + {CatalogProvider, CatalogProviderList}, }, datasource::{ file_format::{csv::CsvFormat, parquet::ParquetFormat, FileFormat}, @@ -53,7 +53,7 @@ async fn main() -> Result<()> { .unwrap(); let mut ctx = SessionContext::new(); let state = ctx.state(); - let catlist = Arc::new(CustomCatalogProvderList::new()); + let catlist = Arc::new(CustomCatalogProviderList::new()); // use our custom catalog list for context. each context has a single catalog list. 
// context will by default have [`MemoryCatalogProviderList`]
     ctx.register_catalog_list(catlist.clone());

diff --git a/datafusion-examples/examples/flight/flight_sql_server.rs b/datafusion-examples/examples/flight/flight_sql_server.rs
index ed5b86d0b66c..35d475623062 100644
--- a/datafusion-examples/examples/flight/flight_sql_server.rs
+++ b/datafusion-examples/examples/flight/flight_sql_server.rs
@@ -231,7 +231,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
         info!("getting results for {handle}");
         let result = self.get_result(&handle)?;
         // if we get an empty result, create an empty schema
-        let (schema, batches) = match result.get(0) {
+        let (schema, batches) = match result.first() {
             None => (Arc::new(Schema::empty()), vec![]),
             Some(batch) => (batch.schema(), result.clone()),
         };
@@ -287,7 +287,7 @@ impl FlightSqlService for FlightSqlServiceImpl {
             .map_err(|e| status!("Error executing query", e))?;

         // if we get an empty result, create an empty schema
-        let schema = match result.get(0) {
+        let schema = match result.first() {
             None => Schema::empty(),
             Some(batch) => (*batch.schema()).clone(),
         };

From b8c6e0bb3f3f8ab60cf394b46d6aacc616b67d41 Mon Sep 17 00:00:00 2001
From: Trent Hauck
Date: Mon, 26 Feb 2024 10:05:09 -0800
Subject: [PATCH 19/45] docs: update parquet_sql_multiple_files.rs with a
 relative path ex (#9310)

* docs: update parquet_sql_multiple_files.rs with a relative path ex

* style: run cargo fmt

* docs: update comment

* docs: better
---
 .../examples/parquet_sql_multiple_files.rs    | 66 ++++++++++++++++---
 1 file changed, 56 insertions(+), 10 deletions(-)

diff --git a/datafusion-examples/examples/parquet_sql_multiple_files.rs b/datafusion-examples/examples/parquet_sql_multiple_files.rs
index 451de96f2e91..0e2968f20356 100644
--- a/datafusion-examples/examples/parquet_sql_multiple_files.rs
+++ b/datafusion-examples/examples/parquet_sql_multiple_files.rs
@@ -17,31 +17,35 @@

 use datafusion::datasource::file_format::parquet::ParquetFormat;
 use datafusion::datasource::listing::ListingOptions;
-use datafusion::error::Result;
 use datafusion::prelude::*;
-use datafusion_common::{FileType, GetExt};
+use object_store::local::LocalFileSystem;
+use std::path::Path;
 use std::sync::Arc;

 /// This example demonstrates executing a simple query against an Arrow data source (a directory
-/// with multiple Parquet files) and fetching results
+/// with multiple Parquet files) and fetching results. The query is run twice, once showing
+/// how to use `register_listing_table` with an absolute path, and once registering an
+/// ObjectStore to use a relative path.
 #[tokio::main]
-async fn main() -> Result<()> {
+async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // create local execution context
     let ctx = SessionContext::new();

-    let testdata = datafusion::test_util::parquet_test_data();
+    let test_data = datafusion::test_util::parquet_test_data();

     // Configure listing options
     let file_format = ParquetFormat::default().with_enable_pruning(Some(true));
     let listing_options = ListingOptions::new(Arc::new(file_format))
-        .with_file_extension(FileType::PARQUET.get_ext());
+        // This is a workaround for this example since `test_data` contains
+        // many different parquet files,
+        // in practice use FileType::PARQUET.get_ext().
+        .with_file_extension("alltypes_plain.parquet");

-    // Register a listing table - this will use all files in the directory as data sources
-    // for the query
+    // First example where we use an absolute path, which requires no additional setup.
     ctx.register_listing_table(
         "my_table",
-        &format!("file://{testdata}/alltypes_plain.parquet"),
-        listing_options,
+        &format!("file://{test_data}/"),
+        listing_options.clone(),
         None,
         None,
     )
     .await?;
@@ -60,5 +64,47 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     // print the results
     df.show().await?;

+    // Second example where we temporarily move into the test data's parent directory and
+    // simulate a relative path; this requires registering an ObjectStore.
+    let cur_dir = std::env::current_dir()?;
+
+    let test_data_path = Path::new(&test_data);
+    let test_data_path_parent = test_data_path
+        .parent()
+        .ok_or("test_data path needs a parent")?;
+
+    std::env::set_current_dir(test_data_path_parent)?;
+
+    let local_fs = Arc::new(LocalFileSystem::default());
+
+    let u = url::Url::parse("file://./")?;
+    ctx.runtime_env().register_object_store(&u, local_fs);
+
+    // Register a listing table - this will use all files in the directory as data sources
+    // for the query
+    ctx.register_listing_table(
+        "relative_table",
+        "./data",
+        listing_options.clone(),
+        None,
+        None,
+    )
+    .await?;
+
+    // execute the query
+    let df = ctx
+        .sql(
+            "SELECT * \
+        FROM relative_table \
+        LIMIT 1",
+        )
+        .await?;
+
+    // print the results
+    df.show().await?;
+
+    // Reset the current directory
+    std::env::set_current_dir(cur_dir)?;
+
     Ok(())
 }

From a26f583d2766da746ff30199cc7341227526737f Mon Sep 17 00:00:00 2001
From: Trent Hauck
Date: Mon, 26 Feb 2024 10:06:11 -0800
Subject: [PATCH 20/45] tests: add tests for writing hive-partitioned parquet
 (#9316)

* tests: adds tests associated with #9237

* style: clippy
---
 .../datasource/physical_plan/parquet/mod.rs   |  74 --------
 datafusion/core/tests/dataframe/mod.rs        | 160 +++++++++++++++++-
 2 files changed, 158 insertions(+), 76 deletions(-)

diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
index badd87084812..3aa1998bde7e 100644
--- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
+++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs
@@ -2066,80 +2066,6 @@ mod tests {
         Ok(())
     }

-    #[tokio::test]
-    async fn write_parquet_results() -> Result<()> {
-        // create partitioned input file and context
-        let tmp_dir = TempDir::new()?;
-        // let mut ctx = create_ctx(&tmp_dir, 4).await?;
-        let ctx = SessionContext::new_with_config(
-            SessionConfig::new().with_target_partitions(8),
-        );
-        let schema = populate_csv_partitions(&tmp_dir, 4, ".csv")?;
-        // register csv file with the execution context
-        ctx.register_csv(
-            "test",
-            tmp_dir.path().to_str().unwrap(),
-            CsvReadOptions::new().schema(&schema),
-        )
-        .await?;
-
-        // register a local file system object store for /tmp directory
-        let local = Arc::new(LocalFileSystem::new_with_prefix(&tmp_dir)?);
-        let local_url = Url::parse("file://local").unwrap();
-        ctx.runtime_env().register_object_store(&local_url, local);
-
-        // execute a simple query and write the results to parquet
-        let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out/";
-        let out_dir_url = "file://local/out/";
-        let df = ctx.sql("SELECT c1, c2 FROM test").await?;
-        df.write_parquet(out_dir_url, DataFrameWriteOptions::new(), None)
-            .await?;
-        // write_parquet(&mut ctx, "SELECT c1, c2 FROM test", &out_dir, None).await?;
-
-        // create a new context and verify that the results were saved to a partitioned parquet file
-        let ctx = SessionContext::new();
-
-        // get write_id
-        let mut paths = fs::read_dir(&out_dir).unwrap();
-        let path = paths.next();
-        let name = path
-            .unwrap()?
-            .path()
-            .file_name()
-            .expect("Should be a file name")
-            .to_str()
-            .expect("Should be a str")
-            .to_owned();
-        let (parsed_id, _) = name.split_once('_').expect("File should contain _ !");
-        let write_id = parsed_id.to_owned();
-
-        // register each partition as well as the top level dir
-        ctx.register_parquet(
-            "part0",
-            &format!("{out_dir}/{write_id}_0.parquet"),
-            ParquetReadOptions::default(),
-        )
-        .await?;
-
-        ctx.register_parquet("allparts", &out_dir, ParquetReadOptions::default())
-            .await?;
-
-        let part0 = ctx.sql("SELECT c1, c2 FROM part0").await?.collect().await?;
-        let allparts = ctx
-            .sql("SELECT c1, c2 FROM allparts")
-            .await?
-            .collect()
-            .await?;
-
-        let allparts_count: usize = allparts.iter().map(|batch| batch.num_rows()).sum();
-
-        assert_eq!(part0[0].schema(), allparts[0].schema());
-
-        assert_eq!(allparts_count, 40);
-
-        Ok(())
-    }
-
     fn logical2physical(expr: &Expr, schema: &Schema) -> Arc<dyn PhysicalExpr> {
         let df_schema = schema.clone().to_dfschema().unwrap();
         let execution_props = ExecutionProps::new();
diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs
index b08b2b8fc7a2..ee842004172c 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -30,15 +30,19 @@ use arrow::{
 };
 use arrow_array::Float32Array;
 use arrow_schema::ArrowError;
+use object_store::local::LocalFileSystem;
+use std::fs;
 use std::sync::Arc;
+use tempfile::TempDir;
+use url::Url;

-use datafusion::dataframe::DataFrame;
+use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
 use datafusion::datasource::MemTable;
 use datafusion::error::Result;
 use datafusion::execution::context::{SessionContext, SessionState};
 use datafusion::prelude::JoinType;
 use datafusion::prelude::{CsvReadOptions, ParquetReadOptions};
-use datafusion::test_util::parquet_test_data;
+use datafusion::test_util::{parquet_test_data, populate_csv_partitions};
 use datafusion::{assert_batches_eq, assert_batches_sorted_eq};
 use datafusion_common::{assert_contains, DataFusionError, ScalarValue, UnnestOptions};
 use datafusion_execution::config::SessionConfig;
@@ -1896,3 +1900,155 @@ async fn test_dataframe_placeholder_missing_param_values() -> Result<()> {

     Ok(())
 }
+
+#[tokio::test]
+async fn write_partitioned_parquet_results() -> Result<()> {
+    // create partitioned input file and context
+    let tmp_dir = TempDir::new()?;
+
+    let ctx = SessionContext::new();
+
+    // Create an in memory table with schema C1 and C2, both strings
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("c1", DataType::Utf8, false),
+        Field::new("c2", DataType::Utf8, false),
+    ]));
+
+    let record_batch = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(StringArray::from(vec!["abc", "def"])),
+            Arc::new(StringArray::from(vec!["123", "456"])),
+        ],
+    )?;
+
+    let mem_table = Arc::new(MemTable::try_new(schema, vec![vec![record_batch]])?);
+
+    // Register the table in the context
+    ctx.register_table("test", mem_table)?;
+
+    let local = Arc::new(LocalFileSystem::new_with_prefix(&tmp_dir)?);
+    let local_url = Url::parse("file://local").unwrap();
+    ctx.runtime_env().register_object_store(&local_url, local);
+
+    // execute a simple query and write the results to parquet
+    let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out/";
+    let out_dir_url = format!("file://{out_dir}");
+
+    // Write the results to parquet with partitioning
+    let df = ctx.sql("SELECT c1, c2 FROM test").await?;
+    let df_write_options =
+        DataFrameWriteOptions::new().with_partition_by(vec![String::from("c2")]);
+
+    df.write_parquet(&out_dir_url, df_write_options, None)
+        .await?;
+
+    // Explicitly read the parquet file at c2=123 to verify the physical files are partitioned
+    let partitioned_file = format!("{out_dir}/c2=123", out_dir = out_dir);
+    let filtered_df = ctx
+        .read_parquet(&partitioned_file, ParquetReadOptions::default())
+        .await?;
+
+    // Check that the c2 column is gone and that c1 is abc.
+    let results = filtered_df.collect().await?;
+    let expected = ["+-----+", "| c1 |", "+-----+", "| abc |", "+-----+"];
+
+    assert_batches_eq!(expected, &results);
+
+    // Read the entire set of parquet files
+    let df = ctx
+        .read_parquet(
+            &out_dir_url,
+            ParquetReadOptions::default()
+                .table_partition_cols(vec![(String::from("c2"), DataType::Utf8)]),
+        )
+        .await?;
+
+    // Check that the df has the entire set of data
+    let results = df.collect().await?;
+    let expected = [
+        "+-----+-----+",
+        "| c1 | c2 |",
+        "+-----+-----+",
+        "| abc | 123 |",
+        "| def | 456 |",
+        "+-----+-----+",
+    ];
+
+    assert_batches_eq!(expected, &results);
+
+    Ok(())
+}
+
+#[tokio::test]
+async fn write_parquet_results() -> Result<()> {
+    // create partitioned input file and context
+    let tmp_dir = TempDir::new()?;
+    // let mut ctx = create_ctx(&tmp_dir, 4).await?;
+    let ctx =
+        SessionContext::new_with_config(SessionConfig::new().with_target_partitions(8));
+    let schema = populate_csv_partitions(&tmp_dir, 4, ".csv")?;
+    // register csv file with the execution context
+    ctx.register_csv(
+        "test",
+        tmp_dir.path().to_str().unwrap(),
+        CsvReadOptions::new().schema(&schema),
+    )
+    .await?;
+
+    // register a local file system object store for /tmp directory
+    let local = Arc::new(LocalFileSystem::new_with_prefix(&tmp_dir)?);
+    let local_url = Url::parse("file://local").unwrap();
+    ctx.runtime_env().register_object_store(&local_url, local);
+
+    // execute a simple query and write the results to parquet
+    let out_dir = tmp_dir.as_ref().to_str().unwrap().to_string() + "/out/";
+    let out_dir_url = "file://local/out/";
+    let df = ctx.sql("SELECT c1, c2 FROM test").await?;
+    df.write_parquet(out_dir_url, DataFrameWriteOptions::new(), None)
+        .await?;
+    // write_parquet(&mut ctx, "SELECT c1, c2 FROM test", &out_dir, None).await?;
+
+    // create a new context and verify that the results were saved to a partitioned parquet file
+    let ctx = SessionContext::new();
+
+    // get write_id
+    let mut paths = fs::read_dir(&out_dir).unwrap();
+    let path = paths.next();
+    let name = path
+        .unwrap()?
+        .path()
+        .file_name()
+        .expect("Should be a file name")
+        .to_str()
+        .expect("Should be a str")
+        .to_owned();
+    let (parsed_id, _) = name.split_once('_').expect("File should contain _ !");
+    let write_id = parsed_id.to_owned();
+
+    // register each partition as well as the top level dir
+    ctx.register_parquet(
+        "part0",
+        &format!("{out_dir}/{write_id}_0.parquet"),
+        ParquetReadOptions::default(),
+    )
+    .await?;
+
+    ctx.register_parquet("allparts", &out_dir, ParquetReadOptions::default())
+        .await?;
+
+    let part0 = ctx.sql("SELECT c1, c2 FROM part0").await?.collect().await?;
+    let allparts = ctx
+        .sql("SELECT c1, c2 FROM allparts")
+        .await?
+        .collect()
+        .await?;
+
+    let allparts_count: usize = allparts.iter().map(|batch| batch.num_rows()).sum();
+
+    assert_eq!(part0[0].schema(), allparts[0].schema());
+
+    assert_eq!(allparts_count, 40);
+
+    Ok(())
+}

From b55d0edb0e8b9e8ef8cfaaca164dd5ae93a46854 Mon Sep 17 00:00:00 2001
From: junxiangMu <63799833+guojidan@users.noreply.github.com>
Date: Tue, 27 Feb 2024 04:31:51 +0800
Subject: [PATCH 21/45] feature: support nvl(ifnull) function (#9284)

* feature: support nvl(ifnull) function

* add sqllogictest

* add docs entry

* Update docs/source/user-guide/sql/scalar_functions.md

Co-authored-by: Jonah Gao

* fix some code

* fix docs

---------

Co-authored-by: Jonah Gao
---
 datafusion/functions/src/core/mod.rs          |   5 +-
 datafusion/functions/src/core/nvl.rs          | 277 ++++++++++++++++++
 datafusion/sqllogictest/test_files/nvl.slt    | 120 ++++++++
 .../source/user-guide/sql/scalar_functions.md |  21 ++
 4 files changed, 422 insertions(+), 1 deletion(-)
 create mode 100644 datafusion/functions/src/core/nvl.rs
 create mode 100644 datafusion/sqllogictest/test_files/nvl.slt

diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs
index 9aab4bd450d1..db47c622188d 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -18,12 +18,15 @@
 //! "core" DataFusion functions

 mod nullif;
+mod nvl;

 // create UDFs
 make_udf_function!(nullif::NullIfFunc, NULLIF, nullif);
+make_udf_function!(nvl::NVLFunc, NVL, nvl);

 // Export the functions out of this package, both as expr_fn as well as a list of functions
 export_functions!(
-    (nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression.")
+    (nullif, arg_1 arg_2, "returns NULL if value1 equals value2; otherwise it returns value1. This can be used to perform the inverse operation of the COALESCE expression."),
+    (nvl, arg_1 arg_2, "returns value2 if value1 is NULL; otherwise it returns value1")
 );

diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs
new file mode 100644
index 000000000000..6d6ad1cdeb21
--- /dev/null
+++ b/datafusion/functions/src/core/nvl.rs
@@ -0,0 +1,277 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.

+use arrow::datatypes::DataType;
+use datafusion_common::{internal_err, Result, DataFusionError};
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use arrow::compute::kernels::zip::zip;
+use arrow::compute::is_not_null;
+use arrow::array::Array;
+
+#[derive(Debug)]
+pub(super) struct NVLFunc {
+    signature: Signature,
+    aliases: Vec<String>,
+}
+
+/// Currently supported types by the nvl/ifnull function.
+/// The order of these types corresponds to the order in which coercion applies
+/// This should thus be from least informative to most informative
+static SUPPORTED_NVL_TYPES: &[DataType] = &[
+    DataType::Boolean,
+    DataType::UInt8,
+    DataType::UInt16,
+    DataType::UInt32,
+    DataType::UInt64,
+    DataType::Int8,
+    DataType::Int16,
+    DataType::Int32,
+    DataType::Int64,
+    DataType::Float32,
+    DataType::Float64,
+    DataType::Utf8,
+    DataType::LargeUtf8,
+];
+
+impl NVLFunc {
+    pub fn new() -> Self {
+        Self {
+            signature:
+            Signature::uniform(2, SUPPORTED_NVL_TYPES.to_vec(),
+                Volatility::Immutable,
+            ),
+            aliases: vec![String::from("ifnull")],
+        }
+    }
+}
+
+impl ScalarUDFImpl for NVLFunc {
+    fn as_any(&self) -> &dyn std::any::Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "nvl"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        // NVL has two args and they might get coerced, get a preview of this
+        let coerced_types = datafusion_expr::type_coercion::functions::data_types(arg_types, &self.signature);
+        coerced_types.map(|typs| typs[0].clone())
+            .map_err(|e| e.context("Failed to coerce arguments for NVL")
+        )
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        nvl_func(args)
+    }
+
+    fn aliases(&self) -> &[String] {
+        &self.aliases
+    }
+}
+
+fn nvl_func(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    if args.len() != 2 {
+        return internal_err!(
+            "{:?} args were supplied but NVL/IFNULL takes exactly two args",
+            args.len()
+        );
+    }
+    let (lhs_array, rhs_array) = match (&args[0], &args[1]) {
+        (ColumnarValue::Array(lhs), ColumnarValue::Scalar(rhs)) => {
+            (lhs.clone(), rhs.to_array_of_size(lhs.len())?)
+        }
+        (ColumnarValue::Array(lhs), ColumnarValue::Array(rhs)) => {
+            (lhs.clone(), rhs.clone())
+        }
+        (ColumnarValue::Scalar(lhs), ColumnarValue::Array(rhs)) => {
+            (lhs.to_array_of_size(rhs.len())?, rhs.clone())
+        }
+        (ColumnarValue::Scalar(lhs), ColumnarValue::Scalar(rhs)) => {
+            let mut current_value = lhs;
+            if lhs.is_null() {
+                current_value = rhs;
+            }
+            return Ok(ColumnarValue::Scalar(current_value.clone()));
+        }
+    };
+    let to_apply = is_not_null(&lhs_array)?;
+    let value = zip(&to_apply, &lhs_array, &rhs_array)?;
+    Ok(ColumnarValue::Array(value))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use arrow::array::*;
+
+    use super::*;
+    use datafusion_common::{Result, ScalarValue};
+
+    #[test]
+    fn nvl_int32() -> Result<()> {
+        let a = Int32Array::from(vec![
+            Some(1),
+            Some(2),
+            None,
+            None,
+            Some(3),
+            None,
+            None,
+            Some(4),
+            Some(5),
+        ]);
+        let a = ColumnarValue::Array(Arc::new(a));
+
+        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(6i32)));
+
+        let result = nvl_func(&[a, lit_array])?;
+        let result = result.into_array(0).expect("Failed to convert to array");
+
+        let expected = Arc::new(Int32Array::from(vec![
+            Some(1),
+            Some(2),
+            Some(6),
+            Some(6),
+            Some(3),
+            Some(6),
+            Some(6),
+            Some(4),
+            Some(5),
+        ])) as ArrayRef;
+        assert_eq!(expected.as_ref(), result.as_ref());
+        Ok(())
+    }
+
+    #[test]
+    // Ensure that arrays with no nulls can also invoke nvl() correctly
+    fn nvl_int32_nonulls() -> Result<()> {
+        let a = Int32Array::from(vec![1, 3, 10, 7, 8, 1, 2, 4, 5]);
+        let a = ColumnarValue::Array(Arc::new(a));
+
+        let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(20i32)));
+
+        let result = nvl_func(&[a, lit_array])?;
+        let result = result.into_array(0).expect("Failed to convert to array");
+
+        let expected = Arc::new(Int32Array::from(vec![
+            Some(1),
+            Some(3),
Some(10), + Some(7), + Some(8), + Some(1), + Some(2), + Some(4), + Some(5), + ])) as ArrayRef; + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) + } + + #[test] + fn nvl_boolean() -> Result<()> { + let a = BooleanArray::from(vec![Some(true), Some(false), None]); + let a = ColumnarValue::Array(Arc::new(a)); + + let lit_array = ColumnarValue::Scalar(ScalarValue::Boolean(Some(false))); + + let result = nvl_func(&[a, lit_array])?; + let result = result.into_array(0).expect("Failed to convert to array"); + + let expected = + Arc::new(BooleanArray::from(vec![Some(true), Some(false), Some(false)])) as ArrayRef; + + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) + } + + #[test] + fn nvl_string() -> Result<()> { + let a = StringArray::from(vec![Some("foo"), Some("bar"), None, Some("baz")]); + let a = ColumnarValue::Array(Arc::new(a)); + + let lit_array = ColumnarValue::Scalar(ScalarValue::from("bax")); + + let result = nvl_func(&[a, lit_array])?; + let result = result.into_array(0).expect("Failed to convert to array"); + + let expected = Arc::new(StringArray::from(vec![ + Some("foo"), + Some("bar"), + Some("bax"), + Some("baz"), + ])) as ArrayRef; + + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) + } + + #[test] + fn nvl_literal_first() -> Result<()> { + let a = Int32Array::from(vec![Some(1), Some(2), None, None, Some(3), Some(4)]); + let a = ColumnarValue::Array(Arc::new(a)); + + let lit_array = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); + + let result = nvl_func(&[lit_array, a])?; + let result = result.into_array(0).expect("Failed to convert to array"); + + let expected = Arc::new(Int32Array::from(vec![ + Some(2), + Some(2), + Some(2), + Some(2), + Some(2), + Some(2), + ])) as ArrayRef; + assert_eq!(expected.as_ref(), result.as_ref()); + Ok(()) + } + + #[test] + fn nvl_scalar() -> Result<()> { + let a_null = ColumnarValue::Scalar(ScalarValue::Int32(None)); + let b_null = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); + + let result_null = nvl_func(&[a_null, b_null])?; + let result_null = result_null.into_array(1).expect("Failed to convert to array"); + + let expected_null = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef; + + assert_eq!(expected_null.as_ref(), result_null.as_ref()); + + let a_nnull = ColumnarValue::Scalar(ScalarValue::Int32(Some(2i32))); + let b_nnull = ColumnarValue::Scalar(ScalarValue::Int32(Some(1i32))); + + let result_nnull = nvl_func(&[a_nnull, b_nnull])?; + let result_nnull = result_nnull + .into_array(1) + .expect("Failed to convert to array"); + + let expected_nnull = Arc::new(Int32Array::from(vec![Some(2i32)])) as ArrayRef; + assert_eq!(expected_nnull.as_ref(), result_nnull.as_ref()); + + Ok(()) + } +} diff --git a/datafusion/sqllogictest/test_files/nvl.slt b/datafusion/sqllogictest/test_files/nvl.slt new file mode 100644 index 000000000000..81e79e1eb5b0 --- /dev/null +++ b/datafusion/sqllogictest/test_files/nvl.slt @@ -0,0 +1,120 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at

+# http://www.apache.org/licenses/LICENSE-2.0

+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+statement ok
+CREATE TABLE test(
+    int_field INT,
+    bool_field BOOLEAN,
+    text_field TEXT,
+    more_ints INT
+) as VALUES
+  (1, true, 'abc', 2),
+  (2, false, 'def', 2),
+  (3, NULL, 'ghij', 3),
+  (NULL, NULL, NULL, 4),
+  (4, false, 'zxc', 5),
+  (NULL, true, NULL, 6)
+;
+
+# Arrays tests
+query I
+SELECT NVL(int_field, 2) FROM test;
+----
+1
+2
+3
+2
+4
+2
+
+
+query B
+SELECT NVL(bool_field, false) FROM test;
+----
+true
+false
+false
+false
+false
+true
+
+
+query T
+SELECT NVL(text_field, 'zxb') FROM test;
+----
+abc
+def
+ghij
+zxb
+zxc
+zxb
+
+
+query I
+SELECT IFNULL(int_field, more_ints) FROM test;
+----
+1
+2
+3
+4
+4
+6
+
+
+query I
+SELECT NVL(3, int_field) FROM test;
+----
+3
+3
+3
+3
+3
+3
+
+
+# Scalar values tests
+query I
+SELECT NVL(1, 1);
+----
+1
+
+query I
+SELECT NVL(1, 3);
+----
+1
+
+query I
+SELECT NVL(NULL, NULL);
+----
+NULL
diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md
index 707e8c24b326..d4eb5944ad09 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -569,6 +569,8 @@ trunc(numeric_expression[, decimal_places])

 - [coalesce](#coalesce)
 - [nullif](#nullif)
+- [nvl](#nvl)
+- [ifnull](#ifnull)

 ### `coalesce`

@@ -603,6 +605,25 @@ nullif(expression1, expression2)
 - **expression2**: Expression to compare to expression1.
   Can be a constant, column, or function, and any combination of arithmetic operators.

+### `nvl`
+
+Returns _expression2_ if _expression1_ is NULL; otherwise it returns _expression1_.
+
+```
+nvl(expression1, expression2)
+```
+
+#### Arguments
+
+- **expression1**: return if expression1 is not NULL.
+  Can be a constant, column, or function, and any combination of arithmetic operators.
+- **expression2**: return if expression1 is NULL.
+  Can be a constant, column, or function, and any combination of arithmetic operators.
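+
+#### Example
+
+A short sketch of the behavior, mirroring the `nvl.slt` cases above:
+
+```sql
+SELECT nvl(1, 3);       -- 1: expression1 is not NULL
+SELECT nvl(NULL, 3);    -- 3: expression1 is NULL
+SELECT nvl(NULL, NULL); -- NULL
+```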
+ +### `ifnull` + +_Alias of [nvl](#nvl)._ + ## String Functions - [ascii](#ascii) From 85f7a8e88e3596b3ec900c43fabb8f7f42bbea5c Mon Sep 17 00:00:00 2001 From: Junhao Liu Date: Mon, 26 Feb 2024 18:24:16 -0600 Subject: [PATCH 22/45] Move abs to datafusion_functions (#9313) * feat: move abs to datafusion_functions * fix proto * fix proto * fix CI vendored code * Fix proto * add support type * fix signature * fix typo * fix test cases * disable a test case * remove old code from math_expressions * feat: add test * fix clippy * use unknown for proto * fix unknown proto --- datafusion/expr/src/built_in_function.rs | 7 - datafusion/expr/src/expr.rs | 5 - datafusion/expr/src/expr_fn.rs | 2 - datafusion/functions/src/math/abs.rs | 177 ++++++++++++++++++ datafusion/functions/src/math/mod.rs | 8 +- datafusion/physical-expr/src/functions.rs | 4 - .../physical-expr/src/math_expressions.rs | 93 +-------- datafusion/proto/proto/datafusion.proto | 4 +- datafusion/proto/src/generated/pbjson.rs | 6 +- datafusion/proto/src/generated/prost.rs | 8 +- .../proto/src/logical_plan/from_proto.rs | 6 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 - 12 files changed, 198 insertions(+), 123 deletions(-) create mode 100644 datafusion/functions/src/math/abs.rs diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 8b4e65121c79..cf1e73f780ad 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -42,8 +42,6 @@ use strum_macros::EnumIter; #[derive(Debug, Clone, PartialEq, Eq, Hash, EnumIter, Copy)] pub enum BuiltinScalarFunction { // math functions - /// abs - Abs, /// acos Acos, /// asin @@ -364,7 +362,6 @@ impl BuiltinScalarFunction { pub fn volatility(&self) -> Volatility { match self { // Immutable scalar builtins - BuiltinScalarFunction::Abs => Volatility::Immutable, BuiltinScalarFunction::Acos => Volatility::Immutable, BuiltinScalarFunction::Asin => Volatility::Immutable, BuiltinScalarFunction::Atan => Volatility::Immutable, @@ -868,8 +865,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrowTypeof => Ok(Utf8), - BuiltinScalarFunction::Abs => Ok(input_expr_types[0].clone()), - BuiltinScalarFunction::OverLay => { utf8_to_str_type(&input_expr_types[0], "overlay") } @@ -1338,7 +1333,6 @@ impl BuiltinScalarFunction { Signature::uniform(2, vec![Int64], self.volatility()) } BuiltinScalarFunction::ArrowTypeof => Signature::any(1, self.volatility()), - BuiltinScalarFunction::Abs => Signature::any(1, self.volatility()), BuiltinScalarFunction::OverLay => Signature::one_of( vec![ Exact(vec![Utf8, Utf8, Int64, Int64]), @@ -1444,7 +1438,6 @@ impl BuiltinScalarFunction { /// Returns all names that can be used to call this function pub fn aliases(&self) -> &'static [&'static str] { match self { - BuiltinScalarFunction::Abs => &["abs"], BuiltinScalarFunction::Acos => &["acos"], BuiltinScalarFunction::Acosh => &["acosh"], BuiltinScalarFunction::Asin => &["asin"], diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index f40ccb6cdb58..c3d9269d1559 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -2033,11 +2033,6 @@ mod test { .is_volatile() .unwrap() ); - assert!( - !ScalarFunctionDefinition::BuiltIn(BuiltinScalarFunction::Abs) - .is_volatile() - .unwrap() - ); // UDF #[derive(Debug)] diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 4aa270e6dde6..55bd40a18900 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ 
-557,7 +557,6 @@ nary_scalar_expr!(
     trunc,
     "truncate toward zero, with optional precision"
 );
-scalar_expr!(Abs, abs, num, "absolute value");
 scalar_expr!(Signum, signum, num, "sign of the argument (-1, 0, +1) ");
 scalar_expr!(Exp, exp, num, "exponential");
 scalar_expr!(Gcd, gcd, arg_1 arg_2, "greatest common divisor");
@@ -1354,7 +1353,6 @@ mod test {
         test_nary_scalar_expr!(Round, round, input, decimal_places);
         test_nary_scalar_expr!(Trunc, trunc, num);
         test_nary_scalar_expr!(Trunc, trunc, num, precision);
-        test_unary_scalar_expr!(Abs, abs);
         test_unary_scalar_expr!(Signum, signum);
         test_unary_scalar_expr!(Exp, exp);
         test_unary_scalar_expr!(Log2, log2);
diff --git a/datafusion/functions/src/math/abs.rs b/datafusion/functions/src/math/abs.rs
new file mode 100644
index 000000000000..21ca37fb8ec3
--- /dev/null
+++ b/datafusion/functions/src/math/abs.rs
@@ -0,0 +1,177 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! math expressions
+
+use arrow::array::Decimal128Array;
+use arrow::array::Decimal256Array;
+use arrow::array::Int16Array;
+use arrow::array::Int32Array;
+use arrow::array::Int64Array;
+use arrow::array::Int8Array;
+use arrow::datatypes::DataType;
+use datafusion_common::not_impl_err;
+use datafusion_common::plan_datafusion_err;
+use datafusion_common::{internal_err, Result, DataFusionError};
+use datafusion_expr::utils;
+use datafusion_expr::ColumnarValue;
+
+use datafusion_expr::{ScalarUDFImpl, Signature, Volatility};
+use std::any::Any;
+use std::sync::Arc;
+use arrow::array::{ArrayRef, Float32Array, Float64Array};
+use arrow::error::ArrowError;
+
+type MathArrayFunction = fn(&Vec<ArrayRef>) -> Result<ArrayRef>;
+
+macro_rules! make_abs_function {
+    ($ARRAY_TYPE:ident) => {{
+        |args: &Vec<ArrayRef>| {
+            let array = downcast_arg!(&args[0], "abs arg", $ARRAY_TYPE);
+            let res: $ARRAY_TYPE = array.unary(|x| x.abs());
+            Ok(Arc::new(res) as ArrayRef)
+        }
+    }};
+}
+
+macro_rules! make_try_abs_function {
+    ($ARRAY_TYPE:ident) => {{
+        |args: &Vec<ArrayRef>| {
+            let array = downcast_arg!(&args[0], "abs arg", $ARRAY_TYPE);
+            let res: $ARRAY_TYPE = array.try_unary(|x| {
+                x.checked_abs().ok_or_else(|| {
+                    ArrowError::ComputeError(format!(
+                        "{} overflow on abs({})",
+                        stringify!($ARRAY_TYPE),
+                        x
+                    ))
+                })
+            })?;
+            Ok(Arc::new(res) as ArrayRef)
+        }
+    }};
+}
+
+macro_rules! make_decimal_abs_function {
+    ($ARRAY_TYPE:ident) => {{
+        |args: &Vec<ArrayRef>| {
+            let array = downcast_arg!(&args[0], "abs arg", $ARRAY_TYPE);
+            let res: $ARRAY_TYPE = array
+                .unary(|x| x.wrapping_abs())
+                .with_data_type(args[0].data_type().clone());
+            Ok(Arc::new(res) as ArrayRef)
+        }
+    }};
+}
+
+/// Abs SQL function
+/// Return different implementations based on input datatype to reduce branches during execution
+fn create_abs_function(
+    input_data_type: &DataType,
+) -> Result<MathArrayFunction> {
+    match input_data_type {
+        DataType::Float32 => Ok(make_abs_function!(Float32Array)),
+        DataType::Float64 => Ok(make_abs_function!(Float64Array)),
+
+        // Types that may overflow, such as abs(-128_i8).
+        DataType::Int8 => Ok(make_try_abs_function!(Int8Array)),
+        DataType::Int16 => Ok(make_try_abs_function!(Int16Array)),
+        DataType::Int32 => Ok(make_try_abs_function!(Int32Array)),
+        DataType::Int64 => Ok(make_try_abs_function!(Int64Array)),
+
+        // Types of results are the same as the input.
+        DataType::Null
+        | DataType::UInt8
+        | DataType::UInt16
+        | DataType::UInt32
+        | DataType::UInt64 => Ok(|args: &Vec<ArrayRef>| Ok(args[0].clone())),
+
+        // Decimal types
+        DataType::Decimal128(_, _) => Ok(make_decimal_abs_function!(Decimal128Array)),
+        DataType::Decimal256(_, _) => Ok(make_decimal_abs_function!(Decimal256Array)),
+
+        other => not_impl_err!("Unsupported data type {other:?} for function abs"),
+    }
+}
+#[derive(Debug)]
+pub(super) struct AbsFunc {
+    signature: Signature,
+}
+
+impl AbsFunc {
+    pub fn new() -> Self {
+        Self {
+            signature: Signature::any(1, Volatility::Immutable)
+        }
+    }
+}
+
+impl ScalarUDFImpl for AbsFunc {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        "abs"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        if arg_types.len() != 1 {
+            return Err(plan_datafusion_err!(
+                "{}",
+                utils::generate_signature_error_msg(
+                    self.name(),
+                    self.signature().clone(),
+                    arg_types,
+                )
+            ));
+        }
+        match arg_types[0] {
+            DataType::Float32 => Ok(DataType::Float32),
+            DataType::Float64 => Ok(DataType::Float64),
+            DataType::Int8 => Ok(DataType::Int8),
+            DataType::Int16 => Ok(DataType::Int16),
+            DataType::Int32 => Ok(DataType::Int32),
+            DataType::Int64 => Ok(DataType::Int64),
+            DataType::Null => Ok(DataType::Null),
+            DataType::UInt8 => Ok(DataType::UInt8),
+            DataType::UInt16 => Ok(DataType::UInt16),
+            DataType::UInt32 => Ok(DataType::UInt32),
+            DataType::UInt64 => Ok(DataType::UInt64),
+            DataType::Decimal128(precision, scale) => Ok(DataType::Decimal128(precision, scale)),
+            DataType::Decimal256(precision, scale) => Ok(DataType::Decimal256(precision, scale)),
+            _ => not_impl_err!("Unsupported data type {} for function abs", arg_types[0].to_string()),
+        }
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        let args = ColumnarValue::values_to_arrays(args)?;
+
+        if args.len() != 1 {
+            return internal_err!("abs function requires 1 argument, got {}", args.len());
+        }
+
+        let input_data_type = args[0].data_type();
+        let abs_fun = create_abs_function(input_data_type)?;
+
+        let arr = abs_fun(&args)?;
+        Ok(ColumnarValue::Array(arr))
+    }
+}
diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs
index 873625948a35..9d13103ef23f 100644
--- a/datafusion/functions/src/math/mod.rs
+++ b/datafusion/functions/src/math/mod.rs
@@ -18,12 +18,14 @@
 //!
"math" DataFusion functions mod nans; +mod abs; // create UDFs make_udf_function!(nans::IsNanFunc, ISNAN, isnan); +make_udf_function!(abs::AbsFunc, ABS, abs); // Export the functions out of this package, both as expr_fn as well as a list of functions export_functions!( - (isnan, num, "returns true if a given number is +NaN or -NaN otherwise returns false") -); - + (isnan, num, "returns true if a given number is +NaN or -NaN otherwise returns false"), + (abs, num, "returns the absolute value of a given number") +); \ No newline at end of file diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 8446a65d72c8..0dc3f96dc12a 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -260,9 +260,6 @@ pub fn create_physical_fun( ) -> Result { Ok(match fun { // math functions - BuiltinScalarFunction::Abs => Arc::new(|args| { - make_scalar_function_inner(math_expressions::abs_invoke)(args) - }), BuiltinScalarFunction::Acos => Arc::new(math_expressions::acos), BuiltinScalarFunction::Asin => Arc::new(math_expressions::asin), BuiltinScalarFunction::Atan => Arc::new(math_expressions::atan), @@ -3075,7 +3072,6 @@ mod tests { let funs = [ BuiltinScalarFunction::Concat, BuiltinScalarFunction::ToTimestamp, - BuiltinScalarFunction::Abs, BuiltinScalarFunction::Repeat, ]; diff --git a/datafusion/physical-expr/src/math_expressions.rs b/datafusion/physical-expr/src/math_expressions.rs index af66862aecc5..b622aee8e2b3 100644 --- a/datafusion/physical-expr/src/math_expressions.rs +++ b/datafusion/physical-expr/src/math_expressions.rs @@ -18,15 +18,11 @@ //! Math expressions use arrow::array::ArrayRef; -use arrow::array::{ - BooleanArray, Decimal128Array, Decimal256Array, Float32Array, Float64Array, - Int16Array, Int32Array, Int64Array, Int8Array, -}; +use arrow::array::{BooleanArray, Float32Array, Float64Array, Int64Array}; use arrow::datatypes::DataType; -use arrow::error::ArrowError; +use datafusion_common::internal_err; use datafusion_common::ScalarValue; use datafusion_common::ScalarValue::{Float32, Int64}; -use datafusion_common::{internal_err, not_impl_err}; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; use rand::{thread_rng, Rng}; @@ -35,8 +31,6 @@ use std::iter; use std::mem::swap; use std::sync::Arc; -type MathArrayFunction = fn(&[ArrayRef]) -> Result; - macro_rules! downcast_compute_op { ($ARRAY:expr, $NAME:expr, $FUNC:ident, $TYPE:ident) => {{ let n = $ARRAY.as_any().downcast_ref::<$TYPE>(); @@ -176,7 +170,6 @@ math_unary_function!("acosh", acosh); math_unary_function!("atanh", atanh); math_unary_function!("floor", floor); math_unary_function!("ceil", ceil); -math_unary_function!("abs", abs); math_unary_function!("signum", signum); math_unary_function!("exp", exp); math_unary_function!("ln", ln); @@ -673,88 +666,6 @@ fn compute_truncate64(x: f64, y: i64) -> f64 { (x * factor).round() / factor } -macro_rules! make_abs_function { - ($ARRAY_TYPE:ident) => {{ - |args: &[ArrayRef]| { - let array = downcast_arg!(&args[0], "abs arg", $ARRAY_TYPE); - let res: $ARRAY_TYPE = array.unary(|x| x.abs()); - Ok(Arc::new(res) as ArrayRef) - } - }}; -} - -macro_rules! 
make_try_abs_function { - ($ARRAY_TYPE:ident) => {{ - |args: &[ArrayRef]| { - let array = downcast_arg!(&args[0], "abs arg", $ARRAY_TYPE); - let res: $ARRAY_TYPE = array.try_unary(|x| { - x.checked_abs().ok_or_else(|| { - ArrowError::ComputeError(format!( - "{} overflow on abs({})", - stringify!($ARRAY_TYPE), - x - )) - }) - })?; - Ok(Arc::new(res) as ArrayRef) - } - }}; -} - -macro_rules! make_decimal_abs_function { - ($ARRAY_TYPE:ident) => {{ - |args: &[ArrayRef]| { - let array = downcast_arg!(&args[0], "abs arg", $ARRAY_TYPE); - let res: $ARRAY_TYPE = array - .unary(|x| x.wrapping_abs()) - .with_data_type(args[0].data_type().clone()); - Ok(Arc::new(res) as ArrayRef) - } - }}; -} - -/// Abs SQL function -/// Return different implementations based on input datatype to reduce branches during execution -pub(super) fn create_abs_function( - input_data_type: &DataType, -) -> Result { - match input_data_type { - DataType::Float32 => Ok(make_abs_function!(Float32Array)), - DataType::Float64 => Ok(make_abs_function!(Float64Array)), - - // Types that may overflow, such as abs(-128_i8). - DataType::Int8 => Ok(make_try_abs_function!(Int8Array)), - DataType::Int16 => Ok(make_try_abs_function!(Int16Array)), - DataType::Int32 => Ok(make_try_abs_function!(Int32Array)), - DataType::Int64 => Ok(make_try_abs_function!(Int64Array)), - - // Types of results are the same as the input. - DataType::Null - | DataType::UInt8 - | DataType::UInt16 - | DataType::UInt32 - | DataType::UInt64 => Ok(|args: &[ArrayRef]| Ok(args[0].clone())), - - // Decimal types - DataType::Decimal128(_, _) => Ok(make_decimal_abs_function!(Decimal128Array)), - DataType::Decimal256(_, _) => Ok(make_decimal_abs_function!(Decimal256Array)), - - other => not_impl_err!("Unsupported data type {other:?} for function abs"), - } -} - -/// abs() SQL function implementation -pub fn abs_invoke(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return internal_err!("abs function requires 1 argument, got {}", args.len()); - } - - let input_data_type = args[0].data_type(); - let abs_fun = create_abs_function(input_data_type)?; - - abs_fun(args) -} - #[cfg(test)] mod tests { diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 7673ce86ae1d..d91373f8f8d2 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -545,7 +545,9 @@ message InListNode { } enum ScalarFunction { - Abs = 0; + // 0 was Abs before + // The first enum value must be zero for open enums + unknown = 0; Acos = 1; Asin = 2; Atan = 3; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 65483f9ac467..964b8890184c 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22321,7 +22321,7 @@ impl serde::Serialize for ScalarFunction { S: serde::Serializer, { let variant = match self { - Self::Abs => "Abs", + Self::Unknown => "unknown", Self::Acos => "Acos", Self::Asin => "Asin", Self::Atan => "Atan", @@ -22464,7 +22464,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { D: serde::Deserializer<'de>, { const FIELDS: &[&str] = &[ - "Abs", + "unknown", "Acos", "Asin", "Atan", @@ -22636,7 +22636,7 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { E: serde::de::Error, { match value { - "Abs" => Ok(ScalarFunction::Abs), + "unknown" => Ok(ScalarFunction::Unknown), "Acos" => Ok(ScalarFunction::Acos), "Asin" => Ok(ScalarFunction::Asin), "Atan" => Ok(ScalarFunction::Atan), diff --git 
a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index a567269e3356..292aef4402a2 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2633,7 +2633,9 @@ impl JoinConstraint { #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum ScalarFunction { - Abs = 0, + /// 0 was Abs before + /// The first enum value must be zero for open enums + Unknown = 0, Acos = 1, Asin = 2, Atan = 3, @@ -2776,7 +2778,7 @@ impl ScalarFunction { /// (if the ProtoBuf definition does not change) and safe for programmatic use. pub fn as_str_name(&self) -> &'static str { match self { - ScalarFunction::Abs => "Abs", + ScalarFunction::Unknown => "unknown", ScalarFunction::Acos => "Acos", ScalarFunction::Asin => "Asin", ScalarFunction::Atan => "Atan", @@ -2913,7 +2915,7 @@ impl ScalarFunction { /// Creates an enum from field names used in the ProtoBuf definition. pub fn from_str_name(value: &str) -> ::core::option::Option { match value { - "Abs" => Some(Self::Abs), + "unknown" => Some(Self::Unknown), "Acos" => Some(Self::Acos), "Asin" => Some(Self::Asin), "Atan" => Some(Self::Atan), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 2554018a9273..69114fd74595 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -47,7 +47,7 @@ use datafusion_common::{ use datafusion_expr::expr::Unnest; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - abs, acos, acosh, array, array_append, array_concat, array_dims, array_distinct, + acos, acosh, array, array_append, array_concat, array_dims, array_distinct, array_element, array_empty, array_except, array_has, array_has_all, array_has_any, array_intersect, array_length, array_ndims, array_pop_back, array_pop_front, array_position, array_positions, array_prepend, array_remove, array_remove_all, @@ -442,6 +442,7 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { fn from(f: &protobuf::ScalarFunction) -> Self { use protobuf::ScalarFunction; match f { + ScalarFunction::Unknown => todo!(), ScalarFunction::Sqrt => Self::Sqrt, ScalarFunction::Cbrt => Self::Cbrt, ScalarFunction::Sin => Self::Sin, @@ -470,7 +471,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Ceil => Self::Ceil, ScalarFunction::Round => Self::Round, ScalarFunction::Trunc => Self::Trunc, - ScalarFunction::Abs => Self::Abs, ScalarFunction::OctetLength => Self::OctetLength, ScalarFunction::Concat => Self::Concat, ScalarFunction::Lower => Self::Lower, @@ -1360,6 +1360,7 @@ pub fn parse_expr( let args = &expr.args; match scalar_function { + ScalarFunction::Unknown => Err(proto_error("Unknown scalar function")), ScalarFunction::Asin => Ok(asin(parse_expr(&args[0], registry)?)), ScalarFunction::Acos => Ok(acos(parse_expr(&args[0], registry)?)), ScalarFunction::Asinh => Ok(asinh(parse_expr(&args[0], registry)?)), @@ -1537,7 +1538,6 @@ pub fn parse_expr( .map(|expr| parse_expr(expr, registry)) .collect::, _>>()?, )), - ScalarFunction::Abs => Ok(abs(parse_expr(&args[0], registry)?)), ScalarFunction::Signum => Ok(signum(parse_expr(&args[0], registry)?)), ScalarFunction::OctetLength => { Ok(octet_length(parse_expr(&args[0], registry)?)) diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 
ccadbb217a58..9603df209ce4 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1450,7 +1450,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Ceil => Self::Ceil, BuiltinScalarFunction::Round => Self::Round, BuiltinScalarFunction::Trunc => Self::Trunc, - BuiltinScalarFunction::Abs => Self::Abs, BuiltinScalarFunction::OctetLength => Self::OctetLength, BuiltinScalarFunction::Concat => Self::Concat, BuiltinScalarFunction::Lower => Self::Lower, From 07a438d72b7e962fed92cb041f3063187e3cea29 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Tue, 27 Feb 2024 09:22:49 +0300 Subject: [PATCH 23/45] Address reviews --- datafusion-examples/examples/custom_datasource.rs | 1 + .../core/src/datasource/physical_plan/arrow_file.rs | 1 + datafusion/core/src/datasource/physical_plan/avro.rs | 1 + datafusion/core/src/datasource/physical_plan/csv.rs | 1 + datafusion/core/src/datasource/physical_plan/json.rs | 1 + .../core/src/datasource/physical_plan/parquet/mod.rs | 1 + .../core/src/physical_optimizer/enforce_distribution.rs | 1 + .../core/src/physical_optimizer/output_requirements.rs | 1 + datafusion/core/src/physical_planner.rs | 1 + datafusion/core/src/test/mod.rs | 1 + datafusion/core/src/test_util/mod.rs | 3 ++- datafusion/core/tests/custom_sources.rs | 1 + .../custom_sources_cases/provider_filter_pushdown.rs | 1 + datafusion/core/tests/custom_sources_cases/statistics.rs | 1 + datafusion/core/tests/user_defined/user_defined_plan.rs | 3 ++- datafusion/physical-plan/src/aggregates/mod.rs | 2 ++ datafusion/physical-plan/src/analyze.rs | 1 + datafusion/physical-plan/src/coalesce_batches.rs | 1 + datafusion/physical-plan/src/coalesce_partitions.rs | 1 + datafusion/physical-plan/src/empty.rs | 1 + datafusion/physical-plan/src/explain.rs | 1 + datafusion/physical-plan/src/filter.rs | 2 ++ datafusion/physical-plan/src/joins/cross_join.rs | 1 + datafusion/physical-plan/src/joins/hash_join.rs | 4 ++-- datafusion/physical-plan/src/joins/nested_loop_join.rs | 1 + datafusion/physical-plan/src/joins/sort_merge_join.rs | 1 + .../physical-plan/src/joins/symmetric_hash_join.rs | 1 + datafusion/physical-plan/src/limit.rs | 2 ++ datafusion/physical-plan/src/memory.rs | 1 + datafusion/physical-plan/src/placeholder_row.rs | 1 + datafusion/physical-plan/src/projection.rs | 1 + datafusion/physical-plan/src/recursive_query.rs | 1 + datafusion/physical-plan/src/repartition/mod.rs | 9 +++------ datafusion/physical-plan/src/sorts/partial_sort.rs | 1 + datafusion/physical-plan/src/sorts/sort.rs | 1 + .../physical-plan/src/sorts/sort_preserving_merge.rs | 1 + datafusion/physical-plan/src/streaming.rs | 1 + datafusion/physical-plan/src/test/exec.rs | 6 ++++++ datafusion/physical-plan/src/union.rs | 2 ++ datafusion/physical-plan/src/unnest.rs | 1 + datafusion/physical-plan/src/values.rs | 1 + .../physical-plan/src/windows/bounded_window_agg_exec.rs | 1 + datafusion/physical-plan/src/windows/window_agg_exec.rs | 1 + datafusion/physical-plan/src/work_table.rs | 1 + 44 files changed, 58 insertions(+), 10 deletions(-) diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_datasource.rs index d3cd66b2c9bc..1ce3ced0e1c4 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_datasource.rs @@ -208,6 +208,7 @@ impl CustomExec { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence 
properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); PlanPropertiesCache::new( diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index 24e825a6920b..8eebc2b68f8b 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -84,6 +84,7 @@ impl ArrowExec { Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( schema: SchemaRef, projected_output_ordering: &[LexOrdering], diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 6b6e7bce90c1..9d65a0ce089a 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -69,6 +69,7 @@ impl AvroExec { &self.base_config } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( schema: SchemaRef, orderings: &[LexOrdering], diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 19281bc3c189..964f40b8e002 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -121,6 +121,7 @@ impl CsvExec { Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( schema: SchemaRef, orderings: &[LexOrdering], diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 6e17e58d8444..b27bcdaa917c 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -89,6 +89,7 @@ impl NdJsonExec { Partitioning::UnknownPartitioning(file_scan_config.file_groups.len()) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( schema: SchemaRef, orderings: &[LexOrdering], diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 810a84646c86..e2ad1980d422 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -266,6 +266,7 @@ impl ParquetExec { Partitioning::UnknownPartitioning(file_config.file_groups.len()) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache( schema: SchemaRef, orderings: &[LexOrdering], diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index a5ad2d546d41..86a490278b0e 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1360,6 +1360,7 @@ pub(crate) mod tests { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(input: &Arc) -> PlanPropertiesCache { PlanPropertiesCache::new( input.equivalence_properties().clone(), // Equivalence Properties diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs index a806580ce716..129ae538808f 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/core/src/physical_optimizer/output_requirements.rs @@ -112,6 +112,7 @@ impl OutputRequirementExec { self.input.clone() } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(input: &Arc) -> PlanPropertiesCache { PlanPropertiesCache::new( input.equivalence_properties().clone(), // Equivalence Properties diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index e571bc76f4d5..8049c3940a1a 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -2584,6 +2584,7 @@ mod tests { Self { cache } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); PlanPropertiesCache::new( diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index f8eb67cfdaf5..e5d8f6ebda32 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -384,6 +384,7 @@ impl StatisticsExec { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); PlanPropertiesCache::new( diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index dda6d730ce84..55a30b07d893 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -46,6 +46,7 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::TableReference; use datafusion_expr::{CreateExternalTable, Expr, TableType}; +use datafusion_physical_expr::EquivalenceProperties; use async_trait::async_trait; use futures::Stream; @@ -55,7 +56,6 @@ use tempfile::TempDir; #[cfg(feature = "parquet")] pub use datafusion_common::test_util::parquet_test_data; pub use datafusion_common::test_util::{arrow_test_data, get_data_dir}; -use datafusion_physical_expr::EquivalenceProperties; /// Scan an empty data source, mainly used in tests pub fn scan_empty( @@ -246,6 +246,7 @@ impl UnboundedExec { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache( schema: SchemaRef, batch_produce: Option, diff --git a/datafusion/core/tests/custom_sources.rs b/datafusion/core/tests/custom_sources.rs index 11f29192276c..f62a3f723ad7 100644 --- a/datafusion/core/tests/custom_sources.rs +++ b/datafusion/core/tests/custom_sources.rs @@ -86,6 +86,7 @@ impl CustomExecutionPlan { Self { projection, cache } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); PlanPropertiesCache::new( diff --git a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs index da00effa00a8..dec2deb10cbb 100644 --- a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs +++ b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs @@ -67,6 +67,7 @@ impl CustomPlan { Self { batches, cache } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); PlanPropertiesCache::new( diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs index 37854908f021..e98781aae9bf 100644 --- a/datafusion/core/tests/custom_sources_cases/statistics.rs +++ b/datafusion/core/tests/custom_sources_cases/statistics.rs @@ -61,6 +61,7 @@ impl StatisticsValidation { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs index f2b6f6c93615..5f01334a4757 100644 --- a/datafusion/core/tests/user_defined/user_defined_plan.rs +++ b/datafusion/core/tests/user_defined/user_defined_plan.rs @@ -81,6 +81,7 @@ use datafusion::{ UserDefinedLogicalNodeCore, }, optimizer::{optimize_children, OptimizerConfig, OptimizerRule}, + physical_expr::EquivalenceProperties, physical_plan::{ DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan, Partitioning, PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, @@ -91,7 +92,6 @@ use datafusion::{ }; use async_trait::async_trait; -use datafusion_physical_expr::EquivalenceProperties; use futures::{Stream, StreamExt}; /// Execute the specified sql and return the resulting record batches @@ -421,6 +421,7 @@ impl TopKExec { Self { input, k, cache } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index fa5b65e40123..35e42b8a4d36 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -506,6 +506,7 @@ impl AggregateExec { true } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( input: &Arc, schema: SchemaRef, @@ -1629,6 +1630,7 @@ mod tests { Self { yield_first, cache } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); PlanPropertiesCache::new( diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index 731f3e3c7ebf..94dd6ff3bd28 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -81,6 +81,7 @@ impl AnalyzeExec { &self.input } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( input: &Arc, schema: SchemaRef, diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index e83bce0664a3..bce48698a558 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -73,6 +73,7 @@ impl CoalesceBatchesExec { self.target_batch_size } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(input: &Arc) -> PlanPropertiesCache { // The coalesce batches operator does not make any changes to the // partitioning of its input. diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 27f58c9bfd85..ad1094cee0e1 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -57,6 +57,7 @@ impl CoalescePartitionsExec { &self.input } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(input: &Arc) -> PlanPropertiesCache { // Coalescing partitions loses existing orderings: let mut eq_properties = input.equivalence_properties().clone(); diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index 942bee81f472..0705c4b4eca7 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -72,6 +72,7 @@ impl EmptyExec { Partitioning::UnknownPartitioning(n_partitions) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); let output_partitioning = Self::output_partitioning_helper(n_partitions); diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index 689ef32aa1a9..200ba0bd07c5 100644 --- a/datafusion/physical-plan/src/explain.rs +++ b/datafusion/physical-plan/src/explain.rs @@ -72,6 +72,7 @@ impl ExplainExec { self.verbose } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); PlanPropertiesCache::new( diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index d6942f0d5678..86502039c8ba 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -115,6 +115,7 @@ impl FilterExec { self.default_selectivity } + /// Calculates `Statistics` for `FilterExec`, by applying selectivity (either default, or estimated) to input statistics. fn statistics_helper( input: &Arc, predicate: &Arc, @@ -157,6 +158,7 @@ impl FilterExec { }) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( input: &Arc, predicate: &Arc, diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 42758e635060..3f6b6ba5e2b6 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -98,6 +98,7 @@ impl CrossJoinExec { &self.right } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( left: &Arc, right: &Arc, diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 2b88ec449a04..4b010e8c60c6 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -37,7 +37,7 @@ use crate::{ check_join_is_valid, estimate_join_statistics, get_final_indices_from_bit_map, need_produce_result_in_final, partitioned_join_output_partitioning, BuildProbeJoinMetrics, ColumnIndex, JoinFilter, JoinHashMap, JoinHashMapOffset, - JoinHashMapType, JoinOn, StatefulStreamResult, + JoinHashMapType, JoinOn, JoinOnRef, StatefulStreamResult, }, metrics::{ExecutionPlanMetricsSet, MetricsSet}, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan, @@ -65,7 +65,6 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::PhysicalExprRef; -use crate::joins::utils::JoinOnRef; use ahash::RandomState; use futures::{ready, Stream, StreamExt, TryStreamExt}; @@ -406,6 +405,7 @@ impl HashJoinExec { JoinSide::Right } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache( left: &Arc, right: &Arc, diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 89beac14816d..bbfc4c12f548 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -144,6 +144,7 @@ impl NestedLoopJoinExec { &self.join_type } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( left: &Arc, right: &Arc, diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index f7d754a99e0e..20bae468cc4a 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -200,6 +200,7 @@ impl SortMergeJoinExec { self.left.as_ref() } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( left: &Arc, right: &Arc, diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 4e07b10dd517..3eff026a176f 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -252,6 +252,7 @@ impl SymmetricHashJoinExec { }) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( left: &Arc, right: &Arc, diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index a4b924d71066..e678360dd471 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -82,6 +82,7 @@ impl GlobalLimitExec { self.fetch } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(input: &Arc) -> PlanPropertiesCache { PlanPropertiesCache::new( input.equivalence_properties().clone(), // Equivalence Properties @@ -292,6 +293,7 @@ impl LocalLimitExec { self.fetch } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(input: &Arc) -> PlanPropertiesCache { PlanPropertiesCache::new( input.equivalence_properties().clone(), // Equivalence Properties diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index f6039ee8b3ed..8bd4db0bd418 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -204,6 +204,7 @@ impl MemoryExec { self.schema.clone() } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache( schema: SchemaRef, orderings: &[LexOrdering], diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index 9d4204ddb589..3280522e152c 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -94,6 +94,7 @@ impl PlaceholderRowExec { Partitioning::UnknownPartitioning(n_partitions) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); // Get output partitioning: diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 7420cf58b5ce..2ed8095f256c 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -113,6 +113,7 @@ impl ProjectionExec { &self.input } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( input: &Arc, projection_mapping: &ProjectionMapping, diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index adc675ba2730..fd0d506e2ce4 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -94,6 +94,7 @@ impl RecursiveQueryExec { }) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index dc1e88f52e56..b9489bd12e64 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -616,12 +616,8 @@ impl RepartitionExec { input: &Arc, preserve_order: bool, ) -> Vec { - if preserve_order { - vec![true] - } else { - // We preserve ordering when input partitioning is 1 - vec![input.output_partitioning().partition_count() <= 1] - } + // We preserve ordering when repartition is order preserving variant or input partitioning is 1 + vec![preserve_order || input.output_partitioning().partition_count() <= 1] } fn eq_properties_helper( @@ -637,6 +633,7 @@ impl RepartitionExec { eq_properties } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( input: &Arc, partitioning: Partitioning, diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 16c4bc8601b6..095245a706ea 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -180,6 +180,7 @@ impl PartialSortExec { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache( input: &Arc, sort_exprs: LexOrdering, diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index a74705dd32ab..713ff86a5072 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -786,6 +786,7 @@ impl SortExec { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( input: &Arc, sort_exprs: LexOrdering, diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index c07ae72d5492..16bdecd0f384 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -110,6 +110,7 @@ impl SortPreservingMergeExec { self.fetch } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(input: &Arc) -> PlanPropertiesCache { PlanPropertiesCache::new( input.equivalence_properties().clone(), // Equivalence Properties diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index e95fd37ab1b2..90e8600d78e5 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -127,6 +127,7 @@ impl StreamingTableExec { self.infinite } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( schema: SchemaRef, orderings: &[LexOrdering], diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index a677907295a7..a70e05809923 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -150,6 +150,7 @@ impl MockExec { self } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); @@ -305,6 +306,7 @@ impl BarrierExec { println!("BarrierExec::wait done waiting"); } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef, data: &[Vec]) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); PlanPropertiesCache::new( @@ -412,6 +414,7 @@ impl ErrorExec { Self { cache } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); @@ -489,6 +492,7 @@ impl StatisticsExec { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); @@ -585,6 +589,7 @@ impl BlockingExec { Arc::downgrade(&self.refs) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); @@ -721,6 +726,7 @@ impl PanicExec { self } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( schema: SchemaRef, batches_until_panics: &[usize], diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 06a870123255..4d5377a9bdcc 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -111,6 +111,7 @@ impl UnionExec { &self.inputs } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( inputs: &[Arc], schema: SchemaRef, @@ -340,6 +341,7 @@ impl InterleaveExec { &self.inputs } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(inputs: &[Arc]) -> PlanPropertiesCache { let schema = union_schema(inputs); let eq_properties = EquivalenceProperties::new(schema); diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index ba90e8b4f1fc..d727091fd1c3 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -82,6 +82,7 @@ impl UnnestExec { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( input: &Arc, schema: SchemaRef, diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index 20c8eddce6bd..f31272879279 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -127,6 +127,7 @@ impl ValuesExec { self.data.clone() } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index a9dfc9bfeedd..d7579cdc041d 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -182,6 +182,7 @@ impl BoundedWindowAggExec { }) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache( input: &Arc, schema: &SchemaRef, diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index 852698bafe3a..f143d228f381 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -116,6 +116,7 @@ impl WindowAggExec { ) } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
fn create_cache( schema: SchemaRef, input: &Arc, diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index 9a0b5daf27e4..44a42a4fcf92 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -111,6 +111,7 @@ impl WorkTableExec { } } + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { let eq_properties = EquivalenceProperties::new(schema); From 8f3d1ef23f93cd4303745eba76c0850b39774d07 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Tue, 27 Feb 2024 11:23:53 +0100 Subject: [PATCH 24/45] refactor: `SchemaProvider::table` can fail (#9307) --- datafusion-cli/src/catalog.rs | 43 ++++++++++-------- .../examples/external_dependency/catalog.rs | 4 +- .../core/src/catalog/information_schema.rs | 45 +++++++++++++------ datafusion/core/src/catalog/listing_schema.rs | 10 +++-- datafusion/core/src/catalog/schema.rs | 12 +++-- datafusion/core/src/execution/context/mod.rs | 6 +-- datafusion/core/src/physical_planner.rs | 4 +- datafusion/core/tests/sql/create_drop.rs | 2 +- 8 files changed, 80 insertions(+), 46 deletions(-) diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs index f664d40df5db..67184b8257b8 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -19,6 +19,7 @@ use crate::object_storage::get_object_store; use async_trait::async_trait; use datafusion::catalog::schema::SchemaProvider; use datafusion::catalog::{CatalogProvider, CatalogProviderList}; +use datafusion::common::{plan_datafusion_err, DataFusionError}; use datafusion::datasource::listing::{ ListingTable, ListingTableConfig, ListingTableUrl, }; @@ -145,16 +146,21 @@ impl SchemaProvider for DynamicFileSchemaProvider { self.inner.register_table(name, table) } - async fn table(&self, name: &str) -> Option> { - let inner_table = self.inner.table(name).await; + async fn table(&self, name: &str) -> Result>> { + let inner_table = self.inner.table(name).await?; if inner_table.is_some() { - return inner_table; + return Ok(inner_table); } // if the inner schema provider didn't have a table by // that name, try to treat it as a listing table - let state = self.state.upgrade()?.read().clone(); - let table_url = ListingTableUrl::parse(name).ok()?; + let state = self + .state + .upgrade() + .ok_or_else(|| plan_datafusion_err!("locking error"))? 
+ .read() + .clone(); + let table_url = ListingTableUrl::parse(name)?; let url: &Url = table_url.as_ref(); // If the store is already registered for this URL then `get_store` @@ -169,18 +175,20 @@ impl SchemaProvider for DynamicFileSchemaProvider { let mut options = HashMap::new(); let store = get_object_store(&state, &mut options, table_url.scheme(), url) - .await - .unwrap(); + .await?; state.runtime_env().register_object_store(url, store); } } - let config = ListingTableConfig::new(table_url) - .infer(&state) - .await - .ok()?; + let config = match ListingTableConfig::new(table_url).infer(&state).await { + Ok(cfg) => cfg, + Err(_) => { + // treat as non-existing + return Ok(None); + } + }; - Some(Arc::new(ListingTable::try_new(config).ok()?)) + Ok(Some(Arc::new(ListingTable::try_new(config)?))) } fn deregister_table(&self, name: &str) -> Result>> { @@ -227,7 +235,7 @@ mod tests { let (ctx, schema) = setup_context(); // That's a non registered table so expecting None here - let table = schema.table(&location).await; + let table = schema.table(&location).await.unwrap(); assert!(table.is_none()); // It should still create an object store for the location in the SessionState @@ -251,7 +259,7 @@ mod tests { let (ctx, schema) = setup_context(); - let table = schema.table(&location).await; + let table = schema.table(&location).await.unwrap(); assert!(table.is_none()); let store = ctx @@ -273,7 +281,7 @@ mod tests { let (ctx, schema) = setup_context(); - let table = schema.table(&location).await; + let table = schema.table(&location).await.unwrap(); assert!(table.is_none()); let store = ctx @@ -289,13 +297,10 @@ mod tests { } #[tokio::test] - #[should_panic] async fn query_invalid_location_test() { let location = "ts://file.parquet"; let (_ctx, schema) = setup_context(); - // This will panic, we cannot prevent that because `schema.table` - // returns an Option - schema.table(location).await; + assert!(schema.table(location).await.is_err()); } } diff --git a/datafusion-examples/examples/external_dependency/catalog.rs b/datafusion-examples/examples/external_dependency/catalog.rs index a623eafdf3d7..28a720cc33a9 100644 --- a/datafusion-examples/examples/external_dependency/catalog.rs +++ b/datafusion-examples/examples/external_dependency/catalog.rs @@ -180,9 +180,9 @@ impl SchemaProvider for DirSchema { tables.keys().cloned().collect::>() } - async fn table(&self, name: &str) -> Option> { + async fn table(&self, name: &str) -> Result>> { let tables = self.tables.read().unwrap(); - tables.get(name).cloned() + Ok(tables.get(name).cloned()) } fn table_exist(&self, name: &str) -> bool { diff --git a/datafusion/core/src/catalog/information_schema.rs b/datafusion/core/src/catalog/information_schema.rs index 80ce3b1ae419..cd8f7649534f 100644 --- a/datafusion/core/src/catalog/information_schema.rs +++ b/datafusion/core/src/catalog/information_schema.rs @@ -20,6 +20,7 @@ //! 
[Information Schema]: https://en.wikipedia.org/wiki/Information_schema use async_trait::async_trait; +use datafusion_common::DataFusionError; use std::{any::Any, sync::Arc}; use arrow::{ @@ -78,7 +79,10 @@ struct InformationSchemaConfig { impl InformationSchemaConfig { /// Construct the `information_schema.tables` virtual table - async fn make_tables(&self, builder: &mut InformationSchemaTablesBuilder) { + async fn make_tables( + &self, + builder: &mut InformationSchemaTablesBuilder, + ) -> Result<(), DataFusionError> { // create a mem table with the names of tables for catalog_name in self.catalog_list.catalog_names() { @@ -89,7 +93,7 @@ impl InformationSchemaConfig { // schema name may not exist in the catalog, so we need to check if let Some(schema) = catalog.schema(&schema_name) { for table_name in schema.table_names() { - if let Some(table) = schema.table(&table_name).await { + if let Some(table) = schema.table(&table_name).await? { builder.add_table( &catalog_name, &schema_name, @@ -124,6 +128,8 @@ impl InformationSchemaConfig { TableType::View, ); } + + Ok(()) } async fn make_schemata(&self, builder: &mut InformationSchemataBuilder) { @@ -141,7 +147,10 @@ impl InformationSchemaConfig { } } - async fn make_views(&self, builder: &mut InformationSchemaViewBuilder) { + async fn make_views( + &self, + builder: &mut InformationSchemaViewBuilder, + ) -> Result<(), DataFusionError> { for catalog_name in self.catalog_list.catalog_names() { let catalog = self.catalog_list.catalog(&catalog_name).unwrap(); @@ -150,7 +159,7 @@ impl InformationSchemaConfig { // schema name may not exist in the catalog, so we need to check if let Some(schema) = catalog.schema(&schema_name) { for table_name in schema.table_names() { - if let Some(table) = schema.table(&table_name).await { + if let Some(table) = schema.table(&table_name).await? { builder.add_view( &catalog_name, &schema_name, @@ -163,10 +172,15 @@ impl InformationSchemaConfig { } } } + + Ok(()) } /// Construct the `information_schema.columns` virtual table - async fn make_columns(&self, builder: &mut InformationSchemaColumnsBuilder) { + async fn make_columns( + &self, + builder: &mut InformationSchemaColumnsBuilder, + ) -> Result<(), DataFusionError> { for catalog_name in self.catalog_list.catalog_names() { let catalog = self.catalog_list.catalog(&catalog_name).unwrap(); @@ -175,7 +189,7 @@ impl InformationSchemaConfig { // schema name may not exist in the catalog, so we need to check if let Some(schema) = catalog.schema(&schema_name) { for table_name in schema.table_names() { - if let Some(table) = schema.table(&table_name).await { + if let Some(table) = schema.table(&table_name).await? 
{ for (field_position, field) in table.schema().fields().iter().enumerate() { @@ -193,6 +207,8 @@ impl InformationSchemaConfig { } } } + + Ok(()) } /// Construct the `information_schema.df_settings` virtual table @@ -223,7 +239,10 @@ impl SchemaProvider for InformationSchemaProvider { ] } - async fn table(&self, name: &str) -> Option> { + async fn table( + &self, + name: &str, + ) -> Result>, DataFusionError> { let config = self.config.clone(); let table: Arc = if name.eq_ignore_ascii_case("tables") { Arc::new(InformationSchemaTables::new(config)) @@ -236,12 +255,12 @@ impl SchemaProvider for InformationSchemaProvider { } else if name.eq_ignore_ascii_case("schemata") { Arc::new(InformationSchemata::new(config)) } else { - return None; + return Ok(None); }; - Some(Arc::new( + Ok(Some(Arc::new( StreamingTable::try_new(table.schema().clone(), vec![table]).unwrap(), - )) + ))) } fn table_exist(&self, name: &str) -> bool { @@ -292,7 +311,7 @@ impl PartitionStream for InformationSchemaTables { self.schema.clone(), // TODO: Stream this futures::stream::once(async move { - config.make_tables(&mut builder).await; + config.make_tables(&mut builder).await?; Ok(builder.finish()) }), )) @@ -383,7 +402,7 @@ impl PartitionStream for InformationSchemaViews { self.schema.clone(), // TODO: Stream this futures::stream::once(async move { - config.make_views(&mut builder).await; + config.make_views(&mut builder).await?; Ok(builder.finish()) }), )) @@ -497,7 +516,7 @@ impl PartitionStream for InformationSchemaColumns { self.schema.clone(), // TODO: Stream this futures::stream::once(async move { - config.make_columns(&mut builder).await; + config.make_columns(&mut builder).await?; Ok(builder.finish()) }), )) diff --git a/datafusion/core/src/catalog/listing_schema.rs b/datafusion/core/src/catalog/listing_schema.rs index c3c682689542..f64b43062d2f 100644 --- a/datafusion/core/src/catalog/listing_schema.rs +++ b/datafusion/core/src/catalog/listing_schema.rs @@ -175,12 +175,16 @@ impl SchemaProvider for ListingSchemaProvider { .collect() } - async fn table(&self, name: &str) -> Option> { - self.tables + async fn table( + &self, + name: &str, + ) -> Result>, DataFusionError> { + Ok(self + .tables .lock() .expect("Can't lock tables") .get(name) - .cloned() + .cloned()) } fn register_table( diff --git a/datafusion/core/src/catalog/schema.rs b/datafusion/core/src/catalog/schema.rs index 1e9a86b49611..49f8350ecc5b 100644 --- a/datafusion/core/src/catalog/schema.rs +++ b/datafusion/core/src/catalog/schema.rs @@ -49,7 +49,10 @@ pub trait SchemaProvider: Sync + Send { /// Retrieves a specific table from the schema by name, if it exists, /// otherwise returns `None`. - async fn table(&self, name: &str) -> Option>; + async fn table( + &self, + name: &str, + ) -> Result>, DataFusionError>; /// If supported by the implementation, adds a new table named `name` to /// this schema. 
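With this change a provider can surface catalog errors instead of swallowing them into `None`. A minimal sketch against the new signature (the `SingleTableSchema` type is hypothetical, and the remaining trait items are assumed to keep their default implementations):

```rust
use std::any::Any;
use std::sync::Arc;

use async_trait::async_trait;
use datafusion::catalog::schema::SchemaProvider;
use datafusion::datasource::TableProvider;
use datafusion::error::Result;

/// Hypothetical provider that serves exactly one named table.
struct SingleTableSchema {
    name: String,
    table: Arc<dyn TableProvider>,
}

#[async_trait]
impl SchemaProvider for SingleTableSchema {
    fn as_any(&self) -> &dyn Any {
        self
    }

    fn table_names(&self) -> Vec<String> {
        vec![self.name.clone()]
    }

    // The lookup is now fallible: infrastructure errors (I/O, poisoned
    // locks, ...) surface as `Err`, while a missing table stays `Ok(None)`.
    async fn table(&self, name: &str) -> Result<Option<Arc<dyn TableProvider>>> {
        Ok((name == self.name).then(|| Arc::clone(&self.table)))
    }

    fn table_exist(&self, name: &str) -> bool {
        name == self.name
    }
}
```

The important contract: `Err` means the lookup itself failed, while `Ok(None)` still means the table simply does not exist.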
@@ -111,8 +114,11 @@ impl SchemaProvider for MemorySchemaProvider { .collect() } - async fn table(&self, name: &str) -> Option> { - self.tables.get(name).map(|table| table.value().clone()) + async fn table( + &self, + name: &str, + ) -> Result>, DataFusionError> { + Ok(self.tables.get(name).map(|table| table.value().clone())) } fn register_table( diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index b130070141b2..ffc4a4f717d7 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -783,7 +783,7 @@ impl SessionContext { }; if let Some(schema) = maybe_schema { - if let Some(table_provider) = schema.table(&table).await { + if let Some(table_provider) = schema.table(&table).await? { if table_provider.table_type() == table_type { schema.deregister_table(&table)?; return Ok(true); @@ -1115,7 +1115,7 @@ impl SessionContext { let table_ref = table_ref.into(); let table = table_ref.table().to_string(); let schema = self.state.read().schema_for_ref(table_ref)?; - match schema.table(&table).await { + match schema.table(&table).await? { Some(ref provider) => Ok(Arc::clone(provider)), _ => plan_err!("No table named '{table}'"), } @@ -1714,7 +1714,7 @@ impl SessionState { let resolved = self.resolve_table_ref(&reference); if let Entry::Vacant(v) = provider.tables.entry(resolved.to_string()) { if let Ok(schema) = self.schema_for_ref(resolved) { - if let Some(table) = schema.table(table).await { + if let Some(table) = schema.table(table).await? { v.insert(provider_as_source(table)); } } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 23ac7e08cad8..83ba773464f6 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -624,7 +624,7 @@ impl DefaultPhysicalPlanner { }) => { let name = table_name.table(); let schema = session_state.schema_for_ref(table_name)?; - if let Some(provider) = schema.table(name).await { + if let Some(provider) = schema.table(name).await? { let input_exec = self.create_initial_plan(input, session_state).await?; provider.insert_into(session_state, input_exec, false).await } else { @@ -641,7 +641,7 @@ impl DefaultPhysicalPlanner { }) => { let name = table_name.table(); let schema = session_state.schema_for_ref(table_name)?; - if let Some(provider) = schema.table(name).await { + if let Some(provider) = schema.table(name).await? 
{ let input_exec = self.create_initial_plan(input, session_state).await?; provider.insert_into(session_state, input_exec, true).await } else { diff --git a/datafusion/core/tests/sql/create_drop.rs b/datafusion/core/tests/sql/create_drop.rs index b1434dddee50..2174009b8557 100644 --- a/datafusion/core/tests/sql/create_drop.rs +++ b/datafusion/core/tests/sql/create_drop.rs @@ -63,7 +63,7 @@ async fn create_external_table_with_ddl() -> Result<()> { let exists = schema.table_exist("dt"); assert!(exists, "Table should have been created!"); - let table_schema = schema.table("dt").await.unwrap().schema(); + let table_schema = schema.table("dt").await.unwrap().unwrap().schema(); assert_eq!(3, table_schema.fields().len()); From 372204e1ed71ee600553e806885cdd7f596e168f Mon Sep 17 00:00:00 2001 From: junxiangMu <63799833+guojidan@users.noreply.github.com> Date: Tue, 27 Feb 2024 20:40:08 +0800 Subject: [PATCH 25/45] fix write_partitioned_parquet_results bug (#9360) --- datafusion/core/tests/dataframe/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index ee842004172c..abe5fd29182e 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -1975,7 +1975,7 @@ async fn write_partitioned_parquet_results() -> Result<()> { "+-----+-----+", ]; - assert_batches_eq!(expected, &results); + assert_batches_sorted_eq!(expected, &results); Ok(()) } From 14264d2c3947e432f71bfe0af1a3dbafbb6ee686 Mon Sep 17 00:00:00 2001 From: Artem Medvedev Date: Tue, 27 Feb 2024 14:11:59 +0100 Subject: [PATCH 26/45] fix: use `JoinSet` to make spawned tasks cancel-safe (#9318) * fix: use `JoinSet` to make spawned tasks cancel-safe * feat: drop `AbortOnDropSingle` and `AbortOnDropMany` * style: doc lint * fix: ordering of the tasks in `RepartitionExec` * fix: replace spawn_blocking with JoinSet * style: disallow spawn methods * fixes: preserve ordering of tasks * style: allow spawning in tests * chore: exclude clippy.toml from rat * chore: typo * feat: introduce `SpawnedTask` * revert outdated comment * switch to SpawnedTask missed outdated part * doc: improve reason for disallowed-method --- clippy.toml | 4 + datafusion/core/src/dataframe/mod.rs | 1 + .../core/src/datasource/file_format/arrow.rs | 2 +- .../src/datasource/file_format/parquet.rs | 51 +++++++------ .../src/datasource/file_format/write/demux.rs | 12 +-- .../file_format/write/orchestration.rs | 29 ++++---- datafusion/core/src/datasource/stream.rs | 9 +-- datafusion/core/src/execution/context/mod.rs | 1 + datafusion/core/tests/fifo.rs | 2 + .../sort_preserving_repartition_fuzz.rs | 1 + .../core/tests/fuzz_cases/window_fuzz.rs | 1 + datafusion/physical-plan/src/common.rs | 73 ++++++++----------- datafusion/physical-plan/src/lib.rs | 10 +-- .../physical-plan/src/repartition/mod.rs | 46 ++++++------ datafusion/physical-plan/src/sorts/sort.rs | 7 +- datafusion/sqllogictest/bin/sqllogictests.rs | 1 + dev/release/rat_exclude_files.txt | 3 +- 17 files changed, 129 insertions(+), 124 deletions(-) create mode 100644 clippy.toml diff --git a/clippy.toml b/clippy.toml new file mode 100644 index 000000000000..c6c754e440c7 --- /dev/null +++ b/clippy.toml @@ -0,0 +1,4 @@ +disallowed-methods = [ + { path = "tokio::task::spawn", reason = "To provide cancel-safety, use `SpawnedTask::spawn` instead (https://github.com/apache/arrow-datafusion/issues/6513)" }, + { path = "tokio::task::spawn_blocking", reason = "To provide cancel-safety, use 
`SpawnedTask::spawn` instead (https://github.com/apache/arrow-datafusion/issues/6513)" },
+]
diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs
index 3a60d57f6685..c04247210d46 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -2172,6 +2172,7 @@ mod tests {
     }
 
     #[tokio::test]
+    #[allow(clippy::disallowed_methods)]
    async fn sendable() {
        let df = test_table().await.unwrap();
        // dataframes should be sendable between threads/tasks
diff --git a/datafusion/core/src/datasource/file_format/arrow.rs b/datafusion/core/src/datasource/file_format/arrow.rs
index ead2db5a10c0..d5f07d11bee9 100644
--- a/datafusion/core/src/datasource/file_format/arrow.rs
+++ b/datafusion/core/src/datasource/file_format/arrow.rs
@@ -295,7 +295,7 @@ impl DataSink for ArrowFileSink {
            }
        }
 
-        match demux_task.await {
+        match demux_task.join().await {
            Ok(r) => r?,
            Err(e) => {
                if e.is_panic() {
diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs
index 89ec81630c1b..739850115370 100644
--- a/datafusion/core/src/datasource/file_format/parquet.rs
+++ b/datafusion/core/src/datasource/file_format/parquet.rs
@@ -32,7 +32,7 @@ use std::fmt::Debug;
 use std::sync::Arc;
 use tokio::io::{AsyncWrite, AsyncWriteExt};
 use tokio::sync::mpsc::{self, Receiver, Sender};
-use tokio::task::{JoinHandle, JoinSet};
+use tokio::task::JoinSet;
 
 use crate::datasource::file_format::file_compression_type::FileCompressionType;
 use crate::datasource::statistics::{create_max_min_accs, get_col_stats};
@@ -42,6 +42,7 @@ use bytes::{BufMut, BytesMut};
 use datafusion_common::{exec_err, not_impl_err, DataFusionError, FileType};
 use datafusion_execution::TaskContext;
 use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement};
+use datafusion_physical_plan::common::SpawnedTask;
 use futures::{StreamExt, TryStreamExt};
 use hashbrown::HashMap;
 use object_store::path::Path;
@@ -728,7 +729,7 @@ impl DataSink for ParquetSink {
            }
        }
 
-        match demux_task.await {
+        match demux_task.join().await {
            Ok(r) => r?,
            Err(e) => {
                if e.is_panic() {
@@ -738,6 +739,7 @@ impl DataSink for ParquetSink {
                }
            }
        }
+
        Ok(row_count as u64)
    }
 }
@@ -754,8 +756,9 @@ async fn column_serializer_task(
    Ok(writer)
 }
 
-type ColumnJoinHandle = JoinHandle<Result<ArrowColumnWriter>>;
+type ColumnWriterTask = SpawnedTask<Result<ArrowColumnWriter>>;
 type ColSender = Sender<ArrowLeafColumn>;
+
 /// Spawns a parallel serialization task for each column
 /// Returns join handles for each columns serialization task along with a send channel
 /// to send arrow arrays to each serialization task.
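That comment describes a fan-out: one channel and one spawned task per column. Reduced to a self-contained sketch (hypothetical names; a `u64` payload stands in for `ArrowLeafColumn`, and summing stands in for serializing a column chunk), the shape is:

```rust
use datafusion_physical_plan::common::SpawnedTask;
use tokio::sync::mpsc;

/// One bounded channel plus one spawned task per column; the channel bound
/// limits how many payloads can queue up per column at a time.
fn spawn_column_workers(
    num_columns: usize,
    max_buffer_size: usize,
) -> (Vec<SpawnedTask<u64>>, Vec<mpsc::Sender<u64>>) {
    let mut tasks = Vec::with_capacity(num_columns);
    let mut senders = Vec::with_capacity(num_columns);
    for _ in 0..num_columns {
        let (tx, mut rx) = mpsc::channel::<u64>(max_buffer_size);
        senders.push(tx);
        tasks.push(SpawnedTask::spawn(async move {
            let mut total = 0u64;
            // Drain the channel until every sender is dropped.
            while let Some(v) = rx.recv().await {
                total += v; // stand-in for serializing one column chunk
            }
            total
        }));
    }
    (tasks, senders)
}

async fn run() -> u64 {
    let (tasks, senders) = spawn_column_workers(3, 8);
    for tx in &senders {
        tx.send(1).await.unwrap();
    }
    drop(senders); // close the channels so the workers finish
    let mut grand_total = 0;
    for task in tasks {
        grand_total += task.join().await.unwrap();
    }
    grand_total
}
```

Bounding each channel applies backpressure per column instead of buffering whole row groups in memory.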
@@ -763,23 +766,24 @@ fn spawn_column_parallel_row_group_writer( schema: Arc, parquet_props: Arc, max_buffer_size: usize, -) -> Result<(Vec, Vec)> { +) -> Result<(Vec, Vec)> { let schema_desc = arrow_to_parquet_schema(&schema)?; let col_writers = get_column_writers(&schema_desc, &parquet_props, &schema)?; let num_columns = col_writers.len(); - let mut col_writer_handles = Vec::with_capacity(num_columns); + let mut col_writer_tasks = Vec::with_capacity(num_columns); let mut col_array_channels = Vec::with_capacity(num_columns); for writer in col_writers.into_iter() { // Buffer size of this channel limits the number of arrays queued up for column level serialization let (send_array, recieve_array) = mpsc::channel::(max_buffer_size); col_array_channels.push(send_array); - col_writer_handles - .push(tokio::spawn(column_serializer_task(recieve_array, writer))) + + let task = SpawnedTask::spawn(column_serializer_task(recieve_array, writer)); + col_writer_tasks.push(task); } - Ok((col_writer_handles, col_array_channels)) + Ok((col_writer_tasks, col_array_channels)) } /// Settings related to writing parquet files in parallel @@ -820,14 +824,14 @@ async fn send_arrays_to_col_writers( /// Spawns a tokio task which joins the parallel column writer tasks, /// and finalizes the row group fn spawn_rg_join_and_finalize_task( - column_writer_handles: Vec>>, + column_writer_tasks: Vec, rg_rows: usize, -) -> JoinHandle { - tokio::spawn(async move { - let num_cols = column_writer_handles.len(); +) -> SpawnedTask { + SpawnedTask::spawn(async move { + let num_cols = column_writer_tasks.len(); let mut finalized_rg = Vec::with_capacity(num_cols); - for handle in column_writer_handles.into_iter() { - match handle.await { + for task in column_writer_tasks.into_iter() { + match task.join().await { Ok(r) => { let w = r?; finalized_rg.push(w.close()?); @@ -856,12 +860,12 @@ fn spawn_rg_join_and_finalize_task( /// given by n_columns * num_row_groups. 
fn spawn_parquet_parallel_serialization_task( mut data: Receiver, - serialize_tx: Sender>, + serialize_tx: Sender>, schema: Arc, writer_props: Arc, parallel_options: ParallelParquetWriterOptions, -) -> JoinHandle> { - tokio::spawn(async move { +) -> SpawnedTask> { + SpawnedTask::spawn(async move { let max_buffer_rb = parallel_options.max_buffered_record_batches_per_stream; let max_row_group_rows = writer_props.max_row_group_size(); let (mut column_writer_handles, mut col_array_channels) = @@ -931,7 +935,7 @@ fn spawn_parquet_parallel_serialization_task( /// Consume RowGroups serialized by other parallel tasks and concatenate them in /// to the final parquet file, while flushing finalized bytes to an [ObjectStore] async fn concatenate_parallel_row_groups( - mut serialize_rx: Receiver>, + mut serialize_rx: Receiver>, schema: Arc, writer_props: Arc, mut object_store_writer: AbortableWrite>, @@ -947,9 +951,8 @@ async fn concatenate_parallel_row_groups( let mut row_count = 0; - while let Some(handle) = serialize_rx.recv().await { - let join_result = handle.await; - match join_result { + while let Some(task) = serialize_rx.recv().await { + match task.join().await { Ok(result) => { let mut rg_out = parquet_writer.next_row_group()?; let (serialized_columns, cnt) = result?; @@ -999,7 +1002,7 @@ async fn output_single_parquet_file_parallelized( let max_rowgroups = parallel_options.max_parallel_row_groups; // Buffer size of this channel limits maximum number of RowGroups being worked on in parallel let (serialize_tx, serialize_rx) = - mpsc::channel::>(max_rowgroups); + mpsc::channel::>(max_rowgroups); let arc_props = Arc::new(parquet_props.clone()); let launch_serialization_task = spawn_parquet_parallel_serialization_task( @@ -1017,7 +1020,7 @@ async fn output_single_parquet_file_parallelized( ) .await?; - match launch_serialization_task.await { + match launch_serialization_task.join().await { Ok(Ok(_)) => (), Ok(Err(e)) => return Err(e), Err(e) => { @@ -1027,7 +1030,7 @@ async fn output_single_parquet_file_parallelized( unreachable!() } } - }; + } Ok(row_count) } diff --git a/datafusion/core/src/datasource/file_format/write/demux.rs b/datafusion/core/src/datasource/file_format/write/demux.rs index 8bccf3d71cf9..d70b4811da5b 100644 --- a/datafusion/core/src/datasource/file_format/write/demux.rs +++ b/datafusion/core/src/datasource/file_format/write/demux.rs @@ -41,8 +41,8 @@ use object_store::path::Path; use rand::distributions::DistString; +use datafusion_physical_plan::common::SpawnedTask; use tokio::sync::mpsc::{self, Receiver, Sender, UnboundedReceiver, UnboundedSender}; -use tokio::task::JoinHandle; type RecordBatchReceiver = Receiver; type DemuxedStreamReceiver = UnboundedReceiver<(Path, RecordBatchReceiver)>; @@ -76,15 +76,15 @@ pub(crate) fn start_demuxer_task( partition_by: Option>, base_output_path: ListingTableUrl, file_extension: String, -) -> (JoinHandle>, DemuxedStreamReceiver) { - let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); +) -> (SpawnedTask>, DemuxedStreamReceiver) { + let (tx, rx) = mpsc::unbounded_channel(); let context = context.clone(); let single_file_output = !base_output_path.is_collection(); - let task: JoinHandle> = match partition_by { + let task = match partition_by { Some(parts) => { // There could be an arbitrarily large number of parallel hive style partitions being written to, so we cannot // bound this channel without risking a deadlock. 
- tokio::spawn(async move { + SpawnedTask::spawn(async move { hive_style_partitions_demuxer( tx, input, @@ -96,7 +96,7 @@ pub(crate) fn start_demuxer_task( .await }) } - None => tokio::spawn(async move { + None => SpawnedTask::spawn(async move { row_count_demuxer( tx, input, diff --git a/datafusion/core/src/datasource/file_format/write/orchestration.rs b/datafusion/core/src/datasource/file_format/write/orchestration.rs index 1a3042cbc00b..05406d3751c9 100644 --- a/datafusion/core/src/datasource/file_format/write/orchestration.rs +++ b/datafusion/core/src/datasource/file_format/write/orchestration.rs @@ -33,10 +33,11 @@ use datafusion_common::{internal_datafusion_err, internal_err, DataFusionError}; use datafusion_execution::TaskContext; use bytes::Bytes; +use datafusion_physical_plan::common::SpawnedTask; +use futures::try_join; use tokio::io::{AsyncWrite, AsyncWriteExt}; use tokio::sync::mpsc::{self, Receiver}; -use tokio::task::{JoinHandle, JoinSet}; -use tokio::try_join; +use tokio::task::JoinSet; type WriterType = AbortableWrite>; type SerializerType = Arc; @@ -51,14 +52,14 @@ pub(crate) async fn serialize_rb_stream_to_object_store( mut writer: AbortableWrite>, ) -> std::result::Result<(WriterType, u64), (WriterType, DataFusionError)> { let (tx, mut rx) = - mpsc::channel::>>(100); - let serialize_task = tokio::spawn(async move { + mpsc::channel::>>(100); + let serialize_task = SpawnedTask::spawn(async move { // Some serializers (like CSV) handle the first batch differently than // subsequent batches, so we track that here. let mut initial = true; while let Some(batch) = data_rx.recv().await { let serializer_clone = serializer.clone(); - let handle = tokio::spawn(async move { + let task = SpawnedTask::spawn(async move { let num_rows = batch.num_rows(); let bytes = serializer_clone.serialize(batch, initial)?; Ok((num_rows, bytes)) @@ -66,7 +67,7 @@ pub(crate) async fn serialize_rb_stream_to_object_store( if initial { initial = false; } - tx.send(handle).await.map_err(|_| { + tx.send(task).await.map_err(|_| { internal_datafusion_err!("Unknown error writing to object store") })?; } @@ -74,8 +75,8 @@ pub(crate) async fn serialize_rb_stream_to_object_store( }); let mut row_count = 0; - while let Some(handle) = rx.recv().await { - match handle.await { + while let Some(task) = rx.recv().await { + match task.join().await { Ok(Ok((cnt, bytes))) => { match writer.write_all(&bytes).await { Ok(_) => (), @@ -106,7 +107,7 @@ pub(crate) async fn serialize_rb_stream_to_object_store( } } - match serialize_task.await { + match serialize_task.join().await { Ok(Ok(_)) => (), Ok(Err(e)) => return Err((writer, e)), Err(_) => { @@ -115,7 +116,7 @@ pub(crate) async fn serialize_rb_stream_to_object_store( internal_datafusion_err!("Unknown error writing to object store"), )) } - }; + } Ok((writer, row_count as u64)) } @@ -241,9 +242,9 @@ pub(crate) async fn stateless_multipart_put( .execution .max_buffered_batches_per_output_file; - let (tx_file_bundle, rx_file_bundle) = tokio::sync::mpsc::channel(rb_buffer_size / 2); + let (tx_file_bundle, rx_file_bundle) = mpsc::channel(rb_buffer_size / 2); let (tx_row_cnt, rx_row_cnt) = tokio::sync::oneshot::channel(); - let write_coordinater_task = tokio::spawn(async move { + let write_coordinator_task = SpawnedTask::spawn(async move { stateless_serialize_and_write_files(rx_file_bundle, tx_row_cnt).await }); while let Some((location, rb_stream)) = file_stream_rx.recv().await { @@ -260,10 +261,10 @@ pub(crate) async fn stateless_multipart_put( })?; } - // Signal to the write 
coordinater that no more files are coming + // Signal to the write coordinator that no more files are coming drop(tx_file_bundle); - match try_join!(write_coordinater_task, demux_task) { + match try_join!(write_coordinator_task.join(), demux_task.join()) { Ok((r1, r2)) => { r1?; r2?; diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index 830cd7a07e46..6dc59e4a5c65 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -29,12 +29,11 @@ use arrow_array::{RecordBatch, RecordBatchReader, RecordBatchWriter}; use arrow_schema::SchemaRef; use async_trait::async_trait; use futures::StreamExt; -use tokio::task::spawn_blocking; use datafusion_common::{plan_err, Constraints, DataFusionError, Result}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{CreateExternalTable, Expr, TableType}; -use datafusion_physical_plan::common::AbortOnDropSingle; +use datafusion_physical_plan::common::SpawnedTask; use datafusion_physical_plan::insert::{DataSink, FileSinkExec}; use datafusion_physical_plan::metrics::MetricsSet; use datafusion_physical_plan::stream::RecordBatchReceiverStreamBuilder; @@ -344,7 +343,7 @@ impl DataSink for StreamWrite { let config = self.0.clone(); let (sender, mut receiver) = tokio::sync::mpsc::channel::(2); // Note: FIFO Files support poll so this could use AsyncFd - let write = AbortOnDropSingle::new(spawn_blocking(move || { + let write_task = SpawnedTask::spawn_blocking(move || { let mut count = 0_u64; let mut writer = config.writer()?; while let Some(batch) = receiver.blocking_recv() { @@ -352,7 +351,7 @@ impl DataSink for StreamWrite { writer.write(&batch)?; } Ok(count) - })); + }); while let Some(b) = data.next().await.transpose()? { if sender.send(b).await.is_err() { @@ -360,6 +359,6 @@ impl DataSink for StreamWrite { } } drop(sender); - write.await.unwrap() + write_task.join().await.unwrap() } } diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index ffc4a4f717d7..453a00a1a5cf 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -2288,6 +2288,7 @@ mod tests { } #[tokio::test] + #[allow(clippy::disallowed_methods)] async fn send_context_to_threads() -> Result<()> { // ensure SessionContexts can be used in a multi-threaded // environment. Usecase is for concurrent planing. diff --git a/datafusion/core/tests/fifo.rs b/datafusion/core/tests/fifo.rs index 93c7f7368065..c9ad95a3a042 100644 --- a/datafusion/core/tests/fifo.rs +++ b/datafusion/core/tests/fifo.rs @@ -103,6 +103,7 @@ mod unix_test { let broken_pipe_timeout = Duration::from_secs(10); let sa = file_path.clone(); // Spawn a new thread to write to the FIFO file + #[allow(clippy::disallowed_methods)] // spawn allowed only in tests spawn_blocking(move || { let file = OpenOptions::new().write(true).open(sa).unwrap(); // Reference time to use when deciding to fail the test @@ -357,6 +358,7 @@ mod unix_test { (sink_fifo_path.clone(), sink_fifo_path.display()); // Spawn a new thread to read sink EXTERNAL TABLE. 
+ #[allow(clippy::disallowed_methods)] // spawn allowed only in tests tasks.push(spawn_blocking(move || { let file = File::open(sink_fifo_path_thread).unwrap(); let schema = Arc::new(Schema::new(vec![ diff --git a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs index df6499e9b1e4..6c9c3359ebf4 100644 --- a/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/sort_preserving_repartition_fuzz.rs @@ -302,6 +302,7 @@ mod sp_repartition_fuzz_tests { let mut handles = Vec::new(); for seed in seed_start..seed_end { + #[allow(clippy::disallowed_methods)] // spawn allowed only in tests let job = tokio::spawn(run_sort_preserving_repartition_test( make_staggered_batches::(n_row, n_distinct, seed as u64), is_first_roundrobin, diff --git a/datafusion/core/tests/fuzz_cases/window_fuzz.rs b/datafusion/core/tests/fuzz_cases/window_fuzz.rs index 609d26c9c253..1cab4d5c2f98 100644 --- a/datafusion/core/tests/fuzz_cases/window_fuzz.rs +++ b/datafusion/core/tests/fuzz_cases/window_fuzz.rs @@ -123,6 +123,7 @@ async fn window_bounded_window_random_comparison() -> Result<()> { for i in 0..n { let idx = i % test_cases.len(); let (pb_cols, ob_cols, search_mode) = test_cases[idx].clone(); + #[allow(clippy::disallowed_methods)] // spawn allowed only in tests let job = tokio::spawn(run_window_test( make_staggered_batches::(1000, n_distinct, i as u64), i as u64, diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index e83dc2525b9f..5172bc9b2a3c 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -21,7 +21,6 @@ use std::fs; use std::fs::{metadata, File}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::task::{Context, Poll}; use super::SendableRecordBatchStream; use crate::stream::RecordBatchReceiverStream; @@ -39,8 +38,7 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; use futures::{Future, StreamExt, TryStreamExt}; use parking_lot::Mutex; -use pin_project_lite::pin_project; -use tokio::task::JoinHandle; +use tokio::task::{JoinError, JoinSet}; /// [`MemoryReservation`] used across query execution streams pub(crate) type SharedMemoryReservation = Arc>; @@ -174,50 +172,43 @@ pub fn compute_record_batch_statistics( } } -pin_project! { - /// Helper that aborts the given join handle on drop. - /// - /// Useful to kill background tasks when the consumer is dropped. - #[derive(Debug)] - pub struct AbortOnDropSingle{ - #[pin] - join_handle: JoinHandle, - } - - impl PinnedDrop for AbortOnDropSingle { - fn drop(this: Pin<&mut Self>) { - this.join_handle.abort(); - } - } +/// Helper that provides a simple API to spawn a single task and join it. +/// Provides guarantees of aborting on `Drop` to keep it cancel-safe. +/// +/// Technically, it's just a wrapper of `JoinSet` (with size=1). +#[derive(Debug)] +pub struct SpawnedTask { + inner: JoinSet, } -impl AbortOnDropSingle { - /// Create new abort helper from join handle. 
- pub fn new(join_handle: JoinHandle) -> Self { - Self { join_handle } +impl SpawnedTask { + pub fn spawn(task: T) -> Self + where + T: Future, + T: Send + 'static, + R: Send, + { + let mut inner = JoinSet::new(); + inner.spawn(task); + Self { inner } } -} -impl Future for AbortOnDropSingle { - type Output = Result; - - fn poll(self: std::pin::Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - let this = self.project(); - this.join_handle.poll(cx) + pub fn spawn_blocking(task: T) -> Self + where + T: FnOnce() -> R, + T: Send + 'static, + R: Send, + { + let mut inner = JoinSet::new(); + inner.spawn_blocking(task); + Self { inner } } -} - -/// Helper that aborts the given join handles on drop. -/// -/// Useful to kill background tasks when the consumer is dropped. -#[derive(Debug)] -pub struct AbortOnDropMany(pub Vec>); -impl Drop for AbortOnDropMany { - fn drop(&mut self) { - for join_handle in &self.0 { - join_handle.abort(); - } + pub async fn join(mut self) -> Result { + self.inner + .join_next() + .await + .expect("`SpawnedTask` instance always contains exactly 1 task") } } diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 1c4a6ac0ecaf..562e42a7da3b 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -298,14 +298,14 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// "abort" such tasks, they may continue to consume resources even after /// the plan is dropped, generating intermediate results that are never /// used. + /// Thus, [`spawn`] is disallowed, and instead use [`SpawnedTask`]. /// - /// See [`AbortOnDropSingle`], [`AbortOnDropMany`] and - /// [`RecordBatchReceiverStreamBuilder`] for structures to help ensure all - /// background tasks are cancelled. + /// For more details see [`SpawnedTask`], [`JoinSet`] and [`RecordBatchReceiverStreamBuilder`] + /// for structures to help ensure all background tasks are cancelled. 
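Editor's note: angle-bracket generic parameters appear to have been stripped from this patch text during extraction (for example `JoinSet` for `JoinSet<R>`, and the bare `Result` returned by `join` for `Result<R, JoinError>`). Reconstructed from the surrounding context, the new API plausibly reads as follows; treat the exact bounds as a best-effort restoration rather than the authoritative source:

```rust
use std::future::Future;

use tokio::task::{JoinError, JoinSet};

#[derive(Debug)]
pub struct SpawnedTask<R> {
    inner: JoinSet<R>,
}

impl<R: 'static> SpawnedTask<R> {
    pub fn spawn<T>(task: T) -> Self
    where
        T: Future<Output = R> + Send + 'static,
        R: Send,
    {
        let mut inner = JoinSet::new();
        inner.spawn(task);
        Self { inner }
    }

    pub fn spawn_blocking<T>(task: T) -> Self
    where
        T: FnOnce() -> R + Send + 'static,
        R: Send,
    {
        let mut inner = JoinSet::new();
        inner.spawn_blocking(task);
        Self { inner }
    }

    pub async fn join(mut self) -> Result<R, JoinError> {
        self.inner
            .join_next()
            .await
            .expect("`SpawnedTask` instance always contains exactly 1 task")
    }
}
```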
/// /// [`spawn`]: tokio::task::spawn - /// [`AbortOnDropSingle`]: crate::common::AbortOnDropSingle - /// [`AbortOnDropMany`]: crate::common::AbortOnDropMany + /// [`JoinSet`]: tokio::task::JoinSet + /// [`SpawnedTask`]: crate::common::SpawnedTask /// [`RecordBatchReceiverStreamBuilder`]: crate::stream::RecordBatchReceiverStreamBuilder /// /// # Implementation Examples diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 07693f747fee..a66a929796ab 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -32,21 +32,20 @@ use futures::{FutureExt, StreamExt}; use hashbrown::HashMap; use log::trace; use parking_lot::Mutex; -use tokio::task::JoinHandle; use datafusion_common::{arrow_datafusion_err, not_impl_err, DataFusionError, Result}; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; -use crate::common::transpose; +use crate::common::{transpose, SpawnedTask}; use crate::hash_utils::create_hashes; use crate::metrics::BaselineMetrics; use crate::repartition::distributor_channels::{channels, partition_aware_channels}; use crate::sorts::streaming_merge; use crate::{DisplayFormatType, ExecutionPlan, Partitioning, Statistics}; -use super::common::{AbortOnDropMany, AbortOnDropSingle, SharedMemoryReservation}; +use super::common::SharedMemoryReservation; use super::expressions::PhysicalSortExpr; use super::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; use super::{DisplayAs, RecordBatchStream, SendableRecordBatchStream}; @@ -74,7 +73,7 @@ struct RepartitionExecState { >, /// Helper that ensures that that background job is killed once it is no longer needed. 
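Editor's note: the doc change above points at the mechanism that enforces the ban. Since a `clippy.toml` is now shipped (see the `rat_exclude_files.txt` entry later in this series), `tokio::task::spawn` is presumably listed under clippy's `disallowed-methods` lint, which is what the `#[allow(clippy::disallowed_methods)]` annotations added to test code opt out of. A hedged sketch of what that configuration likely looks like; the exact entries and wording in the repository may differ:

```toml
# Hypothetical clippy.toml; the paths are real tokio items, the reasons
# are illustrative.
disallowed-methods = [
    { path = "tokio::task::spawn", reason = "use SpawnedTask::spawn for cancel-safety" },
    { path = "tokio::task::spawn_blocking", reason = "use SpawnedTask::spawn_blocking for cancel-safety" },
]
```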
- abort_helper: Arc>, + abort_helper: Arc>>, } /// A utility that can be used to partition batches based on [`Partitioning`] @@ -522,7 +521,7 @@ impl ExecutionPlan for RepartitionExec { } // launch one async task per *input* partition - let mut join_handles = Vec::with_capacity(num_input_partitions); + let mut spawned_tasks = Vec::with_capacity(num_input_partitions); for i in 0..num_input_partitions { let txs: HashMap<_, _> = state .channels @@ -534,28 +533,27 @@ impl ExecutionPlan for RepartitionExec { let r_metrics = RepartitionMetrics::new(i, partition, &self.metrics); - let input_task: JoinHandle> = - tokio::spawn(Self::pull_from_input( - self.input.clone(), - i, - txs.clone(), - self.partitioning.clone(), - r_metrics, - context.clone(), - )); + let input_task = SpawnedTask::spawn(Self::pull_from_input( + self.input.clone(), + i, + txs.clone(), + self.partitioning.clone(), + r_metrics, + context.clone(), + )); // In a separate task, wait for each input to be done // (and pass along any errors, including panic!s) - let join_handle = tokio::spawn(Self::wait_for_task( - AbortOnDropSingle::new(input_task), + let wait_for_task = SpawnedTask::spawn(Self::wait_for_task( + input_task, txs.into_iter() .map(|(partition, (tx, _reservation))| (partition, tx)) .collect(), )); - join_handles.push(join_handle); + spawned_tasks.push(wait_for_task); } - state.abort_helper = Arc::new(AbortOnDropMany(join_handles)) + state.abort_helper = Arc::new(spawned_tasks) } trace!( @@ -638,7 +636,7 @@ impl RepartitionExec { partitioning, state: Arc::new(Mutex::new(RepartitionExecState { channels: HashMap::new(), - abort_helper: Arc::new(AbortOnDropMany::<()>(vec![])), + abort_helper: Arc::new(Vec::new()), })), metrics: ExecutionPlanMetricsSet::new(), preserve_order: false, @@ -759,12 +757,13 @@ impl RepartitionExec { /// complete. Upon error, propagates the errors to all output tx /// channels. async fn wait_for_task( - input_task: AbortOnDropSingle>, + input_task: SpawnedTask>, txs: HashMap>, ) { // wait for completion, and propagate error // note we ignore errors on send (.ok) as that means the receiver has already shutdown. - match input_task.await { + + match input_task.join().await { // Error in joining task Err(e) => { let e = Arc::new(e); @@ -813,7 +812,7 @@ struct RepartitionStream { /// Handle to ensure background tasks are killed when no longer needed. #[allow(dead_code)] - drop_helper: Arc>, + drop_helper: Arc>>, /// Memory reservation. reservation: SharedMemoryReservation, @@ -877,7 +876,7 @@ struct PerPartitionStream { /// Handle to ensure background tasks are killed when no longer needed. #[allow(dead_code)] - drop_helper: Arc>, + drop_helper: Arc>>, /// Memory reservation. 
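Editor's note: `wait_for_task` above distinguishes two failure layers: the join itself failing (the spawned task panicked or was cancelled) versus the task body completing with an error. A small self-contained illustration of the same two-layer match on a `JoinSet`:

```rust
use tokio::task::JoinSet;

#[tokio::main]
async fn main() {
    let mut set = JoinSet::new();
    set.spawn(async { Err::<(), String>("task body failed".into()) });
    match set.join_next().await.expect("one task was spawned") {
        // Outer error: the task panicked or was aborted before finishing.
        Err(join_err) => println!("join error: {join_err}"),
        // Inner error: the task ran to completion but reported a failure.
        Ok(Err(body_err)) => println!("task error: {body_err}"),
        Ok(Ok(())) => println!("task succeeded"),
    }
}
```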
reservation: SharedMemoryReservation, @@ -1056,6 +1055,7 @@ mod tests { } #[tokio::test] + #[allow(clippy::disallowed_methods)] async fn many_to_many_round_robin_within_tokio_task() -> Result<()> { let join_handle: JoinHandle>>> = tokio::spawn(async move { diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 2d8237011fff..84bf3ec415ef 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -27,7 +27,7 @@ use std::io::BufReader; use std::path::{Path, PathBuf}; use std::sync::Arc; -use crate::common::{spawn_buffered, IPCWriter}; +use crate::common::{spawn_buffered, IPCWriter, SpawnedTask}; use crate::expressions::PhysicalSortExpr; use crate::metrics::{ BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, @@ -56,7 +56,6 @@ use datafusion_physical_expr::EquivalenceProperties; use futures::{StreamExt, TryStreamExt}; use log::{debug, error, trace}; use tokio::sync::mpsc::Sender; -use tokio::task; struct ExternalSorterMetrics { /// metrics @@ -604,8 +603,8 @@ async fn spill_sorted_batches( schema: SchemaRef, ) -> Result<()> { let path: PathBuf = path.into(); - let handle = task::spawn_blocking(move || write_sorted(batches, path, schema)); - match handle.await { + let task = SpawnedTask::spawn_blocking(move || write_sorted(batches, path, schema)); + match task.join().await { Ok(r) => r, Err(e) => exec_err!("Error occurred while spilling {e}"), } diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index ffae144eae84..41c33deec643 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -88,6 +88,7 @@ async fn run_tests() -> Result<()> { // modifying shared state like `/tmp/`) let errors: Vec<_> = futures::stream::iter(read_test_files(&options)?) .map(|test_file| { + #[allow(clippy::disallowed_methods)] // spawn allowed only in tests tokio::task::spawn(async move { println!("Running {:?}", test_file.relative_path); if options.complete { diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index f99d6e15e869..ce5635b6daf4 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -136,4 +136,5 @@ datafusion/proto/src/generated/prost.rs .github/ISSUE_TEMPLATE/feature_request.yml .github/workflows/docs.yaml **/node_modules/* -datafusion/wasmtest/pkg/* \ No newline at end of file +datafusion/wasmtest/pkg/* +clippy.toml From acd09da1731a77f33a87dbbedee7d759cedcecc8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 Feb 2024 08:17:58 -0500 Subject: [PATCH 27/45] Update nix requirement from 0.27.1 to 0.28.0 (#9344) Updates the requirements on [nix](https://github.com/nix-rust/nix) to permit the latest version. - [Changelog](https://github.com/nix-rust/nix/blob/master/CHANGELOG.md) - [Commits](https://github.com/nix-rust/nix/compare/v0.27.1...v0.28.0) --- updated-dependencies: - dependency-name: nix dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- datafusion/core/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index c3bd89037cfe..1247143f9fb4 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -142,7 +142,7 @@ thiserror = { workspace = true } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot", "fs"] } tokio-postgres = "0.7.7" [target.'cfg(not(target_os = "windows"))'.dev-dependencies] -nix = { version = "0.27.1", features = ["fs"] } +nix = { version = "0.28.0", features = ["fs"] } [[bench]] harness = false From c439bc73b6a9ba9efa4c8a9b5d2fb6111e660e74 Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 27 Feb 2024 08:19:52 -0500 Subject: [PATCH 28/45] Replace usages of internal_err with exec_err where appropriate (#9241) * internal_err! -> exec_err! * fmt updates. * Updated error macro from exec_err! to not_impl_err! for all unsupported type errors. * Revert "Updated error macro from exec_err! to not_impl_err! for all unsupported type errors." This reverts commit fe0517a7ca0c0c6e7fa2a61b0bf321d45185854c. * Updated a few instances of internal_err missed in previous audit. --------- Co-authored-by: Andrew Lamb --- datafusion/functions/src/core/nullif.rs | 13 ++- datafusion/functions/src/encoding/inner.rs | 34 ++++---- datafusion/functions/src/math/abs.rs | 6 +- datafusion/functions/src/math/nans.rs | 10 +-- .../physical-expr/src/aggregate/build_in.rs | 25 +++--- .../src/conditional_expressions.rs | 4 +- .../physical-expr/src/crypto_expressions.rs | 16 ++-- datafusion/physical-expr/src/functions.rs | 85 +++++++++---------- .../physical-expr/src/math_expressions.rs | 84 +++++++++--------- .../physical-expr/src/regex_expressions.rs | 17 ++-- .../physical-expr/src/string_expressions.rs | 49 +++++------ .../physical-expr/src/unicode_expressions.rs | 20 +++-- 12 files changed, 183 insertions(+), 180 deletions(-) diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index f83bd987c937..73bfba9b38b1 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -18,15 +18,15 @@ //! 
Encoding expressions use arrow::datatypes::DataType; -use datafusion_common::{internal_err, Result, DataFusionError}; +use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; -use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; -use std::any::Any; use arrow::array::Array; use arrow::compute::kernels::cmp::eq; use arrow::compute::kernels::nullif::nullif; use datafusion_common::ScalarValue; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; #[derive(Debug)] pub(super) struct NullIfFunc { @@ -58,7 +58,7 @@ impl NullIfFunc { Self { signature: Signature::uniform(2, SUPPORTED_NULLIF_TYPES.to_vec(), - Volatility::Immutable, + Volatility::Immutable, ) } } @@ -81,7 +81,7 @@ impl ScalarUDFImpl for NullIfFunc { let coerced_types = datafusion_expr::type_coercion::functions::data_types(arg_types, &self.signature); coerced_types.map(|typs| typs[0].clone()) .map_err(|e| e.context("Failed to coerce arguments for NULLIF") - ) + ) } fn invoke(&self, args: &[ColumnarValue]) -> Result { @@ -90,14 +90,13 @@ impl ScalarUDFImpl for NullIfFunc { } - /// Implements NULLIF(expr1, expr2) /// Args: 0 - left expr is any array /// 1 - if the left is equal to this expr2, then the result is NULL, otherwise left value is passed. /// fn nullif_func(args: &[ColumnarValue]) -> Result { if args.len() != 2 { - return internal_err!( + return exec_err!( "{:?} args were supplied but NULLIF takes exactly two args", args.len() ); diff --git a/datafusion/functions/src/encoding/inner.rs b/datafusion/functions/src/encoding/inner.rs index 886a031a5269..4cbeab3092c7 100644 --- a/datafusion/functions/src/encoding/inner.rs +++ b/datafusion/functions/src/encoding/inner.rs @@ -22,11 +22,11 @@ use arrow::{ datatypes::DataType, }; use base64::{engine::general_purpose, Engine as _}; -use datafusion_common::ScalarValue; use datafusion_common::{ cast::{as_generic_binary_array, as_generic_string_array}, - internal_err, not_impl_err, plan_err, + not_impl_err, plan_err, }; +use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; use std::sync::Arc; @@ -111,6 +111,7 @@ impl DecodeFunc { } } } + impl ScalarUDFImpl for DecodeFunc { fn as_any(&self) -> &dyn Any { self @@ -148,6 +149,7 @@ enum Encoding { Base64, Hex, } + fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result { match value { ColumnarValue::Array(a) => match a.data_type() { @@ -155,7 +157,7 @@ fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result encoding.encode_utf8_array::(a.as_ref()), DataType::Binary => encoding.encode_binary_array::(a.as_ref()), DataType::LargeBinary => encoding.encode_binary_array::(a.as_ref()), - other => internal_err!( + other => exec_err!( "Unsupported data type {other:?} for function encode({encoding})" ), }, @@ -171,7 +173,7 @@ fn encode_process(value: &ColumnarValue, encoding: Encoding) -> Result Ok(encoding .encode_large_scalar(a.as_ref().map(|v: &Vec| v.as_slice()))), - other => internal_err!( + other => exec_err!( "Unsupported data type {other:?} for function encode({encoding})" ), } @@ -186,7 +188,7 @@ fn decode_process(value: &ColumnarValue, encoding: Encoding) -> Result encoding.decode_utf8_array::(a.as_ref()), DataType::Binary => encoding.decode_binary_array::(a.as_ref()), DataType::LargeBinary => encoding.decode_binary_array::(a.as_ref()), - other => internal_err!( + other => exec_err!( "Unsupported data type {other:?} for function decode({encoding})" ), }, @@ 
-202,7 +204,7 @@ fn decode_process(value: &ColumnarValue, encoding: Encoding) -> Result encoding .decode_large_scalar(a.as_ref().map(|v: &Vec| v.as_slice())), - other => internal_err!( + other => exec_err!( "Unsupported data type {other:?} for function decode({encoding})" ), } @@ -270,8 +272,8 @@ impl Encoding { } fn encode_binary_array(self, value: &dyn Array) -> Result - where - T: OffsetSizeTrait, + where + T: OffsetSizeTrait, { let input_value = as_generic_binary_array::(value)?; let array: ArrayRef = match self { @@ -282,8 +284,8 @@ impl Encoding { } fn encode_utf8_array(self, value: &dyn Array) -> Result - where - T: OffsetSizeTrait, + where + T: OffsetSizeTrait, { let input_value = as_generic_string_array::(value)?; let array: ArrayRef = match self { @@ -350,8 +352,8 @@ impl Encoding { } fn decode_binary_array(self, value: &dyn Array) -> Result - where - T: OffsetSizeTrait, + where + T: OffsetSizeTrait, { let input_value = as_generic_binary_array::(value)?; let array: ArrayRef = match self { @@ -362,8 +364,8 @@ impl Encoding { } fn decode_utf8_array(self, value: &dyn Array) -> Result - where - T: OffsetSizeTrait, + where + T: OffsetSizeTrait, { let input_value = as_generic_string_array::(value)?; let array: ArrayRef = match self { @@ -405,7 +407,7 @@ impl FromStr for Encoding { /// Standard encodings are base64 and hex. fn encode(args: &[ColumnarValue]) -> Result { if args.len() != 2 { - return internal_err!( + return exec_err!( "{:?} args were supplied but encode takes exactly two arguments", args.len() ); @@ -431,7 +433,7 @@ fn encode(args: &[ColumnarValue]) -> Result { /// Standard encodings are base64 and hex. fn decode(args: &[ColumnarValue]) -> Result { if args.len() != 2 { - return internal_err!( + return exec_err!( "{:?} args were supplied but decode takes exactly two arguments", args.len() ); diff --git a/datafusion/functions/src/math/abs.rs b/datafusion/functions/src/math/abs.rs index 21ca37fb8ec3..9ba0e3da2ad4 100644 --- a/datafusion/functions/src/math/abs.rs +++ b/datafusion/functions/src/math/abs.rs @@ -24,9 +24,9 @@ use arrow::array::Int32Array; use arrow::array::Int64Array; use arrow::array::Int8Array; use arrow::datatypes::DataType; -use datafusion_common::not_impl_err; +use datafusion_common::{exec_err, not_impl_err}; use datafusion_common::plan_datafusion_err; -use datafusion_common::{internal_err, Result, DataFusionError}; +use datafusion_common::{Result, DataFusionError}; use datafusion_expr::utils; use datafusion_expr::ColumnarValue; @@ -165,7 +165,7 @@ impl ScalarUDFImpl for AbsFunc { let args = ColumnarValue::values_to_arrays(args)?; if args.len() != 1 { - return internal_err!("abs function requires 1 argument, got {}", args.len()); + return exec_err!("abs function requires 1 argument, got {}", args.len()); } let input_data_type = args[0].data_type(); diff --git a/datafusion/functions/src/math/nans.rs b/datafusion/functions/src/math/nans.rs index 20754c18aa8e..c7868e6d5eca 100644 --- a/datafusion/functions/src/math/nans.rs +++ b/datafusion/functions/src/math/nans.rs @@ -18,14 +18,14 @@ //! 
Encoding expressions use arrow::datatypes::DataType; -use datafusion_common::{internal_err, Result, DataFusionError}; +use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; +use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, BooleanArray, Float32Array, Float64Array}; #[derive(Debug)] pub(super) struct IsNanFunc { @@ -73,7 +73,7 @@ impl ScalarUDFImpl for IsNanFunc { BooleanArray, { f64::is_nan } )) - }, + } DataType::Float32 => { Arc::new(make_function_scalar_inputs_return_type!( &args[0], @@ -82,8 +82,8 @@ impl ScalarUDFImpl for IsNanFunc { BooleanArray, { f32::is_nan } )) - }, - other => return internal_err!("Unsupported data type {other:?} for function isnan"), + } + other => return exec_err!("Unsupported data type {other:?} for function isnan"), }; Ok(ColumnarValue::Array(arr)) } diff --git a/datafusion/physical-expr/src/aggregate/build_in.rs b/datafusion/physical-expr/src/aggregate/build_in.rs index 1a3d21fc40bc..2918856aa623 100644 --- a/datafusion/physical-expr/src/aggregate/build_in.rs +++ b/datafusion/physical-expr/src/aggregate/build_in.rs @@ -28,14 +28,15 @@ use std::sync::Arc; +use arrow::datatypes::Schema; + +use datafusion_common::{exec_err, not_impl_err, DataFusionError, Result}; +use datafusion_expr::AggregateFunction; + use crate::aggregate::regr::RegrType; use crate::expressions::{self, Literal}; use crate::{AggregateExpr, PhysicalExpr, PhysicalSortExpr}; -use arrow::datatypes::Schema; -use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; -use datafusion_expr::AggregateFunction; - /// Create a physical aggregation expression. /// This function errors when `input_phy_exprs`' can't be coerced to a valid argument type of the aggregation function. 
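Editor's note: the theme of this commit is an error-classification rule rather than a behavior change. Errors reachable from user input (unsupported argument types, wrong argument counts) become `exec_err!`, while `internal_err!` stays reserved for invariants that only a DataFusion bug could violate. A small sketch of the convention; the macros are the real `datafusion_common` ones, the functions are illustrative:

```rust
use datafusion_common::{exec_err, internal_err, Result};

fn check_arg_count(args: &[i32]) -> Result<()> {
    if args.len() != 2 {
        // Reachable from user-provided SQL, so it is an execution error.
        return exec_err!("expected 2 arguments, got {}", args.len());
    }
    Ok(())
}

fn check_invariant(planner_bug: bool) -> Result<()> {
    if planner_bug {
        // Should be unreachable; signals a bug in DataFusion itself.
        return internal_err!("planner produced an invalid expression");
    }
    Ok(())
}

fn main() {
    assert!(check_arg_count(&[1, 2]).is_ok());
    assert!(check_arg_count(&[1]).is_err());
    assert!(check_invariant(false).is_ok());
}
```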
pub fn create_aggregate_expr( @@ -379,9 +380,7 @@ pub fn create_aggregate_expr( .downcast_ref::() .map(|literal| literal.value()) else { - return internal_err!( - "Second argument of NTH_VALUE needs to be a literal" - ); + return exec_err!("Second argument of NTH_VALUE needs to be a literal"); }; let nullable = expr.nullable(input_schema)?; Arc::new(expressions::NthValueAgg::new( @@ -415,17 +414,19 @@ pub fn create_aggregate_expr( #[cfg(test)] mod tests { - use super::*; + use arrow::datatypes::{DataType, Field}; + + use datafusion_common::{plan_err, ScalarValue}; + use datafusion_expr::type_coercion::aggregates::NUMERICS; + use datafusion_expr::{type_coercion, Signature}; + use crate::expressions::{ try_cast, ApproxDistinct, ApproxMedian, ApproxPercentileCont, ArrayAgg, Avg, BitAnd, BitOr, BitXor, BoolAnd, BoolOr, Correlation, Count, Covariance, DistinctArrayAgg, DistinctCount, Max, Min, Stddev, Sum, Variance, }; - use arrow::datatypes::{DataType, Field}; - use datafusion_common::{plan_err, ScalarValue}; - use datafusion_expr::type_coercion::aggregates::NUMERICS; - use datafusion_expr::{type_coercion, Signature}; + use super::*; #[test] fn test_count_arragg_approx_expr() -> Result<()> { diff --git a/datafusion/physical-expr/src/conditional_expressions.rs b/datafusion/physical-expr/src/conditional_expressions.rs index 782897d46379..cc8f3c8dfaf0 100644 --- a/datafusion/physical-expr/src/conditional_expressions.rs +++ b/datafusion/physical-expr/src/conditional_expressions.rs @@ -19,14 +19,14 @@ use arrow::array::{new_null_array, Array, BooleanArray}; use arrow::compute::kernels::zip::zip; use arrow::compute::{and, is_not_null, is_null}; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; /// coalesce evaluates to the first value which is not NULL pub fn coalesce(args: &[ColumnarValue]) -> Result { // do not accept 0 arguments. if args.is_empty() { - return internal_err!( + return exec_err!( "coalesce was called with {} arguments. It requires at least 1.", args.len() ); diff --git a/datafusion/physical-expr/src/crypto_expressions.rs b/datafusion/physical-expr/src/crypto_expressions.rs index 580b0ed01b6e..3ff3bc83f297 100644 --- a/datafusion/physical-expr/src/crypto_expressions.rs +++ b/datafusion/physical-expr/src/crypto_expressions.rs @@ -23,11 +23,11 @@ use arrow::{ }; use blake2::{Blake2b512, Blake2s256, Digest}; use blake3::Hasher as Blake3; -use datafusion_common::ScalarValue; use datafusion_common::{ cast::{as_binary_array, as_generic_binary_array, as_generic_string_array}, plan_err, }; +use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{internal_err, DataFusionError, Result}; use datafusion_expr::ColumnarValue; use md5::Md5; @@ -66,7 +66,7 @@ fn digest_process( DataType::LargeBinary => { digest_algorithm.digest_binary_array::(a.as_ref()) } - other => internal_err!( + other => exec_err!( "Unsupported data type {other:?} for function {digest_algorithm}" ), }, @@ -77,7 +77,7 @@ fn digest_process( } ScalarValue::Binary(a) | ScalarValue::LargeBinary(a) => Ok(digest_algorithm .digest_scalar(a.as_ref().map(|v: &Vec| v.as_slice()))), - other => internal_err!( + other => exec_err!( "Unsupported data type {other:?} for function {digest_algorithm}" ), }, @@ -238,7 +238,7 @@ macro_rules! 
define_digest_function { #[doc = $DOC] pub fn $NAME(args: &[ColumnarValue]) -> Result { if args.len() != 1 { - return internal_err!( + return exec_err!( "{:?} args were supplied but {} takes exactly one argument", args.len(), DigestAlgorithm::$METHOD.to_string() @@ -264,7 +264,7 @@ fn hex_encode>(data: T) -> String { /// computes md5 hash digest of the given input pub fn md5(args: &[ColumnarValue]) -> Result { if args.len() != 1 { - return internal_err!( + return exec_err!( "{:?} args were supplied but {} takes exactly one argument", args.len(), DigestAlgorithm::Md5 @@ -284,7 +284,7 @@ pub fn md5(args: &[ColumnarValue]) -> Result { ColumnarValue::Scalar(ScalarValue::Binary(opt)) => { ColumnarValue::Scalar(ScalarValue::Utf8(opt.map(hex_encode::<_>))) } - _ => return internal_err!("Impossibly got invalid results from digest"), + _ => return exec_err!("Impossibly got invalid results from digest"), }) } @@ -329,7 +329,7 @@ define_digest_function!( /// Standard algorithms are md5, sha1, sha224, sha256, sha384 and sha512. pub fn digest(args: &[ColumnarValue]) -> Result { if args.len() != 2 { - return internal_err!( + return exec_err!( "{:?} args were supplied but digest takes exactly two arguments", args.len() ); @@ -339,7 +339,7 @@ pub fn digest(args: &[ColumnarValue]) -> Result { ScalarValue::Utf8(Some(method)) | ScalarValue::LargeUtf8(Some(method)) => { method.parse::() } - other => internal_err!("Unsupported data type {other:?} for function digest"), + other => exec_err!("Unsupported data type {other:?} for function digest"), }, ColumnarValue::Array(_) => { internal_err!("Digest using dynamically decided method is not yet supported") diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 0dc3f96dc12a..c91b96d67a22 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -42,7 +42,7 @@ use arrow::{ datatypes::{DataType, Int32Type, Int64Type, Schema}, }; use arrow_array::Array; -use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; pub use datafusion_expr::FuncMonotonicity; use datafusion_expr::{ type_coercion::functions::data_types, BuiltinScalarFunction, ColumnarValue, @@ -95,6 +95,7 @@ macro_rules! invoke_if_crypto_expressions_feature_flag { #[cfg(not(feature = "crypto_expressions"))] macro_rules! 
invoke_if_crypto_expressions_feature_flag { ($FUNC:ident, $NAME:expr) => { + use datafusion_common::internal_err; |_: &[ColumnarValue]| -> Result { internal_err!( "function {} requires compilation with feature flag: crypto_expressions.", @@ -433,7 +434,7 @@ pub fn create_physical_fun( DataType::LargeUtf8 => { make_scalar_function_inner(string_expressions::ascii::)(args) } - other => internal_err!("Unsupported data type {other:?} for function ascii"), + other => exec_err!("Unsupported data type {other:?} for function ascii"), }), BuiltinScalarFunction::BitLength => Arc::new(|args| match &args[0] { ColumnarValue::Array(v) => Ok(ColumnarValue::Array(bit_length(v.as_ref())?)), @@ -454,7 +455,7 @@ pub fn create_physical_fun( DataType::LargeUtf8 => { make_scalar_function_inner(string_expressions::btrim::)(args) } - other => internal_err!("Unsupported data type {other:?} for function btrim"), + other => exec_err!("Unsupported data type {other:?} for function btrim"), }), BuiltinScalarFunction::CharacterLength => { Arc::new(|args| match args[0].data_type() { @@ -474,7 +475,7 @@ pub fn create_physical_fun( ); make_scalar_function_inner(func)(args) } - other => internal_err!( + other => exec_err!( "Unsupported data type {other:?} for function character_length" ), }) @@ -536,7 +537,7 @@ pub fn create_physical_fun( make_scalar_function_inner(string_expressions::initcap::)(args) } other => { - internal_err!("Unsupported data type {other:?} for function initcap") + exec_err!("Unsupported data type {other:?} for function initcap") } }), BuiltinScalarFunction::InStr => Arc::new(|args| match args[0].data_type() { @@ -546,7 +547,7 @@ pub fn create_physical_fun( DataType::LargeUtf8 => { make_scalar_function_inner(string_expressions::instr::)(args) } - other => internal_err!("Unsupported data type {other:?} for function instr"), + other => exec_err!("Unsupported data type {other:?} for function instr"), }), BuiltinScalarFunction::Left => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -557,7 +558,7 @@ pub fn create_physical_fun( let func = invoke_if_unicode_expressions_feature_flag!(left, i64, "left"); make_scalar_function_inner(func)(args) } - other => internal_err!("Unsupported data type {other:?} for function left"), + other => exec_err!("Unsupported data type {other:?} for function left"), }), BuiltinScalarFunction::Lower => Arc::new(string_expressions::lower), BuiltinScalarFunction::Lpad => Arc::new(|args| match args[0].data_type() { @@ -569,7 +570,7 @@ pub fn create_physical_fun( let func = invoke_if_unicode_expressions_feature_flag!(lpad, i64, "lpad"); make_scalar_function_inner(func)(args) } - other => internal_err!("Unsupported data type {other:?} for function lpad"), + other => exec_err!("Unsupported data type {other:?} for function lpad"), }), BuiltinScalarFunction::Ltrim => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -578,7 +579,7 @@ pub fn create_physical_fun( DataType::LargeUtf8 => { make_scalar_function_inner(string_expressions::ltrim::)(args) } - other => internal_err!("Unsupported data type {other:?} for function ltrim"), + other => exec_err!("Unsupported data type {other:?} for function ltrim"), }), BuiltinScalarFunction::MD5 => { Arc::new(invoke_if_crypto_expressions_feature_flag!(md5, "md5")) @@ -616,7 +617,7 @@ pub fn create_physical_fun( make_scalar_function_inner(func)(args) } other => { - internal_err!("Unsupported data type {other:?} for function regexp_like") + exec_err!("Unsupported data type {other:?} for function regexp_like") } }), 
BuiltinScalarFunction::RegexpMatch => { @@ -637,9 +638,9 @@ pub fn create_physical_fun( ); make_scalar_function_inner(func)(args) } - other => internal_err!( - "Unsupported data type {other:?} for function regexp_match" - ), + other => { + exec_err!("Unsupported data type {other:?} for function regexp_match") + } }) } BuiltinScalarFunction::RegexpReplace => { @@ -662,7 +663,7 @@ pub fn create_physical_fun( let func = specializer_func(args)?; func(args) } - other => internal_err!( + other => exec_err!( "Unsupported data type {other:?} for function regexp_replace" ), }) @@ -674,7 +675,7 @@ pub fn create_physical_fun( DataType::LargeUtf8 => { make_scalar_function_inner(string_expressions::repeat::)(args) } - other => internal_err!("Unsupported data type {other:?} for function repeat"), + other => exec_err!("Unsupported data type {other:?} for function repeat"), }), BuiltinScalarFunction::Replace => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -684,7 +685,7 @@ pub fn create_physical_fun( make_scalar_function_inner(string_expressions::replace::)(args) } other => { - internal_err!("Unsupported data type {other:?} for function replace") + exec_err!("Unsupported data type {other:?} for function replace") } }), BuiltinScalarFunction::Reverse => Arc::new(|args| match args[0].data_type() { @@ -699,7 +700,7 @@ pub fn create_physical_fun( make_scalar_function_inner(func)(args) } other => { - internal_err!("Unsupported data type {other:?} for function reverse") + exec_err!("Unsupported data type {other:?} for function reverse") } }), BuiltinScalarFunction::Right => Arc::new(|args| match args[0].data_type() { @@ -713,7 +714,7 @@ pub fn create_physical_fun( invoke_if_unicode_expressions_feature_flag!(right, i64, "right"); make_scalar_function_inner(func)(args) } - other => internal_err!("Unsupported data type {other:?} for function right"), + other => exec_err!("Unsupported data type {other:?} for function right"), }), BuiltinScalarFunction::Rpad => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -724,7 +725,7 @@ pub fn create_physical_fun( let func = invoke_if_unicode_expressions_feature_flag!(rpad, i64, "rpad"); make_scalar_function_inner(func)(args) } - other => internal_err!("Unsupported data type {other:?} for function rpad"), + other => exec_err!("Unsupported data type {other:?} for function rpad"), }), BuiltinScalarFunction::Rtrim => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -733,7 +734,7 @@ pub fn create_physical_fun( DataType::LargeUtf8 => { make_scalar_function_inner(string_expressions::rtrim::)(args) } - other => internal_err!("Unsupported data type {other:?} for function rtrim"), + other => exec_err!("Unsupported data type {other:?} for function rtrim"), }), BuiltinScalarFunction::SHA224 => { Arc::new(invoke_if_crypto_expressions_feature_flag!(sha224, "sha224")) @@ -755,7 +756,7 @@ pub fn create_physical_fun( make_scalar_function_inner(string_expressions::split_part::)(args) } other => { - internal_err!("Unsupported data type {other:?} for function split_part") + exec_err!("Unsupported data type {other:?} for function split_part") } }), BuiltinScalarFunction::StringToArray => { @@ -767,7 +768,7 @@ pub fn create_physical_fun( array_expressions::string_to_array::, )(args), other => { - internal_err!( + exec_err!( "Unsupported data type {other:?} for function string_to_array" ) } @@ -781,7 +782,7 @@ pub fn create_physical_fun( make_scalar_function_inner(string_expressions::starts_with::)(args) } other => { - 
internal_err!("Unsupported data type {other:?} for function starts_with") + exec_err!("Unsupported data type {other:?} for function starts_with") } }), BuiltinScalarFunction::EndsWith => Arc::new(|args| match args[0].data_type() { @@ -792,7 +793,7 @@ pub fn create_physical_fun( make_scalar_function_inner(string_expressions::ends_with::)(args) } other => { - internal_err!("Unsupported data type {other:?} for function ends_with") + exec_err!("Unsupported data type {other:?} for function ends_with") } }), BuiltinScalarFunction::Strpos => Arc::new(|args| match args[0].data_type() { @@ -808,7 +809,7 @@ pub fn create_physical_fun( ); make_scalar_function_inner(func)(args) } - other => internal_err!("Unsupported data type {other:?} for function strpos"), + other => exec_err!("Unsupported data type {other:?} for function strpos"), }), BuiltinScalarFunction::Substr => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -821,7 +822,7 @@ pub fn create_physical_fun( invoke_if_unicode_expressions_feature_flag!(substr, i64, "substr"); make_scalar_function_inner(func)(args) } - other => internal_err!("Unsupported data type {other:?} for function substr"), + other => exec_err!("Unsupported data type {other:?} for function substr"), }), BuiltinScalarFunction::ToHex => Arc::new(|args| match args[0].data_type() { DataType::Int32 => { @@ -830,7 +831,7 @@ pub fn create_physical_fun( DataType::Int64 => { make_scalar_function_inner(string_expressions::to_hex::)(args) } - other => internal_err!("Unsupported data type {other:?} for function to_hex"), + other => exec_err!("Unsupported data type {other:?} for function to_hex"), }), BuiltinScalarFunction::Translate => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -850,7 +851,7 @@ pub fn create_physical_fun( make_scalar_function_inner(func)(args) } other => { - internal_err!("Unsupported data type {other:?} for function translate") + exec_err!("Unsupported data type {other:?} for function translate") } }), BuiltinScalarFunction::Trim => Arc::new(|args| match args[0].data_type() { @@ -860,13 +861,13 @@ pub fn create_physical_fun( DataType::LargeUtf8 => { make_scalar_function_inner(string_expressions::btrim::)(args) } - other => internal_err!("Unsupported data type {other:?} for function trim"), + other => exec_err!("Unsupported data type {other:?} for function trim"), }), BuiltinScalarFunction::Upper => Arc::new(string_expressions::upper), BuiltinScalarFunction::Uuid => Arc::new(string_expressions::uuid), BuiltinScalarFunction::ArrowTypeof => Arc::new(move |args| { if args.len() != 1 { - return internal_err!( + return exec_err!( "arrow_typeof function requires 1 arguments, got {}", args.len() ); @@ -884,9 +885,7 @@ pub fn create_physical_fun( DataType::LargeUtf8 => { make_scalar_function_inner(string_expressions::overlay::)(args) } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {other:?} for function overlay", - ))), + other => exec_err!("Unsupported data type {other:?} for function overlay"), }), BuiltinScalarFunction::Levenshtein => { Arc::new(|args| match args[0].data_type() { @@ -896,9 +895,9 @@ pub fn create_physical_fun( DataType::LargeUtf8 => make_scalar_function_inner( string_expressions::levenshtein::, )(args), - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {other:?} for function levenshtein", - ))), + other => { + exec_err!("Unsupported data type {other:?} for function levenshtein") + } }) } BuiltinScalarFunction::SubstrIndex => { @@ -919,9 +918,9 @@ pub fn 
create_physical_fun( ); make_scalar_function_inner(func)(args) } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {other:?} for function substr_index", - ))), + other => { + exec_err!("Unsupported data type {other:?} for function substr_index") + } }) } BuiltinScalarFunction::FindInSet => Arc::new(|args| match args[0].data_type() { @@ -941,9 +940,9 @@ pub fn create_physical_fun( ); make_scalar_function_inner(func)(args) } - other => Err(DataFusionError::Internal(format!( - "Unsupported data type {other:?} for function find_in_set", - ))), + other => { + exec_err!("Unsupported data type {other:?} for function find_in_set") + } }), }) } @@ -1023,7 +1022,7 @@ mod tests { record_batch::RecordBatch, }; use datafusion_common::cast::{as_boolean_array, as_uint64_array}; - use datafusion_common::{exec_err, plan_err}; + use datafusion_common::{exec_err, internal_err, plan_err}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::type_coercion::functions::data_types; use datafusion_expr::Signature; diff --git a/datafusion/physical-expr/src/math_expressions.rs b/datafusion/physical-expr/src/math_expressions.rs index b622aee8e2b3..98a05dff5386 100644 --- a/datafusion/physical-expr/src/math_expressions.rs +++ b/datafusion/physical-expr/src/math_expressions.rs @@ -17,19 +17,20 @@ //! Math expressions +use std::any::type_name; +use std::iter; +use std::mem::swap; +use std::sync::Arc; + use arrow::array::ArrayRef; use arrow::array::{BooleanArray, Float32Array, Float64Array, Int64Array}; use arrow::datatypes::DataType; -use datafusion_common::internal_err; -use datafusion_common::ScalarValue; +use rand::{thread_rng, Rng}; + use datafusion_common::ScalarValue::{Float32, Int64}; +use datafusion_common::{exec_err, ScalarValue}; use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; -use rand::{thread_rng, Rng}; -use std::any::type_name; -use std::iter; -use std::mem::swap; -use std::sync::Arc; macro_rules! downcast_compute_op { ($ARRAY:expr, $NAME:expr, $FUNC:ident, $TYPE:ident) => {{ @@ -40,7 +41,7 @@ macro_rules! downcast_compute_op { arrow::compute::kernels::arity::unary(array, |x| x.$FUNC()); Ok(Arc::new(res)) } - _ => internal_err!("Invalid data type for {}", $NAME), + _ => exec_err!("Invalid data type for {}", $NAME), } }}; } @@ -57,11 +58,9 @@ macro_rules! unary_primitive_array_op { let result = downcast_compute_op!(array, $NAME, $FUNC, Float64Array); Ok(ColumnarValue::Array(result?)) } - other => internal_err!( - "Unsupported data type {:?} for function {}", - other, - $NAME - ), + other => { + exec_err!("Unsupported data type {:?} for function {}", other, $NAME) + } }, ColumnarValue::Scalar(a) => match a { ScalarValue::Float32(a) => Ok(ColumnarValue::Scalar( @@ -70,7 +69,7 @@ macro_rules! 
unary_primitive_array_op { ScalarValue::Float64(a) => Ok(ColumnarValue::Scalar( ScalarValue::Float64(a.map(|x| x.$FUNC())), )), - _ => internal_err!( + _ => exec_err!( "Unsupported data type {:?} for function {}", ($VALUE).data_type(), $NAME @@ -187,7 +186,7 @@ pub fn factorial(args: &[ArrayRef]) -> Result { Int64Array, { |value: i64| { (1..=value).product() } } )) as ArrayRef), - other => internal_err!("Unsupported data type {other:?} for function factorial."), + other => exec_err!("Unsupported data type {other:?} for function factorial."), } } @@ -234,7 +233,7 @@ pub fn gcd(args: &[ArrayRef]) -> Result { Int64Array, { compute_gcd } )) as ArrayRef), - other => internal_err!("Unsupported data type {other:?} for function gcd"), + other => exec_err!("Unsupported data type {other:?} for function gcd"), } } @@ -260,7 +259,7 @@ pub fn lcm(args: &[ArrayRef]) -> Result { Int64Array, { compute_lcm } )) as ArrayRef), - other => internal_err!("Unsupported data type {other:?} for function lcm"), + other => exec_err!("Unsupported data type {other:?} for function lcm"), } } @@ -305,7 +304,7 @@ pub fn nanvl(args: &[ArrayRef]) -> Result { )) as ArrayRef) } - other => internal_err!("Unsupported data type {other:?} for function nanvl"), + other => exec_err!("Unsupported data type {other:?} for function nanvl"), } } @@ -328,7 +327,7 @@ pub fn isnan(args: &[ArrayRef]) -> Result { { f32::is_nan } )) as ArrayRef), - other => internal_err!("Unsupported data type {other:?} for function isnan"), + other => exec_err!("Unsupported data type {other:?} for function isnan"), } } @@ -351,14 +350,14 @@ pub fn iszero(args: &[ArrayRef]) -> Result { { |x: f32| { x == 0_f32 } } )) as ArrayRef), - other => internal_err!("Unsupported data type {other:?} for function iszero"), + other => exec_err!("Unsupported data type {other:?} for function iszero"), } } /// Pi SQL function pub fn pi(args: &[ColumnarValue]) -> Result { if !matches!(&args[0], ColumnarValue::Array(_)) { - return internal_err!("Expect pi function to take no param"); + return exec_err!("Expect pi function to take no param"); } let array = Float64Array::from_value(std::f64::consts::PI, 1); Ok(ColumnarValue::Array(Arc::new(array))) @@ -368,7 +367,7 @@ pub fn pi(args: &[ColumnarValue]) -> Result { pub fn random(args: &[ColumnarValue]) -> Result { let len: usize = match &args[0] { ColumnarValue::Array(array) => array.len(), - _ => return internal_err!("Expect random function to take no param"), + _ => return exec_err!("Expect random function to take no param"), }; let mut rng = thread_rng(); let values = iter::repeat_with(|| rng.gen_range(0.0..1.0)).take(len); @@ -379,7 +378,7 @@ pub fn random(args: &[ColumnarValue]) -> Result { /// Round SQL function pub fn round(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { - return internal_err!( + return exec_err!( "round function requires one or two arguments, got {}", args.len() ); @@ -423,9 +422,9 @@ pub fn round(args: &[ArrayRef]) -> Result { } } )) as ArrayRef), - _ => internal_err!( - "round function requires a scalar or array for decimal_places" - ), + _ => { + exec_err!("round function requires a scalar or array for decimal_places") + } }, DataType::Float32 => match decimal_places { @@ -459,12 +458,12 @@ pub fn round(args: &[ArrayRef]) -> Result { } } )) as ArrayRef), - _ => internal_err!( - "round function requires a scalar or array for decimal_places" - ), + _ => { + exec_err!("round function requires a scalar or array for decimal_places") + } }, - other => internal_err!("Unsupported data type 
{other:?} for function round"), + other => exec_err!("Unsupported data type {other:?} for function round"), } } @@ -489,7 +488,7 @@ pub fn power(args: &[ArrayRef]) -> Result { { i64::pow } )) as ArrayRef), - other => internal_err!("Unsupported data type {other:?} for function power"), + other => exec_err!("Unsupported data type {other:?} for function power"), } } @@ -514,7 +513,7 @@ pub fn atan2(args: &[ArrayRef]) -> Result { { f32::atan2 } )) as ArrayRef), - other => internal_err!("Unsupported data type {other:?} for function atan2"), + other => exec_err!("Unsupported data type {other:?} for function atan2"), } } @@ -547,7 +546,7 @@ pub fn log(args: &[ArrayRef]) -> Result { Float64Array, { f64::log } )) as ArrayRef), - _ => internal_err!("log function requires a scalar or array for base"), + _ => exec_err!("log function requires a scalar or array for base"), }, DataType::Float32 => match base { @@ -565,10 +564,10 @@ pub fn log(args: &[ArrayRef]) -> Result { Float32Array, { f32::log } )) as ArrayRef), - _ => internal_err!("log function requires a scalar or array for base"), + _ => exec_err!("log function requires a scalar or array for base"), }, - other => internal_err!("Unsupported data type {other:?} for function log"), + other => exec_err!("Unsupported data type {other:?} for function log"), } } @@ -589,7 +588,7 @@ pub fn cot(args: &[ArrayRef]) -> Result { { compute_cot32 } )) as ArrayRef), - other => internal_err!("Unsupported data type {other:?} for function cot"), + other => exec_err!("Unsupported data type {other:?} for function cot"), } } @@ -606,7 +605,7 @@ fn compute_cot64(x: f64) -> f64 { /// Truncate(numeric, decimalPrecision) and trunc(numeric) SQL function pub fn trunc(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { - return internal_err!( + return exec_err!( "truncate function requires one or two arguments, got {}", args.len() ); @@ -635,7 +634,7 @@ pub fn trunc(args: &[ArrayRef]) -> Result { Int64Array, { compute_truncate64 } )) as ArrayRef), - _ => internal_err!("trunc function requires a scalar or array for precision"), + _ => exec_err!("trunc function requires a scalar or array for precision"), }, DataType::Float32 => match precision { ColumnarValue::Scalar(Int64(Some(0))) => Ok(Arc::new( @@ -650,9 +649,9 @@ pub fn trunc(args: &[ArrayRef]) -> Result { Int64Array, { compute_truncate32 } )) as ArrayRef), - _ => internal_err!("trunc function requires a scalar or array for precision"), + _ => exec_err!("trunc function requires a scalar or array for precision"), }, - other => internal_err!("Unsupported data type {other:?} for function trunc"), + other => exec_err!("Unsupported data type {other:?} for function trunc"), } } @@ -668,13 +667,14 @@ fn compute_truncate64(x: f64, y: i64) -> f64 { #[cfg(test)] mod tests { - - use super::*; use arrow::array::{Float64Array, NullArray}; + use datafusion_common::cast::{ as_boolean_array, as_float32_array, as_float64_array, as_int64_array, }; + use super::*; + #[test] fn test_random_expression() { let args = vec![ColumnarValue::Array(Arc::new(NullArray::new(1)))]; diff --git a/datafusion/physical-expr/src/regex_expressions.rs b/datafusion/physical-expr/src/regex_expressions.rs index b1334854ba0b..846e5801af1c 100644 --- a/datafusion/physical-expr/src/regex_expressions.rs +++ b/datafusion/physical-expr/src/regex_expressions.rs @@ -21,19 +21,18 @@ //! 
Regex expressions +use std::sync::{Arc, OnceLock}; + use arrow::array::{ new_null_array, Array, ArrayDataBuilder, ArrayRef, BufferBuilder, GenericStringArray, OffsetSizeTrait, }; +use hashbrown::HashMap; +use regex::Regex; use datafusion_common::{arrow_datafusion_err, exec_err, plan_err}; -use datafusion_common::{ - cast::as_generic_string_array, internal_err, DataFusionError, Result, -}; +use datafusion_common::{cast::as_generic_string_array, DataFusionError, Result}; use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; -use hashbrown::HashMap; -use regex::Regex; -use std::sync::{Arc, OnceLock}; use crate::functions::{ make_scalar_function_inner, make_scalar_function_with_hints, Hint, @@ -188,7 +187,7 @@ pub fn regexp_match(args: &[ArrayRef]) -> Result { arrow_string::regexp::regexp_match(values, regex, Some(flags)) .map_err(|e| arrow_datafusion_err!(e)) } - other => internal_err!( + other => exec_err!( "regexp_match was called with {other} arguments. It requires at least 2 and at most 3." ), } @@ -341,7 +340,7 @@ pub fn regexp_replace(args: &[ArrayRef]) -> Result Ok(Arc::new(result) as ArrayRef) } - other => internal_err!( + other => exec_err!( "regexp_replace was called with {other} arguments. It requires at least 3 and at most 4." ), } @@ -374,7 +373,7 @@ fn _regexp_replace_static_pattern_replace( 3 => None, 4 => Some(fetch_string_arg!(&args[3], "flags", T, _regexp_replace_early_abort)), other => { - return internal_err!( + return exec_err!( "regexp_replace was called with {other} arguments. It requires at least 3 and at most 4." ) } diff --git a/datafusion/physical-expr/src/string_expressions.rs b/datafusion/physical-expr/src/string_expressions.rs index 34a436ebe3cd..6a4a29763e4b 100644 --- a/datafusion/physical-expr/src/string_expressions.rs +++ b/datafusion/physical-expr/src/string_expressions.rs @@ -21,6 +21,12 @@ //! 
String expressions +use std::sync::Arc; +use std::{ + fmt::{Display, Formatter}, + iter, +}; + use arrow::{ array::{ Array, ArrayRef, GenericStringArray, Int32Array, Int64Array, OffsetSizeTrait, @@ -28,6 +34,8 @@ use arrow::{ }, datatypes::{ArrowNativeType, ArrowPrimitiveType, DataType}, }; +use uuid::Uuid; + use datafusion_common::utils::datafusion_strsim; use datafusion_common::{ cast::{ @@ -35,14 +43,8 @@ use datafusion_common::{ }, exec_err, ScalarValue, }; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; -use std::sync::Arc; -use std::{ - fmt::{Display, Formatter}, - iter, -}; -use uuid::Uuid; /// applies a unary expression to `args[0]` that is expected to be downcastable to /// a `GenericStringArray` and returns a `GenericStringArray` (which may have a different offset) @@ -62,7 +64,7 @@ where F: Fn(&'a str) -> R, { if args.len() != 1 { - return internal_err!( + return exec_err!( "{:?} args were supplied but {} takes exactly one argument", args.len(), name @@ -102,7 +104,7 @@ where &[a.as_ref()], op, name )?))) } - other => internal_err!("Unsupported data type {other:?} for function {name}"), + other => exec_err!("Unsupported data type {other:?} for function {name}"), }, ColumnarValue::Scalar(scalar) => match scalar { ScalarValue::Utf8(a) => { @@ -113,7 +115,7 @@ where let result = a.as_ref().map(|x| (op)(x).as_ref().to_string()); Ok(ColumnarValue::Scalar(ScalarValue::LargeUtf8(result))) } - other => internal_err!("Unsupported data type {other:?} for function {name}"), + other => exec_err!("Unsupported data type {other:?} for function {name}"), }, } } @@ -170,7 +172,7 @@ pub fn chr(args: &[ArrayRef]) -> Result { pub fn concat(args: &[ColumnarValue]) -> Result { // do not accept 0 arguments. if args.is_empty() { - return internal_err!( + return exec_err!( "concat was called with {} arguments. It requires at least 1.", args.len() ); @@ -236,7 +238,7 @@ pub fn concat_ws(args: &[ArrayRef]) -> Result { // do not accept 0 or 1 arguments. if args.len() < 2 { - return internal_err!( + return exec_err!( "concat_ws was called with {} arguments. It requires at least 2.", args.len() ); @@ -333,7 +335,7 @@ pub fn instr(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } other => { - internal_err!( + exec_err!( "instr was called with {other} datatype arguments. It requires Utf8 or LargeUtf8." ) } @@ -410,7 +412,7 @@ fn general_trim( Ok(Arc::new(result) as ArrayRef) } other => { - internal_err!( + exec_err!( "{trim_type} was called with {other} arguments. It requires at least 1 and at most 2." ) } @@ -541,7 +543,7 @@ where } else if let Some(value_isize) = value.to_isize() { Ok(Some(format!("{value_isize:x}"))) } else { - internal_err!("Unsupported data type {integer:?} for function to_hex") + exec_err!("Unsupported data type {integer:?} for function to_hex") } } else { Ok(None) @@ -563,7 +565,7 @@ pub fn upper(args: &[ColumnarValue]) -> Result { pub fn uuid(args: &[ColumnarValue]) -> Result { let len: usize = match &args[0] { ColumnarValue::Array(array) => array.len(), - _ => return internal_err!("Expect uuid function to take no param"), + _ => return exec_err!("Expect uuid function to take no param"), }; let values = iter::repeat_with(|| Uuid::new_v4().to_string()).take(len); @@ -654,9 +656,7 @@ pub fn overlay(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } other => { - internal_err!( - "overlay was called with {other} arguments. It requires 3 or 4." 
- ) + exec_err!("overlay was called with {other} arguments. It requires 3 or 4.") } } } @@ -665,10 +665,10 @@ pub fn overlay(args: &[ArrayRef]) -> Result { /// LEVENSHTEIN('kitten', 'sitting') = 3 pub fn levenshtein(args: &[ArrayRef]) -> Result { if args.len() != 2 { - return Err(DataFusionError::Internal(format!( + return exec_err!( "levenshtein function requires two arguments, got {}", args.len() - ))); + ); } let str1_array = as_generic_string_array::(&args[0])?; let str2_array = as_generic_string_array::(&args[1])?; @@ -700,7 +700,7 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } other => { - internal_err!( + exec_err!( "levenshtein was called with {other} datatype arguments. It requires Utf8 or LargeUtf8." ) } @@ -709,12 +709,13 @@ pub fn levenshtein(args: &[ArrayRef]) -> Result { #[cfg(test)] mod tests { - - use crate::string_expressions; use arrow::{array::Int32Array, datatypes::Int32Type}; use arrow_array::Int64Array; + use datafusion_common::cast::as_int32_array; + use crate::string_expressions; + use super::*; #[test] diff --git a/datafusion/physical-expr/src/unicode_expressions.rs b/datafusion/physical-expr/src/unicode_expressions.rs index 240efe4223c3..3209a6176fad 100644 --- a/datafusion/physical-expr/src/unicode_expressions.rs +++ b/datafusion/physical-expr/src/unicode_expressions.rs @@ -21,18 +21,20 @@ //! Unicode expressions +use std::cmp::{max, Ordering}; +use std::sync::Arc; + use arrow::{ array::{ArrayRef, GenericStringArray, OffsetSizeTrait, PrimitiveArray}, datatypes::{ArrowNativeType, ArrowPrimitiveType}, }; +use hashbrown::HashMap; +use unicode_segmentation::UnicodeSegmentation; + use datafusion_common::{ cast::{as_generic_string_array, as_int64_array}, - exec_err, internal_err, DataFusionError, Result, + exec_err, DataFusionError, Result, }; -use hashbrown::HashMap; -use std::cmp::{max, Ordering}; -use std::sync::Arc; -use unicode_segmentation::UnicodeSegmentation; /// Returns number of characters in the string. /// character_length('josé') = 4 @@ -312,7 +314,7 @@ pub fn rpad(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } - other => internal_err!( + other => exec_err!( "rpad was called with {other} arguments. It requires at least 2 and at most 3." ), } @@ -407,7 +409,7 @@ pub fn substr(args: &[ArrayRef]) -> Result { Ok(Arc::new(result) as ArrayRef) } other => { - internal_err!("substr was called with {other} arguments. It requires 2 or 3.") + exec_err!("substr was called with {other} arguments. It requires 2 or 3.") } } } @@ -463,7 +465,7 @@ pub fn translate(args: &[ArrayRef]) -> Result { /// SUBSTRING_INDEX('www.apache.org', '.', -1) = org pub fn substr_index(args: &[ArrayRef]) -> Result { if args.len() != 3 { - return internal_err!( + return exec_err!( "substr_index was called with {} arguments. It requires 3.", args.len() ); @@ -528,7 +530,7 @@ where T::Native: OffsetSizeTrait, { if args.len() != 2 { - return internal_err!( + return exec_err!( "find_in_set was called with {} arguments. 
It requires 2.", args.len() ); From eced5bc002b1a0442f55e43751973292f88d17d9 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Tue, 27 Feb 2024 19:04:09 +0300 Subject: [PATCH 29/45] Address reviews --- .../examples/custom_datasource.rs | 12 ++-- .../datasource/physical_plan/arrow_file.rs | 14 ++-- .../core/src/datasource/physical_plan/avro.rs | 14 ++-- .../core/src/datasource/physical_plan/csv.rs | 14 ++-- .../core/src/datasource/physical_plan/json.rs | 14 ++-- .../datasource/physical_plan/parquet/mod.rs | 16 ++--- .../enforce_distribution.rs | 12 ++-- .../physical_optimizer/output_requirements.rs | 12 ++-- datafusion/core/src/physical_planner.rs | 12 ++-- datafusion/core/src/test/mod.rs | 12 ++-- datafusion/core/src/test_util/mod.rs | 14 ++-- datafusion/core/tests/custom_sources.rs | 12 ++-- .../provider_filter_pushdown.rs | 12 ++-- .../tests/custom_sources_cases/statistics.rs | 12 ++-- .../tests/user_defined/user_defined_plan.rs | 12 ++-- .../physical-plan/src/aggregates/mod.rs | 24 +++---- datafusion/physical-plan/src/analyze.rs | 14 ++-- .../physical-plan/src/coalesce_batches.rs | 12 ++-- .../physical-plan/src/coalesce_partitions.rs | 12 ++-- datafusion/physical-plan/src/display.rs | 4 +- datafusion/physical-plan/src/empty.rs | 12 ++-- datafusion/physical-plan/src/explain.rs | 12 ++-- datafusion/physical-plan/src/filter.rs | 15 +++-- datafusion/physical-plan/src/insert.rs | 10 +-- .../physical-plan/src/joins/cross_join.rs | 18 ++--- .../physical-plan/src/joins/hash_join.rs | 18 ++--- .../src/joins/nested_loop_join.rs | 19 +++--- .../src/joins/sort_merge_join.rs | 19 +++--- .../src/joins/symmetric_hash_join.rs | 19 +++--- datafusion/physical-plan/src/lib.rs | 20 +++--- datafusion/physical-plan/src/limit.rs | 22 +++--- datafusion/physical-plan/src/memory.rs | 14 ++-- .../physical-plan/src/placeholder_row.rs | 16 ++--- datafusion/physical-plan/src/projection.rs | 16 ++--- .../physical-plan/src/recursive_query.rs | 12 ++-- .../physical-plan/src/repartition/mod.rs | 17 +++-- .../physical-plan/src/sorts/partial_sort.rs | 14 ++-- datafusion/physical-plan/src/sorts/sort.rs | 15 ++--- .../src/sorts/sort_preserving_merge.rs | 12 ++-- datafusion/physical-plan/src/streaming.rs | 14 ++-- datafusion/physical-plan/src/test/exec.rs | 67 ++++++++++--------- datafusion/physical-plan/src/union.rs | 30 ++++----- datafusion/physical-plan/src/unnest.rs | 14 ++-- datafusion/physical-plan/src/values.rs | 12 ++-- .../src/windows/bounded_window_agg_exec.rs | 14 ++-- .../src/windows/window_agg_exec.rs | 14 ++-- datafusion/physical-plan/src/work_table.rs | 14 ++-- 47 files changed, 369 insertions(+), 370 deletions(-) diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_datasource.rs index 1ce3ced0e1c4..0b7e3d4c6442 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_datasource.rs @@ -31,7 +31,7 @@ use datafusion::execution::context::{SessionState, TaskContext}; use datafusion::physical_plan::memory::MemoryStream; use datafusion::physical_plan::{ project_schema, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, - Partitioning, PlanPropertiesCache, SendableRecordBatchStream, + Partitioning, PlanProperties, SendableRecordBatchStream, }; use datafusion::prelude::*; use datafusion_expr::{Expr, LogicalPlanBuilder}; @@ -190,7 +190,7 @@ impl TableProvider for CustomDataSource { struct CustomExec { db: CustomDataSource, projected_schema: SchemaRef, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl 
CustomExec { @@ -200,7 +200,7 @@ impl CustomExec { db: CustomDataSource, ) -> Self { let projected_schema = project_schema(&schema, projections).unwrap(); - let cache = Self::create_cache(projected_schema.clone()); + let cache = Self::compute_properties(projected_schema.clone()); Self { db, projected_schema, @@ -209,9 +209,9 @@ impl CustomExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), ExecutionMode::Bounded, @@ -230,7 +230,7 @@ impl ExecutionPlan for CustomExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index 8eebc2b68f8b..8f010f1dcbf8 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -37,7 +37,7 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::Statistics; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; -use datafusion_physical_plan::{ExecutionMode, PlanPropertiesCache}; +use datafusion_physical_plan::{ExecutionMode, PlanProperties}; use futures::StreamExt; use itertools::Itertools; @@ -53,7 +53,7 @@ pub struct ArrowExec { projected_output_ordering: Vec, /// Execution metrics metrics: ExecutionPlanMetricsSet, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl ArrowExec { @@ -61,7 +61,7 @@ impl ArrowExec { pub fn new(base_config: FileScanConfig) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = Self::create_cache( + let cache = Self::compute_properties( projected_schema.clone(), &projected_output_ordering, &base_config, @@ -85,16 +85,16 @@ impl ArrowExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
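In isolation, the pattern these hunks converge on (compute `PlanProperties` once in the constructor, then hand out a reference from `properties()`) looks like the following sketch. `MyScanExec` is invented for illustration, and the import paths assume the crate layout at this commit:

```rust
use std::sync::Arc;

use arrow_schema::SchemaRef;
use datafusion_physical_expr::EquivalenceProperties;
use datafusion_physical_plan::{ExecutionMode, Partitioning, PlanProperties};

/// Hypothetical leaf operator illustrating the renamed API.
struct MyScanExec {
    schema: SchemaRef,
    cache: PlanProperties,
}

impl MyScanExec {
    fn new(schema: SchemaRef) -> Self {
        // Compute the properties once, at construction time; `properties()`
        // then returns a reference instead of recomputing on every call.
        let cache = PlanProperties::new(
            EquivalenceProperties::new(Arc::clone(&schema)), // no known orderings
            Partitioning::UnknownPartitioning(1),            // single output partition
            ExecutionMode::Bounded,                          // finite input
        );
        Self { schema, cache }
    }

    fn properties(&self) -> &PlanProperties {
        &self.cache
    }
}
```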
- fn create_cache( + fn compute_properties( schema: SchemaRef, projected_output_ordering: &[LexOrdering], file_scan_config: &FileScanConfig, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Equivalence Properties let eq_properties = EquivalenceProperties::new_with_orderings(schema, projected_output_ordering); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Self::output_partitioning_helper(file_scan_config), // Output Partitioning ExecutionMode::Bounded, // Execution Mode @@ -126,7 +126,7 @@ impl ExecutionPlan for ArrowExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 9d65a0ce089a..2b913d862576 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -25,7 +25,7 @@ use crate::error::Result; use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanPropertiesCache, SendableRecordBatchStream, Statistics, + PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::datatypes::SchemaRef; @@ -42,7 +42,7 @@ pub struct AvroExec { projected_output_ordering: Vec, /// Execution metrics metrics: ExecutionPlanMetricsSet, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl AvroExec { @@ -50,7 +50,7 @@ impl AvroExec { pub fn new(base_config: FileScanConfig) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = Self::create_cache( + let cache = Self::compute_properties( projected_schema.clone(), &projected_output_ordering, &base_config, @@ -70,16 +70,16 @@ impl AvroExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( schema: SchemaRef, orderings: &[LexOrdering], file_scan_config: &FileScanConfig, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Equivalence Properties let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); let n_partitions = file_scan_config.file_groups.len(); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(n_partitions), // Output Partitioning ExecutionMode::Bounded, // Execution Mode @@ -103,7 +103,7 @@ impl ExecutionPlan for AvroExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 964f40b8e002..a509121a82c8 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -33,7 +33,7 @@ use crate::error::{DataFusionError, Result}; use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanPropertiesCache, SendableRecordBatchStream, Statistics, + PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::csv; @@ -61,7 +61,7 @@ pub struct CsvExec { metrics: ExecutionPlanMetricsSet, /// Compression type of the file associated with CsvExec pub file_compression_type: FileCompressionType, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl CsvExec { @@ -76,7 +76,7 @@ impl CsvExec { ) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = Self::create_cache( + let cache = Self::compute_properties( projected_schema, &projected_output_ordering, &base_config, @@ -122,15 +122,15 @@ impl CsvExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( schema: SchemaRef, orderings: &[LexOrdering], file_scan_config: &FileScanConfig, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Equivalence Properties let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Self::output_partitioning_helper(file_scan_config), // Output Partitioning ExecutionMode::Bounded, // Execution Mode @@ -164,7 +164,7 @@ impl ExecutionPlan for CsvExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index b27bcdaa917c..7b0e84c4410b 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -33,7 +33,7 @@ use crate::error::{DataFusionError, Result}; use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanPropertiesCache, SendableRecordBatchStream, Statistics, + PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::json::ReaderBuilder; @@ -55,7 +55,7 @@ pub struct NdJsonExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, file_compression_type: FileCompressionType, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl NdJsonExec { @@ -66,7 +66,7 @@ impl NdJsonExec { ) -> Self { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = Self::create_cache( + let cache = Self::compute_properties( projected_schema, &projected_output_ordering, &base_config, @@ -90,15 +90,15 @@ impl NdJsonExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( schema: SchemaRef, orderings: &[LexOrdering], file_scan_config: &FileScanConfig, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Equivalence Properties let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Self::output_partitioning_helper(file_scan_config), // Output Partitioning ExecutionMode::Bounded, // Execution Mode @@ -129,7 +129,7 @@ impl ExecutionPlan for NdJsonExec { fn as_any(&self) -> &dyn Any { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index e2ad1980d422..300ced3a7657 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -38,8 +38,8 @@ use crate::{ physical_optimizer::pruning::PruningPredicate, physical_plan::{ metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}, - DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanPropertiesCache, SendableRecordBatchStream, Statistics, + DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, + SendableRecordBatchStream, Statistics, }, }; @@ -100,7 +100,7 @@ pub struct ParquetExec { metadata_size_hint: Option, /// Optional user defined parquet file reader factory parquet_file_reader_factory: Option>, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl ParquetExec { @@ -148,7 +148,7 @@ impl ParquetExec { let (projected_schema, projected_statistics, projected_output_ordering) = base_config.project(); - let cache = Self::create_cache( + let cache = Self::compute_properties( projected_schema, &projected_output_ordering, &base_config, @@ -267,15 +267,15 @@ impl ParquetExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( schema: SchemaRef, orderings: &[LexOrdering], file_config: &FileScanConfig, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Equivalence Properties let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Self::output_partitioning_helper(file_config), // Output Partitioning ExecutionMode::Bounded, // Execution Mode @@ -335,7 +335,7 @@ impl ExecutionPlan for ParquetExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 86a490278b0e..c7ffc7838b36 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -1331,7 +1331,7 @@ pub(crate) mod tests { expressions, expressions::binary, expressions::lit, expressions::Column, LexOrdering, PhysicalExpr, PhysicalSortExpr, PhysicalSortRequirement, }; - use datafusion_physical_plan::PlanPropertiesCache; + use datafusion_physical_plan::PlanProperties; /// Models operators like BoundedWindowExec that require an input /// ordering but is easy to construct @@ -1339,7 +1339,7 @@ pub(crate) mod tests { struct SortRequiredExec { input: Arc, expr: LexOrdering, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl SortRequiredExec { @@ -1352,7 +1352,7 @@ pub(crate) mod tests { input: Arc, requirement: Vec, ) -> Self { - let cache = Self::create_cache(&input); + let cache = Self::compute_properties(&input); Self { input, expr: requirement, @@ -1361,8 +1361,8 @@ pub(crate) mod tests { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache(input: &Arc) -> PlanPropertiesCache { - PlanPropertiesCache::new( + fn compute_properties(input: &Arc) -> PlanProperties { + PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties input.output_partitioning().clone(), // Output Partitioning input.execution_mode(), // Execution Mode @@ -1389,7 +1389,7 @@ pub(crate) mod tests { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs index 129ae538808f..992a6e7f82c0 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/core/src/physical_optimizer/output_requirements.rs @@ -33,7 +33,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr::{Distribution, LexRequirement, PhysicalSortRequirement}; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion_physical_plan::PlanPropertiesCache; +use datafusion_physical_plan::PlanProperties; /// This rule either adds or removes [`OutputRequirements`]s to/from the physical /// plan according to its `mode` attribute, which is set by the constructors @@ -90,7 +90,7 @@ pub(crate) struct OutputRequirementExec { input: Arc, order_requirement: Option, dist_requirement: Distribution, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl OutputRequirementExec { @@ -99,7 +99,7 @@ impl OutputRequirementExec { requirements: Option, dist_requirement: Distribution, ) -> Self { - let cache = Self::create_cache(&input); + let cache = Self::compute_properties(&input); Self { input, order_requirement: requirements, @@ -113,8 +113,8 @@ impl OutputRequirementExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache(input: &Arc) -> PlanPropertiesCache { - PlanPropertiesCache::new( + fn compute_properties(input: &Arc) -> PlanProperties { + PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties input.output_partitioning().clone(), // Output Partitioning input.execution_mode(), // Execution Mode @@ -137,7 +137,7 @@ impl ExecutionPlan for OutputRequirementExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 8049c3940a1a..7e858953ac8f 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1995,7 +1995,7 @@ mod tests { use crate::datasource::MemTable; use crate::physical_plan::{ expressions, DisplayAs, DisplayFormatType, ExecutionMode, Partitioning, - PlanPropertiesCache, SendableRecordBatchStream, + PlanProperties, SendableRecordBatchStream, }; use crate::physical_planner::PhysicalPlanner; use crate::prelude::{SessionConfig, SessionContext}; @@ -2575,19 +2575,19 @@ mod tests { #[derive(Debug)] struct NoOpExecutionPlan { - cache: PlanPropertiesCache, + cache: PlanProperties, } impl NoOpExecutionPlan { fn new(schema: SchemaRef) -> Self { - let cache = Self::create_cache(schema.clone()); + let cache = Self::compute_properties(schema.clone()); Self { cache } } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Output Partitioning Partitioning::UnknownPartitioning(1), @@ -2613,7 +2613,7 @@ mod tests { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index e5d8f6ebda32..0042554f6c73 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -45,7 +45,7 @@ use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalSortExpr}; use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; use datafusion_physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, PlanPropertiesCache, + DisplayAs, DisplayFormatType, ExecutionMode, PlanProperties, }; #[cfg(feature = "compression")] @@ -367,7 +367,7 @@ pub fn csv_exec_ordered( pub struct StatisticsExec { stats: Statistics, schema: Arc, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl StatisticsExec { @@ -376,7 +376,7 @@ impl StatisticsExec { stats.column_statistics.len(), schema.fields().len(), "if defined, the column statistics vector length should be the number of fields" ); - let cache = Self::create_cache(Arc::new(schema.clone())); + let cache = Self::compute_properties(Arc::new(schema.clone())); Self { stats, schema: Arc::new(schema), @@ -385,9 +385,9 @@ impl StatisticsExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Output Partitioning Partitioning::UnknownPartitioning(2), @@ -421,7 +421,7 @@ impl ExecutionPlan for StatisticsExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/src/test_util/mod.rs b/datafusion/core/src/test_util/mod.rs index 55a30b07d893..3244ad49d1c6 100644 --- a/datafusion/core/src/test_util/mod.rs +++ b/datafusion/core/src/test_util/mod.rs @@ -38,7 +38,7 @@ use crate::execution::context::{SessionState, TaskContext}; use crate::logical_expr::{LogicalPlanBuilder, UNNAMED_TABLE}; use crate::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, + PlanProperties, RecordBatchStream, SendableRecordBatchStream, }; use crate::prelude::{CsvReadOptions, SessionContext}; @@ -227,7 +227,7 @@ impl TableProvider for TestTableProvider { pub struct UnboundedExec { batch_produce: Option, batch: RecordBatch, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl UnboundedExec { /// Create new exec that clones the given record batch to its output. @@ -238,7 +238,7 @@ impl UnboundedExec { batch: RecordBatch, partitions: usize, ) -> Self { - let cache = Self::create_cache(batch.schema(), batch_produce, partitions); + let cache = Self::compute_properties(batch.schema(), batch_produce, partitions); Self { batch_produce, batch, @@ -247,18 +247,18 @@ impl UnboundedExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( schema: SchemaRef, batch_produce: Option, n_partitions: usize, - ) -> PlanPropertiesCache { + ) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); let mode = if batch_produce.is_none() { ExecutionMode::Unbounded } else { ExecutionMode::Bounded }; - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(n_partitions), mode, @@ -289,7 +289,7 @@ impl ExecutionPlan for UnboundedExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/tests/custom_sources.rs b/datafusion/core/tests/custom_sources.rs index f62a3f723ad7..aa3f35e29541 100644 --- a/datafusion/core/tests/custom_sources.rs +++ b/datafusion/core/tests/custom_sources.rs @@ -40,7 +40,7 @@ use datafusion_common::project_schema; use datafusion_common::stats::Precision; use datafusion_physical_expr::EquivalenceProperties; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; -use datafusion_physical_plan::{ExecutionMode, PlanPropertiesCache}; +use datafusion_physical_plan::{ExecutionMode, PlanProperties}; use async_trait::async_trait; use futures::stream::Stream; @@ -74,7 +74,7 @@ struct CustomTableProvider; #[derive(Debug, Clone)] struct CustomExecutionPlan { projection: Option>, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl CustomExecutionPlan { @@ -82,14 +82,14 @@ impl CustomExecutionPlan { let schema = TEST_CUSTOM_SCHEMA_REF!(); let schema = project_schema(&schema, projection.as_ref()).expect("projected schema"); - let cache = Self::create_cache(schema); + let cache = Self::compute_properties(schema); Self { projection, cache } } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Output Partitioning Partitioning::UnknownPartitioning(1), @@ -144,7 +144,7 @@ impl ExecutionPlan for CustomExecutionPlan { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs index dec2deb10cbb..9f6c44d4603f 100644 --- a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs +++ b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs @@ -28,7 +28,7 @@ use datafusion::logical_expr::{Expr, TableProviderFilterPushDown}; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanPropertiesCache, SendableRecordBatchStream, Statistics, + PlanProperties, SendableRecordBatchStream, Statistics, }; use datafusion::prelude::*; use datafusion::scalar::ScalarValue; @@ -58,19 +58,19 @@ fn create_batch(value: i32, num_rows: usize) -> Result { #[derive(Debug)] struct CustomPlan { batches: Vec, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl CustomPlan { fn new(schema: SchemaRef, batches: Vec) -> Self { - let cache = Self::create_cache(schema); + let cache = Self::compute_properties(schema); Self { batches, cache } } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
-    fn create_cache(schema: SchemaRef) -> PlanPropertiesCache {
+    fn compute_properties(schema: SchemaRef) -> PlanProperties {
         let eq_properties = EquivalenceProperties::new(schema);
-        PlanPropertiesCache::new(
+        PlanProperties::new(
             eq_properties,
             Partitioning::UnknownPartitioning(1),
             ExecutionMode::Bounded,
@@ -97,7 +97,7 @@ impl ExecutionPlan for CustomPlan {
         self
     }

-    fn cache(&self) -> &PlanPropertiesCache {
+    fn properties(&self) -> &PlanProperties {
         &self.cache
     }

diff --git a/datafusion/core/tests/custom_sources_cases/statistics.rs b/datafusion/core/tests/custom_sources_cases/statistics.rs
index e98781aae9bf..85ac47dc97fc 100644
--- a/datafusion/core/tests/custom_sources_cases/statistics.rs
+++ b/datafusion/core/tests/custom_sources_cases/statistics.rs
@@ -27,7 +27,7 @@ use datafusion::{
     logical_expr::Expr,
     physical_plan::{
         ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan,
-        Partitioning, PlanPropertiesCache, SendableRecordBatchStream, Statistics,
+        Partitioning, PlanProperties, SendableRecordBatchStream, Statistics,
     },
     prelude::SessionContext,
     scalar::ScalarValue,
@@ -43,7 +43,7 @@ use async_trait::async_trait;
 struct StatisticsValidation {
     stats: Statistics,
     schema: Arc<Schema>,
-    cache: PlanPropertiesCache,
+    cache: PlanProperties,
 }

 impl StatisticsValidation {
@@ -53,7 +53,7 @@
             schema.fields().len(),
             "the column statistics vector length should be the number of fields"
         );
-        let cache = Self::create_cache(schema.clone());
+        let cache = Self::compute_properties(schema.clone());
         Self {
             stats,
             schema,
@@ -62,10 +62,10 @@
     }

     /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
-    fn create_cache(schema: SchemaRef) -> PlanPropertiesCache {
+    fn compute_properties(schema: SchemaRef) -> PlanProperties {
         let eq_properties = EquivalenceProperties::new(schema);

-        PlanPropertiesCache::new(
+        PlanProperties::new(
             eq_properties,
             Partitioning::UnknownPartitioning(2),
             ExecutionMode::Bounded,
@@ -149,7 +149,7 @@ impl ExecutionPlan for StatisticsValidation {
         self
     }

-    fn cache(&self) -> &PlanPropertiesCache {
+    fn properties(&self) -> &PlanProperties {
         &self.cache
     }

diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs
index 5f01334a4757..2c12e108bb47 100644
--- a/datafusion/core/tests/user_defined/user_defined_plan.rs
+++ b/datafusion/core/tests/user_defined/user_defined_plan.rs
@@ -84,7 +84,7 @@ use datafusion::{
     physical_expr::EquivalenceProperties,
     physical_plan::{
         DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan,
-        Partitioning, PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream,
+        Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream,
         Statistics,
     },
     physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner},
@@ -412,20 +412,20 @@ struct TopKExec {
     input: Arc<dyn ExecutionPlan>,
     /// The maximum number of values
     k: usize,
-    cache: PlanPropertiesCache,
+    cache: PlanProperties,
 }

 impl TopKExec {
     fn new(input: Arc<dyn ExecutionPlan>, k: usize) -> Self {
-        let cache = Self::create_cache(input.schema());
+        let cache = Self::compute_properties(input.schema());
         Self { input, k, cache }
     }

     /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
- fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), ExecutionMode::Bounded, @@ -460,7 +460,7 @@ impl ExecutionPlan for TopKExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 35e42b8a4d36..98d41cca6764 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -20,7 +20,7 @@ use std::any::Any; use std::sync::Arc; -use super::{DisplayAs, ExecutionMode, PlanPropertiesCache}; +use super::{DisplayAs, ExecutionMode, PlanProperties}; use crate::aggregates::{ no_grouping::AggregateStream, row_hash::GroupedHashAggregateStream, topk_stream::GroupedTopKAggregateStream, @@ -265,7 +265,7 @@ pub struct AggregateExec { required_input_ordering: Option, /// Describes how the input is ordered relative to the group by columns input_order_mode: InputOrderMode, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl AggregateExec { @@ -362,7 +362,7 @@ impl AggregateExec { let required_input_ordering = (!new_requirement.is_empty()).then_some(new_requirement); - let cache = Self::create_cache( + let cache = Self::compute_properties( &input, schema.clone(), &projection_mapping, @@ -507,13 +507,13 @@ impl AggregateExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( input: &Arc, schema: SchemaRef, projection_mapping: &ProjectionMapping, mode: &AggregateMode, input_order_mode: &InputOrderMode, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Construct equivalence properties: let eq_properties = input .equivalence_properties() @@ -550,7 +550,7 @@ impl AggregateExec { exec_mode = ExecutionMode::PipelineBreaking; } - PlanPropertiesCache::new(eq_properties, output_partitioning, exec_mode) + PlanProperties::new(eq_properties, output_partitioning, exec_mode) } pub fn input_order_mode(&self) -> &InputOrderMode { @@ -641,7 +641,7 @@ impl ExecutionPlan for AggregateExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -1620,20 +1620,20 @@ mod tests { struct TestYieldingExec { /// True if this exec should yield back to runtime the first time it is polled pub yield_first: bool, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl TestYieldingExec { fn new(yield_first: bool) -> Self { let schema = some_data().0; - let cache = Self::create_cache(schema); + let cache = Self::compute_properties(schema); Self { yield_first, cache } } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
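Condensed out of the `AggregateExec` hunk above: streaming aggregation of an unbounded input is only possible when the input already arrives ordered on the group-by keys; otherwise the operator would buffer forever before emitting anything. A hypothetical restatement (`aggregate_mode` and its boolean flag are invented here; the real code keys off `InputOrderMode`):

```rust
use datafusion_physical_plan::ExecutionMode;

/// Sketch of the execution-mode rule for aggregates: an unbounded input that
/// is not ordered on the grouping keys turns the aggregate into a pipeline
/// breaker, since nothing can be emitted until the input ends.
fn aggregate_mode(input_mode: ExecutionMode, ordered_on_group_keys: bool) -> ExecutionMode {
    if input_mode.is_unbounded() && !ordered_on_group_keys {
        ExecutionMode::PipelineBreaking
    } else {
        input_mode
    }
}
```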
- fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Output Partitioning Partitioning::UnknownPartitioning(1), @@ -1662,7 +1662,7 @@ mod tests { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index 94dd6ff3bd28..fed4b97d2afb 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use std::{any::Any, time::Instant}; use super::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; -use super::{DisplayAs, Distribution, PlanPropertiesCache, SendableRecordBatchStream}; +use super::{DisplayAs, Distribution, PlanProperties, SendableRecordBatchStream}; use crate::display::DisplayableExecutionPlan; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; @@ -45,7 +45,7 @@ pub struct AnalyzeExec { pub(crate) input: Arc, /// The output schema for RecordBatches of this exec node schema: SchemaRef, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl AnalyzeExec { @@ -56,7 +56,7 @@ impl AnalyzeExec { input: Arc, schema: SchemaRef, ) -> Self { - let cache = Self::create_cache(&input, schema.clone()); + let cache = Self::compute_properties(&input, schema.clone()); AnalyzeExec { verbose, show_statistics, @@ -82,14 +82,14 @@ impl AnalyzeExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( input: &Arc, schema: SchemaRef, - ) -> PlanPropertiesCache { + ) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); let output_partitioning = Partitioning::UnknownPartitioning(1); let exec_mode = input.execution_mode(); - PlanPropertiesCache::new(eq_properties, output_partitioning, exec_mode) + PlanProperties::new(eq_properties, output_partitioning, exec_mode) } } @@ -113,7 +113,7 @@ impl ExecutionPlan for AnalyzeExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index bce48698a558..055f16288f95 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; -use super::{DisplayAs, PlanPropertiesCache, Statistics}; +use super::{DisplayAs, PlanProperties, Statistics}; use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; @@ -48,13 +48,13 @@ pub struct CoalesceBatchesExec { target_batch_size: usize, /// Execution metrics metrics: ExecutionPlanMetricsSet, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl CoalesceBatchesExec { /// Create a new CoalesceBatchesExec pub fn new(input: Arc, target_batch_size: usize) -> Self { - let cache = Self::create_cache(&input); + let cache = Self::compute_properties(&input); Self { input, target_batch_size, @@ -74,10 +74,10 @@ impl CoalesceBatchesExec { } /// This function creates the cache object that stores the plan properties such as schema, 
equivalence properties, ordering, partitioning, etc. - fn create_cache(input: &Arc) -> PlanPropertiesCache { + fn compute_properties(input: &Arc) -> PlanProperties { // The coalesce batches operator does not make any changes to the // partitioning of its input. - PlanPropertiesCache::new( + PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties input.output_partitioning().clone(), // Output Partitioning input.execution_mode(), // Execution Mode @@ -109,7 +109,7 @@ impl ExecutionPlan for CoalesceBatchesExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index ad1094cee0e1..7037445164a3 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::stream::{ObservedStream, RecordBatchReceiverStream}; -use super::{DisplayAs, PlanPropertiesCache, SendableRecordBatchStream, Statistics}; +use super::{DisplayAs, PlanProperties, SendableRecordBatchStream, Statistics}; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; @@ -38,13 +38,13 @@ pub struct CoalescePartitionsExec { input: Arc, /// Execution metrics metrics: ExecutionPlanMetricsSet, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl CoalescePartitionsExec { /// Create a new CoalescePartitionsExec pub fn new(input: Arc) -> Self { - let cache = Self::create_cache(&input); + let cache = Self::compute_properties(&input); CoalescePartitionsExec { input, metrics: ExecutionPlanMetricsSet::new(), @@ -58,12 +58,12 @@ impl CoalescePartitionsExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache(input: &Arc) -> PlanPropertiesCache { + fn compute_properties(input: &Arc) -> PlanProperties { // Coalescing partitions loses existing orderings: let mut eq_properties = input.equivalence_properties().clone(); eq_properties.clear_orderings(); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Equivalence Properties Partitioning::UnknownPartitioning(1), // Output Partitioning input.execution_mode(), // Execution Mode @@ -91,7 +91,7 @@ impl ExecutionPlan for CoalescePartitionsExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index d4afca6a1acf..38c23331983e 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -466,7 +466,7 @@ mod tests { use std::sync::Arc; use super::DisplayableExecutionPlan; - use crate::{DisplayAs, ExecutionPlan, PlanPropertiesCache}; + use crate::{DisplayAs, ExecutionPlan, PlanProperties}; use datafusion_common::{DataFusionError, Result, Statistics}; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; @@ -493,7 +493,7 @@ mod tests { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { unimplemented!() } diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index 0705c4b4eca7..b6708515ec6b 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -21,7 +21,7 @@ use std::any::Any; use std::sync::Arc; use super::{ - common, DisplayAs, ExecutionMode, PlanPropertiesCache, SendableRecordBatchStream, + common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream, Statistics, }; use crate::{memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning}; @@ -41,13 +41,13 @@ pub struct EmptyExec { schema: SchemaRef, /// Number of partitions partitions: usize, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl EmptyExec { /// Create a new EmptyExec pub fn new(schema: SchemaRef) -> Self { - let cache = Self::create_cache(schema.clone(), 1); + let cache = Self::compute_properties(schema.clone(), 1); EmptyExec { schema, partitions: 1, @@ -73,10 +73,10 @@ impl EmptyExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
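The `CoalescePartitionsExec` change above makes a useful contrast with `CoalesceBatchesExec`: the batch coalescer passes its input's properties through untouched, while merging partitions must drop orderings. Restated as a standalone sketch (import paths assumed):

```rust
use std::sync::Arc;

use datafusion_physical_plan::{ExecutionPlan, Partitioning, PlanProperties};

/// Property computation for an operator that merges all input partitions
/// into one, mirroring the CoalescePartitionsExec hunk above.
fn merge_partitions_properties(input: &Arc<dyn ExecutionPlan>) -> PlanProperties {
    // Interleaving rows from several partitions destroys any per-partition
    // sort order, so existing orderings are cleared.
    let mut eq_properties = input.equivalence_properties().clone();
    eq_properties.clear_orderings();
    PlanProperties::new(
        eq_properties,
        Partitioning::UnknownPartitioning(1), // everything lands in one partition
        input.execution_mode(),               // boundedness is inherited
    )
}
```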
- fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); let output_partitioning = Self::output_partitioning_helper(n_partitions); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Output Partitioning output_partitioning, @@ -106,7 +106,7 @@ impl ExecutionPlan for EmptyExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index 200ba0bd07c5..f63b1df29da5 100644 --- a/datafusion/physical-plan/src/explain.rs +++ b/datafusion/physical-plan/src/explain.rs @@ -20,7 +20,7 @@ use std::any::Any; use std::sync::Arc; -use super::{DisplayAs, ExecutionMode, PlanPropertiesCache, SendableRecordBatchStream}; +use super::{DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream}; use crate::stream::RecordBatchStreamAdapter; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; @@ -43,7 +43,7 @@ pub struct ExplainExec { stringified_plans: Vec, /// control which plans to print verbose: bool, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl ExplainExec { @@ -53,7 +53,7 @@ impl ExplainExec { stringified_plans: Vec, verbose: bool, ) -> Self { - let cache = Self::create_cache(schema.clone()); + let cache = Self::compute_properties(schema.clone()); ExplainExec { schema, stringified_plans, @@ -73,9 +73,9 @@ impl ExplainExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), ExecutionMode::Bounded, @@ -103,7 +103,7 @@ impl ExecutionPlan for ExplainExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 86502039c8ba..95c09f541cc2 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::{ - ColumnStatistics, DisplayAs, PlanPropertiesCache, RecordBatchStream, + ColumnStatistics, DisplayAs, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use crate::{ @@ -62,7 +62,7 @@ pub struct FilterExec { metrics: ExecutionPlanMetricsSet, /// Selectivity for statistics. 0 = no rows, 100 all rows default_selectivity: u8, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl FilterExec { @@ -74,7 +74,8 @@ impl FilterExec { match predicate.data_type(input.schema().as_ref())? { DataType::Boolean => { let default_selectivity = 20; - let cache = Self::create_cache(&input, &predicate, default_selectivity)?; + let cache = + Self::compute_properties(&input, &predicate, default_selectivity)?; Ok(Self { predicate, input: input.clone(), @@ -159,11 +160,11 @@ impl FilterExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( input: &Arc, predicate: &Arc, default_selectivity: u8, - ) -> Result { + ) -> Result { // Combine the equal predicates with the input equivalence properties // to construct the equivalence properties: let stats = Self::statistics_helper(input, predicate, default_selectivity)?; @@ -182,7 +183,7 @@ impl FilterExec { .map(|column| Arc::new(column) as _); eq_properties = eq_properties.add_constants(constants); - Ok(PlanPropertiesCache::new( + Ok(PlanProperties::new( eq_properties, input.output_partitioning().clone(), // Output Partitioning input.execution_mode(), // Execution Mode @@ -210,7 +211,7 @@ impl ExecutionPlan for FilterExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 472c65f25b30..fd0bec108e03 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -23,7 +23,7 @@ use std::fmt::Debug; use std::sync::Arc; use super::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanPropertiesCache, + DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, SendableRecordBatchStream, }; use crate::metrics::MetricsSet; @@ -88,7 +88,7 @@ pub struct FileSinkExec { count_schema: SchemaRef, /// Optional required sort order for output data. sort_order: Option>, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl fmt::Debug for FileSinkExec { @@ -180,9 +180,9 @@ impl FileSinkExec { fn create_schema( input: &Arc, schema: SchemaRef, - ) -> PlanPropertiesCache { + ) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), input.execution_mode(), @@ -211,7 +211,7 @@ impl ExecutionPlan for FileSinkExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 3f6b6ba5e2b6..f73f3d36cdb4 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -27,8 +27,8 @@ use crate::coalesce_batches::concat_batches; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ - exec_mode_flatten, ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, - ExecutionMode, ExecutionPlan, PlanPropertiesCache, RecordBatchStream, + execution_mode_from_children, ColumnStatistics, DisplayAs, DisplayFormatType, + Distribution, ExecutionMode, ExecutionPlan, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; @@ -61,7 +61,7 @@ pub struct CrossJoinExec { left_fut: OnceAsync, /// Execution plan metrics metrics: ExecutionPlanMetricsSet, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl CrossJoinExec { @@ -77,7 +77,7 @@ impl CrossJoinExec { }; let schema = Arc::new(Schema::new(all_columns)); - let cache = Self::create_cache(&left, &right, schema.clone()); + let cache = Self::compute_properties(&left, &right, schema.clone()); CrossJoinExec { left, right, @@ -99,11 +99,11 @@ impl CrossJoinExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
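The `FilterExec` version above is the one place where `compute_properties` is fallible, because it derives extra information from the predicate: a column pinned to a single value (for example `a = 5`) is a constant in the filter's output. A minimal sketch of that step, with `pinned_columns` standing in for the columns the real code extracts through its statistics machinery:

```rust
use std::sync::Arc;

use datafusion_physical_expr::{expressions::Column, EquivalenceProperties, PhysicalExpr};

/// Register columns that the predicate pins to a single value as constants,
/// so downstream operators can, e.g., drop them from required sort orders.
fn with_filter_constants(
    eq_properties: EquivalenceProperties,
    pinned_columns: Vec<Column>,
) -> EquivalenceProperties {
    let constants = pinned_columns
        .into_iter()
        .map(|column| Arc::new(column) as Arc<dyn PhysicalExpr>);
    eq_properties.add_constants(constants)
}
```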
- fn create_cache( + fn compute_properties( left: &Arc, right: &Arc, schema: SchemaRef, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties // TODO: Check equivalence properties of cross join, it may preserve // ordering in some cases. @@ -126,13 +126,13 @@ impl CrossJoinExec { ); // Determine the execution mode: - let mut mode = exec_mode_flatten([left, right]); + let mut mode = execution_mode_from_children([left, right]); if mode.is_unbounded() { // If any of the inputs is unbounded, cross join breaks the pipeline. mode = ExecutionMode::PipelineBreaking; } - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -197,7 +197,7 @@ impl ExecutionPlan for CrossJoinExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 4b010e8c60c6..2fdb2a17ebe8 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -29,7 +29,7 @@ use super::{ }; use crate::{ coalesce_partitions::CoalescePartitionsExec, - exec_mode_flatten, handle_state, + execution_mode_from_children, handle_state, hash_utils::create_hashes, joins::utils::{ adjust_indices_by_join_type, adjust_right_output_partitioning, @@ -41,7 +41,7 @@ use crate::{ }, metrics::{ExecutionPlanMetricsSet, MetricsSet}, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan, - Partitioning, PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, + Partitioning, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; @@ -297,7 +297,7 @@ pub struct HashJoinExec { /// matched and thus will not appear in the output. pub null_equals_null: bool, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl HashJoinExec { @@ -327,7 +327,7 @@ impl HashJoinExec { let random_state = RandomState::with_seeds(0, 0, 0, 0); - let cache = Self::create_cache( + let cache = Self::compute_properties( &left, &right, Arc::new(schema.clone()), @@ -406,14 +406,14 @@ impl HashJoinExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( left: &Arc, right: &Arc, schema: SchemaRef, join_type: JoinType, on: JoinOnRef, mode: PartitionMode, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties: let eq_properties = join_equivalence_properties( left.equivalence_properties().clone(), @@ -470,10 +470,10 @@ impl HashJoinExec { let mode = if pipeline_breaking { ExecutionMode::PipelineBreaking } else { - exec_mode_flatten([left, right]) + execution_mode_from_children([left, right]) }; - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -506,7 +506,7 @@ impl ExecutionPlan for HashJoinExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index bbfc4c12f548..5d2175d4a820 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -34,8 +34,9 @@ use crate::joins::utils::{ }; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ - exec_mode_flatten, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, - ExecutionPlan, PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, + execution_mode_from_children, DisplayAs, DisplayFormatType, Distribution, + ExecutionMode, ExecutionPlan, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, }; use arrow::array::{ @@ -93,7 +94,7 @@ pub struct NestedLoopJoinExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl NestedLoopJoinExec { @@ -110,7 +111,7 @@ impl NestedLoopJoinExec { let (schema, column_indices) = build_join_schema(&left_schema, &right_schema, join_type); let schema = Arc::new(schema); - let cache = Self::create_cache(&left, &right, schema.clone(), *join_type); + let cache = Self::compute_properties(&left, &right, schema.clone(), *join_type); Ok(NestedLoopJoinExec { left, right, @@ -145,12 +146,12 @@ impl NestedLoopJoinExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( left: &Arc, right: &Arc, schema: SchemaRef, join_type: JoinType, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties: let eq_properties = join_equivalence_properties( left.equivalence_properties().clone(), @@ -176,12 +177,12 @@ impl NestedLoopJoinExec { }; // Determine execution mode: - let mut mode = exec_mode_flatten([left, right]); + let mut mode = execution_mode_from_children([left, right]); if mode.is_unbounded() { mode = ExecutionMode::PipelineBreaking; } - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -208,7 +209,7 @@ impl ExecutionPlan for NestedLoopJoinExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 20bae468cc4a..bde831b731ba 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -37,8 +37,8 @@ use crate::joins::utils::{ }; use crate::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; use crate::{ - exec_mode_flatten, metrics, DisplayAs, DisplayFormatType, Distribution, - ExecutionPlan, PhysicalExpr, PlanPropertiesCache, RecordBatchStream, + execution_mode_from_children, metrics, DisplayAs, DisplayFormatType, Distribution, + ExecutionPlan, PhysicalExpr, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; @@ -84,7 +84,7 @@ pub struct SortMergeJoinExec { /// If null_equals_null is true, null == null else null != null pub null_equals_null: bool, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl SortMergeJoinExec { @@ -137,7 +137,8 @@ impl SortMergeJoinExec { let schema = Arc::new(build_join_schema(&left_schema, &right_schema, &join_type).0); - let cache = Self::create_cache(&left, &right, schema.clone(), join_type, &on); + let cache = + Self::compute_properties(&left, &right, schema.clone(), join_type, &on); Ok(Self { left, right, @@ -201,13 +202,13 @@ impl SortMergeJoinExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( left: &Arc, right: &Arc, schema: SchemaRef, join_type: JoinType, join_on: JoinOnRef, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties: let eq_properties = join_equivalence_properties( left.equivalence_properties().clone(), @@ -229,9 +230,9 @@ impl SortMergeJoinExec { ); // Determine execution mode: - let mode = exec_mode_flatten([left, right]); + let mode = execution_mode_from_children([left, right]); - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -265,7 +266,7 @@ impl ExecutionPlan for SortMergeJoinExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 3eff026a176f..77871a8b5483 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -46,11 +46,11 @@ use crate::joins::utils::{ JoinHashMapType, JoinOn, JoinOnRef, StatefulStreamResult, }; use crate::{ - exec_mode_flatten, + execution_mode_from_children, expressions::PhysicalSortExpr, joins::StreamJoinPartitionMode, metrics::{ExecutionPlanMetricsSet, MetricsSet}, - DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, PlanPropertiesCache, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; @@ -192,7 +192,7 @@ pub struct SymmetricHashJoinExec { /// Partition Mode mode: StreamJoinPartitionMode, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl SymmetricHashJoinExec { @@ -234,7 +234,8 @@ impl SymmetricHashJoinExec { // Initialize the random state for the join operation: let random_state = RandomState::with_seeds(0, 0, 0, 0); let schema = Arc::new(schema); - let cache = Self::create_cache(&left, &right, schema.clone(), *join_type, &on); + let cache = + Self::compute_properties(&left, &right, schema.clone(), *join_type, &on); Ok(SymmetricHashJoinExec { left, right, @@ -253,13 +254,13 @@ impl SymmetricHashJoinExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( left: &Arc, right: &Arc, schema: SchemaRef, join_type: JoinType, join_on: JoinOnRef, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties: let eq_properties = join_equivalence_properties( left.equivalence_properties().clone(), @@ -282,9 +283,9 @@ impl SymmetricHashJoinExec { ); // Determine execution mode: - let mode = exec_mode_flatten([left, right]); + let mode = execution_mode_from_children([left, right]); - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } /// left stream @@ -390,7 +391,7 @@ impl ExecutionPlan for SymmetricHashJoinExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index f90bbf061d38..3145c999d6b5 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -122,22 +122,22 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// Get the schema for this execution plan fn schema(&self) -> SchemaRef { - self.cache().schema().clone() + self.properties().schema().clone() } - fn cache(&self) -> &PlanPropertiesCache; + fn properties(&self) -> &PlanProperties; /// Specifies how the output of this `ExecutionPlan` is split into /// partitions. fn output_partitioning(&self) -> &Partitioning { - &self.cache().partitioning + &self.properties().partitioning } /// Specifies whether this plan generates an infinite stream of records. /// If the plan does not support pipelining, but its input(s) are /// infinite, returns an error to indicate this. fn execution_mode(&self) -> ExecutionMode { - self.cache().exec_mode + self.properties().exec_mode } /// If the output of this `ExecutionPlan` within each partition is sorted, @@ -151,7 +151,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// It is safe to return `None` here if your `ExecutionPlan` does not /// have any particular output order here fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { - self.cache().output_ordering.as_deref() + self.properties().output_ordering.as_deref() } /// Specifies the data distribution requirements for all the @@ -230,7 +230,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { /// See also [`Self::maintains_input_order`] and [`Self::output_ordering`] /// for related concepts. fn equivalence_properties(&self) -> &EquivalenceProperties { - &self.cache().eq_properties + &self.properties().eq_properties } /// Get a list of children `ExecutionPlan`s that act as inputs to this plan. @@ -482,7 +482,7 @@ impl ExecutionMode { } /// Conservatively "combines" execution modes of a given collection of operators. -fn exec_mode_flatten<'a>( +fn execution_mode_from_children<'a>( children: impl IntoIterator>, ) -> ExecutionMode { let mut result = ExecutionMode::Bounded; @@ -506,12 +506,12 @@ fn exec_mode_flatten<'a>( result } -/// Represents a cache for plan properties used in query optimization. +/// Stores the plan properties used in query optimization. /// /// This struct holds various properties useful for the query planning, which are used /// during optimization and execution phases. #[derive(Debug, Clone)] -pub struct PlanPropertiesCache { +pub struct PlanProperties { /// Stores the [`EquivalenceProperties`] of the [`ExecutionPlan`]. 
pub eq_properties: EquivalenceProperties, /// Stores the output [`Partitioning`] of the [`ExecutionPlan`]. @@ -523,7 +523,7 @@ pub struct PlanPropertiesCache { output_ordering: Option, } -impl PlanPropertiesCache { +impl PlanProperties { /// Construct a new `PlanPropertiesCache` from the pub fn new( eq_properties: EquivalenceProperties, diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index e678360dd471..a0b49d4ef136 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -24,7 +24,7 @@ use std::task::{Context, Poll}; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{ - DisplayAs, ExecutionMode, PlanPropertiesCache, RecordBatchStream, + DisplayAs, ExecutionMode, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use crate::{DisplayFormatType, Distribution, ExecutionPlan, Partitioning}; @@ -51,13 +51,13 @@ pub struct GlobalLimitExec { fetch: Option, /// Execution metrics metrics: ExecutionPlanMetricsSet, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl GlobalLimitExec { /// Create a new GlobalLimitExec pub fn new(input: Arc, skip: usize, fetch: Option) -> Self { - let cache = Self::create_cache(&input); + let cache = Self::compute_properties(&input); GlobalLimitExec { input, skip, @@ -83,8 +83,8 @@ impl GlobalLimitExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(input: &Arc) -> PlanPropertiesCache { - PlanPropertiesCache::new( + fn compute_properties(input: &Arc) -> PlanProperties { + PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties Partitioning::UnknownPartitioning(1), // Output Partitioning ExecutionMode::Bounded, // Execution Mode @@ -117,7 +117,7 @@ impl ExecutionPlan for GlobalLimitExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -268,13 +268,13 @@ pub struct LocalLimitExec { fetch: usize, /// Execution metrics metrics: ExecutionPlanMetricsSet, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl LocalLimitExec { /// Create a new LocalLimitExec partition pub fn new(input: Arc, fetch: usize) -> Self { - let cache = Self::create_cache(&input); + let cache = Self::compute_properties(&input); Self { input, fetch, @@ -294,8 +294,8 @@ impl LocalLimitExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
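With the struct renamed, the convenience accessors on the plan are thin views over the cached `PlanProperties`, as the `lib.rs` hunk above shows. A short usage sketch (`plan` stands for any `Arc<dyn ExecutionPlan>`; the trait import is what call sites need once the accessors move to `ExecutionPlanProperties` later in this series):

```rust
use std::sync::Arc;

use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};

fn describe(plan: &Arc<dyn ExecutionPlan>) {
    let _schema = plan.schema();              // properties().schema().clone()
    let _parts = plan.output_partitioning();  // &properties().partitioning
    let _mode = plan.execution_mode();        // properties().exec_mode
    let _order = plan.output_ordering();      // cached output ordering, if any
}
```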
- fn create_cache(input: &Arc) -> PlanPropertiesCache { - PlanPropertiesCache::new( + fn compute_properties(input: &Arc) -> PlanProperties { + PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties input.output_partitioning().clone(), // Output Partitioning ExecutionMode::Bounded, // Execution Mode @@ -323,7 +323,7 @@ impl ExecutionPlan for LocalLimitExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index 8bd4db0bd418..23699295e121 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -25,7 +25,7 @@ use std::task::{Context, Poll}; use super::expressions::PhysicalSortExpr; use super::{ common, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, Statistics, + PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use arrow::datatypes::SchemaRef; @@ -48,7 +48,7 @@ pub struct MemoryExec { projection: Option>, // Sort information: one or more equivalent orderings sort_information: Vec, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl fmt::Debug for MemoryExec { @@ -101,7 +101,7 @@ impl ExecutionPlan for MemoryExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -153,7 +153,7 @@ impl MemoryExec { projection: Option>, ) -> Result { let projected_schema = project_schema(&schema, projection.as_ref())?; - let cache = Self::create_cache(projected_schema.clone(), &[], partitions); + let cache = Self::compute_properties(projected_schema.clone(), &[], partitions); Ok(Self { partitions: partitions.to_vec(), schema, @@ -205,13 +205,13 @@ impl MemoryExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( schema: SchemaRef, orderings: &[LexOrdering], partitions: &[Vec], - ) -> PlanPropertiesCache { + ) -> PlanProperties { let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Equivalence Properties Partitioning::UnknownPartitioning(partitions.len()), // Output Partitioning ExecutionMode::Bounded, // Execution Mode diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index 3280522e152c..3fc9a512e79e 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -21,7 +21,7 @@ use std::any::Any; use std::sync::Arc; use super::{ - common, DisplayAs, ExecutionMode, PlanPropertiesCache, SendableRecordBatchStream, + common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream, Statistics, }; use crate::{memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning}; @@ -43,14 +43,14 @@ pub struct PlaceholderRowExec { schema: SchemaRef, /// Number of partitions partitions: usize, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl PlaceholderRowExec { /// Create a new PlaceholderRowExec pub fn new(schema: SchemaRef) -> Self { let partitions = 1; - let cache = Self::create_cache(schema.clone(), partitions); + let cache = Self::compute_properties(schema.clone(), partitions); PlaceholderRowExec { schema, partitions, @@ -95,16 +95,12 @@ impl PlaceholderRowExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); // Get output partitioning: let output_partitioning = Self::output_partitioning_helper(n_partitions); - PlanPropertiesCache::new( - eq_properties, - output_partitioning, - ExecutionMode::Bounded, - ) + PlanProperties::new(eq_properties, output_partitioning, ExecutionMode::Bounded) } } @@ -128,7 +124,7 @@ impl ExecutionPlan for PlaceholderRowExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 2ed8095f256c..402feeaf80ba 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -29,8 +29,7 @@ use std::task::{Context, Poll}; use super::expressions::Column; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{ - DisplayAs, PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, - Statistics, + DisplayAs, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use crate::{ ColumnStatistics, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, @@ -59,7 +58,7 @@ pub struct ProjectionExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Cache holding plan properties like equivalences, output partitioning etc. 
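Leaf operators such as `MemoryExec` and `PlaceholderRowExec` have no input plan to inherit from, so they seed the equivalences from their own schema and any known orderings, as in the hunks above. A sketch of that shape (a simplified free function, not the actual method; import paths as used in this patch series):

```rust
use arrow::datatypes::SchemaRef;
use datafusion_physical_expr::{EquivalenceProperties, LexOrdering};
use datafusion_physical_plan::{ExecutionMode, Partitioning, PlanProperties};

/// Properties for a leaf serving `n_partitions` of in-memory data.
fn leaf_properties(
    schema: SchemaRef,
    orderings: &[LexOrdering],
    n_partitions: usize,
) -> PlanProperties {
    PlanProperties::new(
        EquivalenceProperties::new_with_orderings(schema, orderings),
        Partitioning::UnknownPartitioning(n_partitions),
        ExecutionMode::Bounded, // in-memory data is always bounded
    )
}
```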
- cache: PlanPropertiesCache, + cache: PlanProperties, } impl ProjectionExec { @@ -93,7 +92,8 @@ impl ProjectionExec { // construct a map from the input expressions to the output expression of the Projection let projection_mapping = ProjectionMapping::try_new(&expr, &input_schema)?; - let cache = Self::create_cache(&input, &projection_mapping, schema.clone())?; + let cache = + Self::compute_properties(&input, &projection_mapping, schema.clone())?; Ok(Self { expr, schema, @@ -114,11 +114,11 @@ impl ProjectionExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( input: &Arc, projection_mapping: &ProjectionMapping, schema: SchemaRef, - ) -> Result { + ) -> Result { // Calculate equivalence properties: let mut input_eq_properties = input.equivalence_properties().clone(); input_eq_properties.substitute_oeq_class(projection_mapping)?; @@ -143,7 +143,7 @@ impl ProjectionExec { input_partition.clone() }; - Ok(PlanPropertiesCache::new( + Ok(PlanProperties::new( eq_properties, output_partitioning, input.execution_mode(), @@ -184,7 +184,7 @@ impl ExecutionPlan for ProjectionExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/recursive_query.rs b/datafusion/physical-plan/src/recursive_query.rs index fd0d506e2ce4..9786b1cbf6fd 100644 --- a/datafusion/physical-plan/src/recursive_query.rs +++ b/datafusion/physical-plan/src/recursive_query.rs @@ -24,7 +24,7 @@ use std::task::{Context, Poll}; use super::{ metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}, work_table::{WorkTable, WorkTableExec}, - PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, Statistics, + PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use crate::{DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan}; @@ -67,7 +67,7 @@ pub struct RecursiveQueryExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl RecursiveQueryExec { @@ -82,7 +82,7 @@ impl RecursiveQueryExec { let work_table = Arc::new(WorkTable::new()); // Use the same work table for both the WorkTableExec and the recursive term let recursive_term = assign_work_table(recursive_term, work_table.clone())?; - let cache = Self::create_cache(static_term.schema()); + let cache = Self::compute_properties(static_term.schema()); Ok(RecursiveQueryExec { name, static_term, @@ -95,10 +95,10 @@ impl RecursiveQueryExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), ExecutionMode::Bounded, @@ -111,7 +111,7 @@ impl ExecutionPlan for RecursiveQueryExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index b9489bd12e64..1b92a0b03e49 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -34,9 +34,7 @@ use crate::repartition::distributor_channels::{ channels, partition_aware_channels, DistributionReceiver, DistributionSender, }; use crate::sorts::streaming_merge; -use crate::{ - DisplayFormatType, ExecutionPlan, Partitioning, PlanPropertiesCache, Statistics, -}; +use crate::{DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, Statistics}; use arrow::array::{ArrayRef, UInt64Builder}; use arrow::datatypes::SchemaRef; @@ -305,7 +303,7 @@ pub struct RepartitionExec { /// `SortPreservingRepartitionExec`, false means `RepartitionExec`. preserve_order: bool, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } #[derive(Debug, Clone)] @@ -411,7 +409,7 @@ impl ExecutionPlan for RepartitionExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -598,7 +596,8 @@ impl RepartitionExec { partitioning: Partitioning, ) -> Result { let preserve_order = false; - let cache = Self::create_cache(&input, partitioning.clone(), preserve_order); + let cache = + Self::compute_properties(&input, partitioning.clone(), preserve_order); Ok(RepartitionExec { input, partitioning, @@ -634,15 +633,15 @@ impl RepartitionExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( input: &Arc, partitioning: Partitioning, preserve_order: bool, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Equivalence Properties let eq_properties = Self::eq_properties_helper(input, preserve_order); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Equivalence Properties partitioning, // Output Partitioning input.execution_mode(), // Execution Mode diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 095245a706ea..01e592d29f7b 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -62,7 +62,7 @@ use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::sort::sort_batch; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, - PlanPropertiesCache, SendableRecordBatchStream, Statistics, + PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::compute::concat_batches; @@ -94,7 +94,7 @@ pub struct PartialSortExec { /// Fetch highest/lowest n results fetch: Option, /// Cache holding plan properties like equivalences, output partitioning etc. 
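For `RepartitionExec` (hunk above) the interesting component is the partitioning itself: the requested scheme is stored verbatim in the cache while equivalences and execution mode come from the input. A hedged sketch that checks this through the public accessors (the `assert_eq!` assumes `PlanProperties::output_partitioning()` as used elsewhere in this series):

```rust
use std::sync::Arc;

use datafusion_common::Result;
use datafusion_physical_plan::repartition::RepartitionExec;
use datafusion_physical_plan::{ExecutionPlan, Partitioning};

fn round_robin(
    input: Arc<dyn ExecutionPlan>,
    n: usize,
) -> Result<Arc<dyn ExecutionPlan>> {
    let exec = RepartitionExec::try_new(input, Partitioning::RoundRobinBatch(n))?;
    // The cached partitioning is exactly the requested scheme:
    assert_eq!(exec.properties().output_partitioning().partition_count(), n);
    Ok(Arc::new(exec))
}
```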
- cache: PlanPropertiesCache, + cache: PlanProperties, } impl PartialSortExec { @@ -106,7 +106,7 @@ impl PartialSortExec { ) -> Self { assert!(common_prefix_length > 0); let preserve_partitioning = false; - let cache = Self::create_cache(&input, expr.clone(), preserve_partitioning); + let cache = Self::compute_properties(&input, expr.clone(), preserve_partitioning); Self { input, expr, @@ -181,11 +181,11 @@ impl PartialSortExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( input: &Arc, sort_exprs: LexOrdering, preserve_partitioning: bool, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties; i.e. reset the ordering equivalence // class with the new ordering: let eq_properties = input @@ -200,7 +200,7 @@ impl PartialSortExec { // Determine execution mode: let mode = input.execution_mode(); - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -230,7 +230,7 @@ impl ExecutionPlan for PartialSortExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index 713ff86a5072..7109d730f1dd 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -37,8 +37,7 @@ use crate::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; use crate::topk::TopK; use crate::{ DisplayAs, DisplayFormatType, Distribution, EmptyRecordBatchStream, ExecutionMode, - ExecutionPlan, Partitioning, PlanPropertiesCache, SendableRecordBatchStream, - Statistics, + ExecutionPlan, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::compute::{concat_batches, lexsort_to_indices, take}; @@ -678,7 +677,7 @@ pub struct SortExec { /// Fetch highest/lowest n results fetch: Option, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl SortExec { @@ -696,7 +695,7 @@ impl SortExec { /// sorted output partition. pub fn new(expr: Vec, input: Arc) -> Self { let preserve_partitioning = false; - let cache = Self::create_cache(&input, expr.clone(), preserve_partitioning); + let cache = Self::compute_properties(&input, expr.clone(), preserve_partitioning); Self { expr, input, @@ -787,11 +786,11 @@ impl SortExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( input: &Arc, sort_exprs: LexOrdering, preserve_partitioning: bool, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties; i.e. 
reset the ordering equivalence // class with the new ordering: let eq_properties = input @@ -811,7 +810,7 @@ impl SortExec { ExecutionMode::Bounded => ExecutionMode::Bounded, }; - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -840,7 +839,7 @@ impl ExecutionPlan for SortExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 16bdecd0f384..862146e10549 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -26,7 +26,7 @@ use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::streaming_merge; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, - PlanPropertiesCache, SendableRecordBatchStream, Statistics, + PlanProperties, SendableRecordBatchStream, Statistics, }; use datafusion_common::{internal_err, DataFusionError, Result}; @@ -74,13 +74,13 @@ pub struct SortPreservingMergeExec { /// Optional number of rows to fetch. Stops producing rows after this fetch fetch: Option, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl SortPreservingMergeExec { /// Create a new sort execution plan pub fn new(expr: Vec, input: Arc) -> Self { - let cache = Self::create_cache(&input); + let cache = Self::compute_properties(&input); Self { input, expr, @@ -111,8 +111,8 @@ impl SortPreservingMergeExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
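The `SortExec` and `PartialSortExec` hunks above show the other common shape: a sort resets the ordering equivalence class to its own sort expressions and, because a full sort must consume all of its input, it cannot stream unbounded input. A sketch of that computation; the partitioning branch and the non-`Bounded` match arm are elided in the hunks above and reconstructed here as assumptions:

```rust
use std::sync::Arc;

use datafusion_physical_expr::LexOrdering;
use datafusion_physical_plan::{
    ExecutionMode, ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties,
};

fn sort_properties(
    input: &Arc<dyn ExecutionPlan>,
    sort_exprs: LexOrdering,
    preserve_partitioning: bool,
) -> PlanProperties {
    // Reset the ordering equivalence class to the new sort order:
    let eq_properties = input
        .equivalence_properties()
        .clone()
        .with_reorder(sort_exprs);
    // A partitioned sort keeps the input partitioning; otherwise one output:
    let output_partitioning = if preserve_partitioning {
        input.output_partitioning().clone()
    } else {
        Partitioning::UnknownPartitioning(1)
    };
    // An unbounded input makes a full sort pipeline-breaking:
    let mode = match input.execution_mode() {
        ExecutionMode::Bounded => ExecutionMode::Bounded,
        _ => ExecutionMode::PipelineBreaking,
    };
    PlanProperties::new(eq_properties, output_partitioning, mode)
}
```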
- fn create_cache(input: &Arc) -> PlanPropertiesCache { - PlanPropertiesCache::new( + fn compute_properties(input: &Arc) -> PlanProperties { + PlanProperties::new( input.equivalence_properties().clone(), // Equivalence Properties Partitioning::UnknownPartitioning(1), // Output Partitioning input.execution_mode(), // Execution Mode @@ -149,7 +149,7 @@ impl ExecutionPlan for SortPreservingMergeExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 90e8600d78e5..123588c34dff 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -20,7 +20,7 @@ use std::any::Any; use std::sync::Arc; -use super::{DisplayAs, DisplayFormatType, ExecutionMode, PlanPropertiesCache}; +use super::{DisplayAs, DisplayFormatType, ExecutionMode, PlanProperties}; use crate::display::{display_orderings, ProjectSchemaDisplay}; use crate::stream::RecordBatchStreamAdapter; use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; @@ -58,7 +58,7 @@ pub struct StreamingTableExec { projected_schema: SchemaRef, projected_output_ordering: Vec, infinite: bool, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl StreamingTableExec { @@ -87,7 +87,7 @@ impl StreamingTableExec { }; let projected_output_ordering = projected_output_ordering.into_iter().collect::>(); - let cache = Self::create_cache( + let cache = Self::compute_properties( projected_schema.clone(), &projected_output_ordering, &partitions, @@ -128,12 +128,12 @@ impl StreamingTableExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( schema: SchemaRef, orderings: &[LexOrdering], partitions: &[Arc], is_infinite: bool, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties: let eq_properties = EquivalenceProperties::new_with_orderings(schema, orderings); @@ -147,7 +147,7 @@ impl StreamingTableExec { ExecutionMode::Bounded }; - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -195,7 +195,7 @@ impl ExecutionPlan for StreamingTableExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/test/exec.rs b/datafusion/physical-plan/src/test/exec.rs index a70e05809923..23df3753e817 100644 --- a/datafusion/physical-plan/src/test/exec.rs +++ b/datafusion/physical-plan/src/test/exec.rs @@ -27,7 +27,7 @@ use std::{ use crate::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; use crate::{ common, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, Statistics, + PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; @@ -121,7 +121,7 @@ pub struct MockExec { /// if true (the default), sends data using a separate task to to ensure the /// batches are not available without this stream yielding first use_task: bool, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl MockExec { @@ -133,7 +133,7 @@ impl MockExec { /// ensure any poll loops are correct. 
This behavior can be /// changed with `with_use_task` pub fn new(data: Vec>, schema: SchemaRef) -> Self { - let cache = Self::create_cache(schema.clone()); + let cache = Self::compute_properties(schema.clone()); Self { data, schema, @@ -151,10 +151,10 @@ impl MockExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), ExecutionMode::Bounded, @@ -181,7 +181,7 @@ impl ExecutionPlan for MockExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -282,7 +282,7 @@ pub struct BarrierExec { /// all streams wait on this barrier to produce barrier: Arc, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl BarrierExec { @@ -290,7 +290,7 @@ impl BarrierExec { pub fn new(data: Vec>, schema: SchemaRef) -> Self { // wait for all streams and the input let barrier = Arc::new(Barrier::new(data.len() + 1)); - let cache = Self::create_cache(schema.clone(), &data); + let cache = Self::compute_properties(schema.clone(), &data); Self { data, schema, @@ -307,9 +307,12 @@ impl BarrierExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef, data: &[Vec]) -> PlanPropertiesCache { + fn compute_properties( + schema: SchemaRef, + data: &[Vec], + ) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(data.len()), ExecutionMode::Bounded, @@ -336,7 +339,7 @@ impl ExecutionPlan for BarrierExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -394,7 +397,7 @@ impl ExecutionPlan for BarrierExec { /// A mock execution plan that errors on a call to execute #[derive(Debug)] pub struct ErrorExec { - cache: PlanPropertiesCache, + cache: PlanProperties, } impl Default for ErrorExec { @@ -410,15 +413,15 @@ impl ErrorExec { DataType::Int64, true, )])); - let cache = Self::create_cache(schema.clone()); + let cache = Self::compute_properties(schema.clone()); Self { cache } } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), ExecutionMode::Bounded, @@ -445,7 +448,7 @@ impl ExecutionPlan for ErrorExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -475,7 +478,7 @@ impl ExecutionPlan for ErrorExec { pub struct StatisticsExec { stats: Statistics, schema: Arc, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl StatisticsExec { pub fn new(stats: Statistics, schema: Schema) -> Self { @@ -484,7 +487,7 @@ impl StatisticsExec { .column_statistics.len(), schema.fields().len(), "if defined, the column statistics vector length should be the number of fields" ); - let cache = Self::create_cache(Arc::new(schema.clone())); + let cache = Self::compute_properties(Arc::new(schema.clone())); Self { stats, schema: Arc::new(schema), @@ -493,10 +496,10 @@ impl StatisticsExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(2), ExecutionMode::Bounded, @@ -528,7 +531,7 @@ impl ExecutionPlan for StatisticsExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -566,13 +569,13 @@ pub struct BlockingExec { /// Ref-counting helper to check if the plan and the produced stream are still in memory. refs: Arc<()>, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl BlockingExec { /// Create new [`BlockingExec`] with a give schema and number of partitions. pub fn new(schema: SchemaRef, n_partitions: usize) -> Self { - let cache = Self::create_cache(schema.clone(), n_partitions); + let cache = Self::compute_properties(schema.clone(), n_partitions); Self { schema, refs: Default::default(), @@ -590,10 +593,10 @@ impl BlockingExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef, n_partitions: usize) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef, n_partitions: usize) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(n_partitions), ExecutionMode::Bounded, @@ -620,7 +623,7 @@ impl ExecutionPlan for BlockingExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -704,7 +707,7 @@ pub struct PanicExec { /// Number of output partitions. Each partition will produce this /// many empty output record batches prior to panicing batches_until_panics: Vec, - cache: PlanPropertiesCache, + cache: PlanProperties, } impl PanicExec { @@ -712,7 +715,7 @@ impl PanicExec { /// partitions, which will each panic immediately. 
pub fn new(schema: SchemaRef, n_partitions: usize) -> Self { let batches_until_panics = vec![0; n_partitions]; - let cache = Self::create_cache(schema.clone(), &batches_until_panics); + let cache = Self::compute_properties(schema.clone(), &batches_until_panics); Self { schema, batches_until_panics, @@ -727,14 +730,14 @@ impl PanicExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( schema: SchemaRef, batches_until_panics: &[usize], - ) -> PlanPropertiesCache { + ) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); let num_partitions = batches_until_panics.len(); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(num_partitions), ExecutionMode::Bounded, @@ -761,7 +764,7 @@ impl ExecutionPlan for PanicExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 4d5377a9bdcc..a533f2249588 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -27,10 +27,10 @@ use std::task::{Context, Poll}; use std::{any::Any, sync::Arc}; use super::{ - exec_mode_flatten, + execution_mode_from_children, metrics::{ExecutionPlanMetricsSet, MetricsSet}, ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, Statistics, + PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; use crate::metrics::BaselineMetrics; use crate::stream::ObservedStream; @@ -91,14 +91,14 @@ pub struct UnionExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl UnionExec { /// Create a new UnionExec pub fn new(inputs: Vec>) -> Self { let schema = union_schema(&inputs); - let cache = Self::create_cache(&inputs, schema); + let cache = Self::compute_properties(&inputs, schema); UnionExec { inputs, metrics: ExecutionPlanMetricsSet::new(), @@ -112,10 +112,10 @@ impl UnionExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache( + fn compute_properties( inputs: &[Arc], schema: SchemaRef, - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties: // TODO: In some cases, we should be able to preserve some equivalence // classes and constants. Add support for such cases. @@ -161,9 +161,9 @@ impl UnionExec { let output_partitioning = Partitioning::UnknownPartitioning(num_partitions); // Determine execution mode: - let mode = exec_mode_flatten(inputs.iter()); + let mode = execution_mode_from_children(inputs.iter()); - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -187,7 +187,7 @@ impl ExecutionPlan for UnionExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } @@ -317,7 +317,7 @@ pub struct InterleaveExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Cache holding plan properties like equivalences, output partitioning etc. 
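`UnionExec` (hunk above) derives its mode from all of its children via the renamed `execution_mode_from_children`. That helper is crate-private and its loop body is elided here, so the following is a simplified restatement of its conservative semantics rather than the exact implementation:

```rust
use datafusion_physical_plan::ExecutionMode;

/// Simplified restatement of the rule: PipelineBreaking dominates, then
/// Unbounded; the result is Bounded only if every child is Bounded.
fn combine_modes(modes: impl IntoIterator<Item = ExecutionMode>) -> ExecutionMode {
    let mut result = ExecutionMode::Bounded;
    for mode in modes {
        result = match (mode, result) {
            (ExecutionMode::PipelineBreaking, _)
            | (_, ExecutionMode::PipelineBreaking) => ExecutionMode::PipelineBreaking,
            (ExecutionMode::Unbounded, _) | (_, ExecutionMode::Unbounded) => {
                ExecutionMode::Unbounded
            }
            _ => ExecutionMode::Bounded,
        };
    }
    result
}
```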
- cache: PlanPropertiesCache, + cache: PlanProperties, } impl InterleaveExec { @@ -328,7 +328,7 @@ impl InterleaveExec { "Not all InterleaveExec children have a consistent hash partitioning" ); } - let cache = Self::create_cache(&inputs); + let cache = Self::compute_properties(&inputs); Ok(InterleaveExec { inputs, metrics: ExecutionPlanMetricsSet::new(), @@ -342,15 +342,15 @@ impl InterleaveExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(inputs: &[Arc]) -> PlanPropertiesCache { + fn compute_properties(inputs: &[Arc]) -> PlanProperties { let schema = union_schema(inputs); let eq_properties = EquivalenceProperties::new(schema); // Get output partitioning: let output_partitioning = inputs[0].output_partitioning().clone(); // Determine execution mode: - let mode = exec_mode_flatten(inputs.iter()); + let mode = execution_mode_from_children(inputs.iter()); - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -374,7 +374,7 @@ impl ExecutionPlan for InterleaveExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index d727091fd1c3..72fcbfd2ffb9 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -20,7 +20,7 @@ use std::{any::Any, sync::Arc}; use super::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; -use super::{DisplayAs, PlanPropertiesCache}; +use super::{DisplayAs, PlanProperties}; use crate::{ expressions::Column, DisplayFormatType, Distribution, ExecutionPlan, PhysicalExpr, RecordBatchStream, SendableRecordBatchStream, @@ -60,7 +60,7 @@ pub struct UnnestExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl UnnestExec { @@ -71,7 +71,7 @@ impl UnnestExec { schema: SchemaRef, options: UnnestOptions, ) -> Self { - let cache = Self::create_cache(&input, schema.clone()); + let cache = Self::compute_properties(&input, schema.clone()); UnnestExec { input, schema, @@ -83,13 +83,13 @@ impl UnnestExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( input: &Arc, schema: SchemaRef, - ) -> PlanPropertiesCache { + ) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, input.output_partitioning().clone(), input.execution_mode(), @@ -116,7 +116,7 @@ impl ExecutionPlan for UnnestExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index f31272879279..1e535b43d7c0 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -21,7 +21,7 @@ use std::any::Any; use std::sync::Arc; use super::{ - common, DisplayAs, ExecutionMode, PlanPropertiesCache, SendableRecordBatchStream, + common, DisplayAs, ExecutionMode, PlanProperties, SendableRecordBatchStream, Statistics, }; use crate::{ @@ -43,7 +43,7 @@ pub struct ValuesExec { /// The data data: Vec, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl ValuesExec { @@ -114,7 +114,7 @@ impl ValuesExec { } } - let cache = Self::create_cache(schema.clone()); + let cache = Self::compute_properties(schema.clone()); Ok(ValuesExec { schema, data: batches, @@ -128,10 +128,10 @@ impl ValuesExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), ExecutionMode::Bounded, @@ -159,7 +159,7 @@ impl ExecutionPlan for ValuesExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index d7579cdc041d..eb4a27341785 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -35,7 +35,7 @@ use crate::windows::{ }; use crate::{ ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, - InputOrderMode, PlanPropertiesCache, RecordBatchStream, SendableRecordBatchStream, + InputOrderMode, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, WindowExpr, }; @@ -90,7 +90,7 @@ pub struct BoundedWindowAggExec { // See `get_ordered_partition_by_indices` for more details. ordered_partition_by_indices: Vec, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl BoundedWindowAggExec { @@ -121,7 +121,7 @@ impl BoundedWindowAggExec { vec![] } }; - let cache = Self::create_cache(&input, &schema, &window_expr); + let cache = Self::compute_properties(&input, &schema, &window_expr); Ok(Self { input, window_expr, @@ -183,11 +183,11 @@ impl BoundedWindowAggExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
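The `UnnestExec` hunk above illustrates the schema-changing variant of the pattern: equivalences restart from the output schema, while partitioning and mode still pass through from the input. A sketch under the same assumptions (a free function standing in for the actual method):

```rust
use std::sync::Arc;

use arrow::datatypes::SchemaRef;
use datafusion_physical_expr::EquivalenceProperties;
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties, PlanProperties};

fn schema_changing_properties(
    input: &Arc<dyn ExecutionPlan>,
    output_schema: SchemaRef,
) -> PlanProperties {
    PlanProperties::new(
        // Equivalences restart from the *output* schema...
        EquivalenceProperties::new(output_schema),
        // ...while partitioning and mode pass through from the input:
        input.output_partitioning().clone(),
        input.execution_mode(),
    )
}
```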
- fn create_cache( + fn compute_properties( input: &Arc, schema: &SchemaRef, window_expr: &[Arc], - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties: let eq_properties = window_equivalence_properties(schema, input, window_expr); @@ -197,7 +197,7 @@ impl BoundedWindowAggExec { let output_partitioning = input.output_partitioning().clone(); // Construct properties cache - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, // Equivalence Properties output_partitioning, // Output Partitioning input.execution_mode(), // Execution Mode @@ -240,7 +240,7 @@ impl ExecutionPlan for BoundedWindowAggExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index f143d228f381..efc84526c9ad 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -31,7 +31,7 @@ use crate::windows::{ }; use crate::{ ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, - ExecutionPlan, PhysicalExpr, PlanPropertiesCache, RecordBatchStream, + ExecutionPlan, PhysicalExpr, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, WindowExpr, }; @@ -65,7 +65,7 @@ pub struct WindowAggExec { // see `get_ordered_partition_by_indices` for more details. ordered_partition_by_indices: Vec, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl WindowAggExec { @@ -80,7 +80,7 @@ impl WindowAggExec { let ordered_partition_by_indices = get_ordered_partition_by_indices(window_expr[0].partition_by(), &input); - let cache = Self::create_cache(schema.clone(), &input, &window_expr); + let cache = Self::compute_properties(schema.clone(), &input, &window_expr); Ok(Self { input, window_expr, @@ -117,11 +117,11 @@ impl WindowAggExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn create_cache( + fn compute_properties( schema: SchemaRef, input: &Arc, window_expr: &[Arc], - ) -> PlanPropertiesCache { + ) -> PlanProperties { // Calculate equivalence properties: let eq_properties = window_equivalence_properties(&schema, input, window_expr); @@ -139,7 +139,7 @@ impl WindowAggExec { }; // Construct properties cache: - PlanPropertiesCache::new(eq_properties, output_partitioning, mode) + PlanProperties::new(eq_properties, output_partitioning, mode) } } @@ -177,7 +177,7 @@ impl ExecutionPlan for WindowAggExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index 44a42a4fcf92..9acabf2447eb 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -25,9 +25,7 @@ use super::{ SendableRecordBatchStream, Statistics, }; use crate::memory::MemoryStream; -use crate::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanPropertiesCache, -}; +use crate::{DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; @@ -85,13 +83,13 @@ pub struct WorkTableExec { /// Execution metrics metrics: ExecutionPlanMetricsSet, /// Cache holding plan properties like equivalences, output partitioning etc. - cache: PlanPropertiesCache, + cache: PlanProperties, } impl WorkTableExec { /// Create a new execution plan for a worktable exec. pub fn new(name: String, schema: SchemaRef) -> Self { - let cache = Self::create_cache(schema.clone()); + let cache = Self::compute_properties(schema.clone()); Self { name, schema, @@ -112,10 +110,10 @@ impl WorkTableExec { } /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. - fn create_cache(schema: SchemaRef) -> PlanPropertiesCache { + fn compute_properties(schema: SchemaRef) -> PlanProperties { let eq_properties = EquivalenceProperties::new(schema); - PlanPropertiesCache::new( + PlanProperties::new( eq_properties, Partitioning::UnknownPartitioning(1), ExecutionMode::Bounded, @@ -142,7 +140,7 @@ impl ExecutionPlan for WorkTableExec { self } - fn cache(&self) -> &PlanPropertiesCache { + fn properties(&self) -> &PlanProperties { &self.cache } From a0128449970fa4cefa659b110ea5de8123ed5f8b Mon Sep 17 00:00:00 2001 From: Mustafa Akur <106137913+mustafasrepo@users.noreply.github.com> Date: Tue, 27 Feb 2024 19:09:54 +0300 Subject: [PATCH 30/45] Update datafusion/physical-plan/src/lib.rs Co-authored-by: Andrew Lamb --- datafusion/physical-plan/src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index d18b8f238859..cebe3a8837ad 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -460,6 +460,9 @@ pub enum ExecutionMode { /// Represents the mode where generated stream is unbounded, e.g. infinite. /// Even though the operator generates an unbounded stream of results, it /// works with bounded memory and execution can still continue successfully. + /// + /// The stream that results from calling `execute` on an `ExecutionPlan` that is `Unbounded` + /// will never be done (return `None`), except in case of error. 
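The contract documented above matters to consumers: a stream produced by an `Unbounded` plan only terminates on error. A sketch of a driver loop written against that contract (`futures::StreamExt` supplies `next`; the batch-consuming step is a placeholder, not part of the patch):

```rust
use std::sync::Arc;

use datafusion_common::Result;
use datafusion_execution::TaskContext;
use datafusion_physical_plan::ExecutionPlan;
use futures::StreamExt;

/// Drive one partition of an unbounded plan; `None` from `next()` is not
/// expected here, so the loop ends only on error or external cancellation.
async fn drive(plan: Arc<dyn ExecutionPlan>, ctx: Arc<TaskContext>) -> Result<()> {
    let mut stream = plan.execute(0, ctx)?;
    while let Some(batch) = stream.next().await {
        let _batch = batch?; // an Err terminates the stream
        // ... consume the batch here (placeholder) ...
    }
    Ok(())
}
```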
Unbounded, /// Represents the mode where some of the operator's input stream(s) are /// unbounded; however, the operator cannot generate streaming results from From e4a994772b17d0e3f30e467ce1a685533062c7e7 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Tue, 27 Feb 2024 19:13:08 +0300 Subject: [PATCH 31/45] Update comments --- datafusion/physical-plan/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index cebe3a8837ad..e21a96cee2ff 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -461,7 +461,7 @@ pub enum ExecutionMode { /// Even though the operator generates an unbounded stream of results, it /// works with bounded memory and execution can still continue successfully. /// - /// The stream that results from calling `execute` on an `ExecutionPlan` that is `Unbounded` + /// The stream that results from calling `execute` on an `ExecutionPlan` that is `Unbounded` /// will never be done (return `None`), except in case of error. Unbounded, /// Represents the mode where some of the operator's input stream(s) are @@ -511,8 +511,8 @@ fn execution_mode_from_children<'a>( /// Stores the plan properties used in query optimization. /// -/// This struct holds various properties useful for the query planning, which are used -/// during optimization and execution phases. +/// These properties are in a single structure to permit this information to be computed +/// once and then those cached results used multiple times without recomputation (aka a cache) #[derive(Debug, Clone)] pub struct PlanProperties { /// Stores the [`EquivalenceProperties`] of the [`ExecutionPlan`]. From a940a46a6677c6f9639272f12164ba8a71b2b7c4 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Tue, 27 Feb 2024 19:44:12 +0300 Subject: [PATCH 32/45] Move properties to another trait. 
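This commit splits the property accessors out of `ExecutionPlan` into a separate extension trait, so call sites now import `ExecutionPlanProperties` alongside `ExecutionPlan`; that import churn makes up most of the diffstat below. A sketch of the call-site shape after the move, with the method set inferred from the call sites in this patch:

```rust
use std::sync::Arc;

// The extension trait rides along with ExecutionPlan at call sites:
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};

fn is_single_partition(plan: &Arc<dyn ExecutionPlan>) -> bool {
    plan.output_partitioning().partition_count() == 1
}

fn is_sorted(plan: &Arc<dyn ExecutionPlan>) -> bool {
    plan.output_ordering().is_some()
}
```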
--- datafusion/core/src/dataframe/mod.rs | 2 +- .../core/src/datasource/listing/table.rs | 1 + datafusion/core/src/datasource/memory.rs | 5 +- .../datasource/physical_plan/arrow_file.rs | 2 +- .../core/src/datasource/physical_plan/csv.rs | 3 +- .../core/src/datasource/physical_plan/json.rs | 3 +- .../datasource/physical_plan/parquet/mod.rs | 12 +- .../enforce_distribution.rs | 2 + .../src/physical_optimizer/enforce_sorting.rs | 5 +- .../src/physical_optimizer/join_selection.rs | 2 + .../limited_distinct_aggregation.rs | 1 + .../physical_optimizer/output_requirements.rs | 4 +- .../physical_optimizer/pipeline_checker.rs | 1 + .../physical_optimizer/projection_pushdown.rs | 1 + .../replace_with_order_preserving_variants.rs | 1 + .../src/physical_optimizer/sort_pushdown.rs | 3 +- .../physical_optimizer/topk_aggregation.rs | 3 +- .../core/src/physical_optimizer/utils.rs | 1 + datafusion/core/src/physical_planner.rs | 1 + .../physical-plan/src/aggregates/mod.rs | 4 +- .../physical-plan/src/aggregates/row_hash.rs | 3 +- datafusion/physical-plan/src/analyze.rs | 5 +- .../physical-plan/src/coalesce_batches.rs | 2 +- .../physical-plan/src/coalesce_partitions.rs | 5 +- datafusion/physical-plan/src/common.rs | 2 +- datafusion/physical-plan/src/filter.rs | 4 +- datafusion/physical-plan/src/insert.rs | 4 +- .../physical-plan/src/joins/cross_join.rs | 1 + .../physical-plan/src/joins/hash_join.rs | 1 + .../src/joins/nested_loop_join.rs | 4 +- .../src/joins/sort_merge_join.rs | 12 +- .../src/joins/symmetric_hash_join.rs | 4 +- datafusion/physical-plan/src/lib.rs | 134 +++++++++++------- datafusion/physical-plan/src/limit.rs | 4 +- datafusion/physical-plan/src/projection.rs | 3 +- .../physical-plan/src/repartition/mod.rs | 4 +- .../physical-plan/src/sorts/partial_sort.rs | 4 +- datafusion/physical-plan/src/sorts/sort.rs | 3 +- .../src/sorts/sort_preserving_merge.rs | 4 +- datafusion/physical-plan/src/union.rs | 7 +- datafusion/physical-plan/src/unnest.rs | 2 +- .../src/windows/bounded_window_agg_exec.rs | 4 +- datafusion/physical-plan/src/windows/mod.rs | 2 +- .../src/windows/window_agg_exec.rs | 4 +- 44 files changed, 170 insertions(+), 109 deletions(-) diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 7898b71e23f6..1372570179fe 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1517,7 +1517,7 @@ mod tests { WindowFunctionDefinition, }; use datafusion_physical_expr::expressions::Column; - use datafusion_physical_plan::get_plan_string; + use datafusion_physical_plan::{get_plan_string, ExecutionPlanProperties}; // Get string representation of the plan async fn assert_physical_plan(df: &DataFrame, expected: Vec<&str>) { diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index 56e64f556c12..a1f3d14aacca 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -920,6 +920,7 @@ mod tests { use datafusion_common::{assert_contains, GetExt, ScalarValue}; use datafusion_expr::{BinaryExpr, LogicalPlanBuilder, Operator}; use datafusion_physical_expr::PhysicalSortExpr; + use datafusion_physical_plan::ExecutionPlanProperties; use tempfile::TempDir; #[tokio::test] diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index 901e74dfc218..b4a51be264eb 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -32,6 +32,7 @@ use 
datafusion_common::{ not_impl_err, plan_err, Constraints, DFSchema, DataFusionError, SchemaExt, }; use datafusion_execution::TaskContext; +use datafusion_physical_plan::ExecutionPlanProperties; use parking_lot::Mutex; use tokio::sync::RwLock; use tokio::task::JoinSet; @@ -161,10 +162,10 @@ impl MemTable { let exec = MemoryExec::try_new(&data, schema.clone(), None)?; if let Some(num_partitions) = output_partitions { - let exec = RepartitionExec::try_new( + let exec = Arc::new(RepartitionExec::try_new( Arc::new(exec), Partitioning::RoundRobinBatch(num_partitions), - )?; + )?) as Arc; // execute and collect results let mut output_partitions = vec![]; diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index 8f010f1dcbf8..817f330097ac 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -152,7 +152,7 @@ impl ExecutionPlan for ArrowExec { let repartitioned_file_groups_option = FileGroupPartitioner::new() .with_target_partitions(target_partitions) .with_repartition_file_min_size(repartition_file_min_size) - .with_preserve_order_within_groups(self.output_ordering().is_some()) + .with_preserve_order_within_groups(self.cache.output_ordering().is_some()) .repartition_file_groups(&self.base_config.file_groups); if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index a509121a82c8..3066d77acf45 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -43,6 +43,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use bytes::{Buf, Bytes}; +use datafusion_physical_plan::ExecutionPlanProperties; use futures::{ready, StreamExt, TryStreamExt}; use object_store::{GetOptions, GetResultPayload, ObjectStore}; use tokio::io::AsyncWriteExt; @@ -197,7 +198,7 @@ impl ExecutionPlan for CsvExec { let repartitioned_file_groups_option = FileGroupPartitioner::new() .with_target_partitions(target_partitions) - .with_preserve_order_within_groups(self.output_ordering().is_some()) + .with_preserve_order_within_groups(self.cache.output_ordering().is_some()) .with_repartition_file_min_size(repartition_file_min_size) .repartition_file_groups(&self.base_config.file_groups); diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 7b0e84c4410b..c471035bd286 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -42,6 +42,7 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use bytes::{Buf, Bytes}; +use datafusion_physical_plan::ExecutionPlanProperties; use futures::{ready, StreamExt, TryStreamExt}; use object_store::{self, GetOptions, GetResultPayload, ObjectStore}; use tokio::io::AsyncWriteExt; @@ -150,7 +151,7 @@ impl ExecutionPlan for NdJsonExec { config: &datafusion_common::config::ConfigOptions, ) -> Result>> { let repartition_file_min_size = config.optimizer.repartition_file_min_size; - let preserve_order_within_groups = self.output_ordering().is_some(); + let preserve_order_within_groups = self.cache.output_ordering().is_some(); let file_groups = &self.base_config.file_groups; let 
repartitioned_file_groups_option = FileGroupPartitioner::new() diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 997bdae762b8..e6962040e8ac 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -48,6 +48,7 @@ use arrow::error::ArrowError; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalExpr}; use bytes::Bytes; +use datafusion_physical_plan::ExecutionPlanProperties; use futures::future::BoxFuture; use futures::{StreamExt, TryStreamExt}; use itertools::Itertools; @@ -362,7 +363,7 @@ impl ExecutionPlan for ParquetExec { let repartitioned_file_groups_option = FileGroupPartitioner::new() .with_target_partitions(target_partitions) .with_repartition_file_min_size(repartition_file_min_size) - .with_preserve_order_within_groups(self.output_ordering().is_some()) + .with_preserve_order_within_groups(self.cache.output_ordering().is_some()) .repartition_file_groups(&self.base_config.file_groups); let mut new_plan = self.clone(); @@ -1557,7 +1558,7 @@ mod tests { expected_row_num: Option, file_schema: SchemaRef, ) -> Result<()> { - let parquet_exec = ParquetExec::new( + let parquet_exec = Arc::new(ParquetExec::new( FileScanConfig { object_store_url: ObjectStoreUrl::local_filesystem(), file_groups, @@ -1570,7 +1571,7 @@ mod tests { }, None, None, - ); + )) as Arc; assert_eq!(parquet_exec.output_partitioning().partition_count(), 1); let results = parquet_exec.execute(0, state.task_ctx())?.next().await; @@ -1685,7 +1686,10 @@ mod tests { None, None, ); - assert_eq!(parquet_exec.output_partitioning().partition_count(), 1); + assert_eq!( + parquet_exec.cache.output_partitioning().partition_count(), + 1 + ); assert_eq!(parquet_exec.schema().as_ref(), &expected_schema); let mut results = parquet_exec.execute(0, task_ctx)?; diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index c7ffc7838b36..c08aadc33c74 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -56,6 +56,7 @@ use datafusion_physical_expr::{ use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; +use datafusion_physical_plan::ExecutionPlanProperties; use itertools::izip; /// The `EnforceDistribution` rule ensures that distribution requirements are @@ -1404,6 +1405,7 @@ pub(crate) mod tests { // model that it requires the output ordering of its input fn required_input_ordering(&self) -> Vec>> { vec![self + .cache .output_ordering() .map(PhysicalSortRequirement::from_sort_exprs)] } diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index b459c86518b6..25280261c0a0 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -65,6 +65,7 @@ use datafusion_physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::partial_sort::PartialSortExec; +use datafusion_physical_plan::ExecutionPlanProperties; use itertools::izip; /// This rule inspects [`SortExec`]'s in the given physical plan and removes the @@ -390,7 +391,7 @@ fn 
analyze_immediate_sort_removal( // If this sort is unnecessary, we should remove it: if sort_input .equivalence_properties() - .ordering_satisfy(sort_exec.output_ordering().unwrap_or(&[])) + .ordering_satisfy(node.plan.output_ordering().unwrap_or(&[])) { node.plan = if !sort_exec.preserve_partitioning() && sort_input.output_partitioning().partition_count() > 1 @@ -573,7 +574,7 @@ fn remove_corresponding_sort_from_sub_plan( { node.plan = Arc::new(RepartitionExec::try_new( node.children[0].plan.clone(), - repartition.output_partitioning().clone(), + repartition.properties().output_partitioning().clone(), )?) as _; } }; diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index f74732305372..349e33dae251 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -42,6 +42,7 @@ use datafusion_common::{internal_err, DataFusionError, JoinSide, JoinType}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::sort_properties::SortProperties; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; +use datafusion_physical_plan::ExecutionPlanProperties; /// The [`JoinSelection`] rule tries to modify a given plan so that it can /// accommodate infinite sources and optimize joins in the plan according to @@ -1377,6 +1378,7 @@ mod hash_join_tests { use arrow::record_batch::RecordBatch; use datafusion_common::utils::DataPtr; use datafusion_common::JoinType; + use datafusion_physical_plan::ExecutionPlanProperties; use std::sync::Arc; struct TestCase { diff --git a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs index 9855247151b8..036c938c1ca6 100644 --- a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs @@ -25,6 +25,7 @@ use crate::physical_plan::ExecutionPlan; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::Result; +use datafusion_physical_plan::ExecutionPlanProperties; use itertools::Itertools; use std::sync::Arc; diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs index 992a6e7f82c0..da0697eb9aba 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/core/src/physical_optimizer/output_requirements.rs @@ -33,7 +33,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{Result, Statistics}; use datafusion_physical_expr::{Distribution, LexRequirement, PhysicalSortRequirement}; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use datafusion_physical_plan::PlanProperties; +use datafusion_physical_plan::{ExecutionPlanProperties, PlanProperties}; /// This rule either adds or removes [`OutputRequirements`]s to/from the physical /// plan according to its `mode` attribute, which is set by the constructors @@ -242,7 +242,7 @@ fn require_top_ordering_helper( if children.len() != 1 { Ok((plan, false)) } else if let Some(sort_exec) = plan.as_any().downcast_ref::() { - let req_ordering = sort_exec.output_ordering().unwrap_or(&[]); + let req_ordering = plan.output_ordering().unwrap_or(&[]); let req_dist = sort_exec.required_input_distribution()[0].clone(); let reqs = 
PhysicalSortRequirement::from_sort_exprs(req_ordering); Ok(( diff --git a/datafusion/core/src/physical_optimizer/pipeline_checker.rs b/datafusion/core/src/physical_optimizer/pipeline_checker.rs index 9a7afcb6409d..dcf9ef6f48b7 100644 --- a/datafusion/core/src/physical_optimizer/pipeline_checker.rs +++ b/datafusion/core/src/physical_optimizer/pipeline_checker.rs @@ -31,6 +31,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{plan_err, DataFusionError}; use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported}; use datafusion_physical_plan::joins::SymmetricHashJoinExec; +use datafusion_physical_plan::ExecutionPlanProperties; /// The PipelineChecker rule rejects non-runnable query plans that use /// pipeline-breaking operators on infinite input(s). diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 79d22374f9c2..1b9bf17bffa4 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -53,6 +53,7 @@ use datafusion_physical_expr::{ use datafusion_physical_plan::streaming::StreamingTableExec; use datafusion_physical_plan::union::UnionExec; +use datafusion_physical_plan::ExecutionPlanProperties; use itertools::Itertools; /// This rule inspects [`ProjectionExec`]'s in the given physical plan and tries to diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index 8825feb45e98..be4b7d13bc7e 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -32,6 +32,7 @@ use datafusion_common::tree_node::Transformed; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::tree_node::PlanContext; +use datafusion_physical_plan::ExecutionPlanProperties; use itertools::izip; /// For a given `plan`, this object carries the information one needs from its diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index 0efa908cf5fc..a5b1f12147f3 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -37,6 +37,7 @@ use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{ LexRequirementRef, PhysicalSortExpr, PhysicalSortRequirement, }; +use datafusion_physical_plan::ExecutionPlanProperties; /// This is a "data class" we use within the [`EnforceSorting`] rule to push /// down [`SortExec`] in the plan. In some cases, we can reduce the total @@ -262,7 +263,7 @@ fn try_pushdown_requirements_to_join( &smj.maintains_input_order(), Some(probe_side), ); - let mut smj_eqs = smj.equivalence_properties().clone(); + let mut smj_eqs = smj.properties().equivalence_properties().clone(); // smj will have this ordering when its input changes. 
smj_eqs = smj_eqs.with_reorder(new_output_ordering.unwrap_or_default()); let should_pushdown = smj_eqs.ordering_satisfy_requirement(parent_required); diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index dd0261420304..7459deb1f72a 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -30,6 +30,7 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::Result; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalSortExpr; +use datafusion_physical_plan::ExecutionPlanProperties; use itertools::Itertools; use std::sync::Arc; @@ -86,7 +87,7 @@ impl TopKAggregation { let children = sort.children(); let child = children.iter().exactly_one().ok()?; - let order = sort.output_ordering()?; + let order = plan.output_ordering()?; let order = order.iter().exactly_one().ok()?; let limit = sort.fetch()?; diff --git a/datafusion/core/src/physical_optimizer/utils.rs b/datafusion/core/src/physical_optimizer/utils.rs index 4f4b17345ef8..f4c2c3873f68 100644 --- a/datafusion/core/src/physical_optimizer/utils.rs +++ b/datafusion/core/src/physical_optimizer/utils.rs @@ -30,6 +30,7 @@ use crate::physical_plan::ExecutionPlan; use datafusion_physical_expr::{LexRequirement, PhysicalSortRequirement}; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::tree_node::PlanContext; +use datafusion_physical_plan::ExecutionPlanProperties; /// This utility function adds a `SortExec` above an operator according to the /// given ordering requirements while preserving the original partitioning. diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index debde6aa4571..b16cc77a43f2 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -96,6 +96,7 @@ use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; use datafusion_sql::utils::window_expr_common_partition_keys; use async_trait::async_trait; +use datafusion_physical_plan::ExecutionPlanProperties; use futures::future::BoxFuture; use futures::{FutureExt, StreamExt, TryStreamExt}; use itertools::{multiunzip, Itertools}; diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 98d41cca6764..f96eacf45896 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -20,7 +20,7 @@ use std::any::Any; use std::sync::Arc; -use super::{DisplayAs, ExecutionMode, PlanProperties}; +use super::{DisplayAs, ExecutionMode, ExecutionPlanProperties, PlanProperties}; use crate::aggregates::{ no_grouping::AggregateStream, row_hash::GroupedHashAggregateStream, topk_stream::GroupedTopKAggregateStream, @@ -496,7 +496,7 @@ impl AggregateExec { return false; } // ensure there is no output ordering; can this rule be relaxed? 
- if self.output_ordering().is_some() { + if self.cache.output_ordering().is_some() { return false; } // ensure no ordering is required on the input diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index f9db0a050cfc..8036012b7262 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -32,7 +32,7 @@ use crate::metrics::{BaselineMetrics, RecordOutput}; use crate::sorts::sort::{read_spill_as_stream, sort_batch}; use crate::sorts::streaming_merge; use crate::stream::RecordBatchStreamAdapter; -use crate::{aggregates, ExecutionPlan, PhysicalExpr}; +use crate::{aggregates, PhysicalExpr}; use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::*; @@ -341,6 +341,7 @@ impl GroupedHashAggregateStream { .with_can_spill(true) .register(context.memory_pool()); let (ordering, _) = agg + .cache .equivalence_properties() .find_longest_permutation(&agg_group_by.output_exprs()); let group_ordering = GroupOrdering::try_new( diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index fed4b97d2afb..f771ac238887 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -21,7 +21,10 @@ use std::sync::Arc; use std::{any::Any, time::Instant}; use super::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; -use super::{DisplayAs, Distribution, PlanProperties, SendableRecordBatchStream}; +use super::{ + DisplayAs, Distribution, ExecutionPlanProperties, PlanProperties, + SendableRecordBatchStream, +}; use crate::display::DisplayableExecutionPlan; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; diff --git a/datafusion/physical-plan/src/coalesce_batches.rs b/datafusion/physical-plan/src/coalesce_batches.rs index 055f16288f95..0b9ecebbb1e8 100644 --- a/datafusion/physical-plan/src/coalesce_batches.rs +++ b/datafusion/physical-plan/src/coalesce_batches.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; -use super::{DisplayAs, PlanProperties, Statistics}; +use super::{DisplayAs, ExecutionPlanProperties, PlanProperties, Statistics}; use crate::{ DisplayFormatType, ExecutionPlan, RecordBatchStream, SendableRecordBatchStream, }; diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 7037445164a3..3c5b7e9c13fb 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -23,7 +23,10 @@ use std::sync::Arc; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::stream::{ObservedStream, RecordBatchReceiverStream}; -use super::{DisplayAs, PlanProperties, SendableRecordBatchStream, Statistics}; +use super::{ + DisplayAs, ExecutionPlanProperties, PlanProperties, SendableRecordBatchStream, + Statistics, +}; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index 5172bc9b2a3c..003c60edd9a8 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -22,7 +22,7 @@ use std::fs::{metadata, File}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use super::SendableRecordBatchStream; +use super::{ExecutionPlanProperties, SendableRecordBatchStream}; use 
crate::stream::RecordBatchReceiverStream; use crate::{ColumnStatistics, ExecutionPlan, Statistics}; diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 95c09f541cc2..4155b00820f4 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -24,8 +24,8 @@ use std::sync::Arc; use std::task::{Context, Poll}; use super::{ - ColumnStatistics, DisplayAs, PlanProperties, RecordBatchStream, - SendableRecordBatchStream, Statistics, + ColumnStatistics, DisplayAs, ExecutionPlanProperties, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, Statistics, }; use crate::{ metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}, diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index fd0bec108e03..7d44828ac243 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -23,8 +23,8 @@ use std::fmt::Debug; use std::sync::Arc; use super::{ - DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, - SendableRecordBatchStream, + DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, Partitioning, + PlanProperties, SendableRecordBatchStream, }; use crate::metrics::MetricsSet; use crate::stream::RecordBatchStreamAdapter; diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index f73f3d36cdb4..9f8dc0ce56b0 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -26,6 +26,7 @@ use super::utils::{ use crate::coalesce_batches::concat_batches; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use crate::ExecutionPlanProperties; use crate::{ execution_mode_from_children, ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, ExecutionPlan, PlanProperties, RecordBatchStream, diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 2fdb2a17ebe8..ee3438c6a363 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -27,6 +27,7 @@ use super::{ utils::{OnceAsync, OnceFut}, PartitionMode, }; +use crate::ExecutionPlanProperties; use crate::{ coalesce_partitions::CoalescePartitionsExec, execution_mode_from_children, handle_state, diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index 5d2175d4a820..6fe28c8b54f3 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -35,8 +35,8 @@ use crate::joins::utils::{ use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::{ execution_mode_from_children, DisplayAs, DisplayFormatType, Distribution, - ExecutionMode, ExecutionPlan, PlanProperties, RecordBatchStream, - SendableRecordBatchStream, + ExecutionMode, ExecutionPlan, ExecutionPlanProperties, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, }; use arrow::array::{ diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index bde831b731ba..7b70a2952b4c 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -38,8 +38,8 @@ use crate::joins::utils::{ use 
crate::metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet};
 use crate::{
     execution_mode_from_children, metrics, DisplayAs, DisplayFormatType, Distribution,
-    ExecutionPlan, PhysicalExpr, PlanProperties, RecordBatchStream,
-    SendableRecordBatchStream, Statistics,
+    ExecutionPlan, ExecutionPlanProperties, PhysicalExpr, PlanProperties,
+    RecordBatchStream, SendableRecordBatchStream, Statistics,
 };
 
 use arrow::array::*;
@@ -189,16 +189,16 @@ impl SortMergeJoinExec {
         &self.on
     }
 
-    pub fn right(&self) -> &dyn ExecutionPlan {
-        self.right.as_ref()
+    pub fn right(&self) -> &Arc<dyn ExecutionPlan> {
+        &self.right
     }
 
     pub fn join_type(&self) -> JoinType {
         self.join_type
     }
 
-    pub fn left(&self) -> &dyn ExecutionPlan {
-        self.left.as_ref()
+    pub fn left(&self) -> &Arc<dyn ExecutionPlan> {
+        &self.left
     }
 
     /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
index 77871a8b5483..6460b2b478ef 100644
--- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
+++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
@@ -50,8 +50,8 @@ use crate::{
     expressions::PhysicalSortExpr,
     joins::StreamJoinPartitionMode,
     metrics::{ExecutionPlanMetricsSet, MetricsSet},
-    DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, PlanProperties,
-    RecordBatchStream, SendableRecordBatchStream, Statistics,
+    DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties,
+    PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics,
 };
 
 use arrow::array::{
diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs
index e21a96cee2ff..e37f84fb0150 100644
--- a/datafusion/physical-plan/src/lib.rs
+++ b/datafusion/physical-plan/src/lib.rs
@@ -127,33 +127,6 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
     fn properties(&self) -> &PlanProperties;
 
-    /// Specifies how the output of this `ExecutionPlan` is split into
-    /// partitions.
-    fn output_partitioning(&self) -> &Partitioning {
-        &self.properties().partitioning
-    }
-
-    /// Specifies whether this plan generates an infinite stream of records.
-    /// If the plan does not support pipelining, but its input(s) are
-    /// infinite, returns an error to indicate this.
-    fn execution_mode(&self) -> ExecutionMode {
-        self.properties().exec_mode
-    }
-
-    /// If the output of this `ExecutionPlan` within each partition is sorted,
-    /// returns `Some(keys)` with the description of how it was sorted.
-    ///
-    /// For example, Sort, (obviously) produces sorted output as does
-    /// SortPreservingMergeStream. Less obviously `Projection`
-    /// produces sorted output if its input was sorted as it does not
-    /// reorder the input rows,
-    ///
-    /// It is safe to return `None` here if your `ExecutionPlan` does not
-    /// have any particular output order here
-    fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
-        self.properties().output_ordering.as_deref()
-    }
-
     /// Specifies the data distribution requirements for all the
     /// children for this `ExecutionPlan`, By default it's [[Distribution::UnspecifiedDistribution]] for each child,
     fn required_input_distribution(&self) -> Vec<Distribution> {
@@ -212,27 +185,6 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
             .collect()
     }
 
-    /// Get the [`EquivalenceProperties`] within the plan.
-    ///
-    /// Equivalence properties tell DataFusion what columns are known to be
-    /// equal, during various optimization passes. By default, this returns "no
-    /// known equivalences" which is always correct, but may cause DataFusion to
-    /// unnecessarily resort data.
-    ///
-    /// If this ExecutionPlan makes no changes to the schema of the rows flowing
-    /// through it or how columns within each row relate to each other, it
-    /// should return the equivalence properties of its input. For
-    /// example, since `FilterExec` may remove rows from its input, but does not
-    /// otherwise modify them, it preserves its input equivalence properties.
-    /// However, since `ProjectionExec` may calculate derived expressions, it
-    /// needs special handling.
-    ///
-    /// See also [`Self::maintains_input_order`] and [`Self::output_ordering`]
-    /// for related concepts.
-    fn equivalence_properties(&self) -> &EquivalenceProperties {
-        &self.properties().eq_properties
-    }
-
     /// Get a list of children `ExecutionPlan`s that act as inputs to this plan.
     /// The returned list will be empty for leaf nodes such as scans, will contain
     /// a single value for unary nodes, or two values for binary nodes (such as
@@ -450,6 +402,66 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
     }
 }
 
+pub trait ExecutionPlanProperties {
+    fn output_partitioning(&self) -> &Partitioning;
+
+    fn execution_mode(&self) -> ExecutionMode;
+
+    fn output_ordering(&self) -> Option<&[PhysicalSortExpr]>;
+
+    fn equivalence_properties(&self) -> &EquivalenceProperties;
+}
+
+impl ExecutionPlanProperties for Arc<dyn ExecutionPlan> {
+    /// Specifies how the output of this `ExecutionPlan` is split into
+    /// partitions.
+    fn output_partitioning(&self) -> &Partitioning {
+        self.properties().output_partitioning()
+    }
+
+    /// Specifies whether this plan generates an infinite stream of records.
+    /// If the plan does not support pipelining, but its input(s) are
+    /// infinite, returns an error to indicate this.
+    fn execution_mode(&self) -> ExecutionMode {
+        self.properties().execution_mode()
+    }
+
+    /// If the output of this `ExecutionPlan` within each partition is sorted,
+    /// returns `Some(keys)` with the description of how it was sorted.
+    ///
+    /// For example, Sort, (obviously) produces sorted output as does
+    /// SortPreservingMergeStream. Less obviously `Projection`
+    /// produces sorted output if its input was sorted as it does not
+    /// reorder the input rows,
+    ///
+    /// It is safe to return `None` here if your `ExecutionPlan` does not
+    /// have any particular output order here
+    fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
+        self.properties().output_ordering()
+    }
+
+    /// Get the [`EquivalenceProperties`] within the plan.
+    ///
+    /// Equivalence properties tell DataFusion what columns are known to be
+    /// equal, during various optimization passes. By default, this returns "no
+    /// known equivalences" which is always correct, but may cause DataFusion to
+    /// unnecessarily resort data.
+    ///
+    /// If this ExecutionPlan makes no changes to the schema of the rows flowing
+    /// through it or how columns within each row relate to each other, it
+    /// should return the equivalence properties of its input. For
+    /// example, since `FilterExec` may remove rows from its input, but does not
+    /// otherwise modify them, it preserves its input equivalence properties.
+    /// However, since `ProjectionExec` may calculate derived expressions, it
+    /// needs special handling.
+    ///
+    /// See also [`Self::maintains_input_order`] and [`Self::output_ordering`]
+    /// for related concepts.
+    fn equivalence_properties(&self) -> &EquivalenceProperties {
+        self.properties().equivalence_properties()
+    }
+}
+
 /// Describes the execution mode of an operator's resulting stream with respect
 /// to its size and behavior. There are three possible execution modes: `Bounded`,
 /// `Unbounded` and `PipelineBreaking`.
@@ -564,6 +576,22 @@ impl PlanProperties {
         self
     }
 
+    pub fn equivalence_properties(&self) -> &EquivalenceProperties {
+        &self.eq_properties
+    }
+
+    pub fn output_partitioning(&self) -> &Partitioning {
+        &self.partitioning
+    }
+
+    pub fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
+        self.output_ordering.as_deref()
+    }
+
+    pub fn execution_mode(&self) -> ExecutionMode {
+        self.exec_mode
+    }
+
     /// Get schema of the node.
     fn schema(&self) -> &SchemaRef {
         self.eq_properties.schema()
@@ -577,11 +605,8 @@ impl PlanProperties {
 /// 2. CoalescePartitionsExec for collapsing all of the partitions into one without ordering guarantee
 /// 3. SortPreservingMergeExec for collapsing all of the sorted partitions into one with ordering guarantee
 pub fn need_data_exchange(plan: Arc<dyn ExecutionPlan>) -> bool {
-    if let Some(repart) = plan.as_any().downcast_ref::<RepartitionExec>() {
-        !matches!(
-            repart.output_partitioning(),
-            Partitioning::RoundRobinBatch(_)
-        )
+    if let Some(_) = plan.as_any().downcast_ref::<RepartitionExec>() {
+        !matches!(plan.output_partitioning(), Partitioning::RoundRobinBatch(_))
     } else if let Some(coalesce) = plan.as_any().downcast_ref::<CoalescePartitionsExec>()
     {
         coalesce.input().output_partitioning().partition_count() > 1
@@ -652,7 +677,8 @@ pub fn execute_stream(
         1 => plan.execute(0, context),
         _ => {
             // merge into a single partition
-            let plan = CoalescePartitionsExec::new(plan.clone());
+            let plan = Arc::new(CoalescePartitionsExec::new(plan.clone()))
+                as Arc<dyn ExecutionPlan>;
             // CoalescePartitionsExec must produce a single partition
             assert_eq!(1, plan.output_partitioning().partition_count());
             plan.execute(0, context)
diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs
index 1c09ec88ae08..3520d45f9e6c 100644
--- a/datafusion/physical-plan/src/limit.rs
+++ b/datafusion/physical-plan/src/limit.rs
@@ -24,7 +24,7 @@ use std::task::{Context, Poll};
 
 use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
 use super::{
-    DisplayAs, ExecutionMode, PlanProperties, RecordBatchStream,
+    DisplayAs, ExecutionMode, ExecutionPlanProperties, PlanProperties, RecordBatchStream,
     SendableRecordBatchStream, Statistics,
 };
 use crate::{DisplayFormatType, Distribution, ExecutionPlan, Partitioning};
@@ -410,7 +410,7 @@ impl ExecutionPlan for LocalLimitExec {
             _ => Statistics {
                 // the result output row number will always be no greater than the limit number
                 num_rows: Precision::Inexact(
                    self.fetch * self.output_partitioning().partition_count(),
-                    self.fetch * self.output_partitioning().partition_count(),
+                    self.fetch * self.cache.output_partitioning().partition_count(),
                 ),
 
                 column_statistics: col_stats,
diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs
index 402feeaf80ba..8fe82e7de3eb 100644
--- a/datafusion/physical-plan/src/projection.rs
+++ b/datafusion/physical-plan/src/projection.rs
@@ -29,7 +29,8 @@ use std::task::{Context, Poll};
 use super::expressions::Column;
 use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet};
 use super::{
-    DisplayAs, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics,
+    DisplayAs, ExecutionPlanProperties, PlanProperties, RecordBatchStream,
+
SendableRecordBatchStream, Statistics, }; use crate::{ ColumnStatistics, DisplayFormatType, ExecutionPlan, Partitioning, PhysicalExpr, diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 1cbc71471eb2..d1befb7c53c0 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -26,7 +26,9 @@ use std::{any::Any, vec}; use super::common::SharedMemoryReservation; use super::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; -use super::{DisplayAs, RecordBatchStream, SendableRecordBatchStream}; +use super::{ + DisplayAs, ExecutionPlanProperties, RecordBatchStream, SendableRecordBatchStream, +}; use crate::common::{transpose, SpawnedTask}; use crate::hash_utils::create_hashes; use crate::metrics::BaselineMetrics; diff --git a/datafusion/physical-plan/src/sorts/partial_sort.rs b/datafusion/physical-plan/src/sorts/partial_sort.rs index 01e592d29f7b..500df6153fdb 100644 --- a/datafusion/physical-plan/src/sorts/partial_sort.rs +++ b/datafusion/physical-plan/src/sorts/partial_sort.rs @@ -61,8 +61,8 @@ use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::sort::sort_batch; use crate::{ - DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, - PlanProperties, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, + Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::compute::concat_batches; diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index a96834cf0553..f46958663252 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -37,7 +37,8 @@ use crate::stream::{RecordBatchReceiverStream, RecordBatchStreamAdapter}; use crate::topk::TopK; use crate::{ DisplayAs, DisplayFormatType, Distribution, EmptyRecordBatchStream, ExecutionMode, - ExecutionPlan, Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, + ExecutionPlan, ExecutionPlanProperties, Partitioning, PlanProperties, + SendableRecordBatchStream, Statistics, }; use arrow::compute::{concat_batches, lexsort_to_indices, take}; diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index 862146e10549..e1e197e205e2 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -25,8 +25,8 @@ use crate::expressions::PhysicalSortExpr; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use crate::sorts::streaming_merge; use crate::{ - DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, Partitioning, - PlanProperties, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, + Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, }; use datafusion_common::{internal_err, DataFusionError, Result}; diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index a533f2249588..9f637361ff8f 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -29,8 +29,9 @@ use std::{any::Any, sync::Arc}; use super::{ execution_mode_from_children, metrics::{ExecutionPlanMetricsSet, 
MetricsSet}, - ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, Partitioning, - PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, + ColumnStatistics, DisplayAs, DisplayFormatType, ExecutionPlan, + ExecutionPlanProperties, Partitioning, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, Statistics, }; use crate::metrics::BaselineMetrics; use crate::stream::ObservedStream; @@ -204,7 +205,7 @@ impl ExecutionPlan for UnionExec { // which is the "meet" of all input orderings. In this example, this // function will return vec![false, true, true], indicating that we // preserve the orderings for the 2nd and the 3rd children. - if let Some(output_ordering) = self.output_ordering() { + if let Some(output_ordering) = self.cache.output_ordering() { self.inputs() .iter() .map(|child| { diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 72fcbfd2ffb9..776d98f5b6ea 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -20,7 +20,7 @@ use std::{any::Any, sync::Arc}; use super::metrics::{self, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; -use super::{DisplayAs, PlanProperties}; +use super::{DisplayAs, ExecutionPlanProperties, PlanProperties}; use crate::{ expressions::Column, DisplayFormatType, Distribution, ExecutionPlan, PhysicalExpr, RecordBatchStream, SendableRecordBatchStream, diff --git a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs index eb4a27341785..c99ec599596d 100644 --- a/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/bounded_window_agg_exec.rs @@ -35,8 +35,8 @@ use crate::windows::{ }; use crate::{ ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, - InputOrderMode, PlanProperties, RecordBatchStream, SendableRecordBatchStream, - Statistics, WindowExpr, + ExecutionPlanProperties, InputOrderMode, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, Statistics, WindowExpr, }; use arrow::{ diff --git a/datafusion/physical-plan/src/windows/mod.rs b/datafusion/physical-plan/src/windows/mod.rs index 0e20c88619c2..c19694aef8b7 100644 --- a/datafusion/physical-plan/src/windows/mod.rs +++ b/datafusion/physical-plan/src/windows/mod.rs @@ -27,7 +27,7 @@ use crate::{ cume_dist, dense_rank, lag, lead, percent_rank, rank, Literal, NthValue, Ntile, PhysicalSortExpr, RowNumber, }, - udaf, ExecutionPlan, InputOrderMode, PhysicalExpr, + udaf, ExecutionPlan, ExecutionPlanProperties, InputOrderMode, PhysicalExpr, }; use arrow::datatypes::Schema; diff --git a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index efc84526c9ad..66d31ec94d1a 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -31,8 +31,8 @@ use crate::windows::{ }; use crate::{ ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionMode, - ExecutionPlan, PhysicalExpr, PlanProperties, RecordBatchStream, - SendableRecordBatchStream, Statistics, WindowExpr, + ExecutionPlan, ExecutionPlanProperties, PhysicalExpr, PlanProperties, + RecordBatchStream, SendableRecordBatchStream, Statistics, WindowExpr, }; use arrow::array::ArrayRef; From ea30b93194cfdfe4148a45fc0e33549884ba81b1 Mon Sep 17 00:00:00 2001 From: Eddy Oyieko <67474838+mobley-trent@users.noreply.github.com> 
Date: Wed, 28 Feb 2024 01:37:46 +0300
Subject: [PATCH 33/45] feat : Support for deregistering user defined
 functions (#9239)

* Initial commit

* Updated mod.rs - Docstrings, Initial test

* Updated mod.rs - Fixed udf test

* Added udaf test, Updated udf test

* Added test for udwf

* Linting with rustfmt

* Update datafusion/core/src/execution/context/mod.rs

Co-authored-by: Andrew Lamb

* Moved tests to core/tests/user_defined

* fix fmt

---------

Co-authored-by: Andrew Lamb
---
 datafusion/core/src/execution/context/mod.rs | 33 +++++++++++++++++++
 .../user_defined/user_defined_aggregates.rs  | 23 +++++++++++++
 .../user_defined_scalar_functions.rs         | 16 +++++++++
 .../user_defined_window_functions.rs         | 15 +++++++++
 datafusion/execution/src/registry.rs         | 27 +++++++++++++++
 5 files changed, 114 insertions(+)

diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs
index 453a00a1a5cf..3aa4edfe3adc 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -849,6 +849,21 @@ impl SessionContext {
         self.state.write().register_udwf(Arc::new(f)).ok();
     }
 
+    /// Deregisters a UDF within this context.
+    pub fn deregister_udf(&self, name: &str) {
+        self.state.write().deregister_udf(name).ok();
+    }
+
+    /// Deregisters a UDAF within this context.
+    pub fn deregister_udaf(&self, name: &str) {
+        self.state.write().deregister_udaf(name).ok();
+    }
+
+    /// Deregisters a UDWF within this context.
+    pub fn deregister_udwf(&self, name: &str) {
+        self.state.write().deregister_udwf(name).ok();
+    }
+
     /// Creates a [`DataFrame`] for reading a data source.
     ///
     /// For more control such as reading multiple files, you can use
@@ -2026,6 +2041,24 @@ impl FunctionRegistry for SessionState {
     fn register_udwf(&mut self, udwf: Arc<WindowUDF>) -> Result<Option<Arc<WindowUDF>>> {
         Ok(self.window_functions.insert(udwf.name().into(), udwf))
     }
+
+    fn deregister_udf(&mut self, name: &str) -> Result<Option<Arc<ScalarUDF>>> {
+        let udf = self.scalar_functions.remove(name);
+        if let Some(udf) = &udf {
+            for alias in udf.aliases() {
+                self.scalar_functions.remove(alias);
+            }
+        }
+        Ok(udf)
+    }
+
+    fn deregister_udaf(&mut self, name: &str) -> Result<Option<Arc<AggregateUDF>>> {
+        Ok(self.aggregate_functions.remove(name))
+    }
+
+    fn deregister_udwf(&mut self, name: &str) -> Result<Option<Arc<WindowUDF>>> {
+        Ok(self.window_functions.remove(name))
+    }
 }
 
 impl OptimizerConfig for SessionState {
diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs
index 0b29ad10d670..8daeefd236f7 100644
--- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs
+++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs
@@ -255,6 +255,29 @@ async fn simple_udaf() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn deregister_udaf() -> Result<()> {
+    let ctx = SessionContext::new();
+    let my_avg = create_udaf(
+        "my_avg",
+        vec![DataType::Float64],
+        Arc::new(DataType::Float64),
+        Volatility::Immutable,
+        Arc::new(|_| Ok(Box::<AvgAccumulator>::default())),
+        Arc::new(vec![DataType::UInt64, DataType::Float64]),
+    );
+
+    ctx.register_udaf(my_avg.clone());
+
+    assert!(ctx.state().aggregate_functions().contains_key("my_avg"));
+
+    ctx.deregister_udaf("my_avg");
+
+    assert!(!ctx.state().aggregate_functions().contains_key("my_avg"));
+
+    Ok(())
+}
+
 #[tokio::test]
 async fn case_sensitive_identifiers_user_defined_aggregates() -> Result<()> {
     let ctx = SessionContext::new();
diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index 9812789740f7..a255498eb5f7 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -498,6 +498,22 @@ async fn test_user_defined_functions_zero_argument() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn deregister_udf() -> Result<()> {
+    let random_normal_udf = ScalarUDF::from(RandomUDF::new());
+    let ctx = SessionContext::new();
+
+    ctx.register_udf(random_normal_udf.clone());
+
+    assert!(ctx.udfs().contains("random_udf"));
+
+    ctx.deregister_udf("random_udf");
+
+    assert!(!ctx.udfs().contains("random_udf"));
+
+    Ok(())
+}
+
 #[derive(Debug)]
 struct TakeUDF {
     signature: Signature,
diff --git a/datafusion/core/tests/user_defined/user_defined_window_functions.rs b/datafusion/core/tests/user_defined/user_defined_window_functions.rs
index 54eab4315a97..cfd74f8861e3 100644
--- a/datafusion/core/tests/user_defined/user_defined_window_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_window_functions.rs
@@ -103,6 +103,21 @@ async fn test_udwf() {
     assert_eq!(test_state.evaluate_all_called(), 2);
 }
 
+#[tokio::test]
+async fn test_deregister_udwf() -> Result<()> {
+    let test_state = Arc::new(TestState::new());
+    let mut ctx = SessionContext::new();
+    OddCounter::register(&mut ctx, Arc::clone(&test_state));
+
+    assert!(ctx.state().window_functions().contains_key("odd_counter"));
+
+    ctx.deregister_udwf("odd_counter");
+
+    assert!(!ctx.state().window_functions().contains_key("odd_counter"));
+
+    Ok(())
+}
+
 /// Basic user defined window function with bounded window
 #[tokio::test]
 async fn test_udwf_bounded_window_ignores_frame() {
diff --git a/datafusion/execution/src/registry.rs b/datafusion/execution/src/registry.rs
index 4569967acb08..6e0a932f0bc5 100644
--- a/datafusion/execution/src/registry.rs
+++ b/datafusion/execution/src/registry.rs
@@ -66,6 +66,33 @@ pub trait FunctionRegistry {
     fn register_udwf(&mut self, _udaf: Arc<WindowUDF>) -> Result<Option<Arc<WindowUDF>>> {
         not_impl_err!("Registering WindowUDF")
     }
+
+    /// Deregisters a [`ScalarUDF`], returning the implementation that was
+    /// deregistered.
+    ///
+    /// Returns an error (the default) if the function can not be deregistered,
+    /// for example if the registry is read only.
+    fn deregister_udf(&mut self, _name: &str) -> Result<Option<Arc<ScalarUDF>>> {
+        not_impl_err!("Deregistering ScalarUDF")
+    }
+
+    /// Deregisters a [`AggregateUDF`], returning the implementation that was
+    /// deregistered.
+    ///
+    /// Returns an error (the default) if the function can not be deregistered,
+    /// for example if the registry is read only.
+    fn deregister_udaf(&mut self, _name: &str) -> Result<Option<Arc<AggregateUDF>>> {
+        not_impl_err!("Deregistering AggregateUDF")
+    }
+
+    /// Deregisters a [`WindowUDF`], returning the implementation that was
+    /// deregistered.
+    ///
+    /// Returns an error (the default) if the function can not be deregistered,
+    /// for example if the registry is read only.
+    fn deregister_udwf(&mut self, _name: &str) -> Result<Option<Arc<WindowUDF>>> {
+        not_impl_err!("Deregistering WindowUDF")
+    }
 }
 
 /// Serializer and deserializer registry for extensions like [UserDefinedLogicalNode].
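The API above is deliberately symmetric with registration: `SessionContext::deregister_udf` takes a write lock on the `SessionState` and calls the new `FunctionRegistry::deregister_udf` trait method, whose `SessionState` implementation removes the function (and any of its aliases) from the registry and returns the removed implementation. What follows is a minimal, self-contained sketch of the resulting user-facing flow, assuming the `datafusion` crate at this revision; the UDF name `my_identity` and its trivial body are invented for illustration and are not part of the patch.

use std::sync::Arc;

use datafusion::arrow::datatypes::DataType;
use datafusion::error::Result;
use datafusion::logical_expr::{create_udf, ColumnarValue, Volatility};
use datafusion::prelude::SessionContext;

fn main() -> Result<()> {
    let ctx = SessionContext::new();

    // A trivial scalar UDF (hypothetical, for illustration only) that
    // echoes its first argument back unchanged.
    let my_identity = create_udf(
        "my_identity",
        vec![DataType::Int32],
        Arc::new(DataType::Int32),
        Volatility::Immutable,
        Arc::new(|args: &[ColumnarValue]| Ok(args[0].clone())),
    );

    ctx.register_udf(my_identity);
    assert!(ctx.udfs().contains("my_identity"));

    // New with this patch: remove the function from the registry again.
    ctx.deregister_udf("my_identity");
    assert!(!ctx.udfs().contains("my_identity"));

    Ok(())
}

Note that the context-level wrappers discard the inner `Result` with `.ok()`, mirroring `register_udf`; code that needs the removed `ScalarUDF` back can call `deregister_udf` on the `SessionState` directly.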
From 544b3d9e8f7b9cca0a649b425b334baf6cae1dbd Mon Sep 17 00:00:00 2001
From: junxiangMu <63799833+guojidan@users.noreply.github.com>
Date: Wed, 28 Feb 2024 06:43:34 +0800
Subject: [PATCH 34/45] fix return type (#9357)

---
 datafusion/functions/src/core/nvl.rs       | 6 +-----
 datafusion/sqllogictest/test_files/nvl.slt | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs
index 6d6ad1cdeb21..caf095ecbbbd 100644
--- a/datafusion/functions/src/core/nvl.rs
+++ b/datafusion/functions/src/core/nvl.rs
@@ -73,11 +73,7 @@ impl ScalarUDFImpl for NVLFunc {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        // NVL has two args and they might get coerced, get a preview of this
-        let coerced_types = datafusion_expr::type_coercion::functions::data_types(arg_types, &self.signature);
-        coerced_types.map(|typs| typs[0].clone())
-            .map_err(|e| e.context("Failed to coerce arguments for NVL")
-        )
+        Ok(arg_types[0].clone())
     }
 
     fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
diff --git a/datafusion/sqllogictest/test_files/nvl.slt b/datafusion/sqllogictest/test_files/nvl.slt
index 81e79e1eb5b0..c77214cc302a 100644
--- a/datafusion/sqllogictest/test_files/nvl.slt
+++ b/datafusion/sqllogictest/test_files/nvl.slt
@@ -114,7 +114,7 @@ SELECT NVL(1, 3);
 ----
 1
 
-query I
+query ?
 SELECT NVL(NULL, NULL);
 ----
 NULL

From 77e5c35e9164ad99683ac41c04089145ae46a482 Mon Sep 17 00:00:00 2001
From: Mehmet Ozan Kabak
Date: Tue, 27 Feb 2024 15:57:44 -0800
Subject: [PATCH 35/45] Final review

---
 .../src/physical_optimizer/enforce_distribution.rs | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs
index c7ffc7838b36..a6d5aa763253 100644
--- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs
+++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs
@@ -443,10 +443,10 @@ where
             join_plan.plan = join_constructor((new_join_on, new_sort_options))?;
         }
     }
-    let mut requirements = join_plan;
-    requirements.children[0].data = left_keys;
-    requirements.children[1].data = right_keys;
-    Ok(requirements)
+
+    join_plan.children[0].data = left_keys;
+    join_plan.children[1].data = right_keys;
+    Ok(join_plan)
 }
 
 fn reorder_aggregate_keys(
@@ -1299,8 +1299,7 @@ pub(crate) mod tests {
     use crate::datasource::file_format::file_compression_type::FileCompressionType;
     use crate::datasource::listing::PartitionedFile;
     use crate::datasource::object_store::ObjectStoreUrl;
-    use crate::datasource::physical_plan::ParquetExec;
-    use crate::datasource::physical_plan::{CsvExec, FileScanConfig};
+    use crate::datasource::physical_plan::{CsvExec, FileScanConfig, ParquetExec};
     use crate::physical_optimizer::enforce_sorting::EnforceSorting;
     use crate::physical_optimizer::output_requirements::OutputRequirements;
     use crate::physical_optimizer::test_utils::{

From 935ebcae1c869875d08e665710ed27e2f83e0108 Mon Sep 17 00:00:00 2001
From: SteveLauC
Date: Wed, 28 Feb 2024 08:44:39 +0800
Subject: [PATCH 36/45] refactor: move acos() to function crate (#9297)

---
 datafusion-cli/Cargo.lock                     | 193 +++++++++---------
 datafusion/expr/src/built_in_function.rs      |  10 +-
 datafusion/expr/src/expr_fn.rs                |   2 -
 datafusion/functions/src/math/acos.rs         | 110 ++++++++++
 datafusion/functions/src/math/mod.rs          |  19 +-
 datafusion/functions/src/math/nans.rs         |   2 +-
 .../optimizer/src/analyzer/type_coercion.rs   |   4 +-
datafusion/physical-expr/src/functions.rs | 1 - datafusion/proto/proto/datafusion.proto | 2 +- datafusion/proto/src/generated/pbjson.rs | 3 - datafusion/proto/src/generated/prost.rs | 4 +- .../proto/src/logical_plan/from_proto.rs | 20 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 - .../tests/cases/roundtrip_physical_plan.rs | 14 +- 14 files changed, 240 insertions(+), 145 deletions(-) create mode 100644 datafusion/functions/src/math/acos.rs diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 69456446f52b..416df5d17f25 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -25,9 +25,9 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" +checksum = "8b79b82693f705137f8fb9b37871d99e4f9a7df12b917eed79c3d3954830a60b" dependencies = [ "cfg-if", "const-random", @@ -345,9 +345,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.13" +version = "2.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00ad3f3a942eee60335ab4342358c161ee296829e0d16ff42fc1d6cb07815467" +checksum = "ed72493ac66d5804837f480ab3766c72bdfab91a65e565fc54fa9e42db0073a8" dependencies = [ "anstyle", "bstr", @@ -384,7 +384,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -791,9 +791,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.9.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" +checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" dependencies = [ "memchr", "regex-automata", @@ -802,9 +802,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.15.0" +version = "3.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d32a994c2b3ca201d9b263612a374263f05e7adde37c4707f693dcd375076d1f" +checksum = "8ea184aa71bb362a1157c896979544cc23974e08fd265f29ea96b59f0b4a555b" [[package]] name = "byteorder" @@ -851,11 +851,10 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.83" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "02f341c093d19155a6e41631ce5971aac4e9a868262212153124c15fa22d1cdc" dependencies = [ - "jobserver", "libc", ] @@ -875,7 +874,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "windows-targets 0.52.0", + "windows-targets 0.52.3", ] [[package]] @@ -1069,12 +1068,12 @@ dependencies = [ [[package]] name = "ctor" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d2b3721e861707777e3195b0158f950ae6dc4a27e4d02ff9f67e3eb3de199e" +checksum = "ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" dependencies = [ "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -1607,7 +1606,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -1624,9 +1623,9 @@ checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = 
"futures-timer" -version = "3.0.2" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" @@ -1700,9 +1699,9 @@ dependencies = [ [[package]] name = "half" -version = "2.3.1" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +checksum = "b5eceaaeec696539ddaf7b333340f1af35a5aa87ae3e4f3ead0532f72affab2e" dependencies = [ "cfg-if", "crunchy", @@ -1751,9 +1750,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.6" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" +checksum = "379dada1584ad501b383485dd706b8afb7a70fcbc7f4da7d780638a5a6124a60" [[package]] name = "hex" @@ -1952,15 +1951,6 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" -[[package]] -name = "jobserver" -version = "0.1.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" -dependencies = [ - "libc", -] - [[package]] name = "js-sys" version = "0.3.68" @@ -2304,7 +2294,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.6", + "hermit-abi 0.3.8", "libc", ] @@ -2336,7 +2326,7 @@ dependencies = [ "quick-xml", "rand", "reqwest", - "ring 0.17.7", + "ring 0.17.8", "rustls-pemfile 2.1.0", "serde", "serde_json", @@ -2524,7 +2514,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -2800,16 +2790,17 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.7" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", + "cfg-if", "getrandom", "libc", "spin 0.9.8", "untrusted 0.9.0", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -2891,7 +2882,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", - "ring 0.17.7", + "ring 0.17.8", "rustls-webpki", "sct", ] @@ -2929,9 +2920,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "048a63e5b3ac996d78d402940b5fa47973d2d080c6c6fffa1d0f19c4445310b7" +checksum = "5ede67b28608b4c60685c7d54122d4400d90f62b40caee7700e700380a390fa8" [[package]] name = "rustls-webpki" @@ -2939,7 +2930,7 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.7", + "ring 0.17.8", "untrusted 0.9.0", ] @@ -2974,9 +2965,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.17" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "same-file" @@ -3008,7 +2999,7 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.7", + "ring 0.17.8", "untrusted 0.9.0", ] @@ -3037,9 +3028,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.21" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" +checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca" [[package]] name = "seq-macro" @@ -3049,29 +3040,29 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] name = "serde_json" -version = "1.0.113" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", @@ -3161,12 +3152,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -3199,7 +3190,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -3245,7 +3236,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -3258,7 +3249,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -3280,9 +3271,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.49" +version = "2.0.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915aea9e586f80826ee59f8453c1101f9d1c4b3964cd2460185ee8e299ada496" +checksum = "6ab617d94515e94ae53b8406c628598680aa0c9587474ecbe58188f7b345d66c" dependencies = [ "proc-macro2", "quote", @@ -3318,9 +3309,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" +checksum = 
"85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand 2.0.1", @@ -3345,9 +3336,9 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "textwrap" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" [[package]] name = "thiserror" @@ -3366,7 +3357,7 @@ checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -3461,7 +3452,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -3558,7 +3549,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -3603,7 +3594,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] @@ -3626,9 +3617,9 @@ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "unicode-normalization" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" dependencies = [ "tinyvec", ] @@ -3757,7 +3748,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", "wasm-bindgen-shared", ] @@ -3791,7 +3782,7 @@ checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3831,7 +3822,7 @@ version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" dependencies = [ - "ring 0.17.7", + "ring 0.17.8", "untrusted 0.9.0", ] @@ -3872,7 +3863,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.3", ] [[package]] @@ -3890,7 +3881,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.3", ] [[package]] @@ -3910,17 +3901,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "d380ba1dc7187569a8a9e91ed34b8ccfc33123bbacb8c0aed2d1ad7f3ef2dc5f" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.3", + "windows_aarch64_msvc 0.52.3", + "windows_i686_gnu 0.52.3", 
+ "windows_i686_msvc 0.52.3", + "windows_x86_64_gnu 0.52.3", + "windows_x86_64_gnullvm 0.52.3", + "windows_x86_64_msvc 0.52.3", ] [[package]] @@ -3931,9 +3922,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "68e5dcfb9413f53afd9c8f86e56a7b4d86d9a2fa26090ea2dc9e40fba56c6ec6" [[package]] name = "windows_aarch64_msvc" @@ -3943,9 +3934,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "8dab469ebbc45798319e69eebf92308e541ce46760b49b18c6b3fe5e8965b30f" [[package]] name = "windows_i686_gnu" @@ -3955,9 +3946,9 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "2a4e9b6a7cac734a8b4138a4e1044eac3404d8326b6c0f939276560687a033fb" [[package]] name = "windows_i686_msvc" @@ -3967,9 +3958,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "28b0ec9c422ca95ff34a78755cfa6ad4a51371da2a5ace67500cf7ca5f232c58" [[package]] name = "windows_x86_64_gnu" @@ -3979,9 +3970,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "704131571ba93e89d7cd43482277d6632589b18ecf4468f591fbae0a8b101614" [[package]] name = "windows_x86_64_gnullvm" @@ -3991,9 +3982,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "42079295511643151e98d61c38c0acc444e52dd42ab456f7ccfd5152e8ecf21c" [[package]] name = "windows_x86_64_msvc" @@ -4003,9 +3994,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "0770833d60a970638e989b3fa9fd2bb1aaadcf88963d1659fd7d9990196ed2d6" [[package]] name = "winreg" @@ -4049,7 +4040,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.49", + "syn 2.0.51", ] [[package]] diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 
cf1e73f780ad..e04106595876 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -42,8 +42,6 @@ use strum_macros::EnumIter; #[derive(Debug, Clone, PartialEq, Eq, Hash, EnumIter, Copy)] pub enum BuiltinScalarFunction { // math functions - /// acos - Acos, /// asin Asin, /// atan @@ -362,7 +360,6 @@ impl BuiltinScalarFunction { pub fn volatility(&self) -> Volatility { match self { // Immutable scalar builtins - BuiltinScalarFunction::Acos => Volatility::Immutable, BuiltinScalarFunction::Asin => Volatility::Immutable, BuiltinScalarFunction::Atan => Volatility::Immutable, BuiltinScalarFunction::Atan2 => Volatility::Immutable, @@ -873,8 +870,7 @@ impl BuiltinScalarFunction { utf8_to_int_type(&input_expr_types[0], "levenshtein") } - BuiltinScalarFunction::Acos - | BuiltinScalarFunction::Asin + BuiltinScalarFunction::Asin | BuiltinScalarFunction::Atan | BuiltinScalarFunction::Acosh | BuiltinScalarFunction::Asinh @@ -1346,8 +1342,7 @@ impl BuiltinScalarFunction { vec![Exact(vec![Utf8, Utf8]), Exact(vec![LargeUtf8, LargeUtf8])], self.volatility(), ), - BuiltinScalarFunction::Acos - | BuiltinScalarFunction::Asin + BuiltinScalarFunction::Asin | BuiltinScalarFunction::Atan | BuiltinScalarFunction::Acosh | BuiltinScalarFunction::Asinh @@ -1438,7 +1433,6 @@ impl BuiltinScalarFunction { /// Returns all names that can be used to call this function pub fn aliases(&self) -> &'static [&'static str] { match self { - BuiltinScalarFunction::Acos => &["acos"], BuiltinScalarFunction::Acosh => &["acosh"], BuiltinScalarFunction::Asin => &["asin"], BuiltinScalarFunction::Asinh => &["asinh"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 55bd40a18900..67bf39050d58 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -531,7 +531,6 @@ scalar_expr!(Sinh, sinh, num, "hyperbolic sine"); scalar_expr!(Cosh, cosh, num, "hyperbolic cosine"); scalar_expr!(Tanh, tanh, num, "hyperbolic tangent"); scalar_expr!(Asin, asin, num, "inverse sine"); -scalar_expr!(Acos, acos, num, "inverse cosine"); scalar_expr!(Atan, atan, num, "inverse tangent"); scalar_expr!(Asinh, asinh, num, "inverse hyperbolic sine"); scalar_expr!(Acosh, acosh, num, "inverse hyperbolic cosine"); @@ -1339,7 +1338,6 @@ mod test { test_unary_scalar_expr!(Cosh, cosh); test_unary_scalar_expr!(Tanh, tanh); test_unary_scalar_expr!(Asin, asin); - test_unary_scalar_expr!(Acos, acos); test_unary_scalar_expr!(Atan, atan); test_unary_scalar_expr!(Asinh, asinh); test_unary_scalar_expr!(Acosh, acosh); diff --git a/datafusion/functions/src/math/acos.rs b/datafusion/functions/src/math/acos.rs new file mode 100644 index 000000000000..22dfd37a0159 --- /dev/null +++ b/datafusion/functions/src/math/acos.rs @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Math function: `acos()`. + +use arrow::array::{ArrayRef, Float32Array, Float64Array}; +use arrow::datatypes::DataType; +use datafusion_common::{exec_err, plan_datafusion_err, DataFusionError, Result}; +use datafusion_expr::ColumnarValue; +use datafusion_expr::{ + utils::generate_signature_error_msg, ScalarUDFImpl, Signature, Volatility, +}; +use std::any::Any; +use std::sync::Arc; + +#[derive(Debug)] +pub struct AcosFunc { + signature: Signature, +} + +impl AcosFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::uniform( + 1, + vec![Float64, Float32], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for AcosFunc { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "acos" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { + if arg_types.len() != 1 { + return Err(plan_datafusion_err!( + "{}", + generate_signature_error_msg( + self.name(), + self.signature().clone(), + arg_types, + ) + )); + } + + let arg_type = &arg_types[0]; + + match arg_type { + DataType::Float64 => Ok(DataType::Float64), + DataType::Float32 => Ok(DataType::Float32), + + // For other types (possible values null/int), use Float64 + _ => Ok(DataType::Float64), + } + } + + fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { + let args = ColumnarValue::values_to_arrays(args)?; + + let arr: ArrayRef = match args[0].data_type() { + DataType::Float64 => Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + self.name(), + Float64Array, + Float64Array, + { f64::acos } + )), + DataType::Float32 => Arc::new(make_function_scalar_inputs_return_type!( + &args[0], + self.name(), + Float32Array, + Float32Array, + { f32::acos } + )), + other => { + return exec_err!( + "Unsupported data type {other:?} for function {}", + self.name() + ) + } + }; + Ok(ColumnarValue::Array(arr)) + } +} diff --git a/datafusion/functions/src/math/mod.rs b/datafusion/functions/src/math/mod.rs index 9d13103ef23f..65752a0fe4d9 100644 --- a/datafusion/functions/src/math/mod.rs +++ b/datafusion/functions/src/math/mod.rs @@ -17,15 +17,26 @@ //! "math" DataFusion functions -mod nans; mod abs; +mod acos; +mod nans; // create UDFs make_udf_function!(nans::IsNanFunc, ISNAN, isnan); make_udf_function!(abs::AbsFunc, ABS, abs); +make_udf_function!(acos::AcosFunc, ACOS, acos); // Export the functions out of this package, both as expr_fn as well as a list of functions export_functions!( - (isnan, num, "returns true if a given number is +NaN or -NaN otherwise returns false"), - (abs, num, "returns the absolute value of a given number") -); \ No newline at end of file + ( + isnan, + num, + "returns true if a given number is +NaN or -NaN otherwise returns false" + ), + (abs, num, "returns the absolute value of a given number"), + ( + acos, + num, + "returns the arc cosine or inverse cosine of a number" + ) +); diff --git a/datafusion/functions/src/math/nans.rs b/datafusion/functions/src/math/nans.rs index c7868e6d5eca..96a50f9aa568 100644 --- a/datafusion/functions/src/math/nans.rs +++ b/datafusion/functions/src/math/nans.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Encoding expressions +//! Math function: `isnan()`. use arrow::datatypes::DataType; use datafusion_common::{exec_err, DataFusionError, Result};
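For illustration, a minimal sketch of how a UDF ported in this style can be exercised directly, assuming `AcosFunc` is in scope (the `acos` module is private, so external callers would normally go through `register_all` or the `expr_fn` API instead); `ScalarUDF::new_from_impl` and `ScalarUDF::invoke` are existing `datafusion-expr` APIs, and the argument value is illustrative only:

use datafusion_common::{Result, ScalarValue};
use datafusion_expr::{ColumnarValue, ScalarUDF};

fn call_acos_directly() -> Result<ColumnarValue> {
    // Wrap the ScalarUDFImpl so it can be registered with a FunctionRegistry
    // or, as here, invoked directly.
    let acos = ScalarUDF::new_from_impl(AcosFunc::new());
    // One Float64 argument satisfies the uniform(1, vec![Float64, Float32]) signature.
    let args = vec![ColumnarValue::Scalar(ScalarValue::Float64(Some(0.5)))];
    // The invoke() implementation above converts scalar inputs to arrays, so the
    // result is a one-element Float64 array holding f64::acos(0.5).
    acos.invoke(&args)
}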
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 8cdb4d7dbdf6..3a43e3cd7c20 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -889,14 +889,14 @@ mod test { // test that automatic argument type coercion for scalar functions work let empty = empty(); let lit_expr = lit(10i64); - let fun: BuiltinScalarFunction = BuiltinScalarFunction::Acos; + let fun: BuiltinScalarFunction = BuiltinScalarFunction::Floor; let scalar_function_expr = Expr::ScalarFunction(ScalarFunction::new(fun, vec![lit_expr])); let plan = LogicalPlan::Projection(Projection::try_new( vec![scalar_function_expr], empty, )?); - let expected = "Projection: acos(CAST(Int64(10) AS Float64))\n EmptyRelation"; + let expected = "Projection: floor(CAST(Int64(10) AS Float64))\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), &plan, expected) } diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index c91b96d67a22..af079dbd2d12 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -261,7 +261,6 @@ pub fn create_physical_fun( ) -> Result<ScalarFunctionImplementation> { Ok(match fun { // math functions - BuiltinScalarFunction::Acos => Arc::new(math_expressions::acos), BuiltinScalarFunction::Asin => Arc::new(math_expressions::asin), BuiltinScalarFunction::Atan => Arc::new(math_expressions::atan), BuiltinScalarFunction::Acosh => Arc::new(math_expressions::acosh), diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index d91373f8f8d2..2d729ffc5b3e 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -548,7 +548,7 @@ enum ScalarFunction { // 0 was Abs before // The first enum value must be zero for open enums unknown = 0; - Acos = 1; + // 1 was Acos Asin = 2; Atan = 3; Ascii = 4; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 964b8890184c..5f05b8546f68 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22322,7 +22322,6 @@ impl serde::Serialize for ScalarFunction { { let variant = match self { Self::Unknown => "unknown", - Self::Acos => "Acos", Self::Asin => "Asin", Self::Atan => "Atan", Self::Ascii => "Ascii", @@ -22465,7 +22464,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { { const FIELDS: &[&str] = &[ "unknown", - "Acos", "Asin", "Atan", "Ascii", @@ -22637,7 +22635,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { { match value { "unknown" => Ok(ScalarFunction::Unknown), - "Acos" => Ok(ScalarFunction::Acos), "Asin" => Ok(ScalarFunction::Asin), "Atan" => Ok(ScalarFunction::Atan), "Ascii" => Ok(ScalarFunction::Ascii), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 292aef4402a2..252089d5c14d 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2636,7 +2636,7 @@ pub enum ScalarFunction { /// 0 was Abs before /// The first enum value must be zero for open enums Unknown = 0, - Acos = 1, + /// 1 was Acos Asin = 2, Atan = 3, Ascii = 4, @@ -2779,7 +2779,6 @@ impl ScalarFunction { pub fn as_str_name(&self) -> &'static str { match self { ScalarFunction::Unknown => "unknown", - ScalarFunction::Acos => "Acos", ScalarFunction::Asin => "Asin", ScalarFunction::Atan => "Atan", ScalarFunction::Ascii => "Ascii", @@ -2916,7 +2915,6 @@ impl ScalarFunction { pub fn from_str_name(value: &str) -> ::core::option::Option<Self> { match value { "unknown" => Some(Self::Unknown), - "Acos" => Some(Self::Acos), "Asin" => Some(Self::Asin), "Atan" => Some(Self::Atan), "Ascii" => Some(Self::Ascii), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 69114fd74595..acfa043b88af 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -47,15 +47,15 @@ use datafusion_common::{ use datafusion_expr::expr::Unnest; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - acos, acosh, array, array_append, array_concat, array_dims, array_distinct, - array_element, array_empty, array_except, array_has, array_has_all, array_has_any, - array_intersect, array_length, array_ndims, array_pop_back, array_pop_front, - array_position, array_positions, array_prepend, array_remove, array_remove_all, - array_remove_n, array_repeat, array_replace, array_replace_all, array_replace_n, - array_resize, array_slice, array_sort, array_union, arrow_typeof, ascii, asin, asinh, - atan, atan2, atanh, bit_length, btrim, cardinality, cbrt, ceil, character_length, - chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, cot, current_date, - current_time, date_bin, date_part, date_trunc, degrees, digest, ends_with, exp, + acosh, array, array_append, array_concat, array_dims, array_distinct, array_element, + array_empty, array_except, array_has, array_has_all, array_has_any, array_intersect, + array_length, array_ndims, array_pop_back, array_pop_front, array_position, + array_positions, array_prepend, array_remove, array_remove_all, array_remove_n, + array_repeat, array_replace, array_replace_all, array_replace_n, array_resize, + array_slice, array_sort, array_union, arrow_typeof, ascii, asin, asinh, atan, atan2, + atanh, bit_length, btrim, cardinality, cbrt, ceil, character_length, chr, coalesce, + concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, date_bin, + date_part, date_trunc, degrees, digest, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, factorial, find_in_set, flatten, floor, from_unixtime, gcd, gen_range, initcap, instr, iszero, lcm, left, levenshtein, ln, log, log10, log2, @@ -450,7 +450,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::Tan => Self::Tan, ScalarFunction::Cot => Self::Cot, ScalarFunction::Asin => Self::Asin, - ScalarFunction::Acos => Self::Acos, ScalarFunction::Atan => Self::Atan, ScalarFunction::Sinh => Self::Sinh, ScalarFunction::Cosh => Self::Cosh, @@ -1362,7 +1361,6 @@ pub fn parse_expr( match scalar_function { ScalarFunction::Unknown => Err(proto_error("Unknown scalar function")), ScalarFunction::Asin => Ok(asin(parse_expr(&args[0], registry)?)), - ScalarFunction::Acos => Ok(acos(parse_expr(&args[0], registry)?)), ScalarFunction::Asinh => Ok(asinh(parse_expr(&args[0], registry)?)), ScalarFunction::Acosh => Ok(acosh(parse_expr(&args[0], registry)?)), ScalarFunction::Array => Ok(array( diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 9603df209ce4..d19830db98ce 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1432,7 +1432,6 @@ impl
TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::Cosh => Self::Cosh, BuiltinScalarFunction::Tanh => Self::Tanh, BuiltinScalarFunction::Asin => Self::Asin, - BuiltinScalarFunction::Acos => Self::Acos, BuiltinScalarFunction::Atan => Self::Atan, BuiltinScalarFunction::Asinh => Self::Asinh, BuiltinScalarFunction::Acosh => Self::Acosh, diff --git a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs index e44f1863891a..7df22e01469b 100644 --- a/datafusion/proto/tests/cases/roundtrip_physical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_physical_plan.rs @@ -16,10 +16,6 @@ // under the License. use arrow::csv::WriterBuilder; -use std::ops::Deref; -use std::sync::Arc; -use std::vec; - use datafusion::arrow::array::ArrayRef; use datafusion::arrow::compute::kernels::sort::SortOptions; use datafusion::arrow::datatypes::{DataType, Field, Fields, IntervalUnit, Schema}; @@ -52,6 +48,7 @@ use datafusion::physical_plan::expressions::{ StringAgg, Sum, }; use datafusion::physical_plan::filter::FilterExec; +use datafusion::physical_plan::functions; use datafusion::physical_plan::insert::FileSinkExec; use datafusion::physical_plan::joins::{ HashJoinExec, NestedLoopJoinExec, PartitionMode, StreamJoinPartitionMode, @@ -66,7 +63,7 @@ use datafusion::physical_plan::windows::{ BuiltInWindowExpr, PlainAggregateWindowExpr, WindowAggExec, }; use datafusion::physical_plan::{ - functions, udaf, AggregateExpr, ExecutionPlan, Partitioning, PhysicalExpr, Statistics, + udaf, AggregateExpr, ExecutionPlan, Partitioning, PhysicalExpr, Statistics, }; use datafusion::prelude::SessionContext; use datafusion::scalar::ScalarValue; @@ -82,6 +79,9 @@ use datafusion_expr::{ }; use datafusion_proto::physical_plan::{AsExecutionPlan, DefaultPhysicalExtensionCodec}; use datafusion_proto::protobuf; +use std::ops::Deref; +use std::sync::Arc; +use std::vec; /// Perform a serde roundtrip and assert that the string representation of the before and after plans /// are identical. 
Note that this often isn't sufficient to guarantee that no information is @@ -600,10 +600,10 @@ fn roundtrip_builtin_scalar_function() -> Result<()> { let execution_props = ExecutionProps::new(); let fun_expr = - functions::create_physical_fun(&BuiltinScalarFunction::Acos, &execution_props)?; + functions::create_physical_fun(&BuiltinScalarFunction::Sin, &execution_props)?; let expr = ScalarFunctionExpr::new( - "acos", + "sin", fun_expr, vec![col("a", &schema)?], DataType::Float64, From fa8508e72fe7a9cbbbdd3f641205195e202366c8 Mon Sep 17 00:00:00 2001 From: SteveLauC Date: Wed, 28 Feb 2024 09:02:14 +0800 Subject: [PATCH 37/45] docs: put flatten in top fn list (#9376) --- docs/source/user-guide/sql/scalar_functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index d4eb5944ad09..41b5a354abc1 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -1956,6 +1956,7 @@ from_unixtime(expression) - [array_to_string](#array_to_string) - [cardinality](#cardinality) - [empty](#empty) +- [flatten](#flatten) - [generate_series](#generate_series) - [list_append](#list_append) - [list_sort](#list_sort) From 32d906fc9622af3a67b3828700272092fe0982a0 Mon Sep 17 00:00:00 2001 From: Clide S <109172241+Monkwire3@users.noreply.github.com> Date: Tue, 27 Feb 2024 21:10:15 -0500 Subject: [PATCH 38/45] Update list_to_string alias to point to array_to_string (#9374) --- docs/source/user-guide/sql/scalar_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index 41b5a354abc1..38da3fd74c26 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -3030,7 +3030,7 @@ _Alias of [array_slice](#array_slice)._ ### `list_to_string` -_Alias of [list_to_string](#list_to_string)._ +_Alias of [array_to_string](#array_to_string)._ ### `make_array` From 930ac87d547f38b07be7cc4f1b2b5f934ca4e9c5 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Wed, 28 Feb 2024 09:33:39 +0300 Subject: [PATCH 39/45] Resolve linter errors --- datafusion-cli/src/exec.rs | 2 +- datafusion/core/benches/sort.rs | 1 + datafusion/core/src/datasource/memory.rs | 6 ++--- .../datasource/physical_plan/arrow_file.rs | 4 +++- .../core/src/datasource/physical_plan/avro.rs | 24 ++++++++++++++++--- .../core/src/datasource/physical_plan/csv.rs | 6 +++-- .../core/src/datasource/physical_plan/json.rs | 4 ++-- .../datasource/physical_plan/parquet/mod.rs | 18 ++++++++++---- .../enforce_distribution.rs | 4 ++-- .../src/physical_optimizer/enforce_sorting.rs | 4 ++-- .../physical_optimizer/output_requirements.rs | 2 +- .../physical_optimizer/projection_pushdown.rs | 2 +- .../replace_with_order_preserving_variants.rs | 2 +- .../physical_optimizer/topk_aggregation.rs | 3 +-- datafusion/core/src/physical_planner.rs | 2 +- .../physical-plan/src/aggregates/mod.rs | 2 +- .../physical-plan/src/aggregates/row_hash.rs | 4 ++-- .../physical-plan/src/coalesce_partitions.rs | 5 +++- datafusion/physical-plan/src/display.rs | 4 ---- .../physical-plan/src/joins/test_utils.rs | 2 +- datafusion/physical-plan/src/lib.rs | 18 +++++++------- datafusion/physical-plan/src/limit.rs | 3 ++- datafusion/physical-plan/src/memory.rs | 9 ++++--- datafusion/physical-plan/src/stream.rs | 2 +- datafusion/physical-plan/src/union.rs | 12 +++++++--- 25 files changed, 92 
insertions(+), 53 deletions(-) diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index 7e46f4a513fb..59581e91e857 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -36,7 +36,7 @@ use datafusion::datasource::listing::ListingTableUrl; use datafusion::error::{DataFusionError, Result}; use datafusion::logical_expr::dml::CopyTo; use datafusion::logical_expr::{CreateExternalTable, DdlStatement, LogicalPlan}; -use datafusion::physical_plan::{collect, execute_stream}; +use datafusion::physical_plan::{collect, execute_stream, ExecutionPlanProperties}; use datafusion::prelude::SessionContext; use datafusion::sql::parser::{DFParser, Statement}; use datafusion::sql::sqlparser::dialect::dialect_from_str; diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs index fbb94d66db58..34b4a5ebf0dc 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -95,6 +95,7 @@ use rand::{Rng, SeedableRng}; use tokio::runtime::Runtime; use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::ExecutionPlanProperties; /// Total number of streams to divide each input into /// models 8 partition plan (should it be 16??) diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index b4a51be264eb..1ac492a76040 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -162,14 +162,14 @@ impl MemTable { let exec = MemoryExec::try_new(&data, schema.clone(), None)?; if let Some(num_partitions) = output_partitions { - let exec = Arc::new(RepartitionExec::try_new( + let exec = RepartitionExec::try_new( Arc::new(exec), Partitioning::RoundRobinBatch(num_partitions), - )?) 
as Arc<dyn ExecutionPlan>; + )?; // execute and collect results let mut output_partitions = vec![]; - for i in 0..exec.output_partitioning().partition_count() { + for i in 0..exec.properties().output_partitioning().partition_count() { // execute this *output* partition and collect all batches let task_ctx = state.task_ctx(); let mut stream = exec.execute(i, task_ctx)?; diff --git a/datafusion/core/src/datasource/physical_plan/arrow_file.rs b/datafusion/core/src/datasource/physical_plan/arrow_file.rs index 817f330097ac..82774a6e831c 100644 --- a/datafusion/core/src/datasource/physical_plan/arrow_file.rs +++ b/datafusion/core/src/datasource/physical_plan/arrow_file.rs @@ -152,7 +152,9 @@ impl ExecutionPlan for ArrowExec { let repartitioned_file_groups_option = FileGroupPartitioner::new() .with_target_partitions(target_partitions) .with_repartition_file_min_size(repartition_file_min_size) - .with_preserve_order_within_groups(self.cache.output_ordering().is_some()) + .with_preserve_order_within_groups( + self.properties().output_ordering().is_some(), + ) .repartition_file_groups(&self.base_config.file_groups); if let Some(repartitioned_file_groups) = repartitioned_file_groups_option { diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 2b913d862576..6e7dcf39069c 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -279,7 +279,13 @@ mod tests { table_partition_cols: vec![], output_ordering: vec![], }); - assert_eq!(avro_exec.output_partitioning().partition_count(), 1); + assert_eq!( + avro_exec + .properties() + .output_partitioning() + .partition_count(), + 1 + ); let mut results = avro_exec .execute(0, state.task_ctx()) .expect("plan execution failed"); @@ -350,7 +356,13 @@ mod tests { table_partition_cols: vec![], output_ordering: vec![], }); - assert_eq!(avro_exec.output_partitioning().partition_count(), 1); + assert_eq!( + avro_exec + .properties() + .output_partitioning() + .partition_count(), + 1 + ); let mut results = avro_exec .execute(0, state.task_ctx()) @@ -420,7 +432,13 @@ mod tests { table_partition_cols: vec![Field::new("date", DataType::Utf8, false)], output_ordering: vec![], }); - assert_eq!(avro_exec.output_partitioning().partition_count(), 1); + assert_eq!( + avro_exec + .properties() + .output_partitioning() + .partition_count(), + 1 + ); let mut results = avro_exec .execute(0, state.task_ctx()) diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index 3066d77acf45..05a83e8ac0b7 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -41,9 +41,9 @@ use arrow::datatypes::SchemaRef; use datafusion_common::config::ConfigOptions; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_plan::ExecutionPlanProperties; use bytes::{Buf, Bytes}; -use datafusion_physical_plan::ExecutionPlanProperties; use futures::{ready, StreamExt, TryStreamExt}; use object_store::{GetOptions, GetResultPayload, ObjectStore}; use tokio::io::AsyncWriteExt; @@ -198,7 +198,9 @@ impl ExecutionPlan for CsvExec { let repartitioned_file_groups_option = FileGroupPartitioner::new() .with_target_partitions(target_partitions) - .with_preserve_order_within_groups(self.cache.output_ordering().is_some()) + .with_preserve_order_within_groups( + self.properties().output_ordering().is_some(), + ) .with_repartition_file_min_size(repartition_file_min_size) .repartition_file_groups(&self.base_config.file_groups); diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index c471035bd286..6f9af2e6abcf 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -40,9 +40,9 @@ use arrow::json::ReaderBuilder; use arrow::{datatypes::SchemaRef, json}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_plan::ExecutionPlanProperties; use bytes::{Buf, Bytes}; -use datafusion_physical_plan::ExecutionPlanProperties; use futures::{ready, StreamExt, TryStreamExt}; use object_store::{self, GetOptions, GetResultPayload, ObjectStore}; use tokio::io::AsyncWriteExt; @@ -151,7 +151,7 @@ impl ExecutionPlan for NdJsonExec { config: &datafusion_common::config::ConfigOptions, ) -> Result<Option<Arc<dyn ExecutionPlan>>> { let repartition_file_min_size = config.optimizer.repartition_file_min_size; - let preserve_order_within_groups = self.cache.output_ordering().is_some(); + let preserve_order_within_groups = self.properties().output_ordering().is_some(); let file_groups = &self.base_config.file_groups; let repartitioned_file_groups_option = FileGroupPartitioner::new() diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index e6962040e8ac..5ccffde26359 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -46,9 +46,9 @@ use crate::{ use arrow::datatypes::{DataType, SchemaRef}; use arrow::error::ArrowError; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalExpr}; +use datafusion_physical_plan::ExecutionPlanProperties; use bytes::Bytes; -use datafusion_physical_plan::ExecutionPlanProperties; use futures::future::BoxFuture; use futures::{StreamExt, TryStreamExt}; use itertools::Itertools; @@ -363,7 +363,9 @@ impl ExecutionPlan for ParquetExec { let repartitioned_file_groups_option = FileGroupPartitioner::new() .with_target_partitions(target_partitions) .with_repartition_file_min_size(repartition_file_min_size) - .with_preserve_order_within_groups(self.cache.output_ordering().is_some()) + .with_preserve_order_within_groups( + self.properties().output_ordering().is_some(), + ) .repartition_file_groups(&self.base_config.file_groups); let mut new_plan = self.clone(); @@ -1558,7 +1560,7 @@ mod tests { expected_row_num: Option<usize>, file_schema: SchemaRef, ) -> Result<()> { - let parquet_exec = Arc::new(ParquetExec::new( + let parquet_exec = ParquetExec::new( FileScanConfig { object_store_url: ObjectStoreUrl::local_filesystem(), file_groups, @@ -1571,8 +1573,14 @@ }, None, None, - )) as Arc<dyn ExecutionPlan>; + ); assert_eq!( + parquet_exec + .properties() + .output_partitioning() + .partition_count(), + 1 + ); let results = parquet_exec.execute(0, state.task_ctx())?.next().await; if let Some(expected_row_num) = expected_row_num { diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index dad81b4028bd..eb221a28e2cf 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -55,8 +55,8 @@ use datafusion_physical_expr::{ }; use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::windows::{get_best_fitting_window, BoundedWindowAggExec}; - use datafusion_physical_plan::ExecutionPlanProperties; + use itertools::izip; /// The `EnforceDistribution` rule ensures that distribution requirements are @@ -1404,7 +1404,7 @@ pub(crate) mod tests { // model that it requires the output ordering of its input fn required_input_ordering(&self) -> Vec<Option<Vec<PhysicalSortRequirement>>> { vec![self - .cache + .properties() .output_ordering() .map(PhysicalSortRequirement::from_sort_exprs)] } diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 25280261c0a0..ee5ff7ec59fd 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -64,8 +64,8 @@ use datafusion_common::{plan_err, DataFusionError}; use datafusion_physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::partial_sort::PartialSortExec; - use datafusion_physical_plan::ExecutionPlanProperties; + use itertools::izip; /// This rule inspects [`SortExec`]'s in the given physical plan and removes the @@ -391,7 +391,7 @@ fn analyze_immediate_sort_removal( // If this sort is unnecessary, we should remove it: if sort_input .equivalence_properties() - .ordering_satisfy(node.plan.output_ordering().unwrap_or(&[])) + .ordering_satisfy(sort_exec.properties().output_ordering().unwrap_or(&[])) { node.plan = if !sort_exec.preserve_partitioning() && sort_input.output_partitioning().partition_count() > 1 diff --git a/datafusion/core/src/physical_optimizer/output_requirements.rs b/datafusion/core/src/physical_optimizer/output_requirements.rs index da0697eb9aba..7fea375725a5 100644 --- a/datafusion/core/src/physical_optimizer/output_requirements.rs +++ b/datafusion/core/src/physical_optimizer/output_requirements.rs @@ -242,7 +242,7 @@ fn require_top_ordering_helper( if children.len() != 1 { Ok((plan, false)) } else if let Some(sort_exec) = plan.as_any().downcast_ref::<SortExec>() { - let req_ordering = plan.output_ordering().unwrap_or(&[]); + let req_ordering = sort_exec.properties().output_ordering().unwrap_or(&[]); let req_dist = sort_exec.required_input_distribution()[0].clone(); let reqs = PhysicalSortRequirement::from_sort_exprs(req_ordering); Ok(( diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 1b9bf17bffa4..9cb2d6ecbc71 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -52,8 +52,8 @@ use datafusion_physical_expr::{ }; use datafusion_physical_plan::streaming::StreamingTableExec; use datafusion_physical_plan::union::UnionExec; - use datafusion_physical_plan::ExecutionPlanProperties; + use itertools::Itertools; /// This rule inspects [`ProjectionExec`]'s in the given physical plan and tries to diff --git a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs index be4b7d13bc7e..c0abde26c300 100644 --- a/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs +++ 
b/datafusion/core/src/physical_optimizer/replace_with_order_preserving_variants.rs @@ -31,8 +31,8 @@ use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::Transformed; use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; use datafusion_physical_plan::tree_node::PlanContext; - use datafusion_physical_plan::ExecutionPlanProperties; + use itertools::izip; /// For a given `plan`, this object carries the information one needs from its diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index 7459deb1f72a..2006402ac59e 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -30,7 +30,6 @@ use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::Result; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalSortExpr; -use datafusion_physical_plan::ExecutionPlanProperties; use itertools::Itertools; use std::sync::Arc; @@ -87,7 +86,7 @@ impl TopKAggregation { let children = sort.children(); let child = children.iter().exactly_one().ok()?; - let order = plan.output_ordering()?; + let order = sort.properties().output_ordering()?; let order = order.iter().exactly_one().ok()?; let limit = sort.fetch()?; diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index b16cc77a43f2..41a6e4d75be7 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -93,10 +93,10 @@ use datafusion_expr::{ }; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; +use datafusion_physical_plan::ExecutionPlanProperties; use datafusion_sql::utils::window_expr_common_partition_keys; use async_trait::async_trait; -use datafusion_physical_plan::ExecutionPlanProperties; use futures::future::BoxFuture; use futures::{FutureExt, StreamExt, TryStreamExt}; use itertools::{multiunzip, Itertools}; diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index f96eacf45896..46351a0d430f 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -496,7 +496,7 @@ impl AggregateExec { return false; } // ensure there is no output ordering; can this rule be relaxed? 
- if self.cache.output_ordering().is_some() { + if self.properties().output_ordering().is_some() { return false; } // ensure no ordering is required on the input diff --git a/datafusion/physical-plan/src/aggregates/row_hash.rs b/datafusion/physical-plan/src/aggregates/row_hash.rs index 8036012b7262..45d408bb4de7 100644 --- a/datafusion/physical-plan/src/aggregates/row_hash.rs +++ b/datafusion/physical-plan/src/aggregates/row_hash.rs @@ -32,7 +32,7 @@ use crate::metrics::{BaselineMetrics, RecordOutput}; use crate::sorts::sort::{read_spill_as_stream, sort_batch}; use crate::sorts::streaming_merge; use crate::stream::RecordBatchStreamAdapter; -use crate::{aggregates, PhysicalExpr}; +use crate::{aggregates, ExecutionPlan, PhysicalExpr}; use crate::{RecordBatchStream, SendableRecordBatchStream}; use arrow::array::*; @@ -341,7 +341,7 @@ impl GroupedHashAggregateStream { .with_can_spill(true) .register(context.memory_pool()); let (ordering, _) = agg - .cache + .properties() .equivalence_properties() .find_longest_permutation(&agg_group_by.output_exprs()); let group_ordering = GroupOrdering::try_new( diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 3c5b7e9c13fb..5e7c459a7a98 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -192,7 +192,10 @@ mod tests { let merge = CoalescePartitionsExec::new(csv); // output of CoalescePartitionsExec should have a single partition - assert_eq!(merge.output_partitioning().partition_count(), 1); + assert_eq!( + merge.properties().output_partitioning().partition_count(), + 1 + ); // the result should contain 4 batches (one per input partition) let iter = merge.execute(0, task_ctx)?; diff --git a/datafusion/physical-plan/src/display.rs b/datafusion/physical-plan/src/display.rs index 38c23331983e..4b7b35e53e1b 100644 --- a/datafusion/physical-plan/src/display.rs +++ b/datafusion/physical-plan/src/display.rs @@ -497,10 +497,6 @@ mod tests { unimplemented!() } - fn output_partitioning(&self) -> &datafusion_physical_expr::Partitioning { - &datafusion_physical_expr::Partitioning::UnknownPartitioning(1) - } - fn children(&self) -> Vec<Arc<dyn ExecutionPlan>> { vec![] } diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs index 37faae873745..920e7b9d7a7c 100644 --- a/datafusion/physical-plan/src/joins/test_utils.rs +++ b/datafusion/physical-plan/src/joins/test_utils.rs @@ -26,7 +26,7 @@ use crate::joins::{ }; use crate::memory::MemoryExec; use crate::repartition::RepartitionExec; -use crate::{common, ExecutionPlan, Partitioning}; +use crate::{common, ExecutionPlan, ExecutionPlanProperties, Partitioning}; use arrow::util::pretty::pretty_format_batches; use arrow_array::{ diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index e37f84fb0150..c41d94bf969e 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -101,7 +101,7 @@ pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; /// `ExecutionPlan`'s output from its input. See [`Partitioning`] for more /// details on partitioning. /// -/// Methods such as [`schema`] and [`output_partitioning`] communicate +/// Methods such as [`Self::schema`] and [`ExecutionPlanProperties::output_partitioning`] communicate /// properties of this output to the DataFusion optimizer, and methods such as /// [`required_input_distribution`] and [`required_input_ordering`] express /// requirements of the `ExecutionPlan` from its input. @@ -111,8 +111,6 @@ pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; /// quite verbose) `Debug` output. /// /// [`execute`]: ExecutionPlan::execute -/// [`schema`]: ExecutionPlan::schema -/// [`output_partitioning`]: ExecutionPlan::output_partitioning /// [`required_input_distribution`]: ExecutionPlan::required_input_distribution /// [`required_input_ordering`]: ExecutionPlan::required_input_ordering pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { @@ -455,7 +453,7 @@ impl ExecutionPlanProperties for Arc<dyn ExecutionPlan> { /// However, since `ProjectionExec` may calculate derived expressions, it /// needs special handling. /// - /// See also [`Self::maintains_input_order`] and [`Self::output_ordering`] + /// See also [`ExecutionPlan::maintains_input_order`] and [`Self::output_ordering`] /// for related concepts. fn equivalence_properties(&self) -> &EquivalenceProperties { self.properties().equivalence_properties() @@ -605,8 +603,11 @@ impl PlanProperties { /// 2. CoalescePartitionsExec for collapsing all of the partitions into one without ordering guarantee /// 3. SortPreservingMergeExec for collapsing all of the sorted partitions into one with ordering guarantee pub fn need_data_exchange(plan: Arc<dyn ExecutionPlan>) -> bool { - if let Some(_) = plan.as_any().downcast_ref::<RepartitionExec>() { - !matches!(plan.output_partitioning(), Partitioning::RoundRobinBatch(_)) + if let Some(repartition) = plan.as_any().downcast_ref::<RepartitionExec>() { + !matches!( + repartition.properties().output_partitioning(), + Partitioning::RoundRobinBatch(_) + ) } else if let Some(coalesce) = plan.as_any().downcast_ref::<CoalescePartitionsExec>() { coalesce.input().output_partitioning().partition_count() > 1 @@ -677,10 +678,9 @@ pub fn execute_stream( 1 => plan.execute(0, context), _ => { // merge into a single partition - let plan = Arc::new(CoalescePartitionsExec::new(plan.clone())) - as Arc<dyn ExecutionPlan>; + let plan = CoalescePartitionsExec::new(plan.clone()); // CoalescePartitionsExec must produce a single partition - assert_eq!(1, plan.output_partitioning().partition_count()); + assert_eq!(1, plan.properties().output_partitioning().partition_count()); plan.execute(0, context) } } diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 3520d45f9e6c..2d54db3c3cbd 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -410,7 +410,8 @@ impl ExecutionPlan for LocalLimitExec { _ => Statistics { // the result output row number will always be no greater than the limit number num_rows: Precision::Inexact( - self.fetch * self.cache.output_partitioning().partition_count(), + self.fetch + * self.properties().output_partitioning().partition_count(), ), column_statistics: col_stats,
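The recurring change in this patch is visible in the hunks above and below: concrete plan types now reach partitioning and ordering through `properties()`, while `Arc<dyn ExecutionPlan>` keeps the shorthand accessors via the `ExecutionPlanProperties` trait. A hedged sketch of the two call styles, with names taken from the diffs and the input plan left abstract:

use std::sync::Arc;
use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec;
use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};

fn partition_counts(input: Arc<dyn ExecutionPlan>) -> (usize, usize) {
    let merge = CoalescePartitionsExec::new(input);
    // Concrete type: reach PlanProperties explicitly via properties().
    let on_concrete = merge.properties().output_partitioning().partition_count();
    // Trait object: ExecutionPlanProperties restores the shorthand accessor.
    let as_dyn: Arc<dyn ExecutionPlan> = Arc::new(merge);
    let on_dyn = as_dyn.output_partitioning().partition_count();
    (on_concrete, on_dyn)
}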
diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index 23699295e121..04b7c78b77f4 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -287,8 +287,8 @@ mod tests { use std::sync::Arc; use crate::memory::MemoryExec; - use crate::ExecutionPlan; + use crate::ExecutionPlan; use arrow_schema::{DataType, Field, Schema, SortOptions}; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; @@ -322,8 +322,11 @@ mod tests { let mem_exec = MemoryExec::try_new(&[vec![]], schema, None)? .with_sort_information(sort_information); - assert_eq!(mem_exec.output_ordering().unwrap(), expected_output_order); - let eq_properties = mem_exec.equivalence_properties(); + assert_eq!( + mem_exec.properties().output_ordering().unwrap(), + expected_output_order + ); + let eq_properties = mem_exec.properties().equivalence_properties(); assert!(eq_properties.oeq_class().contains(&sort1)); assert!(eq_properties.oeq_class().contains(&sort2)); Ok(()) diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index b780a50cdc90..597b60537f6e 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -555,7 +555,7 @@ mod test { let task_ctx = Arc::new(TaskContext::default()); let input = Arc::new(input); - let num_partitions = input.output_partitioning().partition_count(); + let num_partitions = input.properties().output_partitioning().partition_count(); // Configure a RecordBatchReceiverStream to consume all the input partitions let mut builder = diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 9f637361ff8f..2889a506f3ff 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -205,7 +205,7 @@ impl ExecutionPlan for UnionExec { // which is the "meet" of all input orderings. In this example, this // function will return vec![false, true, true], indicating that we // preserve the orderings for the 2nd and the 3rd children. - if let Some(output_ordering) = self.cache.output_ordering() { + if let Some(output_ordering) = self.properties().output_ordering() { self.inputs() .iter() .map(|child| { @@ -635,7 +635,13 @@ mod tests { let union_exec = Arc::new(UnionExec::new(vec![csv, csv2])); // Should have 9 partitions and 9 output batches - assert_eq!(union_exec.output_partitioning().partition_count(), 9); + assert_eq!( + union_exec + .properties() + .output_partitioning() + .partition_count(), + 9 + ); let result: Vec<RecordBatch> = collect(union_exec, task_ctx).await?; assert_eq!(result.len(), 9); @@ -806,7 +812,7 @@ mod tests { ); let union = UnionExec::new(vec![child1, child2]); - let union_eq_properties = union.equivalence_properties(); + let union_eq_properties = union.properties().equivalence_properties(); let union_actual_orderings = union_eq_properties.oeq_class(); let err_msg = format!( "Error in test id: {:?}, test case: {:?}", From a8fac85429f519ea5ae258a2a6425eaa9ab333c8 Mon Sep 17 00:00:00 2001 From: Mustafa Akur Date: Wed, 28 Feb 2024 09:35:09 +0300 Subject: [PATCH 40/45] Bring docs yaml --- .github/workflows/docs.yaml | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 .github/workflows/docs.yaml diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml new file mode 100644 index 000000000000..ab6a615ab60b --- /dev/null +++ b/.github/workflows/docs.yaml @@ -0,0 +1,64 @@ +on: + push: + branches: + - main + paths: + - .asf.yaml + - .github/workflows/docs.yaml + - docs/** + +name: Deploy DataFusion site + +jobs: + build-docs: + name: Build docs + runs-on: ubuntu-latest + steps: + - name: Checkout docs sources + uses: actions/checkout@v4 + + - name: Checkout asf-site branch + uses: actions/checkout@v4 + with: + ref: asf-site + path: asf-site + + - name: Setup Python + uses: actions/setup-python@v5 + with: +
python-version: "3.10" + + - name: Install dependencies + run: | + set -x + python3 -m venv venv + source venv/bin/activate + pip install -r docs/requirements.txt + + - name: Build docs + run: | + set -x + source venv/bin/activate + cd docs + ./build.sh + + - name: Copy & push the generated HTML + run: | + set -x + cd asf-site/ + rsync \ + -a \ + --delete \ + --exclude '/.git/' \ + ../docs/build/html/ \ + ./ + cp ../.asf.yaml . + touch .nojekyll + git status --porcelain + if [ "$(git status --porcelain)" != "" ]; then + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + git add --all + git commit -m 'Publish built docs triggered by ${{ github.sha }}' + git push || git push --force + fi From e62240969135e2236d100c8c0c01546a87950a80 Mon Sep 17 00:00:00 2001 From: Lordworms <48054792+Lordworms@users.noreply.github.com> Date: Wed, 28 Feb 2024 04:55:03 -0600 Subject: [PATCH 41/45] feat: issue_9285: port builtin reg function into datafusion-function-* (1/3 regexpmatch) (#9329) * feat: issue_9285: port builtin reg function into datafusion-function-* crate (1/3: RegexpMatch part) * fix fmt * refact * modify test * fix msrv verify problem * port test and delete useless lines --- .../tests/dataframe/dataframe_functions.rs | 2 +- datafusion/expr/src/built_in_function.rs | 22 --- datafusion/expr/src/expr_fn.rs | 7 - datafusion/functions/Cargo.toml | 3 +- datafusion/functions/src/lib.rs | 7 +- datafusion/functions/src/regex/mod.rs | 29 ++++ datafusion/functions/src/regex/regexpmatch.rs | 145 ++++++++++++++++++ datafusion/physical-expr/src/functions.rs | 108 ------------- datafusion/proto/proto/datafusion.proto | 2 +- datafusion/proto/src/generated/pbjson.rs | 3 - datafusion/proto/src/generated/prost.rs | 4 +- .../proto/src/logical_plan/from_proto.rs | 17 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 - datafusion/sqllogictest/test_files/regexp.slt | 8 +- 14 files changed, 196 insertions(+), 162 deletions(-) create mode 100644 datafusion/functions/src/regex/mod.rs create mode 100644 datafusion/functions/src/regex/regexpmatch.rs diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index 95c13fc17c90..ff553a48888b 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -467,7 +467,7 @@ async fn test_fn_regexp_like() -> Result<()> { #[tokio::test] #[cfg(feature = "unicode_expressions")] async fn test_fn_regexp_match() -> Result<()> { - let expr = regexp_match(vec![col("a"), lit("[a-z]")]); + let expr = regexp_match(col("a"), lit("[a-z]")); let expected = [ "+------------------------------------+", diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index e04106595876..8df2f4e88d41 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -233,7 +233,6 @@ pub enum BuiltinScalarFunction { /// regexp_like RegexpLike, /// regexp_match - RegexpMatch, /// regexp_replace RegexpReplace, /// repeat @@ -449,7 +448,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::OctetLength => Volatility::Immutable, BuiltinScalarFunction::Radians => Volatility::Immutable, BuiltinScalarFunction::RegexpLike => Volatility::Immutable, - BuiltinScalarFunction::RegexpMatch => Volatility::Immutable, BuiltinScalarFunction::RegexpReplace => Volatility::Immutable, BuiltinScalarFunction::Repeat => Volatility::Immutable, 
BuiltinScalarFunction::Replace => Volatility::Immutable, @@ -814,16 +812,6 @@ impl BuiltinScalarFunction { ); } }), - BuiltinScalarFunction::RegexpMatch => Ok(match &input_expr_types[0] { - LargeUtf8 => List(Arc::new(Field::new("item", LargeUtf8, true))), - Utf8 => List(Arc::new(Field::new("item", Utf8, true))), - Null => Null, - other => { - return plan_err!( - "The regexp_match function can only accept strings. Got {other}" - ); - } - }), BuiltinScalarFunction::Factorial | BuiltinScalarFunction::Gcd @@ -1263,15 +1251,6 @@ impl BuiltinScalarFunction { ], self.volatility(), ), - BuiltinScalarFunction::RegexpMatch => Signature::one_of( - vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![Utf8, Utf8, Utf8]), - Exact(vec![LargeUtf8, Utf8, Utf8]), - ], - self.volatility(), - ), BuiltinScalarFunction::RegexpReplace => Signature::one_of( vec![ Exact(vec![Utf8, Utf8, Utf8]), @@ -1514,7 +1493,6 @@ impl BuiltinScalarFunction { // regex functions BuiltinScalarFunction::RegexpLike => &["regexp_like"], - BuiltinScalarFunction::RegexpMatch => &["regexp_match"], BuiltinScalarFunction::RegexpReplace => &["regexp_replace"], // time/date functions diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 67bf39050d58..7ffd2f76e783 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -854,11 +854,6 @@ nary_scalar_expr!( regexp_like, "matches a regular expression against a string and returns true or false if there was at least one match or not" ); -nary_scalar_expr!( - RegexpMatch, - regexp_match, - "matches a regular expression against a string and returns matched substrings." -); nary_scalar_expr!( RegexpReplace, regexp_replace, @@ -1380,8 +1375,6 @@ mod test { test_scalar_expr!(OctetLength, octet_length, string); test_nary_scalar_expr!(RegexpLike, regexp_like, string, pattern); test_nary_scalar_expr!(RegexpLike, regexp_like, string, pattern, flags); - test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern); - test_nary_scalar_expr!(RegexpMatch, regexp_match, string, pattern, flags); test_nary_scalar_expr!( RegexpReplace, regexp_replace, diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index f63f18f955de..89b7de9ee11a 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -32,11 +32,12 @@ rust-version = { workspace = true } # enable core functions core_expressions = [] # Enable encoding by default so the doctests work. In general don't automatically enable all packages. 
-default = ["core_expressions", "encoding_expressions", "math_expressions"] +default = ["core_expressions", "encoding_expressions", "math_expressions", "regex_expressions"] # enable encode/decode functions encoding_expressions = ["base64", "hex"] # enable math functions math_expressions = [] +regex_expressions = [] [lib] name = "datafusion_functions" diff --git a/datafusion/functions/src/lib.rs b/datafusion/functions/src/lib.rs index 981174c141d6..d2f0270959ee 100644 --- a/datafusion/functions/src/lib.rs +++ b/datafusion/functions/src/lib.rs @@ -93,7 +93,7 @@ make_package!( ); make_package!(math, "math_expressions", "Mathematical functions."); - +make_package!(regex, "regex_expressions", "Regex functions"); /// Fluent-style API for creating `Expr`s pub mod expr_fn { #[cfg(feature = "core_expressions")] @@ -102,6 +102,8 @@ pub mod expr_fn { pub use super::encoding::expr_fn::*; #[cfg(feature = "math_expressions")] pub use super::math::expr_fn::*; + #[cfg(feature = "regex_expressions")] + pub use super::regex::expr_fn::*; } /// Registers all enabled packages with a [`FunctionRegistry`] @@ -109,7 +111,8 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { let mut all_functions = core::functions() .into_iter() .chain(encoding::functions()) - .chain(math::functions()); + .chain(math::functions()) + .chain(regex::functions()); all_functions.try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions/src/regex/mod.rs b/datafusion/functions/src/regex/mod.rs new file mode 100644 index 000000000000..862e8b77a2d6 --- /dev/null +++ b/datafusion/functions/src/regex/mod.rs @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! "regx" DataFusion functions + +mod regexpmatch; +// create UDFs +make_udf_function!(regexpmatch::RegexpMatchFunc, REGEXP_MATCH, regexp_match); + +export_functions!(( + regexp_match, + input_arg1 + input_arg2, + "returns a list of regular expression matches in a string. " +)); diff --git a/datafusion/functions/src/regex/regexpmatch.rs b/datafusion/functions/src/regex/regexpmatch.rs new file mode 100644 index 000000000000..7ab99f96b142 --- /dev/null +++ b/datafusion/functions/src/regex/regexpmatch.rs @@ -0,0 +1,145 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Encoding expressions +use arrow::array::{Array, ArrayRef, OffsetSizeTrait}; +use arrow::compute::kernels::regexp; +use arrow::datatypes::DataType; +use arrow::datatypes::Field; +use datafusion_common::ScalarValue; +use datafusion_expr::TypeSignature::*; +use datafusion_expr::{ScalarUDFImpl, Signature, Volatility}; +use std::any::Any; +use datafusion_common::exec_err; +use datafusion_common::{arrow_datafusion_err, plan_err}; +use datafusion_common::{ + cast::as_generic_string_array, internal_err, DataFusionError, Result, +}; +use datafusion_expr::ColumnarValue; +use std::sync::Arc; + +#[derive(Debug)] +pub(super) struct RegexpMatchFunc { + signature: Signature, +} +impl RegexpMatchFunc { + pub fn new() -> Self { + use DataType::*; + Self { + signature: Signature::one_of( + vec![ + Exact(vec![Utf8, Utf8]), + Exact(vec![LargeUtf8, Utf8]), + Exact(vec![Utf8, Utf8, Utf8]), + Exact(vec![LargeUtf8, Utf8, Utf8]), + ], + Volatility::Immutable, + ), + } + } +} + +impl ScalarUDFImpl for RegexpMatchFunc { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "regexp_match" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + use DataType::*; + + Ok(match &arg_types[0] { + LargeUtf8 => List(Arc::new(Field::new("item", LargeUtf8, true))), + Utf8 => List(Arc::new(Field::new("item", Utf8, true))), + Null => Null, + other => { + return plan_err!( + "The regexp_match function can only accept strings. 
Got {other}" + ); + } + }) + } + fn invoke(&self, args: &[ColumnarValue]) -> Result { + let len = args + .iter() + .fold(Option::::None, |acc, arg| match arg { + ColumnarValue::Scalar(_) => acc, + ColumnarValue::Array(a) => Some(a.len()), + }); + + let is_scalar = len.is_none(); + let inferred_length = len.unwrap_or(1); + let args = args + .iter() + .map(|arg| arg.clone().into_array(inferred_length)) + .collect::>>()?; + + let result = regexp_match_func(&args); + if is_scalar { + // If all inputs are scalar, keeps output as scalar + let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); + result.map(ColumnarValue::Scalar) + } else { + result.map(ColumnarValue::Array) + } + } +} +fn regexp_match_func(args: &[ArrayRef]) -> Result { + match args[0].data_type() { + DataType::Utf8 => { + regexp_match::(args) + } + DataType::LargeUtf8 => { + regexp_match::(args) + } + other => { + internal_err!("Unsupported data type {other:?} for function regexp_match") + } + } +} +pub fn regexp_match(args: &[ArrayRef]) -> Result { + match args.len() { + 2 => { + let values = as_generic_string_array::(&args[0])?; + let regex = as_generic_string_array::(&args[1])?; + regexp::regexp_match(values, regex, None) + .map_err(|e| arrow_datafusion_err!(e)) + } + 3 => { + let values = as_generic_string_array::(&args[0])?; + let regex = as_generic_string_array::(&args[1])?; + let flags = as_generic_string_array::(&args[2])?; + + if flags.iter().any(|s| s == Some("g")) { + return plan_err!("regexp_match() does not support the \"global\" option") + } + + regexp::regexp_match(values, regex, Some(flags)) + .map_err(|e| arrow_datafusion_err!(e)) + } + other => exec_err!( + "regexp_match was called with {other} arguments. It requires at least 2 and at most 3." + ), + } +} diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index af079dbd2d12..186de0609b9a 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -243,7 +243,6 @@ where .collect::>>()?; let result = (inner)(&args); - if is_scalar { // If all inputs are scalar, keeps output as scalar let result = result.and_then(|arr| ScalarValue::try_from_array(&arr, 0)); @@ -619,29 +618,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function regexp_like") } }), - BuiltinScalarFunction::RegexpMatch => { - Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - let func = invoke_on_array_if_regex_expressions_feature_flag!( - regexp_match, - i32, - "regexp_match" - ); - make_scalar_function_inner(func)(args) - } - DataType::LargeUtf8 => { - let func = invoke_on_array_if_regex_expressions_feature_flag!( - regexp_match, - i64, - "regexp_match" - ); - make_scalar_function_inner(func)(args) - } - other => { - exec_err!("Unsupported data type {other:?} for function regexp_match") - } - }) - } BuiltinScalarFunction::RegexpReplace => { Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { @@ -3185,90 +3161,6 @@ mod tests { Ok(()) } - #[test] - #[cfg(feature = "regex_expressions")] - fn test_regexp_match() -> Result<()> { - use datafusion_common::cast::{as_list_array, as_string_array}; - let schema = Schema::new(vec![Field::new("a", DataType::Utf8, false)]); - let execution_props = ExecutionProps::new(); - - let col_value: ArrayRef = Arc::new(StringArray::from(vec!["aaa-555"])); - let pattern = lit(r".*-(\d*)"); - let columns: Vec = vec![col_value]; - let expr = create_physical_expr_with_type_coercion( - 
-            &BuiltinScalarFunction::RegexpMatch,
-            &[col("a", &schema)?, pattern],
-            &schema,
-            &execution_props,
-        )?;
-
-        // type is correct
-        assert_eq!(
-            expr.data_type(&schema)?,
-            DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)))
-        );
-
-        // evaluate works
-        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())
-            .expect("Failed to convert to array");
-
-        // downcast works
-        let result = as_list_array(&result)?;
-        let first_row = result.value(0);
-        let first_row = as_string_array(&first_row)?;
-
-        // value is correct
-        let expected = "555".to_string();
-        assert_eq!(first_row.value(0), expected);
-
-        Ok(())
-    }
-
-    #[test]
-    #[cfg(feature = "regex_expressions")]
-    fn test_regexp_match_all_literals() -> Result<()> {
-        use datafusion_common::cast::{as_list_array, as_string_array};
-        let schema = Schema::new(vec![Field::new("a", DataType::Int32, false)]);
-        let execution_props = ExecutionProps::new();
-
-        let col_value = lit("aaa-555");
-        let pattern = lit(r".*-(\d*)");
-        let columns: Vec<ArrayRef> = vec![Arc::new(Int32Array::from(vec![1]))];
-        let expr = create_physical_expr_with_type_coercion(
-            &BuiltinScalarFunction::RegexpMatch,
-            &[col_value, pattern],
-            &schema,
-            &execution_props,
-        )?;
-
-        // type is correct
-        assert_eq!(
-            expr.data_type(&schema)?,
-            DataType::List(Arc::new(Field::new("item", DataType::Utf8, true)))
-        );
-
-        // evaluate works
-        let batch = RecordBatch::try_new(Arc::new(schema.clone()), columns)?;
-        let result = expr
-            .evaluate(&batch)?
-            .into_array(batch.num_rows())
-            .expect("Failed to convert to array");
-
-        // downcast works
-        let result = as_list_array(&result)?;
-        let first_row = result.value(0);
-        let first_row = as_string_array(&first_row)?;
-
-        // value is correct
-        let expected = "555".to_string();
-        assert_eq!(first_row.value(0), expected);
-
-        Ok(())
-    }
-
     // Helper function just for testing.
     // Returns `expressions` coerced to types compatible with
     // `signature`, if possible.
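The two unit tests deleted above are superseded by the sqllogictest case added at the end of this patch. For context, a minimal sketch of an equivalent end-to-end check through the public SQL API; the tokio runtime and the crate's default features are assumptions here, not part of this patch:

```rust
// A minimal sketch, assuming the `datafusion` crate with default features
// and a tokio runtime. It mirrors the ported sqllogictest below.
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> datafusion::error::Result<()> {
    let ctx = SessionContext::new();
    // `regexp_match` now dispatches to the RegexpMatchFunc UDF defined above
    let df = ctx.sql(r"SELECT regexp_match('aaa-555', '.*-(\d*)')").await?;
    // Expected output: a single row holding the list value [555]
    df.show().await?;
    Ok(())
}
```

Exercising the function through `SessionContext` covers the same scalar and array code paths that `invoke` normalizes above.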
diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 2d729ffc5b3e..1f659469aa3a 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -568,7 +568,7 @@ enum ScalarFunction { Tan = 18; Trunc = 19; Array = 20; - RegexpMatch = 21; + // RegexpMatch = 21; BitLength = 22; Btrim = 23; CharacterLength = 24; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 5f05b8546f68..8959dd37cf13 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22341,7 +22341,6 @@ impl serde::Serialize for ScalarFunction { Self::Tan => "Tan", Self::Trunc => "Trunc", Self::Array => "Array", - Self::RegexpMatch => "RegexpMatch", Self::BitLength => "BitLength", Self::Btrim => "Btrim", Self::CharacterLength => "CharacterLength", @@ -22483,7 +22482,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Tan", "Trunc", "Array", - "RegexpMatch", "BitLength", "Btrim", "CharacterLength", @@ -22654,7 +22652,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Tan" => Ok(ScalarFunction::Tan), "Trunc" => Ok(ScalarFunction::Trunc), "Array" => Ok(ScalarFunction::Array), - "RegexpMatch" => Ok(ScalarFunction::RegexpMatch), "BitLength" => Ok(ScalarFunction::BitLength), "Btrim" => Ok(ScalarFunction::Btrim), "CharacterLength" => Ok(ScalarFunction::CharacterLength), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 252089d5c14d..09152d99c12f 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2656,7 +2656,7 @@ pub enum ScalarFunction { Tan = 18, Trunc = 19, Array = 20, - RegexpMatch = 21, + /// RegexpMatch = 21; BitLength = 22, Btrim = 23, CharacterLength = 24, @@ -2798,7 +2798,6 @@ impl ScalarFunction { ScalarFunction::Tan => "Tan", ScalarFunction::Trunc => "Trunc", ScalarFunction::Array => "Array", - ScalarFunction::RegexpMatch => "RegexpMatch", ScalarFunction::BitLength => "BitLength", ScalarFunction::Btrim => "Btrim", ScalarFunction::CharacterLength => "CharacterLength", @@ -2934,7 +2933,6 @@ impl ScalarFunction { "Tan" => Some(Self::Tan), "Trunc" => Some(Self::Trunc), "Array" => Some(Self::Array), - "RegexpMatch" => Some(Self::RegexpMatch), "BitLength" => Some(Self::BitLength), "Btrim" => Some(Self::Btrim), "CharacterLength" => Some(Self::CharacterLength), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index acfa043b88af..e8059482b1b9 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -61,11 +61,11 @@ use datafusion_expr::{ instr, iszero, lcm, left, levenshtein, ln, log, log10, log2, logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, md5, nanvl, now, octet_length, overlay, pi, power, radians, - random, regexp_like, regexp_match, regexp_replace, repeat, replace, reverse, right, - round, rpad, rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part, - sqrt, starts_with, string_to_array, strpos, struct_fun, substr, substr_index, - substring, tan, tanh, to_hex, translate, trim, trunc, upper, uuid, AggregateFunction, - Between, BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr, + random, regexp_like, regexp_replace, repeat, replace, reverse, right, round, rpad, + rtrim, sha224, sha256, sha384, sha512, signum, sin, sinh, split_part, sqrt, + starts_with, 
string_to_array, strpos, struct_fun, substr, substr_index, substring,
+    tan, tanh, to_hex, translate, trim, trunc, upper, uuid, AggregateFunction, Between,
+    BinaryExpr, BuiltInWindowFunction, BuiltinScalarFunction, Case, Cast, Expr,
     GetFieldAccess, GetIndexedField, GroupingSet,
     GroupingSet::GroupingSets,
     JoinConstraint, JoinType, Like, Operator, TryCast, WindowFrame, WindowFrameBound,
@@ -535,7 +535,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction {
             ScalarFunction::Lpad => Self::Lpad,
             ScalarFunction::Random => Self::Random,
             ScalarFunction::RegexpLike => Self::RegexpLike,
-            ScalarFunction::RegexpMatch => Self::RegexpMatch,
             ScalarFunction::RegexpReplace => Self::RegexpReplace,
             ScalarFunction::Repeat => Self::Repeat,
             ScalarFunction::Replace => Self::Replace,
@@ -1638,12 +1637,6 @@ pub fn parse_expr(
                 .map(|expr| parse_expr(expr, registry))
                 .collect::<Result<Vec<_>, _>>()?,
         )),
-        ScalarFunction::RegexpMatch => Ok(regexp_match(
-            args.to_owned()
-                .iter()
-                .map(|expr| parse_expr(expr, registry))
-                .collect::<Result<Vec<_>, _>>()?,
-        )),
         ScalarFunction::RegexpReplace => Ok(regexp_replace(
             args.to_owned()
                 .iter()
diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs
index d19830db98ce..6f126729cb29 100644
--- a/datafusion/proto/src/logical_plan/to_proto.rs
+++ b/datafusion/proto/src/logical_plan/to_proto.rs
@@ -1518,7 +1518,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction {
             BuiltinScalarFunction::Random => Self::Random,
             BuiltinScalarFunction::Uuid => Self::Uuid,
             BuiltinScalarFunction::RegexpLike => Self::RegexpLike,
-            BuiltinScalarFunction::RegexpMatch => Self::RegexpMatch,
             BuiltinScalarFunction::RegexpReplace => Self::RegexpReplace,
             BuiltinScalarFunction::Repeat => Self::Repeat,
             BuiltinScalarFunction::Replace => Self::Replace,
diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt
index 1e951e2962ff..a80b08c41ee3 100644
--- a/datafusion/sqllogictest/test_files/regexp.slt
+++ b/datafusion/sqllogictest/test_files/regexp.slt
@@ -220,6 +220,12 @@ SELECT regexp_match('(?<=[A-Z]\w )Smith', 'John Smith', 'i');
 ----
 NULL
 
+# ported test
+query ?
+SELECT regexp_match('aaa-555', '.*-(\d*)'); +---- +[555] + # # regexp_replace tests # @@ -300,4 +306,4 @@ SELECT regexp_replace(arrow_cast('foobar', 'Dictionary(Int32, Utf8)'), 'bar', 'x fooxx statement ok -drop table t; \ No newline at end of file +drop table t; From d896ebe4d7466e52a1e8fad4252067d69e62298a Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Wed, 28 Feb 2024 20:16:44 +0800 Subject: [PATCH 42/45] Add test to verify issue #9161 (#9265) * Add test to verify issue #9161 * fmt --- datafusion/sqllogictest/test_files/aggregate.slt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt index 109c64f06055..b78c6287746c 100644 --- a/datafusion/sqllogictest/test_files/aggregate.slt +++ b/datafusion/sqllogictest/test_files/aggregate.slt @@ -3313,5 +3313,13 @@ query I SELECT 0 AS "t.a" FROM t HAVING MAX(t.a) = 0; ---- +# Test issue: https://github.com/apache/arrow-datafusion/issues/9161 +query I rowsort +SELECT CAST(a AS INT) FROM t GROUP BY t.a; +---- +1 +2 +3 + statement ok DROP TABLE t; From a1ae15826245097e7c12d4f0ed3425b25af6c431 Mon Sep 17 00:00:00 2001 From: Marco Neumann Date: Wed, 28 Feb 2024 13:25:36 +0100 Subject: [PATCH 43/45] refactor: fix error macros hygiene (#9366) --- benchmarks/src/tpch/convert.rs | 2 +- benchmarks/src/tpch/mod.rs | 2 +- datafusion-cli/src/catalog.rs | 2 +- datafusion-cli/src/functions.rs | 2 +- datafusion-examples/examples/advanced_udf.rs | 1 - datafusion-examples/examples/rewrite_expr.rs | 2 +- datafusion-examples/examples/simple_udtf.rs | 2 +- datafusion/common/src/error.rs | 16 +++++++- datafusion/common/src/hash_utils.rs | 2 +- .../common/src/scalar/struct_builder.rs | 2 +- datafusion/core/src/catalog/mod.rs | 2 +- datafusion/core/src/catalog/schema.rs | 4 +- .../src/datasource/default_table_source.rs | 2 +- .../core/src/datasource/file_format/json.rs | 2 +- .../core/src/datasource/file_format/mod.rs | 2 +- datafusion/core/src/datasource/memory.rs | 5 +-- datafusion/core/src/datasource/mod.rs | 2 +- .../physical_plan/file_scan_config.rs | 5 +-- .../datasource/physical_plan/file_stream.rs | 2 +- .../core/src/datasource/physical_plan/mod.rs | 2 +- datafusion/core/src/datasource/provider.rs | 2 +- datafusion/core/src/datasource/streaming.rs | 2 +- .../src/physical_optimizer/enforce_sorting.rs | 2 +- .../src/physical_optimizer/join_selection.rs | 2 +- .../physical_optimizer/pipeline_checker.rs | 2 +- .../src/physical_optimizer/sort_pushdown.rs | 2 +- datafusion/core/tests/core_integration.rs | 3 ++ .../provider_filter_pushdown.rs | 2 +- datafusion/core/tests/macro_hygiene/mod.rs | 39 +++++++++++++++++++ .../user_defined/user_defined_aggregates.rs | 4 +- .../user_defined_scalar_functions.rs | 2 +- datafusion/execution/src/registry.rs | 2 +- datafusion/expr/src/accumulator.rs | 2 +- datafusion/expr/src/columnar_value.rs | 2 +- .../expr/src/conditional_expressions.rs | 2 +- datafusion/expr/src/expr.rs | 2 +- datafusion/expr/src/expr_schema.rs | 3 +- datafusion/expr/src/field_util.rs | 4 +- datafusion/expr/src/interval_arithmetic.rs | 2 +- datafusion/expr/src/partition_evaluator.rs | 2 +- datafusion/expr/src/tree_node/expr.rs | 2 +- .../expr/src/type_coercion/aggregates.rs | 2 +- datafusion/expr/src/type_coercion/binary.rs | 4 +- .../expr/src/type_coercion/functions.rs | 4 +- datafusion/expr/src/udaf.rs | 2 +- datafusion/expr/src/utils.rs | 28 ++++++------- datafusion/functions-array/src/udf.rs | 2 +- datafusion/functions/src/core/nullif.rs | 2 +- 
datafusion/functions/src/core/nvl.rs | 2 +- datafusion/optimizer/src/analyzer/subquery.rs | 2 +- datafusion/optimizer/src/decorrelate.rs | 2 +- .../src/decorrelate_predicate_subquery.rs | 2 +- .../optimizer/src/eliminate_cross_join.rs | 2 +- datafusion/optimizer/src/optimizer.rs | 4 +- .../optimizer/src/propagate_empty_relation.rs | 2 +- datafusion/optimizer/src/push_down_filter.rs | 4 +- .../optimizer/src/scalar_subquery_to_join.rs | 2 +- .../src/simplify_expressions/utils.rs | 2 +- .../src/unwrap_cast_in_comparison.rs | 4 +- .../optimizer/tests/optimizer_integration.rs | 2 +- .../src/aggregate/array_agg_distinct.rs | 2 +- .../src/aggregate/array_agg_ordered.rs | 2 +- .../physical-expr/src/aggregate/average.rs | 2 +- .../physical-expr/src/aggregate/build_in.rs | 4 +- .../physical-expr/src/aggregate/grouping.rs | 2 +- datafusion/physical-expr/src/aggregate/mod.rs | 2 +- .../physical-expr/src/aggregate/nth_value.rs | 2 +- .../physical-expr/src/aggregate/stddev.rs | 2 +- .../physical-expr/src/aggregate/string_agg.rs | 2 +- datafusion/physical-expr/src/aggregate/sum.rs | 2 +- .../src/aggregate/sum_distinct.rs | 2 +- datafusion/physical-expr/src/analysis.rs | 4 +- .../src/conditional_expressions.rs | 2 +- .../physical-expr/src/equivalence/mod.rs | 2 +- .../physical-expr/src/expressions/binary.rs | 2 +- .../src/expressions/binary/kernels.rs | 2 +- .../physical-expr/src/expressions/cast.rs | 2 +- .../physical-expr/src/expressions/column.rs | 2 +- .../src/expressions/get_indexed_field.rs | 2 +- .../physical-expr/src/expressions/in_list.rs | 4 +- .../physical-expr/src/expressions/like.rs | 2 +- .../physical-expr/src/expressions/mod.rs | 4 +- .../physical-expr/src/expressions/negative.rs | 3 +- .../physical-expr/src/expressions/no_op.rs | 2 +- .../physical-expr/src/expressions/try_cast.rs | 2 +- datafusion/physical-expr/src/functions.rs | 4 +- .../physical-expr/src/intervals/cp_solver.rs | 2 +- .../physical-expr/src/intervals/utils.rs | 4 +- datafusion/physical-expr/src/physical_expr.rs | 2 +- datafusion/physical-expr/src/planner.rs | 3 +- .../physical-expr/src/string_expressions.rs | 2 +- .../physical-expr/src/struct_expressions.rs | 2 +- .../physical-expr/src/unicode_expressions.rs | 2 +- .../physical-expr/src/window/nth_value.rs | 2 +- datafusion/physical-expr/src/window/rank.rs | 2 +- .../physical-plan/src/aggregates/mod.rs | 2 +- .../physical-plan/src/coalesce_partitions.rs | 2 +- datafusion/physical-plan/src/empty.rs | 2 +- datafusion/physical-plan/src/explain.rs | 2 +- datafusion/physical-plan/src/insert.rs | 2 +- .../physical-plan/src/joins/cross_join.rs | 2 +- .../src/joins/nested_loop_join.rs | 2 +- .../src/joins/symmetric_hash_join.rs | 4 +- datafusion/physical-plan/src/lib.rs | 2 +- datafusion/physical-plan/src/limit.rs | 2 +- datafusion/physical-plan/src/memory.rs | 2 +- .../physical-plan/src/placeholder_row.rs | 2 +- .../src/sorts/sort_preserving_merge.rs | 2 +- .../src/sorts/streaming_merge.rs | 2 +- datafusion/physical-plan/src/stream.rs | 2 +- datafusion/physical-plan/src/streaming.rs | 2 +- datafusion/physical-plan/src/udaf.rs | 2 +- datafusion/physical-plan/src/union.rs | 2 +- datafusion/physical-plan/src/unnest.rs | 2 +- datafusion/physical-plan/src/values.rs | 2 +- .../src/windows/window_agg_exec.rs | 2 +- datafusion/physical-plan/src/work_table.rs | 2 +- datafusion/proto/src/bytes/mod.rs | 2 +- datafusion/proto/src/bytes/registry.rs | 2 +- datafusion/sql/examples/sql.rs | 2 +- datafusion/sql/src/expr/binary_op.rs | 2 +- datafusion/sql/src/expr/function.rs | 3 +- 
datafusion/sql/src/expr/grouping_set.rs | 2 +- datafusion/sql/src/expr/json_access.rs | 2 +- datafusion/sql/src/expr/mod.rs | 3 +- datafusion/sql/src/expr/order_by.rs | 4 +- datafusion/sql/src/expr/substring.rs | 2 +- datafusion/sql/src/expr/unary_op.rs | 2 +- datafusion/sql/src/relation/join.rs | 2 +- datafusion/sql/src/relation/mod.rs | 4 +- datafusion/sql/src/set_expr.rs | 2 +- .../tests/cases/roundtrip_logical_plan.rs | 2 +- 132 files changed, 205 insertions(+), 181 deletions(-) create mode 100644 datafusion/core/tests/macro_hygiene/mod.rs diff --git a/benchmarks/src/tpch/convert.rs b/benchmarks/src/tpch/convert.rs index 2fc74ce38888..12b562421e53 100644 --- a/benchmarks/src/tpch/convert.rs +++ b/benchmarks/src/tpch/convert.rs @@ -20,7 +20,7 @@ use std::path::{Path, PathBuf}; use std::time::Instant; use datafusion::common::not_impl_err; -use datafusion::error::DataFusionError; + use datafusion::error::Result; use datafusion::prelude::*; use parquet::basic::Compression; diff --git a/benchmarks/src/tpch/mod.rs b/benchmarks/src/tpch/mod.rs index 8965ebea7ff6..23d0681f560c 100644 --- a/benchmarks/src/tpch/mod.rs +++ b/benchmarks/src/tpch/mod.rs @@ -21,7 +21,7 @@ use arrow::datatypes::SchemaBuilder; use datafusion::{ arrow::datatypes::{DataType, Field, Schema}, common::plan_err, - error::{DataFusionError, Result}, + error::Result, }; use std::fs; mod run; diff --git a/datafusion-cli/src/catalog.rs b/datafusion-cli/src/catalog.rs index 67184b8257b8..29211edbb0a4 100644 --- a/datafusion-cli/src/catalog.rs +++ b/datafusion-cli/src/catalog.rs @@ -19,7 +19,7 @@ use crate::object_storage::get_object_store; use async_trait::async_trait; use datafusion::catalog::schema::SchemaProvider; use datafusion::catalog::{CatalogProvider, CatalogProviderList}; -use datafusion::common::{plan_datafusion_err, DataFusionError}; +use datafusion::common::plan_datafusion_err; use datafusion::datasource::listing::{ ListingTable, ListingTableConfig, ListingTableUrl, }; diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index 5390fa9f2271..806e2bb39cd4 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -21,7 +21,7 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow::util::pretty::pretty_format_batches; use async_trait::async_trait; -use datafusion::common::DataFusionError; + use datafusion::common::{plan_err, Column}; use datafusion::datasource::function::TableFunctionImpl; use datafusion::datasource::TableProvider; diff --git a/datafusion-examples/examples/advanced_udf.rs b/datafusion-examples/examples/advanced_udf.rs index 3e7dd2e2af08..c8063c0eb1e3 100644 --- a/datafusion-examples/examples/advanced_udf.rs +++ b/datafusion-examples/examples/advanced_udf.rs @@ -176,7 +176,6 @@ impl ScalarUDFImpl for PowUdf { } // if the types were not float, it is a bug in DataFusion _ => { - use datafusion_common::DataFusionError; internal_err!("Invalid argument types to pow function") } } diff --git a/datafusion-examples/examples/rewrite_expr.rs b/datafusion-examples/examples/rewrite_expr.rs index 5e95562033e6..8d13d1201881 100644 --- a/datafusion-examples/examples/rewrite_expr.rs +++ b/datafusion-examples/examples/rewrite_expr.rs @@ -18,7 +18,7 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{plan_err, DataFusionError, Result, ScalarValue}; +use 
datafusion_common::{plan_err, Result, ScalarValue}; use datafusion_expr::{ AggregateUDF, Between, Expr, Filter, LogicalPlan, ScalarUDF, TableSource, WindowUDF, }; diff --git a/datafusion-examples/examples/simple_udtf.rs b/datafusion-examples/examples/simple_udtf.rs index f1d763ba6e41..09341fbf47fa 100644 --- a/datafusion-examples/examples/simple_udtf.rs +++ b/datafusion-examples/examples/simple_udtf.rs @@ -27,7 +27,7 @@ use datafusion::execution::context::{ExecutionProps, SessionState}; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; -use datafusion_common::{plan_err, DataFusionError, ScalarValue}; +use datafusion_common::{plan_err, ScalarValue}; use datafusion_expr::{Expr, TableType}; use datafusion_optimizer::simplify_expressions::{ExprSimplifier, SimplifyContext}; use std::fs::File; diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 331f5910d7e5..0f4e97905938 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -495,7 +495,13 @@ macro_rules! make_error { #[macro_export] macro_rules! $NAME_DF_ERR { ($d($d args:expr),*) => { - DataFusionError::$ERR(format!("{}{}", format!($d($d args),*), DataFusionError::get_back_trace()).into()) + $crate::DataFusionError::$ERR( + format!( + "{}{}", + format!($d($d args),*), + $crate::DataFusionError::get_back_trace(), + ).into() + ) } } @@ -503,7 +509,13 @@ macro_rules! make_error { #[macro_export] macro_rules! $NAME_ERR { ($d($d args:expr),*) => { - Err(DataFusionError::$ERR(format!("{}{}", format!($d($d args),*), DataFusionError::get_back_trace()).into())) + Err($crate::DataFusionError::$ERR( + format!( + "{}{}", + format!($d($d args),*), + $crate::DataFusionError::get_back_trace(), + ).into() + )) } } } diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index d5a1b3ee363b..d1a7a675cb45 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -31,7 +31,7 @@ use crate::cast::{ as_large_list_array, as_list_array, as_primitive_array, as_string_array, as_struct_array, }; -use crate::error::{DataFusionError, Result, _internal_err}; +use crate::error::{Result, _internal_err}; // Combines two hashes into one hash #[inline] diff --git a/datafusion/common/src/scalar/struct_builder.rs b/datafusion/common/src/scalar/struct_builder.rs index 1192757e890b..b1a34e4a61d0 100644 --- a/datafusion/common/src/scalar/struct_builder.rs +++ b/datafusion/common/src/scalar/struct_builder.rs @@ -18,7 +18,7 @@ //! 
[`ScalarStructBuilder`] for building [`ScalarValue::Struct`] use crate::error::_internal_err; -use crate::{DataFusionError, Result, ScalarValue}; +use crate::{Result, ScalarValue}; use arrow::array::{ArrayRef, StructArray}; use arrow::datatypes::{DataType, FieldRef, Fields}; use arrow_schema::Field; diff --git a/datafusion/core/src/catalog/mod.rs b/datafusion/core/src/catalog/mod.rs index a05a480bef44..8aeeaf9f72d8 100644 --- a/datafusion/core/src/catalog/mod.rs +++ b/datafusion/core/src/catalog/mod.rs @@ -25,7 +25,7 @@ pub use datafusion_sql::{ResolvedTableReference, TableReference}; use crate::catalog::schema::SchemaProvider; use dashmap::DashMap; -use datafusion_common::{exec_err, not_impl_err, DataFusionError, Result}; +use datafusion_common::{exec_err, not_impl_err, Result}; use std::any::Any; use std::sync::Arc; diff --git a/datafusion/core/src/catalog/schema.rs b/datafusion/core/src/catalog/schema.rs index 49f8350ecc5b..8249c3a5330f 100644 --- a/datafusion/core/src/catalog/schema.rs +++ b/datafusion/core/src/catalog/schema.rs @@ -20,12 +20,12 @@ use async_trait::async_trait; use dashmap::DashMap; -use datafusion_common::exec_err; +use datafusion_common::{exec_err, DataFusionError}; use std::any::Any; use std::sync::Arc; use crate::datasource::TableProvider; -use crate::error::{DataFusionError, Result}; +use crate::error::Result; /// Represents a schema, comprising a number of named tables. /// diff --git a/datafusion/core/src/datasource/default_table_source.rs b/datafusion/core/src/datasource/default_table_source.rs index fadf01c74c5d..977e681d6641 100644 --- a/datafusion/core/src/datasource/default_table_source.rs +++ b/datafusion/core/src/datasource/default_table_source.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use crate::datasource::TableProvider; use arrow::datatypes::SchemaRef; -use datafusion_common::{internal_err, Constraints, DataFusionError}; +use datafusion_common::{internal_err, Constraints}; use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource}; /// DataFusion default table source, wrapping TableProvider. 
diff --git a/datafusion/core/src/datasource/file_format/json.rs b/datafusion/core/src/datasource/file_format/json.rs index 0f6d3648d120..121fe5e8dcb1 100644 --- a/datafusion/core/src/datasource/file_format/json.rs +++ b/datafusion/core/src/datasource/file_format/json.rs @@ -42,7 +42,7 @@ use arrow::datatypes::SchemaRef; use arrow::json; use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter}; use arrow_array::RecordBatch; -use datafusion_common::{not_impl_err, DataFusionError, FileType}; +use datafusion_common::{not_impl_err, FileType}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; use datafusion_physical_plan::metrics::MetricsSet; diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index 12c9fb91adb1..72dc289d4b64 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -41,7 +41,7 @@ use crate::error::Result; use crate::execution::context::SessionState; use crate::physical_plan::{ExecutionPlan, Statistics}; -use datafusion_common::{not_impl_err, DataFusionError, FileType}; +use datafusion_common::{not_impl_err, FileType}; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortRequirement}; use async_trait::async_trait; diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index 901e74dfc218..e087b4bcba51 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -28,9 +28,7 @@ use std::sync::Arc; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use async_trait::async_trait; -use datafusion_common::{ - not_impl_err, plan_err, Constraints, DFSchema, DataFusionError, SchemaExt, -}; +use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt}; use datafusion_execution::TaskContext; use parking_lot::Mutex; use tokio::sync::RwLock; @@ -370,6 +368,7 @@ mod tests { use arrow::array::{AsArray, Int32Array}; use arrow::datatypes::{DataType, Field, Schema, UInt64Type}; use arrow::error::ArrowError; + use datafusion_common::DataFusionError; use datafusion_expr::LogicalPlanBuilder; use futures::StreamExt; use std::collections::HashMap; diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 8f20da183a93..351967d35324 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -48,7 +48,7 @@ pub use crate::logical_expr::TableType; pub use statistics::get_statistics_with_limit; use arrow_schema::{Schema, SortOptions}; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::Expr; use datafusion_physical_expr::{expressions, LexOrdering, PhysicalSortExpr}; diff --git a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs index 516755e4d293..4a814c5b9b2c 100644 --- a/datafusion/core/src/datasource/physical_plan/file_scan_config.rs +++ b/datafusion/core/src/datasource/physical_plan/file_scan_config.rs @@ -24,10 +24,7 @@ use std::{ use super::{get_projected_output_ordering, FileGroupPartitioner}; use crate::datasource::{listing::PartitionedFile, object_store::ObjectStoreUrl}; -use crate::{ - error::{DataFusionError, Result}, - scalar::ScalarValue, -}; +use crate::{error::Result, scalar::ScalarValue}; use arrow::array::{ArrayData, BufferBuilder}; use 
arrow::buffer::Buffer; diff --git a/datafusion/core/src/datasource/physical_plan/file_stream.rs b/datafusion/core/src/datasource/physical_plan/file_stream.rs index 9cb58e7032db..0d25189a6124 100644 --- a/datafusion/core/src/datasource/physical_plan/file_stream.rs +++ b/datafusion/core/src/datasource/physical_plan/file_stream.rs @@ -531,7 +531,7 @@ mod tests { }; use arrow_schema::Schema; - use datafusion_common::{internal_err, DataFusionError, Statistics}; + use datafusion_common::{internal_err, Statistics}; use bytes::Bytes; use futures::StreamExt; diff --git a/datafusion/core/src/datasource/physical_plan/mod.rs b/datafusion/core/src/datasource/physical_plan/mod.rs index d6546539993b..2a8bb3b4fbaa 100644 --- a/datafusion/core/src/datasource/physical_plan/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/mod.rs @@ -52,7 +52,7 @@ use std::{ }; use super::listing::ListingTableUrl; -use crate::error::{DataFusionError, Result}; +use crate::error::Result; use crate::physical_plan::{DisplayAs, DisplayFormatType}; use crate::{ datasource::{ diff --git a/datafusion/core/src/datasource/provider.rs b/datafusion/core/src/datasource/provider.rs index 8de2c6b3ea86..e769084df636 100644 --- a/datafusion/core/src/datasource/provider.rs +++ b/datafusion/core/src/datasource/provider.rs @@ -21,7 +21,7 @@ use std::any::Any; use std::sync::Arc; use async_trait::async_trait; -use datafusion_common::{not_impl_err, Constraints, DataFusionError, Statistics}; +use datafusion_common::{not_impl_err, Constraints, Statistics}; use datafusion_expr::{CreateExternalTable, LogicalPlan}; pub use datafusion_expr::{TableProviderFilterPushDown, TableType}; diff --git a/datafusion/core/src/datasource/streaming.rs b/datafusion/core/src/datasource/streaming.rs index 3eb120653ce3..f85db2280d8e 100644 --- a/datafusion/core/src/datasource/streaming.rs +++ b/datafusion/core/src/datasource/streaming.rs @@ -23,7 +23,7 @@ use std::sync::Arc; use arrow::datatypes::SchemaRef; use async_trait::async_trait; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::{Expr, TableType}; use log::debug; diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 9b76af2dbb1f..e6f3e12aceaf 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -59,8 +59,8 @@ use crate::physical_plan::windows::{ }; use crate::physical_plan::{Distribution, ExecutionPlan, InputOrderMode}; +use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{plan_err, DataFusionError}; use datafusion_physical_expr::{PhysicalSortExpr, PhysicalSortRequirement}; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::sorts::partial_sort::PartialSortExec; diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 02626056f6cc..f2e81fb053c0 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -41,8 +41,8 @@ use crate::physical_plan::ExecutionPlan; use arrow_schema::Schema; use datafusion_common::tree_node::{Transformed, TreeNode}; +use datafusion_common::JoinType; use datafusion_common::{internal_err, JoinSide}; -use datafusion_common::{DataFusionError, JoinType}; use 
datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::sort_properties::SortProperties; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; diff --git a/datafusion/core/src/physical_optimizer/pipeline_checker.rs b/datafusion/core/src/physical_optimizer/pipeline_checker.rs index bb0665c10bcc..c0f071cd3f64 100644 --- a/datafusion/core/src/physical_optimizer/pipeline_checker.rs +++ b/datafusion/core/src/physical_optimizer/pipeline_checker.rs @@ -27,8 +27,8 @@ use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::ExecutionPlan; use datafusion_common::config::OptimizerOptions; +use datafusion_common::plan_err; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{plan_err, DataFusionError}; use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported}; use datafusion_physical_plan::joins::SymmetricHashJoinExec; use datafusion_physical_plan::tree_node::PlanContext; diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index 3413486c6b46..22e0d804acb1 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -31,7 +31,7 @@ use crate::physical_plan::tree_node::PlanContext; use crate::physical_plan::ExecutionPlan; use datafusion_common::tree_node::Transformed; -use datafusion_common::{plan_err, DataFusionError, JoinSide, Result}; +use datafusion_common::{plan_err, JoinSide, Result}; use datafusion_expr::JoinType; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{ diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs index af39e1e18abc..befefb1d7ec5 100644 --- a/datafusion/core/tests/core_integration.rs +++ b/datafusion/core/tests/core_integration.rs @@ -21,6 +21,9 @@ mod sql; /// Run all tests that are found in the `dataframe` directory mod dataframe; +/// Run all tests that are found in the `macro_hygiene` directory +mod macro_hygiene; + #[cfg(test)] #[ctor::ctor] fn init() { diff --git a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs index e374abd6e891..bc7f88b39672 100644 --- a/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs +++ b/datafusion/core/tests/custom_sources_cases/provider_filter_pushdown.rs @@ -34,7 +34,7 @@ use datafusion::physical_plan::{ use datafusion::prelude::*; use datafusion::scalar::ScalarValue; use datafusion_common::cast::as_primitive_array; -use datafusion_common::{internal_err, not_impl_err, DataFusionError}; +use datafusion_common::{internal_err, not_impl_err}; use datafusion_expr::expr::{BinaryExpr, Cast}; use async_trait::async_trait; diff --git a/datafusion/core/tests/macro_hygiene/mod.rs b/datafusion/core/tests/macro_hygiene/mod.rs new file mode 100644 index 000000000000..72ac6e64fb0c --- /dev/null +++ b/datafusion/core/tests/macro_hygiene/mod.rs @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Verifies [Macro Hygiene]
+//!
+//! [Macro Hygiene]: https://en.wikipedia.org/wiki/Hygienic_macro
+
+mod plan_err {
+    // NO other imports!
+    use datafusion_common::plan_err;
+
+    #[test]
+    fn test_macro() {
+        // need type annotation for Ok variant
+        let _res: Result<(), _> = plan_err!("foo");
+    }
+}
+
+mod plan_datafusion_err {
+    // NO other imports!
+    use datafusion_common::plan_datafusion_err;
+
+    #[test]
+    fn test_macro() {
+        plan_datafusion_err!("foo");
+    }
+}
diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs
index 8daeefd236f7..9e231d25f298 100644
--- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs
+++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs
@@ -42,9 +42,7 @@ use datafusion::{
     prelude::SessionContext,
     scalar::ScalarValue,
 };
-use datafusion_common::{
-    assert_contains, cast::as_primitive_array, exec_err, DataFusionError,
-};
+use datafusion_common::{assert_contains, cast::as_primitive_array, exec_err};
 use datafusion_expr::{
     create_udaf, AggregateUDFImpl, GroupsAccumulator, SimpleAggregateUDF,
 };
diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index a255498eb5f7..0546ef59b1d8 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -24,7 +24,7 @@ use datafusion::{execution::registry::FunctionRegistry, test_util};
 use datafusion_common::cast::as_float64_array;
 use datafusion_common::{
     assert_batches_eq, assert_batches_sorted_eq, cast::as_int32_array, not_impl_err,
-    plan_err, DataFusionError, ExprSchema, Result, ScalarValue,
+    plan_err, ExprSchema, Result, ScalarValue,
 };
 use datafusion_expr::{
     create_udaf, create_udf, Accumulator, ColumnarValue, ExprSchemable,
diff --git a/datafusion/execution/src/registry.rs b/datafusion/execution/src/registry.rs
index 6e0a932f0bc5..5bc9a7a07b6f 100644
--- a/datafusion/execution/src/registry.rs
+++ b/datafusion/execution/src/registry.rs
@@ -17,7 +17,7 @@
 
 //! FunctionRegistry trait
 
-use datafusion_common::{not_impl_err, plan_datafusion_err, DataFusionError, Result};
+use datafusion_common::{not_impl_err, plan_datafusion_err, Result};
 use datafusion_expr::{AggregateUDF, ScalarUDF, UserDefinedLogicalNode, WindowUDF};
 use std::collections::HashMap;
 use std::{collections::HashSet, sync::Arc};
diff --git a/datafusion/expr/src/accumulator.rs b/datafusion/expr/src/accumulator.rs
index fa2017586d21..031348269a38 100644
--- a/datafusion/expr/src/accumulator.rs
+++ b/datafusion/expr/src/accumulator.rs
@@ -18,7 +18,7 @@
 //! Accumulator module contains the trait definition for aggregation function's accumulators.
 
 use arrow::array::ArrayRef;
-use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue};
+use datafusion_common::{internal_err, Result, ScalarValue};
 use std::fmt::Debug;
 
 /// Tracks an aggregate function's state.
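The hygiene tests above compile `plan_err!` and `plan_datafusion_err!` with no other imports in scope, which only works because the macro expansions in `error.rs` now name the error type as `$crate::DataFusionError` instead of relying on the caller to have imported it. A stand-alone sketch of the same `$crate` pattern follows; the crate and error type here are hypothetical, not part of this patch:

```rust
// lib.rs of a hypothetical crate `my_crate` (illustration only)
#[derive(Debug)]
pub enum MyError {
    Message(String),
}

// The expansion names `$crate::MyError`, so a downstream caller can write
// `my_crate::my_err!("boom")` after importing only the macro itself;
// `MyError` never has to be in scope at the call site. That is the same
// property the hygiene tests above verify for the DataFusion error macros.
#[macro_export]
macro_rules! my_err {
    ($($args:expr),*) => {
        Err::<(), $crate::MyError>($crate::MyError::Message(format!($($args),*)))
    };
}
```

A caller would then write `use my_crate::my_err;` and `let _res: Result<(), _> = my_err!("foo");`, mirroring the test modules above.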
diff --git a/datafusion/expr/src/columnar_value.rs b/datafusion/expr/src/columnar_value.rs index 585bee3b9bfa..c845c81cb708 100644 --- a/datafusion/expr/src/columnar_value.rs +++ b/datafusion/expr/src/columnar_value.rs @@ -20,7 +20,7 @@ use arrow::array::ArrayRef; use arrow::array::NullArray; use arrow::datatypes::DataType; -use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{internal_err, Result, ScalarValue}; use std::sync::Arc; /// Represents the result of evaluating an expression: either a single diff --git a/datafusion/expr/src/conditional_expressions.rs b/datafusion/expr/src/conditional_expressions.rs index c31bd04eafa0..1346825f054d 100644 --- a/datafusion/expr/src/conditional_expressions.rs +++ b/datafusion/expr/src/conditional_expressions.rs @@ -19,7 +19,7 @@ use crate::expr::Case; use crate::{expr_schema::ExprSchemable, Expr}; use arrow::datatypes::DataType; -use datafusion_common::{plan_err, DFSchema, DataFusionError, Result}; +use datafusion_common::{plan_err, DFSchema, Result}; use std::collections::HashSet; /// Currently supported types by the coalesce function. diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index c3d9269d1559..43f6ba8f6309 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -29,7 +29,7 @@ use crate::{built_in_window_function, udaf}; use arrow::datatypes::DataType; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::{internal_err, DFSchema, OwnedTableReference}; -use datafusion_common::{plan_err, Column, DataFusionError, Result, ScalarValue}; +use datafusion_common::{plan_err, Column, Result, ScalarValue}; use sqlparser::ast::NullTreatment; use std::collections::HashSet; use std::fmt; diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 491b4a852261..a453730a0e71 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -28,8 +28,7 @@ use crate::{utils, LogicalPlan, Projection, Subquery}; use arrow::compute::can_cast_types; use arrow::datatypes::{DataType, Field}; use datafusion_common::{ - internal_err, plan_datafusion_err, plan_err, Column, DFField, DataFusionError, - ExprSchema, Result, + internal_err, plan_datafusion_err, plan_err, Column, DFField, ExprSchema, Result, }; use std::collections::HashMap; use std::sync::Arc; diff --git a/datafusion/expr/src/field_util.rs b/datafusion/expr/src/field_util.rs index c46ec50234dd..3195ce6f2dfc 100644 --- a/datafusion/expr/src/field_util.rs +++ b/datafusion/expr/src/field_util.rs @@ -18,9 +18,7 @@ //! 
Utility functions for complex field access
 
 use arrow::datatypes::{DataType, Field};
-use datafusion_common::{
-    plan_datafusion_err, plan_err, DataFusionError, Result, ScalarValue,
-};
+use datafusion_common::{plan_datafusion_err, plan_err, Result, ScalarValue};
 
 /// Types of the field access expression of a nested type, such as `Field` or `List`
 pub enum GetFieldAccessSchema {
diff --git a/datafusion/expr/src/interval_arithmetic.rs b/datafusion/expr/src/interval_arithmetic.rs
index 5d34fe91c3ac..ca91a8c9da00 100644
--- a/datafusion/expr/src/interval_arithmetic.rs
+++ b/datafusion/expr/src/interval_arithmetic.rs
@@ -28,7 +28,7 @@ use arrow::compute::{cast_with_options, CastOptions};
 use arrow::datatypes::DataType;
 use arrow::datatypes::{IntervalUnit, TimeUnit};
 use datafusion_common::rounding::{alter_fp_rounding_mode, next_down, next_up};
-use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue};
+use datafusion_common::{internal_err, Result, ScalarValue};
 
 macro_rules! get_extreme_value {
     ($extreme:ident, $value:expr) => {
diff --git a/datafusion/expr/src/partition_evaluator.rs b/datafusion/expr/src/partition_evaluator.rs
index 4b5357ddf8ba..04b6faf55ae1 100644
--- a/datafusion/expr/src/partition_evaluator.rs
+++ b/datafusion/expr/src/partition_evaluator.rs
@@ -18,7 +18,7 @@
 //! Partition evaluation module
 
 use arrow::array::ArrayRef;
-use datafusion_common::{exec_err, not_impl_err, DataFusionError, Result, ScalarValue};
+use datafusion_common::{exec_err, not_impl_err, Result, ScalarValue};
 use std::fmt::Debug;
 use std::ops::Range;
diff --git a/datafusion/expr/src/tree_node/expr.rs b/datafusion/expr/src/tree_node/expr.rs
index def25ed9242f..81949f2178f6 100644
--- a/datafusion/expr/src/tree_node/expr.rs
+++ b/datafusion/expr/src/tree_node/expr.rs
@@ -25,7 +25,7 @@ use crate::expr::{
 use crate::{Expr, GetFieldAccess};
 
 use datafusion_common::tree_node::{TreeNode, VisitRecursion};
-use datafusion_common::{internal_err, DataFusionError, Result};
+use datafusion_common::{internal_err, Result};
 
 impl TreeNode for Expr {
     fn apply_children<F: FnMut(&Self) -> Result<VisitRecursion>>(
diff --git a/datafusion/expr/src/type_coercion/aggregates.rs b/datafusion/expr/src/type_coercion/aggregates.rs
index ab994c143ac2..866aea06b4d4 100644
--- a/datafusion/expr/src/type_coercion/aggregates.rs
+++ b/datafusion/expr/src/type_coercion/aggregates.rs
@@ -24,7 +24,7 @@ use arrow::datatypes::{
     DataType, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
     DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
 };
-use datafusion_common::{internal_err, plan_err, DataFusionError, Result};
+use datafusion_common::{internal_err, plan_err, Result};
 
 pub static STRINGS: &[DataType] = &[DataType::Utf8, DataType::LargeUtf8];
diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs
index 70015c699296..118844e4b266 100644
--- a/datafusion/expr/src/type_coercion/binary.rs
+++ b/datafusion/expr/src/type_coercion/binary.rs
@@ -28,9 +28,7 @@ use arrow::datatypes::{
     DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
 };
 
-use datafusion_common::{
-    exec_datafusion_err, plan_datafusion_err, plan_err, DataFusionError, Result,
-};
+use datafusion_common::{exec_datafusion_err, plan_datafusion_err, plan_err, Result};
 
 /// The type signature of an instantiation of binary operator expression such as
 /// `lhs + rhs`
diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs
index 2022d67879f8..e9878fd17e8d 100644
--- a/datafusion/expr/src/type_coercion/functions.rs
+++ 
b/datafusion/expr/src/type_coercion/functions.rs
@@ -22,9 +22,7 @@ use arrow::{
     datatypes::{DataType, TimeUnit},
 };
 use datafusion_common::utils::{coerced_fixed_size_list_to_list, list_ndims};
-use datafusion_common::{
-    internal_datafusion_err, internal_err, plan_err, DataFusionError, Result,
-};
+use datafusion_common::{internal_datafusion_err, internal_err, plan_err, Result};
 
 use super::binary::comparison_coercion;
diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs
index fb062e5830eb..6ff7730bd606 100644
--- a/datafusion/expr/src/udaf.rs
+++ b/datafusion/expr/src/udaf.rs
@@ -23,7 +23,7 @@ use crate::{
     AccumulatorFactoryFunction, ReturnTypeFunction, Signature, StateTypeFunction,
 };
 use arrow::datatypes::DataType;
-use datafusion_common::{not_impl_err, DataFusionError, Result};
+use datafusion_common::{not_impl_err, Result};
 use std::any::Any;
 use std::fmt::{self, Debug, Formatter};
 use std::sync::Arc;
diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs
index 2fda81d8896f..fe9297b32a8e 100644
--- a/datafusion/expr/src/utils.rs
+++ b/datafusion/expr/src/utils.rs
@@ -35,7 +35,7 @@ use datafusion_common::tree_node::{TreeNode, VisitRecursion};
 use datafusion_common::utils::get_at_indices;
 use datafusion_common::{
     internal_err, plan_datafusion_err, plan_err, Column, DFField, DFSchema, DFSchemaRef,
-    DataFusionError, Result, ScalarValue, TableReference,
+    Result, ScalarValue, TableReference,
 };
 use sqlparser::ast::{ExceptSelectItem, ExcludeSelectItem, WildcardAdditionalOptions};
 
@@ -129,14 +129,15 @@ fn check_grouping_sets_size_limit(size: usize) -> Result<()> {
 
 /// Merge two grouping_set
 ///
-///
-/// Example:
-///
+/// # Example
+/// ```text
 /// (A, B), (C, D) -> (A, B, C, D)
+/// ```
 ///
-/// Error:
+/// # Error
+/// - [`DataFusionError`]: The number of group_expression in grouping_set exceeds the maximum limit
 ///
-/// [`DataFusionError`] The number of group_expression in grouping_set exceeds the maximum limit
+/// [`DataFusionError`]: datafusion_common::DataFusionError
 fn merge_grouping_set<T: Clone>(left: &[T], right: &[T]) -> Result<Vec<T>> {
     check_grouping_set_size_limit(left.len() + right.len())?;
     Ok(left.iter().chain(right.iter()).cloned().collect())
@@ -144,15 +145,16 @@ fn merge_grouping_set<T: Clone>(left: &[T], right: &[T]) -> Result<Vec<T>> {
 
 /// Compute the cross product of two grouping_sets
 ///
+/// # Example
+/// ```text
+/// [(A, B), (C, D)], [(E), (F)] -> [(A, B, E), (A, B, F), (C, D, E), (C, D, F)]
+/// ```
 ///
-/// Example:
-///
-/// \[(A, B), (C, D)], [(E), (F)\] -> \[(A, B, E), (A, B, F), (C, D, E), (C, D, F)\]
-///
-/// Error:
+/// # Error
+/// - [`DataFusionError`]: The number of group_expression in grouping_set exceeds the maximum limit
+/// - [`DataFusionError`]: The number of grouping_set in grouping_sets exceeds the maximum limit
 ///
-/// [`DataFusionError`] The number of group_expression in grouping_set exceeds the maximum limit \
-/// [`DataFusionError`] The number of grouping_set in grouping_sets exceeds the maximum limit
+/// [`DataFusionError`]: datafusion_common::DataFusionError
 fn cross_join_grouping_sets<T: Clone>(
     left: &[Vec<T>],
     right: &[Vec<T>],
diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs
index b7f9d2497fb7..79fb83c059a4 100644
--- a/datafusion/functions-array/src/udf.rs
+++ b/datafusion/functions-array/src/udf.rs
@@ -18,7 +18,7 @@
 //! [`ScalarUDFImpl`] definitions for array functions.
use arrow::datatypes::DataType; -use datafusion_common::{plan_err, DataFusionError}; +use datafusion_common::plan_err; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::Expr; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; diff --git a/datafusion/functions/src/core/nullif.rs b/datafusion/functions/src/core/nullif.rs index 73bfba9b38b1..afb308e441f5 100644 --- a/datafusion/functions/src/core/nullif.rs +++ b/datafusion/functions/src/core/nullif.rs @@ -18,7 +18,7 @@ //! Encoding expressions use arrow::datatypes::DataType; -use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; use arrow::array::Array; diff --git a/datafusion/functions/src/core/nvl.rs b/datafusion/functions/src/core/nvl.rs index caf095ecbbbd..578aaeda2e89 100644 --- a/datafusion/functions/src/core/nvl.rs +++ b/datafusion/functions/src/core/nvl.rs @@ -16,7 +16,7 @@ // under the License. use arrow::datatypes::DataType; -use datafusion_common::{internal_err, Result, DataFusionError}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use arrow::compute::kernels::zip::zip; use arrow::compute::is_not_null; diff --git a/datafusion/optimizer/src/analyzer/subquery.rs b/datafusion/optimizer/src/analyzer/subquery.rs index 7c5b70b19af0..a0e972fc703c 100644 --- a/datafusion/optimizer/src/analyzer/subquery.rs +++ b/datafusion/optimizer/src/analyzer/subquery.rs @@ -18,7 +18,7 @@ use crate::analyzer::check_plan; use crate::utils::collect_subquery_cols; use datafusion_common::tree_node::{TreeNode, VisitRecursion}; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::expr_rewriter::strip_outer_reference; use datafusion_expr::utils::split_conjunction; use datafusion_expr::{ diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index b1000f042c98..0f4b39d9eee3 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -21,7 +21,7 @@ use datafusion_common::tree_node::{ RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter, }; use datafusion_common::{plan_err, Result}; -use datafusion_common::{Column, DFSchemaRef, DataFusionError, ScalarValue}; +use datafusion_common::{Column, DFSchemaRef, ScalarValue}; use datafusion_expr::expr::{AggregateFunctionDefinition, Alias}; use datafusion_expr::utils::{conjunction, find_join_exprs, split_conjunction}; use datafusion_expr::{expr, EmptyRelation, Expr, LogicalPlan, LogicalPlanBuilder}; diff --git a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs index 450336376a23..a9e1f1228e5e 100644 --- a/datafusion/optimizer/src/decorrelate_predicate_subquery.rs +++ b/datafusion/optimizer/src/decorrelate_predicate_subquery.rs @@ -21,7 +21,7 @@ use crate::utils::replace_qualified_name; use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::alias::AliasGenerator; use datafusion_common::tree_node::TreeNode; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::expr::{Exists, InSubquery}; use datafusion_expr::expr_rewriter::create_col_from_scalar_expr; use datafusion_expr::logical_plan::{JoinType, Subquery}; diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs 
b/datafusion/optimizer/src/eliminate_cross_join.rs index d9e96a9f2543..7f65690a4a7c 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use crate::{utils, OptimizerConfig, OptimizerRule}; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::expr::{BinaryExpr, Expr}; use datafusion_expr::logical_plan::{ CrossJoin, Filter, Join, JoinConstraint, JoinType, LogicalPlan, Projection, diff --git a/datafusion/optimizer/src/optimizer.rs b/datafusion/optimizer/src/optimizer.rs index 633a32996d1c..93b3d6b8b9f2 100644 --- a/datafusion/optimizer/src/optimizer.rs +++ b/datafusion/optimizer/src/optimizer.rs @@ -458,9 +458,7 @@ mod tests { use crate::test::test_table_scan; use crate::{OptimizerConfig, OptimizerContext, OptimizerRule}; - use datafusion_common::{ - plan_err, DFField, DFSchema, DFSchemaRef, DataFusionError, Result, - }; + use datafusion_common::{plan_err, DFField, DFSchema, DFSchemaRef, Result}; use datafusion_expr::logical_plan::EmptyRelation; use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder, Projection}; diff --git a/datafusion/optimizer/src/propagate_empty_relation.rs b/datafusion/optimizer/src/propagate_empty_relation.rs index 040b69fc8bf3..d1f9f87a32a3 100644 --- a/datafusion/optimizer/src/propagate_empty_relation.rs +++ b/datafusion/optimizer/src/propagate_empty_relation.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::logical_plan::LogicalPlan; use datafusion_expr::{EmptyRelation, JoinType, Projection, Union}; use std::sync::Arc; diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index acdda6833285..40156d43c572 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -24,8 +24,8 @@ use crate::{OptimizerConfig, OptimizerRule}; use datafusion_common::tree_node::{Transformed, TreeNode, VisitRecursion}; use datafusion_common::{ - internal_err, plan_datafusion_err, Column, DFSchema, DFSchemaRef, DataFusionError, - JoinConstraint, Result, + internal_err, plan_datafusion_err, Column, DFSchema, DFSchemaRef, JoinConstraint, + Result, }; use datafusion_expr::expr::Alias; use datafusion_expr::expr_rewriter::replace_col; diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 34ed4a9475cb..9aa08c37fa35 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -23,7 +23,7 @@ use datafusion_common::alias::AliasGenerator; use datafusion_common::tree_node::{ RewriteRecursion, Transformed, TreeNode, TreeNodeRewriter, }; -use datafusion_common::{plan_err, Column, DataFusionError, Result, ScalarValue}; +use datafusion_common::{plan_err, Column, Result, ScalarValue}; use datafusion_expr::expr_rewriter::create_col_from_scalar_expr; use datafusion_expr::logical_plan::{JoinType, Subquery}; use datafusion_expr::utils::conjunction; diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index fa91a3ace2a2..c9736061df90 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ 
b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -18,7 +18,7 @@ //! Utility functions for expression simplification use crate::simplify_expressions::SimplifyInfo; -use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ expr::{Between, BinaryExpr, InList}, diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 9d3561d12671..4c22742c8635 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -25,9 +25,7 @@ use arrow::datatypes::{ }; use arrow::temporal_conversions::{MICROSECONDS, MILLISECONDS, NANOSECONDS}; use datafusion_common::tree_node::{RewriteRecursion, TreeNodeRewriter}; -use datafusion_common::{ - internal_err, DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue, -}; +use datafusion_common::{internal_err, DFSchema, DFSchemaRef, Result, ScalarValue}; use datafusion_expr::expr::{BinaryExpr, Cast, InList, TryCast}; use datafusion_expr::expr_rewriter::rewrite_preserving_name; use datafusion_expr::utils::merge_schema; diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs index d857c6154ea9..fe1234de5ab8 100644 --- a/datafusion/optimizer/tests/optimizer_integration.rs +++ b/datafusion/optimizer/tests/optimizer_integration.rs @@ -21,7 +21,7 @@ use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::{AggregateUDF, LogicalPlan, ScalarUDF, TableSource, WindowUDF}; use datafusion_optimizer::analyzer::Analyzer; use datafusion_optimizer::optimizer::Optimizer; diff --git a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs index b073b00578a5..8e7b9d91ee49 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_distinct.rs @@ -188,7 +188,7 @@ mod tests { use arrow_array::Array; use arrow_array::ListArray; use arrow_buffer::OffsetBuffer; - use datafusion_common::{internal_err, DataFusionError}; + use datafusion_common::internal_err; // arrow::compute::sort can't sort nested ListArray directly, so we compare the scalar values pair-wise. fn compare_list_contents( diff --git a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs index 587f40081c90..7e2c7bb27144 100644 --- a/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs +++ b/datafusion/physical-expr/src/aggregate/array_agg_ordered.rs @@ -38,7 +38,7 @@ use arrow_schema::{Fields, SortOptions}; use datafusion_common::utils::array_into_list_array; use datafusion_common::utils::{compare_rows, get_row_at_idx}; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::Accumulator; /// Expression for a `ARRAY_AGG(... ORDER BY ..., ...)` aggregation. 
In a multi diff --git a/datafusion/physical-expr/src/aggregate/average.rs b/datafusion/physical-expr/src/aggregate/average.rs index 57f8fa211e58..f06355293d7c 100644 --- a/datafusion/physical-expr/src/aggregate/average.rs +++ b/datafusion/physical-expr/src/aggregate/average.rs @@ -39,7 +39,7 @@ use arrow_array::{ Array, ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType, PrimitiveArray, }; use arrow_buffer::{i256, ArrowNativeType}; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::type_coercion::aggregates::avg_return_type; use datafusion_expr::{Accumulator, EmitTo, GroupsAccumulator}; diff --git a/datafusion/physical-expr/src/aggregate/build_in.rs b/datafusion/physical-expr/src/aggregate/build_in.rs index 2918856aa623..0aaf0dc0c8c5 100644 --- a/datafusion/physical-expr/src/aggregate/build_in.rs +++ b/datafusion/physical-expr/src/aggregate/build_in.rs @@ -30,7 +30,7 @@ use std::sync::Arc; use arrow::datatypes::Schema; -use datafusion_common::{exec_err, not_impl_err, DataFusionError, Result}; +use datafusion_common::{exec_err, not_impl_err, Result}; use datafusion_expr::AggregateFunction; use crate::aggregate::regr::RegrType; @@ -416,7 +416,7 @@ pub fn create_aggregate_expr( mod tests { use arrow::datatypes::{DataType, Field}; - use datafusion_common::{plan_err, ScalarValue}; + use datafusion_common::{plan_err, DataFusionError, ScalarValue}; use datafusion_expr::type_coercion::aggregates::NUMERICS; use datafusion_expr::{type_coercion, Signature}; diff --git a/datafusion/physical-expr/src/aggregate/grouping.rs b/datafusion/physical-expr/src/aggregate/grouping.rs index 70afda265aea..d43bcd5c7091 100644 --- a/datafusion/physical-expr/src/aggregate/grouping.rs +++ b/datafusion/physical-expr/src/aggregate/grouping.rs @@ -24,7 +24,7 @@ use crate::aggregate::utils::down_cast_any_ref; use crate::{AggregateExpr, PhysicalExpr}; use arrow::datatypes::DataType; use arrow::datatypes::Field; -use datafusion_common::{not_impl_err, DataFusionError, Result}; +use datafusion_common::{not_impl_err, Result}; use datafusion_expr::Accumulator; use crate::expressions::format_state_name; diff --git a/datafusion/physical-expr/src/aggregate/mod.rs b/datafusion/physical-expr/src/aggregate/mod.rs index 2bb205ce90dc..893178f29d08 100644 --- a/datafusion/physical-expr/src/aggregate/mod.rs +++ b/datafusion/physical-expr/src/aggregate/mod.rs @@ -23,7 +23,7 @@ use crate::expressions::{NthValueAgg, OrderSensitiveArrayAgg}; use crate::{PhysicalExpr, PhysicalSortExpr}; use arrow::datatypes::Field; -use datafusion_common::{not_impl_err, DataFusionError, Result}; +use datafusion_common::{not_impl_err, Result}; use datafusion_expr::{Accumulator, GroupsAccumulator}; mod hyperloglog; diff --git a/datafusion/physical-expr/src/aggregate/nth_value.rs b/datafusion/physical-expr/src/aggregate/nth_value.rs index 5d721e3a5e87..dba259a507fd 100644 --- a/datafusion/physical-expr/src/aggregate/nth_value.rs +++ b/datafusion/physical-expr/src/aggregate/nth_value.rs @@ -33,7 +33,7 @@ use arrow_array::cast::AsArray; use arrow_array::{new_empty_array, ArrayRef, StructArray}; use arrow_schema::{DataType, Field, Fields}; use datafusion_common::utils::{array_into_list_array, get_row_at_idx}; -use datafusion_common::{exec_err, internal_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{exec_err, internal_err, Result, ScalarValue}; use datafusion_expr::Accumulator; /// Expression for a `NTH_VALUE(... 
ORDER BY ..., ...)` aggregation. In a multi diff --git a/datafusion/physical-expr/src/aggregate/stddev.rs b/datafusion/physical-expr/src/aggregate/stddev.rs index dcc2b0e69c02..6033d63cbe21 100644 --- a/datafusion/physical-expr/src/aggregate/stddev.rs +++ b/datafusion/physical-expr/src/aggregate/stddev.rs @@ -27,7 +27,7 @@ use crate::expressions::format_state_name; use crate::{AggregateExpr, PhysicalExpr}; use arrow::{array::ArrayRef, datatypes::DataType, datatypes::Field}; use datafusion_common::ScalarValue; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::Accumulator; /// STDDEV and STDDEV_SAMP (standard deviation) aggregate expression diff --git a/datafusion/physical-expr/src/aggregate/string_agg.rs b/datafusion/physical-expr/src/aggregate/string_agg.rs index 7a1da6d62246..8993c630aa49 100644 --- a/datafusion/physical-expr/src/aggregate/string_agg.rs +++ b/datafusion/physical-expr/src/aggregate/string_agg.rs @@ -23,7 +23,7 @@ use crate::{AggregateExpr, PhysicalExpr}; use arrow::array::ArrayRef; use arrow::datatypes::{DataType, Field}; use datafusion_common::cast::as_generic_string_array; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::Accumulator; use std::any::Any; use std::sync::Arc; diff --git a/datafusion/physical-expr/src/aggregate/sum.rs b/datafusion/physical-expr/src/aggregate/sum.rs index 6cf2810ce588..f19be62bbc95 100644 --- a/datafusion/physical-expr/src/aggregate/sum.rs +++ b/datafusion/physical-expr/src/aggregate/sum.rs @@ -33,7 +33,7 @@ use arrow_array::types::{ }; use arrow_array::{Array, ArrowNativeTypeOp, ArrowNumericType}; use arrow_buffer::ArrowNativeType; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::type_coercion::aggregates::sum_return_type; use datafusion_expr::{Accumulator, GroupsAccumulator}; diff --git a/datafusion/physical-expr/src/aggregate/sum_distinct.rs b/datafusion/physical-expr/src/aggregate/sum_distinct.rs index 4c0f94b3a2bb..a62a7b08da35 100644 --- a/datafusion/physical-expr/src/aggregate/sum_distinct.rs +++ b/datafusion/physical-expr/src/aggregate/sum_distinct.rs @@ -31,7 +31,7 @@ use std::collections::HashSet; use crate::aggregate::sum::downcast_sum; use crate::aggregate::utils::{down_cast_any_ref, Hashable}; use crate::{AggregateExpr, PhysicalExpr}; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::type_coercion::aggregates::sum_return_type; use datafusion_expr::Accumulator; diff --git a/datafusion/physical-expr/src/analysis.rs b/datafusion/physical-expr/src/analysis.rs index 6d36e2233cdd..ca25bfd647b6 100644 --- a/datafusion/physical-expr/src/analysis.rs +++ b/datafusion/physical-expr/src/analysis.rs @@ -27,9 +27,7 @@ use crate::PhysicalExpr; use arrow::datatypes::Schema; use datafusion_common::stats::Precision; -use datafusion_common::{ - internal_err, ColumnStatistics, DataFusionError, Result, ScalarValue, -}; +use datafusion_common::{internal_err, ColumnStatistics, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::{cardinality_ratio, Interval}; /// The shared context used during the analysis of an expression. 
Includes diff --git a/datafusion/physical-expr/src/conditional_expressions.rs b/datafusion/physical-expr/src/conditional_expressions.rs index cc8f3c8dfaf0..87d63bfd32e2 100644 --- a/datafusion/physical-expr/src/conditional_expressions.rs +++ b/datafusion/physical-expr/src/conditional_expressions.rs @@ -19,7 +19,7 @@ use arrow::array::{new_null_array, Array, BooleanArray}; use arrow::compute::kernels::zip::zip; use arrow::compute::{and, is_not_null, is_null}; -use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; /// coalesce evaluates to the first value which is not NULL diff --git a/datafusion/physical-expr/src/equivalence/mod.rs b/datafusion/physical-expr/src/equivalence/mod.rs index 387dce2cdc8b..a31be06ecf0b 100644 --- a/datafusion/physical-expr/src/equivalence/mod.rs +++ b/datafusion/physical-expr/src/equivalence/mod.rs @@ -68,7 +68,7 @@ mod tests { use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::{ArrayRef, Float64Array, RecordBatch, UInt32Array}; use arrow_schema::{SchemaRef, SortOptions}; - use datafusion_common::{plan_datafusion_err, DataFusionError, Result}; + use datafusion_common::{plan_datafusion_err, Result}; use itertools::izip; use rand::rngs::StdRng; use rand::seq::SliceRandom; diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 3f13030092c1..f1842458d5c4 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -38,7 +38,7 @@ use arrow::datatypes::*; use arrow::record_batch::RecordBatch; use datafusion_common::cast::as_boolean_array; -use datafusion_common::{internal_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::{apply_operator, Interval}; use datafusion_expr::type_coercion::binary::get_result_type; use datafusion_expr::{ColumnarValue, Operator}; diff --git a/datafusion/physical-expr/src/expressions/binary/kernels.rs b/datafusion/physical-expr/src/expressions/binary/kernels.rs index 22cadec40940..b0736e140fec 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels.rs @@ -25,7 +25,7 @@ use arrow::compute::kernels::bitwise::{ }; use arrow::datatypes::DataType; use datafusion_common::internal_err; -use datafusion_common::{DataFusionError, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue}; use std::sync::Arc; diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index b0e175e711fe..9125f73048cb 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -28,7 +28,7 @@ use arrow::compute::{can_cast_types, kernels, CastOptions}; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::ColumnarValue; diff --git a/datafusion/physical-expr/src/expressions/column.rs b/datafusion/physical-expr/src/expressions/column.rs index 62da8ff9ed44..a07f36e785e3 100644 --- a/datafusion/physical-expr/src/expressions/column.rs +++ 
b/datafusion/physical-expr/src/expressions/column.rs @@ -28,7 +28,7 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::ColumnarValue; /// Represents the column at a given index in a RecordBatch diff --git a/datafusion/physical-expr/src/expressions/get_indexed_field.rs b/datafusion/physical-expr/src/expressions/get_indexed_field.rs index 58fe4728543d..39eef61f963a 100644 --- a/datafusion/physical-expr/src/expressions/get_indexed_field.rs +++ b/datafusion/physical-expr/src/expressions/get_indexed_field.rs @@ -30,7 +30,7 @@ use arrow::{ }; use datafusion_common::{ cast::{as_map_array, as_struct_array}, - DataFusionError, Result, ScalarValue, + Result, ScalarValue, }; use datafusion_expr::{field_util::GetFieldAccessSchema, ColumnarValue}; use std::fmt::Debug; diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index 1a1634081c38..ecdb03e97ee3 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -38,9 +38,7 @@ use datafusion_common::cast::{ as_boolean_array, as_generic_binary_array, as_string_array, }; use datafusion_common::hash_utils::HashValue; -use datafusion_common::{ - exec_err, internal_err, not_impl_err, DataFusionError, Result, ScalarValue, -}; +use datafusion_common::{exec_err, internal_err, not_impl_err, Result, ScalarValue}; use datafusion_expr::ColumnarValue; use ahash::RandomState; diff --git a/datafusion/physical-expr/src/expressions/like.rs b/datafusion/physical-expr/src/expressions/like.rs index 37452e278484..6e0beeb0beea 100644 --- a/datafusion/physical-expr/src/expressions/like.rs +++ b/datafusion/physical-expr/src/expressions/like.rs @@ -23,7 +23,7 @@ use crate::{physical_expr::down_cast_any_ref, PhysicalExpr}; use crate::expressions::datum::apply_cmp; use arrow::record_batch::RecordBatch; use arrow_schema::{DataType, Schema}; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::ColumnarValue; // Like expression diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 09e908586c5b..ec20345569c2 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -133,7 +133,7 @@ pub(crate) mod tests { assert_eq!(expected, actual); - Ok(()) as Result<(), DataFusionError> + Ok(()) as Result<(), ::datafusion_common::DataFusionError> }}; } @@ -166,7 +166,7 @@ pub(crate) mod tests { let actual = aggregate_new(&batch, agg)?; assert_eq!($EXPECTED, &actual); - Ok(()) as Result<(), DataFusionError> + Ok(()) as Result<(), ::datafusion_common::DataFusionError> }}; } diff --git a/datafusion/physical-expr/src/expressions/negative.rs b/datafusion/physical-expr/src/expressions/negative.rs index 6b5c208bae81..d6dd3ddbea5e 100644 --- a/datafusion/physical-expr/src/expressions/negative.rs +++ b/datafusion/physical-expr/src/expressions/negative.rs @@ -30,7 +30,7 @@ use arrow::{ datatypes::{DataType, Schema}, record_batch::RecordBatch, }; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::{ type_coercion::{is_interval, is_null, is_signed_numeric, is_timestamp}, @@ -179,6 +179,7 @@ mod 
tests { use arrow::datatypes::*; use arrow_schema::DataType::{Float32, Float64, Int16, Int32, Int64, Int8}; use datafusion_common::cast::as_primitive_array; + use datafusion_common::DataFusionError; use datafusion_common::Result; use paste::paste; diff --git a/datafusion/physical-expr/src/expressions/no_op.rs b/datafusion/physical-expr/src/expressions/no_op.rs index 95e6879a6c2d..b558ccab154d 100644 --- a/datafusion/physical-expr/src/expressions/no_op.rs +++ b/datafusion/physical-expr/src/expressions/no_op.rs @@ -28,7 +28,7 @@ use arrow::{ use crate::physical_expr::down_cast_any_ref; use crate::PhysicalExpr; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::ColumnarValue; /// A place holder expression, can not be evaluated. diff --git a/datafusion/physical-expr/src/expressions/try_cast.rs b/datafusion/physical-expr/src/expressions/try_cast.rs index 0f7909097a10..ddfe49dda7a3 100644 --- a/datafusion/physical-expr/src/expressions/try_cast.rs +++ b/datafusion/physical-expr/src/expressions/try_cast.rs @@ -28,7 +28,7 @@ use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; use compute::can_cast_types; use datafusion_common::format::DEFAULT_FORMAT_OPTIONS; -use datafusion_common::{not_impl_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{not_impl_err, Result, ScalarValue}; use datafusion_expr::ColumnarValue; /// TRY_CAST expression casts an expression to a specific data type and retuns NULL on invalid cast diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index 186de0609b9a..d2b9a68ef8b9 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -42,7 +42,7 @@ use arrow::{ datatypes::{DataType, Int32Type, Int64Type, Schema}, }; use arrow_array::Array; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{exec_err, Result, ScalarValue}; pub use datafusion_expr::FuncMonotonicity; use datafusion_expr::{ type_coercion::functions::data_types, BuiltinScalarFunction, ColumnarValue, @@ -998,7 +998,7 @@ mod tests { }; use datafusion_common::cast::{as_boolean_array, as_uint64_array}; use datafusion_common::{exec_err, internal_err, plan_err}; - use datafusion_common::{Result, ScalarValue}; + use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::type_coercion::functions::data_types; use datafusion_expr::Signature; diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index b2403dadf05a..3bd059afa6be 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -29,7 +29,7 @@ use crate::utils::{build_dag, ExprTreeNode}; use crate::PhysicalExpr; use arrow_schema::{DataType, Schema}; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::interval_arithmetic::{apply_operator, satisfy_greater, Interval}; use datafusion_expr::Operator; diff --git a/datafusion/physical-expr/src/intervals/utils.rs b/datafusion/physical-expr/src/intervals/utils.rs index 03d13632104d..e188b2d56bae 100644 --- a/datafusion/physical-expr/src/intervals/utils.rs +++ b/datafusion/physical-expr/src/intervals/utils.rs @@ -25,9 +25,7 @@ use crate::{ }; use arrow_schema::{DataType, SchemaRef}; -use datafusion_common::{ - 
internal_datafusion_err, internal_err, DataFusionError, Result, ScalarValue, -}; +use datafusion_common::{internal_datafusion_err, internal_err, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::Operator; diff --git a/datafusion/physical-expr/src/physical_expr.rs b/datafusion/physical-expr/src/physical_expr.rs index e596cb2e6ceb..567054e2b59e 100644 --- a/datafusion/physical-expr/src/physical_expr.rs +++ b/datafusion/physical-expr/src/physical_expr.rs @@ -28,7 +28,7 @@ use arrow::compute::filter_record_batch; use arrow::datatypes::{DataType, Schema}; use arrow::record_batch::RecordBatch; use datafusion_common::utils::DataPtr; -use datafusion_common::{internal_err, not_impl_err, DataFusionError, Result}; +use datafusion_common::{internal_err, not_impl_err, Result}; use datafusion_expr::interval_arithmetic::Interval; use datafusion_expr::ColumnarValue; diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index b8491aea2d6f..bf279518d31d 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ -26,8 +26,7 @@ use crate::{ }; use arrow::datatypes::Schema; use datafusion_common::{ - exec_err, internal_err, not_impl_err, plan_err, DFSchema, DataFusionError, Result, - ScalarValue, + exec_err, internal_err, not_impl_err, plan_err, DFSchema, Result, ScalarValue, }; use datafusion_expr::expr::{Alias, Cast, InList, ScalarFunction}; use datafusion_expr::{ diff --git a/datafusion/physical-expr/src/string_expressions.rs b/datafusion/physical-expr/src/string_expressions.rs index 6a4a29763e4b..ace7ef2888a3 100644 --- a/datafusion/physical-expr/src/string_expressions.rs +++ b/datafusion/physical-expr/src/string_expressions.rs @@ -37,13 +37,13 @@ use arrow::{ use uuid::Uuid; use datafusion_common::utils::datafusion_strsim; +use datafusion_common::Result; use datafusion_common::{ cast::{ as_generic_string_array, as_int64_array, as_primitive_array, as_string_array, }, exec_err, ScalarValue, }; -use datafusion_common::{DataFusionError, Result}; use datafusion_expr::ColumnarValue; /// applies a unary expression to `args[0]` that is expected to be downcastable to diff --git a/datafusion/physical-expr/src/struct_expressions.rs b/datafusion/physical-expr/src/struct_expressions.rs index b0ccb2a3ccb6..f420e062ef91 100644 --- a/datafusion/physical-expr/src/struct_expressions.rs +++ b/datafusion/physical-expr/src/struct_expressions.rs @@ -19,7 +19,7 @@ use arrow::array::*; use arrow::datatypes::Field; -use datafusion_common::{exec_err, DataFusionError, Result}; +use datafusion_common::{exec_err, Result}; use datafusion_expr::ColumnarValue; use std::sync::Arc; diff --git a/datafusion/physical-expr/src/unicode_expressions.rs b/datafusion/physical-expr/src/unicode_expressions.rs index 3209a6176fad..aa6a84119c34 100644 --- a/datafusion/physical-expr/src/unicode_expressions.rs +++ b/datafusion/physical-expr/src/unicode_expressions.rs @@ -33,7 +33,7 @@ use unicode_segmentation::UnicodeSegmentation; use datafusion_common::{ cast::{as_generic_string_array, as_int64_array}, - exec_err, DataFusionError, Result, + exec_err, Result, }; /// Returns number of characters in the string. 
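Every hunk in this cleanup follows the same pattern: the `DataFusionError` import is dropped while macro imports such as `plan_err`, `exec_err`, `internal_err`, and `not_impl_err` remain. That works because these macros name `DataFusionError` through their own crate path when they expand, so a call site only needs `Result` in scope, and the old import becomes dead. A minimal sketch of the resulting call-site shape (the helper function is hypothetical, not code from this patch):

```rust
use datafusion_common::{plan_err, Result};

// Hypothetical helper: `plan_err!` expands to an `Err(DataFusionError::Plan(..))`
// value via the macro's own path to the error type, so no local
// `DataFusionError` import is required here.
fn require_join_keys(keys: &[(String, String)]) -> Result<()> {
    if keys.is_empty() {
        return plan_err!("join requires at least one equality predicate");
    }
    Ok(())
}
```

Only the macro and `Result` survive in the `use` list, which is why the now-unused `DataFusionError` imports can be removed wholesale across these files.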
diff --git a/datafusion/physical-expr/src/window/nth_value.rs b/datafusion/physical-expr/src/window/nth_value.rs index 05909ab25a07..a7bb31b6e109 100644 --- a/datafusion/physical-expr/src/window/nth_value.rs +++ b/datafusion/physical-expr/src/window/nth_value.rs @@ -29,8 +29,8 @@ use crate::PhysicalExpr; use arrow::array::{Array, ArrayRef}; use arrow::datatypes::{DataType, Field}; +use datafusion_common::Result; use datafusion_common::{exec_err, ScalarValue}; -use datafusion_common::{DataFusionError, Result}; use datafusion_expr::window_state::WindowAggState; use datafusion_expr::PartitionEvaluator; diff --git a/datafusion/physical-expr/src/window/rank.rs b/datafusion/physical-expr/src/window/rank.rs index 437fdbe0b982..fa3d4e487f14 100644 --- a/datafusion/physical-expr/src/window/rank.rs +++ b/datafusion/physical-expr/src/window/rank.rs @@ -28,7 +28,7 @@ use arrow::array::{Float64Array, UInt64Array}; use arrow::datatypes::{DataType, Field}; use arrow_schema::{SchemaRef, SortOptions}; use datafusion_common::utils::get_row_at_idx; -use datafusion_common::{exec_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{exec_err, Result, ScalarValue}; use datafusion_expr::PartitionEvaluator; use std::any::Any; diff --git a/datafusion/physical-plan/src/aggregates/mod.rs b/datafusion/physical-plan/src/aggregates/mod.rs index 156362430558..855408c4baa8 100644 --- a/datafusion/physical-plan/src/aggregates/mod.rs +++ b/datafusion/physical-plan/src/aggregates/mod.rs @@ -36,7 +36,7 @@ use arrow::array::ArrayRef; use arrow::datatypes::{Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; -use datafusion_common::{internal_err, not_impl_err, plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, not_impl_err, plan_err, Result}; use datafusion_execution::TaskContext; use datafusion_expr::Accumulator; use datafusion_physical_expr::{ diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index bfcff2853538..1521daee1334 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -29,7 +29,7 @@ use super::{DisplayAs, SendableRecordBatchStream, Statistics}; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::datatypes::SchemaRef; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; diff --git a/datafusion/physical-plan/src/empty.rs b/datafusion/physical-plan/src/empty.rs index 41c8dbed1453..398edc211de3 100644 --- a/datafusion/physical-plan/src/empty.rs +++ b/datafusion/physical-plan/src/empty.rs @@ -26,7 +26,7 @@ use crate::{memory::MemoryStream, DisplayFormatType, ExecutionPlan, Partitioning use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; use log::trace; diff --git a/datafusion/physical-plan/src/explain.rs b/datafusion/physical-plan/src/explain.rs index e4904ddd3410..babcaf078bdc 100644 --- a/datafusion/physical-plan/src/explain.rs +++ b/datafusion/physical-plan/src/explain.rs @@ -27,7 +27,7 @@ use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; use arrow::{array::StringBuilder, datatypes::SchemaRef, record_batch::RecordBatch}; use 
datafusion_common::display::StringifiedPlan; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; use log::trace; diff --git a/datafusion/physical-plan/src/insert.rs b/datafusion/physical-plan/src/insert.rs index 81cdfd753fe6..e678425d3753 100644 --- a/datafusion/physical-plan/src/insert.rs +++ b/datafusion/physical-plan/src/insert.rs @@ -33,7 +33,7 @@ use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use arrow_array::{ArrayRef, UInt64Array}; use arrow_schema::{DataType, Field, Schema}; -use datafusion_common::{exec_err, internal_err, DataFusionError, Result}; +use datafusion_common::{exec_err, internal_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{Distribution, PhysicalSortRequirement}; diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 938c9e4d343d..99bd051da0f7 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -35,7 +35,7 @@ use arrow::datatypes::{Fields, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::RecordBatchOptions; use datafusion_common::stats::Precision; -use datafusion_common::{plan_err, DataFusionError, JoinType, Result, ScalarValue}; +use datafusion_common::{plan_err, JoinType, Result, ScalarValue}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::join_equivalence_properties; diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index f89a2445fd07..1618efd4d0f9 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -44,7 +44,7 @@ use arrow::array::{ use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow::util::bit_util; -use datafusion_common::{exec_err, DataFusionError, JoinSide, Result, Statistics}; +use datafusion_common::{exec_err, JoinSide, Result, Statistics}; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; use datafusion_expr::JoinType; diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 42c7029edcc1..506324852b21 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -62,9 +62,7 @@ use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::hash_utils::create_hashes; use datafusion_common::utils::bisect; -use datafusion_common::{ - internal_err, plan_err, DataFusionError, JoinSide, JoinType, Result, -}; +use datafusion_common::{internal_err, plan_err, JoinSide, JoinType, Result}; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; use datafusion_expr::interval_arithmetic::Interval; diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 562e42a7da3b..a15fd470a98b 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ -31,7 +31,7 @@ use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::tree_node::Transformed; use 
datafusion_common::utils::DataPtr; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{ diff --git a/datafusion/physical-plan/src/limit.rs b/datafusion/physical-plan/src/limit.rs index 417bc4cf977b..680aa23214f9 100644 --- a/datafusion/physical-plan/src/limit.rs +++ b/datafusion/physical-plan/src/limit.rs @@ -32,7 +32,7 @@ use crate::{ use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; use futures::stream::{Stream, StreamExt}; diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index 7de474fda11c..86bd89e7ebac 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -30,7 +30,7 @@ use super::{ use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; -use datafusion_common::{internal_err, project_schema, DataFusionError, Result}; +use datafusion_common::{internal_err, project_schema, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; diff --git a/datafusion/physical-plan/src/placeholder_row.rs b/datafusion/physical-plan/src/placeholder_row.rs index 3ab3de62f37a..3371148587d1 100644 --- a/datafusion/physical-plan/src/placeholder_row.rs +++ b/datafusion/physical-plan/src/placeholder_row.rs @@ -28,7 +28,7 @@ use arrow::array::{ArrayRef, NullArray}; use arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::RecordBatchOptions; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; use log::trace; diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index f4b57e8bfb45..81a26cd2188d 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -30,7 +30,7 @@ use crate::{ }; use arrow::datatypes::SchemaRef; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortRequirement}; diff --git a/datafusion/physical-plan/src/sorts/streaming_merge.rs b/datafusion/physical-plan/src/sorts/streaming_merge.rs index 4f8d8063853b..9e6618dd1af5 100644 --- a/datafusion/physical-plan/src/sorts/streaming_merge.rs +++ b/datafusion/physical-plan/src/sorts/streaming_merge.rs @@ -26,7 +26,7 @@ use crate::sorts::{ use crate::{PhysicalSortExpr, SendableRecordBatchStream}; use arrow::datatypes::{DataType, SchemaRef}; use arrow_array::*; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::memory_pool::MemoryReservation; macro_rules! 
primitive_merge_helper { diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index b780a50cdc90..970194550045 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -25,7 +25,7 @@ use std::task::Poll; use crate::displayable; use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; use datafusion_common::internal_err; -use datafusion_common::DataFusionError; + use datafusion_common::Result; use datafusion_execution::TaskContext; use futures::stream::BoxStream; diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index 897682092831..bca37ed942d0 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -27,7 +27,7 @@ use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; use arrow::datatypes::SchemaRef; use arrow_schema::Schema; -use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, plan_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr}; diff --git a/datafusion/physical-plan/src/udaf.rs b/datafusion/physical-plan/src/udaf.rs index a82bbe1d0705..fd9279dfd552 100644 --- a/datafusion/physical-plan/src/udaf.rs +++ b/datafusion/physical-plan/src/udaf.rs @@ -28,7 +28,7 @@ use arrow::{ }; use super::{expressions::format_state_name, Accumulator, AggregateExpr}; -use datafusion_common::{not_impl_err, DataFusionError, Result}; +use datafusion_common::{not_impl_err, Result}; pub use datafusion_expr::AggregateUDF; use datafusion_physical_expr::PhysicalExpr; diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index d01ea5507449..62a6d5c0f877 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -39,7 +39,7 @@ use crate::stream::ObservedStream; use arrow::datatypes::{Field, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use datafusion_common::stats::Precision; -use datafusion_common::{exec_err, internal_err, DataFusionError, Result}; +use datafusion_common::{exec_err, internal_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::EquivalenceProperties; diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index b9e732c317af..d5453f0924a3 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -35,7 +35,7 @@ use arrow::datatypes::{ }; use arrow::record_batch::RecordBatch; use arrow_array::{GenericListArray, OffsetSizeTrait}; -use datafusion_common::{exec_err, DataFusionError, Result, UnnestOptions}; +use datafusion_common::{exec_err, Result, UnnestOptions}; use datafusion_execution::TaskContext; use async_trait::async_trait; diff --git a/datafusion/physical-plan/src/values.rs b/datafusion/physical-plan/src/values.rs index f82f7ea2f869..9c1ce93b2a08 100644 --- a/datafusion/physical-plan/src/values.rs +++ b/datafusion/physical-plan/src/values.rs @@ -29,7 +29,7 @@ use crate::{ use arrow::datatypes::{Schema, SchemaRef}; use arrow::record_batch::{RecordBatch, RecordBatchOptions}; -use datafusion_common::{internal_err, plan_err, DataFusionError, Result, ScalarValue}; +use datafusion_common::{internal_err, plan_err, Result, ScalarValue}; use datafusion_execution::TaskContext; /// Execution plan for values list based relation (produces constant rows) diff --git 
a/datafusion/physical-plan/src/windows/window_agg_exec.rs b/datafusion/physical-plan/src/windows/window_agg_exec.rs index 6c245f65ba4f..e80102812ebd 100644 --- a/datafusion/physical-plan/src/windows/window_agg_exec.rs +++ b/datafusion/physical-plan/src/windows/window_agg_exec.rs @@ -45,7 +45,7 @@ use arrow::{ }; use datafusion_common::stats::Precision; use datafusion_common::utils::evaluate_partition_ranges; -use datafusion_common::{internal_err, plan_err, DataFusionError, Result}; +use datafusion_common::{internal_err, plan_err, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, PhysicalSortRequirement}; diff --git a/datafusion/physical-plan/src/work_table.rs b/datafusion/physical-plan/src/work_table.rs index c74a596f3dae..0f934a76a60f 100644 --- a/datafusion/physical-plan/src/work_table.rs +++ b/datafusion/physical-plan/src/work_table.rs @@ -34,7 +34,7 @@ use super::{ metrics::{ExecutionPlanMetricsSet, MetricsSet}, SendableRecordBatchStream, Statistics, }; -use datafusion_common::{internal_err, DataFusionError, Result}; +use datafusion_common::{internal_err, Result}; /// The name is from PostgreSQL's terminology. /// See diff --git a/datafusion/proto/src/bytes/mod.rs b/datafusion/proto/src/bytes/mod.rs index 6d5a7c7f3063..d4abb9ed9c6f 100644 --- a/datafusion/proto/src/bytes/mod.rs +++ b/datafusion/proto/src/bytes/mod.rs @@ -23,7 +23,7 @@ use crate::physical_plan::{ AsExecutionPlan, DefaultPhysicalExtensionCodec, PhysicalExtensionCodec, }; use crate::protobuf; -use datafusion_common::{plan_datafusion_err, DataFusionError, Result}; +use datafusion_common::{plan_datafusion_err, Result}; use datafusion_expr::{ create_udaf, create_udf, create_udwf, AggregateUDF, Expr, LogicalPlan, Volatility, WindowUDF, diff --git a/datafusion/proto/src/bytes/registry.rs b/datafusion/proto/src/bytes/registry.rs index 7c993c639991..4bf2bb3d7b79 100644 --- a/datafusion/proto/src/bytes/registry.rs +++ b/datafusion/proto/src/bytes/registry.rs @@ -19,7 +19,7 @@ use std::{collections::HashSet, sync::Arc}; use datafusion::execution::registry::FunctionRegistry; use datafusion_common::plan_err; -use datafusion_common::{DataFusionError, Result}; +use datafusion_common::Result; use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; /// A default [`FunctionRegistry`] registry that does not resolve any diff --git a/datafusion/sql/examples/sql.rs b/datafusion/sql/examples/sql.rs index 9df65b99a748..8744a905481f 100644 --- a/datafusion/sql/examples/sql.rs +++ b/datafusion/sql/examples/sql.rs @@ -17,7 +17,7 @@ use arrow_schema::{DataType, Field, Schema}; use datafusion_common::config::ConfigOptions; -use datafusion_common::{plan_err, DataFusionError, Result}; +use datafusion_common::{plan_err, Result}; use datafusion_expr::WindowUDF; use datafusion_expr::{ logical_plan::builder::LogicalTableSource, AggregateUDF, ScalarUDF, TableSource, diff --git a/datafusion/sql/src/expr/binary_op.rs b/datafusion/sql/src/expr/binary_op.rs index 78efaca09938..0d37742e5b07 100644 --- a/datafusion/sql/src/expr/binary_op.rs +++ b/datafusion/sql/src/expr/binary_op.rs @@ -16,7 +16,7 @@ // under the License. 
use crate::planner::{ContextProvider, SqlToRel}; -use datafusion_common::{not_impl_err, DataFusionError, Result}; +use datafusion_common::{not_impl_err, Result}; use datafusion_expr::Operator; use sqlparser::ast::BinaryOperator; diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index db572a23cf99..bc1d672522dd 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -18,8 +18,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow_schema::DataType; use datafusion_common::{ - not_impl_err, plan_datafusion_err, plan_err, DFSchema, DataFusionError, Dependency, - Result, + not_impl_err, plan_datafusion_err, plan_err, DFSchema, Dependency, Result, }; use datafusion_expr::expr::{ScalarFunction, Unnest}; use datafusion_expr::function::suggest_valid_function; diff --git a/datafusion/sql/src/expr/grouping_set.rs b/datafusion/sql/src/expr/grouping_set.rs index 254f5079b7b1..a8b3ef7e20ec 100644 --- a/datafusion/sql/src/expr/grouping_set.rs +++ b/datafusion/sql/src/expr/grouping_set.rs @@ -17,7 +17,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::plan_err; -use datafusion_common::{DFSchema, DataFusionError, Result}; +use datafusion_common::{DFSchema, Result}; use datafusion_expr::{Expr, GroupingSet}; use sqlparser::ast::Expr as SQLExpr; diff --git a/datafusion/sql/src/expr/json_access.rs b/datafusion/sql/src/expr/json_access.rs index 681b72b4e71a..b24482f88297 100644 --- a/datafusion/sql/src/expr/json_access.rs +++ b/datafusion/sql/src/expr/json_access.rs @@ -16,7 +16,7 @@ // under the License. use crate::planner::{ContextProvider, SqlToRel}; -use datafusion_common::{not_impl_err, DataFusionError, Result}; +use datafusion_common::{not_impl_err, Result}; use datafusion_expr::Operator; use sqlparser::ast::JsonOperator; diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index da6c3a6074d4..b058fb79b4a1 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -31,8 +31,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use arrow_schema::DataType; use arrow_schema::TimeUnit; use datafusion_common::{ - internal_err, not_impl_err, plan_err, Column, DFSchema, DataFusionError, Result, - ScalarValue, + internal_err, not_impl_err, plan_err, Column, DFSchema, Result, ScalarValue, }; use datafusion_expr::expr::AggregateFunctionDefinition; use datafusion_expr::expr::InList; diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index 772255bd9773..46f19f436ccc 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -16,9 +16,7 @@ // under the License. 
use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{ - plan_datafusion_err, plan_err, DFSchema, DataFusionError, Result, -}; +use datafusion_common::{plan_datafusion_err, plan_err, DFSchema, Result}; use datafusion_expr::expr::Sort; use datafusion_expr::Expr; use sqlparser::ast::{Expr as SQLExpr, OrderByExpr, Value}; diff --git a/datafusion/sql/src/expr/substring.rs b/datafusion/sql/src/expr/substring.rs index 71b2a11cd414..a5d1abf0f265 100644 --- a/datafusion/sql/src/expr/substring.rs +++ b/datafusion/sql/src/expr/substring.rs @@ -17,7 +17,7 @@ use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; use datafusion_common::plan_err; -use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue}; +use datafusion_common::{DFSchema, Result, ScalarValue}; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{BuiltinScalarFunction, Expr}; use sqlparser::ast::Expr as SQLExpr; diff --git a/datafusion/sql/src/expr/unary_op.rs b/datafusion/sql/src/expr/unary_op.rs index 08ff6f2c3622..9fcee7a06124 100644 --- a/datafusion/sql/src/expr/unary_op.rs +++ b/datafusion/sql/src/expr/unary_op.rs @@ -16,7 +16,7 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, DFSchema, DataFusionError, Result}; +use datafusion_common::{not_impl_err, DFSchema, Result}; use datafusion_expr::Expr; use sqlparser::ast::{Expr as SQLExpr, UnaryOperator, Value}; diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index b119672eae5f..4ba089f48630 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -16,7 +16,7 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, Column, DataFusionError, Result}; +use datafusion_common::{not_impl_err, Column, Result}; use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableWithJoins}; use std::collections::HashSet; diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index b233f47a058f..1d52899160a9 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -16,9 +16,7 @@ // under the License. use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{ - not_impl_err, plan_err, DFSchema, DataFusionError, Result, TableReference, -}; +use datafusion_common::{not_impl_err, plan_err, DFSchema, Result, TableReference}; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{FunctionArg, FunctionArgExpr, TableFactor}; diff --git a/datafusion/sql/src/set_expr.rs b/datafusion/sql/src/set_expr.rs index 7300d49be0f5..2cbb68368f72 100644 --- a/datafusion/sql/src/set_expr.rs +++ b/datafusion/sql/src/set_expr.rs @@ -16,7 +16,7 @@ // under the License. 
use crate::planner::{ContextProvider, PlannerContext, SqlToRel}; -use datafusion_common::{not_impl_err, DataFusionError, Result}; +use datafusion_common::{not_impl_err, Result}; use datafusion_expr::{LogicalPlan, LogicalPlanBuilder}; use sqlparser::ast::{SetExpr, SetOperator, SetQuantifier}; diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs index 331d63cc22b2..bc9cc66b7626 100644 --- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs @@ -27,7 +27,7 @@ use std::sync::Arc; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; use datafusion::common::{not_impl_err, plan_err, DFSchema, DFSchemaRef}; -use datafusion::error::{DataFusionError, Result}; +use datafusion::error::Result; use datafusion::execution::context::SessionState; use datafusion::execution::registry::SerializerRegistry; use datafusion::execution::runtime_env::RuntimeEnv; From b220f03fffda22c70f03fa84e244cf04f0e6644c Mon Sep 17 00:00:00 2001 From: Jonah Gao Date: Wed, 28 Feb 2024 20:48:20 +0800 Subject: [PATCH 44/45] feat: support for defining ARRAY columns in `CREATE TABLE` (#9381) --- datafusion/sql/src/planner.rs | 5 +-- datafusion/sqllogictest/test_files/array.slt | 36 ++++++++++++++++++++ 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 012b1c51a5c1..1f21299d8559 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -238,7 +238,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut fields = Vec::with_capacity(columns.len()); for column in columns { - let data_type = self.convert_simple_data_type(&column.data_type)?; + let data_type = self.convert_data_type(&column.data_type)?; let not_nullable = column .options .iter() @@ -358,7 +358,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { match sql_type { SQLDataType::Array(ArrayElemTypeDef::AngleBracket(inner_sql_type)) | SQLDataType::Array(ArrayElemTypeDef::SquareBracket(inner_sql_type)) => { - let data_type = self.convert_simple_data_type(inner_sql_type)?; + // Arrays may be multi-dimensional. 
+ let data_type = self.convert_data_type(inner_sql_type)?; Ok(DataType::List(Arc::new(Field::new( "field", data_type, true, diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index da02a80a104f..e64346537150 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -6165,6 +6165,39 @@ NULL NULL [60, 59, 58, 57, 56, 55, 54, , 52, 51] [51, 52, , 54, 55, 56, 57, 58, 59, 60] [70, 69, 68, 67, 66, 65, 64, 63, 62, 61] [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] + +# Test defining a table with array columns +statement ok +create table test_create_array_table( + a int[], + b text[], + -- two-dimensional array + c int[][], + d int +); + +query ???I +insert into test_create_array_table values + ([1, 2, 3], ['a', 'b', 'c'], [[4,6], [6,7,8]], 1); +---- +1 + +query ???I +select * from test_create_array_table; +---- +[1, 2, 3] [a, b, c] [[4, 6], [6, 7, 8]] 1 + +query T +select arrow_typeof(a) from test_create_array_table; +---- +List(Field { name: "field", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) + +query T +select arrow_typeof(c) from test_create_array_table; +---- +List(Field { name: "field", data_type: List(Field { name: "field", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) + + ### Delete tables statement ok @@ -6334,3 +6367,6 @@ drop table large_arrays_values_without_nulls; statement ok drop table fixed_size_arrays_values_without_nulls; + +statement ok +drop table test_create_array_table; From fea2174574c1a2a24b24479e966fb232bd971435 Mon Sep 17 00:00:00 2001 From: Mehmet Ozan Kabak Date: Wed, 28 Feb 2024 08:08:18 -0800 Subject: [PATCH 45/45] Final reviews and cleanups --- datafusion/core/benches/sort.rs | 19 ++++----- datafusion/core/src/dataframe/mod.rs | 3 +- .../core/src/datasource/listing/table.rs | 7 ++-- datafusion/core/src/datasource/memory.rs | 39 +++++++++++-------- .../core/src/datasource/physical_plan/avro.rs | 6 +-- .../core/src/datasource/physical_plan/csv.rs | 12 +++--- .../core/src/datasource/physical_plan/json.rs | 5 +-- .../datasource/physical_plan/parquet/mod.rs | 5 +-- .../src/physical_optimizer/join_selection.rs | 11 +++--- .../limited_distinct_aggregation.rs | 18 +++++---- .../physical_optimizer/pipeline_checker.rs | 3 +- .../physical_optimizer/projection_pushdown.rs | 3 +- .../src/physical_optimizer/sort_pushdown.rs | 3 +- .../physical_optimizer/topk_aggregation.rs | 5 ++- .../core/src/physical_optimizer/utils.rs | 3 +- datafusion/core/src/physical_planner.rs | 5 +-- datafusion/physical-plan/src/analyze.rs | 8 ++-- .../physical-plan/src/coalesce_partitions.rs | 8 ++-- datafusion/physical-plan/src/common.rs | 3 +- datafusion/physical-plan/src/lib.rs | 18 ++++----- datafusion/physical-plan/src/memory.rs | 2 +- .../physical-plan/src/repartition/mod.rs | 22 +++++------ datafusion/physical-plan/src/stream.rs | 17 ++++---- 23 files changed, 110 insertions(+), 115 deletions(-) diff --git a/datafusion/core/benches/sort.rs b/datafusion/core/benches/sort.rs index 34b4a5ebf0dc..94a39bbb2af3 100644 --- a/datafusion/core/benches/sort.rs +++ b/datafusion/core/benches/sort.rs @@ -68,35 +68,32 @@ use std::sync::Arc; -use arrow::array::DictionaryArray; -use arrow::datatypes::Int32Type; use arrow::{ - array::{Float64Array, Int64Array, StringArray}, + array::{DictionaryArray, Float64Array, Int64Array, StringArray}, compute::SortOptions, - 
datatypes::Schema, + datatypes::{Int32Type, Schema}, record_batch::RecordBatch, }; -/// Benchmarks for SortPreservingMerge stream -use criterion::{criterion_group, criterion_main, Criterion}; use datafusion::physical_plan::sorts::sort::SortExec; use datafusion::{ execution::context::TaskContext, physical_plan::{ - memory::MemoryExec, sorts::sort_preserving_merge::SortPreservingMergeExec, - ExecutionPlan, + coalesce_partitions::CoalescePartitionsExec, memory::MemoryExec, + sorts::sort_preserving_merge::SortPreservingMergeExec, ExecutionPlan, + ExecutionPlanProperties, }, prelude::SessionContext, }; use datafusion_physical_expr::{expressions::col, PhysicalSortExpr}; + +/// Benchmarks for SortPreservingMerge stream +use criterion::{criterion_group, criterion_main, Criterion}; use futures::StreamExt; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; use tokio::runtime::Runtime; -use datafusion::physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion_physical_plan::ExecutionPlanProperties; - /// Total number of streams to divide each input into /// models 8 partition plan (should it be 16??) const NUM_STREAMS: usize = 8; diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 1372570179fe..d7c31b9bd6b3 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -23,7 +23,6 @@ mod parquet; use std::any::Any; use std::sync::Arc; -use crate::arrow::datatypes::{Schema, SchemaRef}; use crate::arrow::record_batch::RecordBatch; use crate::arrow::util::pretty; use crate::datasource::{provider_as_source, MemTable, TableProvider}; @@ -43,7 +42,7 @@ use crate::prelude::SessionContext; use arrow::array::{Array, ArrayRef, Int64Array, StringArray}; use arrow::compute::{cast, concat}; use arrow::csv::WriterBuilder; -use arrow::datatypes::{DataType, Field}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion_common::file_options::csv_writer::CsvWriterOptions; use datafusion_common::file_options::json_writer::JsonWriterOptions; use datafusion_common::parsers::CompressionTypeVariant; diff --git a/datafusion/core/src/datasource/listing/table.rs b/datafusion/core/src/datasource/listing/table.rs index a1f3d14aacca..00821a1cdd1a 100644 --- a/datafusion/core/src/datasource/listing/table.rs +++ b/datafusion/core/src/datasource/listing/table.rs @@ -27,7 +27,9 @@ use super::PartitionedFile; #[cfg(feature = "parquet")] use crate::datasource::file_format::parquet::ParquetFormat; use crate::datasource::{ - create_ordering, + create_ordering, get_statistics_with_limit, TableProvider, TableType, +}; +use crate::datasource::{ file_format::{ arrow::ArrowFormat, avro::AvroFormat, csv::CsvFormat, json::JsonFormat, FileFormat, }, - get_statistics_with_limit, listing::ListingTableUrl, physical_plan::{FileScanConfig, FileSinkConfig}, - TableProvider, TableType, }; use crate::{ error::{DataFusionError, Result}, @@ -921,6 +921,7 @@ mod tests { use datafusion_expr::{BinaryExpr, LogicalPlanBuilder, Operator}; use datafusion_physical_expr::PhysicalSortExpr; use datafusion_physical_plan::ExecutionPlanProperties; + use tempfile::TempDir; #[tokio::test] diff --git a/datafusion/core/src/datasource/memory.rs b/datafusion/core/src/datasource/memory.rs index cbe20f6a63a1..e47122ccdfda 100644 --- a/datafusion/core/src/datasource/memory.rs +++ b/datafusion/core/src/datasource/memory.rs @@ -17,35 +17,37 @@ //! [`MemTable`] for querying `Vec<RecordBatch>` by DataFusion.
-use datafusion_physical_plan::metrics::MetricsSet; -use futures::StreamExt; -use log::debug; use std::any::Any; use std::collections::HashMap; use std::fmt::{self, Debug}; use std::sync::Arc; -use arrow::datatypes::SchemaRef; -use arrow::record_batch::RecordBatch; -use async_trait::async_trait; -use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt}; -use datafusion_execution::TaskContext; -use datafusion_physical_plan::ExecutionPlanProperties; -use parking_lot::Mutex; -use tokio::sync::RwLock; -use tokio::task::JoinSet; - use crate::datasource::{TableProvider, TableType}; use crate::error::Result; use crate::execution::context::SessionState; use crate::logical_expr::Expr; use crate::physical_plan::insert::{DataSink, FileSinkExec}; use crate::physical_plan::memory::MemoryExec; -use crate::physical_plan::{common, SendableRecordBatchStream}; -use crate::physical_plan::{repartition::RepartitionExec, Partitioning}; -use crate::physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan}; +use crate::physical_plan::repartition::RepartitionExec; +use crate::physical_plan::{ + common, DisplayAs, DisplayFormatType, ExecutionPlan, ExecutionPlanProperties, + Partitioning, SendableRecordBatchStream, +}; use crate::physical_planner::create_physical_sort_expr; +use arrow::datatypes::SchemaRef; +use arrow::record_batch::RecordBatch; +use datafusion_common::{not_impl_err, plan_err, Constraints, DFSchema, SchemaExt}; +use datafusion_execution::TaskContext; +use datafusion_physical_plan::metrics::MetricsSet; + +use async_trait::async_trait; +use futures::StreamExt; +use log::debug; +use parking_lot::Mutex; +use tokio::sync::RwLock; +use tokio::task::JoinSet; + /// Type alias for partition data pub type PartitionData = Arc<RwLock<Vec<RecordBatch>>>; @@ -362,17 +364,20 @@ impl DataSink for MemSink { #[cfg(test)] mod tests { + use std::collections::HashMap; + use super::*; use crate::datasource::provider_as_source; use crate::physical_plan::collect; use crate::prelude::SessionContext; + use arrow::array::{AsArray, Int32Array}; use arrow::datatypes::{DataType, Field, Schema, UInt64Type}; use arrow::error::ArrowError; use datafusion_common::DataFusionError; use datafusion_expr::LogicalPlanBuilder; + use futures::StreamExt; - use std::collections::HashMap; #[tokio::test] async fn test_with_projection() -> Result<()> { diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 6e7dcf39069c..2ccd83de80cb 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ -219,13 +219,15 @@ mod private { #[cfg(test)] #[cfg(feature = "avro")] mod tests { + use super::*; + use crate::arrow::datatypes::{DataType, Field, SchemaBuilder}; use crate::datasource::file_format::{avro::AvroFormat, FileFormat}; use crate::datasource::listing::PartitionedFile; use crate::datasource::object_store::ObjectStoreUrl; use crate::prelude::SessionContext; use crate::scalar::ScalarValue; use crate::test::object_store::local_unpartitioned_file; - use arrow::datatypes::{DataType, Field, SchemaBuilder}; + use futures::StreamExt; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; use rstest::*; use url::Url; - use super::*; #[tokio::test] async fn avro_exec_without_partition() -> Result<()> { test_with_stores(Arc::new(LocalFileSystem::new())).await } diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs
b/datafusion/core/src/datasource/physical_plan/csv.rs index 05a83e8ac0b7..5fcb9f483952 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -32,8 +32,8 @@ use crate::datasource::physical_plan::FileMeta; use crate::error::{DataFusionError, Result}; use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanProperties, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, + Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::csv; @@ -41,7 +41,6 @@ use arrow::datatypes::SchemaRef; use datafusion_common::config::ConfigOptions; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; -use datafusion_physical_plan::ExecutionPlanProperties; use bytes::{Buf, Bytes}; use futures::{ready, StreamExt, TryStreamExt}; @@ -512,20 +511,23 @@ pub async fn plan_to_csv( #[cfg(test)] mod tests { + use std::fs::{self, File}; + use std::io::Write; + use super::*; use crate::dataframe::DataFrameWriteOptions; use crate::prelude::*; use crate::test::{partitioned_csv_config, partitioned_file_groups}; use crate::{scalar::ScalarValue, test_util::aggr_test_schema}; + use arrow::datatypes::*; use datafusion_common::test_util::arrow_test_data; use datafusion_common::FileType; + use futures::StreamExt; use object_store::chunked::ChunkedStore; use object_store::local::LocalFileSystem; use rstest::*; - use std::fs::{self, File}; - use std::io::Write; use tempfile::TempDir; use url::Url; diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 6f9af2e6abcf..62b96ea3aefb 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -32,15 +32,14 @@ use crate::datasource::physical_plan::FileMeta; use crate::error::{DataFusionError, Result}; use crate::physical_plan::metrics::{ExecutionPlanMetricsSet, MetricsSet}; use crate::physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, - PlanProperties, SendableRecordBatchStream, Statistics, + DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, + Partitioning, PlanProperties, SendableRecordBatchStream, Statistics, }; use arrow::json::ReaderBuilder; use arrow::{datatypes::SchemaRef, json}; use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; -use datafusion_physical_plan::ExecutionPlanProperties; use bytes::{Buf, Bytes}; use futures::{ready, StreamExt, TryStreamExt}; diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 5ccffde26359..12b62fd68068 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -38,15 +38,14 @@ use crate::{ physical_optimizer::pruning::PruningPredicate, physical_plan::{ metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}, - DisplayFormatType, ExecutionMode, ExecutionPlan, Partitioning, PlanProperties, - SendableRecordBatchStream, Statistics, + DisplayFormatType, ExecutionMode, ExecutionPlan, ExecutionPlanProperties, + Partitioning, PlanProperties, SendableRecordBatchStream, 
Statistics, }, }; use arrow::datatypes::{DataType, SchemaRef}; use arrow::error::ArrowError; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalExpr}; -use datafusion_physical_plan::ExecutionPlanProperties; use bytes::Bytes; use futures::future::BoxFuture; diff --git a/datafusion/core/src/physical_optimizer/join_selection.rs b/datafusion/core/src/physical_optimizer/join_selection.rs index 338bf619f9a0..ee60c65ead0b 100644 --- a/datafusion/core/src/physical_optimizer/join_selection.rs +++ b/datafusion/core/src/physical_optimizer/join_selection.rs @@ -34,16 +34,14 @@ use crate::physical_plan::joins::{ SymmetricHashJoinExec, }; use crate::physical_plan::projection::ProjectionExec; -use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use arrow_schema::Schema; use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::JoinType; -use datafusion_common::{internal_err, JoinSide}; +use datafusion_common::{internal_err, JoinSide, JoinType}; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::sort_properties::SortProperties; use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; -use datafusion_physical_plan::ExecutionPlanProperties; /// The [`JoinSelection`] rule tries to modify a given plan so that it can /// accommodate infinite sources and optimize joins in the plan according to @@ -1366,8 +1364,9 @@ mod util_tests { #[cfg(test)] mod hash_join_tests { - use self::tests_statistical::crosscheck_plans; + use std::sync::Arc; + use self::tests_statistical::crosscheck_plans; use super::*; use crate::physical_optimizer::join_selection::swap_join_type; use crate::physical_optimizer::test_utils::SourceType; @@ -1375,12 +1374,12 @@ mod hash_join_tests { use crate::physical_plan::joins::PartitionMode; use crate::physical_plan::projection::ProjectionExec; use crate::test_util::UnboundedExec; + use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use datafusion_common::utils::DataPtr; use datafusion_common::JoinType; use datafusion_physical_plan::ExecutionPlanProperties; - use std::sync::Arc; struct TestCase { case: String, diff --git a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs index 036c938c1ca6..7be9acec5092 100644 --- a/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/limited_distinct_aggregation.rs @@ -18,16 +18,18 @@ //! A special-case optimizer rule that pushes limit into a grouped aggregation //! which has no aggregate expressions or sorting requirements +use std::sync::Arc; + use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::aggregates::AggregateExec; use crate::physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; -use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties}; + use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::Result; -use datafusion_physical_plan::ExecutionPlanProperties; + use itertools::Itertools; -use std::sync::Arc; /// An optimizer rule that passes a `limit` hint into grouped aggregations which don't require all /// rows in the group to be processed for correctness. 
Example queries fitting this description are:
@@ -189,6 +191,8 @@ impl PhysicalOptimizerRule for LimitedDistinctAggregation {
 
 #[cfg(test)]
 mod tests {
+    use std::sync::Arc;
+
     use super::*;
     use crate::error::Result;
     use crate::physical_optimizer::aggregate_statistics::tests::TestAggregate;
@@ -199,6 +203,7 @@ mod tests {
     use crate::physical_plan::collect;
     use crate::physical_plan::memory::MemoryExec;
     use crate::prelude::SessionContext;
+
     use arrow::array::Int32Array;
     use arrow::compute::SortOptions;
     use arrow::datatypes::{DataType, Field, Schema};
@@ -207,13 +212,10 @@ mod tests {
     use arrow_schema::SchemaRef;
     use datafusion_execution::config::SessionConfig;
     use datafusion_expr::Operator;
-    use datafusion_physical_expr::expressions::cast;
-    use datafusion_physical_expr::expressions::col;
-    use datafusion_physical_expr::PhysicalSortExpr;
-    use datafusion_physical_expr::{expressions, PhysicalExpr};
+    use datafusion_physical_expr::expressions::{cast, col};
+    use datafusion_physical_expr::{expressions, PhysicalExpr, PhysicalSortExpr};
     use datafusion_physical_plan::aggregates::AggregateMode;
     use datafusion_physical_plan::displayable;
-    use std::sync::Arc;
 
     fn mock_data() -> Result<Arc<MemoryExec>> {
         let schema = Arc::new(Schema::new(vec![
diff --git a/datafusion/core/src/physical_optimizer/pipeline_checker.rs b/datafusion/core/src/physical_optimizer/pipeline_checker.rs
index c0a77eb56f87..e783f75378b1 100644
--- a/datafusion/core/src/physical_optimizer/pipeline_checker.rs
+++ b/datafusion/core/src/physical_optimizer/pipeline_checker.rs
@@ -24,14 +24,13 @@ use std::sync::Arc;
 use crate::config::ConfigOptions;
 use crate::error::Result;
 use crate::physical_optimizer::PhysicalOptimizerRule;
-use crate::physical_plan::ExecutionPlan;
+use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties};
 
 use datafusion_common::config::OptimizerOptions;
 use datafusion_common::plan_err;
 use datafusion_common::tree_node::{Transformed, TreeNode};
 use datafusion_physical_expr::intervals::utils::{check_support, is_datatype_supported};
 use datafusion_physical_plan::joins::SymmetricHashJoinExec;
-use datafusion_physical_plan::ExecutionPlanProperties;
 
 /// The PipelineChecker rule rejects non-runnable query plans that use
 /// pipeline-breaking operators on infinite input(s).
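For context on what the `PipelineChecker` rule above enforces, here is a minimal, self-contained sketch of the check. The types are illustrative stand-ins, not the DataFusion API; the real rule walks an `ExecutionPlan` tree and consults the `ExecutionMode` values documented later in this patch:

```rust
// Illustrative stand-ins only; not the actual DataFusion types.
#[derive(PartialEq)]
enum ExecutionMode {
    Bounded,          // finite input, finite output
    Unbounded,        // infinite input, but output can stream incrementally
    PipelineBreaking, // must buffer an infinite input: never produces output
}

struct PlanNode {
    name: &'static str,
    mode: ExecutionMode,
    children: Vec<PlanNode>,
}

// Reject the plan if any operator in the tree is pipeline-breaking.
fn check_runnable(plan: &PlanNode) -> Result<(), String> {
    if plan.mode == ExecutionMode::PipelineBreaking {
        return Err(format!("`{}` cannot run on an infinite input", plan.name));
    }
    plan.children.iter().try_for_each(check_runnable)
}

fn main() {
    // A full sort must see all input rows before emitting any output,
    // so sorting an unbounded stream can never make progress.
    let plan = PlanNode {
        name: "SortExec",
        mode: ExecutionMode::PipelineBreaking,
        children: vec![PlanNode {
            name: "StreamingTableExec",
            mode: ExecutionMode::Unbounded,
            children: vec![],
        }],
    };
    assert!(check_runnable(&plan).is_err());
}
```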
diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 9cb2d6ecbc71..4ed265d59526 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -39,7 +39,7 @@ use crate::physical_plan::projection::ProjectionExec; use crate::physical_plan::repartition::RepartitionExec; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use crate::physical_plan::{Distribution, ExecutionPlan}; +use crate::physical_plan::{Distribution, ExecutionPlan, ExecutionPlanProperties}; use arrow_schema::SchemaRef; use datafusion_common::config::ConfigOptions; @@ -52,7 +52,6 @@ use datafusion_physical_expr::{ }; use datafusion_physical_plan::streaming::StreamingTableExec; use datafusion_physical_plan::union::UnionExec; -use datafusion_physical_plan::ExecutionPlanProperties; use itertools::Itertools; diff --git a/datafusion/core/src/physical_optimizer/sort_pushdown.rs b/datafusion/core/src/physical_optimizer/sort_pushdown.rs index 7c7564fdb400..ff82319fba19 100644 --- a/datafusion/core/src/physical_optimizer/sort_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/sort_pushdown.rs @@ -28,7 +28,7 @@ use crate::physical_plan::projection::ProjectionExec; use crate::physical_plan::repartition::RepartitionExec; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::tree_node::PlanContext; -use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use datafusion_common::tree_node::Transformed; use datafusion_common::{plan_err, JoinSide, Result}; @@ -37,7 +37,6 @@ use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::{ LexRequirementRef, PhysicalSortExpr, PhysicalSortRequirement, }; -use datafusion_physical_plan::ExecutionPlanProperties; /// This is a "data class" we use within the [`EnforceSorting`] rule to push /// down [`SortExec`] in the plan. In some cases, we can reduce the total diff --git a/datafusion/core/src/physical_optimizer/topk_aggregation.rs b/datafusion/core/src/physical_optimizer/topk_aggregation.rs index 2006402ac59e..0ca709e56bcb 100644 --- a/datafusion/core/src/physical_optimizer/topk_aggregation.rs +++ b/datafusion/core/src/physical_optimizer/topk_aggregation.rs @@ -17,6 +17,8 @@ //! 
An optimizer rule that detects aggregate operations that could use a limited bucket count +use std::sync::Arc; + use crate::physical_optimizer::PhysicalOptimizerRule; use crate::physical_plan::aggregates::AggregateExec; use crate::physical_plan::coalesce_batches::CoalesceBatchesExec; @@ -24,14 +26,15 @@ use crate::physical_plan::filter::FilterExec; use crate::physical_plan::repartition::RepartitionExec; use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::ExecutionPlan; + use arrow_schema::DataType; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TreeNode}; use datafusion_common::Result; use datafusion_physical_expr::expressions::Column; use datafusion_physical_expr::PhysicalSortExpr; + use itertools::Itertools; -use std::sync::Arc; /// An optimizer rule that passes a `limit` hint to aggregations if the whole result is not needed pub struct TopKAggregation {} diff --git a/datafusion/core/src/physical_optimizer/utils.rs b/datafusion/core/src/physical_optimizer/utils.rs index f4c2c3873f68..8cc543802e3f 100644 --- a/datafusion/core/src/physical_optimizer/utils.rs +++ b/datafusion/core/src/physical_optimizer/utils.rs @@ -25,12 +25,11 @@ use crate::physical_plan::sorts::sort::SortExec; use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use crate::physical_plan::union::UnionExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; -use crate::physical_plan::ExecutionPlan; +use crate::physical_plan::{ExecutionPlan, ExecutionPlanProperties}; use datafusion_physical_expr::{LexRequirement, PhysicalSortRequirement}; use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; use datafusion_physical_plan::tree_node::PlanContext; -use datafusion_physical_plan::ExecutionPlanProperties; /// This utility function adds a `SortExec` above an operator according to the /// given ordering requirements while preserving the original partitioning. 
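To illustrate what the sort-adding utility documented above does, here is a hedged sketch against the crate layout in this diff. The `sort_preserving_partitioning` helper and the column name `"a"` are hypothetical; the real utility is driven by ordering requirements rather than a hard-coded expression:

```rust
// A sketch, not DataFusion's actual helper: wrap `input` in a SortExec
// that keeps the input's partitioning instead of merging to one partition.
use std::sync::Arc;

use arrow_schema::SortOptions;
use datafusion_physical_expr::{expressions::col, PhysicalSortExpr};
use datafusion_physical_plan::{sorts::sort::SortExec, ExecutionPlan};

fn sort_preserving_partitioning(
    input: Arc<dyn ExecutionPlan>,
) -> datafusion_common::Result<Arc<dyn ExecutionPlan>> {
    // Sort by column "a" (hypothetical; the real utility takes requirements).
    let sort_expr = PhysicalSortExpr {
        expr: col("a", &input.schema())?,
        options: SortOptions::default(),
    };
    // `with_preserve_partitioning(true)` sorts every partition independently,
    // leaving the plan's output partitioning unchanged.
    Ok(Arc::new(
        SortExec::new(vec![sort_expr], input).with_preserve_partitioning(true),
    ))
}
```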
diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 41a6e4d75be7..bf5f5afc5791 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -66,8 +66,8 @@ use crate::physical_plan::unnest::UnnestExec; use crate::physical_plan::values::ValuesExec; use crate::physical_plan::windows::{BoundedWindowAggExec, WindowAggExec}; use crate::physical_plan::{ - aggregates, displayable, udaf, windows, AggregateExpr, ExecutionPlan, InputOrderMode, - Partitioning, PhysicalExpr, WindowExpr, + aggregates, displayable, udaf, windows, AggregateExpr, ExecutionPlan, + ExecutionPlanProperties, InputOrderMode, Partitioning, PhysicalExpr, WindowExpr, }; use arrow::compute::SortOptions; @@ -93,7 +93,6 @@ use datafusion_expr::{ }; use datafusion_physical_expr::expressions::Literal; use datafusion_physical_plan::placeholder_row::PlaceholderRowExec; -use datafusion_physical_plan::ExecutionPlanProperties; use datafusion_sql::utils::window_expr_common_partition_keys; use async_trait::async_trait; diff --git a/datafusion/physical-plan/src/analyze.rs b/datafusion/physical-plan/src/analyze.rs index f771ac238887..5baedc332951 100644 --- a/datafusion/physical-plan/src/analyze.rs +++ b/datafusion/physical-plan/src/analyze.rs @@ -25,7 +25,6 @@ use super::{ DisplayAs, Distribution, ExecutionPlanProperties, PlanProperties, SendableRecordBatchStream, }; - use crate::display::DisplayableExecutionPlan; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; @@ -249,9 +248,7 @@ fn create_output_batch( #[cfg(test)] mod tests { - use arrow::datatypes::{DataType, Field, Schema}; - use futures::FutureExt; - + use super::*; use crate::{ collect, test::{ @@ -260,7 +257,8 @@ mod tests { }, }; - use super::*; + use arrow::datatypes::{DataType, Field, Schema}; + use futures::FutureExt; #[tokio::test] async fn test_drop_cancel() -> Result<()> { diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index d2706cb06f90..1e58260a5344 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -168,10 +168,6 @@ impl ExecutionPlan for CoalescePartitionsExec { #[cfg(test)] mod tests { - - use arrow::datatypes::{DataType, Field, Schema}; - use futures::FutureExt; - use super::*; use crate::test::exec::{ assert_strong_count_converges_to_zero, BlockingExec, PanicExec, @@ -179,6 +175,10 @@ mod tests { use crate::test::{self, assert_is_pending}; use crate::{collect, common}; + use arrow::datatypes::{DataType, Field, Schema}; + + use futures::FutureExt; + #[tokio::test] async fn merge() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index 003c60edd9a8..47cdf3e400e3 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -381,11 +381,10 @@ mod tests { use arrow::compute::SortOptions; use arrow::{ - array::{Float32Array, Float64Array}, + array::{Float32Array, Float64Array, UInt64Array}, datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; - use arrow_array::UInt64Array; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{col, Column}; diff --git a/datafusion/physical-plan/src/lib.rs b/datafusion/physical-plan/src/lib.rs index 578ce42f2e9d..b726b587dd1d 100644 --- a/datafusion/physical-plan/src/lib.rs +++ b/datafusion/physical-plan/src/lib.rs @@ 
-123,6 +123,7 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
         self.properties().schema().clone()
     }
 
+    /// Gets plan properties, such as output ordering(s), partitioning information, etc.
     fn properties(&self) -> &PlanProperties;
 
     /// Specifies the data distribution requirements for all the
@@ -400,6 +401,8 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync {
     }
 }
 
+/// This extension trait provides an API to fetch various properties of
+/// [`ExecutionPlan`] objects.
 pub trait ExecutionPlanProperties {
     fn output_partitioning(&self) -> &Partitioning;
 
@@ -419,21 +422,18 @@ impl ExecutionPlanProperties for Arc<dyn ExecutionPlan> {
 
     /// Specifies whether this plan generates an infinite stream of records.
     /// If the plan does not support pipelining, but its input(s) are
-    /// infinite, returns an error to indicate this.
+    /// infinite, returns [`ExecutionMode::PipelineBreaking`] to indicate this.
     fn execution_mode(&self) -> ExecutionMode {
         self.properties().execution_mode()
     }
 
     /// If the output of this `ExecutionPlan` within each partition is sorted,
-    /// returns `Some(keys)` with the description of how it was sorted.
+    /// returns `Some(keys)` describing the ordering. A `None` return value
+    /// indicates no assumptions should be made on the output ordering.
     ///
-    /// For example, Sort, (obviously) produces sorted output as does
-    /// SortPreservingMergeStream. Less obviously `Projection`
-    /// produces sorted output if its input was sorted as it does not
-    /// reorder the input rows,
-    ///
-    /// It is safe to return `None` here if your `ExecutionPlan` does not
-    /// have any particular output order here
+    /// For example, `SortExec` (obviously) produces sorted output as does
+    /// `SortPreservingMergeStream`. Less obviously, `Projection` produces sorted
+    /// output if its input is sorted as it does not reorder the input rows.
fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { self.properties().output_ordering() } diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index fa2d16530823..ca324a0f7d3b 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -287,8 +287,8 @@ mod tests { use std::sync::Arc; use crate::memory::MemoryExec; - use crate::ExecutionPlan; + use arrow_schema::{DataType, Field, Schema, SortOptions}; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::PhysicalSortExpr; diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index d1befb7c53c0..fe93ea131506 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -926,17 +926,7 @@ impl RecordBatchStream for PerPartitionStream { mod tests { use std::collections::HashSet; - use arrow::array::{ArrayRef, StringArray}; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow::record_batch::RecordBatch; - use arrow_array::UInt32Array; - use futures::FutureExt; - use tokio::task::JoinHandle; - - use datafusion_common::cast::as_string_array; - use datafusion_common::{assert_batches_sorted_eq, exec_err}; - use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; - + use super::*; use crate::{ test::{ assert_is_pending, @@ -948,7 +938,15 @@ mod tests { {collect, expressions::col, memory::MemoryExec}, }; - use super::*; + use arrow::array::{ArrayRef, StringArray, UInt32Array}; + use arrow::datatypes::{DataType, Field, Schema}; + use arrow::record_batch::RecordBatch; + use datafusion_common::cast::as_string_array; + use datafusion_common::{assert_batches_sorted_eq, exec_err}; + use datafusion_execution::runtime_env::{RuntimeConfig, RuntimeEnv}; + + use futures::FutureExt; + use tokio::task::JoinHandle; #[tokio::test] async fn one_to_many_round_robin() -> Result<()> { diff --git a/datafusion/physical-plan/src/stream.rs b/datafusion/physical-plan/src/stream.rs index dcba5c74daf1..99d9367740be 100644 --- a/datafusion/physical-plan/src/stream.rs +++ b/datafusion/physical-plan/src/stream.rs @@ -22,12 +22,14 @@ use std::sync::Arc; use std::task::Context; use std::task::Poll; +use super::metrics::BaselineMetrics; +use super::{ExecutionPlan, RecordBatchStream, SendableRecordBatchStream}; use crate::displayable; -use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; -use datafusion_common::internal_err; -use datafusion_common::Result; +use arrow::{datatypes::SchemaRef, record_batch::RecordBatch}; +use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; + use futures::stream::BoxStream; use futures::{Future, Stream, StreamExt}; use log::debug; @@ -35,9 +37,6 @@ use pin_project_lite::pin_project; use tokio::sync::mpsc::{Receiver, Sender}; use tokio::task::JoinSet; -use super::metrics::BaselineMetrics; -use super::{ExecutionPlan, RecordBatchStream, SendableRecordBatchStream}; - /// Creates a stream from a collection of producing tasks, routing panics to the stream. 
/// /// Note that this is similar to [`ReceiverStream` from tokio-stream], with the differences being: @@ -458,13 +457,13 @@ impl futures::Stream for ObservedStream { #[cfg(test)] mod test { use super::*; - use arrow_schema::{DataType, Field, Schema}; - use datafusion_common::exec_err; - use crate::test::exec::{ assert_strong_count_converges_to_zero, BlockingExec, MockExec, PanicExec, }; + use arrow_schema::{DataType, Field, Schema}; + use datafusion_common::exec_err; + fn schema() -> SchemaRef { Arc::new(Schema::new(vec![Field::new("a", DataType::Float32, true)])) }
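The `ExecutionPlanProperties` documentation added in the `lib.rs` hunk above is the user-facing surface of this refactor: callers reach partitioning, execution mode, and output ordering through one extension trait on `Arc<dyn ExecutionPlan>`. A hedged usage sketch follows; the `describe` helper is hypothetical, while the trait and method names come from this diff:

```rust
use std::sync::Arc;

use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties};

// Hypothetical helper showing the extension-trait API from this patch.
fn describe(plan: &Arc<dyn ExecutionPlan>) {
    // Both calls delegate to the plan's cached `PlanProperties`.
    println!(
        "partitions: {}",
        plan.output_partitioning().partition_count()
    );

    // `None` means callers must not assume any output ordering.
    match plan.output_ordering() {
        Some(keys) => println!("sorted on {} expression(s)", keys.len()),
        None => println!("no known output ordering"),
    }
}
```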