diff --git a/Cargo.toml b/Cargo.toml index aa412cba5108..9eb4d5b39af0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -128,7 +128,7 @@ env_logger = "0.11" futures = "0.3" half = { version = "2.2.1", default-features = false } hashbrown = { version = "0.14.5", features = ["raw"] } -indexmap = "2.0.0" +indexmap = "2.7.1" itertools = "0.14" log = "^0.4" object_store = { version = "0.11.0", default-features = false } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 967c2e5f0ef0..84354c8c0e9a 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1400,6 +1400,7 @@ dependencies = [ "arrow", "datafusion-common", "itertools 0.14.0", + "paste", ] [[package]] @@ -1592,7 +1593,6 @@ dependencies = [ "datafusion-execution", "datafusion-expr", "datafusion-expr-common", - "datafusion-functions-aggregate", "datafusion-physical-expr", "datafusion-physical-expr-common", "datafusion-physical-plan", @@ -2398,9 +2398,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.7.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -3665,9 +3665,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.135" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +checksum = "930cfb6e6abf99298aaad7d29abbef7a9999a9a8806a40088f55f0dcec03146b" dependencies = [ "itoa", "memchr", diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index d8aaad801e5c..d90ec3333cb9 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -54,20 +54,15 @@ path = "examples/external_dependency/query-aws-s3.rs" [dev-dependencies] arrow = { workspace = true } +# arrow-schema is required for the record_batch! macro arrow-flight = { workspace = true } arrow-schema = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } dashmap = { workspace = true } +# note: examples should use only the main datafusion crate datafusion = { workspace = true, default-features = true, features = ["avro"] } -datafusion-catalog = { workspace = true } -datafusion-common = { workspace = true, default-features = true } -datafusion-expr = { workspace = true } -datafusion-functions-window-common = { workspace = true } -datafusion-optimizer = { workspace = true, default-features = true } -datafusion-physical-expr = { workspace = true, default-features = true } datafusion-proto = { workspace = true } -datafusion-sql = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } log = { workspace = true } diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs index 28a3a2f1de09..7e2139370e20 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/advanced_parquet_index.rs @@ -15,11 +15,22 @@ // specific language governing permissions and limitations // under the License.
+use std::any::Any; +use std::collections::{HashMap, HashSet}; +use std::fs::File; +use std::ops::Range; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; -use arrow_schema::SchemaRef; +use arrow::datatypes::SchemaRef; use async_trait::async_trait; use bytes::Bytes; use datafusion::catalog::Session; +use datafusion::common::{ + internal_datafusion_err, DFSchema, DataFusionError, Result, ScalarValue, +}; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::parquet::{ ParquetAccessPlan, ParquetExecBuilder, @@ -29,6 +40,8 @@ use datafusion::datasource::physical_plan::{ }; use datafusion::datasource::TableProvider; use datafusion::execution::object_store::ObjectStoreUrl; +use datafusion::logical_expr::utils::conjunction; +use datafusion::logical_expr::{TableProviderFilterPushDown, TableType}; use datafusion::parquet::arrow::arrow_reader::{ ArrowReaderOptions, ParquetRecordBatchReaderBuilder, RowSelection, RowSelector, }; @@ -37,27 +50,15 @@ use datafusion::parquet::arrow::ArrowWriter; use datafusion::parquet::file::metadata::ParquetMetaData; use datafusion::parquet::file::properties::{EnabledStatistics, WriterProperties}; use datafusion::parquet::schema::types::ColumnPath; +use datafusion::physical_expr::utils::{Guarantee, LiteralGuarantee}; use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_optimizer::pruning::PruningPredicate; use datafusion::physical_plan::metrics::ExecutionPlanMetricsSet; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::*; -use datafusion_common::{ - internal_datafusion_err, DFSchema, DataFusionError, Result, ScalarValue, -}; -use datafusion_expr::utils::conjunction; -use datafusion_expr::{TableProviderFilterPushDown, TableType}; -use datafusion_physical_expr::utils::{Guarantee, LiteralGuarantee}; use futures::future::BoxFuture; use futures::FutureExt; use object_store::ObjectStore; -use std::any::Any; -use std::collections::{HashMap, HashSet}; -use std::fs::File; -use std::ops::Range; -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::Arc; use tempfile::TempDir; use url::Url; @@ -282,7 +283,7 @@ impl IndexTableProvider { .transpose()? // if there are no filters, use a literal true to have a predicate // that always evaluates to true we can pass to the index - .unwrap_or_else(|| datafusion_physical_expr::expressions::lit(true)); + .unwrap_or_else(|| datafusion::physical_expr::expressions::lit(true)); Ok(predicate) } diff --git a/datafusion-examples/examples/advanced_udaf.rs b/datafusion-examples/examples/advanced_udaf.rs index a914cea4a928..fd65c3352bbc 100644 --- a/datafusion-examples/examples/advanced_udaf.rs +++ b/datafusion-examples/examples/advanced_udaf.rs @@ -15,27 +15,25 @@ // specific language governing permissions and limitations // under the License. 
-use arrow_schema::{Field, Schema}; +use arrow::datatypes::{Field, Schema}; +use datafusion::physical_expr::NullState; use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; -use datafusion_physical_expr::NullState; use std::{any::Any, sync::Arc}; -use arrow::{ - array::{ - ArrayRef, AsArray, Float32Array, PrimitiveArray, PrimitiveBuilder, UInt32Array, - }, - datatypes::{ArrowNativeTypeOp, ArrowPrimitiveType, Float64Type, UInt32Type}, - record_batch::RecordBatch, +use arrow::array::{ + ArrayRef, AsArray, Float32Array, PrimitiveArray, PrimitiveBuilder, UInt32Array, }; +use arrow::datatypes::{ArrowNativeTypeOp, ArrowPrimitiveType, Float64Type, UInt32Type}; +use arrow::record_batch::RecordBatch; +use datafusion::common::{cast::as_float64_array, ScalarValue}; use datafusion::error::Result; -use datafusion::prelude::*; -use datafusion_common::{cast::as_float64_array, ScalarValue}; -use datafusion_expr::{ +use datafusion::logical_expr::{ expr::AggregateFunction, function::{AccumulatorArgs, AggregateFunctionSimplification, StateFieldsArgs}, simplify::SimplifyInfo, - Accumulator, AggregateUDF, AggregateUDFImpl, GroupsAccumulator, Signature, + Accumulator, AggregateUDF, AggregateUDFImpl, EmitTo, GroupsAccumulator, Signature, }; +use datafusion::prelude::*; /// This example shows how to use the full AggregateUDFImpl API to implement a user /// defined aggregate function. As in the `simple_udaf.rs` example, this struct implements @@ -308,7 +306,7 @@ impl GroupsAccumulator for GeometricMeanGroupsAccumulator { } /// Generate output, as specified by `emit_to` and update the intermediate state - fn evaluate(&mut self, emit_to: datafusion_expr::EmitTo) -> Result { + fn evaluate(&mut self, emit_to: EmitTo) -> Result { let counts = emit_to.take_needed(&mut self.counts); let prods = emit_to.take_needed(&mut self.prods); let nulls = self.null_state.build(emit_to); @@ -344,7 +342,7 @@ impl GroupsAccumulator for GeometricMeanGroupsAccumulator { } // return arrays for counts and prods - fn state(&mut self, emit_to: datafusion_expr::EmitTo) -> Result> { + fn state(&mut self, emit_to: EmitTo) -> Result> { let nulls = self.null_state.build(emit_to); let nulls = Some(nulls); diff --git a/datafusion-examples/examples/advanced_udf.rs b/datafusion-examples/examples/advanced_udf.rs index ae35cff6facf..290d1c53334b 100644 --- a/datafusion-examples/examples/advanced_udf.rs +++ b/datafusion-examples/examples/advanced_udf.rs @@ -24,14 +24,14 @@ use arrow::array::{ use arrow::compute; use arrow::datatypes::{DataType, Float64Type}; use arrow::record_batch::RecordBatch; +use datafusion::common::{exec_err, internal_err, ScalarValue}; use datafusion::error::Result; +use datafusion::logical_expr::sort_properties::{ExprProperties, SortProperties}; use datafusion::logical_expr::Volatility; -use datafusion::prelude::*; -use datafusion_common::{exec_err, internal_err, ScalarValue}; -use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; -use datafusion_expr::{ +use datafusion::logical_expr::{ ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, }; +use datafusion::prelude::*; /// This example shows how to use the full ScalarUDFImpl API to implement a user /// defined function. 
As in the `simple_udf.rs` example, this struct implements diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/advanced_udwf.rs index 49e890467d21..ac326be9cb04 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/advanced_udwf.rs @@ -18,22 +18,24 @@ use datafusion::{arrow::datatypes::DataType, logical_expr::Volatility}; use std::any::Any; +use arrow::datatypes::Field; use arrow::{ array::{ArrayRef, AsArray, Float64Array}, datatypes::Float64Type, }; -use arrow_schema::Field; +use datafusion::common::ScalarValue; use datafusion::error::Result; use datafusion::functions_aggregate::average::avg_udaf; -use datafusion::prelude::*; -use datafusion_common::ScalarValue; -use datafusion_expr::expr::WindowFunction; -use datafusion_expr::function::{WindowFunctionSimplification, WindowUDFFieldArgs}; -use datafusion_expr::simplify::SimplifyInfo; -use datafusion_expr::{ - Expr, PartitionEvaluator, Signature, WindowFrame, WindowUDF, WindowUDFImpl, +use datafusion::logical_expr::expr::WindowFunction; +use datafusion::logical_expr::function::{ + PartitionEvaluatorArgs, WindowFunctionSimplification, WindowUDFFieldArgs, +}; +use datafusion::logical_expr::simplify::SimplifyInfo; +use datafusion::logical_expr::{ + Expr, PartitionEvaluator, Signature, WindowFrame, WindowFunctionDefinition, + WindowUDF, WindowUDFImpl, }; -use datafusion_functions_window_common::partition::PartitionEvaluatorArgs; +use datafusion::prelude::*; /// This example shows how to use the full WindowUDFImpl API to implement a user /// defined window function. As in the `simple_udwf.rs` example, this struct implements @@ -189,7 +191,7 @@ impl WindowUDFImpl for SimplifySmoothItUdf { fn simplify(&self) -> Option { let simplify = |window_function: WindowFunction, _: &dyn SimplifyInfo| { Ok(Expr::WindowFunction(WindowFunction { - fun: datafusion_expr::WindowFunctionDefinition::AggregateUDF(avg_udaf()), + fun: WindowFunctionDefinition::AggregateUDF(avg_udaf()), args: window_function.args, partition_by: window_function.partition_by, order_by: window_function.order_by, diff --git a/datafusion-examples/examples/analyzer_rule.rs b/datafusion-examples/examples/analyzer_rule.rs index aded64ed4105..cb81cd167a88 100644 --- a/datafusion-examples/examples/analyzer_rule.rs +++ b/datafusion-examples/examples/analyzer_rule.rs @@ -16,12 +16,12 @@ // under the License. 
use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; +use datafusion::common::config::ConfigOptions; +use datafusion::common::tree_node::{Transformed, TreeNode}; +use datafusion::common::Result; +use datafusion::logical_expr::{col, lit, Expr, LogicalPlan, LogicalPlanBuilder}; +use datafusion::optimizer::analyzer::AnalyzerRule; use datafusion::prelude::SessionContext; -use datafusion_common::config::ConfigOptions; -use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::Result; -use datafusion_expr::{col, lit, Expr, LogicalPlan, LogicalPlanBuilder}; -use datafusion_optimizer::analyzer::AnalyzerRule; use std::sync::{Arc, Mutex}; /// This example demonstrates how to add your own [`AnalyzerRule`] to diff --git a/datafusion-examples/examples/composed_extension_codec.rs b/datafusion-examples/examples/composed_extension_codec.rs index 5c34eccf26e1..4baefcae507f 100644 --- a/datafusion-examples/examples/composed_extension_codec.rs +++ b/datafusion-examples/examples/composed_extension_codec.rs @@ -36,11 +36,11 @@ use std::ops::Deref; use std::sync::Arc; use datafusion::common::Result; +use datafusion::common::{internal_err, DataFusionError}; +use datafusion::logical_expr::registry::FunctionRegistry; +use datafusion::logical_expr::{AggregateUDF, ScalarUDF}; use datafusion::physical_plan::{DisplayAs, ExecutionPlan}; use datafusion::prelude::SessionContext; -use datafusion_common::{internal_err, DataFusionError}; -use datafusion_expr::registry::FunctionRegistry; -use datafusion_expr::{AggregateUDF, ScalarUDF}; use datafusion_proto::physical_plan::{AsExecutionPlan, PhysicalExtensionCodec}; use datafusion_proto::protobuf; diff --git a/datafusion-examples/examples/csv_json_opener.rs b/datafusion-examples/examples/csv_json_opener.rs index 334e4c83404f..0b71432f621c 100644 --- a/datafusion-examples/examples/csv_json_opener.rs +++ b/datafusion-examples/examples/csv_json_opener.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use arrow_schema::{DataType, Field, Schema}; +use arrow::datatypes::{DataType, Field, Schema}; use datafusion::{ assert_batches_eq, datasource::{ diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_file_format.rs index 95168597ebaa..5493fa21968e 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_file_format.rs @@ -19,11 +19,12 @@ use std::{any::Any, sync::Arc}; use arrow::{ array::{AsArray, RecordBatch, StringArray, UInt8Array}, - datatypes::UInt64Type, + datatypes::{DataType, Field, Schema, SchemaRef, UInt64Type}, }; -use arrow_schema::{DataType, Field, Schema, SchemaRef}; +use datafusion::common::{GetExt, Statistics}; use datafusion::execution::session_state::SessionStateBuilder; use datafusion::physical_expr::LexRequirement; +use datafusion::physical_expr::PhysicalExpr; use datafusion::{ datasource::{ file_format::{ @@ -38,8 +39,6 @@ use datafusion::{ physical_plan::ExecutionPlan, prelude::SessionContext, }; -use datafusion_common::{GetExt, Statistics}; -use datafusion_physical_expr::PhysicalExpr; use object_store::{ObjectMeta, ObjectStore}; use tempfile::tempdir; diff --git a/datafusion-examples/examples/dataframe.rs b/datafusion-examples/examples/dataframe.rs index c93d74dd85b0..6f61c164f41d 100644 --- a/datafusion-examples/examples/dataframe.rs +++ b/datafusion-examples/examples/dataframe.rs @@ -17,15 +17,15 @@ use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use 
datafusion::common::config::CsvOptions; +use datafusion::common::parsers::CompressionTypeVariant; +use datafusion::common::DataFusionError; +use datafusion::common::ScalarValue; use datafusion::dataframe::DataFrameWriteOptions; use datafusion::error::Result; use datafusion::functions_aggregate::average::avg; use datafusion::functions_aggregate::min_max::max; use datafusion::prelude::*; -use datafusion_common::config::CsvOptions; -use datafusion_common::parsers::CompressionTypeVariant; -use datafusion_common::DataFusionError; -use datafusion_common::ScalarValue; use std::fs::File; use std::io::Write; use std::sync::Arc; diff --git a/datafusion-examples/examples/date_time_functions.rs b/datafusion-examples/examples/date_time_functions.rs index d9e53e61d1ff..dbe9970439df 100644 --- a/datafusion-examples/examples/date_time_functions.rs +++ b/datafusion-examples/examples/date_time_functions.rs @@ -22,9 +22,9 @@ use datafusion::arrow::array::StringArray; use datafusion::arrow::datatypes::{DataType, Field, Schema}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::assert_batches_eq; +use datafusion::common::assert_contains; use datafusion::error::Result; use datafusion::prelude::*; -use datafusion_common::assert_contains; #[tokio::main] async fn main() -> Result<()> { diff --git a/datafusion-examples/examples/deserialize_to_struct.rs b/datafusion-examples/examples/deserialize_to_struct.rs index 5ac3ee6187d1..d6655b3b654f 100644 --- a/datafusion-examples/examples/deserialize_to_struct.rs +++ b/datafusion-examples/examples/deserialize_to_struct.rs @@ -17,9 +17,9 @@ use arrow::array::{AsArray, PrimitiveArray}; use arrow::datatypes::{Float64Type, Int32Type}; +use datafusion::common::assert_batches_eq; use datafusion::error::Result; use datafusion::prelude::*; -use datafusion_common::assert_batches_eq; use futures::StreamExt; /// This example shows how to convert query results into Rust structs by using diff --git a/datafusion-examples/examples/expr_api.rs b/datafusion-examples/examples/expr_api.rs index 943e5d5e027c..6bfde2ebbf52 100644 --- a/datafusion-examples/examples/expr_api.rs +++ b/datafusion-examples/examples/expr_api.rs @@ -22,20 +22,20 @@ use arrow::array::{BooleanArray, Int32Array, Int8Array}; use arrow::record_batch::RecordBatch; use datafusion::arrow::datatypes::{DataType, Field, Schema, TimeUnit}; +use datafusion::common::tree_node::{Transformed, TreeNode}; use datafusion::common::DFSchema; +use datafusion::common::{ScalarValue, ToDFSchema}; use datafusion::error::Result; use datafusion::functions_aggregate::first_last::first_value_udaf; +use datafusion::logical_expr::execution_props::ExecutionProps; +use datafusion::logical_expr::expr::BinaryExpr; +use datafusion::logical_expr::interval_arithmetic::Interval; +use datafusion::logical_expr::simplify::SimplifyContext; +use datafusion::logical_expr::{ColumnarValue, ExprFunctionExt, ExprSchemable, Operator}; +use datafusion::optimizer::analyzer::type_coercion::TypeCoercionRewriter; use datafusion::optimizer::simplify_expressions::ExprSimplifier; use datafusion::physical_expr::{analyze, AnalysisContext, ExprBoundaries}; use datafusion::prelude::*; -use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{ScalarValue, ToDFSchema}; -use datafusion_expr::execution_props::ExecutionProps; -use datafusion_expr::expr::BinaryExpr; -use datafusion_expr::interval_arithmetic::Interval; -use datafusion_expr::simplify::SimplifyContext; -use datafusion_expr::{ColumnarValue, ExprFunctionExt, ExprSchemable, 
Operator}; -use datafusion_optimizer::analyzer::type_coercion::TypeCoercionRewriter; /// This example demonstrates the DataFusion [`Expr`] API. /// @@ -357,7 +357,7 @@ fn type_coercion_demo() -> Result<()> { // Evaluation with an expression that has not been type coerced cannot succeed. let props = ExecutionProps::default(); let physical_expr = - datafusion_physical_expr::create_physical_expr(&expr, &df_schema, &props)?; + datafusion::physical_expr::create_physical_expr(&expr, &df_schema, &props)?; let e = physical_expr.evaluate(&batch).unwrap_err(); assert!(e .find_root() @@ -373,7 +373,7 @@ fn type_coercion_demo() -> Result<()> { let context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema.clone())); let simplifier = ExprSimplifier::new(context); let coerced_expr = simplifier.coerce(expr.clone(), &df_schema)?; - let physical_expr = datafusion_physical_expr::create_physical_expr( + let physical_expr = datafusion::physical_expr::create_physical_expr( &coerced_expr, &df_schema, &props, @@ -385,7 +385,7 @@ fn type_coercion_demo() -> Result<()> { .clone() .rewrite(&mut TypeCoercionRewriter::new(&df_schema))? .data; - let physical_expr = datafusion_physical_expr::create_physical_expr( + let physical_expr = datafusion::physical_expr::create_physical_expr( &coerced_expr, &df_schema, &props, @@ -413,7 +413,7 @@ fn type_coercion_demo() -> Result<()> { } })? .data; - let physical_expr = datafusion_physical_expr::create_physical_expr( + let physical_expr = datafusion::physical_expr::create_physical_expr( &coerced_expr, &df_schema, &props, diff --git a/datafusion-examples/examples/file_stream_provider.rs b/datafusion-examples/examples/file_stream_provider.rs index e4fd937fd373..e6c59d57e98d 100644 --- a/datafusion-examples/examples/file_stream_provider.rs +++ b/datafusion-examples/examples/file_stream_provider.rs @@ -18,7 +18,7 @@ #[cfg(not(target_os = "windows"))] mod non_windows { use datafusion::assert_batches_eq; - use datafusion_common::instant::Instant; + use datafusion::common::instant::Instant; use std::fs::{File, OpenOptions}; use std::io::Write; use std::path::PathBuf; @@ -27,19 +27,18 @@ mod non_windows { use std::thread; use std::time::Duration; - use arrow::datatypes::{DataType, Field, Schema}; - use arrow_schema::SchemaRef; + use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use futures::StreamExt; use nix::sys::stat; use nix::unistd; use tempfile::TempDir; use tokio::task::JoinSet; + use datafusion::common::{exec_err, Result}; use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::datasource::TableProvider; + use datafusion::logical_expr::SortExpr; use datafusion::prelude::{SessionConfig, SessionContext}; - use datafusion_common::{exec_err, Result}; - use datafusion_expr::SortExpr; // Number of lines written to FIFO const TEST_BATCH_SIZE: usize = 5; @@ -157,7 +156,7 @@ mod non_windows { ])); // Specify the ordering: - let order = vec![vec![datafusion_expr::col("a1").sort(true, false)]]; + let order = vec![vec![datafusion::logical_expr::col("a1").sort(true, false)]]; let provider = fifo_table(schema.clone(), fifo_path, order.clone()); ctx.register_table("fifo", provider)?; @@ -189,7 +188,7 @@ mod non_windows { } #[tokio::main] -async fn main() -> datafusion_common::Result<()> { +async fn main() -> datafusion::error::Result<()> { #[cfg(target_os = "windows")] { println!("file_stream_provider example does not work on windows"); diff --git a/datafusion-examples/examples/flight/flight_sql_server.rs 
b/datafusion-examples/examples/flight/flight_sql_server.rs index 2e46daf7cb4e..54e8de7177cb 100644 --- a/datafusion-examples/examples/flight/flight_sql_server.rs +++ b/datafusion-examples/examples/flight/flight_sql_server.rs @@ -16,6 +16,7 @@ // under the License. use arrow::array::{ArrayRef, StringArray}; +use arrow::datatypes::{DataType, Field, Schema}; use arrow::ipc::writer::IpcWriteOptions; use arrow::record_batch::RecordBatch; use arrow_flight::encode::FlightDataEncoderBuilder; @@ -32,7 +33,6 @@ use arrow_flight::{ Action, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, HandshakeResponse, IpcMessage, SchemaAsIpc, Ticket, }; -use arrow_schema::{DataType, Field, Schema}; use dashmap::DashMap; use datafusion::logical_expr::LogicalPlan; use datafusion::prelude::{DataFrame, ParquetReadOptions, SessionConfig, SessionContext}; diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/function_factory.rs index 58ffa060ebaa..06367f5c09e3 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/function_factory.rs @@ -15,20 +15,21 @@ // specific language governing permissions and limitations // under the License. -use std::result::Result as RResult; -use std::sync::Arc; - +use arrow::datatypes::DataType; +use datafusion::common::tree_node::{Transformed, TreeNode}; +use datafusion::common::{exec_err, internal_err, DataFusionError}; use datafusion::error::Result; use datafusion::execution::context::{ FunctionFactory, RegisterFunction, SessionContext, SessionState, }; -use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{exec_err, internal_err, DataFusionError}; -use datafusion_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; -use datafusion_expr::sort_properties::{ExprProperties, SortProperties}; -use datafusion_expr::{ - CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, +use datafusion::logical_expr::simplify::{ExprSimplifyResult, SimplifyInfo}; +use datafusion::logical_expr::sort_properties::{ExprProperties, SortProperties}; +use datafusion::logical_expr::{ + ColumnarValue, CreateFunction, Expr, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, + Signature, Volatility, }; +use std::result::Result as RResult; +use std::sync::Arc; /// This example shows how to utilize [FunctionFactory] to implement simple /// SQL-macro like functions using a `CREATE FUNCTION` statement. 
The same @@ -111,7 +112,7 @@ struct ScalarFunctionWrapper { name: String, expr: Expr, signature: Signature, - return_type: arrow_schema::DataType, + return_type: DataType, } impl ScalarUDFImpl for ScalarFunctionWrapper { @@ -127,17 +128,11 @@ impl ScalarUDFImpl for ScalarFunctionWrapper { &self.signature } - fn return_type( - &self, - _arg_types: &[arrow_schema::DataType], - ) -> Result { + fn return_type(&self, _arg_types: &[DataType]) -> Result { Ok(self.return_type.clone()) } - fn invoke_with_args( - &self, - _args: ScalarFunctionArgs, - ) -> Result { + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { // Since this function is always simplified to another expression, it // should never actually be invoked internal_err!("This function should not get invoked!") @@ -226,10 +221,7 @@ impl TryFrom for ScalarFunctionWrapper { .into_iter() .map(|a| a.data_type) .collect(), - definition - .params - .behavior - .unwrap_or(datafusion_expr::Volatility::Volatile), + definition.params.behavior.unwrap_or(Volatility::Volatile), ), }) } diff --git a/datafusion-examples/examples/optimizer_rule.rs b/datafusion-examples/examples/optimizer_rule.rs index e8a272f28318..0206c7cd157e 100644 --- a/datafusion-examples/examples/optimizer_rule.rs +++ b/datafusion-examples/examples/optimizer_rule.rs @@ -16,16 +16,16 @@ // under the License. use arrow::array::{ArrayRef, Int32Array, RecordBatch, StringArray}; -use arrow_schema::DataType; -use datafusion::prelude::SessionContext; -use datafusion_common::tree_node::{Transformed, TreeNode}; -use datafusion_common::{assert_batches_eq, Result, ScalarValue}; -use datafusion_expr::{ +use arrow::datatypes::DataType; +use datafusion::common::tree_node::{Transformed, TreeNode}; +use datafusion::common::{assert_batches_eq, Result, ScalarValue}; +use datafusion::logical_expr::{ BinaryExpr, ColumnarValue, Expr, LogicalPlan, Operator, ScalarUDF, ScalarUDFImpl, Signature, Volatility, }; -use datafusion_optimizer::optimizer::ApplyOrder; -use datafusion_optimizer::{OptimizerConfig, OptimizerRule}; +use datafusion::optimizer::ApplyOrder; +use datafusion::optimizer::{OptimizerConfig, OptimizerRule}; +use datafusion::prelude::SessionContext; use std::any::Any; use std::sync::Arc; diff --git a/datafusion-examples/examples/parquet_exec_visitor.rs b/datafusion-examples/examples/parquet_exec_visitor.rs index eeb288beb0df..2f1b6333373e 100644 --- a/datafusion-examples/examples/parquet_exec_visitor.rs +++ b/datafusion-examples/examples/parquet_exec_visitor.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::{ListingOptions, PartitionedFile}; use datafusion::datasource::physical_plan::ParquetExec; +use datafusion::error::DataFusionError; use datafusion::execution::context::SessionContext; use datafusion::physical_plan::metrics::MetricValue; use datafusion::physical_plan::{ @@ -88,7 +89,7 @@ struct ParquetExecVisitor { } impl ExecutionPlanVisitor for ParquetExecVisitor { - type Error = datafusion_common::DataFusionError; + type Error = DataFusionError; /// This function is called once for every node in the tree. 
/// Based on your needs implement either `pre_visit` (visit each node before its children/inputs) diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/parquet_index.rs index d6e17764442d..67fa038e2d31 100644 --- a/datafusion-examples/examples/parquet_index.rs +++ b/datafusion-examples/examples/parquet_index.rs @@ -19,37 +19,37 @@ use arrow::array::{ Array, ArrayRef, AsArray, BooleanArray, Int32Array, RecordBatch, StringArray, UInt64Array, }; -use arrow::datatypes::Int32Type; +use arrow::datatypes::{Int32Type, SchemaRef}; use arrow::util::pretty::pretty_format_batches; -use arrow_schema::SchemaRef; use async_trait::async_trait; use datafusion::catalog::Session; +use datafusion::common::{ + internal_datafusion_err, DFSchema, DataFusionError, Result, ScalarValue, +}; use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec}; use datafusion::datasource::TableProvider; use datafusion::execution::object_store::ObjectStoreUrl; +use datafusion::logical_expr::{ + utils::conjunction, TableProviderFilterPushDown, TableType, +}; use datafusion::parquet::arrow::arrow_reader::statistics::StatisticsConverter; use datafusion::parquet::arrow::{ arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter, }; +use datafusion::physical_expr::PhysicalExpr; use datafusion::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::*; -use datafusion_common::{ - internal_datafusion_err, DFSchema, DataFusionError, Result, ScalarValue, -}; -use datafusion_expr::{utils::conjunction, TableProviderFilterPushDown, TableType}; -use datafusion_physical_expr::PhysicalExpr; use std::any::Any; use std::collections::HashSet; use std::fmt::Display; -use std::fs::{self, DirEntry, File}; +use std::fs; +use std::fs::{DirEntry, File}; use std::ops::Range; use std::path::{Path, PathBuf}; -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, -}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; use tempfile::TempDir; use url::Url; @@ -233,7 +233,7 @@ impl TableProvider for IndexTableProvider { .transpose()? // if there are no filters, use a literal true to have a predicate // that always evaluates to true we can pass to the index - .unwrap_or_else(|| datafusion_physical_expr::expressions::lit(true)); + .unwrap_or_else(|| datafusion::physical_expr::expressions::lit(true)); // Use the index to find the files that might have data that matches the // predicate. Any file that can not have data that matches the predicate diff --git a/datafusion-examples/examples/parse_sql_expr.rs b/datafusion-examples/examples/parse_sql_expr.rs index d8f0778e19e3..5387e7c4a05d 100644 --- a/datafusion-examples/examples/parse_sql_expr.rs +++ b/datafusion-examples/examples/parse_sql_expr.rs @@ -16,14 +16,14 @@ // under the License. use arrow::datatypes::{DataType, Field, Schema}; +use datafusion::common::DFSchema; +use datafusion::logical_expr::{col, lit}; +use datafusion::sql::unparser::Unparser; use datafusion::{ assert_batches_eq, error::Result, prelude::{ParquetReadOptions, SessionContext}, }; -use datafusion_common::DFSchema; -use datafusion_expr::{col, lit}; -use datafusion_sql::unparser::Unparser; /// This example demonstrates the programmatic parsing of SQL expressions using /// the DataFusion [`SessionContext::parse_sql_expr`] API or the [`DataFrame::parse_sql_expr`] API. 
diff --git a/datafusion-examples/examples/plan_to_sql.rs b/datafusion-examples/examples/plan_to_sql.rs index cf1202498416..54483b143a16 100644 --- a/datafusion-examples/examples/plan_to_sql.rs +++ b/datafusion-examples/examples/plan_to_sql.rs @@ -15,24 +15,24 @@ // specific language governing permissions and limitations // under the License. +use datafusion::common::DFSchemaRef; use datafusion::error::Result; use datafusion::logical_expr::sqlparser::ast::Statement; -use datafusion::prelude::*; -use datafusion::sql::unparser::expr_to_sql; -use datafusion_common::DFSchemaRef; -use datafusion_expr::{ +use datafusion::logical_expr::{ Extension, LogicalPlan, LogicalPlanBuilder, UserDefinedLogicalNode, UserDefinedLogicalNodeCore, }; -use datafusion_sql::unparser::ast::{ +use datafusion::prelude::*; +use datafusion::sql::unparser::ast::{ DerivedRelationBuilder, QueryBuilder, RelationBuilder, SelectBuilder, }; -use datafusion_sql::unparser::dialect::CustomDialectBuilder; -use datafusion_sql::unparser::extension_unparser::UserDefinedLogicalNodeUnparser; -use datafusion_sql::unparser::extension_unparser::{ +use datafusion::sql::unparser::dialect::CustomDialectBuilder; +use datafusion::sql::unparser::expr_to_sql; +use datafusion::sql::unparser::extension_unparser::UserDefinedLogicalNodeUnparser; +use datafusion::sql::unparser::extension_unparser::{ UnparseToStatementResult, UnparseWithinStatementResult, }; -use datafusion_sql::unparser::{plan_to_sql, Unparser}; +use datafusion::sql::unparser::{plan_to_sql, Unparser}; use std::fmt; use std::sync::Arc; diff --git a/datafusion-examples/examples/planner_api.rs b/datafusion-examples/examples/planner_api.rs index 35cf766ba1af..e52f0d78682f 100644 --- a/datafusion-examples/examples/planner_api.rs +++ b/datafusion-examples/examples/planner_api.rs @@ -16,10 +16,10 @@ // under the License. use datafusion::error::Result; +use datafusion::logical_expr::{LogicalPlan, PlanType}; use datafusion::physical_plan::displayable; use datafusion::physical_planner::DefaultPhysicalPlanner; use datafusion::prelude::*; -use datafusion_expr::{LogicalPlan, PlanType}; /// This example demonstrates the process of converting logical plan /// into physical execution plans using DataFusion. diff --git a/datafusion-examples/examples/pruning.rs b/datafusion-examples/examples/pruning.rs index c090cd2bcca9..4c802bcdbda0 100644 --- a/datafusion-examples/examples/pruning.rs +++ b/datafusion-examples/examples/pruning.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +use std::collections::HashSet; +use std::sync::Arc; + use arrow::array::{ArrayRef, BooleanArray, Int32Array}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::common::{DFSchema, ScalarValue}; @@ -22,8 +25,6 @@ use datafusion::execution::context::ExecutionProps; use datafusion::physical_expr::create_physical_expr; use datafusion::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; use datafusion::prelude::*; -use std::collections::HashSet; -use std::sync::Arc; /// This example shows how to use DataFusion's `PruningPredicate` to prove /// filter expressions can never be true based on statistics such as min/max diff --git a/datafusion-examples/examples/regexp.rs b/datafusion-examples/examples/regexp.rs index 5419efd2faea..12d115b9b502 100644 --- a/datafusion-examples/examples/regexp.rs +++ b/datafusion-examples/examples/regexp.rs @@ -16,9 +16,9 @@ // specific language governing permissions and limitations // under the License. 
+use datafusion::common::{assert_batches_eq, assert_contains}; use datafusion::error::Result; use datafusion::prelude::*; -use datafusion_common::{assert_batches_eq, assert_contains}; /// This example demonstrates how to use the regexp_* functions /// diff --git a/datafusion-examples/examples/remote_catalog.rs b/datafusion-examples/examples/remote_catalog.rs index 38629328d71c..e44b0c9569f9 100644 --- a/datafusion-examples/examples/remote_catalog.rs +++ b/datafusion-examples/examples/remote_catalog.rs @@ -30,18 +30,18 @@ /// [Unity]: https://github.com/unitycatalog/unitycatalog /// [Hive]: https://hive.apache.org/ use arrow::array::record_batch; -use arrow_schema::{Field, Fields, Schema, SchemaRef}; +use arrow::datatypes::{Field, Fields, Schema, SchemaRef}; use async_trait::async_trait; use datafusion::catalog::TableProvider; +use datafusion::catalog::{AsyncSchemaProvider, Session}; use datafusion::common::Result; +use datafusion::common::{assert_batches_eq, internal_datafusion_err, plan_err}; use datafusion::execution::SendableRecordBatchStream; +use datafusion::logical_expr::{Expr, TableType}; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::{DataFrame, SessionContext}; -use datafusion_catalog::{AsyncSchemaProvider, Session}; -use datafusion_common::{assert_batches_eq, internal_datafusion_err, plan_err}; -use datafusion_expr::{Expr, TableType}; use futures::TryStreamExt; use std::any::Any; use std::sync::Arc; diff --git a/datafusion-examples/examples/simple_udaf.rs b/datafusion-examples/examples/simple_udaf.rs index ef97bf9763b0..82bde7c034a5 100644 --- a/datafusion-examples/examples/simple_udaf.rs +++ b/datafusion-examples/examples/simple_udaf.rs @@ -20,9 +20,9 @@ use datafusion::arrow::{ array::ArrayRef, array::Float32Array, datatypes::DataType, record_batch::RecordBatch, }; +use datafusion::common::cast::as_float64_array; use datafusion::{error::Result, physical_plan::Accumulator}; use datafusion::{logical_expr::Volatility, prelude::*, scalar::ScalarValue}; -use datafusion_common::cast::as_float64_array; use std::sync::Arc; // create local session context with an in-memory table diff --git a/datafusion-examples/examples/simple_udf.rs b/datafusion-examples/examples/simple_udf.rs index 6879a17f34be..5612e0939f70 100644 --- a/datafusion-examples/examples/simple_udf.rs +++ b/datafusion-examples/examples/simple_udf.rs @@ -24,10 +24,10 @@ use datafusion::{ logical_expr::Volatility, }; +use datafusion::common::cast::as_float64_array; use datafusion::error::Result; +use datafusion::logical_expr::ColumnarValue; use datafusion::prelude::*; -use datafusion_common::cast::as_float64_array; -use datafusion_expr::ColumnarValue; use std::sync::Arc; /// create local execution context with an in-memory table: diff --git a/datafusion-examples/examples/simple_udtf.rs b/datafusion-examples/examples/simple_udtf.rs index 7cf1ce87690e..75c7645c18a5 100644 --- a/datafusion-examples/examples/simple_udtf.rs +++ b/datafusion-examples/examples/simple_udtf.rs @@ -21,17 +21,17 @@ use async_trait::async_trait; use datafusion::arrow::datatypes::SchemaRef; use datafusion::arrow::record_batch::RecordBatch; use datafusion::catalog::Session; +use datafusion::catalog::TableFunctionImpl; +use datafusion::common::{plan_err, ScalarValue}; use datafusion::datasource::TableProvider; use datafusion::error::Result; use datafusion::execution::context::ExecutionProps; +use 
datafusion::logical_expr::simplify::SimplifyContext; +use datafusion::logical_expr::{Expr, TableType}; +use datafusion::optimizer::simplify_expressions::ExprSimplifier; use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::ExecutionPlan; -use datafusion::prelude::SessionContext; -use datafusion_catalog::TableFunctionImpl; -use datafusion_common::{plan_err, ScalarValue}; -use datafusion_expr::simplify::SimplifyContext; -use datafusion_expr::{Expr, TableType}; -use datafusion_optimizer::simplify_expressions::ExprSimplifier; +use datafusion::prelude::*; use std::fs::File; use std::io::Seek; use std::path::Path; diff --git a/datafusion-examples/examples/simple_udwf.rs b/datafusion-examples/examples/simple_udwf.rs index 22dfbbbf0c3a..1736ff00bd70 100644 --- a/datafusion-examples/examples/simple_udwf.rs +++ b/datafusion-examples/examples/simple_udwf.rs @@ -19,14 +19,13 @@ use std::sync::Arc; use arrow::{ array::{ArrayRef, AsArray, Float64Array}, - datatypes::Float64Type, + datatypes::{DataType, Float64Type}, }; -use arrow_schema::DataType; +use datafusion::common::ScalarValue; use datafusion::error::Result; +use datafusion::logical_expr::{PartitionEvaluator, Volatility, WindowFrame}; use datafusion::prelude::*; -use datafusion_common::ScalarValue; -use datafusion_expr::{PartitionEvaluator, Volatility, WindowFrame}; // create local execution context with `cars.csv` registered as a table named `cars` async fn create_context() -> Result { diff --git a/datafusion-examples/examples/sql_analysis.rs b/datafusion-examples/examples/sql_analysis.rs index 2158b8e4b016..d3826026a972 100644 --- a/datafusion-examples/examples/sql_analysis.rs +++ b/datafusion-examples/examples/sql_analysis.rs @@ -23,13 +23,13 @@ use std::sync::Arc; +use datafusion::common::tree_node::{TreeNode, TreeNodeRecursion}; use datafusion::common::Result; +use datafusion::logical_expr::LogicalPlan; use datafusion::{ datasource::MemTable, execution::context::{SessionConfig, SessionContext}, }; -use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_expr::LogicalPlan; use test_utils::tpcds::tpcds_schemas; /// Counts the total number of joins in a plan diff --git a/datafusion-examples/examples/sql_dialect.rs b/datafusion-examples/examples/sql_dialect.rs index 259f38216b80..16aa5be02635 100644 --- a/datafusion-examples/examples/sql_dialect.rs +++ b/datafusion-examples/examples/sql_dialect.rs @@ -18,7 +18,7 @@ use std::fmt::Display; use datafusion::error::Result; -use datafusion_sql::{ +use datafusion::sql::{ parser::{CopyToSource, CopyToStatement, DFParser, Statement}, sqlparser::{keywords::Keyword, parser::ParserError, tokenizer::Token}, }; diff --git a/datafusion-examples/examples/sql_frontend.rs b/datafusion-examples/examples/sql_frontend.rs index 839ee95eb181..c7afb79084bf 100644 --- a/datafusion-examples/examples/sql_frontend.rs +++ b/datafusion-examples/examples/sql_frontend.rs @@ -16,19 +16,20 @@ // under the License. 
use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion_common::config::ConfigOptions; -use datafusion_common::{plan_err, Result}; -use datafusion_expr::{ +use datafusion::common::plan_err; +use datafusion::config::ConfigOptions; +use datafusion::error::Result; +use datafusion::logical_expr::{ AggregateUDF, Expr, LogicalPlan, ScalarUDF, TableProviderFilterPushDown, TableSource, WindowUDF, }; -use datafusion_optimizer::{ +use datafusion::optimizer::{ Analyzer, AnalyzerRule, Optimizer, OptimizerConfig, OptimizerContext, OptimizerRule, }; -use datafusion_sql::planner::{ContextProvider, SqlToRel}; -use datafusion_sql::sqlparser::dialect::PostgreSqlDialect; -use datafusion_sql::sqlparser::parser::Parser; -use datafusion_sql::TableReference; +use datafusion::sql::planner::{ContextProvider, SqlToRel}; +use datafusion::sql::sqlparser::dialect::PostgreSqlDialect; +use datafusion::sql::sqlparser::parser::Parser; +use datafusion::sql::TableReference; use std::any::Any; use std::sync::Arc; diff --git a/datafusion-examples/examples/sql_query.rs b/datafusion-examples/examples/sql_query.rs index a6e7fe91dda5..0ac203cfb7e7 100644 --- a/datafusion-examples/examples/sql_query.rs +++ b/datafusion-examples/examples/sql_query.rs @@ -18,12 +18,12 @@ use datafusion::arrow::array::{UInt64Array, UInt8Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::arrow::record_batch::RecordBatch; +use datafusion::common::{assert_batches_eq, exec_datafusion_err}; use datafusion::datasource::file_format::parquet::ParquetFormat; use datafusion::datasource::listing::ListingOptions; use datafusion::datasource::MemTable; use datafusion::error::{DataFusionError, Result}; -use datafusion::prelude::SessionContext; -use datafusion_common::{assert_batches_eq, exec_datafusion_err}; +use datafusion::prelude::*; use object_store::local::LocalFileSystem; use std::path::Path; use std::sync::Arc; diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 33a90017bd7e..32b7213d952f 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -108,38 +108,51 @@ use crate::{DataFusionError, Result}; /// ``` /// /// NB: Misplaced commas may result in nonsensical errors -#[macro_export] macro_rules! config_namespace { ( - $(#[doc = $struct_d:tt])* - $vis:vis struct $struct_name:ident { - $( - $(#[doc = $d:tt])* - $field_vis:vis $field_name:ident : $field_type:ty, $(warn = $warn: expr,)? $(transform = $transform:expr,)? default = $default:expr - )*$(,)* - } + $(#[doc = $struct_d:tt])* // Struct-level documentation attributes + $(#[deprecated($($struct_depr:tt)*)])? // Optional struct-level deprecated attribute + $(#[allow($($struct_de:tt)*)])? + $vis:vis struct $struct_name:ident { + $( + $(#[doc = $d:tt])* // Field-level documentation attributes + $(#[deprecated($($field_depr:tt)*)])? // Optional field-level deprecated attribute + $(#[allow($($field_de:tt)*)])? + $field_vis:vis $field_name:ident : $field_type:ty, + $(warn = $warn:expr,)? + $(transform = $transform:expr,)? + default = $default:expr + )*$(,)* + } ) => { - - $(#[doc = $struct_d])* + $(#[doc = $struct_d])* // Apply struct documentation + $(#[deprecated($($struct_depr)*)])? // Apply struct deprecation + $(#[allow($($struct_de)*)])? #[derive(Debug, Clone, PartialEq)] - $vis struct $struct_name{ + $vis struct $struct_name { $( - $(#[doc = $d])* - $field_vis $field_name : $field_type, + $(#[doc = $d])* // Apply field documentation + $(#[deprecated($($field_depr)*)])? 
// Apply field deprecation + $(#[allow($($field_de)*)])? + $field_vis $field_name: $field_type, )* } impl ConfigField for $struct_name { fn set(&mut self, key: &str, value: &str) -> Result<()> { let (key, rem) = key.split_once('.').unwrap_or((key, "")); - match key { $( - stringify!($field_name) => { - $(let value = $transform(value);)? - $(log::warn!($warn);)? - self.$field_name.set(rem, value.as_ref()) - }, + stringify!($field_name) => { + // Safely apply deprecated attribute if present + // $(#[allow(deprecated)])? + { + $(let value = $transform(value);)? // Apply transformation if specified + $(log::warn!($warn);)? // Log warning if specified + #[allow(deprecated)] + self.$field_name.set(rem, value.as_ref()) + } + }, )* _ => return _config_err!( "Config value \"{}\" not found on {}", key, stringify!($struct_name) @@ -149,15 +162,16 @@ macro_rules! config_namespace { fn visit(&self, v: &mut V, key_prefix: &str, _description: &'static str) { $( - let key = format!(concat!("{}.", stringify!($field_name)), key_prefix); - let desc = concat!($($d),*).trim(); - self.$field_name.visit(v, key.as_str(), desc); + let key = format!(concat!("{}.", stringify!($field_name)), key_prefix); + let desc = concat!($($d),*).trim(); + #[allow(deprecated)] + self.$field_name.visit(v, key.as_str(), desc); )* } } - impl Default for $struct_name { fn default() -> Self { + #[allow(deprecated)] Self { $($field_name: $default),* } @@ -467,6 +481,9 @@ config_namespace! { /// (writing) Sets max statistics size for any column. If NULL, uses /// default parquet writer setting + /// max_statistics_size is deprecated, currently it is not being used + // TODO: remove once deprecated + #[deprecated(since = "45.0.0", note = "Setting does not do anything")] pub max_statistics_size: Option, default = Some(4096) /// (writing) Target maximum number of rows in each row group (defaults to 1M @@ -1598,19 +1615,23 @@ impl ConfigField for TableParquetOptions { macro_rules! config_namespace_with_hashmap { ( $(#[doc = $struct_d:tt])* + $(#[deprecated($($struct_depr:tt)*)])? // Optional struct-level deprecated attribute $vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* + $(#[deprecated($($field_depr:tt)*)])? // Optional field-level deprecated attribute $field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { $(#[doc = $struct_d])* + $(#[deprecated($($struct_depr)*)])? // Apply struct deprecation #[derive(Debug, Clone, PartialEq)] $vis struct $struct_name{ $( $(#[doc = $d])* + $(#[deprecated($($field_depr)*)])? // Apply field deprecation $field_vis $field_name : $field_type, )* } @@ -1621,6 +1642,8 @@ macro_rules! config_namespace_with_hashmap { match key { $( stringify!($field_name) => { + // Handle deprecated fields + #[allow(deprecated)] // Allow deprecated fields $(let value = $transform(value);)? self.$field_name.set(rem, value.as_ref()) }, @@ -1635,6 +1658,8 @@ macro_rules! config_namespace_with_hashmap { $( let key = format!(concat!("{}.", stringify!($field_name)), key_prefix); let desc = concat!($($d),*).trim(); + // Handle deprecated fields + #[allow(deprecated)] self.$field_name.visit(v, key.as_str(), desc); )* } @@ -1642,6 +1667,7 @@ macro_rules! config_namespace_with_hashmap { impl Default for $struct_name { fn default() -> Self { + #[allow(deprecated)] Self { $($field_name: $default),* } @@ -1653,7 +1679,7 @@ macro_rules! 
config_namespace_with_hashmap { let parts: Vec<&str> = key.splitn(2, "::").collect(); match parts.as_slice() { [inner_key, hashmap_key] => { - // Get or create the ColumnOptions for the specified column + // Get or create the struct for the specified key let inner_value = self .entry((*hashmap_key).to_owned()) .or_insert_with($struct_name::default); @@ -1669,6 +1695,7 @@ macro_rules! config_namespace_with_hashmap { $( let key = format!("{}.{field}::{}", key_prefix, column_name, field = stringify!($field_name)); let desc = concat!($($d),*).trim(); + #[allow(deprecated)] col_options.$field_name.visit(v, key.as_str(), desc); )* } @@ -1720,6 +1747,9 @@ config_namespace_with_hashmap! { /// Sets max statistics size for the column path. If NULL, uses /// default parquet options + /// max_statistics_size is deprecated, currently it is not being used + // TODO: remove once deprecated + #[deprecated(since = "45.0.0", note = "Setting does not do anything")] pub max_statistics_size: Option, default = None } } diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 3f06e11bb376..6a717d3c0c60 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -26,6 +26,7 @@ use crate::{ }; use arrow_schema::Schema; +// TODO: handle once deprecated #[allow(deprecated)] use parquet::{ arrow::ARROW_SCHEMA_META_KEY, @@ -157,6 +158,9 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv); } + // max_statistics_size is deprecated, currently it is not being used + // TODO: remove once deprecated + #[allow(deprecated)] if let Some(max_statistics_size) = options.max_statistics_size { builder = { #[allow(deprecated)] @@ -202,6 +206,7 @@ impl ParquetOptions { /// /// Note that this method does not include the key_value_metadata from [`TableParquetOptions`]. 
pub fn into_writer_properties_builder(&self) -> Result { + #[allow(deprecated)] let ParquetOptions { data_pagesize_limit, write_batch_size, @@ -452,6 +457,7 @@ mod tests { fn column_options_with_non_defaults( src_col_defaults: &ParquetOptions, ) -> ParquetColumnOptions { + #[allow(deprecated)] // max_statistics_size ParquetColumnOptions { compression: Some("zstd(22)".into()), dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v), @@ -472,6 +478,7 @@ mod tests { "1.0" }; + #[allow(deprecated)] // max_statistics_size ParquetOptions { data_pagesize_limit: 42, write_batch_size: 42, @@ -515,6 +522,7 @@ mod tests { ) -> ParquetColumnOptions { let bloom_filter_default_props = props.bloom_filter_properties(&col); + #[allow(deprecated)] // max_statistics_size ParquetColumnOptions { bloom_filter_enabled: Some(bloom_filter_default_props.is_some()), encoding: props.encoding(&col).map(|s| s.to_string()), @@ -535,7 +543,6 @@ mod tests { ), bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp), bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv), - #[allow(deprecated)] max_statistics_size: Some(props.max_statistics_size(&col)), } } @@ -569,6 +576,7 @@ mod tests { HashMap::from([(COL_NAME.into(), configured_col_props)]) }; + #[allow(deprecated)] // max_statistics_size TableParquetOptions { global: ParquetOptions { // global options diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index dd5736806eeb..55971f6f627c 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -45,6 +45,9 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; +use datafusion_physical_plan::projection::{ + all_alias_free_columns, new_projections_for_columns, ProjectionExec, +}; use futures::{StreamExt, TryStreamExt}; use object_store::buffered::BufWriter; use object_store::{GetOptions, GetResultPayload, ObjectStore}; @@ -479,6 +482,36 @@ impl ExecutionPlan for CsvExec { cache: self.cache.clone(), })) } + + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If there is any non-column or alias-carrier expression, Projection should not be removed. + // This process can be moved into CsvExec, but it would be an overlap of their responsibility. + Ok(all_alias_free_columns(projection.expr()).then(|| { + let mut file_scan = self.base_config().clone(); + let new_projections = new_projections_for_columns( + projection, + &file_scan + .projection + .unwrap_or((0..self.schema().fields().len()).collect()), + ); + file_scan.projection = Some(new_projections); + + Arc::new( + CsvExec::builder(file_scan) + .with_has_header(self.has_header()) + .with_delimeter(self.delimiter()) + .with_quote(self.quote()) + .with_escape(self.escape()) + .with_comment(self.comment()) + .with_newlines_in_values(self.newlines_in_values()) + .with_file_compression_type(self.file_compression_type) + .build(), + ) as _ + })) + } } /// A Config for [`CsvOpener`] diff --git a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs index 085f44191b8a..25bbe86e8b5c 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/mod.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/mod.rs @@ -17,6 +17,15 @@ //! 
[`ParquetExec`] Execution plan for reading Parquet files +mod access_plan; +mod metrics; +mod opener; +mod page_filter; +mod reader; +mod row_filter; +mod row_group_filter; +mod writer; + use std::any::Any; use std::fmt::Debug; use std::sync::Arc; @@ -27,11 +36,13 @@ use crate::datasource::physical_plan::{ parquet::page_filter::PagePruningAccessPlanFilter, DisplayAs, FileGroupPartitioner, FileScanConfig, }; +use crate::datasource::schema_adapter::{ + DefaultSchemaAdapterFactory, SchemaAdapterFactory, +}; use crate::{ config::{ConfigOptions, TableParquetOptions}, error::Result, execution::context::TaskContext, - physical_optimizer::pruning::PruningPredicate, physical_plan::{ metrics::{ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}, DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, @@ -39,33 +50,21 @@ use crate::{ }, }; +pub use access_plan::{ParquetAccessPlan, RowGroupAccess}; use arrow::datatypes::SchemaRef; use datafusion_common::Constraints; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalExpr}; +use datafusion_physical_optimizer::pruning::PruningPredicate; use datafusion_physical_plan::execution_plan::{Boundedness, EmissionType}; - -use itertools::Itertools; -use log::debug; - -mod access_plan; -mod metrics; -mod opener; -mod page_filter; -mod reader; -mod row_filter; -mod row_group_filter; -mod writer; - -use crate::datasource::schema_adapter::{ - DefaultSchemaAdapterFactory, SchemaAdapterFactory, -}; -pub use access_plan::{ParquetAccessPlan, RowGroupAccess}; pub use metrics::ParquetFileMetrics; use opener::ParquetOpener; pub use reader::{DefaultParquetFileReaderFactory, ParquetFileReaderFactory}; pub use row_filter::can_expr_be_pushed_down_with_schemas; pub use writer::plan_to_parquet; +use itertools::Itertools; +use log::debug; + /// Execution plan for reading one or more Parquet files. /// /// ```text diff --git a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs index 883f296f3b95..a1f8f0172ce4 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/opener.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/opener.rs @@ -17,6 +17,8 @@ //! 
[`ParquetOpener`] for opening Parquet files +use std::sync::Arc; + use crate::datasource::file_format::{ coerce_file_schema_to_string_type, coerce_file_schema_to_view_type, }; @@ -29,17 +31,18 @@ use crate::datasource::physical_plan::{ FileMeta, FileOpenFuture, FileOpener, ParquetFileMetrics, ParquetFileReaderFactory, }; use crate::datasource::schema_adapter::SchemaAdapterFactory; -use crate::physical_optimizer::pruning::PruningPredicate; + use arrow_schema::{ArrowError, SchemaRef}; use datafusion_common::{exec_err, Result}; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_optimizer::pruning::PruningPredicate; use datafusion_physical_plan::metrics::ExecutionPlanMetricsSet; + use futures::{StreamExt, TryStreamExt}; use log::debug; use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask}; -use std::sync::Arc; /// Implements [`FileOpener`] for a parquet file pub(super) struct ParquetOpener { diff --git a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs index 4d0a8451a0d4..dcc4b0bc8150 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/page_filter.rs @@ -17,14 +17,19 @@ //! Contains code to filter entire pages +use std::collections::HashSet; +use std::sync::Arc; + use super::metrics::ParquetFileMetrics; use crate::datasource::physical_plan::parquet::ParquetAccessPlan; -use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; + use arrow::array::BooleanArray; use arrow::{array::ArrayRef, datatypes::SchemaRef}; use arrow_schema::Schema; use datafusion_common::ScalarValue; use datafusion_physical_expr::{split_conjunction, PhysicalExpr}; +use datafusion_physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; + use log::{debug, trace}; use parquet::arrow::arrow_reader::statistics::StatisticsConverter; use parquet::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex}; @@ -34,8 +39,6 @@ use parquet::{ arrow::arrow_reader::{RowSelection, RowSelector}, file::metadata::{ParquetMetaData, RowGroupMetaData}, }; -use std::collections::HashSet; -use std::sync::Arc; /// Filters a [`ParquetAccessPlan`] based on the [Parquet PageIndex], if present /// diff --git a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs index 39d86fe857f7..b008157a8324 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet/row_group_filter.rs @@ -15,11 +15,17 @@ // specific language governing permissions and limitations // under the License. 
+use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use super::{ParquetAccessPlan, ParquetFileMetrics}; use crate::datasource::listing::FileRange; -use crate::physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; + use arrow::{array::ArrayRef, datatypes::Schema}; use arrow_array::BooleanArray; use datafusion_common::{Column, Result, ScalarValue}; +use datafusion_physical_optimizer::pruning::{PruningPredicate, PruningStatistics}; + use parquet::arrow::arrow_reader::statistics::StatisticsConverter; use parquet::arrow::parquet_column; use parquet::basic::Type; @@ -30,10 +36,6 @@ use parquet::{ bloom_filter::Sbbf, file::metadata::RowGroupMetaData, }; -use std::collections::{HashMap, HashSet}; -use std::sync::Arc; - -use super::{ParquetAccessPlan, ParquetFileMetrics}; /// Reduces the [`ParquetAccessPlan`] based on row group level metadata. /// diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index c5874deb6ed5..54d505e1b4b9 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -17,6 +17,12 @@ //! [`SessionState`]: information required to run queries in a session +use std::any::Any; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet}; +use std::fmt::Debug; +use std::sync::Arc; + use crate::catalog::{CatalogProviderList, SchemaProvider, TableProviderFactory}; use crate::catalog_common::information_schema::{ InformationSchemaProvider, INFORMATION_SCHEMA, @@ -27,11 +33,9 @@ use crate::datasource::file_format::{format_as_file_type, FileFormatFactory}; use crate::datasource::provider_as_source; use crate::execution::context::{EmptySerializerRegistry, FunctionFactory, QueryPlanner}; use crate::execution::SessionStateDefaults; -use crate::physical_optimizer::optimizer::PhysicalOptimizer; use crate::physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner}; + use arrow_schema::{DataType, SchemaRef}; -use async_trait::async_trait; -use chrono::{DateTime, Utc}; use datafusion_catalog::{Session, TableFunction, TableFunctionImpl}; use datafusion_common::alias::AliasGenerator; use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions}; @@ -61,20 +65,19 @@ use datafusion_optimizer::{ }; use datafusion_physical_expr::create_physical_expr; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_optimizer::optimizer::PhysicalOptimizer; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::ExecutionPlan; use datafusion_sql::parser::{DFParser, Statement}; use datafusion_sql::planner::{ContextProvider, ParserOptions, PlannerContext, SqlToRel}; + +use async_trait::async_trait; +use chrono::{DateTime, Utc}; use itertools::Itertools; use log::{debug, info}; use object_store::ObjectStore; use sqlparser::ast::{Expr as SQLExpr, ExprWithAlias as SQLExprWithAlias}; use sqlparser::dialect::dialect_from_str; -use std::any::Any; -use std::collections::hash_map::Entry; -use std::collections::{HashMap, HashSet}; -use std::fmt::Debug; -use std::sync::Arc; use url::Url; use uuid::Uuid; diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index e9501bd37a8a..5d917e1673f1 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -207,7 +207,7 @@ //! [`QueryPlanner`]: execution::context::QueryPlanner //! [`OptimizerRule`]: datafusion_optimizer::optimizer::OptimizerRule //! 
[`AnalyzerRule`]: datafusion_optimizer::analyzer::AnalyzerRule
-//! [`PhysicalOptimizerRule`]: crate::physical_optimizer::PhysicalOptimizerRule
+//! [`PhysicalOptimizerRule`]: datafusion_physical_optimizer::PhysicalOptimizerRule
//!
//! ## Query Planning and Execution Overview
//!
@@ -349,7 +349,7 @@
//! filtering can never be `true` using additional statistical information.
//!
//! [cp_solver]: crate::physical_expr::intervals::cp_solver
-//! [`PruningPredicate`]: crate::physical_optimizer::pruning::PruningPredicate
+//! [`PruningPredicate`]: datafusion_physical_optimizer::pruning::PruningPredicate
//! [`PhysicalExpr`]: crate::physical_plan::PhysicalExpr
//!
//! ## Execution
@@ -624,19 +624,41 @@
//!
//! ## Crate Organization
//!
-//! DataFusion is organized into multiple crates to enforce modularity
-//! and improve compilation times. The crates are:
+//! Most users interact with DataFusion via this crate (`datafusion`), which re-exports
+//! all functionality needed to build and execute queries.
+//!
+//! There are three other crates that provide additional functionality and
+//! must be used directly:
+//! * [`datafusion_proto`]: Plan serialization and deserialization
+//! * [`datafusion_substrait`]: Support for the substrait plan serialization format
+//! * [`datafusion_sqllogictest`]: The DataFusion SQL logic test runner
+//!
+//! [`datafusion_proto`]: https://crates.io/crates/datafusion-proto
+//! [`datafusion_substrait`]: https://crates.io/crates/datafusion-substrait
+//! [`datafusion_sqllogictest`]: https://crates.io/crates/datafusion-sqllogictest
+//!
+//! DataFusion is internally split into multiple sub crates to
+//! enforce modularity and improve compilation times. See the
+//! [list of modules](#modules) for all available sub-crates. Major ones are:
//!
//! * [datafusion_common]: Common traits and types
+//! * [datafusion_catalog]: Catalog APIs such as [`SchemaProvider`] and [`CatalogProvider`]
//! * [datafusion_execution]: State and structures needed for execution
-//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structure
+//! * [datafusion_expr]: [`LogicalPlan`], [`Expr`] and related logical planning structures
//! * [datafusion_functions]: Scalar function packages
+//! * [datafusion_functions_aggregate]: Aggregate functions such as `MIN`, `MAX`, `SUM`, etc.
//! * [datafusion_functions_nested]: Scalar function packages for `ARRAY`s, `MAP`s and `STRUCT`s
+//! * [datafusion_functions_table]: Table functions such as `GENERATE_SERIES`
+//! * [datafusion_functions_window]: Window functions such as `ROW_NUMBER`, `RANK`, etc.
//! * [datafusion_optimizer]: [`OptimizerRule`]s and [`AnalyzerRule`]s
//! * [datafusion_physical_expr]: [`PhysicalExpr`] and related expressions
//! * [datafusion_physical_plan]: [`ExecutionPlan`] and related expressions
+//! * [datafusion_physical_optimizer]: [`PhysicalOptimizerRule`]s that rewrite [`ExecutionPlan`]s
//! * [datafusion_sql]: SQL planner ([`SqlToRel`])
//!
+//! [`SchemaProvider`]: datafusion_catalog::SchemaProvider
+//! [`CatalogProvider`]: datafusion_catalog::CatalogProvider
+//!
//! ## Citing DataFusion in Academic Papers
//!
//! You can use the following citation to reference DataFusion in academic papers:
@@ -659,7 +681,7 @@
//! [`OptimizerRule`]: optimizer::optimizer::OptimizerRule
//! [`ExecutionPlan`]: physical_plan::ExecutionPlan
//! [`PhysicalPlanner`]: physical_planner::PhysicalPlanner
-//! [`PhysicalOptimizerRule`]: datafusion::physical_optimizer::optimizer::PhysicalOptimizerRule
+//! 
[`PhysicalOptimizerRule`]: datafusion_physical_optimizer::PhysicalOptimizerRule //! [`Schema`]: arrow::datatypes::Schema //! [`PhysicalExpr`]: physical_plan::PhysicalExpr //! [`RecordBatch`]: arrow::record_batch::RecordBatch @@ -677,7 +699,6 @@ pub mod dataframe; pub mod datasource; pub mod error; pub mod execution; -pub mod physical_optimizer; pub mod physical_planner; pub mod prelude; pub mod scalar; @@ -721,6 +742,11 @@ pub mod optimizer { pub use datafusion_optimizer::*; } +/// re-export of [`datafusion_physical_optimizer`] crate +pub mod physical_optimizer { + pub use datafusion_physical_optimizer::*; +} + /// re-export of [`datafusion_physical_expr`] crate pub mod physical_expr_common { pub use datafusion_physical_expr_common::*; diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs deleted file mode 100644 index e6aa15a4c09d..000000000000 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Optimizer that rewrites [`ExecutionPlan`]s. -//! -//! These rules take advantage of physical plan properties , such as -//! "Repartition" or "Sortedness" -//! -//! [`ExecutionPlan`]: crate::physical_plan::ExecutionPlan - -pub mod optimizer; -pub mod projection_pushdown; - -pub use datafusion_physical_optimizer::*; diff --git a/datafusion/core/src/physical_optimizer/optimizer.rs b/datafusion/core/src/physical_optimizer/optimizer.rs deleted file mode 100644 index 7a6f991121ef..000000000000 --- a/datafusion/core/src/physical_optimizer/optimizer.rs +++ /dev/null @@ -1,124 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
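With `physical_optimizer` now a thin re-export of `datafusion_physical_optimizer`, downstream code that defines its own rules can keep compiling against the `datafusion` facade. A minimal sketch of that pattern — the `NoopRule` name and its pass-through behavior are invented for illustration, only the trait and import paths come from this patch:

```rust
use std::sync::Arc;

use datafusion::common::config::ConfigOptions;
use datafusion::common::Result;
// The trait is re-exported from the `datafusion-physical-optimizer` crate.
use datafusion::physical_optimizer::PhysicalOptimizerRule;
use datafusion::physical_plan::ExecutionPlan;

/// A hypothetical rule that returns the plan unchanged; it exists only to
/// show the post-move import paths and the trait's required methods.
#[derive(Debug)]
struct NoopRule;

impl PhysicalOptimizerRule for NoopRule {
    fn optimize(
        &self,
        plan: Arc<dyn ExecutionPlan>,
        _config: &ConfigOptions,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        Ok(plan)
    }

    fn name(&self) -> &str {
        "noop_rule"
    }

    fn schema_check(&self) -> bool {
        true
    }
}

fn main() {
    // The rule can be boxed into the same trait-object type the optimizer stores.
    let _rule: Arc<dyn PhysicalOptimizerRule + Send + Sync> = Arc::new(NoopRule);
}
```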
Physical optimizer traits - -use datafusion_physical_optimizer::PhysicalOptimizerRule; -use std::sync::Arc; - -use super::projection_pushdown::ProjectionPushdown; -use super::update_aggr_exprs::OptimizeAggregateOrder; -use crate::physical_optimizer::aggregate_statistics::AggregateStatistics; -use crate::physical_optimizer::coalesce_batches::CoalesceBatches; -use crate::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; -use crate::physical_optimizer::enforce_distribution::EnforceDistribution; -use crate::physical_optimizer::enforce_sorting::EnforceSorting; -use crate::physical_optimizer::join_selection::JoinSelection; -use crate::physical_optimizer::limit_pushdown::LimitPushdown; -use crate::physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; -use crate::physical_optimizer::output_requirements::OutputRequirements; -use crate::physical_optimizer::sanity_checker::SanityCheckPlan; -use crate::physical_optimizer::topk_aggregation::TopKAggregation; - -/// A rule-based physical optimizer. -#[derive(Clone, Debug)] -pub struct PhysicalOptimizer { - /// All rules to apply - pub rules: Vec>, -} - -impl Default for PhysicalOptimizer { - fn default() -> Self { - Self::new() - } -} - -impl PhysicalOptimizer { - /// Create a new optimizer using the recommended list of rules - pub fn new() -> Self { - let rules: Vec> = vec![ - // If there is a output requirement of the query, make sure that - // this information is not lost across different rules during optimization. - Arc::new(OutputRequirements::new_add_mode()), - Arc::new(AggregateStatistics::new()), - // Statistics-based join selection will change the Auto mode to a real join implementation, - // like collect left, or hash join, or future sort merge join, which will influence the - // EnforceDistribution and EnforceSorting rules as they decide whether to add additional - // repartitioning and local sorting steps to meet distribution and ordering requirements. - // Therefore, it should run before EnforceDistribution and EnforceSorting. - Arc::new(JoinSelection::new()), - // The LimitedDistinctAggregation rule should be applied before the EnforceDistribution rule, - // as that rule may inject other operations in between the different AggregateExecs. - // Applying the rule early means only directly-connected AggregateExecs must be examined. - Arc::new(LimitedDistinctAggregation::new()), - // The EnforceDistribution rule is for adding essential repartitioning to satisfy distribution - // requirements. Please make sure that the whole plan tree is determined before this rule. - // This rule increases parallelism if doing so is beneficial to the physical plan; i.e. at - // least one of the operators in the plan benefits from increased parallelism. - Arc::new(EnforceDistribution::new()), - // The CombinePartialFinalAggregate rule should be applied after the EnforceDistribution rule - Arc::new(CombinePartialFinalAggregate::new()), - // The EnforceSorting rule is for adding essential local sorting to satisfy the required - // ordering. Please make sure that the whole plan tree is determined before this rule. - // Note that one should always run this rule after running the EnforceDistribution rule - // as the latter may break local sorting requirements. 
- Arc::new(EnforceSorting::new()), - // Run once after the local sorting requirement is changed - Arc::new(OptimizeAggregateOrder::new()), - // TODO: `try_embed_to_hash_join` in the ProjectionPushdown rule would be block by the CoalesceBatches, so add it before CoalesceBatches. Maybe optimize it in the future. - Arc::new(ProjectionPushdown::new()), - // The CoalesceBatches rule will not influence the distribution and ordering of the - // whole plan tree. Therefore, to avoid influencing other rules, it should run last. - Arc::new(CoalesceBatches::new()), - // Remove the ancillary output requirement operator since we are done with the planning - // phase. - Arc::new(OutputRequirements::new_remove_mode()), - // The aggregation limiter will try to find situations where the accumulator count - // is not tied to the cardinality, i.e. when the output of the aggregation is passed - // into an `order by max(x) limit y`. In this case it will copy the limit value down - // to the aggregation, allowing it to use only y number of accumulators. - Arc::new(TopKAggregation::new()), - // The ProjectionPushdown rule tries to push projections towards - // the sources in the execution plan. As a result of this process, - // a projection can disappear if it reaches the source providers, and - // sequential projections can merge into one. Even if these two cases - // are not present, the load of executors such as join or union will be - // reduced by narrowing their input tables. - Arc::new(ProjectionPushdown::new()), - // The LimitPushdown rule tries to push limits down as far as possible, - // replacing operators with fetching variants, or adding limits - // past operators that support limit pushdown. - Arc::new(LimitPushdown::new()), - // The SanityCheckPlan rule checks whether the order and - // distribution requirements of each node in the plan - // is satisfied. It will also reject non-runnable query - // plans that use pipeline-breaking operators on infinite - // input(s). The rule generates a diagnostic error - // message for invalid plans. It makes no changes to the - // given query plan; i.e. it only acts as a final - // gatekeeping rule. - Arc::new(SanityCheckPlan::new()), - ]; - - Self::with_rules(rules) - } - - /// Create a new optimizer with the given rules - pub fn with_rules(rules: Vec>) -> Self { - Self { rules } - } -} diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs deleted file mode 100644 index cf8d5e352ef1..000000000000 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ /dev/null @@ -1,2792 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! This file implements the `ProjectionPushdown` physical optimization rule. -//! 
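For reference, the `PhysicalOptimizer` deleted above moves into `datafusion-physical-optimizer` with the same carefully ordered default rule list. A short sketch, assuming the `optimizer` module is re-exported unchanged through the `datafusion` facade, of inspecting those defaults (or supplying a custom list via `with_rules`):

```rust
use datafusion::physical_optimizer::optimizer::PhysicalOptimizer;
use datafusion::physical_optimizer::PhysicalOptimizerRule;

fn main() {
    // `PhysicalOptimizer::new()` builds the recommended rule list in the order
    // documented in the deleted file; `with_rules` accepts a custom list instead.
    let optimizer = PhysicalOptimizer::new();
    for rule in &optimizer.rules {
        println!("{}", rule.name());
    }
}
```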
The function [`remove_unnecessary_projections`] tries to push down all -//! projections one by one if the operator below is amenable to this. If a -//! projection reaches a source, it can even disappear from the plan entirely. - -use std::collections::HashMap; -use std::sync::Arc; - -use super::output_requirements::OutputRequirementExec; -use crate::datasource::physical_plan::CsvExec; -use crate::error::Result; -use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec; -use crate::physical_plan::filter::FilterExec; -use crate::physical_plan::joins::utils::{ColumnIndex, JoinFilter}; -use crate::physical_plan::joins::{ - CrossJoinExec, HashJoinExec, NestedLoopJoinExec, SortMergeJoinExec, - SymmetricHashJoinExec, -}; -use crate::physical_plan::memory::MemoryExec; -use crate::physical_plan::projection::ProjectionExec; -use crate::physical_plan::repartition::RepartitionExec; -use crate::physical_plan::sorts::sort::SortExec; -use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; -use crate::physical_plan::{Distribution, ExecutionPlan, ExecutionPlanProperties}; - -use arrow_schema::SchemaRef; -use datafusion_common::config::ConfigOptions; -use datafusion_common::tree_node::{ - Transformed, TransformedResult, TreeNode, TreeNodeRecursion, -}; -use datafusion_common::{internal_err, JoinSide}; -use datafusion_physical_expr::expressions::{Column, Literal}; -use datafusion_physical_expr::{ - utils::collect_columns, Partitioning, PhysicalExpr, PhysicalExprRef, - PhysicalSortExpr, PhysicalSortRequirement, -}; -use datafusion_physical_plan::joins::utils::{JoinOn, JoinOnRef}; -use datafusion_physical_plan::streaming::StreamingTableExec; -use datafusion_physical_plan::union::UnionExec; - -use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; -use datafusion_physical_optimizer::PhysicalOptimizerRule; -use itertools::Itertools; - -/// This rule inspects [`ProjectionExec`]'s in the given physical plan and tries to -/// remove or swap with its child. -#[derive(Default, Debug)] -pub struct ProjectionPushdown {} - -impl ProjectionPushdown { - #[allow(missing_docs)] - pub fn new() -> Self { - Self {} - } -} - -impl PhysicalOptimizerRule for ProjectionPushdown { - fn optimize( - &self, - plan: Arc, - _config: &ConfigOptions, - ) -> Result> { - plan.transform_down(remove_unnecessary_projections).data() - } - - fn name(&self) -> &str { - "ProjectionPushdown" - } - - fn schema_check(&self) -> bool { - true - } -} - -/// This function checks if `plan` is a [`ProjectionExec`], and inspects its -/// input(s) to test whether it can push `plan` under its input(s). This function -/// will operate on the entire tree and may ultimately remove `plan` entirely -/// by leveraging source providers with built-in projection capabilities. -pub fn remove_unnecessary_projections( - plan: Arc, -) -> Result>> { - let maybe_modified = if let Some(projection) = - plan.as_any().downcast_ref::() - { - // If the projection does not cause any change on the input, we can - // safely remove it: - if is_projection_removable(projection) { - return Ok(Transformed::yes(Arc::clone(projection.input()))); - } - // If it does, check if we can push it under its child(ren): - let input = projection.input().as_any(); - if let Some(csv) = input.downcast_ref::() { - try_swapping_with_csv(projection, csv) - } else if let Some(memory) = input.downcast_ref::() { - try_swapping_with_memory(projection, memory)? 
- } else if let Some(child_projection) = input.downcast_ref::() { - let maybe_unified = try_unifying_projections(projection, child_projection)?; - return if let Some(new_plan) = maybe_unified { - // To unify 3 or more sequential projections: - remove_unnecessary_projections(new_plan) - .data() - .map(Transformed::yes) - } else { - Ok(Transformed::no(plan)) - }; - } else if let Some(output_req) = input.downcast_ref::() { - try_swapping_with_output_req(projection, output_req)? - } else if input.is::() { - try_swapping_with_coalesce_partitions(projection)? - } else if let Some(filter) = input.downcast_ref::() { - try_swapping_with_filter(projection, filter)?.map_or_else( - || try_embed_projection(projection, filter), - |e| Ok(Some(e)), - )? - } else if let Some(repartition) = input.downcast_ref::() { - try_swapping_with_repartition(projection, repartition)? - } else if let Some(sort) = input.downcast_ref::() { - try_swapping_with_sort(projection, sort)? - } else if let Some(spm) = input.downcast_ref::() { - try_swapping_with_sort_preserving_merge(projection, spm)? - } else if let Some(union) = input.downcast_ref::() { - try_pushdown_through_union(projection, union)? - } else if let Some(hash_join) = input.downcast_ref::() { - try_pushdown_through_hash_join(projection, hash_join)? - } else if let Some(cross_join) = input.downcast_ref::() { - try_swapping_with_cross_join(projection, cross_join)? - } else if let Some(nl_join) = input.downcast_ref::() { - try_pushdown_through_nested_loop_join(projection, nl_join)? - } else if let Some(sm_join) = input.downcast_ref::() { - try_swapping_with_sort_merge_join(projection, sm_join)? - } else if let Some(sym_join) = input.downcast_ref::() { - try_swapping_with_sym_hash_join(projection, sym_join)? - } else if let Some(ste) = input.downcast_ref::() { - try_swapping_with_streaming_table(projection, ste)? - } else { - // If the input plan of the projection is not one of the above, we - // conservatively assume that pushing the projection down may hurt. - // When adding new operators, consider adding them here if you - // think pushing projections under them is beneficial. - None - } - } else { - return Ok(Transformed::no(plan)); - }; - - Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) -} - -/// Tries to embed `projection` to its input (`csv`). If possible, returns -/// [`CsvExec`] as the top plan. Otherwise, returns `None`. -fn try_swapping_with_csv( - projection: &ProjectionExec, - csv: &CsvExec, -) -> Option> { - // If there is any non-column or alias-carrier expression, Projection should not be removed. - // This process can be moved into CsvExec, but it would be an overlap of their responsibility. - all_alias_free_columns(projection.expr()).then(|| { - let mut file_scan = csv.base_config().clone(); - let new_projections = new_projections_for_columns( - projection, - &file_scan - .projection - .unwrap_or((0..csv.schema().fields().len()).collect()), - ); - file_scan.projection = Some(new_projections); - - Arc::new( - CsvExec::builder(file_scan) - .with_has_header(csv.has_header()) - .with_delimeter(csv.delimiter()) - .with_quote(csv.quote()) - .with_escape(csv.escape()) - .with_comment(csv.comment()) - .with_newlines_in_values(csv.newlines_in_values()) - .with_file_compression_type(csv.file_compression_type) - .build(), - ) as _ - }) -} - -/// Tries to embed `projection` to its input (`memory`). If possible, returns -/// [`MemoryExec`] as the top plan. Otherwise, returns `None`. 
-fn try_swapping_with_memory( - projection: &ProjectionExec, - memory: &MemoryExec, -) -> Result>> { - // If there is any non-column or alias-carrier expression, Projection should not be removed. - // This process can be moved into MemoryExec, but it would be an overlap of their responsibility. - all_alias_free_columns(projection.expr()) - .then(|| { - let all_projections = (0..memory.schema().fields().len()).collect(); - let new_projections = new_projections_for_columns( - projection, - memory.projection().as_ref().unwrap_or(&all_projections), - ); - - MemoryExec::try_new( - memory.partitions(), - memory.original_schema(), - Some(new_projections), - ) - .map(|e| Arc::new(e) as _) - }) - .transpose() -} - -/// Tries to embed `projection` to its input (`streaming table`). -/// If possible, returns [`StreamingTableExec`] as the top plan. Otherwise, -/// returns `None`. -fn try_swapping_with_streaming_table( - projection: &ProjectionExec, - streaming_table: &StreamingTableExec, -) -> Result>> { - if !all_alias_free_columns(projection.expr()) { - return Ok(None); - } - - let streaming_table_projections = streaming_table - .projection() - .as_ref() - .map(|i| i.as_ref().to_vec()); - let new_projections = new_projections_for_columns( - projection, - &streaming_table_projections - .unwrap_or((0..streaming_table.schema().fields().len()).collect()), - ); - - let mut lex_orderings = vec![]; - for lex_ordering in streaming_table.projected_output_ordering().into_iter() { - let mut orderings = LexOrdering::default(); - for order in lex_ordering { - let Some(new_ordering) = update_expr(&order.expr, projection.expr(), false)? - else { - return Ok(None); - }; - orderings.push(PhysicalSortExpr { - expr: new_ordering, - options: order.options, - }); - } - lex_orderings.push(orderings); - } - - StreamingTableExec::try_new( - Arc::clone(streaming_table.partition_schema()), - streaming_table.partitions().clone(), - Some(new_projections.as_ref()), - lex_orderings, - streaming_table.is_infinite(), - streaming_table.limit(), - ) - .map(|e| Some(Arc::new(e) as _)) -} - -/// Unifies `projection` with its input (which is also a [`ProjectionExec`]). -fn try_unifying_projections( - projection: &ProjectionExec, - child: &ProjectionExec, -) -> Result>> { - let mut projected_exprs = vec![]; - let mut column_ref_map: HashMap = HashMap::new(); - - // Collect the column references usage in the outer projection. - projection.expr().iter().for_each(|(expr, _)| { - expr.apply(|expr| { - Ok({ - if let Some(column) = expr.as_any().downcast_ref::() { - *column_ref_map.entry(column.clone()).or_default() += 1; - } - TreeNodeRecursion::Continue - }) - }) - .unwrap(); - }); - - // Merging these projections is not beneficial, e.g - // If an expression is not trivial and it is referred more than 1, unifies projections will be - // beneficial as caching mechanism for non-trivial computations. - // See discussion in: https://github.com/apache/datafusion/issues/8296 - if column_ref_map.iter().any(|(column, count)| { - *count > 1 && !is_expr_trivial(&Arc::clone(&child.expr()[column.index()].0)) - }) { - return Ok(None); - } - - for (expr, alias) in projection.expr() { - // If there is no match in the input projection, we cannot unify these - // projections. This case will arise if the projection expression contains - // a `PhysicalExpr` variant `update_expr` doesn't support. - let Some(expr) = update_expr(expr, child.expr(), true)? 
else { - return Ok(None); - }; - projected_exprs.push((expr, alias.clone())); - } - - ProjectionExec::try_new(projected_exprs, Arc::clone(child.input())) - .map(|e| Some(Arc::new(e) as _)) -} - -/// Checks if the given expression is trivial. -/// An expression is considered trivial if it is either a `Column` or a `Literal`. -fn is_expr_trivial(expr: &Arc) -> bool { - expr.as_any().downcast_ref::().is_some() - || expr.as_any().downcast_ref::().is_some() -} - -/// Tries to swap `projection` with its input (`output_req`). If possible, -/// performs the swap and returns [`OutputRequirementExec`] as the top plan. -/// Otherwise, returns `None`. -fn try_swapping_with_output_req( - projection: &ProjectionExec, - output_req: &OutputRequirementExec, -) -> Result>> { - // If the projection does not narrow the schema, we should not try to push it down: - if projection.expr().len() >= projection.input().schema().fields().len() { - return Ok(None); - } - - let mut updated_sort_reqs = LexRequirement::new(vec![]); - // None or empty_vec can be treated in the same way. - if let Some(reqs) = &output_req.required_input_ordering()[0] { - for req in &reqs.inner { - let Some(new_expr) = update_expr(&req.expr, projection.expr(), false)? else { - return Ok(None); - }; - updated_sort_reqs.push(PhysicalSortRequirement { - expr: new_expr, - options: req.options, - }); - } - } - - let dist_req = match &output_req.required_input_distribution()[0] { - Distribution::HashPartitioned(exprs) => { - let mut updated_exprs = vec![]; - for expr in exprs { - let Some(new_expr) = update_expr(expr, projection.expr(), false)? else { - return Ok(None); - }; - updated_exprs.push(new_expr); - } - Distribution::HashPartitioned(updated_exprs) - } - dist => dist.clone(), - }; - - make_with_child(projection, &output_req.input()) - .map(|input| { - OutputRequirementExec::new( - input, - (!updated_sort_reqs.is_empty()).then_some(updated_sort_reqs), - dist_req, - ) - }) - .map(|e| Some(Arc::new(e) as _)) -} - -/// Tries to swap `projection` with its input, which is known to be a -/// [`CoalescePartitionsExec`]. If possible, performs the swap and returns -/// [`CoalescePartitionsExec`] as the top plan. Otherwise, returns `None`. -fn try_swapping_with_coalesce_partitions( - projection: &ProjectionExec, -) -> Result>> { - // If the projection does not narrow the schema, we should not try to push it down: - if projection.expr().len() >= projection.input().schema().fields().len() { - return Ok(None); - } - // CoalescePartitionsExec always has a single child, so zero indexing is safe. - make_with_child(projection, projection.input().children()[0]) - .map(|e| Some(Arc::new(CoalescePartitionsExec::new(e)) as _)) -} - -/// Tries to swap `projection` with its input (`filter`). If possible, performs -/// the swap and returns [`FilterExec`] as the top plan. Otherwise, returns `None`. -fn try_swapping_with_filter( - projection: &ProjectionExec, - filter: &FilterExec, -) -> Result>> { - // If the projection does not narrow the schema, we should not try to push it down: - if projection.expr().len() >= projection.input().schema().fields().len() { - return Ok(None); - } - // Each column in the predicate expression must exist after the projection. - let Some(new_predicate) = update_expr(filter.predicate(), projection.expr(), false)? - else { - return Ok(None); - }; - - FilterExec::try_new(new_predicate, make_with_child(projection, filter.input())?) 
- .and_then(|e| { - let selectivity = filter.default_selectivity(); - e.with_default_selectivity(selectivity) - }) - .map(|e| Some(Arc::new(e) as _)) -} - -/// Tries to swap the projection with its input [`RepartitionExec`]. If it can be done, -/// it returns the new swapped version having the [`RepartitionExec`] as the top plan. -/// Otherwise, it returns None. -fn try_swapping_with_repartition( - projection: &ProjectionExec, - repartition: &RepartitionExec, -) -> Result>> { - // If the projection does not narrow the schema, we should not try to push it down. - if projection.expr().len() >= projection.input().schema().fields().len() { - return Ok(None); - } - - // If pushdown is not beneficial or applicable, break it. - if projection.benefits_from_input_partitioning()[0] || !all_columns(projection.expr()) - { - return Ok(None); - } - - let new_projection = make_with_child(projection, repartition.input())?; - - let new_partitioning = match repartition.partitioning() { - Partitioning::Hash(partitions, size) => { - let mut new_partitions = vec![]; - for partition in partitions { - let Some(new_partition) = - update_expr(partition, projection.expr(), false)? - else { - return Ok(None); - }; - new_partitions.push(new_partition); - } - Partitioning::Hash(new_partitions, *size) - } - others => others.clone(), - }; - - Ok(Some(Arc::new(RepartitionExec::try_new( - new_projection, - new_partitioning, - )?))) -} - -/// Tries to swap the projection with its input [`SortExec`]. If it can be done, -/// it returns the new swapped version having the [`SortExec`] as the top plan. -/// Otherwise, it returns None. -fn try_swapping_with_sort( - projection: &ProjectionExec, - sort: &SortExec, -) -> Result>> { - // If the projection does not narrow the schema, we should not try to push it down. - if projection.expr().len() >= projection.input().schema().fields().len() { - return Ok(None); - } - - let mut updated_exprs = LexOrdering::default(); - for sort in sort.expr() { - let Some(new_expr) = update_expr(&sort.expr, projection.expr(), false)? else { - return Ok(None); - }; - updated_exprs.push(PhysicalSortExpr { - expr: new_expr, - options: sort.options, - }); - } - - Ok(Some(Arc::new( - SortExec::new(updated_exprs, make_with_child(projection, sort.input())?) - .with_fetch(sort.fetch()) - .with_preserve_partitioning(sort.preserve_partitioning()), - ))) -} - -/// Tries to swap the projection with its input [`SortPreservingMergeExec`]. -/// If this is possible, it returns the new [`SortPreservingMergeExec`] whose -/// child is a projection. Otherwise, it returns None. -fn try_swapping_with_sort_preserving_merge( - projection: &ProjectionExec, - spm: &SortPreservingMergeExec, -) -> Result>> { - // If the projection does not narrow the schema, we should not try to push it down. - if projection.expr().len() >= projection.input().schema().fields().len() { - return Ok(None); - } - - let mut updated_exprs = LexOrdering::default(); - for sort in spm.expr() { - let Some(updated_expr) = update_expr(&sort.expr, projection.expr(), false)? - else { - return Ok(None); - }; - updated_exprs.push(PhysicalSortExpr { - expr: updated_expr, - options: sort.options, - }); - } - - Ok(Some(Arc::new( - SortPreservingMergeExec::new( - updated_exprs, - make_with_child(projection, spm.input())?, - ) - .with_fetch(spm.fetch()), - ))) -} - -/// Tries to push `projection` down through `union`. If possible, performs the -/// pushdown and returns a new [`UnionExec`] as the top plan which has projections -/// as its children. 
Otherwise, returns `None`. -fn try_pushdown_through_union( - projection: &ProjectionExec, - union: &UnionExec, -) -> Result>> { - // If the projection doesn't narrow the schema, we shouldn't try to push it down. - if projection.expr().len() >= projection.input().schema().fields().len() { - return Ok(None); - } - - let new_children = union - .children() - .into_iter() - .map(|child| make_with_child(projection, child)) - .collect::>>()?; - - Ok(Some(Arc::new(UnionExec::new(new_children)))) -} - -trait EmbeddedProjection: ExecutionPlan + Sized { - fn with_projection(&self, projection: Option>) -> Result; -} - -impl EmbeddedProjection for HashJoinExec { - fn with_projection(&self, projection: Option>) -> Result { - self.with_projection(projection) - } -} - -impl EmbeddedProjection for NestedLoopJoinExec { - fn with_projection(&self, projection: Option>) -> Result { - self.with_projection(projection) - } -} - -impl EmbeddedProjection for FilterExec { - fn with_projection(&self, projection: Option>) -> Result { - self.with_projection(projection) - } -} - -/// Some projection can't be pushed down left input or right input of hash join because filter or on need may need some columns that won't be used in later. -/// By embed those projection to hash join, we can reduce the cost of build_batch_from_indices in hash join (build_batch_from_indices need to can compute::take() for each column) and avoid unnecessary output creation. -fn try_embed_projection( - projection: &ProjectionExec, - execution_plan: &Exec, -) -> Result>> { - // Collect all column indices from the given projection expressions. - let projection_index = collect_column_indices(projection.expr()); - - if projection_index.is_empty() { - return Ok(None); - }; - - // If the projection indices is the same as the input columns, we don't need to embed the projection to hash join. - // Check the projection_index is 0..n-1 and the length of projection_index is the same as the length of execution_plan schema fields. - if projection_index.len() == projection_index.last().unwrap() + 1 - && projection_index.len() == execution_plan.schema().fields().len() - { - return Ok(None); - } - - let new_execution_plan = - Arc::new(execution_plan.with_projection(Some(projection_index.to_vec()))?); - - // Build projection expressions for update_expr. Zip the projection_index with the new_execution_plan output schema fields. - let embed_project_exprs = projection_index - .iter() - .zip(new_execution_plan.schema().fields()) - .map(|(index, field)| { - ( - Arc::new(Column::new(field.name(), *index)) as Arc, - field.name().to_owned(), - ) - }) - .collect::>(); - - let mut new_projection_exprs = Vec::with_capacity(projection.expr().len()); - - for (expr, alias) in projection.expr() { - // update column index for projection expression since the input schema has been changed. - let Some(expr) = update_expr(expr, embed_project_exprs.as_slice(), false)? else { - return Ok(None); - }; - new_projection_exprs.push((expr, alias.clone())); - } - // Old projection may contain some alias or expression such as `a + 1` and `CAST('true' AS BOOLEAN)`, but our projection_exprs in hash join just contain column, so we need to create the new projection to keep the original projection. 
- let new_projection = Arc::new(ProjectionExec::try_new( - new_projection_exprs, - Arc::clone(&new_execution_plan) as _, - )?); - if is_projection_removable(&new_projection) { - Ok(Some(new_execution_plan)) - } else { - Ok(Some(new_projection)) - } -} - -/// Collect all column indices from the given projection expressions. -fn collect_column_indices(exprs: &[(Arc, String)]) -> Vec { - // Collect indices and remove duplicates. - let mut indices = exprs - .iter() - .flat_map(|(expr, _)| collect_columns(expr)) - .map(|x| x.index()) - .collect::>() - .into_iter() - .collect::>(); - indices.sort(); - indices -} - -struct JoinData { - projected_left_child: ProjectionExec, - projected_right_child: ProjectionExec, - join_filter: Option, - join_on: JoinOn, -} - -fn try_pushdown_through_join( - projection: &ProjectionExec, - join_left: &Arc, - join_right: &Arc, - join_on: JoinOnRef, - schema: SchemaRef, - filter: Option<&JoinFilter>, -) -> Result> { - // Convert projected expressions to columns. We can not proceed if this is not possible. - let Some(projection_as_columns) = physical_to_column_exprs(projection.expr()) else { - return Ok(None); - }; - - let (far_right_left_col_ind, far_left_right_col_ind) = - join_table_borders(join_left.schema().fields().len(), &projection_as_columns); - - if !join_allows_pushdown( - &projection_as_columns, - &schema, - far_right_left_col_ind, - far_left_right_col_ind, - ) { - return Ok(None); - } - - let new_filter = if let Some(filter) = filter { - match update_join_filter( - &projection_as_columns[0..=far_right_left_col_ind as _], - &projection_as_columns[far_left_right_col_ind as _..], - filter, - join_left.schema().fields().len(), - ) { - Some(updated_filter) => Some(updated_filter), - None => return Ok(None), - } - } else { - None - }; - - let Some(new_on) = update_join_on( - &projection_as_columns[0..=far_right_left_col_ind as _], - &projection_as_columns[far_left_right_col_ind as _..], - join_on, - join_left.schema().fields().len(), - ) else { - return Ok(None); - }; - - let (new_left, new_right) = new_join_children( - &projection_as_columns, - far_right_left_col_ind, - far_left_right_col_ind, - join_left, - join_right, - )?; - - Ok(Some(JoinData { - projected_left_child: new_left, - projected_right_child: new_right, - join_filter: new_filter, - join_on: new_on, - })) -} - -/// Tries to push `projection` down through `nested_loop_join`. If possible, performs the -/// pushdown and returns a new [`NestedLoopJoinExec`] as the top plan which has projections -/// as its children. Otherwise, returns `None`. -fn try_pushdown_through_nested_loop_join( - projection: &ProjectionExec, - nl_join: &NestedLoopJoinExec, -) -> Result>> { - // TODO: currently if there is projection in NestedLoopJoinExec, we can't push down projection to left or right input. Maybe we can pushdown the mixed projection later. - if nl_join.contains_projection() { - return Ok(None); - } - - if let Some(JoinData { - projected_left_child, - projected_right_child, - join_filter, - .. - }) = try_pushdown_through_join( - projection, - nl_join.left(), - nl_join.right(), - &[], - nl_join.schema(), - nl_join.filter(), - )? { - Ok(Some(Arc::new(NestedLoopJoinExec::try_new( - Arc::new(projected_left_child), - Arc::new(projected_right_child), - join_filter, - nl_join.join_type(), - // Returned early if projection is not None - None, - )?))) - } else { - try_embed_projection(projection, nl_join) - } -} - -/// Tries to push `projection` down through `hash_join`. 
If possible, performs the -/// pushdown and returns a new [`HashJoinExec`] as the top plan which has projections -/// as its children. Otherwise, returns `None`. -fn try_pushdown_through_hash_join( - projection: &ProjectionExec, - hash_join: &HashJoinExec, -) -> Result>> { - // TODO: currently if there is projection in HashJoinExec, we can't push down projection to left or right input. Maybe we can pushdown the mixed projection later. - if hash_join.contains_projection() { - return Ok(None); - } - - if let Some(JoinData { - projected_left_child, - projected_right_child, - join_filter, - join_on, - }) = try_pushdown_through_join( - projection, - hash_join.left(), - hash_join.right(), - hash_join.on(), - hash_join.schema(), - hash_join.filter(), - )? { - Ok(Some(Arc::new(HashJoinExec::try_new( - Arc::new(projected_left_child), - Arc::new(projected_right_child), - join_on, - join_filter, - hash_join.join_type(), - // Returned early if projection is not None - None, - *hash_join.partition_mode(), - hash_join.null_equals_null, - )?))) - } else { - try_embed_projection(projection, hash_join) - } -} - -/// Tries to swap the projection with its input [`CrossJoinExec`]. If it can be done, -/// it returns the new swapped version having the [`CrossJoinExec`] as the top plan. -/// Otherwise, it returns None. -fn try_swapping_with_cross_join( - projection: &ProjectionExec, - cross_join: &CrossJoinExec, -) -> Result>> { - // Convert projected PhysicalExpr's to columns. If not possible, we cannot proceed. - let Some(projection_as_columns) = physical_to_column_exprs(projection.expr()) else { - return Ok(None); - }; - - let (far_right_left_col_ind, far_left_right_col_ind) = join_table_borders( - cross_join.left().schema().fields().len(), - &projection_as_columns, - ); - - if !join_allows_pushdown( - &projection_as_columns, - &cross_join.schema(), - far_right_left_col_ind, - far_left_right_col_ind, - ) { - return Ok(None); - } - - let (new_left, new_right) = new_join_children( - &projection_as_columns, - far_right_left_col_ind, - far_left_right_col_ind, - cross_join.left(), - cross_join.right(), - )?; - - Ok(Some(Arc::new(CrossJoinExec::new( - Arc::new(new_left), - Arc::new(new_right), - )))) -} - -/// Tries to swap the projection with its input [`SortMergeJoinExec`]. If it can be done, -/// it returns the new swapped version having the [`SortMergeJoinExec`] as the top plan. -/// Otherwise, it returns None. -fn try_swapping_with_sort_merge_join( - projection: &ProjectionExec, - sm_join: &SortMergeJoinExec, -) -> Result>> { - // Convert projected PhysicalExpr's to columns. If not possible, we cannot proceed. 
- let Some(projection_as_columns) = physical_to_column_exprs(projection.expr()) else { - return Ok(None); - }; - - let (far_right_left_col_ind, far_left_right_col_ind) = join_table_borders( - sm_join.left().schema().fields().len(), - &projection_as_columns, - ); - - if !join_allows_pushdown( - &projection_as_columns, - &sm_join.schema(), - far_right_left_col_ind, - far_left_right_col_ind, - ) { - return Ok(None); - } - - let Some(new_on) = update_join_on( - &projection_as_columns[0..=far_right_left_col_ind as _], - &projection_as_columns[far_left_right_col_ind as _..], - sm_join.on(), - sm_join.left().schema().fields().len(), - ) else { - return Ok(None); - }; - - let (new_left, new_right) = new_join_children( - &projection_as_columns, - far_right_left_col_ind, - far_left_right_col_ind, - sm_join.children()[0], - sm_join.children()[1], - )?; - - Ok(Some(Arc::new(SortMergeJoinExec::try_new( - Arc::new(new_left), - Arc::new(new_right), - new_on, - sm_join.filter.clone(), - sm_join.join_type, - sm_join.sort_options.clone(), - sm_join.null_equals_null, - )?))) -} - -/// Tries to swap the projection with its input [`SymmetricHashJoinExec`]. If it can be done, -/// it returns the new swapped version having the [`SymmetricHashJoinExec`] as the top plan. -/// Otherwise, it returns None. -fn try_swapping_with_sym_hash_join( - projection: &ProjectionExec, - sym_join: &SymmetricHashJoinExec, -) -> Result>> { - // Convert projected PhysicalExpr's to columns. If not possible, we cannot proceed. - let Some(projection_as_columns) = physical_to_column_exprs(projection.expr()) else { - return Ok(None); - }; - - let (far_right_left_col_ind, far_left_right_col_ind) = join_table_borders( - sym_join.left().schema().fields().len(), - &projection_as_columns, - ); - - if !join_allows_pushdown( - &projection_as_columns, - &sym_join.schema(), - far_right_left_col_ind, - far_left_right_col_ind, - ) { - return Ok(None); - } - - let Some(new_on) = update_join_on( - &projection_as_columns[0..=far_right_left_col_ind as _], - &projection_as_columns[far_left_right_col_ind as _..], - sym_join.on(), - sym_join.left().schema().fields().len(), - ) else { - return Ok(None); - }; - - let new_filter = if let Some(filter) = sym_join.filter() { - match update_join_filter( - &projection_as_columns[0..=far_right_left_col_ind as _], - &projection_as_columns[far_left_right_col_ind as _..], - filter, - sym_join.left().schema().fields().len(), - ) { - Some(updated_filter) => Some(updated_filter), - None => return Ok(None), - } - } else { - None - }; - - let (new_left, new_right) = new_join_children( - &projection_as_columns, - far_right_left_col_ind, - far_left_right_col_ind, - sym_join.left(), - sym_join.right(), - )?; - - Ok(Some(Arc::new(SymmetricHashJoinExec::try_new( - Arc::new(new_left), - Arc::new(new_right), - new_on, - new_filter, - sym_join.join_type(), - sym_join.null_equals_null(), - sym_join - .right() - .output_ordering() - .map(|p| LexOrdering::new(p.to_vec())), - sym_join - .left() - .output_ordering() - .map(|p| LexOrdering::new(p.to_vec())), - sym_join.partition_mode(), - )?))) -} - -/// Compare the inputs and outputs of the projection. All expressions must be -/// columns without alias, and projection does not change the order of fields. -/// For example, if the input schema is `a, b`, `SELECT a, b` is removable, -/// but `SELECT b, a` and `SELECT a+1, b` and `SELECT a AS c, b` are not. 
-fn is_projection_removable(projection: &ProjectionExec) -> bool { - let exprs = projection.expr(); - exprs.iter().enumerate().all(|(idx, (expr, alias))| { - let Some(col) = expr.as_any().downcast_ref::() else { - return false; - }; - col.name() == alias && col.index() == idx - }) && exprs.len() == projection.input().schema().fields().len() -} - -/// Given the expression set of a projection, checks if the projection causes -/// any renaming or constructs a non-`Column` physical expression. -fn all_alias_free_columns(exprs: &[(Arc, String)]) -> bool { - exprs.iter().all(|(expr, alias)| { - expr.as_any() - .downcast_ref::() - .map(|column| column.name() == alias) - .unwrap_or(false) - }) -} - -/// Updates a source provider's projected columns according to the given -/// projection operator's expressions. To use this function safely, one must -/// ensure that all expressions are `Column` expressions without aliases. -fn new_projections_for_columns( - projection: &ProjectionExec, - source: &[usize], -) -> Vec { - projection - .expr() - .iter() - .filter_map(|(expr, _)| { - expr.as_any() - .downcast_ref::() - .map(|expr| source[expr.index()]) - }) - .collect() -} - -/// The function operates in two modes: -/// -/// 1) When `sync_with_child` is `true`: -/// -/// The function updates the indices of `expr` if the expression resides -/// in the input plan. For instance, given the expressions `a@1 + b@2` -/// and `c@0` with the input schema `c@2, a@0, b@1`, the expressions are -/// updated to `a@0 + b@1` and `c@2`. -/// -/// 2) When `sync_with_child` is `false`: -/// -/// The function determines how the expression would be updated if a projection -/// was placed before the plan associated with the expression. If the expression -/// cannot be rewritten after the projection, it returns `None`. For example, -/// given the expressions `c@0`, `a@1` and `b@2`, and the [`ProjectionExec`] with -/// an output schema of `a, c_new`, then `c@0` becomes `c_new@1`, `a@1` becomes -/// `a@0`, but `b@2` results in `None` since the projection does not include `b`. -fn update_expr( - expr: &Arc, - projected_exprs: &[(Arc, String)], - sync_with_child: bool, -) -> Result>> { - #[derive(Debug, PartialEq)] - enum RewriteState { - /// The expression is unchanged. - Unchanged, - /// Some part of the expression has been rewritten - RewrittenValid, - /// Some part of the expression has been rewritten, but some column - /// references could not be. 
- RewrittenInvalid, - } - - let mut state = RewriteState::Unchanged; - - let new_expr = Arc::clone(expr) - .transform_up(|expr: Arc| { - if state == RewriteState::RewrittenInvalid { - return Ok(Transformed::no(expr)); - } - - let Some(column) = expr.as_any().downcast_ref::() else { - return Ok(Transformed::no(expr)); - }; - if sync_with_child { - state = RewriteState::RewrittenValid; - // Update the index of `column`: - Ok(Transformed::yes(Arc::clone( - &projected_exprs[column.index()].0, - ))) - } else { - // default to invalid, in case we can't find the relevant column - state = RewriteState::RewrittenInvalid; - // Determine how to update `column` to accommodate `projected_exprs` - projected_exprs - .iter() - .enumerate() - .find_map(|(index, (projected_expr, alias))| { - projected_expr.as_any().downcast_ref::().and_then( - |projected_column| { - (column.name().eq(projected_column.name()) - && column.index() == projected_column.index()) - .then(|| { - state = RewriteState::RewrittenValid; - Arc::new(Column::new(alias, index)) as _ - }) - }, - ) - }) - .map_or_else( - || Ok(Transformed::no(expr)), - |c| Ok(Transformed::yes(c)), - ) - } - }) - .data(); - - new_expr.map(|e| (state == RewriteState::RewrittenValid).then_some(e)) -} - -/// Creates a new [`ProjectionExec`] instance with the given child plan and -/// projected expressions. -fn make_with_child( - projection: &ProjectionExec, - child: &Arc, -) -> Result> { - ProjectionExec::try_new(projection.expr().to_vec(), Arc::clone(child)) - .map(|e| Arc::new(e) as _) -} - -/// Returns `true` if all the expressions in the argument are `Column`s. -fn all_columns(exprs: &[(Arc, String)]) -> bool { - exprs.iter().all(|(expr, _)| expr.as_any().is::()) -} - -/// Downcasts all the expressions in `exprs` to `Column`s. If any of the given -/// expressions is not a `Column`, returns `None`. -fn physical_to_column_exprs( - exprs: &[(Arc, String)], -) -> Option> { - exprs - .iter() - .map(|(expr, alias)| { - expr.as_any() - .downcast_ref::() - .map(|col| (col.clone(), alias.clone())) - }) - .collect() -} - -/// Returns the last index before encountering a column coming from the right table when traveling -/// through the projection from left to right, and the last index before encountering a column -/// coming from the left table when traveling through the projection from right to left. -/// If there is no column in the projection coming from the left side, it returns (-1, ...), -/// if there is no column in the projection coming from the right side, it returns (..., projection length). -fn join_table_borders( - left_table_column_count: usize, - projection_as_columns: &[(Column, String)], -) -> (i32, i32) { - let far_right_left_col_ind = projection_as_columns - .iter() - .enumerate() - .take_while(|(_, (projection_column, _))| { - projection_column.index() < left_table_column_count - }) - .last() - .map(|(index, _)| index as i32) - .unwrap_or(-1); - - let far_left_right_col_ind = projection_as_columns - .iter() - .enumerate() - .rev() - .take_while(|(_, (projection_column, _))| { - projection_column.index() >= left_table_column_count - }) - .last() - .map(|(index, _)| index as i32) - .unwrap_or(projection_as_columns.len() as i32); - - (far_right_left_col_ind, far_left_right_col_ind) -} - -/// Tries to update the equi-join `Column`'s of a join as if the input of -/// the join was replaced by a projection. 
-fn update_join_on( - proj_left_exprs: &[(Column, String)], - proj_right_exprs: &[(Column, String)], - hash_join_on: &[(PhysicalExprRef, PhysicalExprRef)], - left_field_size: usize, -) -> Option> { - // TODO: Clippy wants the "map" call removed, but doing so generates - // a compilation error. Remove the clippy directive once this - // issue is fixed. - #[allow(clippy::map_identity)] - let (left_idx, right_idx): (Vec<_>, Vec<_>) = hash_join_on - .iter() - .map(|(left, right)| (left, right)) - .unzip(); - - let new_left_columns = new_columns_for_join_on(&left_idx, proj_left_exprs, 0); - let new_right_columns = - new_columns_for_join_on(&right_idx, proj_right_exprs, left_field_size); - - match (new_left_columns, new_right_columns) { - (Some(left), Some(right)) => Some(left.into_iter().zip(right).collect()), - _ => None, - } -} - -/// This function generates a new set of columns to be used in a hash join -/// operation based on a set of equi-join conditions (`hash_join_on`) and a -/// list of projection expressions (`projection_exprs`). -/// -/// Notes: Column indices in the projection expressions are based on the join schema, -/// whereas the join on expressions are based on the join child schema. `column_index_offset` -/// represents the offset between them. -fn new_columns_for_join_on( - hash_join_on: &[&PhysicalExprRef], - projection_exprs: &[(Column, String)], - column_index_offset: usize, -) -> Option> { - let new_columns = hash_join_on - .iter() - .filter_map(|on| { - // Rewrite all columns in `on` - Arc::clone(*on) - .transform(|expr| { - if let Some(column) = expr.as_any().downcast_ref::() { - // Find the column in the projection expressions - let new_column = projection_exprs - .iter() - .enumerate() - .find(|(_, (proj_column, _))| { - column.name() == proj_column.name() - && column.index() + column_index_offset - == proj_column.index() - }) - .map(|(index, (_, alias))| Column::new(alias, index)); - if let Some(new_column) = new_column { - Ok(Transformed::yes(Arc::new(new_column))) - } else { - // If the column is not found in the projection expressions, - // it means that the column is not projected. In this case, - // we cannot push the projection down. - internal_err!( - "Column {:?} not found in projection expressions", - column - ) - } - } else { - Ok(Transformed::no(expr)) - } - }) - .data() - .ok() - }) - .collect::>(); - (new_columns.len() == hash_join_on.len()).then_some(new_columns) -} - -/// Tries to update the column indices of a [`JoinFilter`] as if the input of -/// the join was replaced by a projection. 
-fn update_join_filter( - projection_left_exprs: &[(Column, String)], - projection_right_exprs: &[(Column, String)], - join_filter: &JoinFilter, - left_field_size: usize, -) -> Option { - let mut new_left_indices = new_indices_for_join_filter( - join_filter, - JoinSide::Left, - projection_left_exprs, - 0, - ) - .into_iter(); - let mut new_right_indices = new_indices_for_join_filter( - join_filter, - JoinSide::Right, - projection_right_exprs, - left_field_size, - ) - .into_iter(); - - // Check if all columns match: - (new_right_indices.len() + new_left_indices.len() - == join_filter.column_indices().len()) - .then(|| { - JoinFilter::new( - Arc::clone(join_filter.expression()), - join_filter - .column_indices() - .iter() - .map(|col_idx| ColumnIndex { - index: if col_idx.side == JoinSide::Left { - new_left_indices.next().unwrap() - } else { - new_right_indices.next().unwrap() - }, - side: col_idx.side, - }) - .collect(), - Arc::clone(join_filter.schema()), - ) - }) -} - -/// This function determines and returns a vector of indices representing the -/// positions of columns in `projection_exprs` that are involved in `join_filter`, -/// and correspond to a particular side (`join_side`) of the join operation. -/// -/// Notes: Column indices in the projection expressions are based on the join schema, -/// whereas the join filter is based on the join child schema. `column_index_offset` -/// represents the offset between them. -fn new_indices_for_join_filter( - join_filter: &JoinFilter, - join_side: JoinSide, - projection_exprs: &[(Column, String)], - column_index_offset: usize, -) -> Vec { - join_filter - .column_indices() - .iter() - .filter(|col_idx| col_idx.side == join_side) - .filter_map(|col_idx| { - projection_exprs - .iter() - .position(|(col, _)| col_idx.index + column_index_offset == col.index()) - }) - .collect() -} - -/// Checks three conditions for pushing a projection down through a join: -/// - Projection must narrow the join output schema. -/// - Columns coming from left/right tables must be collected at the left/right -/// sides of the output table. -/// - Left or right table is not lost after the projection. -fn join_allows_pushdown( - projection_as_columns: &[(Column, String)], - join_schema: &SchemaRef, - far_right_left_col_ind: i32, - far_left_right_col_ind: i32, -) -> bool { - // Projection must narrow the join output: - projection_as_columns.len() < join_schema.fields().len() - // Are the columns from different tables mixed? - && (far_right_left_col_ind + 1 == far_left_right_col_ind) - // Left or right table is not lost after the projection. - && far_right_left_col_ind >= 0 - && far_left_right_col_ind < projection_as_columns.len() as i32 -} - -/// If pushing down the projection over this join's children seems possible, -/// this function constructs the new [`ProjectionExec`]s that will come on top -/// of the original children of the join. 
-fn new_join_children( - projection_as_columns: &[(Column, String)], - far_right_left_col_ind: i32, - far_left_right_col_ind: i32, - left_child: &Arc, - right_child: &Arc, -) -> Result<(ProjectionExec, ProjectionExec)> { - let new_left = ProjectionExec::try_new( - projection_as_columns[0..=far_right_left_col_ind as _] - .iter() - .map(|(col, alias)| { - ( - Arc::new(Column::new(col.name(), col.index())) as _, - alias.clone(), - ) - }) - .collect_vec(), - Arc::clone(left_child), - )?; - let left_size = left_child.schema().fields().len() as i32; - let new_right = ProjectionExec::try_new( - projection_as_columns[far_left_right_col_ind as _..] - .iter() - .map(|(col, alias)| { - ( - Arc::new(Column::new( - col.name(), - // Align projected expressions coming from the right - // table with the new right child projection: - (col.index() as i32 - left_size) as _, - )) as _, - alias.clone(), - ) - }) - .collect_vec(), - Arc::clone(right_child), - )?; - - Ok((new_left, new_right)) -} - -#[cfg(test)] -mod tests { - use super::*; - use std::any::Any; - - use crate::datasource::file_format::file_compression_type::FileCompressionType; - use crate::datasource::listing::PartitionedFile; - use crate::datasource::physical_plan::FileScanConfig; - use crate::physical_plan::get_plan_string; - use crate::physical_plan::joins::StreamJoinPartitionMode; - - use arrow_schema::{DataType, Field, Schema, SortOptions}; - use datafusion_common::{JoinType, ScalarValue}; - use datafusion_execution::object_store::ObjectStoreUrl; - use datafusion_execution::{SendableRecordBatchStream, TaskContext}; - use datafusion_expr::{ - ColumnarValue, Operator, ScalarUDF, ScalarUDFImpl, Signature, Volatility, - }; - use datafusion_physical_expr::expressions::{ - binary, col, BinaryExpr, CaseExpr, CastExpr, NegativeExpr, - }; - use datafusion_physical_expr::ScalarFunctionExpr; - use datafusion_physical_plan::joins::PartitionMode; - use datafusion_physical_plan::streaming::PartitionStream; - - /// Mocked UDF - #[derive(Debug)] - struct DummyUDF { - signature: Signature, - } - - impl DummyUDF { - fn new() -> Self { - Self { - signature: Signature::variadic_any(Volatility::Immutable), - } - } - } - - impl ScalarUDFImpl for DummyUDF { - fn as_any(&self) -> &dyn Any { - self - } - - fn name(&self) -> &str { - "dummy_udf" - } - - fn signature(&self) -> &Signature { - &self.signature - } - - fn return_type(&self, _arg_types: &[DataType]) -> Result { - Ok(DataType::Int32) - } - - fn invoke_batch( - &self, - _args: &[ColumnarValue], - _number_rows: usize, - ) -> Result { - unimplemented!("DummyUDF::invoke") - } - } - - #[test] - fn test_update_matching_exprs() -> Result<()> { - let exprs: Vec> = vec![ - Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 3)), - Operator::Divide, - Arc::new(Column::new("e", 5)), - )), - Arc::new(CastExpr::new( - Arc::new(Column::new("a", 3)), - DataType::Float32, - None, - )), - Arc::new(NegativeExpr::new(Arc::new(Column::new("f", 4)))), - Arc::new(ScalarFunctionExpr::new( - "scalar_expr", - Arc::new(ScalarUDF::new_from_impl(DummyUDF::new())), - vec![ - Arc::new(BinaryExpr::new( - Arc::new(Column::new("b", 1)), - Operator::Divide, - Arc::new(Column::new("c", 0)), - )), - Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 0)), - Operator::Divide, - Arc::new(Column::new("b", 1)), - )), - ], - DataType::Int32, - )), - Arc::new(CaseExpr::try_new( - Some(Arc::new(Column::new("d", 2))), - vec![ - ( - Arc::new(Column::new("a", 3)) as Arc, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("d", 2)), - 
Operator::Plus, - Arc::new(Column::new("e", 5)), - )) as Arc, - ), - ( - Arc::new(Column::new("a", 3)) as Arc, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("e", 5)), - Operator::Plus, - Arc::new(Column::new("d", 2)), - )) as Arc, - ), - ], - Some(Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 3)), - Operator::Modulo, - Arc::new(Column::new("e", 5)), - ))), - )?), - ]; - let child: Vec<(Arc, String)> = vec![ - (Arc::new(Column::new("c", 2)), "c".to_owned()), - (Arc::new(Column::new("b", 1)), "b".to_owned()), - (Arc::new(Column::new("d", 3)), "d".to_owned()), - (Arc::new(Column::new("a", 0)), "a".to_owned()), - (Arc::new(Column::new("f", 5)), "f".to_owned()), - (Arc::new(Column::new("e", 4)), "e".to_owned()), - ]; - - let expected_exprs: Vec> = vec![ - Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 0)), - Operator::Divide, - Arc::new(Column::new("e", 4)), - )), - Arc::new(CastExpr::new( - Arc::new(Column::new("a", 0)), - DataType::Float32, - None, - )), - Arc::new(NegativeExpr::new(Arc::new(Column::new("f", 5)))), - Arc::new(ScalarFunctionExpr::new( - "scalar_expr", - Arc::new(ScalarUDF::new_from_impl(DummyUDF::new())), - vec![ - Arc::new(BinaryExpr::new( - Arc::new(Column::new("b", 1)), - Operator::Divide, - Arc::new(Column::new("c", 2)), - )), - Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 2)), - Operator::Divide, - Arc::new(Column::new("b", 1)), - )), - ], - DataType::Int32, - )), - Arc::new(CaseExpr::try_new( - Some(Arc::new(Column::new("d", 3))), - vec![ - ( - Arc::new(Column::new("a", 0)) as Arc, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("d", 3)), - Operator::Plus, - Arc::new(Column::new("e", 4)), - )) as Arc, - ), - ( - Arc::new(Column::new("a", 0)) as Arc, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("e", 4)), - Operator::Plus, - Arc::new(Column::new("d", 3)), - )) as Arc, - ), - ], - Some(Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 0)), - Operator::Modulo, - Arc::new(Column::new("e", 4)), - ))), - )?), - ]; - - for (expr, expected_expr) in exprs.into_iter().zip(expected_exprs.into_iter()) { - assert!(update_expr(&expr, &child, true)? 
- .unwrap() - .eq(&expected_expr)); - } - - Ok(()) - } - - #[test] - fn test_update_projected_exprs() -> Result<()> { - let exprs: Vec> = vec![ - Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 3)), - Operator::Divide, - Arc::new(Column::new("e", 5)), - )), - Arc::new(CastExpr::new( - Arc::new(Column::new("a", 3)), - DataType::Float32, - None, - )), - Arc::new(NegativeExpr::new(Arc::new(Column::new("f", 4)))), - Arc::new(ScalarFunctionExpr::new( - "scalar_expr", - Arc::new(ScalarUDF::new_from_impl(DummyUDF::new())), - vec![ - Arc::new(BinaryExpr::new( - Arc::new(Column::new("b", 1)), - Operator::Divide, - Arc::new(Column::new("c", 0)), - )), - Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 0)), - Operator::Divide, - Arc::new(Column::new("b", 1)), - )), - ], - DataType::Int32, - )), - Arc::new(CaseExpr::try_new( - Some(Arc::new(Column::new("d", 2))), - vec![ - ( - Arc::new(Column::new("a", 3)) as Arc, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("d", 2)), - Operator::Plus, - Arc::new(Column::new("e", 5)), - )) as Arc, - ), - ( - Arc::new(Column::new("a", 3)) as Arc, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("e", 5)), - Operator::Plus, - Arc::new(Column::new("d", 2)), - )) as Arc, - ), - ], - Some(Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 3)), - Operator::Modulo, - Arc::new(Column::new("e", 5)), - ))), - )?), - ]; - let projected_exprs: Vec<(Arc, String)> = vec![ - (Arc::new(Column::new("a", 3)), "a".to_owned()), - (Arc::new(Column::new("b", 1)), "b_new".to_owned()), - (Arc::new(Column::new("c", 0)), "c".to_owned()), - (Arc::new(Column::new("d", 2)), "d_new".to_owned()), - (Arc::new(Column::new("e", 5)), "e".to_owned()), - (Arc::new(Column::new("f", 4)), "f_new".to_owned()), - ]; - - let expected_exprs: Vec> = vec![ - Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 0)), - Operator::Divide, - Arc::new(Column::new("e", 4)), - )), - Arc::new(CastExpr::new( - Arc::new(Column::new("a", 0)), - DataType::Float32, - None, - )), - Arc::new(NegativeExpr::new(Arc::new(Column::new("f_new", 5)))), - Arc::new(ScalarFunctionExpr::new( - "scalar_expr", - Arc::new(ScalarUDF::new_from_impl(DummyUDF::new())), - vec![ - Arc::new(BinaryExpr::new( - Arc::new(Column::new("b_new", 1)), - Operator::Divide, - Arc::new(Column::new("c", 2)), - )), - Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 2)), - Operator::Divide, - Arc::new(Column::new("b_new", 1)), - )), - ], - DataType::Int32, - )), - Arc::new(CaseExpr::try_new( - Some(Arc::new(Column::new("d_new", 3))), - vec![ - ( - Arc::new(Column::new("a", 0)) as Arc, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("d_new", 3)), - Operator::Plus, - Arc::new(Column::new("e", 4)), - )) as Arc, - ), - ( - Arc::new(Column::new("a", 0)) as Arc, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("e", 4)), - Operator::Plus, - Arc::new(Column::new("d_new", 3)), - )) as Arc, - ), - ], - Some(Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 0)), - Operator::Modulo, - Arc::new(Column::new("e", 4)), - ))), - )?), - ]; - - for (expr, expected_expr) in exprs.into_iter().zip(expected_exprs.into_iter()) { - assert!(update_expr(&expr, &projected_exprs, false)? 
- .unwrap() - .eq(&expected_expr)); - } - - Ok(()) - } - - #[test] - fn test_join_table_borders() -> Result<()> { - let projections = vec![ - (Column::new("b", 1), "b".to_owned()), - (Column::new("c", 2), "c".to_owned()), - (Column::new("e", 4), "e".to_owned()), - (Column::new("d", 3), "d".to_owned()), - (Column::new("c", 2), "c".to_owned()), - (Column::new("f", 5), "f".to_owned()), - (Column::new("h", 7), "h".to_owned()), - (Column::new("g", 6), "g".to_owned()), - ]; - let left_table_column_count = 5; - assert_eq!( - join_table_borders(left_table_column_count, &projections), - (4, 5) - ); - - let left_table_column_count = 8; - assert_eq!( - join_table_borders(left_table_column_count, &projections), - (7, 8) - ); - - let left_table_column_count = 1; - assert_eq!( - join_table_borders(left_table_column_count, &projections), - (-1, 0) - ); - - let projections = vec![ - (Column::new("a", 0), "a".to_owned()), - (Column::new("b", 1), "b".to_owned()), - (Column::new("d", 3), "d".to_owned()), - (Column::new("g", 6), "g".to_owned()), - (Column::new("e", 4), "e".to_owned()), - (Column::new("f", 5), "f".to_owned()), - (Column::new("e", 4), "e".to_owned()), - (Column::new("h", 7), "h".to_owned()), - ]; - let left_table_column_count = 5; - assert_eq!( - join_table_borders(left_table_column_count, &projections), - (2, 7) - ); - - let left_table_column_count = 7; - assert_eq!( - join_table_borders(left_table_column_count, &projections), - (6, 7) - ); - - Ok(()) - } - - fn create_simple_csv_exec() -> Arc { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - Field::new("d", DataType::Int32, true), - Field::new("e", DataType::Int32, true), - ])); - Arc::new( - CsvExec::builder( - FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![0, 1, 2, 3, 4])), - ) - .with_has_header(false) - .with_delimeter(0) - .with_quote(0) - .with_escape(None) - .with_comment(None) - .with_newlines_in_values(false) - .with_file_compression_type(FileCompressionType::UNCOMPRESSED) - .build(), - ) - } - - fn create_projecting_csv_exec() -> Arc { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - Field::new("d", DataType::Int32, true), - ])); - Arc::new( - CsvExec::builder( - FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![3, 2, 1])), - ) - .with_has_header(false) - .with_delimeter(0) - .with_quote(0) - .with_escape(None) - .with_comment(None) - .with_newlines_in_values(false) - .with_file_compression_type(FileCompressionType::UNCOMPRESSED) - .build(), - ) - } - - fn create_projecting_memory_exec() -> Arc { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - Field::new("d", DataType::Int32, true), - Field::new("e", DataType::Int32, true), - ])); - - Arc::new(MemoryExec::try_new(&[], schema, Some(vec![2, 0, 3, 4])).unwrap()) - } - - #[test] - fn test_csv_after_projection() -> Result<()> { - let csv = create_projecting_csv_exec(); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("b", 2)), "b".to_string()), - (Arc::new(Column::new("d", 
0)), "d".to_string()), - ], - csv.clone(), - )?); - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[b@2 as b, d@0 as d]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[d, c, b], has_header=false", - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "CsvExec: file_groups={1 group: [[x]]}, projection=[b, d], has_header=false", - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn test_memory_after_projection() -> Result<()> { - let memory = create_projecting_memory_exec(); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("d", 2)), "d".to_string()), - (Arc::new(Column::new("e", 3)), "e".to_string()), - (Arc::new(Column::new("a", 1)), "a".to_string()), - ], - memory.clone(), - )?); - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[d@2 as d, e@3 as e, a@1 as a]", - " MemoryExec: partitions=0, partition_sizes=[]", - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = ["MemoryExec: partitions=0, partition_sizes=[]"]; - assert_eq!(get_plan_string(&after_optimize), expected); - assert_eq!( - after_optimize - .clone() - .as_any() - .downcast_ref::() - .unwrap() - .projection() - .clone() - .unwrap(), - vec![3, 4, 0] - ); - - Ok(()) - } - - #[test] - fn test_streaming_table_after_projection() -> Result<()> { - #[derive(Debug)] - struct DummyStreamPartition { - schema: SchemaRef, - } - impl PartitionStream for DummyStreamPartition { - fn schema(&self) -> &SchemaRef { - &self.schema - } - fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { - unreachable!() - } - } - - let streaming_table = StreamingTableExec::try_new( - Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - Field::new("d", DataType::Int32, true), - Field::new("e", DataType::Int32, true), - ])), - vec![Arc::new(DummyStreamPartition { - schema: Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - Field::new("d", DataType::Int32, true), - Field::new("e", DataType::Int32, true), - ])), - }) as _], - Some(&vec![0_usize, 2, 4, 3]), - vec![ - LexOrdering::new(vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("e", 2)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 0)), - options: SortOptions::default(), - }, - ]), - LexOrdering::new(vec![PhysicalSortExpr { - expr: Arc::new(Column::new("d", 3)), - options: SortOptions::default(), - }]), - ] - .into_iter(), - true, - None, - )?; - let projection = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("d", 3)), "d".to_string()), - (Arc::new(Column::new("e", 2)), "e".to_string()), - (Arc::new(Column::new("a", 0)), "a".to_string()), - ], - Arc::new(streaming_table) as _, - )?) 
as _; - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let result = after_optimize - .as_any() - .downcast_ref::() - .unwrap(); - assert_eq!( - result.partition_schema(), - &Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - Field::new("d", DataType::Int32, true), - Field::new("e", DataType::Int32, true), - ])) - ); - assert_eq!( - result.projection().clone().unwrap().to_vec(), - vec![3_usize, 4, 0] - ); - assert_eq!( - result.projected_schema(), - &Schema::new(vec![ - Field::new("d", DataType::Int32, true), - Field::new("e", DataType::Int32, true), - Field::new("a", DataType::Int32, true), - ]) - ); - assert_eq!( - result.projected_output_ordering().into_iter().collect_vec(), - vec![ - LexOrdering::new(vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("e", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(Column::new("a", 2)), - options: SortOptions::default(), - }, - ]), - LexOrdering::new(vec![PhysicalSortExpr { - expr: Arc::new(Column::new("d", 0)), - options: SortOptions::default(), - }]), - ] - ); - assert!(result.is_infinite()); - - Ok(()) - } - - #[test] - fn test_projection_after_projection() -> Result<()> { - let csv = create_simple_csv_exec(); - let child_projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("c", 2)), "c".to_string()), - (Arc::new(Column::new("e", 4)), "new_e".to_string()), - (Arc::new(Column::new("a", 0)), "a".to_string()), - (Arc::new(Column::new("b", 1)), "new_b".to_string()), - ], - csv.clone(), - )?); - let top_projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("new_b", 3)), "new_b".to_string()), - ( - Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 0)), - Operator::Plus, - Arc::new(Column::new("new_e", 1)), - )), - "binary".to_string(), - ), - (Arc::new(Column::new("new_b", 3)), "newest_b".to_string()), - ], - child_projection.clone(), - )?); - - let initial = get_plan_string(&top_projection); - let expected_initial = [ - "ProjectionExec: expr=[new_b@3 as new_b, c@0 + new_e@1 as binary, new_b@3 as newest_b]", - " ProjectionExec: expr=[c@2 as c, e@4 as new_e, a@0 as a, b@1 as new_b]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(top_projection, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[b@1 as new_b, c@2 + e@4 as binary, b@1 as newest_b]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn test_output_req_after_projection() -> Result<()> { - let csv = create_simple_csv_exec(); - let sort_req: Arc = Arc::new(OutputRequirementExec::new( - csv.clone(), - Some(LexRequirement::new(vec![ - PhysicalSortRequirement { - expr: Arc::new(Column::new("b", 1)), - options: Some(SortOptions::default()), - }, - PhysicalSortRequirement { - expr: Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 2)), - Operator::Plus, - Arc::new(Column::new("a", 0)), - )), - options: Some(SortOptions::default()), - }, - ])), - Distribution::HashPartitioned(vec![ - Arc::new(Column::new("a", 0)), - Arc::new(Column::new("b", 1)), - ]), - )); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - 
(Arc::new(Column::new("c", 2)), "c".to_string()), - (Arc::new(Column::new("a", 0)), "new_a".to_string()), - (Arc::new(Column::new("b", 1)), "b".to_string()), - ], - sort_req.clone(), - )?); - - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " OutputRequirementExec", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected: [&str; 3] = [ - "OutputRequirementExec", - " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - - assert_eq!(get_plan_string(&after_optimize), expected); - let expected_reqs = LexRequirement::new(vec![ - PhysicalSortRequirement { - expr: Arc::new(Column::new("b", 2)), - options: Some(SortOptions::default()), - }, - PhysicalSortRequirement { - expr: Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 0)), - Operator::Plus, - Arc::new(Column::new("new_a", 1)), - )), - options: Some(SortOptions::default()), - }, - ]); - assert_eq!( - after_optimize - .as_any() - .downcast_ref::() - .unwrap() - .required_input_ordering()[0] - .clone() - .unwrap(), - expected_reqs - ); - let expected_distribution: Vec> = vec![ - Arc::new(Column::new("new_a", 1)), - Arc::new(Column::new("b", 2)), - ]; - if let Distribution::HashPartitioned(vec) = after_optimize - .as_any() - .downcast_ref::() - .unwrap() - .required_input_distribution()[0] - .clone() - { - assert!(vec - .iter() - .zip(expected_distribution) - .all(|(actual, expected)| actual.eq(&expected))); - } else { - panic!("Expected HashPartitioned distribution!"); - }; - - Ok(()) - } - - #[test] - fn test_coalesce_partitions_after_projection() -> Result<()> { - let csv = create_simple_csv_exec(); - let coalesce_partitions: Arc = - Arc::new(CoalescePartitionsExec::new(csv)); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("b", 1)), "b".to_string()), - (Arc::new(Column::new("a", 0)), "a_new".to_string()), - (Arc::new(Column::new("d", 3)), "d".to_string()), - ], - coalesce_partitions, - )?); - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d]", - " CoalescePartitionsExec", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "CoalescePartitionsExec", - " ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn test_filter_after_projection() -> Result<()> { - let csv = create_simple_csv_exec(); - let predicate = Arc::new(BinaryExpr::new( - Arc::new(BinaryExpr::new( - Arc::new(Column::new("b", 1)), - Operator::Minus, - Arc::new(Column::new("a", 0)), - )), - Operator::Gt, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("d", 3)), - Operator::Minus, - Arc::new(Column::new("a", 0)), - )), - )); - let filter: Arc = - Arc::new(FilterExec::try_new(predicate, csv)?); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("a", 0)), 
"a_new".to_string()), - (Arc::new(Column::new("b", 1)), "b".to_string()), - (Arc::new(Column::new("d", 3)), "d".to_string()), - ], - filter.clone(), - )?); - - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d]", - " FilterExec: b@1 - a@0 > d@3 - a@0", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "FilterExec: b@1 - a_new@0 > d@2 - a_new@0", - " ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn test_join_after_projection() -> Result<()> { - let left_csv = create_simple_csv_exec(); - let right_csv = create_simple_csv_exec(); - - let join: Arc = Arc::new(SymmetricHashJoinExec::try_new( - left_csv, - right_csv, - vec![(Arc::new(Column::new("b", 1)), Arc::new(Column::new("c", 2)))], - // b_left-(1+a_right)<=a_right+c_left - Some(JoinFilter::new( - Arc::new(BinaryExpr::new( - Arc::new(BinaryExpr::new( - Arc::new(Column::new("b_left_inter", 0)), - Operator::Minus, - Arc::new(BinaryExpr::new( - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), - Operator::Plus, - Arc::new(Column::new("a_right_inter", 1)), - )), - )), - Operator::LtEq, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("a_right_inter", 1)), - Operator::Plus, - Arc::new(Column::new("c_left_inter", 2)), - )), - )), - vec![ - ColumnIndex { - index: 1, - side: JoinSide::Left, - }, - ColumnIndex { - index: 0, - side: JoinSide::Right, - }, - ColumnIndex { - index: 2, - side: JoinSide::Left, - }, - ], - Arc::new(Schema::new(vec![ - Field::new("b_left_inter", DataType::Int32, true), - Field::new("a_right_inter", DataType::Int32, true), - Field::new("c_left_inter", DataType::Int32, true), - ])), - )), - &JoinType::Inner, - true, - None, - None, - StreamJoinPartitionMode::SinglePartition, - )?); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("c", 2)), "c_from_left".to_string()), - (Arc::new(Column::new("b", 1)), "b_from_left".to_string()), - (Arc::new(Column::new("a", 0)), "a_from_left".to_string()), - (Arc::new(Column::new("a", 5)), "a_from_right".to_string()), - (Arc::new(Column::new("c", 7)), "c_from_right".to_string()), - ], - join, - )?); - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, a@5 as a_from_right, c@7 as c_from_right]", - " SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b_from_left@1, c_from_right@1)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", - " ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left]", - " 
CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " ProjectionExec: expr=[a@0 as a_from_right, c@2 as c_from_right]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - let expected_filter_col_ind = vec![ - ColumnIndex { - index: 1, - side: JoinSide::Left, - }, - ColumnIndex { - index: 0, - side: JoinSide::Right, - }, - ColumnIndex { - index: 0, - side: JoinSide::Left, - }, - ]; - - assert_eq!( - expected_filter_col_ind, - after_optimize - .as_any() - .downcast_ref::() - .unwrap() - .filter() - .unwrap() - .column_indices() - ); - - Ok(()) - } - - #[test] - fn test_join_after_required_projection() -> Result<()> { - let left_csv = create_simple_csv_exec(); - let right_csv = create_simple_csv_exec(); - - let join: Arc = Arc::new(SymmetricHashJoinExec::try_new( - left_csv, - right_csv, - vec![(Arc::new(Column::new("b", 1)), Arc::new(Column::new("c", 2)))], - // b_left-(1+a_right)<=a_right+c_left - Some(JoinFilter::new( - Arc::new(BinaryExpr::new( - Arc::new(BinaryExpr::new( - Arc::new(Column::new("b_left_inter", 0)), - Operator::Minus, - Arc::new(BinaryExpr::new( - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), - Operator::Plus, - Arc::new(Column::new("a_right_inter", 1)), - )), - )), - Operator::LtEq, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("a_right_inter", 1)), - Operator::Plus, - Arc::new(Column::new("c_left_inter", 2)), - )), - )), - vec![ - ColumnIndex { - index: 1, - side: JoinSide::Left, - }, - ColumnIndex { - index: 0, - side: JoinSide::Right, - }, - ColumnIndex { - index: 2, - side: JoinSide::Left, - }, - ], - Arc::new(Schema::new(vec![ - Field::new("b_left_inter", DataType::Int32, true), - Field::new("a_right_inter", DataType::Int32, true), - Field::new("c_left_inter", DataType::Int32, true), - ])), - )), - &JoinType::Inner, - true, - None, - None, - StreamJoinPartitionMode::SinglePartition, - )?); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("a", 5)), "a".to_string()), - (Arc::new(Column::new("b", 6)), "b".to_string()), - (Arc::new(Column::new("c", 7)), "c".to_string()), - (Arc::new(Column::new("d", 8)), "d".to_string()), - (Arc::new(Column::new("e", 9)), "e".to_string()), - (Arc::new(Column::new("a", 0)), "a".to_string()), - (Arc::new(Column::new("b", 1)), "b".to_string()), - (Arc::new(Column::new("c", 2)), "c".to_string()), - (Arc::new(Column::new("d", 3)), "d".to_string()), - (Arc::new(Column::new("e", 4)), "e".to_string()), - ], - join, - )?); - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]", - " SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]", - " SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + 
a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - Ok(()) - } - - #[test] - fn test_collect_column_indices() -> Result<()> { - let expr = Arc::new(BinaryExpr::new( - Arc::new(Column::new("b", 7)), - Operator::Minus, - Arc::new(BinaryExpr::new( - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), - Operator::Plus, - Arc::new(Column::new("a", 1)), - )), - )); - let column_indices = collect_column_indices(&[(expr, "b-(1+a)".to_string())]); - assert_eq!(column_indices, vec![1, 7]); - Ok(()) - } - - #[test] - fn test_nested_loop_join_after_projection() -> Result<()> { - let left_csv = create_simple_csv_exec(); - let right_csv = create_simple_csv_exec(); - - let col_left_a = col("a", &left_csv.schema())?; - let col_right_b = col("b", &right_csv.schema())?; - let col_left_c = col("c", &left_csv.schema())?; - // left_a < right_b - let filter_expr = - binary(col_left_a, Operator::Lt, col_right_b, &Schema::empty())?; - let filter_column_indices = vec![ - ColumnIndex { - index: 0, - side: JoinSide::Left, - }, - ColumnIndex { - index: 1, - side: JoinSide::Right, - }, - ColumnIndex { - index: 2, - side: JoinSide::Right, - }, - ]; - let filter_schema = Schema::new(vec![ - Field::new("a", DataType::Int32, true), - Field::new("b", DataType::Int32, true), - Field::new("c", DataType::Int32, true), - ]); - - let join: Arc = Arc::new(NestedLoopJoinExec::try_new( - left_csv, - right_csv, - Some(JoinFilter::new( - filter_expr, - filter_column_indices, - Arc::new(filter_schema), - )), - &JoinType::Inner, - None, - )?); - - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![(col_left_c, "c".to_string())], - Arc::clone(&join), - )?); - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[c@2 as c]", - " NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - let expected = [ - "NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1, projection=[c@2]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - Ok(()) - } - - #[test] - fn test_hash_join_after_projection() -> Result<()> { - // sql like - // SELECT t1.c as c_from_left, t1.b as b_from_left, t1.a as a_from_left, t2.c as c_from_right FROM t1 JOIN t2 ON t1.b = t2.c WHERE t1.b - (1 + t2.a) <= t2.a + t1.c - let left_csv = create_simple_csv_exec(); - let right_csv = create_simple_csv_exec(); - - let join: Arc = Arc::new(HashJoinExec::try_new( - left_csv, - right_csv, - vec![(Arc::new(Column::new("b", 1)), Arc::new(Column::new("c", 2)))], - // b_left-(1+a_right)<=a_right+c_left - Some(JoinFilter::new( - Arc::new(BinaryExpr::new( - Arc::new(BinaryExpr::new( - Arc::new(Column::new("b_left_inter", 0)), - Operator::Minus, - Arc::new(BinaryExpr::new( - Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), - Operator::Plus, - Arc::new(Column::new("a_right_inter", 
1)), - )), - )), - Operator::LtEq, - Arc::new(BinaryExpr::new( - Arc::new(Column::new("a_right_inter", 1)), - Operator::Plus, - Arc::new(Column::new("c_left_inter", 2)), - )), - )), - vec![ - ColumnIndex { - index: 1, - side: JoinSide::Left, - }, - ColumnIndex { - index: 0, - side: JoinSide::Right, - }, - ColumnIndex { - index: 2, - side: JoinSide::Left, - }, - ], - Arc::new(Schema::new(vec![ - Field::new("b_left_inter", DataType::Int32, true), - Field::new("a_right_inter", DataType::Int32, true), - Field::new("c_left_inter", DataType::Int32, true), - ])), - )), - &JoinType::Inner, - None, - PartitionMode::Auto, - true, - )?); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("c", 2)), "c_from_left".to_string()), - (Arc::new(Column::new("b", 1)), "b_from_left".to_string()), - (Arc::new(Column::new("a", 0)), "a_from_left".to_string()), - (Arc::new(Column::new("c", 7)), "c_from_right".to_string()), - ], - join.clone(), - )?); - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@7 as c_from_right]", " HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - // HashJoinExec only returns result after projection. Because there are some alias columns in the projection, the ProjectionExec is not removed. - let expected = ["ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@3 as c_from_right]", " HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("a", 0)), "a".to_string()), - (Arc::new(Column::new("b", 1)), "b".to_string()), - (Arc::new(Column::new("c", 2)), "c".to_string()), - (Arc::new(Column::new("c", 7)), "c".to_string()), - ], - join.clone(), - )?); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - // Comparing to the previous result, this projection don't have alias columns either change the order of output fields. So the ProjectionExec is removed. 
- let expected = ["HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn test_repartition_after_projection() -> Result<()> { - let csv = create_simple_csv_exec(); - let repartition: Arc = Arc::new(RepartitionExec::try_new( - csv, - Partitioning::Hash( - vec![ - Arc::new(Column::new("a", 0)), - Arc::new(Column::new("b", 1)), - Arc::new(Column::new("d", 3)), - ], - 6, - ), - )?); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("b", 1)), "b_new".to_string()), - (Arc::new(Column::new("a", 0)), "a".to_string()), - (Arc::new(Column::new("d", 3)), "d_new".to_string()), - ], - repartition, - )?); - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new]", - " RepartitionExec: partitioning=Hash([a@0, b@1, d@3], 6), input_partitions=1", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "RepartitionExec: partitioning=Hash([a@1, b_new@0, d_new@2], 6), input_partitions=1", - " ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - assert_eq!( - after_optimize - .as_any() - .downcast_ref::() - .unwrap() - .partitioning() - .clone(), - Partitioning::Hash( - vec![ - Arc::new(Column::new("a", 1)), - Arc::new(Column::new("b_new", 0)), - Arc::new(Column::new("d_new", 2)), - ], - 6, - ), - ); - - Ok(()) - } - - #[test] - fn test_sort_after_projection() -> Result<()> { - let csv = create_simple_csv_exec(); - let sort_req: Arc = Arc::new(SortExec::new( - LexOrdering::new(vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 2)), - Operator::Plus, - Arc::new(Column::new("a", 0)), - )), - options: SortOptions::default(), - }, - ]), - csv.clone(), - )); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("c", 2)), "c".to_string()), - (Arc::new(Column::new("a", 0)), "new_a".to_string()), - (Arc::new(Column::new("b", 1)), "b".to_string()), - ], - sort_req.clone(), - )?); - - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " SortExec: expr=[b@1 ASC, c@2 + a@0 ASC], preserve_partitioning=[false]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false]", - " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - 
assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn test_sort_preserving_after_projection() -> Result<()> { - let csv = create_simple_csv_exec(); - let sort_req: Arc = Arc::new(SortPreservingMergeExec::new( - LexOrdering::new(vec![ - PhysicalSortExpr { - expr: Arc::new(Column::new("b", 1)), - options: SortOptions::default(), - }, - PhysicalSortExpr { - expr: Arc::new(BinaryExpr::new( - Arc::new(Column::new("c", 2)), - Operator::Plus, - Arc::new(Column::new("a", 0)), - )), - options: SortOptions::default(), - }, - ]), - csv.clone(), - )); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("c", 2)), "c".to_string()), - (Arc::new(Column::new("a", 0)), "new_a".to_string()), - (Arc::new(Column::new("b", 1)), "b".to_string()), - ], - sort_req.clone(), - )?); - - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " SortPreservingMergeExec: [b@1 ASC, c@2 + a@0 ASC]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC]", - " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn test_union_after_projection() -> Result<()> { - let csv = create_simple_csv_exec(); - let union: Arc = - Arc::new(UnionExec::new(vec![csv.clone(), csv.clone(), csv])); - let projection: Arc = Arc::new(ProjectionExec::try_new( - vec![ - (Arc::new(Column::new("c", 2)), "c".to_string()), - (Arc::new(Column::new("a", 0)), "new_a".to_string()), - (Arc::new(Column::new("b", 1)), "b".to_string()), - ], - union.clone(), - )?); - - let initial = get_plan_string(&projection); - let expected_initial = [ - "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " UnionExec", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; - - let expected = [ - "UnionExec", - " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", - " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", - " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } -} diff --git a/datafusion/core/tests/memory_limit/mod.rs b/datafusion/core/tests/memory_limit/mod.rs index b6f2f8e9ac4a..77e4b491da6d 100644 --- a/datafusion/core/tests/memory_limit/mod.rs +++ b/datafusion/core/tests/memory_limit/mod.rs @@ -16,45 +16,45 @@ // under the License. //! 
This module contains tests for limiting memory at runtime in DataFusion + +use std::any::Any; +use std::num::NonZeroUsize; +use std::sync::{Arc, LazyLock}; + #[cfg(feature = "extended_tests")] mod memory_limit_validation; - use arrow::datatypes::{Int32Type, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::{ArrayRef, DictionaryArray}; use arrow_schema::SortOptions; -use async_trait::async_trait; use datafusion::assert_batches_eq; -use datafusion::physical_optimizer::PhysicalOptimizerRule; -use datafusion::physical_plan::memory::MemoryExec; -use datafusion::physical_plan::streaming::PartitionStream; -use datafusion_execution::memory_pool::{ - GreedyMemoryPool, MemoryPool, TrackConsumersPool, -}; -use datafusion_expr::{Expr, TableType}; -use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; -use datafusion_physical_plan::spill::get_record_batch_memory_size; -use futures::StreamExt; -use std::any::Any; -use std::num::NonZeroUsize; -use std::sync::{Arc, LazyLock}; -use tokio::fs::File; - use datafusion::datasource::streaming::StreamingTable; use datafusion::datasource::{MemTable, TableProvider}; use datafusion::execution::disk_manager::DiskManagerConfig; use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::session_state::SessionStateBuilder; -use datafusion::physical_optimizer::join_selection::JoinSelection; +use datafusion::physical_plan::memory::MemoryExec; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; +use datafusion::physical_plan::streaming::PartitionStream; use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; -use datafusion_common::{assert_contains, Result}; - use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_catalog::Session; +use datafusion_common::{assert_contains, Result}; +use datafusion_execution::memory_pool::{ + GreedyMemoryPool, MemoryPool, TrackConsumersPool, +}; use datafusion_execution::TaskContext; +use datafusion_expr::{Expr, TableType}; +use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_optimizer::join_selection::JoinSelection; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::spill::get_record_batch_memory_size; use test_utils::AccessLogGenerator; +use async_trait::async_trait; +use futures::StreamExt; +use tokio::fs::File; + #[cfg(test)] #[ctor::ctor] fn init() { diff --git a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs index 95da1767a3ef..f5ecd41ab11e 100644 --- a/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs +++ b/datafusion/core/tests/physical_optimizer/aggregate_statistics.rs @@ -15,31 +15,30 @@ // specific language governing permissions and limitations // under the License. 
-use datafusion_common::config::ConfigOptions; -use datafusion_execution::TaskContext; -use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; -use datafusion_physical_optimizer::PhysicalOptimizerRule; -use datafusion_physical_plan::aggregates::AggregateExec; -use datafusion_physical_plan::projection::ProjectionExec; -use datafusion_physical_plan::ExecutionPlan; use std::sync::Arc; -use datafusion_common::Result; - -use datafusion_physical_plan::aggregates::PhysicalGroupBy; -use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; -use datafusion_physical_plan::common; -use datafusion_physical_plan::filter::FilterExec; -use datafusion_physical_plan::memory::MemoryExec; +use crate::physical_optimizer::test_utils::TestAggregate; use arrow::array::Int32Array; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; use datafusion_common::cast::as_int64_array; +use datafusion_common::config::ConfigOptions; +use datafusion_common::Result; +use datafusion_execution::TaskContext; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{self, cast}; -use datafusion_physical_optimizer::test_utils::TestAggregate; +use datafusion_physical_optimizer::aggregate_statistics::AggregateStatistics; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::aggregates::AggregateExec; use datafusion_physical_plan::aggregates::AggregateMode; +use datafusion_physical_plan::aggregates::PhysicalGroupBy; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::common; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::memory::MemoryExec; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::ExecutionPlan; /// Mock data using a MemoryExec which has an exact count statistic fn mock_data() -> Result> { diff --git a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs index 85efebf2386a..f0588e45cc6a 100644 --- a/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs +++ b/datafusion/core/tests/physical_optimizer/combine_partial_final_agg.rs @@ -22,11 +22,9 @@ use std::sync::Arc; -use crate::physical_optimizer::parquet_exec; +use crate::physical_optimizer::test_utils::{parquet_exec, trim_plan_display}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -use datafusion::physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; -use datafusion::physical_optimizer::test_utils::trim_plan_display; use datafusion_common::config::ConfigOptions; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::sum::sum_udaf; @@ -34,6 +32,7 @@ use datafusion_physical_expr::aggregate::{AggregateExprBuilder, AggregateFunctio use datafusion_physical_expr::expressions::{col, lit}; use datafusion_physical_expr::Partitioning; use datafusion_physical_expr_common::physical_expr::PhysicalExpr; +use datafusion_physical_optimizer::combine_partial_final_agg::CombinePartialFinalAggregate; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, diff --git a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs index ea75e7d0e5f4..856f7dc8e8a9 100644 --- 
a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs @@ -19,7 +19,11 @@ use std::fmt::Debug; use std::ops::Deref; use std::sync::Arc; -use crate::physical_optimizer::parquet_exec_with_sort; +use crate::physical_optimizer::test_utils::{ + check_integrity, coalesce_partitions_exec, repartition_exec, schema, + sort_merge_join_exec, sort_preserving_merge_exec, +}; +use crate::physical_optimizer::test_utils::{parquet_exec_with_sort, trim_plan_display}; use arrow::compute::SortOptions; use datafusion::config::ConfigOptions; @@ -40,11 +44,6 @@ use datafusion_physical_expr_common::sort_expr::LexRequirement; use datafusion_physical_optimizer::enforce_distribution::*; use datafusion_physical_optimizer::enforce_sorting::EnforceSorting; use datafusion_physical_optimizer::output_requirements::OutputRequirements; -use datafusion_physical_optimizer::test_utils::trim_plan_display; -use datafusion_physical_optimizer::test_utils::{ - check_integrity, coalesce_partitions_exec, repartition_exec, schema, - sort_merge_join_exec, sort_preserving_merge_exec, -}; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, @@ -293,7 +292,7 @@ fn hash_join_exec( join_on: &JoinOn, join_type: &JoinType, ) -> Arc { - datafusion_physical_optimizer::test_utils::hash_join_exec( + crate::physical_optimizer::test_utils::hash_join_exec( left, right, join_on.clone(), diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index 3cf0bff0482f..88b3fe0da3a5 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -17,7 +17,14 @@ use std::sync::Arc; -use crate::physical_optimizer::parquet_exec; +use crate::physical_optimizer::test_utils::{ + aggregate_exec, bounded_window_exec, check_integrity, coalesce_batches_exec, + coalesce_partitions_exec, create_test_schema, create_test_schema2, + create_test_schema3, filter_exec, global_limit_exec, hash_join_exec, limit_exec, + local_limit_exec, memory_exec, parquet_exec, repartition_exec, sort_exec, sort_expr, + sort_expr_options, sort_merge_join_exec, sort_preserving_merge_exec, + spr_repartition_exec, stream_exec_ordered, union_exec, RequirementsTestExec, +}; use datafusion_physical_plan::displayable; use arrow::compute::SortOptions; @@ -37,7 +44,6 @@ use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeE use datafusion_physical_plan::{get_plan_string, ExecutionPlan}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{TreeNode, TransformedResult}; -use datafusion_physical_optimizer::test_utils::{check_integrity, bounded_window_exec, coalesce_partitions_exec, create_test_schema, create_test_schema2, create_test_schema3, filter_exec, global_limit_exec, hash_join_exec, limit_exec, local_limit_exec, memory_exec, repartition_exec, sort_exec, sort_expr, sort_expr_options, sort_merge_join_exec, sort_preserving_merge_exec, spr_repartition_exec, stream_exec_ordered, union_exec, coalesce_batches_exec, aggregate_exec, RequirementsTestExec, create_test_schema4, aggregate_exec_monotonic, aggregate_exec_non_monotonic, bounded_window_exec_with_partition, bounded_window_exec_non_monotonic,}; use datafusion::datasource::physical_plan::{CsvExec, FileScanConfig, ParquetExec}; use 
datafusion_execution::object_store::ObjectStoreUrl; use datafusion::datasource::listing::PartitionedFile; diff --git a/datafusion/core/tests/physical_optimizer/join_selection.rs b/datafusion/core/tests/physical_optimizer/join_selection.rs new file mode 100644 index 000000000000..ae7adacadb19 --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/join_selection.rs @@ -0,0 +1,1494 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; +use std::{ + any::Any, + pin::Pin, + task::{Context, Poll}, +}; + +use arrow::datatypes::{DataType, Field, Schema}; +use arrow::record_batch::RecordBatch; +use arrow_schema::SchemaRef; +use datafusion_common::config::ConfigOptions; +use datafusion_common::JoinSide; +use datafusion_common::{stats::Precision, ColumnStatistics, JoinType, ScalarValue}; +use datafusion_common::{Result, Statistics}; +use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}; +use datafusion_expr::Operator; +use datafusion_physical_expr::expressions::col; +use datafusion_physical_expr::expressions::{BinaryExpr, Column, NegativeExpr}; +use datafusion_physical_expr::intervals::utils::check_support; +use datafusion_physical_expr::PhysicalExprRef; +use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalExpr}; +use datafusion_physical_optimizer::join_selection::{ + hash_join_swap_subrule, JoinSelection, +}; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::displayable; +use datafusion_physical_plan::joins::utils::ColumnIndex; +use datafusion_physical_plan::joins::utils::JoinFilter; +use datafusion_physical_plan::joins::{HashJoinExec, NestedLoopJoinExec, PartitionMode}; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::ExecutionPlanProperties; +use datafusion_physical_plan::{ + execution_plan::{Boundedness, EmissionType}, + DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, +}; + +use futures::Stream; +use rstest::rstest; + +/// Return statistics for empty table +fn empty_statistics() -> Statistics { + Statistics { + num_rows: Precision::Absent, + total_byte_size: Precision::Absent, + column_statistics: vec![ColumnStatistics::new_unknown()], + } +} + +/// Get table thresholds: (num_rows, byte_size) +fn get_thresholds() -> (usize, usize) { + let optimizer_options = ConfigOptions::new().optimizer; + ( + optimizer_options.hash_join_single_partition_threshold_rows, + optimizer_options.hash_join_single_partition_threshold, + ) +} + +/// Return statistics for small table +fn small_statistics() -> Statistics { + let (threshold_num_rows, threshold_byte_size) = get_thresholds(); + Statistics { + num_rows: Precision::Inexact(threshold_num_rows / 128), + total_byte_size: 
Precision::Inexact(threshold_byte_size / 128),
+        column_statistics: vec![ColumnStatistics::new_unknown()],
+    }
+}
+
+/// Return statistics for big table
+fn big_statistics() -> Statistics {
+    let (threshold_num_rows, threshold_byte_size) = get_thresholds();
+    Statistics {
+        num_rows: Precision::Inexact(threshold_num_rows * 2),
+        total_byte_size: Precision::Inexact(threshold_byte_size * 2),
+        column_statistics: vec![ColumnStatistics::new_unknown()],
+    }
+}
+
+/// Return statistics for an even bigger table
+fn bigger_statistics() -> Statistics {
+    let (threshold_num_rows, threshold_byte_size) = get_thresholds();
+    Statistics {
+        num_rows: Precision::Inexact(threshold_num_rows * 4),
+        total_byte_size: Precision::Inexact(threshold_byte_size * 4),
+        column_statistics: vec![ColumnStatistics::new_unknown()],
+    }
+}
+
+fn create_big_and_small() -> (Arc<dyn ExecutionPlan>, Arc<dyn ExecutionPlan>) {
+    let big = Arc::new(StatisticsExec::new(
+        big_statistics(),
+        Schema::new(vec![Field::new("big_col", DataType::Int32, false)]),
+    ));
+
+    let small = Arc::new(StatisticsExec::new(
+        small_statistics(),
+        Schema::new(vec![Field::new("small_col", DataType::Int32, false)]),
+    ));
+    (big, small)
+}
+
+/// Create a column statistics vector for a single column
+/// that has the given min/max/distinct_count properties.
+///
+/// Given min/max will be mapped to a [`ScalarValue`] if
+/// they are not `None`.
+fn create_column_stats(
+    min: Option<u64>,
+    max: Option<u64>,
+    distinct_count: Option<usize>,
+) -> Vec<ColumnStatistics> {
+    vec![ColumnStatistics {
+        distinct_count: distinct_count
+            .map(Precision::Inexact)
+            .unwrap_or(Precision::Absent),
+        min_value: min
+            .map(|size| Precision::Inexact(ScalarValue::UInt64(Some(size))))
+            .unwrap_or(Precision::Absent),
+        max_value: max
+            .map(|size| Precision::Inexact(ScalarValue::UInt64(Some(size))))
+            .unwrap_or(Precision::Absent),
+        ..Default::default()
+    }]
+}
+
+/// Create join filter for NLJoinExec with expression `big_col > small_col`
+/// where both columns are 0-indexed and come from left and right inputs respectively
+fn nl_join_filter() -> Option<JoinFilter> {
+    let column_indices = vec![
+        ColumnIndex {
+            index: 0,
+            side: JoinSide::Left,
+        },
+        ColumnIndex {
+            index: 0,
+            side: JoinSide::Right,
+        },
+    ];
+    let intermediate_schema = Schema::new(vec![
+        Field::new("big_col", DataType::Int32, false),
+        Field::new("small_col", DataType::Int32, false),
+    ]);
+    let expression = Arc::new(BinaryExpr::new(
+        Arc::new(Column::new_with_schema("big_col", &intermediate_schema).unwrap()),
+        Operator::Gt,
+        Arc::new(Column::new_with_schema("small_col", &intermediate_schema).unwrap()),
+    )) as _;
+    Some(JoinFilter::new(
+        expression,
+        column_indices,
+        Arc::new(intermediate_schema),
+    ))
+}
+
+/// Returns three plans with statistics of (min, max, distinct_count)
+/// * big 100K rows @ (0, 50k, 50k)
+/// * medium 10K rows @ (1k, 5k, 1k)
+/// * small 1K rows @ (0, 100k, 1k)
+fn create_nested_with_min_max() -> (
+    Arc<dyn ExecutionPlan>,
+    Arc<dyn ExecutionPlan>,
+    Arc<dyn ExecutionPlan>,
+) {
+    let big = Arc::new(StatisticsExec::new(
+        Statistics {
+            num_rows: Precision::Inexact(100_000),
+            column_statistics: create_column_stats(Some(0), Some(50_000), Some(50_000)),
+            total_byte_size: Precision::Absent,
+        },
+        Schema::new(vec![Field::new("big_col", DataType::Int32, false)]),
+    ));
+
+    let medium = Arc::new(StatisticsExec::new(
+        Statistics {
+            num_rows: Precision::Inexact(10_000),
+            column_statistics: create_column_stats(Some(1000), Some(5000), Some(1000)),
+            total_byte_size: Precision::Absent,
+        },
+        Schema::new(vec![Field::new("medium_col", DataType::Int32, false)]),
+    ));
+
+    let small = Arc::new(StatisticsExec::new(
Arc::new(StatisticsExec::new( + Statistics { + num_rows: Precision::Inexact(1000), + column_statistics: create_column_stats(Some(0), Some(100_000), Some(1000)), + total_byte_size: Precision::Absent, + }, + Schema::new(vec![Field::new("small_col", DataType::Int32, false)]), + )); + + (big, medium, small) +} + +#[tokio::test] +async fn test_join_with_swap() { + let (big, small) = create_big_and_small(); + + let join = Arc::new( + HashJoinExec::try_new( + Arc::clone(&big), + Arc::clone(&small), + vec![( + Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()), + Arc::new(Column::new_with_schema("small_col", &small.schema()).unwrap()), + )], + None, + &JoinType::Left, + None, + PartitionMode::CollectLeft, + false, + ) + .unwrap(), + ); + + let optimized_join = JoinSelection::new() + .optimize(join, &ConfigOptions::new()) + .unwrap(); + + let swapping_projection = optimized_join + .as_any() + .downcast_ref::() + .expect("A proj is required to swap columns back to their original order"); + + assert_eq!(swapping_projection.expr().len(), 2); + let (col, name) = &swapping_projection.expr()[0]; + assert_eq!(name, "big_col"); + assert_col_expr(col, "big_col", 1); + let (col, name) = &swapping_projection.expr()[1]; + assert_eq!(name, "small_col"); + assert_col_expr(col, "small_col", 0); + + let swapped_join = swapping_projection + .input() + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + + assert_eq!( + swapped_join.left().statistics().unwrap().total_byte_size, + Precision::Inexact(8192) + ); + assert_eq!( + swapped_join.right().statistics().unwrap().total_byte_size, + Precision::Inexact(2097152) + ); +} + +#[tokio::test] +async fn test_left_join_no_swap() { + let (big, small) = create_big_and_small(); + + let join = Arc::new( + HashJoinExec::try_new( + Arc::clone(&small), + Arc::clone(&big), + vec![( + Arc::new(Column::new_with_schema("small_col", &small.schema()).unwrap()), + Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()), + )], + None, + &JoinType::Left, + None, + PartitionMode::CollectLeft, + false, + ) + .unwrap(), + ); + + let optimized_join = JoinSelection::new() + .optimize(join, &ConfigOptions::new()) + .unwrap(); + + let swapped_join = optimized_join + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + + assert_eq!( + swapped_join.left().statistics().unwrap().total_byte_size, + Precision::Inexact(8192) + ); + assert_eq!( + swapped_join.right().statistics().unwrap().total_byte_size, + Precision::Inexact(2097152) + ); +} + +#[tokio::test] +async fn test_join_with_swap_semi() { + let join_types = [JoinType::LeftSemi, JoinType::LeftAnti]; + for join_type in join_types { + let (big, small) = create_big_and_small(); + + let join = HashJoinExec::try_new( + Arc::clone(&big), + Arc::clone(&small), + vec![( + Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()), + Arc::new(Column::new_with_schema("small_col", &small.schema()).unwrap()), + )], + None, + &join_type, + None, + PartitionMode::Partitioned, + false, + ) + .unwrap(); + + let original_schema = join.schema(); + + let optimized_join = JoinSelection::new() + .optimize(Arc::new(join), &ConfigOptions::new()) + .unwrap(); + + let swapped_join = optimized_join + .as_any() + .downcast_ref::() + .expect( + "A proj is not required to swap columns back to their original order", + ); + + assert_eq!(swapped_join.schema().fields().len(), 1); + assert_eq!( + swapped_join.left().statistics().unwrap().total_byte_size, + 
Precision::Inexact(8192) + ); + assert_eq!( + swapped_join.right().statistics().unwrap().total_byte_size, + Precision::Inexact(2097152) + ); + assert_eq!(original_schema, swapped_join.schema()); + } +} + +/// Compare the input plan with the plan after running the probe order optimizer. +macro_rules! assert_optimized { + ($EXPECTED_LINES: expr, $PLAN: expr) => { + let expected_lines = $EXPECTED_LINES.iter().map(|s| *s).collect::>(); + + let plan = Arc::new($PLAN); + let optimized = JoinSelection::new() + .optimize(plan.clone(), &ConfigOptions::new()) + .unwrap(); + + let plan_string = displayable(optimized.as_ref()).indent(true).to_string(); + let actual_lines = plan_string.split("\n").collect::>(); + + assert_eq!( + &expected_lines, &actual_lines, + "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", + expected_lines, actual_lines + ); + }; +} + +#[tokio::test] +async fn test_nested_join_swap() { + let (big, medium, small) = create_nested_with_min_max(); + + // Form the inner join: big JOIN small + let child_join = HashJoinExec::try_new( + Arc::clone(&big), + Arc::clone(&small), + vec![( + col("big_col", &big.schema()).unwrap(), + col("small_col", &small.schema()).unwrap(), + )], + None, + &JoinType::Inner, + None, + PartitionMode::CollectLeft, + false, + ) + .unwrap(); + let child_schema = child_join.schema(); + + // Form join tree `medium LEFT JOIN (big JOIN small)` + let join = HashJoinExec::try_new( + Arc::clone(&medium), + Arc::new(child_join), + vec![( + col("medium_col", &medium.schema()).unwrap(), + col("small_col", &child_schema).unwrap(), + )], + None, + &JoinType::Left, + None, + PartitionMode::CollectLeft, + false, + ) + .unwrap(); + + // Hash join uses the left side to build the hash table, and right side to probe it. We want + // to keep left as small as possible, so if we can estimate (with a reasonable margin of error) + // that the left side is smaller than the right side, we should swap the sides. + // + // The first hash join's left is 'small' table (with 1000 rows), and the second hash join's + // left is the F(small IJ big) which has an estimated cardinality of 2000 rows (vs medium which + // has an exact cardinality of 10_000 rows). 
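+    //
+    // Illustrative arithmetic (editorial sketch, using the textbook equi-join
+    // estimate |R JOIN S| ~= |R| * |S| / max(ndv(R.key), ndv(S.key)); the exact
+    // formula DataFusion applies may differ):
+    //
+    //   |small JOIN big| ~= 1_000 * 100_000 / max(1_000, 50_000) = 2_000 rows
+    //
+    // Since 2_000 is smaller than medium's estimated 10_000 rows, the optimizer
+    // keeps the inner-join result on the build side and swaps the outer join,
+    // turning the original `medium LEFT JOIN (...)` into the `Right` join in the
+    // expected plan below.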
+ let expected = [ + "ProjectionExec: expr=[medium_col@2 as medium_col, big_col@0 as big_col, small_col@1 as small_col]", + " HashJoinExec: mode=CollectLeft, join_type=Right, on=[(small_col@1, medium_col@0)]", + " ProjectionExec: expr=[big_col@1 as big_col, small_col@0 as small_col]", + " HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(small_col@0, big_col@0)]", + " StatisticsExec: col_count=1, row_count=Inexact(1000)", + " StatisticsExec: col_count=1, row_count=Inexact(100000)", + " StatisticsExec: col_count=1, row_count=Inexact(10000)", + "", + ]; + assert_optimized!(expected, join); +} + +#[tokio::test] +async fn test_join_no_swap() { + let (big, small) = create_big_and_small(); + let join = Arc::new( + HashJoinExec::try_new( + Arc::clone(&small), + Arc::clone(&big), + vec![( + Arc::new(Column::new_with_schema("small_col", &small.schema()).unwrap()), + Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()), + )], + None, + &JoinType::Inner, + None, + PartitionMode::CollectLeft, + false, + ) + .unwrap(), + ); + + let optimized_join = JoinSelection::new() + .optimize(join, &ConfigOptions::new()) + .unwrap(); + + let swapped_join = optimized_join + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + + assert_eq!( + swapped_join.left().statistics().unwrap().total_byte_size, + Precision::Inexact(8192) + ); + assert_eq!( + swapped_join.right().statistics().unwrap().total_byte_size, + Precision::Inexact(2097152) + ); +} + +#[rstest( + join_type, + case::inner(JoinType::Inner), + case::left(JoinType::Left), + case::right(JoinType::Right), + case::full(JoinType::Full) +)] +#[tokio::test] +async fn test_nl_join_with_swap(join_type: JoinType) { + let (big, small) = create_big_and_small(); + + let join = Arc::new( + NestedLoopJoinExec::try_new( + Arc::clone(&big), + Arc::clone(&small), + nl_join_filter(), + &join_type, + None, + ) + .unwrap(), + ); + + let optimized_join = JoinSelection::new() + .optimize(join, &ConfigOptions::new()) + .unwrap(); + + let swapping_projection = optimized_join + .as_any() + .downcast_ref::() + .expect("A proj is required to swap columns back to their original order"); + + assert_eq!(swapping_projection.expr().len(), 2); + let (col, name) = &swapping_projection.expr()[0]; + assert_eq!(name, "big_col"); + assert_col_expr(col, "big_col", 1); + let (col, name) = &swapping_projection.expr()[1]; + assert_eq!(name, "small_col"); + assert_col_expr(col, "small_col", 0); + + let swapped_join = swapping_projection + .input() + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + + // Assert join side of big_col swapped in filter expression + let swapped_filter = swapped_join.filter().unwrap(); + let swapped_big_col_idx = swapped_filter.schema().index_of("big_col").unwrap(); + let swapped_big_col_side = swapped_filter + .column_indices() + .get(swapped_big_col_idx) + .unwrap() + .side; + assert_eq!( + swapped_big_col_side, + JoinSide::Right, + "Filter column side should be swapped" + ); + + assert_eq!( + swapped_join.left().statistics().unwrap().total_byte_size, + Precision::Inexact(8192) + ); + assert_eq!( + swapped_join.right().statistics().unwrap().total_byte_size, + Precision::Inexact(2097152) + ); +} + +#[rstest( + join_type, + case::left_semi(JoinType::LeftSemi), + case::left_anti(JoinType::LeftAnti), + case::right_semi(JoinType::RightSemi), + case::right_anti(JoinType::RightAnti) +)] +#[tokio::test] +async fn test_nl_join_with_swap_no_proj(join_type: JoinType) { + let (big, small) 
= create_big_and_small(); + + let join = Arc::new( + NestedLoopJoinExec::try_new( + Arc::clone(&big), + Arc::clone(&small), + nl_join_filter(), + &join_type, + None, + ) + .unwrap(), + ); + + let optimized_join = JoinSelection::new() + .optimize( + Arc::::clone(&join), + &ConfigOptions::new(), + ) + .unwrap(); + + let swapped_join = optimized_join + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + + // Assert before/after schemas are equal + assert_eq!( + join.schema(), + swapped_join.schema(), + "Join schema should not be modified while optimization" + ); + + // Assert join side of big_col swapped in filter expression + let swapped_filter = swapped_join.filter().unwrap(); + let swapped_big_col_idx = swapped_filter.schema().index_of("big_col").unwrap(); + let swapped_big_col_side = swapped_filter + .column_indices() + .get(swapped_big_col_idx) + .unwrap() + .side; + assert_eq!( + swapped_big_col_side, + JoinSide::Right, + "Filter column side should be swapped" + ); + + assert_eq!( + swapped_join.left().statistics().unwrap().total_byte_size, + Precision::Inexact(8192) + ); + assert_eq!( + swapped_join.right().statistics().unwrap().total_byte_size, + Precision::Inexact(2097152) + ); +} + +#[rstest( + join_type, projection, small_on_right, + case::inner(JoinType::Inner, vec![1], true), + case::left(JoinType::Left, vec![1], true), + case::right(JoinType::Right, vec![1], true), + case::full(JoinType::Full, vec![1], true), + case::left_anti(JoinType::LeftAnti, vec![0], false), + case::left_semi(JoinType::LeftSemi, vec![0], false), + case::right_anti(JoinType::RightAnti, vec![0], true), + case::right_semi(JoinType::RightSemi, vec![0], true), + )] +#[tokio::test] +async fn test_hash_join_swap_on_joins_with_projections( + join_type: JoinType, + projection: Vec, + small_on_right: bool, +) -> Result<()> { + let (big, small) = create_big_and_small(); + + let left = if small_on_right { &big } else { &small }; + let right = if small_on_right { &small } else { &big }; + + let left_on = if small_on_right { + "big_col" + } else { + "small_col" + }; + let right_on = if small_on_right { + "small_col" + } else { + "big_col" + }; + + let join = Arc::new(HashJoinExec::try_new( + Arc::clone(left), + Arc::clone(right), + vec![( + Arc::new(Column::new_with_schema(left_on, &left.schema())?), + Arc::new(Column::new_with_schema(right_on, &right.schema())?), + )], + None, + &join_type, + Some(projection), + PartitionMode::Partitioned, + false, + )?); + + let swapped = join + .swap_inputs(PartitionMode::Partitioned) + .expect("swap_hash_join must support joins with projections"); + let swapped_join = swapped.as_any().downcast_ref::().expect( + "ProjectionExec won't be added above if HashJoinExec contains embedded projection", + ); + + assert_eq!(swapped_join.projection, Some(vec![0_usize])); + assert_eq!(swapped.schema().fields.len(), 1); + assert_eq!(swapped.schema().fields[0].name(), "small_col"); + Ok(()) +} + +fn assert_col_expr(expr: &Arc, name: &str, index: usize) { + let col = expr + .as_any() + .downcast_ref::() + .expect("Projection items should be Column expression"); + assert_eq!(col.name(), name); + assert_eq!(col.index(), index); +} + +#[tokio::test] +async fn test_join_selection_collect_left() { + let big = Arc::new(StatisticsExec::new( + big_statistics(), + Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), + )); + + let small = Arc::new(StatisticsExec::new( + small_statistics(), + Schema::new(vec![Field::new("small_col", DataType::Int32, false)]), + 
)); + + let empty = Arc::new(StatisticsExec::new( + empty_statistics(), + Schema::new(vec![Field::new("empty_col", DataType::Int32, false)]), + )); + + let join_on = vec![( + col("small_col", &small.schema()).unwrap(), + col("big_col", &big.schema()).unwrap(), + )]; + check_join_partition_mode( + Arc::::clone(&small), + Arc::::clone(&big), + join_on, + false, + PartitionMode::CollectLeft, + ); + + let join_on = vec![( + col("big_col", &big.schema()).unwrap(), + col("small_col", &small.schema()).unwrap(), + )]; + check_join_partition_mode( + big, + Arc::::clone(&small), + join_on, + true, + PartitionMode::CollectLeft, + ); + + let join_on = vec![( + col("small_col", &small.schema()).unwrap(), + col("empty_col", &empty.schema()).unwrap(), + )]; + check_join_partition_mode( + Arc::::clone(&small), + Arc::::clone(&empty), + join_on, + false, + PartitionMode::CollectLeft, + ); + + let join_on = vec![( + col("empty_col", &empty.schema()).unwrap(), + col("small_col", &small.schema()).unwrap(), + )]; + check_join_partition_mode(empty, small, join_on, true, PartitionMode::CollectLeft); +} + +#[tokio::test] +async fn test_join_selection_partitioned() { + let bigger = Arc::new(StatisticsExec::new( + bigger_statistics(), + Schema::new(vec![Field::new("bigger_col", DataType::Int32, false)]), + )); + + let big = Arc::new(StatisticsExec::new( + big_statistics(), + Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), + )); + + let empty = Arc::new(StatisticsExec::new( + empty_statistics(), + Schema::new(vec![Field::new("empty_col", DataType::Int32, false)]), + )); + + let join_on = vec![( + Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("bigger_col", &bigger.schema()).unwrap()) as _, + )]; + check_join_partition_mode( + Arc::::clone(&big), + Arc::::clone(&bigger), + join_on, + false, + PartitionMode::Partitioned, + ); + + let join_on = vec![( + Arc::new(Column::new_with_schema("bigger_col", &bigger.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, + )]; + check_join_partition_mode( + bigger, + Arc::::clone(&big), + join_on, + true, + PartitionMode::Partitioned, + ); + + let join_on = vec![( + Arc::new(Column::new_with_schema("empty_col", &empty.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, + )]; + check_join_partition_mode( + Arc::::clone(&empty), + Arc::::clone(&big), + join_on, + false, + PartitionMode::Partitioned, + ); + + let join_on = vec![( + Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, + Arc::new(Column::new_with_schema("empty_col", &empty.schema()).unwrap()) as _, + )]; + check_join_partition_mode(big, empty, join_on, false, PartitionMode::Partitioned); +} + +fn check_join_partition_mode( + left: Arc, + right: Arc, + on: Vec<(PhysicalExprRef, PhysicalExprRef)>, + is_swapped: bool, + expected_mode: PartitionMode, +) { + let join = Arc::new( + HashJoinExec::try_new( + left, + right, + on, + None, + &JoinType::Inner, + None, + PartitionMode::Auto, + false, + ) + .unwrap(), + ); + + let optimized_join = JoinSelection::new() + .optimize(join, &ConfigOptions::new()) + .unwrap(); + + if !is_swapped { + let swapped_join = optimized_join + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + assert_eq!(*swapped_join.partition_mode(), expected_mode); + } else { + let swapping_projection = optimized_join + .as_any() + .downcast_ref::() + .expect("A proj is 
required to swap columns back to their original order"); + let swapped_join = swapping_projection + .input() + .as_any() + .downcast_ref::() + .expect("The type of the plan should not be changed"); + + assert_eq!(*swapped_join.partition_mode(), expected_mode); + } +} + +#[derive(Debug)] +struct UnboundedStream { + batch_produce: Option, + count: usize, + batch: RecordBatch, +} + +impl Stream for UnboundedStream { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + _cx: &mut Context<'_>, + ) -> Poll> { + if let Some(val) = self.batch_produce { + if val <= self.count { + return Poll::Ready(None); + } + } + self.count += 1; + Poll::Ready(Some(Ok(self.batch.clone()))) + } +} + +impl RecordBatchStream for UnboundedStream { + fn schema(&self) -> SchemaRef { + self.batch.schema() + } +} + +/// A mock execution plan that simply returns the provided data source characteristic +#[derive(Debug, Clone)] +pub struct UnboundedExec { + batch_produce: Option, + batch: RecordBatch, + cache: PlanProperties, +} + +impl UnboundedExec { + /// Create new exec that clones the given record batch to its output. + /// + /// Set `batch_produce` to `Some(n)` to emit exactly `n` batches per partition. + pub fn new( + batch_produce: Option, + batch: RecordBatch, + partitions: usize, + ) -> Self { + let cache = Self::compute_properties(batch.schema(), batch_produce, partitions); + Self { + batch_produce, + batch, + cache, + } + } + + /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. + fn compute_properties( + schema: SchemaRef, + batch_produce: Option, + n_partitions: usize, + ) -> PlanProperties { + let boundedness = if batch_produce.is_none() { + Boundedness::Unbounded { + requires_infinite_memory: false, + } + } else { + Boundedness::Bounded + }; + PlanProperties::new( + EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(n_partitions), + EmissionType::Incremental, + boundedness, + ) + } +} + +impl DisplayAs for UnboundedExec { + fn fmt_as( + &self, + t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "UnboundedExec: unbounded={}", + self.batch_produce.is_none(), + ) + } + } + } +} + +impl ExecutionPlan for UnboundedExec { + fn name(&self) -> &'static str { + Self::static_name() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.cache + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _: Vec>, + ) -> Result> { + Ok(self) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + Ok(Box::pin(UnboundedStream { + batch_produce: self.batch_produce, + count: 0, + batch: self.batch.clone(), + })) + } +} + +#[derive(Eq, PartialEq, Debug)] +pub enum SourceType { + Unbounded, + Bounded, +} + +/// A mock execution plan that simply returns the provided statistics +#[derive(Debug, Clone)] +pub struct StatisticsExec { + stats: Statistics, + schema: Arc, + cache: PlanProperties, +} + +impl StatisticsExec { + pub fn new(stats: Statistics, schema: Schema) -> Self { + assert_eq!( + stats.column_statistics.len(), schema.fields().len(), + "if defined, the column statistics vector length should be the number of fields" + ); + let cache = Self::compute_properties(Arc::new(schema.clone())); + Self { + stats, + schema: Arc::new(schema), + cache, + } + } + + /// This 
function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. + fn compute_properties(schema: SchemaRef) -> PlanProperties { + PlanProperties::new( + EquivalenceProperties::new(schema), + Partitioning::UnknownPartitioning(2), + EmissionType::Incremental, + Boundedness::Bounded, + ) + } +} + +impl DisplayAs for StatisticsExec { + fn fmt_as( + &self, + t: DisplayFormatType, + f: &mut std::fmt::Formatter, + ) -> std::fmt::Result { + match t { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + write!( + f, + "StatisticsExec: col_count={}, row_count={:?}", + self.schema.fields().len(), + self.stats.num_rows, + ) + } + } + } +} + +impl ExecutionPlan for StatisticsExec { + fn name(&self) -> &'static str { + Self::static_name() + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn properties(&self) -> &PlanProperties { + &self.cache + } + + fn children(&self) -> Vec<&Arc> { + vec![] + } + + fn with_new_children( + self: Arc, + _: Vec>, + ) -> Result> { + Ok(self) + } + + fn execute( + &self, + _partition: usize, + _context: Arc, + ) -> Result { + unimplemented!("This plan only serves for testing statistics") + } + + fn statistics(&self) -> Result { + Ok(self.stats.clone()) + } +} + +#[test] +fn check_expr_supported() { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Utf8, false), + ])); + let supported_expr = Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 0)), + Operator::Plus, + Arc::new(Column::new("a", 0)), + )) as Arc; + assert!(check_support(&supported_expr, &schema)); + let supported_expr_2 = Arc::new(Column::new("a", 0)) as Arc; + assert!(check_support(&supported_expr_2, &schema)); + let unsupported_expr = Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 0)), + Operator::Or, + Arc::new(Column::new("a", 0)), + )) as Arc; + assert!(!check_support(&unsupported_expr, &schema)); + let unsupported_expr_2 = Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 0)), + Operator::Or, + Arc::new(NegativeExpr::new(Arc::new(Column::new("a", 0)))), + )) as Arc; + assert!(!check_support(&unsupported_expr_2, &schema)); +} + +struct TestCase { + case: String, + initial_sources_unbounded: (SourceType, SourceType), + initial_join_type: JoinType, + initial_mode: PartitionMode, + expected_sources_unbounded: (SourceType, SourceType), + expected_join_type: JoinType, + expected_mode: PartitionMode, + expecting_swap: bool, +} + +#[tokio::test] +async fn test_join_with_swap_full() -> Result<()> { + // NOTE: Currently, some initial conditions are not viable after join order selection. + // For example, full join always comes in partitioned mode. See the warning in + // function "swap". If this changes in the future, we should update these tests. 
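+    //
+    // Editorial summary of the cases below: every Full-join case keeps its
+    // original input order, join type, and Partitioned mode (that is,
+    // `expecting_swap: false`), no matter which side is unbounded.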
+ let cases = vec![ + TestCase { + case: "Bounded - Unbounded 1".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + initial_join_type: JoinType::Full, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: JoinType::Full, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }, + TestCase { + case: "Unbounded - Bounded 2".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + initial_join_type: JoinType::Full, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + expected_join_type: JoinType::Full, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }, + TestCase { + case: "Bounded - Bounded 3".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + initial_join_type: JoinType::Full, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + expected_join_type: JoinType::Full, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }, + TestCase { + case: "Unbounded - Unbounded 4".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + initial_join_type: JoinType::Full, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + expected_join_type: JoinType::Full, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }, + ]; + for case in cases.into_iter() { + test_join_with_maybe_swap_unbounded_case(case).await? + } + Ok(()) +} + +#[tokio::test] +async fn test_cases_without_collect_left_check() -> Result<()> { + let mut cases = vec![]; + let join_types = vec![JoinType::LeftSemi, JoinType::Inner]; + for join_type in join_types { + cases.push(TestCase { + case: "Unbounded - Bounded / CollectLeft".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::CollectLeft, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: join_type.swap(), + expected_mode: PartitionMode::CollectLeft, + expecting_swap: true, + }); + cases.push(TestCase { + case: "Bounded - Unbounded / CollectLeft".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::CollectLeft, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::CollectLeft, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Unbounded - Unbounded / CollectLeft".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::CollectLeft, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::CollectLeft, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Bounded - Bounded / CollectLeft".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::CollectLeft, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + expected_join_type: join_type, + 
expected_mode: PartitionMode::CollectLeft, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Unbounded - Bounded / Partitioned".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: join_type.swap(), + expected_mode: PartitionMode::Partitioned, + expecting_swap: true, + }); + cases.push(TestCase { + case: "Bounded - Unbounded / Partitioned".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Bounded - Bounded / Partitioned".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Unbounded - Unbounded / Partitioned".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + } + + for case in cases.into_iter() { + test_join_with_maybe_swap_unbounded_case(case).await? 
+ } + Ok(()) +} + +#[tokio::test] +async fn test_not_support_collect_left() -> Result<()> { + let mut cases = vec![]; + // After [JoinSelection] optimization, these join types cannot run in CollectLeft mode except + // [JoinType::LeftSemi] + let the_ones_not_support_collect_left = vec![JoinType::Left, JoinType::LeftAnti]; + for join_type in the_ones_not_support_collect_left { + cases.push(TestCase { + case: "Unbounded - Bounded".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: join_type.swap(), + expected_mode: PartitionMode::Partitioned, + expecting_swap: true, + }); + cases.push(TestCase { + case: "Bounded - Unbounded".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Bounded - Bounded".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Unbounded - Unbounded".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + } + + for case in cases.into_iter() { + test_join_with_maybe_swap_unbounded_case(case).await? + } + Ok(()) +} + +#[tokio::test] +async fn test_not_supporting_swaps_possible_collect_left() -> Result<()> { + let mut cases = vec![]; + let the_ones_not_support_collect_left = + vec![JoinType::Right, JoinType::RightAnti, JoinType::RightSemi]; + for join_type in the_ones_not_support_collect_left { + // We expect that (SourceType::Unbounded, SourceType::Bounded) will change, regardless of the + // statistics. + cases.push(TestCase { + case: "Unbounded - Bounded / CollectLeft".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::CollectLeft, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + expected_join_type: join_type, + expected_mode: PartitionMode::CollectLeft, + expecting_swap: false, + }); + // We expect that (SourceType::Bounded, SourceType::Unbounded) will stay same, regardless of the + // statistics. 
+ cases.push(TestCase { + case: "Bounded - Unbounded / CollectLeft".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::CollectLeft, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::CollectLeft, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Unbounded - Unbounded / CollectLeft".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::CollectLeft, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::CollectLeft, + expecting_swap: false, + }); + // + cases.push(TestCase { + case: "Bounded - Bounded / CollectLeft".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::CollectLeft, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + expected_join_type: join_type, + expected_mode: PartitionMode::CollectLeft, + expecting_swap: false, + }); + // If cases are partitioned, only unbounded & bounded check will affect the order. + cases.push(TestCase { + case: "Unbounded - Bounded / Partitioned".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Bounded - Unbounded / Partitioned".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Bounded - Bounded / Partitioned".to_string(), + initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + cases.push(TestCase { + case: "Unbounded - Unbounded / Partitioned".to_string(), + initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + initial_join_type: join_type, + initial_mode: PartitionMode::Partitioned, + expected_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), + expected_join_type: join_type, + expected_mode: PartitionMode::Partitioned, + expecting_swap: false, + }); + } + + for case in cases.into_iter() { + test_join_with_maybe_swap_unbounded_case(case).await? 
+ } + Ok(()) +} + +async fn test_join_with_maybe_swap_unbounded_case(t: TestCase) -> Result<()> { + let left_unbounded = t.initial_sources_unbounded.0 == SourceType::Unbounded; + let right_unbounded = t.initial_sources_unbounded.1 == SourceType::Unbounded; + let left_exec = Arc::new(UnboundedExec::new( + (!left_unbounded).then_some(1), + RecordBatch::new_empty(Arc::new(Schema::new(vec![Field::new( + "a", + DataType::Int32, + false, + )]))), + 2, + )) as _; + let right_exec = Arc::new(UnboundedExec::new( + (!right_unbounded).then_some(1), + RecordBatch::new_empty(Arc::new(Schema::new(vec![Field::new( + "b", + DataType::Int32, + false, + )]))), + 2, + )) as _; + + let join = Arc::new(HashJoinExec::try_new( + Arc::clone(&left_exec), + Arc::clone(&right_exec), + vec![( + col("a", &left_exec.schema())?, + col("b", &right_exec.schema())?, + )], + None, + &t.initial_join_type, + None, + t.initial_mode, + false, + )?) as _; + + let optimized_join_plan = hash_join_swap_subrule(join, &ConfigOptions::new())?; + + // If swap did happen + let projection_added = optimized_join_plan.as_any().is::(); + let plan = if projection_added { + let proj = optimized_join_plan + .as_any() + .downcast_ref::() + .expect("A proj is required to swap columns back to their original order"); + Arc::::clone(proj.input()) + } else { + optimized_join_plan + }; + + if let Some(HashJoinExec { + left, + right, + join_type, + mode, + .. + }) = plan.as_any().downcast_ref::() + { + let left_changed = Arc::ptr_eq(left, &right_exec); + let right_changed = Arc::ptr_eq(right, &left_exec); + // If this is not equal, we have a bigger problem. + assert_eq!(left_changed, right_changed); + assert_eq!( + ( + t.case.as_str(), + if left.boundedness().is_unbounded() { + SourceType::Unbounded + } else { + SourceType::Bounded + }, + if right.boundedness().is_unbounded() { + SourceType::Unbounded + } else { + SourceType::Bounded + }, + join_type, + mode, + left_changed && right_changed + ), + ( + t.case.as_str(), + t.expected_sources_unbounded.0, + t.expected_sources_unbounded.1, + &t.expected_join_type, + &t.expected_mode, + t.expecting_swap + ) + ); + }; + Ok(()) +} diff --git a/datafusion/core/tests/physical_optimizer/limit_pushdown.rs b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs new file mode 100644 index 000000000000..49490b2a3d48 --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/limit_pushdown.rs @@ -0,0 +1,490 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::sync::Arc; + +use arrow::compute::SortOptions; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion_common::config::ConfigOptions; +use datafusion_common::error::Result; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_expr::Operator; +use datafusion_physical_expr::expressions::BinaryExpr; +use datafusion_physical_expr::expressions::{col, lit}; +use datafusion_physical_expr::{Partitioning, PhysicalSortExpr}; +use datafusion_physical_optimizer::limit_pushdown::LimitPushdown; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::empty::EmptyExec; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; +use datafusion_physical_plan::projection::ProjectionExec; +use datafusion_physical_plan::repartition::RepartitionExec; +use datafusion_physical_plan::sorts::sort::SortExec; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; +use datafusion_physical_plan::{get_plan_string, ExecutionPlan, ExecutionPlanProperties}; + +fn create_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Int32, true), + Field::new("c2", DataType::Int32, true), + Field::new("c3", DataType::Int32, true), + ])) +} + +fn streaming_table_exec(schema: SchemaRef) -> Result> { + Ok(Arc::new(StreamingTableExec::try_new( + Arc::clone(&schema), + vec![Arc::new(DummyStreamPartition { schema }) as _], + None, + None, + true, + None, + )?)) +} + +fn global_limit_exec( + input: Arc, + skip: usize, + fetch: Option, +) -> Arc { + Arc::new(GlobalLimitExec::new(input, skip, fetch)) +} + +fn local_limit_exec( + input: Arc, + fetch: usize, +) -> Arc { + Arc::new(LocalLimitExec::new(input, fetch)) +} + +fn sort_exec( + sort_exprs: impl IntoIterator, + input: Arc, +) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + Arc::new(SortExec::new(sort_exprs, input)) +} + +fn sort_preserving_merge_exec( + sort_exprs: impl IntoIterator, + input: Arc, +) -> Arc { + let sort_exprs = sort_exprs.into_iter().collect(); + Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) +} + +fn projection_exec( + schema: SchemaRef, + input: Arc, +) -> Result> { + Ok(Arc::new(ProjectionExec::try_new( + vec![ + (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), + (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), + (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), + ], + input, + )?)) +} + +fn filter_exec( + schema: SchemaRef, + input: Arc, +) -> Result> { + Ok(Arc::new(FilterExec::try_new( + Arc::new(BinaryExpr::new( + col("c3", schema.as_ref()).unwrap(), + Operator::Gt, + lit(0), + )), + input, + )?)) +} + +fn coalesce_batches_exec(input: Arc) -> Arc { + Arc::new(CoalesceBatchesExec::new(input, 8192)) +} + +fn coalesce_partitions_exec( + local_limit: Arc, +) -> Arc { + Arc::new(CoalescePartitionsExec::new(local_limit)) +} + +fn repartition_exec( + streaming_table: Arc, +) -> Result> { + Ok(Arc::new(RepartitionExec::try_new( + streaming_table, + Partitioning::RoundRobinBatch(8), + )?)) +} + +fn empty_exec(schema: SchemaRef) -> Arc { + Arc::new(EmptyExec::new(schema)) +} + +#[derive(Debug)] +struct DummyStreamPartition { + schema: SchemaRef, +} +impl PartitionStream for 
DummyStreamPartition { + fn schema(&self) -> &SchemaRef { + &self.schema + } + fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { + unreachable!() + } +} + +#[test] +fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero() -> Result<()> +{ + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema)?; + let global_limit = global_limit_exec(streaming_table, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( +) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(schema)?; + let global_limit = global_limit_exec(streaming_table, 2, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=2, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( +) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema))?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema, repartition)?; + let coalesce_batches = coalesce_batches_exec(filter); + let local_limit = local_limit_exec(coalesce_batches, 5); + let coalesce_partitions = coalesce_partitions_exec(local_limit); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " LocalLimitExec: fetch=5", + " CoalesceBatchesExec: target_batch_size=8192", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { + let schema = create_schema(); + let streaming_table = 
streaming_table_exec(Arc::clone(&schema))?; + let filter = filter_exec(Arc::clone(&schema), streaming_table)?; + let projection = projection_exec(schema, filter)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " GlobalLimitExec: skip=0, fetch=5", + " FilterExec: c3@2 > 0", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( +) -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema)).unwrap(); + let coalesce_batches = coalesce_batches_exec(streaming_table); + let projection = projection_exec(schema, coalesce_batches)?; + let global_limit = global_limit_exec(projection, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192, fetch=5", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn pushes_global_limit_into_multiple_fetch_plans() -> Result<()> { + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema)).unwrap(); + let coalesce_batches = coalesce_batches_exec(streaming_table); + let projection = projection_exec(Arc::clone(&schema), coalesce_batches)?; + let repartition = repartition_exec(projection)?; + let sort = sort_exec( + vec![PhysicalSortExpr { + expr: col("c1", &schema)?, + options: SortOptions::default(), + }], + repartition, + ); + let spm = sort_preserving_merge_exec(sort.output_ordering().unwrap().to_vec(), sort); + let global_limit = global_limit_exec(spm, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " SortPreservingMergeExec: [c1@0 ASC]", + " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "SortPreservingMergeExec: 
[c1@0 ASC], fetch=5", + " SortExec: TopK(fetch=5), expr=[c1@0 ASC], preserve_partitioning=[false]", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", + " CoalesceBatchesExec: target_batch_size=8192", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions() -> Result<()> +{ + let schema = create_schema(); + let streaming_table = streaming_table_exec(Arc::clone(&schema))?; + let repartition = repartition_exec(streaming_table)?; + let filter = filter_exec(schema, repartition)?; + let coalesce_partitions = coalesce_partitions_exec(filter); + let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); + + let initial = get_plan_string(&global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = [ + "GlobalLimitExec: skip=0, fetch=5", + " CoalescePartitionsExec", + " FilterExec: c3@2 > 0", + " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", + " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_local_limit_with_local_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_local_limit = local_limit_exec(empty_exec, 10); + let parent_local_limit = local_limit_exec(child_local_limit, 20); + + let initial = get_plan_string(&parent_local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " LocalLimitExec: fetch=10", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=0, fetch=10", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_global_limit_with_global_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); + let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); + + let initial = get_plan_string(&parent_global_limit); + let expected_initial = [ + "GlobalLimitExec: skip=10, fetch=20", + " GlobalLimitExec: skip=10, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_global_limit_with_local_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let local_limit = local_limit_exec(empty_exec, 40); + let global_limit = global_limit_exec(local_limit, 20, Some(30)); + + let initial = get_plan_string(&global_limit); + let 
expected_initial = [ + "GlobalLimitExec: skip=20, fetch=30", + " LocalLimitExec: fetch=40", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn merges_local_limit_with_global_limit() -> Result<()> { + let schema = create_schema(); + let empty_exec = empty_exec(schema); + let global_limit = global_limit_exec(empty_exec, 20, Some(30)); + let local_limit = local_limit_exec(global_limit, 20); + + let initial = get_plan_string(&local_limit); + let expected_initial = [ + "LocalLimitExec: fetch=20", + " GlobalLimitExec: skip=20, fetch=30", + " EmptyExec", + ]; + + assert_eq!(initial, expected_initial); + + let after_optimize = + LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; + + let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} diff --git a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs index 7c04d1239bc8..565cee47e3b9 100644 --- a/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs +++ b/datafusion/core/tests/physical_optimizer/limited_distinct_aggregation.rs @@ -19,7 +19,10 @@ use std::sync::Arc; -use crate::physical_optimizer::parquet_exec_with_sort; +use crate::physical_optimizer::test_utils::{ + assert_plan_matches_expected, build_group_by, mock_data, parquet_exec_with_sort, + schema, TestAggregate, +}; use arrow::{compute::SortOptions, util::pretty::pretty_format_batches}; use arrow_schema::DataType; @@ -30,9 +33,6 @@ use datafusion_expr::Operator; use datafusion_physical_expr::expressions::cast; use datafusion_physical_expr::{expressions, expressions::col, PhysicalSortExpr}; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_optimizer::test_utils::{ - assert_plan_matches_expected, build_group_by, mock_data, schema, TestAggregate, -}; use datafusion_physical_plan::{ aggregates::{AggregateExec, AggregateMode}, collect, diff --git a/datafusion/core/tests/physical_optimizer/mod.rs b/datafusion/core/tests/physical_optimizer/mod.rs index 19da0ef7855e..7d5d07715eeb 100644 --- a/datafusion/core/tests/physical_optimizer/mod.rs +++ b/datafusion/core/tests/physical_optimizer/mod.rs @@ -21,36 +21,10 @@ mod aggregate_statistics; mod combine_partial_final_agg; mod enforce_distribution; mod enforce_sorting; +mod join_selection; +mod limit_pushdown; mod limited_distinct_aggregation; +mod projection_pushdown; mod replace_with_order_preserving_variants; mod sanity_checker; - -use std::sync::Arc; - -use arrow_schema::SchemaRef; -use datafusion::datasource::listing::PartitionedFile; -use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec}; -use datafusion_execution::object_store::ObjectStoreUrl; -use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_physical_optimizer::test_utils::schema; - -/// Create a non sorted parquet exec -pub fn parquet_exec(schema: &SchemaRef) -> Arc { - ParquetExec::builder( - FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema.clone()) - .with_file(PartitionedFile::new("x".to_string(), 100)), - ) - .build_arc() -} - -/// Create a single parquet file that is sorted -pub(crate) fn 
parquet_exec_with_sort( - output_ordering: Vec, -) -> Arc { - ParquetExec::builder( - FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_output_ordering(output_ordering), - ) - .build_arc() -} +mod test_utils; diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs new file mode 100644 index 000000000000..fc576e929591 --- /dev/null +++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs @@ -0,0 +1,1403 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::any::Any; +use std::sync::Arc; + +use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions}; +use datafusion::datasource::file_format::file_compression_type::FileCompressionType; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::physical_plan::{CsvExec, FileScanConfig}; +use datafusion_common::config::ConfigOptions; +use datafusion_common::Result; +use datafusion_common::{JoinSide, JoinType, ScalarValue}; +use datafusion_execution::object_store::ObjectStoreUrl; +use datafusion_execution::{SendableRecordBatchStream, TaskContext}; +use datafusion_expr::{ + ColumnarValue, Operator, ScalarUDF, ScalarUDFImpl, Signature, Volatility, +}; +use datafusion_physical_expr::expressions::{ + binary, col, BinaryExpr, CaseExpr, CastExpr, Column, Literal, NegativeExpr, +}; +use datafusion_physical_expr::ScalarFunctionExpr; +use datafusion_physical_expr::{ + Distribution, Partitioning, PhysicalExpr, PhysicalSortExpr, PhysicalSortRequirement, +}; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; +use datafusion_physical_optimizer::output_requirements::OutputRequirementExec; +use datafusion_physical_optimizer::projection_pushdown::ProjectionPushdown; +use datafusion_physical_optimizer::PhysicalOptimizerRule; +use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; +use datafusion_physical_plan::filter::FilterExec; +use datafusion_physical_plan::joins::utils::{ColumnIndex, JoinFilter}; +use datafusion_physical_plan::joins::{ + HashJoinExec, NestedLoopJoinExec, PartitionMode, StreamJoinPartitionMode, + SymmetricHashJoinExec, +}; +use datafusion_physical_plan::memory::MemoryExec; +use datafusion_physical_plan::projection::{update_expr, ProjectionExec}; +use datafusion_physical_plan::repartition::RepartitionExec; +use datafusion_physical_plan::sorts::sort::SortExec; +use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion_physical_plan::streaming::PartitionStream; +use datafusion_physical_plan::streaming::StreamingTableExec; +use datafusion_physical_plan::union::UnionExec; +use 
datafusion_physical_plan::{get_plan_string, ExecutionPlan}; + +use itertools::Itertools; + +/// Mocked UDF +#[derive(Debug)] +struct DummyUDF { + signature: Signature, +} + +impl DummyUDF { + fn new() -> Self { + Self { + signature: Signature::variadic_any(Volatility::Immutable), + } + } +} + +impl ScalarUDFImpl for DummyUDF { + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + "dummy_udf" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(DataType::Int32) + } + + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { + unimplemented!("DummyUDF::invoke") + } +} + +#[test] +fn test_update_matching_exprs() -> Result<()> { + let exprs: Vec> = vec![ + Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 3)), + Operator::Divide, + Arc::new(Column::new("e", 5)), + )), + Arc::new(CastExpr::new( + Arc::new(Column::new("a", 3)), + DataType::Float32, + None, + )), + Arc::new(NegativeExpr::new(Arc::new(Column::new("f", 4)))), + Arc::new(ScalarFunctionExpr::new( + "scalar_expr", + Arc::new(ScalarUDF::new_from_impl(DummyUDF::new())), + vec![ + Arc::new(BinaryExpr::new( + Arc::new(Column::new("b", 1)), + Operator::Divide, + Arc::new(Column::new("c", 0)), + )), + Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 0)), + Operator::Divide, + Arc::new(Column::new("b", 1)), + )), + ], + DataType::Int32, + )), + Arc::new(CaseExpr::try_new( + Some(Arc::new(Column::new("d", 2))), + vec![ + ( + Arc::new(Column::new("a", 3)) as Arc, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("d", 2)), + Operator::Plus, + Arc::new(Column::new("e", 5)), + )) as Arc, + ), + ( + Arc::new(Column::new("a", 3)) as Arc, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("e", 5)), + Operator::Plus, + Arc::new(Column::new("d", 2)), + )) as Arc, + ), + ], + Some(Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 3)), + Operator::Modulo, + Arc::new(Column::new("e", 5)), + ))), + )?), + ]; + let child: Vec<(Arc, String)> = vec![ + (Arc::new(Column::new("c", 2)), "c".to_owned()), + (Arc::new(Column::new("b", 1)), "b".to_owned()), + (Arc::new(Column::new("d", 3)), "d".to_owned()), + (Arc::new(Column::new("a", 0)), "a".to_owned()), + (Arc::new(Column::new("f", 5)), "f".to_owned()), + (Arc::new(Column::new("e", 4)), "e".to_owned()), + ]; + + let expected_exprs: Vec> = vec![ + Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 0)), + Operator::Divide, + Arc::new(Column::new("e", 4)), + )), + Arc::new(CastExpr::new( + Arc::new(Column::new("a", 0)), + DataType::Float32, + None, + )), + Arc::new(NegativeExpr::new(Arc::new(Column::new("f", 5)))), + Arc::new(ScalarFunctionExpr::new( + "scalar_expr", + Arc::new(ScalarUDF::new_from_impl(DummyUDF::new())), + vec![ + Arc::new(BinaryExpr::new( + Arc::new(Column::new("b", 1)), + Operator::Divide, + Arc::new(Column::new("c", 2)), + )), + Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 2)), + Operator::Divide, + Arc::new(Column::new("b", 1)), + )), + ], + DataType::Int32, + )), + Arc::new(CaseExpr::try_new( + Some(Arc::new(Column::new("d", 3))), + vec![ + ( + Arc::new(Column::new("a", 0)) as Arc, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("d", 3)), + Operator::Plus, + Arc::new(Column::new("e", 4)), + )) as Arc, + ), + ( + Arc::new(Column::new("a", 0)) as Arc, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("e", 4)), + Operator::Plus, + Arc::new(Column::new("d", 3)), + )) as Arc, + ), + ], + Some(Arc::new(BinaryExpr::new( + 
Arc::new(Column::new("a", 0)), + Operator::Modulo, + Arc::new(Column::new("e", 4)), + ))), + )?), + ]; + + for (expr, expected_expr) in exprs.into_iter().zip(expected_exprs.into_iter()) { + assert!(update_expr(&expr, &child, true)? + .unwrap() + .eq(&expected_expr)); + } + + Ok(()) +} + +#[test] +fn test_update_projected_exprs() -> Result<()> { + let exprs: Vec> = vec![ + Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 3)), + Operator::Divide, + Arc::new(Column::new("e", 5)), + )), + Arc::new(CastExpr::new( + Arc::new(Column::new("a", 3)), + DataType::Float32, + None, + )), + Arc::new(NegativeExpr::new(Arc::new(Column::new("f", 4)))), + Arc::new(ScalarFunctionExpr::new( + "scalar_expr", + Arc::new(ScalarUDF::new_from_impl(DummyUDF::new())), + vec![ + Arc::new(BinaryExpr::new( + Arc::new(Column::new("b", 1)), + Operator::Divide, + Arc::new(Column::new("c", 0)), + )), + Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 0)), + Operator::Divide, + Arc::new(Column::new("b", 1)), + )), + ], + DataType::Int32, + )), + Arc::new(CaseExpr::try_new( + Some(Arc::new(Column::new("d", 2))), + vec![ + ( + Arc::new(Column::new("a", 3)) as Arc, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("d", 2)), + Operator::Plus, + Arc::new(Column::new("e", 5)), + )) as Arc, + ), + ( + Arc::new(Column::new("a", 3)) as Arc, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("e", 5)), + Operator::Plus, + Arc::new(Column::new("d", 2)), + )) as Arc, + ), + ], + Some(Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 3)), + Operator::Modulo, + Arc::new(Column::new("e", 5)), + ))), + )?), + ]; + let projected_exprs: Vec<(Arc, String)> = vec![ + (Arc::new(Column::new("a", 3)), "a".to_owned()), + (Arc::new(Column::new("b", 1)), "b_new".to_owned()), + (Arc::new(Column::new("c", 0)), "c".to_owned()), + (Arc::new(Column::new("d", 2)), "d_new".to_owned()), + (Arc::new(Column::new("e", 5)), "e".to_owned()), + (Arc::new(Column::new("f", 4)), "f_new".to_owned()), + ]; + + let expected_exprs: Vec> = vec![ + Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 0)), + Operator::Divide, + Arc::new(Column::new("e", 4)), + )), + Arc::new(CastExpr::new( + Arc::new(Column::new("a", 0)), + DataType::Float32, + None, + )), + Arc::new(NegativeExpr::new(Arc::new(Column::new("f_new", 5)))), + Arc::new(ScalarFunctionExpr::new( + "scalar_expr", + Arc::new(ScalarUDF::new_from_impl(DummyUDF::new())), + vec![ + Arc::new(BinaryExpr::new( + Arc::new(Column::new("b_new", 1)), + Operator::Divide, + Arc::new(Column::new("c", 2)), + )), + Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 2)), + Operator::Divide, + Arc::new(Column::new("b_new", 1)), + )), + ], + DataType::Int32, + )), + Arc::new(CaseExpr::try_new( + Some(Arc::new(Column::new("d_new", 3))), + vec![ + ( + Arc::new(Column::new("a", 0)) as Arc, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("d_new", 3)), + Operator::Plus, + Arc::new(Column::new("e", 4)), + )) as Arc, + ), + ( + Arc::new(Column::new("a", 0)) as Arc, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("e", 4)), + Operator::Plus, + Arc::new(Column::new("d_new", 3)), + )) as Arc, + ), + ], + Some(Arc::new(BinaryExpr::new( + Arc::new(Column::new("a", 0)), + Operator::Modulo, + Arc::new(Column::new("e", 4)), + ))), + )?), + ]; + + for (expr, expected_expr) in exprs.into_iter().zip(expected_exprs.into_iter()) { + assert!(update_expr(&expr, &projected_exprs, false)? 
+ .unwrap() + .eq(&expected_expr)); + } + + Ok(()) +} + +fn create_simple_csv_exec() -> Arc { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::Int32, true), + ])); + Arc::new( + CsvExec::builder( + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection(Some(vec![0, 1, 2, 3, 4])), + ) + .with_has_header(false) + .with_delimeter(0) + .with_quote(0) + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) +} + +fn create_projecting_csv_exec() -> Arc { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + Field::new("d", DataType::Int32, true), + ])); + Arc::new( + CsvExec::builder( + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection(Some(vec![3, 2, 1])), + ) + .with_has_header(false) + .with_delimeter(0) + .with_quote(0) + .with_escape(None) + .with_comment(None) + .with_newlines_in_values(false) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(), + ) +} + +fn create_projecting_memory_exec() -> Arc { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::Int32, true), + ])); + + Arc::new(MemoryExec::try_new(&[], schema, Some(vec![2, 0, 3, 4])).unwrap()) +} + +#[test] +fn test_csv_after_projection() -> Result<()> { + let csv = create_projecting_csv_exec(); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("b", 2)), "b".to_string()), + (Arc::new(Column::new("d", 0)), "d".to_string()), + ], + csv.clone(), + )?); + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[b@2 as b, d@0 as d]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[d, c, b], has_header=false", + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = + ["CsvExec: file_groups={1 group: [[x]]}, projection=[b, d], has_header=false"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn test_memory_after_projection() -> Result<()> { + let memory = create_projecting_memory_exec(); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("d", 2)), "d".to_string()), + (Arc::new(Column::new("e", 3)), "e".to_string()), + (Arc::new(Column::new("a", 1)), "a".to_string()), + ], + memory.clone(), + )?); + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[d@2 as d, e@3 as e, a@1 as a]", + " MemoryExec: partitions=0, partition_sizes=[]", + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = ["MemoryExec: partitions=0, partition_sizes=[]"]; + assert_eq!(get_plan_string(&after_optimize), expected); + assert_eq!( + after_optimize + .clone() + 
.as_any() + .downcast_ref::() + .unwrap() + .projection() + .clone() + .unwrap(), + vec![3, 4, 0] + ); + + Ok(()) +} + +#[test] +fn test_streaming_table_after_projection() -> Result<()> { + #[derive(Debug)] + struct DummyStreamPartition { + schema: SchemaRef, + } + impl PartitionStream for DummyStreamPartition { + fn schema(&self) -> &SchemaRef { + &self.schema + } + fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { + unreachable!() + } + } + + let streaming_table = StreamingTableExec::try_new( + Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::Int32, true), + ])), + vec![Arc::new(DummyStreamPartition { + schema: Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::Int32, true), + ])), + }) as _], + Some(&vec![0_usize, 2, 4, 3]), + vec![ + LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::new(Column::new("e", 2)), + options: SortOptions::default(), + }, + PhysicalSortExpr { + expr: Arc::new(Column::new("a", 0)), + options: SortOptions::default(), + }, + ]), + LexOrdering::new(vec![PhysicalSortExpr { + expr: Arc::new(Column::new("d", 3)), + options: SortOptions::default(), + }]), + ] + .into_iter(), + true, + None, + )?; + let projection = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("d", 3)), "d".to_string()), + (Arc::new(Column::new("e", 2)), "e".to_string()), + (Arc::new(Column::new("a", 0)), "a".to_string()), + ], + Arc::new(streaming_table) as _, + )?) as _; + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let result = after_optimize + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!( + result.partition_schema(), + &Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::Int32, true), + ])) + ); + assert_eq!( + result.projection().clone().unwrap().to_vec(), + vec![3_usize, 4, 0] + ); + assert_eq!( + result.projected_schema(), + &Schema::new(vec![ + Field::new("d", DataType::Int32, true), + Field::new("e", DataType::Int32, true), + Field::new("a", DataType::Int32, true), + ]) + ); + assert_eq!( + result.projected_output_ordering().into_iter().collect_vec(), + vec![ + LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::new(Column::new("e", 1)), + options: SortOptions::default(), + }, + PhysicalSortExpr { + expr: Arc::new(Column::new("a", 2)), + options: SortOptions::default(), + }, + ]), + LexOrdering::new(vec![PhysicalSortExpr { + expr: Arc::new(Column::new("d", 0)), + options: SortOptions::default(), + }]), + ] + ); + assert!(result.is_infinite()); + + Ok(()) +} + +#[test] +fn test_projection_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let child_projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("c", 2)), "c".to_string()), + (Arc::new(Column::new("e", 4)), "new_e".to_string()), + (Arc::new(Column::new("a", 0)), "a".to_string()), + (Arc::new(Column::new("b", 1)), "new_b".to_string()), + ], + csv.clone(), + )?); + let top_projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("new_b", 3)), 
"new_b".to_string()), + ( + Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 0)), + Operator::Plus, + Arc::new(Column::new("new_e", 1)), + )), + "binary".to_string(), + ), + (Arc::new(Column::new("new_b", 3)), "newest_b".to_string()), + ], + child_projection.clone(), + )?); + + let initial = get_plan_string(&top_projection); + let expected_initial = [ + "ProjectionExec: expr=[new_b@3 as new_b, c@0 + new_e@1 as binary, new_b@3 as newest_b]", + " ProjectionExec: expr=[c@2 as c, e@4 as new_e, a@0 as a, b@1 as new_b]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(top_projection, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[b@1 as new_b, c@2 + e@4 as binary, b@1 as newest_b]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn test_output_req_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let sort_req: Arc = Arc::new(OutputRequirementExec::new( + csv.clone(), + Some(LexRequirement::new(vec![ + PhysicalSortRequirement { + expr: Arc::new(Column::new("b", 1)), + options: Some(SortOptions::default()), + }, + PhysicalSortRequirement { + expr: Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 2)), + Operator::Plus, + Arc::new(Column::new("a", 0)), + )), + options: Some(SortOptions::default()), + }, + ])), + Distribution::HashPartitioned(vec![ + Arc::new(Column::new("a", 0)), + Arc::new(Column::new("b", 1)), + ]), + )); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("c", 2)), "c".to_string()), + (Arc::new(Column::new("a", 0)), "new_a".to_string()), + (Arc::new(Column::new("b", 1)), "b".to_string()), + ], + sort_req.clone(), + )?); + + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " OutputRequirementExec", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected: [&str; 3] = [ + "OutputRequirementExec", + " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + + assert_eq!(get_plan_string(&after_optimize), expected); + let expected_reqs = LexRequirement::new(vec![ + PhysicalSortRequirement { + expr: Arc::new(Column::new("b", 2)), + options: Some(SortOptions::default()), + }, + PhysicalSortRequirement { + expr: Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 0)), + Operator::Plus, + Arc::new(Column::new("new_a", 1)), + )), + options: Some(SortOptions::default()), + }, + ]); + assert_eq!( + after_optimize + .as_any() + .downcast_ref::() + .unwrap() + .required_input_ordering()[0] + .clone() + .unwrap(), + expected_reqs + ); + let expected_distribution: Vec> = vec![ + Arc::new(Column::new("new_a", 1)), + Arc::new(Column::new("b", 2)), + ]; + if let Distribution::HashPartitioned(vec) = after_optimize + .as_any() + .downcast_ref::() + .unwrap() + .required_input_distribution()[0] + .clone() + { + assert!(vec + .iter() + .zip(expected_distribution) + .all(|(actual, expected)| actual.eq(&expected))); + } else { + panic!("Expected HashPartitioned 
distribution!"); + }; + + Ok(()) +} + +#[test] +fn test_coalesce_partitions_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let coalesce_partitions: Arc = + Arc::new(CoalescePartitionsExec::new(csv)); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("b", 1)), "b".to_string()), + (Arc::new(Column::new("a", 0)), "a_new".to_string()), + (Arc::new(Column::new("d", 3)), "d".to_string()), + ], + coalesce_partitions, + )?); + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d]", + " CoalescePartitionsExec", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = [ + "CoalescePartitionsExec", + " ProjectionExec: expr=[b@1 as b, a@0 as a_new, d@3 as d]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn test_filter_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let predicate = Arc::new(BinaryExpr::new( + Arc::new(BinaryExpr::new( + Arc::new(Column::new("b", 1)), + Operator::Minus, + Arc::new(Column::new("a", 0)), + )), + Operator::Gt, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("d", 3)), + Operator::Minus, + Arc::new(Column::new("a", 0)), + )), + )); + let filter: Arc = Arc::new(FilterExec::try_new(predicate, csv)?); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("a", 0)), "a_new".to_string()), + (Arc::new(Column::new("b", 1)), "b".to_string()), + (Arc::new(Column::new("d", 3)), "d".to_string()), + ], + filter.clone(), + )?); + + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d]", + " FilterExec: b@1 - a@0 > d@3 - a@0", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = [ + "FilterExec: b@1 - a_new@0 > d@2 - a_new@0", + " ProjectionExec: expr=[a@0 as a_new, b@1 as b, d@3 as d]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn test_join_after_projection() -> Result<()> { + let left_csv = create_simple_csv_exec(); + let right_csv = create_simple_csv_exec(); + + let join: Arc = Arc::new(SymmetricHashJoinExec::try_new( + left_csv, + right_csv, + vec![(Arc::new(Column::new("b", 1)), Arc::new(Column::new("c", 2)))], + // b_left-(1+a_right)<=a_right+c_left + Some(JoinFilter::new( + Arc::new(BinaryExpr::new( + Arc::new(BinaryExpr::new( + Arc::new(Column::new("b_left_inter", 0)), + Operator::Minus, + Arc::new(BinaryExpr::new( + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), + Operator::Plus, + Arc::new(Column::new("a_right_inter", 1)), + )), + )), + Operator::LtEq, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("a_right_inter", 1)), + Operator::Plus, + Arc::new(Column::new("c_left_inter", 2)), + )), + )), + vec![ + ColumnIndex { + index: 1, + side: JoinSide::Left, + }, + ColumnIndex { + index: 0, + side: JoinSide::Right, + }, + ColumnIndex { + 
index: 2, + side: JoinSide::Left, + }, + ], + Arc::new(Schema::new(vec![ + Field::new("b_left_inter", DataType::Int32, true), + Field::new("a_right_inter", DataType::Int32, true), + Field::new("c_left_inter", DataType::Int32, true), + ])), + )), + &JoinType::Inner, + true, + None, + None, + StreamJoinPartitionMode::SinglePartition, + )?); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("c", 2)), "c_from_left".to_string()), + (Arc::new(Column::new("b", 1)), "b_from_left".to_string()), + (Arc::new(Column::new("a", 0)), "a_from_left".to_string()), + (Arc::new(Column::new("a", 5)), "a_from_right".to_string()), + (Arc::new(Column::new("c", 7)), "c_from_right".to_string()), + ], + join, + )?); + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, a@5 as a_from_right, c@7 as c_from_right]", + " SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = [ + "SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b_from_left@1, c_from_right@1)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", + " ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " ProjectionExec: expr=[a@0 as a_from_right, c@2 as c_from_right]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + let expected_filter_col_ind = vec![ + ColumnIndex { + index: 1, + side: JoinSide::Left, + }, + ColumnIndex { + index: 0, + side: JoinSide::Right, + }, + ColumnIndex { + index: 0, + side: JoinSide::Left, + }, + ]; + + assert_eq!( + expected_filter_col_ind, + after_optimize + .as_any() + .downcast_ref::() + .unwrap() + .filter() + .unwrap() + .column_indices() + ); + + Ok(()) +} + +#[test] +fn test_join_after_required_projection() -> Result<()> { + let left_csv = create_simple_csv_exec(); + let right_csv = create_simple_csv_exec(); + + let join: Arc = Arc::new(SymmetricHashJoinExec::try_new( + left_csv, + right_csv, + vec![(Arc::new(Column::new("b", 1)), Arc::new(Column::new("c", 2)))], + // b_left-(1+a_right)<=a_right+c_left + Some(JoinFilter::new( + Arc::new(BinaryExpr::new( + Arc::new(BinaryExpr::new( + Arc::new(Column::new("b_left_inter", 0)), + Operator::Minus, + Arc::new(BinaryExpr::new( + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), + Operator::Plus, + Arc::new(Column::new("a_right_inter", 1)), + )), + )), + Operator::LtEq, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("a_right_inter", 1)), + Operator::Plus, + Arc::new(Column::new("c_left_inter", 2)), + )), + )), + vec![ + ColumnIndex { + index: 1, + side: JoinSide::Left, + }, + ColumnIndex { + index: 0, + side: JoinSide::Right, + }, + ColumnIndex { + index: 2, + side: JoinSide::Left, + }, + ], + Arc::new(Schema::new(vec![ + Field::new("b_left_inter", DataType::Int32, true), + Field::new("a_right_inter", DataType::Int32, 
true), + Field::new("c_left_inter", DataType::Int32, true), + ])), + )), + &JoinType::Inner, + true, + None, + None, + StreamJoinPartitionMode::SinglePartition, + )?); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("a", 5)), "a".to_string()), + (Arc::new(Column::new("b", 6)), "b".to_string()), + (Arc::new(Column::new("c", 7)), "c".to_string()), + (Arc::new(Column::new("d", 8)), "d".to_string()), + (Arc::new(Column::new("e", 9)), "e".to_string()), + (Arc::new(Column::new("a", 0)), "a".to_string()), + (Arc::new(Column::new("b", 1)), "b".to_string()), + (Arc::new(Column::new("c", 2)), "c".to_string()), + (Arc::new(Column::new("d", 3)), "d".to_string()), + (Arc::new(Column::new("e", 4)), "e".to_string()), + ], + join, + )?); + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]", + " SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = [ + "ProjectionExec: expr=[a@5 as a, b@6 as b, c@7 as c, d@8 as d, e@9 as e, a@0 as a, b@1 as b, c@2 as c, d@3 as d, e@4 as e]", + " SymmetricHashJoinExec: mode=SinglePartition, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + Ok(()) +} + +#[test] +fn test_nested_loop_join_after_projection() -> Result<()> { + let left_csv = create_simple_csv_exec(); + let right_csv = create_simple_csv_exec(); + + let col_left_a = col("a", &left_csv.schema())?; + let col_right_b = col("b", &right_csv.schema())?; + let col_left_c = col("c", &left_csv.schema())?; + // left_a < right_b + let filter_expr = binary(col_left_a, Operator::Lt, col_right_b, &Schema::empty())?; + let filter_column_indices = vec![ + ColumnIndex { + index: 0, + side: JoinSide::Left, + }, + ColumnIndex { + index: 1, + side: JoinSide::Right, + }, + ColumnIndex { + index: 2, + side: JoinSide::Right, + }, + ]; + let filter_schema = Schema::new(vec![ + Field::new("a", DataType::Int32, true), + Field::new("b", DataType::Int32, true), + Field::new("c", DataType::Int32, true), + ]); + + let join: Arc = Arc::new(NestedLoopJoinExec::try_new( + left_csv, + right_csv, + Some(JoinFilter::new( + filter_expr, + filter_column_indices, + Arc::new(filter_schema), + )), + &JoinType::Inner, + None, + )?); + + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![(col_left_c, "c".to_string())], + Arc::clone(&join), + )?); + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[c@2 as c]", + " NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + 
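+    // After ProjectionPushdown, the single `c` column should be embedded into the
+    // NestedLoopJoinExec itself (shown as `projection=[c@2]` in the expected plan below),
+    // so the standalone ProjectionExec is removed from the plan.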
assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + let expected = [ + "NestedLoopJoinExec: join_type=Inner, filter=a@0 < b@1, projection=[c@2]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + Ok(()) +} + +#[test] +fn test_hash_join_after_projection() -> Result<()> { + // sql like + // SELECT t1.c as c_from_left, t1.b as b_from_left, t1.a as a_from_left, t2.c as c_from_right FROM t1 JOIN t2 ON t1.b = t2.c WHERE t1.b - (1 + t2.a) <= t2.a + t1.c + let left_csv = create_simple_csv_exec(); + let right_csv = create_simple_csv_exec(); + + let join: Arc = Arc::new(HashJoinExec::try_new( + left_csv, + right_csv, + vec![(Arc::new(Column::new("b", 1)), Arc::new(Column::new("c", 2)))], + // b_left-(1+a_right)<=a_right+c_left + Some(JoinFilter::new( + Arc::new(BinaryExpr::new( + Arc::new(BinaryExpr::new( + Arc::new(Column::new("b_left_inter", 0)), + Operator::Minus, + Arc::new(BinaryExpr::new( + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), + Operator::Plus, + Arc::new(Column::new("a_right_inter", 1)), + )), + )), + Operator::LtEq, + Arc::new(BinaryExpr::new( + Arc::new(Column::new("a_right_inter", 1)), + Operator::Plus, + Arc::new(Column::new("c_left_inter", 2)), + )), + )), + vec![ + ColumnIndex { + index: 1, + side: JoinSide::Left, + }, + ColumnIndex { + index: 0, + side: JoinSide::Right, + }, + ColumnIndex { + index: 2, + side: JoinSide::Left, + }, + ], + Arc::new(Schema::new(vec![ + Field::new("b_left_inter", DataType::Int32, true), + Field::new("a_right_inter", DataType::Int32, true), + Field::new("c_left_inter", DataType::Int32, true), + ])), + )), + &JoinType::Inner, + None, + PartitionMode::Auto, + true, + )?); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("c", 2)), "c_from_left".to_string()), + (Arc::new(Column::new("b", 1)), "b_from_left".to_string()), + (Arc::new(Column::new("a", 0)), "a_from_left".to_string()), + (Arc::new(Column::new("c", 7)), "c_from_right".to_string()), + ], + join.clone(), + )?); + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@7 as c_from_right]", " HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + // HashJoinExec only returns result after projection. Because there are some alias columns in the projection, the ProjectionExec is not removed. 
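+    // The pushed-down column selection is instead embedded into the join itself as
+    // `projection=[a@0, b@1, c@2, c@7]`, while the remaining ProjectionExec only applies
+    // the aliases (c_from_left, b_from_left, a_from_left, c_from_right).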
+ let expected = ["ProjectionExec: expr=[c@2 as c_from_left, b@1 as b_from_left, a@0 as a_from_left, c@3 as c_from_right]", " HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("a", 0)), "a".to_string()), + (Arc::new(Column::new("b", 1)), "b".to_string()), + (Arc::new(Column::new("c", 2)), "c".to_string()), + (Arc::new(Column::new("c", 7)), "c".to_string()), + ], + join.clone(), + )?); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + // Comparing to the previous result, this projection don't have alias columns either change the order of output fields. So the ProjectionExec is removed. + let expected = ["HashJoinExec: mode=Auto, join_type=Inner, on=[(b@1, c@2)], filter=b_left_inter@0 - 1 + a_right_inter@1 <= a_right_inter@1 + c_left_inter@2, projection=[a@0, b@1, c@2, c@7]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false"]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn test_repartition_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let repartition: Arc = Arc::new(RepartitionExec::try_new( + csv, + Partitioning::Hash( + vec![ + Arc::new(Column::new("a", 0)), + Arc::new(Column::new("b", 1)), + Arc::new(Column::new("d", 3)), + ], + 6, + ), + )?); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("b", 1)), "b_new".to_string()), + (Arc::new(Column::new("a", 0)), "a".to_string()), + (Arc::new(Column::new("d", 3)), "d_new".to_string()), + ], + repartition, + )?); + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new]", + " RepartitionExec: partitioning=Hash([a@0, b@1, d@3], 6), input_partitions=1", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = [ + "RepartitionExec: partitioning=Hash([a@1, b_new@0, d_new@2], 6), input_partitions=1", + " ProjectionExec: expr=[b@1 as b_new, a@0 as a, d@3 as d_new]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + assert_eq!( + after_optimize + .as_any() + .downcast_ref::() + .unwrap() + .partitioning() + .clone(), + Partitioning::Hash( + vec![ + Arc::new(Column::new("a", 1)), + Arc::new(Column::new("b_new", 0)), + Arc::new(Column::new("d_new", 2)), + ], + 6, + ), + ); + + Ok(()) +} + +#[test] +fn test_sort_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let sort_req: Arc = Arc::new(SortExec::new( + LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::new(Column::new("b", 1)), + options: SortOptions::default(), + }, + PhysicalSortExpr { + expr: Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 2)), + Operator::Plus, + 
Arc::new(Column::new("a", 0)), + )), + options: SortOptions::default(), + }, + ]), + csv.clone(), + )); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("c", 2)), "c".to_string()), + (Arc::new(Column::new("a", 0)), "new_a".to_string()), + (Arc::new(Column::new("b", 1)), "b".to_string()), + ], + sort_req.clone(), + )?); + + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " SortExec: expr=[b@1 ASC, c@2 + a@0 ASC], preserve_partitioning=[false]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = [ + "SortExec: expr=[b@2 ASC, c@0 + new_a@1 ASC], preserve_partitioning=[false]", + " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn test_sort_preserving_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let sort_req: Arc = Arc::new(SortPreservingMergeExec::new( + LexOrdering::new(vec![ + PhysicalSortExpr { + expr: Arc::new(Column::new("b", 1)), + options: SortOptions::default(), + }, + PhysicalSortExpr { + expr: Arc::new(BinaryExpr::new( + Arc::new(Column::new("c", 2)), + Operator::Plus, + Arc::new(Column::new("a", 0)), + )), + options: SortOptions::default(), + }, + ]), + csv.clone(), + )); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("c", 2)), "c".to_string()), + (Arc::new(Column::new("a", 0)), "new_a".to_string()), + (Arc::new(Column::new("b", 1)), "b".to_string()), + ], + sort_req.clone(), + )?); + + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " SortPreservingMergeExec: [b@1 ASC, c@2 + a@0 ASC]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(initial, expected_initial); + + let after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = [ + "SortPreservingMergeExec: [b@2 ASC, c@0 + new_a@1 ASC]", + " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} + +#[test] +fn test_union_after_projection() -> Result<()> { + let csv = create_simple_csv_exec(); + let union: Arc = + Arc::new(UnionExec::new(vec![csv.clone(), csv.clone(), csv])); + let projection: Arc = Arc::new(ProjectionExec::try_new( + vec![ + (Arc::new(Column::new("c", 2)), "c".to_string()), + (Arc::new(Column::new("a", 0)), "new_a".to_string()), + (Arc::new(Column::new("b", 1)), "b".to_string()), + ], + union.clone(), + )?); + + let initial = get_plan_string(&projection); + let expected_initial = [ + "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " UnionExec", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(initial, expected_initial); + + let 
after_optimize = + ProjectionPushdown::new().optimize(projection, &ConfigOptions::new())?; + + let expected = [ + "UnionExec", + " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", + " ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" + ]; + assert_eq!(get_plan_string(&after_optimize), expected); + + Ok(()) +} diff --git a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs index 912683083738..52cd5e5754fa 100644 --- a/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs +++ b/datafusion/core/tests/physical_optimizer/replace_with_order_preserving_variants.rs @@ -17,6 +17,10 @@ use std::sync::Arc; +use crate::physical_optimizer::test_utils::{ + check_integrity, stream_exec_ordered_with_projection, +}; + use datafusion::prelude::SessionContext; use arrow::array::{ArrayRef, Int32Array}; use arrow::compute::SortOptions; @@ -40,7 +44,6 @@ use datafusion_common::Result; use datafusion_expr::{JoinType, Operator}; use datafusion_physical_expr::expressions::{self, col, Column}; use datafusion_physical_expr::PhysicalSortExpr; -use datafusion_physical_optimizer::test_utils::{check_integrity, stream_exec_ordered_with_projection}; use datafusion_physical_optimizer::enforce_sorting::replace_with_order_preserving_variants::{replace_with_order_preserving_variants, OrderPreservationContext}; use datafusion_common::config::ConfigOptions; diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs index 7f723ae67e8e..3057ca819e82 100644 --- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs @@ -15,25 +15,27 @@ // specific language governing permissions and limitations // under the License. 
+use std::sync::Arc; + +use crate::physical_optimizer::test_utils::{ + bounded_window_exec, global_limit_exec, local_limit_exec, memory_exec, + repartition_exec, sort_exec, sort_expr_options, sort_merge_join_exec, +}; + use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions}; use datafusion::datasource::stream::{FileStreamProvider, StreamConfig, StreamTable}; use datafusion::prelude::{CsvReadOptions, SessionContext}; -use datafusion_common::{JoinType, Result}; -use std::sync::Arc; - -use async_trait::async_trait; use datafusion_common::config::ConfigOptions; +use datafusion_common::{JoinType, Result}; use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::Partitioning; use datafusion_physical_optimizer::sanity_checker::SanityCheckPlan; -use datafusion_physical_optimizer::test_utils::{ - bounded_window_exec, global_limit_exec, local_limit_exec, memory_exec, - repartition_exec, sort_exec, sort_expr_options, sort_merge_join_exec, -}; use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::repartition::RepartitionExec; use datafusion_physical_plan::{displayable, ExecutionPlan}; +use async_trait::async_trait; + async fn register_current_csv( ctx: &SessionContext, table_name: &str, diff --git a/datafusion/physical-optimizer/src/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs similarity index 95% rename from datafusion/physical-optimizer/src/test_utils.rs rename to datafusion/core/tests/physical_optimizer/test_utils.rs index 560a5e598733..920b8da575c7 100644 --- a/datafusion/physical-optimizer/src/test_utils.rs +++ b/datafusion/core/tests/physical_optimizer/test_utils.rs @@ -21,16 +21,16 @@ use std::any::Any; use std::fmt::Formatter; use std::sync::Arc; -use crate::limited_distinct_aggregation::LimitedDistinctAggregation; -use crate::PhysicalOptimizerRule; - use arrow::array::Int32Array; use arrow::record_batch::RecordBatch; use arrow_schema::{DataType, Field, Schema, SchemaRef, SortOptions}; +use datafusion::datasource::listing::PartitionedFile; +use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec}; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::utils::expr::COUNT_STAR_EXPANSION; use datafusion_common::{JoinType, Result}; +use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::test::function_stub::{ avg_udaf as avg_stub, count_udaf as count_stub, @@ -42,6 +42,8 @@ use datafusion_physical_expr::expressions::col; use datafusion_physical_expr::{expressions, PhysicalExpr}; use datafusion_physical_expr_common::sort_expr::LexRequirement; use datafusion_physical_expr_common::sort_expr::{LexOrdering, PhysicalSortExpr}; +use datafusion_physical_optimizer::limited_distinct_aggregation::LimitedDistinctAggregation; +use datafusion_physical_optimizer::PhysicalOptimizerRule; use datafusion_physical_plan::aggregates::{ AggregateExec, AggregateMode, PhysicalGroupBy, }; @@ -65,6 +67,27 @@ use datafusion_physical_plan::{ }; use datafusion_physical_plan::{InputOrderMode, Partitioning}; +/// Create a non sorted parquet exec +pub fn parquet_exec(schema: &SchemaRef) -> Arc { + ParquetExec::builder( + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema.clone()) + .with_file(PartitionedFile::new("x".to_string(), 100)), + ) + .build_arc() +} + +/// Create a single parquet file that is sorted +pub(crate) fn 
parquet_exec_with_sort( + output_ordering: Vec, +) -> Arc { + ParquetExec::builder( + FileScanConfig::new(ObjectStoreUrl::parse("test:///").unwrap(), schema()) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_output_ordering(output_ordering), + ) + .build_arc() +} + pub fn schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("a", DataType::Int64, true), @@ -492,7 +515,7 @@ pub fn trim_plan_display(plan: &str) -> Vec<&str> { // construct a stream partition for test purposes #[derive(Debug)] -pub(crate) struct TestStreamPartition { +pub struct TestStreamPartition { pub schema: SchemaRef, } diff --git a/datafusion/execution/src/memory_pool/mod.rs b/datafusion/execution/src/memory_pool/mod.rs index 45d467f133bf..797f2687aba6 100644 --- a/datafusion/execution/src/memory_pool/mod.rs +++ b/datafusion/execution/src/memory_pool/mod.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! [`MemoryPool`] for memory management during query execution, [`proxy]` for +//! [`MemoryPool`] for memory management during query execution, [`proxy`] for //! help with allocation accounting. use datafusion_common::{internal_err, Result}; @@ -108,6 +108,9 @@ pub use pool::*; /// /// * [`FairSpillPool`]: Limits memory usage to a fixed size, allocating memory /// to all spilling operators fairly +/// +/// * [`TrackConsumersPool`]: Wraps another [`MemoryPool`] and tracks consumers, +/// providing better error messages on the largest memory users. pub trait MemoryPool: Send + Sync + std::fmt::Debug { /// Registers a new [`MemoryConsumer`] /// @@ -140,9 +143,9 @@ pub trait MemoryPool: Send + Sync + std::fmt::Debug { /// [`MemoryReservation`] in a [`MemoryPool`]. All allocations are registered to /// a particular `MemoryConsumer`; /// -/// For help with allocation accounting, see the [proxy] module. +/// For help with allocation accounting, see the [`proxy`] module. /// -/// [proxy]: crate::memory_pool::proxy +/// [proxy]: datafusion_common::utils::proxy #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct MemoryConsumer { name: String, diff --git a/datafusion/expr-common/Cargo.toml b/datafusion/expr-common/Cargo.toml index 1ccc6fc17293..109d8e0b89a6 100644 --- a/datafusion/expr-common/Cargo.toml +++ b/datafusion/expr-common/Cargo.toml @@ -40,6 +40,4 @@ path = "src/lib.rs" arrow = { workspace = true } datafusion-common = { workspace = true } itertools = { workspace = true } - -[dev-dependencies] paste = "^1.0" diff --git a/datafusion/expr-common/src/columnar_value.rs b/datafusion/expr-common/src/columnar_value.rs index 3b17e606544d..cb7cbdbac291 100644 --- a/datafusion/expr-common/src/columnar_value.rs +++ b/datafusion/expr-common/src/columnar_value.rs @@ -20,8 +20,10 @@ use arrow::array::{Array, ArrayRef, NullArray}; use arrow::compute::{kernels, CastOptions}; use arrow::datatypes::DataType; +use arrow::util::pretty::pretty_format_columns; use datafusion_common::format::DEFAULT_CAST_OPTIONS; use datafusion_common::{internal_err, Result, ScalarValue}; +use std::fmt; use std::sync::Arc; /// The result of evaluating an expression. 
@@ -218,9 +220,34 @@ impl ColumnarValue { } } +// Implement Display trait for ColumnarValue +impl fmt::Display for ColumnarValue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let formatted = match self { + ColumnarValue::Array(array) => { + pretty_format_columns("ColumnarValue(ArrayRef)", &[Arc::clone(array)]) + } + ColumnarValue::Scalar(_) => { + if let Ok(array) = self.to_array(1) { + pretty_format_columns("ColumnarValue(ScalarValue)", &[array]) + } else { + return write!(f, "Error formatting columnar value"); + } + } + }; + + if let Ok(formatted) = formatted { + write!(f, "{}", formatted) + } else { + write!(f, "Error formatting columnar value") + } + } +} + #[cfg(test)] mod tests { use super::*; + use arrow::array::Int32Array; #[test] fn values_to_arrays() { @@ -329,6 +356,39 @@ mod tests { /// Makes an array of length `len` with all elements set to `val` fn make_array(val: i32, len: usize) -> ArrayRef { - Arc::new(arrow::array::Int32Array::from(vec![val; len])) + Arc::new(Int32Array::from(vec![val; len])) + } + + #[test] + fn test_display_scalar() { + let column = ColumnarValue::from(ScalarValue::from("foo")); + assert_eq!( + column.to_string(), + concat!( + "+----------------------------+\n", + "| ColumnarValue(ScalarValue) |\n", + "+----------------------------+\n", + "| foo |\n", + "+----------------------------+" + ) + ); + } + + #[test] + fn test_display_array() { + let array: ArrayRef = Arc::new(Int32Array::from_iter_values(vec![1, 2, 3])); + let column = ColumnarValue::from(array); + assert_eq!( + column.to_string(), + concat!( + "+-------------------------+\n", + "| ColumnarValue(ArrayRef) |\n", + "+-------------------------+\n", + "| 1 |\n", + "| 2 |\n", + "| 3 |\n", + "+-------------------------+" + ) + ); } } diff --git a/datafusion/expr-common/src/interval_arithmetic.rs b/datafusion/expr-common/src/interval_arithmetic.rs index ffaa32f08075..993051eaeee1 100644 --- a/datafusion/expr-common/src/interval_arithmetic.rs +++ b/datafusion/expr-common/src/interval_arithmetic.rs @@ -76,11 +76,739 @@ macro_rules! get_extreme_value { DataType::Interval(IntervalUnit::MonthDayNano) => { ScalarValue::IntervalMonthDayNano(Some(IntervalMonthDayNano::$extreme)) } + DataType::Decimal128(precision, scale) => ScalarValue::Decimal128( + Some( + paste::paste! {[<$extreme _DECIMAL128_FOR_EACH_PRECISION>]} + [*precision as usize], + ), + *precision, + *scale, + ), + DataType::Decimal256(precision, scale) => ScalarValue::Decimal256( + Some( + paste::paste! {[<$extreme _DECIMAL256_FOR_EACH_PRECISION>]} + [*precision as usize], + ), + *precision, + *scale, + ), _ => unreachable!(), } }; } +/// The maximum `i128` value that can be stored in a `Decimal128` value of precision `p`. 
+/// +/// Remove this once is available +const MAX_DECIMAL128_FOR_EACH_PRECISION: [i128; 39] = [ + 0, // unused first element + 9, + 99, + 999, + 9999, + 99999, + 999999, + 9999999, + 99999999, + 999999999, + 9999999999, + 99999999999, + 999999999999, + 9999999999999, + 99999999999999, + 999999999999999, + 9999999999999999, + 99999999999999999, + 999999999999999999, + 9999999999999999999, + 99999999999999999999, + 999999999999999999999, + 9999999999999999999999, + 99999999999999999999999, + 999999999999999999999999, + 9999999999999999999999999, + 99999999999999999999999999, + 999999999999999999999999999, + 9999999999999999999999999999, + 99999999999999999999999999999, + 999999999999999999999999999999, + 9999999999999999999999999999999, + 99999999999999999999999999999999, + 999999999999999999999999999999999, + 9999999999999999999999999999999999, + 99999999999999999999999999999999999, + 999999999999999999999999999999999999, + 9999999999999999999999999999999999999, + 99999999999999999999999999999999999999, +]; + +/// The minimum `i128` value that can be stored in a `Decimal128` value of precision `p`. +/// +/// Remove this once is available +const MIN_DECIMAL128_FOR_EACH_PRECISION: [i128; 39] = [ + 0, // unused first element + -9, + -99, + -999, + -9999, + -99999, + -999999, + -9999999, + -99999999, + -999999999, + -9999999999, + -99999999999, + -999999999999, + -9999999999999, + -99999999999999, + -999999999999999, + -9999999999999999, + -99999999999999999, + -999999999999999999, + -9999999999999999999, + -99999999999999999999, + -999999999999999999999, + -9999999999999999999999, + -99999999999999999999999, + -999999999999999999999999, + -9999999999999999999999999, + -99999999999999999999999999, + -999999999999999999999999999, + -9999999999999999999999999999, + -99999999999999999999999999999, + -999999999999999999999999999999, + -9999999999999999999999999999999, + -99999999999999999999999999999999, + -999999999999999999999999999999999, + -9999999999999999999999999999999999, + -99999999999999999999999999999999999, + -999999999999999999999999999999999999, + -9999999999999999999999999999999999999, + -99999999999999999999999999999999999999, +]; + +/// The maximum `i256` value that can be stored in a `Decimal256` value of precision `p`. 
+/// +/// Remove this once is available +const MAX_DECIMAL256_FOR_EACH_PRECISION: [arrow::datatypes::i256; 77] = [ + arrow::datatypes::i256::from_i128(0_i128), // unused first element + arrow::datatypes::i256::from_le_bytes([ + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 49, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, + 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, + 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 231, 130, 102, 
206, 182, 140, 227, 33, 99, 216, 91, 203, + 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, + 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, + 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, + 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, + 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, + 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, + 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, + 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, + 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, + 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, + 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, + 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, + 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, + 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, + 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, + 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, + 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, + 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, + 18, 178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, + 
244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, + 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, + 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, + 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, + 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, + 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, + 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, + 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, + 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, + 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, + 254, 70, 233, 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, + 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 44, 56, 4, 101, 116, 75, + 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, + 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, + ]), + arrow::datatypes::i256::from_le_bytes([ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, + 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, + ]), +]; + +/// The minimum `i256` value that can be stored in a `Decimal256` value of precision `p`. 
+/// +/// Remove this once is available +const MIN_DECIMAL256_FOR_EACH_PRECISION: [arrow::datatypes::i256; 77] = [ + arrow::datatypes::i256::from_i128(0_i128), // unused first element + arrow::datatypes::i256::from_le_bytes([ + 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 31, 10, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 240, 90, 43, 23, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + 
arrow::datatypes::i256::from_le_bytes([ + 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 240, 156, 210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 95, 18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 146, 210, 17, 251, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 
246, 18, 254, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 104, 63, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, + 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, + 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, + 80, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, + 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, + 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, + 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 
242, 207, 130, 106, 235, + 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, + 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, + 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, + 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, + 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, + 74, 131, 55, 215, 255, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, + 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, + 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, + 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, + 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, + 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, + 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, + 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, + 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, + 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, + 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, + 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, + 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, + 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, + ]), + 
arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, + 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, + 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, + 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, + 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 175, 194, 104, + 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, + ]), + arrow::datatypes::i256::from_le_bytes([ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, + 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, + ]), +]; + macro_rules! value_transition { ($bound:ident, $direction:expr, $value:expr) => { match $value { @@ -1008,17 +1736,20 @@ fn handle_overflow( lhs: &ScalarValue, rhs: &ScalarValue, ) -> ScalarValue { - let zero = ScalarValue::new_zero(dt).unwrap(); + let lhs_zero = ScalarValue::new_zero(&lhs.data_type()).unwrap(); + let rhs_zero = ScalarValue::new_zero(&rhs.data_type()).unwrap(); let positive_sign = match op { Operator::Multiply | Operator::Divide => { - lhs.lt(&zero) && rhs.lt(&zero) || lhs.gt(&zero) && rhs.gt(&zero) + lhs.lt(&lhs_zero) && rhs.lt(&rhs_zero) + || lhs.gt(&lhs_zero) && rhs.gt(&rhs_zero) } - Operator::Plus => lhs.ge(&zero), + Operator::Plus => lhs.ge(&lhs_zero), Operator::Minus => lhs.ge(rhs), _ => { unreachable!() } }; + match (UPPER, positive_sign) { (true, true) | (false, false) => ScalarValue::try_from(dt).unwrap(), (true, false) => { @@ -1832,7 +2563,12 @@ impl NullableInterval { #[cfg(test)] mod tests { - use crate::interval_arithmetic::{next_value, prev_value, satisfy_greater, Interval}; + use crate::{ + interval_arithmetic::{ + handle_overflow, next_value, prev_value, satisfy_greater, Interval, + }, + operator::Operator, + }; use arrow::datatypes::DataType; use datafusion_common::{Result, ScalarValue}; @@ -3108,6 +3844,73 @@ mod tests { Ok(()) } + #[test] + fn test_overflow_handling() -> Result<()> { + // Test integer overflow handling: + let dt = DataType::Int32; + let op = Operator::Plus; + let lhs = ScalarValue::Int32(Some(i32::MAX)); + let rhs = ScalarValue::Int32(Some(1)); + let result = handle_overflow::(&dt, op, &lhs, &rhs); + assert_eq!(result, ScalarValue::Int32(None)); + let result = handle_overflow::(&dt, op, &lhs, &rhs); + assert_eq!(result, ScalarValue::Int32(Some(i32::MAX))); + + // Test float overflow handling: + let dt = DataType::Float32; + let op = Operator::Multiply; + let lhs = ScalarValue::Float32(Some(f32::MAX)); + let rhs = ScalarValue::Float32(Some(2.0)); + let result = handle_overflow::(&dt, op, &lhs, &rhs); + assert_eq!(result, ScalarValue::Float32(None)); + let result = handle_overflow::(&dt, op, &lhs, &rhs); + assert_eq!(result, ScalarValue::Float32(Some(f32::MAX))); + + // Test float underflow handling: + let lhs = ScalarValue::Float32(Some(f32::MIN)); + let rhs = ScalarValue::Float32(Some(2.0)); + let result = handle_overflow::(&dt, op, &lhs, &rhs); + assert_eq!(result, ScalarValue::Float32(Some(f32::MIN))); + let result = 
handle_overflow::<false>(&dt, op, &lhs, &rhs);
+        assert_eq!(result, ScalarValue::Float32(None));
+
+        // Test integer underflow handling:
+        let dt = DataType::Int64;
+        let op = Operator::Minus;
+        let lhs = ScalarValue::Int64(Some(i64::MIN));
+        let rhs = ScalarValue::Int64(Some(1));
+        let result = handle_overflow::<true>(&dt, op, &lhs, &rhs);
+        assert_eq!(result, ScalarValue::Int64(Some(i64::MIN)));
+        let result = handle_overflow::<false>(&dt, op, &lhs, &rhs);
+        assert_eq!(result, ScalarValue::Int64(None));
+
+        // Test unsigned integer handling:
+        let dt = DataType::UInt32;
+        let op = Operator::Minus;
+        let lhs = ScalarValue::UInt32(Some(0));
+        let rhs = ScalarValue::UInt32(Some(1));
+        let result = handle_overflow::<true>(&dt, op, &lhs, &rhs);
+        assert_eq!(result, ScalarValue::UInt32(Some(0)));
+        let result = handle_overflow::<false>(&dt, op, &lhs, &rhs);
+        assert_eq!(result, ScalarValue::UInt32(None));
+
+        // Test decimal handling:
+        let dt = DataType::Decimal128(38, 35);
+        let op = Operator::Plus;
+        let lhs =
+            ScalarValue::Decimal128(Some(54321543215432154321543215432154321), 35, 35);
+        let rhs = ScalarValue::Decimal128(Some(10000), 20, 0);
+        let result = handle_overflow::<true>(&dt, op, &lhs, &rhs);
+        assert_eq!(result, ScalarValue::Decimal128(None, 38, 35));
+        let result = handle_overflow::<false>(&dt, op, &lhs, &rhs);
+        assert_eq!(
+            result,
+            ScalarValue::Decimal128(Some(99999999999999999999999999999999999999), 38, 35)
+        );
+
+        Ok(())
+    }
+
     #[test]
     fn test_cardinality_of_intervals() -> Result<()> {
         // In IEEE 754 standard for floating-point arithmetic, if we keep the sign and exponent fields same,
diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml
index 81e5233a1516..333f0d9cdd79 100644
--- a/datafusion/functions-aggregate/Cargo.toml
+++ b/datafusion/functions-aggregate/Cargo.toml
@@ -66,3 +66,7 @@ harness = false
 [[bench]]
 name = "sum"
 harness = false
+
+[[bench]]
+name = "array_agg"
+harness = false
diff --git a/datafusion/functions-aggregate/benches/array_agg.rs b/datafusion/functions-aggregate/benches/array_agg.rs
new file mode 100644
index 000000000000..c4599cdfc9b3
--- /dev/null
+++ b/datafusion/functions-aggregate/benches/array_agg.rs
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
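For context, the new benchmark file below drives `ArrayAggAccumulator::merge_batch` through Criterion. The following is a minimal, illustrative sketch (not part of the patch) of that same call path, assuming only the APIs already used in this diff: `ArrayAggAccumulator::try_new` and the `datafusion_expr::Accumulator` trait.

```rust
// Illustrative sketch only -- not part of the patch.
use std::sync::Arc;

use arrow::array::{ArrayRef, ListArray};
use arrow::datatypes::{DataType, Int64Type};
use datafusion_expr::Accumulator;
use datafusion_functions_aggregate::array_agg::ArrayAggAccumulator;

fn main() -> datafusion_common::Result<()> {
    // The merge state of ARRAY_AGG is a list array: one list entry per partial state.
    let state: ArrayRef = Arc::new(ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
        Some(vec![Some(1_i64), Some(2)]),
        Some(vec![Some(3)]),
    ]));

    // The accumulator is constructed with the list *item* type, not the list type.
    let mut acc = ArrayAggAccumulator::try_new(&DataType::Int64)?;
    acc.merge_batch(&[state])?;

    // Evaluation produces a single `ScalarValue::List` containing the merged values.
    println!("{:?}", acc.evaluate()?);
    Ok(())
}
```

The benchmark repeats this merge step over 8192-row list arrays while varying the null density and the fraction of nulls that map to zero-length lists.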
+ +use std::sync::Arc; + +use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray}; +use arrow::datatypes::Int64Type; +use arrow::util::bench_util::create_primitive_array; +use arrow_schema::Field; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::Accumulator; +use datafusion_functions_aggregate::array_agg::ArrayAggAccumulator; + +use arrow::util::test_util::seedable_rng; +use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; +use rand::distributions::{Distribution, Standard}; +use rand::Rng; + +fn merge_batch_bench(c: &mut Criterion, name: &str, values: ArrayRef) { + let list_item_data_type = values.as_list::().values().data_type().clone(); + c.bench_function(name, |b| { + b.iter(|| { + #[allow(clippy::unit_arg)] + black_box( + ArrayAggAccumulator::try_new(&list_item_data_type) + .unwrap() + .merge_batch(&[values.clone()]) + .unwrap(), + ) + }) + }); +} + +/// Create List array with the given item data type, null density, null locations and zero length lists density +/// Creates an random (but fixed-seeded) array of a given size and null density +pub fn create_list_array( + size: usize, + null_density: f32, + zero_length_lists_probability: f32, +) -> ListArray +where + T: ArrowPrimitiveType, + Standard: Distribution, +{ + let mut nulls_builder = NullBufferBuilder::new(size); + let mut rng = seedable_rng(); + + let offsets = OffsetBuffer::from_lengths((0..size).map(|_| { + let is_null = rng.gen::() < null_density; + + let mut length = rng.gen_range(1..10); + + if is_null { + nulls_builder.append_null(); + + if rng.gen::() <= zero_length_lists_probability { + length = 0; + } + } else { + nulls_builder.append_non_null(); + } + + length + })); + + let length = *offsets.last().unwrap() as usize; + + let values = create_primitive_array::(length, 0.0); + + let field = Field::new_list_field(T::DATA_TYPE, true); + + ListArray::new( + Arc::new(field), + offsets, + Arc::new(values), + nulls_builder.finish(), + ) +} + +fn array_agg_benchmark(c: &mut Criterion) { + let values = Arc::new(create_list_array::(8192, 0.0, 1.0)) as ArrayRef; + merge_batch_bench(c, "array_agg i64 merge_batch no nulls", values); + + let values = Arc::new(create_list_array::(8192, 1.0, 1.0)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch all nulls, 100% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 1.0, 0.9)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch all nulls, 90% of nulls point to a zero length array", + values, + ); + + // All nulls point to a 0 length array + + let values = Arc::new(create_list_array::(8192, 0.3, 1.0)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 30% nulls, 100% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.7, 1.0)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 70% nulls, 100% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.3, 0.99)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 30% nulls, 99% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.7, 0.99)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 70% nulls, 99% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.3, 0.9)) as ArrayRef; + merge_batch_bench( + c, + 
"array_agg i64 merge_batch 30% nulls, 90% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.7, 0.9)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 70% nulls, 90% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.3, 0.50)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 30% nulls, 50% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.7, 0.50)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 70% nulls, 50% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.3, 0.0)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 30% nulls, 0% of nulls point to a zero length array", + values, + ); + + let values = Arc::new(create_list_array::(8192, 0.7, 0.0)) as ArrayRef; + merge_batch_bench( + c, + "array_agg i64 merge_batch 70% nulls, 0% of nulls point to a zero length array", + values, + ); +} + +criterion_group!(benches, array_agg_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions-nested/src/repeat.rs b/datafusion/functions-nested/src/repeat.rs index da0aa5f12fde..2bc4721f3cfa 100644 --- a/datafusion/functions-nested/src/repeat.rs +++ b/datafusion/functions-nested/src/repeat.rs @@ -20,14 +20,15 @@ use crate::utils::make_scalar_function; use arrow::array::{Capacities, MutableArrayData}; use arrow::compute; +use arrow::compute::cast; use arrow_array::{ - new_null_array, Array, ArrayRef, GenericListArray, Int64Array, ListArray, - OffsetSizeTrait, + new_null_array, Array, ArrayRef, GenericListArray, ListArray, OffsetSizeTrait, + UInt64Array, }; use arrow_buffer::OffsetBuffer; use arrow_schema::DataType::{LargeList, List}; use arrow_schema::{DataType, Field}; -use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; +use datafusion_common::cast::{as_large_list_array, as_list_array, as_uint64_array}; use datafusion_common::{exec_err, Result}; use datafusion_expr::{ ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, @@ -86,7 +87,7 @@ impl Default for ArrayRepeat { impl ArrayRepeat { pub fn new() -> Self { Self { - signature: Signature::variadic_any(Volatility::Immutable), + signature: Signature::user_defined(Volatility::Immutable), aliases: vec![String::from("list_repeat")], } } @@ -124,6 +125,30 @@ impl ScalarUDFImpl for ArrayRepeat { &self.aliases } + fn coerce_types(&self, arg_types: &[DataType]) -> Result> { + if arg_types.len() != 2 { + return exec_err!("array_repeat expects two arguments"); + } + + let element_type = &arg_types[0]; + let first = element_type.clone(); + + let count_type = &arg_types[1]; + + // Coerce the second argument to Int64/UInt64 if it's a numeric type + let second = match count_type { + DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => { + DataType::Int64 + } + DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => { + DataType::UInt64 + } + _ => return exec_err!("count must be an integer type"), + }; + + Ok(vec![first, second]) + } + fn documentation(&self) -> Option<&Documentation> { self.doc() } @@ -131,12 +156,16 @@ impl ScalarUDFImpl for ArrayRepeat { /// Array_repeat SQL function pub fn array_repeat_inner(args: &[ArrayRef]) -> Result { - if args.len() != 2 { - return exec_err!("array_repeat expects two arguments"); - } - let element = &args[0]; - let 
count_array = as_int64_array(&args[1])?; + let count_array = &args[1]; + + let count_array = match count_array.data_type() { + DataType::Int64 => &cast(count_array, &DataType::UInt64)?, + DataType::UInt64 => count_array, + _ => return exec_err!("count must be an integer type"), + }; + + let count_array = as_uint64_array(count_array)?; match element.data_type() { List(_) => { @@ -165,7 +194,7 @@ pub fn array_repeat_inner(args: &[ArrayRef]) -> Result { /// ``` fn general_repeat( array: &ArrayRef, - count_array: &Int64Array, + count_array: &UInt64Array, ) -> Result { let data_type = array.data_type(); let mut new_values = vec![]; @@ -219,7 +248,7 @@ fn general_repeat( /// ``` fn general_list_repeat( list_array: &GenericListArray, - count_array: &Int64Array, + count_array: &UInt64Array, ) -> Result { let data_type = list_array.data_type(); let value_type = list_array.value_type(); diff --git a/datafusion/functions/src/math/monotonicity.rs b/datafusion/functions/src/math/monotonicity.rs index 46c670b8e651..7c87d025e929 100644 --- a/datafusion/functions/src/math/monotonicity.rs +++ b/datafusion/functions/src/math/monotonicity.rs @@ -558,3 +558,405 @@ pub fn get_tanh_doc() -> &'static Documentation { .build() }) } + +#[cfg(test)] +mod tests { + use arrow::compute::SortOptions; + use datafusion_common::Result; + + use super::*; + + #[derive(Debug)] + struct MonotonicityTestCase { + name: &'static str, + func: fn(&[ExprProperties]) -> Result, + lower: f64, + upper: f64, + input_sort: SortProperties, + expected: Result, + } + + #[test] + fn test_monotonicity_table() { + fn create_ep(lower: f64, upper: f64, sp: SortProperties) -> ExprProperties { + ExprProperties { + range: Interval::try_new( + ScalarValue::from(lower), + ScalarValue::from(upper), + ) + .unwrap(), + sort_properties: sp, + preserves_lex_ordering: false, + } + } + + let test_cases = vec![ + MonotonicityTestCase { + name: "acos_order within domain", + func: acos_order, + lower: -0.5, + upper: 0.5, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "acos_order out of domain", + func: acos_order, + lower: -2.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: exec_err!("Input range of ACOS contains out-of-domain values"), + }, + MonotonicityTestCase { + name: "acosh_order within domain", + func: acosh_order, + lower: 2.0, + upper: 100.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: true, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: true, + })), + }, + MonotonicityTestCase { + name: "acosh_order out of domain", + func: acosh_order, + lower: 0.5, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: false, + }), + expected: exec_err!("Input range of ACOSH contains out-of-domain values"), + }, + MonotonicityTestCase { + name: "asin_order within domain", + func: asin_order, + lower: -0.5, + upper: 0.5, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "asin_order out of domain", + func: asin_order, + lower: -2.0, + upper: 1.0, + input_sort: 
SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: exec_err!("Input range of ASIN contains out-of-domain values"), + }, + MonotonicityTestCase { + name: "asinh_order within domain", + func: asinh_order, + lower: -1.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "asinh_order out of domain", + func: asinh_order, + lower: -2.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "atan_order within domain", + func: atan_order, + lower: -1.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "atan_order out of domain", + func: atan_order, + lower: -2.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "atanh_order within domain", + func: atanh_order, + lower: -0.6, + upper: 0.6, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "atanh_order out of domain", + func: atanh_order, + lower: -2.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: exec_err!("Input range of ATANH contains out-of-domain values"), + }, + MonotonicityTestCase { + name: "cbrt_order within domain", + func: cbrt_order, + lower: -1.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "cbrt_order out of domain", + func: cbrt_order, + lower: -2.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "ceil_order within domain", + func: ceil_order, + lower: -1.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "ceil_order out of domain", + func: ceil_order, + lower: -2.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "cos_order within domain", + func: cos_order, + lower: 0.0, + upper: 2.0 * std::f64::consts::PI, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + 
nulls_first: false, + }), + expected: Ok(SortProperties::Unordered), + }, + MonotonicityTestCase { + name: "cos_order out of domain", + func: cos_order, + lower: -2.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Unordered), + }, + MonotonicityTestCase { + name: "cosh_order within domain positive", + func: cosh_order, + lower: 5.0, + upper: 100.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "cosh_order within domain negative", + func: cosh_order, + lower: -100.0, + upper: -5.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "cosh_order out of domain so unordered", + func: cosh_order, + lower: -1.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Unordered), + }, + MonotonicityTestCase { + name: "degrees_order", + func: degrees_order, + lower: -1.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: true, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: true, + })), + }, + MonotonicityTestCase { + name: "exp_order", + func: exp_order, + lower: -1000.0, + upper: 1000.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "floor_order", + func: floor_order, + lower: -1.0, + upper: 1.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: true, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: true, + nulls_first: true, + })), + }, + MonotonicityTestCase { + name: "ln_order within domain", + func: ln_order, + lower: 1.0, + upper: 2.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: Ok(SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + })), + }, + MonotonicityTestCase { + name: "ln_order out of domain", + func: ln_order, + lower: -5.0, + upper: -4.0, + input_sort: SortProperties::Ordered(SortOptions { + descending: false, + nulls_first: false, + }), + expected: exec_err!("Input range of LN contains out-of-domain values"), + }, + ]; + + for tcase in test_cases { + let input = vec![create_ep(tcase.lower, tcase.upper, tcase.input_sort)]; + let actual = (tcase.func)(&input); + match (&actual, &tcase.expected) { + (Ok(a), Ok(e)) => assert_eq!( + a, e, + "Test '{}' failed: got {:?}, expected {:?}", + tcase.name, a, e + ), + (Err(e1), Err(e2)) => { + assert_eq!( + e1.strip_backtrace().to_string(), + e2.strip_backtrace().to_string(), + "Test '{}' failed: got {:?}, expected {:?}", + tcase.name, + e1, + e2 + ) + } // Both are errors, so it's fine + _ => panic!( + "Test '{}' failed: got {:?}, expected {:?}", + tcase.name, actual, tcase.expected + ), + } + } + } +} diff --git a/datafusion/optimizer/src/lib.rs b/datafusion/optimizer/src/lib.rs index 263770b81fcd..614284e1b477 100644 --- 
a/datafusion/optimizer/src/lib.rs +++ b/datafusion/optimizer/src/lib.rs @@ -62,7 +62,9 @@ pub mod utils; pub mod test; pub use analyzer::{Analyzer, AnalyzerRule}; -pub use optimizer::{Optimizer, OptimizerConfig, OptimizerContext, OptimizerRule}; +pub use optimizer::{ + ApplyOrder, Optimizer, OptimizerConfig, OptimizerContext, OptimizerRule, +}; #[allow(deprecated)] pub use utils::optimize_children; diff --git a/datafusion/physical-expr/src/expressions/cast.rs b/datafusion/physical-expr/src/expressions/cast.rs index 7eda5fb4beaa..8a093e0ae92e 100644 --- a/datafusion/physical-expr/src/expressions/cast.rs +++ b/datafusion/physical-expr/src/expressions/cast.rs @@ -242,6 +242,7 @@ mod tests { }, datatypes::*, }; + use datafusion_common::assert_contains; // runs an end-to-end test of physical type cast // 1. construct a record batch with a column "a" of type A @@ -399,6 +400,45 @@ mod tests { Ok(()) } + #[test] + fn test_cast_decimal_to_decimal_overflow() -> Result<()> { + let array = vec![Some(123456789)]; + + let decimal_array = array + .clone() + .into_iter() + .collect::() + .with_precision_and_scale(10, 3)?; + + let schema = Schema::new(vec![Field::new("a", Decimal128(10, 3), false)]); + let batch = RecordBatch::try_new( + Arc::new(schema.clone()), + vec![Arc::new(decimal_array)], + )?; + let expression = + cast_with_options(col("a", &schema)?, &schema, Decimal128(6, 2), None)?; + let e = expression.evaluate(&batch).unwrap_err(); // panics on OK + assert_contains!( + e.to_string(), + "Arrow error: Invalid argument error: 12345679 is too large to store in a Decimal128 of precision 6. Max is 999999" + ); + + let expression_safe = cast_with_options( + col("a", &schema)?, + &schema, + Decimal128(6, 2), + Some(DEFAULT_SAFE_CAST_OPTIONS), + )?; + let result_safe = expression_safe + .evaluate(&batch)? 
+ .into_array(batch.num_rows()) + .expect("failed to convert to array"); + + assert!(result_safe.is_null(0)); + + Ok(()) + } + #[test] fn test_cast_decimal_to_numeric() -> Result<()> { let array = vec![Some(1), Some(2), Some(3), Some(4), Some(5), None]; diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index a40827bda209..4dc9ac22f173 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -41,7 +41,6 @@ datafusion-common = { workspace = true, default-features = true } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-expr-common = { workspace = true, default-features = true } -datafusion-functions-aggregate = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-expr-common = { workspace = true } datafusion-physical-plan = { workspace = true } diff --git a/datafusion/physical-optimizer/src/join_selection.rs b/datafusion/physical-optimizer/src/join_selection.rs index d5f70938a7d4..03bfb6978890 100644 --- a/datafusion/physical-optimizer/src/join_selection.rs +++ b/datafusion/physical-optimizer/src/join_selection.rs @@ -526,7 +526,7 @@ fn hash_join_convert_symmetric_subrule( /// +--------------+ +--------------+ /// /// ``` -fn hash_join_swap_subrule( +pub fn hash_join_swap_subrule( mut input: Arc, _config_options: &ConfigOptions, ) -> Result> { @@ -589,1550 +589,4 @@ fn apply_subrules( Ok(Transformed::yes(input)) } -#[cfg(test)] -mod tests_statistical { - use super::*; - use util_tests::StatisticsExec; - - use arrow::datatypes::{DataType, Field, Schema}; - use datafusion_common::{ - stats::Precision, ColumnStatistics, JoinType, ScalarValue, Statistics, - }; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::col; - use datafusion_physical_expr::expressions::BinaryExpr; - use datafusion_physical_expr::{PhysicalExpr, PhysicalExprRef}; - use datafusion_physical_plan::displayable; - use datafusion_physical_plan::projection::ProjectionExec; - use rstest::rstest; - - /// Return statistics for empty table - fn empty_statistics() -> Statistics { - Statistics { - num_rows: Precision::Absent, - total_byte_size: Precision::Absent, - column_statistics: vec![ColumnStatistics::new_unknown()], - } - } - - /// Get table thresholds: (num_rows, byte_size) - fn get_thresholds() -> (usize, usize) { - let optimizer_options = ConfigOptions::new().optimizer; - ( - optimizer_options.hash_join_single_partition_threshold_rows, - optimizer_options.hash_join_single_partition_threshold, - ) - } - - /// Return statistics for small table - fn small_statistics() -> Statistics { - let (threshold_num_rows, threshold_byte_size) = get_thresholds(); - Statistics { - num_rows: Precision::Inexact(threshold_num_rows / 128), - total_byte_size: Precision::Inexact(threshold_byte_size / 128), - column_statistics: vec![ColumnStatistics::new_unknown()], - } - } - - /// Return statistics for big table - fn big_statistics() -> Statistics { - let (threshold_num_rows, threshold_byte_size) = get_thresholds(); - Statistics { - num_rows: Precision::Inexact(threshold_num_rows * 2), - total_byte_size: Precision::Inexact(threshold_byte_size * 2), - column_statistics: vec![ColumnStatistics::new_unknown()], - } - } - - /// Return statistics for big table - fn bigger_statistics() -> Statistics { - let (threshold_num_rows, threshold_byte_size) = get_thresholds(); - Statistics { - num_rows: Precision::Inexact(threshold_num_rows * 4), - total_byte_size: 
Precision::Inexact(threshold_byte_size * 4), - column_statistics: vec![ColumnStatistics::new_unknown()], - } - } - - fn create_big_and_small() -> (Arc, Arc) { - let big = Arc::new(StatisticsExec::new( - big_statistics(), - Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), - )); - - let small = Arc::new(StatisticsExec::new( - small_statistics(), - Schema::new(vec![Field::new("small_col", DataType::Int32, false)]), - )); - (big, small) - } - - /// Create a column statistics vector for a single column - /// that has the given min/max/distinct_count properties. - /// - /// Given min/max will be mapped to a [`ScalarValue`] if - /// they are not `None`. - fn create_column_stats( - min: Option, - max: Option, - distinct_count: Option, - ) -> Vec { - vec![ColumnStatistics { - distinct_count: distinct_count - .map(Precision::Inexact) - .unwrap_or(Precision::Absent), - min_value: min - .map(|size| Precision::Inexact(ScalarValue::UInt64(Some(size)))) - .unwrap_or(Precision::Absent), - max_value: max - .map(|size| Precision::Inexact(ScalarValue::UInt64(Some(size)))) - .unwrap_or(Precision::Absent), - ..Default::default() - }] - } - - /// Create join filter for NLJoinExec with expression `big_col > small_col` - /// where both columns are 0-indexed and come from left and right inputs respectively - fn nl_join_filter() -> Option { - let column_indices = vec![ - ColumnIndex { - index: 0, - side: JoinSide::Left, - }, - ColumnIndex { - index: 0, - side: JoinSide::Right, - }, - ]; - let intermediate_schema = Schema::new(vec![ - Field::new("big_col", DataType::Int32, false), - Field::new("small_col", DataType::Int32, false), - ]); - let expression = Arc::new(BinaryExpr::new( - Arc::new(Column::new_with_schema("big_col", &intermediate_schema).unwrap()), - Operator::Gt, - Arc::new(Column::new_with_schema("small_col", &intermediate_schema).unwrap()), - )) as _; - Some(JoinFilter::new( - expression, - column_indices, - Arc::new(intermediate_schema), - )) - } - - /// Returns three plans with statistics of (min, max, distinct_count) - /// * big 100K rows @ (0, 50k, 50k) - /// * medium 10K rows @ (1k, 5k, 1k) - /// * small 1K rows @ (0, 100k, 1k) - fn create_nested_with_min_max() -> ( - Arc, - Arc, - Arc, - ) { - let big = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(100_000), - column_statistics: create_column_stats( - Some(0), - Some(50_000), - Some(50_000), - ), - total_byte_size: Precision::Absent, - }, - Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), - )); - - let medium = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(10_000), - column_statistics: create_column_stats( - Some(1000), - Some(5000), - Some(1000), - ), - total_byte_size: Precision::Absent, - }, - Schema::new(vec![Field::new("medium_col", DataType::Int32, false)]), - )); - - let small = Arc::new(StatisticsExec::new( - Statistics { - num_rows: Precision::Inexact(1000), - column_statistics: create_column_stats( - Some(0), - Some(100_000), - Some(1000), - ), - total_byte_size: Precision::Absent, - }, - Schema::new(vec![Field::new("small_col", DataType::Int32, false)]), - )); - - (big, medium, small) - } - - #[tokio::test] - async fn test_join_with_swap() { - let (big, small) = create_big_and_small(); - - let join = Arc::new( - HashJoinExec::try_new( - Arc::clone(&big), - Arc::clone(&small), - vec![( - Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()), - Arc::new( - Column::new_with_schema("small_col", &small.schema()).unwrap(), - ), - )], - 
None, - &JoinType::Left, - None, - PartitionMode::CollectLeft, - false, - ) - .unwrap(), - ); - - let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) - .unwrap(); - - let swapping_projection = optimized_join - .as_any() - .downcast_ref::() - .expect("A proj is required to swap columns back to their original order"); - - assert_eq!(swapping_projection.expr().len(), 2); - let (col, name) = &swapping_projection.expr()[0]; - assert_eq!(name, "big_col"); - assert_col_expr(col, "big_col", 1); - let (col, name) = &swapping_projection.expr()[1]; - assert_eq!(name, "small_col"); - assert_col_expr(col, "small_col", 0); - - let swapped_join = swapping_projection - .input() - .as_any() - .downcast_ref::() - .expect("The type of the plan should not be changed"); - - assert_eq!( - swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(8192) - ); - assert_eq!( - swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(2097152) - ); - } - - #[tokio::test] - async fn test_left_join_no_swap() { - let (big, small) = create_big_and_small(); - - let join = Arc::new( - HashJoinExec::try_new( - Arc::clone(&small), - Arc::clone(&big), - vec![( - Arc::new( - Column::new_with_schema("small_col", &small.schema()).unwrap(), - ), - Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()), - )], - None, - &JoinType::Left, - None, - PartitionMode::CollectLeft, - false, - ) - .unwrap(), - ); - - let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) - .unwrap(); - - let swapped_join = optimized_join - .as_any() - .downcast_ref::() - .expect("The type of the plan should not be changed"); - - assert_eq!( - swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(8192) - ); - assert_eq!( - swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(2097152) - ); - } - - #[tokio::test] - async fn test_join_with_swap_semi() { - let join_types = [JoinType::LeftSemi, JoinType::LeftAnti]; - for join_type in join_types { - let (big, small) = create_big_and_small(); - - let join = HashJoinExec::try_new( - Arc::clone(&big), - Arc::clone(&small), - vec![( - Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()), - Arc::new( - Column::new_with_schema("small_col", &small.schema()).unwrap(), - ), - )], - None, - &join_type, - None, - PartitionMode::Partitioned, - false, - ) - .unwrap(); - - let original_schema = join.schema(); - - let optimized_join = JoinSelection::new() - .optimize(Arc::new(join), &ConfigOptions::new()) - .unwrap(); - - let swapped_join = optimized_join - .as_any() - .downcast_ref::() - .expect( - "A proj is not required to swap columns back to their original order", - ); - - assert_eq!(swapped_join.schema().fields().len(), 1); - assert_eq!( - swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(8192) - ); - assert_eq!( - swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(2097152) - ); - assert_eq!(original_schema, swapped_join.schema()); - } - } - - /// Compare the input plan with the plan after running the probe order optimizer. - macro_rules! 
assert_optimized { - ($EXPECTED_LINES: expr, $PLAN: expr) => { - let expected_lines = - $EXPECTED_LINES.iter().map(|s| *s).collect::>(); - - let plan = Arc::new($PLAN); - let optimized = JoinSelection::new() - .optimize(plan.clone(), &ConfigOptions::new()) - .unwrap(); - - let plan_string = displayable(optimized.as_ref()).indent(true).to_string(); - let actual_lines = plan_string.split("\n").collect::>(); - - assert_eq!( - &expected_lines, &actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; - } - - #[tokio::test] - async fn test_nested_join_swap() { - let (big, medium, small) = create_nested_with_min_max(); - - // Form the inner join: big JOIN small - let child_join = HashJoinExec::try_new( - Arc::clone(&big), - Arc::clone(&small), - vec![( - col("big_col", &big.schema()).unwrap(), - col("small_col", &small.schema()).unwrap(), - )], - None, - &JoinType::Inner, - None, - PartitionMode::CollectLeft, - false, - ) - .unwrap(); - let child_schema = child_join.schema(); - - // Form join tree `medium LEFT JOIN (big JOIN small)` - let join = HashJoinExec::try_new( - Arc::clone(&medium), - Arc::new(child_join), - vec![( - col("medium_col", &medium.schema()).unwrap(), - col("small_col", &child_schema).unwrap(), - )], - None, - &JoinType::Left, - None, - PartitionMode::CollectLeft, - false, - ) - .unwrap(); - - // Hash join uses the left side to build the hash table, and right side to probe it. We want - // to keep left as small as possible, so if we can estimate (with a reasonable margin of error) - // that the left side is smaller than the right side, we should swap the sides. - // - // The first hash join's left is 'small' table (with 1000 rows), and the second hash join's - // left is the F(small IJ big) which has an estimated cardinality of 2000 rows (vs medium which - // has an exact cardinality of 10_000 rows). 
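The reasoning in the comment above (the expected plan for this case follows right below) boils down to a row-count comparison between the two join inputs. A minimal, self-contained sketch of that comparison, assuming only `datafusion_common::stats::Precision`; `smaller_on_left` is a made-up helper name, and the real `JoinSelection` rule also weighs byte sizes, join types, and whether statistics exist at all:

```rust
use datafusion_common::stats::Precision;

/// Prefer the side with the smaller row-count estimate as the build (left)
/// side of a hash join; keep the current order when estimates are missing.
fn smaller_on_left(left_rows: &Precision<usize>, right_rows: &Precision<usize>) -> bool {
    match (left_rows, right_rows) {
        // Exact and inexact estimates are both usable for the comparison.
        (
            Precision::Exact(l) | Precision::Inexact(l),
            Precision::Exact(r) | Precision::Inexact(r),
        ) => l <= r,
        // Without estimates on both sides, keep the original order.
        _ => true,
    }
}

fn main() {
    // The nested case above: F(small IJ big) is estimated at ~2_000 rows while
    // medium has 10_000 rows, so the estimated 2_000-row side stays on the left.
    assert!(smaller_on_left(&Precision::Inexact(2_000), &Precision::Inexact(10_000)));
    assert!(!smaller_on_left(&Precision::Inexact(10_000), &Precision::Inexact(2_000)));
}
```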
- let expected = [ - "ProjectionExec: expr=[medium_col@2 as medium_col, big_col@0 as big_col, small_col@1 as small_col]", - " HashJoinExec: mode=CollectLeft, join_type=Right, on=[(small_col@1, medium_col@0)]", - " ProjectionExec: expr=[big_col@1 as big_col, small_col@0 as small_col]", - " HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(small_col@0, big_col@0)]", - " StatisticsExec: col_count=1, row_count=Inexact(1000)", - " StatisticsExec: col_count=1, row_count=Inexact(100000)", - " StatisticsExec: col_count=1, row_count=Inexact(10000)", - "", - ]; - assert_optimized!(expected, join); - } - - #[tokio::test] - async fn test_join_no_swap() { - let (big, small) = create_big_and_small(); - let join = Arc::new( - HashJoinExec::try_new( - Arc::clone(&small), - Arc::clone(&big), - vec![( - Arc::new( - Column::new_with_schema("small_col", &small.schema()).unwrap(), - ), - Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()), - )], - None, - &JoinType::Inner, - None, - PartitionMode::CollectLeft, - false, - ) - .unwrap(), - ); - - let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) - .unwrap(); - - let swapped_join = optimized_join - .as_any() - .downcast_ref::() - .expect("The type of the plan should not be changed"); - - assert_eq!( - swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(8192) - ); - assert_eq!( - swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(2097152) - ); - } - - #[rstest( - join_type, - case::inner(JoinType::Inner), - case::left(JoinType::Left), - case::right(JoinType::Right), - case::full(JoinType::Full) - )] - #[tokio::test] - async fn test_nl_join_with_swap(join_type: JoinType) { - let (big, small) = create_big_and_small(); - - let join = Arc::new( - NestedLoopJoinExec::try_new( - Arc::clone(&big), - Arc::clone(&small), - nl_join_filter(), - &join_type, - None, - ) - .unwrap(), - ); - - let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) - .unwrap(); - - let swapping_projection = optimized_join - .as_any() - .downcast_ref::() - .expect("A proj is required to swap columns back to their original order"); - - assert_eq!(swapping_projection.expr().len(), 2); - let (col, name) = &swapping_projection.expr()[0]; - assert_eq!(name, "big_col"); - assert_col_expr(col, "big_col", 1); - let (col, name) = &swapping_projection.expr()[1]; - assert_eq!(name, "small_col"); - assert_col_expr(col, "small_col", 0); - - let swapped_join = swapping_projection - .input() - .as_any() - .downcast_ref::() - .expect("The type of the plan should not be changed"); - - // Assert join side of big_col swapped in filter expression - let swapped_filter = swapped_join.filter().unwrap(); - let swapped_big_col_idx = swapped_filter.schema().index_of("big_col").unwrap(); - let swapped_big_col_side = swapped_filter - .column_indices() - .get(swapped_big_col_idx) - .unwrap() - .side; - assert_eq!( - swapped_big_col_side, - JoinSide::Right, - "Filter column side should be swapped" - ); - - assert_eq!( - swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(8192) - ); - assert_eq!( - swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(2097152) - ); - } - - #[rstest( - join_type, - case::left_semi(JoinType::LeftSemi), - case::left_anti(JoinType::LeftAnti), - case::right_semi(JoinType::RightSemi), - case::right_anti(JoinType::RightAnti) - )] - #[tokio::test] - async fn test_nl_join_with_swap_no_proj(join_type: JoinType) { 
- let (big, small) = create_big_and_small(); - - let join = Arc::new( - NestedLoopJoinExec::try_new( - Arc::clone(&big), - Arc::clone(&small), - nl_join_filter(), - &join_type, - None, - ) - .unwrap(), - ); - - let optimized_join = JoinSelection::new() - .optimize( - Arc::::clone(&join), - &ConfigOptions::new(), - ) - .unwrap(); - - let swapped_join = optimized_join - .as_any() - .downcast_ref::() - .expect("The type of the plan should not be changed"); - - // Assert before/after schemas are equal - assert_eq!( - join.schema(), - swapped_join.schema(), - "Join schema should not be modified while optimization" - ); - - // Assert join side of big_col swapped in filter expression - let swapped_filter = swapped_join.filter().unwrap(); - let swapped_big_col_idx = swapped_filter.schema().index_of("big_col").unwrap(); - let swapped_big_col_side = swapped_filter - .column_indices() - .get(swapped_big_col_idx) - .unwrap() - .side; - assert_eq!( - swapped_big_col_side, - JoinSide::Right, - "Filter column side should be swapped" - ); - - assert_eq!( - swapped_join.left().statistics().unwrap().total_byte_size, - Precision::Inexact(8192) - ); - assert_eq!( - swapped_join.right().statistics().unwrap().total_byte_size, - Precision::Inexact(2097152) - ); - } - - #[rstest( - join_type, projection, small_on_right, - case::inner(JoinType::Inner, vec![1], true), - case::left(JoinType::Left, vec![1], true), - case::right(JoinType::Right, vec![1], true), - case::full(JoinType::Full, vec![1], true), - case::left_anti(JoinType::LeftAnti, vec![0], false), - case::left_semi(JoinType::LeftSemi, vec![0], false), - case::right_anti(JoinType::RightAnti, vec![0], true), - case::right_semi(JoinType::RightSemi, vec![0], true), - )] - #[tokio::test] - async fn test_hash_join_swap_on_joins_with_projections( - join_type: JoinType, - projection: Vec, - small_on_right: bool, - ) -> Result<()> { - let (big, small) = create_big_and_small(); - - let left = if small_on_right { &big } else { &small }; - let right = if small_on_right { &small } else { &big }; - - let left_on = if small_on_right { - "big_col" - } else { - "small_col" - }; - let right_on = if small_on_right { - "small_col" - } else { - "big_col" - }; - - let join = Arc::new(HashJoinExec::try_new( - Arc::clone(left), - Arc::clone(right), - vec![( - Arc::new(Column::new_with_schema(left_on, &left.schema())?), - Arc::new(Column::new_with_schema(right_on, &right.schema())?), - )], - None, - &join_type, - Some(projection), - PartitionMode::Partitioned, - false, - )?); - - let swapped = join - .swap_inputs(PartitionMode::Partitioned) - .expect("swap_hash_join must support joins with projections"); - let swapped_join = swapped.as_any().downcast_ref::().expect( - "ProjectionExec won't be added above if HashJoinExec contains embedded projection", - ); - - assert_eq!(swapped_join.projection, Some(vec![0_usize])); - assert_eq!(swapped.schema().fields.len(), 1); - assert_eq!(swapped.schema().fields[0].name(), "small_col"); - Ok(()) - } - - fn assert_col_expr(expr: &Arc, name: &str, index: usize) { - let col = expr - .as_any() - .downcast_ref::() - .expect("Projection items should be Column expression"); - assert_eq!(col.name(), name); - assert_eq!(col.index(), index); - } - - #[tokio::test] - async fn test_join_selection_collect_left() { - let big = Arc::new(StatisticsExec::new( - big_statistics(), - Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), - )); - - let small = Arc::new(StatisticsExec::new( - small_statistics(), - Schema::new(vec![Field::new("small_col", 
DataType::Int32, false)]), - )); - - let empty = Arc::new(StatisticsExec::new( - empty_statistics(), - Schema::new(vec![Field::new("empty_col", DataType::Int32, false)]), - )); - - let join_on = vec![( - col("small_col", &small.schema()).unwrap(), - col("big_col", &big.schema()).unwrap(), - )]; - check_join_partition_mode( - Arc::::clone(&small), - Arc::::clone(&big), - join_on, - false, - PartitionMode::CollectLeft, - ); - - let join_on = vec![( - col("big_col", &big.schema()).unwrap(), - col("small_col", &small.schema()).unwrap(), - )]; - check_join_partition_mode( - big, - Arc::::clone(&small), - join_on, - true, - PartitionMode::CollectLeft, - ); - - let join_on = vec![( - col("small_col", &small.schema()).unwrap(), - col("empty_col", &empty.schema()).unwrap(), - )]; - check_join_partition_mode( - Arc::::clone(&small), - Arc::::clone(&empty), - join_on, - false, - PartitionMode::CollectLeft, - ); - - let join_on = vec![( - col("empty_col", &empty.schema()).unwrap(), - col("small_col", &small.schema()).unwrap(), - )]; - check_join_partition_mode( - empty, - small, - join_on, - true, - PartitionMode::CollectLeft, - ); - } - - #[tokio::test] - async fn test_join_selection_partitioned() { - let bigger = Arc::new(StatisticsExec::new( - bigger_statistics(), - Schema::new(vec![Field::new("bigger_col", DataType::Int32, false)]), - )); - - let big = Arc::new(StatisticsExec::new( - big_statistics(), - Schema::new(vec![Field::new("big_col", DataType::Int32, false)]), - )); - - let empty = Arc::new(StatisticsExec::new( - empty_statistics(), - Schema::new(vec![Field::new("empty_col", DataType::Int32, false)]), - )); - - let join_on = vec![( - Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, - Arc::new(Column::new_with_schema("bigger_col", &bigger.schema()).unwrap()) - as _, - )]; - check_join_partition_mode( - Arc::::clone(&big), - Arc::::clone(&bigger), - join_on, - false, - PartitionMode::Partitioned, - ); - - let join_on = vec![( - Arc::new(Column::new_with_schema("bigger_col", &bigger.schema()).unwrap()) - as _, - Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, - )]; - check_join_partition_mode( - bigger, - Arc::::clone(&big), - join_on, - true, - PartitionMode::Partitioned, - ); - - let join_on = vec![( - Arc::new(Column::new_with_schema("empty_col", &empty.schema()).unwrap()) as _, - Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, - )]; - check_join_partition_mode( - Arc::::clone(&empty), - Arc::::clone(&big), - join_on, - false, - PartitionMode::Partitioned, - ); - - let join_on = vec![( - Arc::new(Column::new_with_schema("big_col", &big.schema()).unwrap()) as _, - Arc::new(Column::new_with_schema("empty_col", &empty.schema()).unwrap()) as _, - )]; - check_join_partition_mode(big, empty, join_on, false, PartitionMode::Partitioned); - } - - fn check_join_partition_mode( - left: Arc, - right: Arc, - on: Vec<(PhysicalExprRef, PhysicalExprRef)>, - is_swapped: bool, - expected_mode: PartitionMode, - ) { - let join = Arc::new( - HashJoinExec::try_new( - left, - right, - on, - None, - &JoinType::Inner, - None, - PartitionMode::Auto, - false, - ) - .unwrap(), - ); - - let optimized_join = JoinSelection::new() - .optimize(join, &ConfigOptions::new()) - .unwrap(); - - if !is_swapped { - let swapped_join = optimized_join - .as_any() - .downcast_ref::() - .expect("The type of the plan should not be changed"); - assert_eq!(*swapped_join.partition_mode(), expected_mode); - } else { - let swapping_projection = optimized_join - 
.as_any() - .downcast_ref::() - .expect( - "A proj is required to swap columns back to their original order", - ); - let swapped_join = swapping_projection - .input() - .as_any() - .downcast_ref::() - .expect("The type of the plan should not be changed"); - - assert_eq!(*swapped_join.partition_mode(), expected_mode); - } - } -} - -#[cfg(test)] -mod util_tests { - use std::{ - any::Any, - pin::Pin, - sync::Arc, - task::{Context, Poll}, - }; - - use arrow::{ - array::RecordBatch, - datatypes::{DataType, Field, Schema, SchemaRef}, - }; - use datafusion_common::{Result, Statistics}; - use datafusion_execution::{ - RecordBatchStream, SendableRecordBatchStream, TaskContext, - }; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::{BinaryExpr, Column, NegativeExpr}; - use datafusion_physical_expr::intervals::utils::check_support; - use datafusion_physical_expr::{EquivalenceProperties, Partitioning, PhysicalExpr}; - use datafusion_physical_plan::{ - execution_plan::{Boundedness, EmissionType}, - DisplayAs, DisplayFormatType, ExecutionPlan, PlanProperties, - }; - use futures::Stream; - - #[derive(Debug)] - struct UnboundedStream { - batch_produce: Option, - count: usize, - batch: RecordBatch, - } - - impl Stream for UnboundedStream { - type Item = Result; - - fn poll_next( - mut self: Pin<&mut Self>, - _cx: &mut Context<'_>, - ) -> Poll> { - if let Some(val) = self.batch_produce { - if val <= self.count { - return Poll::Ready(None); - } - } - self.count += 1; - Poll::Ready(Some(Ok(self.batch.clone()))) - } - } - - impl RecordBatchStream for UnboundedStream { - fn schema(&self) -> SchemaRef { - self.batch.schema() - } - } - - /// A mock execution plan that simply returns the provided data source characteristic - #[derive(Debug, Clone)] - pub struct UnboundedExec { - batch_produce: Option, - batch: RecordBatch, - cache: PlanProperties, - } - - impl UnboundedExec { - /// Create new exec that clones the given record batch to its output. - /// - /// Set `batch_produce` to `Some(n)` to emit exactly `n` batches per partition. - pub fn new( - batch_produce: Option, - batch: RecordBatch, - partitions: usize, - ) -> Self { - let cache = - Self::compute_properties(batch.schema(), batch_produce, partitions); - Self { - batch_produce, - batch, - cache, - } - } - - /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn compute_properties( - schema: SchemaRef, - batch_produce: Option, - n_partitions: usize, - ) -> PlanProperties { - let boundedness = if batch_produce.is_none() { - Boundedness::Unbounded { - requires_infinite_memory: false, - } - } else { - Boundedness::Bounded - }; - PlanProperties::new( - EquivalenceProperties::new(schema), - Partitioning::UnknownPartitioning(n_partitions), - EmissionType::Incremental, - boundedness, - ) - } - } - - impl DisplayAs for UnboundedExec { - fn fmt_as( - &self, - t: DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!( - f, - "UnboundedExec: unbounded={}", - self.batch_produce.is_none(), - ) - } - } - } - } - - impl ExecutionPlan for UnboundedExec { - fn name(&self) -> &'static str { - Self::static_name() - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn properties(&self) -> &PlanProperties { - &self.cache - } - - fn children(&self) -> Vec<&Arc> { - vec![] - } - - fn with_new_children( - self: Arc, - _: Vec>, - ) -> Result> { - Ok(self) - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> Result { - Ok(Box::pin(UnboundedStream { - batch_produce: self.batch_produce, - count: 0, - batch: self.batch.clone(), - })) - } - } - - #[derive(Eq, PartialEq, Debug)] - pub enum SourceType { - Unbounded, - Bounded, - } - - /// A mock execution plan that simply returns the provided statistics - #[derive(Debug, Clone)] - pub struct StatisticsExec { - stats: Statistics, - schema: Arc, - cache: PlanProperties, - } - - impl StatisticsExec { - pub fn new(stats: Statistics, schema: Schema) -> Self { - assert_eq!( - stats.column_statistics.len(), schema.fields().len(), - "if defined, the column statistics vector length should be the number of fields" - ); - let cache = Self::compute_properties(Arc::new(schema.clone())); - Self { - stats, - schema: Arc::new(schema), - cache, - } - } - - /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc. 
- fn compute_properties(schema: SchemaRef) -> PlanProperties { - PlanProperties::new( - EquivalenceProperties::new(schema), - Partitioning::UnknownPartitioning(2), - EmissionType::Incremental, - Boundedness::Bounded, - ) - } - } - - impl DisplayAs for StatisticsExec { - fn fmt_as( - &self, - t: DisplayFormatType, - f: &mut std::fmt::Formatter, - ) -> std::fmt::Result { - match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!( - f, - "StatisticsExec: col_count={}, row_count={:?}", - self.schema.fields().len(), - self.stats.num_rows, - ) - } - } - } - } - - impl ExecutionPlan for StatisticsExec { - fn name(&self) -> &'static str { - Self::static_name() - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn properties(&self) -> &PlanProperties { - &self.cache - } - - fn children(&self) -> Vec<&Arc> { - vec![] - } - - fn with_new_children( - self: Arc, - _: Vec>, - ) -> Result> { - Ok(self) - } - - fn execute( - &self, - _partition: usize, - _context: Arc, - ) -> Result { - unimplemented!("This plan only serves for testing statistics") - } - - fn statistics(&self) -> Result { - Ok(self.stats.clone()) - } - } - - #[test] - fn check_expr_supported() { - let schema = Arc::new(Schema::new(vec![ - Field::new("a", DataType::Int32, false), - Field::new("b", DataType::Utf8, false), - ])); - let supported_expr = Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 0)), - Operator::Plus, - Arc::new(Column::new("a", 0)), - )) as Arc; - assert!(check_support(&supported_expr, &schema)); - let supported_expr_2 = Arc::new(Column::new("a", 0)) as Arc; - assert!(check_support(&supported_expr_2, &schema)); - let unsupported_expr = Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 0)), - Operator::Or, - Arc::new(Column::new("a", 0)), - )) as Arc; - assert!(!check_support(&unsupported_expr, &schema)); - let unsupported_expr_2 = Arc::new(BinaryExpr::new( - Arc::new(Column::new("a", 0)), - Operator::Or, - Arc::new(NegativeExpr::new(Arc::new(Column::new("a", 0)))), - )) as Arc; - assert!(!check_support(&unsupported_expr_2, &schema)); - } -} - -#[cfg(test)] -mod hash_join_tests { - use super::*; - use util_tests::{SourceType, UnboundedExec}; - - use arrow::datatypes::{DataType, Field, Schema}; - use arrow::record_batch::RecordBatch; - use datafusion_physical_expr::expressions::col; - use datafusion_physical_plan::projection::ProjectionExec; - - struct TestCase { - case: String, - initial_sources_unbounded: (SourceType, SourceType), - initial_join_type: JoinType, - initial_mode: PartitionMode, - expected_sources_unbounded: (SourceType, SourceType), - expected_join_type: JoinType, - expected_mode: PartitionMode, - expecting_swap: bool, - } - - #[tokio::test] - async fn test_join_with_swap_full() -> Result<()> { - // NOTE: Currently, some initial conditions are not viable after join order selection. - // For example, full join always comes in partitioned mode. See the warning in - // function "swap". If this changes in the future, we should update these tests. 
- let cases = vec![ - TestCase { - case: "Bounded - Unbounded 1".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - initial_join_type: JoinType::Full, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: JoinType::Full, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }, - TestCase { - case: "Unbounded - Bounded 2".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - initial_join_type: JoinType::Full, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - expected_join_type: JoinType::Full, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }, - TestCase { - case: "Bounded - Bounded 3".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - initial_join_type: JoinType::Full, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - expected_join_type: JoinType::Full, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }, - TestCase { - case: "Unbounded - Unbounded 4".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), - initial_join_type: JoinType::Full, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: ( - SourceType::Unbounded, - SourceType::Unbounded, - ), - expected_join_type: JoinType::Full, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }, - ]; - for case in cases.into_iter() { - test_join_with_maybe_swap_unbounded_case(case).await? - } - Ok(()) - } - - #[tokio::test] - async fn test_cases_without_collect_left_check() -> Result<()> { - let mut cases = vec![]; - let join_types = vec![JoinType::LeftSemi, JoinType::Inner]; - for join_type in join_types { - cases.push(TestCase { - case: "Unbounded - Bounded / CollectLeft".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::CollectLeft, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: join_type.swap(), - expected_mode: PartitionMode::CollectLeft, - expecting_swap: true, - }); - cases.push(TestCase { - case: "Bounded - Unbounded / CollectLeft".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::CollectLeft, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: join_type, - expected_mode: PartitionMode::CollectLeft, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Unbounded - Unbounded / CollectLeft".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::CollectLeft, - expected_sources_unbounded: ( - SourceType::Unbounded, - SourceType::Unbounded, - ), - expected_join_type: join_type, - expected_mode: PartitionMode::CollectLeft, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Bounded - Bounded / CollectLeft".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::CollectLeft, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - expected_join_type: 
join_type, - expected_mode: PartitionMode::CollectLeft, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Unbounded - Bounded / Partitioned".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: join_type.swap(), - expected_mode: PartitionMode::Partitioned, - expecting_swap: true, - }); - cases.push(TestCase { - case: "Bounded - Unbounded / Partitioned".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Bounded - Bounded / Partitioned".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Unbounded - Unbounded / Partitioned".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: ( - SourceType::Unbounded, - SourceType::Unbounded, - ), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - } - - for case in cases.into_iter() { - test_join_with_maybe_swap_unbounded_case(case).await? 
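The CollectLeft and Partitioned cases pushed above all encode one boundedness check. A condensed sketch of that expectation, using a hypothetical helper name; the actual `hash_join_swap_subrule` exercised later in this module also verifies that the join type itself is swappable:

```rust
/// Swapping is only worthwhile when the current build (left) side is unbounded
/// while the probe (right) side is bounded; every other combination keeps the
/// original input order.
fn should_swap_for_boundedness(left_unbounded: bool, right_unbounded: bool) -> bool {
    left_unbounded && !right_unbounded
}

fn main() {
    assert!(should_swap_for_boundedness(true, false)); // Unbounded - Bounded: swap
    assert!(!should_swap_for_boundedness(false, true)); // Bounded - Unbounded: keep
    assert!(!should_swap_for_boundedness(true, true)); // Unbounded - Unbounded: keep
    assert!(!should_swap_for_boundedness(false, false)); // Bounded - Bounded: keep
}
```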
- } - Ok(()) - } - - #[tokio::test] - async fn test_not_support_collect_left() -> Result<()> { - let mut cases = vec![]; - // After [JoinSelection] optimization, these join types cannot run in CollectLeft mode except - // [JoinType::LeftSemi] - let the_ones_not_support_collect_left = vec![JoinType::Left, JoinType::LeftAnti]; - for join_type in the_ones_not_support_collect_left { - cases.push(TestCase { - case: "Unbounded - Bounded".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: join_type.swap(), - expected_mode: PartitionMode::Partitioned, - expecting_swap: true, - }); - cases.push(TestCase { - case: "Bounded - Unbounded".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Bounded - Bounded".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Unbounded - Unbounded".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: ( - SourceType::Unbounded, - SourceType::Unbounded, - ), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - } - - for case in cases.into_iter() { - test_join_with_maybe_swap_unbounded_case(case).await? - } - Ok(()) - } - - #[tokio::test] - async fn test_not_supporting_swaps_possible_collect_left() -> Result<()> { - let mut cases = vec![]; - let the_ones_not_support_collect_left = - vec![JoinType::Right, JoinType::RightAnti, JoinType::RightSemi]; - for join_type in the_ones_not_support_collect_left { - // We expect that (SourceType::Unbounded, SourceType::Bounded) will change, regardless of the - // statistics. - cases.push(TestCase { - case: "Unbounded - Bounded / CollectLeft".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::CollectLeft, - expected_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - expected_join_type: join_type, - expected_mode: PartitionMode::CollectLeft, - expecting_swap: false, - }); - // We expect that (SourceType::Bounded, SourceType::Unbounded) will stay same, regardless of the - // statistics. 
- cases.push(TestCase { - case: "Bounded - Unbounded / CollectLeft".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::CollectLeft, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: join_type, - expected_mode: PartitionMode::CollectLeft, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Unbounded - Unbounded / CollectLeft".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::CollectLeft, - expected_sources_unbounded: ( - SourceType::Unbounded, - SourceType::Unbounded, - ), - expected_join_type: join_type, - expected_mode: PartitionMode::CollectLeft, - expecting_swap: false, - }); - // - cases.push(TestCase { - case: "Bounded - Bounded / CollectLeft".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::CollectLeft, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - expected_join_type: join_type, - expected_mode: PartitionMode::CollectLeft, - expecting_swap: false, - }); - // If cases are partitioned, only unbounded & bounded check will affect the order. - cases.push(TestCase { - case: "Unbounded - Bounded / Partitioned".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Unbounded, SourceType::Bounded), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Bounded - Unbounded / Partitioned".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Unbounded), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Bounded - Bounded / Partitioned".to_string(), - initial_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: (SourceType::Bounded, SourceType::Bounded), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - cases.push(TestCase { - case: "Unbounded - Unbounded / Partitioned".to_string(), - initial_sources_unbounded: (SourceType::Unbounded, SourceType::Unbounded), - initial_join_type: join_type, - initial_mode: PartitionMode::Partitioned, - expected_sources_unbounded: ( - SourceType::Unbounded, - SourceType::Unbounded, - ), - expected_join_type: join_type, - expected_mode: PartitionMode::Partitioned, - expecting_swap: false, - }); - } - - for case in cases.into_iter() { - test_join_with_maybe_swap_unbounded_case(case).await? 
- } - Ok(()) - } - - async fn test_join_with_maybe_swap_unbounded_case(t: TestCase) -> Result<()> { - let left_unbounded = t.initial_sources_unbounded.0 == SourceType::Unbounded; - let right_unbounded = t.initial_sources_unbounded.1 == SourceType::Unbounded; - let left_exec = Arc::new(UnboundedExec::new( - (!left_unbounded).then_some(1), - RecordBatch::new_empty(Arc::new(Schema::new(vec![Field::new( - "a", - DataType::Int32, - false, - )]))), - 2, - )) as _; - let right_exec = Arc::new(UnboundedExec::new( - (!right_unbounded).then_some(1), - RecordBatch::new_empty(Arc::new(Schema::new(vec![Field::new( - "b", - DataType::Int32, - false, - )]))), - 2, - )) as _; - - let join = Arc::new(HashJoinExec::try_new( - Arc::clone(&left_exec), - Arc::clone(&right_exec), - vec![( - col("a", &left_exec.schema())?, - col("b", &right_exec.schema())?, - )], - None, - &t.initial_join_type, - None, - t.initial_mode, - false, - )?) as _; - - let optimized_join_plan = hash_join_swap_subrule(join, &ConfigOptions::new())?; - - // If swap did happen - let projection_added = optimized_join_plan.as_any().is::(); - let plan = if projection_added { - let proj = optimized_join_plan - .as_any() - .downcast_ref::() - .expect( - "A proj is required to swap columns back to their original order", - ); - Arc::::clone(proj.input()) - } else { - optimized_join_plan - }; - - if let Some(HashJoinExec { - left, - right, - join_type, - mode, - .. - }) = plan.as_any().downcast_ref::() - { - let left_changed = Arc::ptr_eq(left, &right_exec); - let right_changed = Arc::ptr_eq(right, &left_exec); - // If this is not equal, we have a bigger problem. - assert_eq!(left_changed, right_changed); - assert_eq!( - ( - t.case.as_str(), - if left.boundedness().is_unbounded() { - SourceType::Unbounded - } else { - SourceType::Bounded - }, - if right.boundedness().is_unbounded() { - SourceType::Unbounded - } else { - SourceType::Bounded - }, - join_type, - mode, - left_changed && right_changed - ), - ( - t.case.as_str(), - t.expected_sources_unbounded.0, - t.expected_sources_unbounded.1, - &t.expected_join_type, - &t.expected_mode, - t.expecting_swap - ) - ); - }; - Ok(()) - } -} +// See tests in datafusion/core/tests/physical_optimizer diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs index bcb87944f5fd..c2beab032049 100644 --- a/datafusion/physical-optimizer/src/lib.rs +++ b/datafusion/physical-optimizer/src/lib.rs @@ -26,12 +26,13 @@ pub mod enforce_sorting; pub mod join_selection; pub mod limit_pushdown; pub mod limited_distinct_aggregation; -mod optimizer; +pub mod optimizer; pub mod output_requirements; +pub mod projection_pushdown; pub mod pruning; pub mod sanity_checker; pub mod topk_aggregation; pub mod update_aggr_exprs; -pub use optimizer::PhysicalOptimizerRule; -pub mod test_utils; pub mod utils; + +pub use optimizer::PhysicalOptimizerRule; diff --git a/datafusion/physical-optimizer/src/limit_pushdown.rs b/datafusion/physical-optimizer/src/limit_pushdown.rs index 7a44b2e90dde..1c7e4d3d4c3d 100644 --- a/datafusion/physical-optimizer/src/limit_pushdown.rs +++ b/datafusion/physical-optimizer/src/limit_pushdown.rs @@ -22,6 +22,7 @@ use std::fmt::Debug; use std::sync::Arc; use crate::PhysicalOptimizerRule; + use datafusion_common::config::ConfigOptions; use datafusion_common::error::Result; use datafusion_common::tree_node::{Transformed, TreeNodeRecursion}; @@ -339,480 +340,3 @@ fn add_global_limit( } // See tests in datafusion/core/tests/physical_optimizer - -#[cfg(test)] -mod test { - use 
super::*; - use arrow::compute::SortOptions; - use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; - use datafusion_common::config::ConfigOptions; - use datafusion_execution::{SendableRecordBatchStream, TaskContext}; - use datafusion_expr::Operator; - use datafusion_physical_expr::expressions::BinaryExpr; - use datafusion_physical_expr::expressions::{col, lit}; - use datafusion_physical_expr::{Partitioning, PhysicalSortExpr}; - use datafusion_physical_plan::coalesce_batches::CoalesceBatchesExec; - use datafusion_physical_plan::coalesce_partitions::CoalescePartitionsExec; - use datafusion_physical_plan::empty::EmptyExec; - use datafusion_physical_plan::filter::FilterExec; - use datafusion_physical_plan::limit::{GlobalLimitExec, LocalLimitExec}; - use datafusion_physical_plan::projection::ProjectionExec; - use datafusion_physical_plan::repartition::RepartitionExec; - use datafusion_physical_plan::sorts::sort::SortExec; - use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; - use datafusion_physical_plan::streaming::{PartitionStream, StreamingTableExec}; - use datafusion_physical_plan::{ - get_plan_string, ExecutionPlan, ExecutionPlanProperties, - }; - use std::sync::Arc; - - #[derive(Debug)] - struct DummyStreamPartition { - schema: SchemaRef, - } - impl PartitionStream for DummyStreamPartition { - fn schema(&self) -> &SchemaRef { - &self.schema - } - fn execute(&self, _ctx: Arc) -> SendableRecordBatchStream { - unreachable!() - } - } - - #[test] - fn transforms_streaming_table_exec_into_fetching_version_when_skip_is_zero( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema)?; - let global_limit = global_limit_exec(streaming_table, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=5" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn transforms_streaming_table_exec_into_fetching_version_and_keeps_the_global_limit_when_skip_is_nonzero( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(schema)?; - let global_limit = global_limit_exec(streaming_table, 2, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=2, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true, fetch=7" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn transforms_coalesce_batches_exec_into_fetching_version_and_removes_local_limit( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(Arc::clone(&schema))?; - let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema, repartition)?; - let coalesce_batches = 
coalesce_batches_exec(filter); - let local_limit = local_limit_exec(coalesce_batches, 5); - let coalesce_partitions = coalesce_partitions_exec(local_limit); - let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=5", - " CoalesceBatchesExec: target_batch_size=8192", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn pushes_global_limit_exec_through_projection_exec() -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(Arc::clone(&schema))?; - let filter = filter_exec(Arc::clone(&schema), streaming_table)?; - let projection = projection_exec(schema, filter)?; - let global_limit = global_limit_exec(projection, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " GlobalLimitExec: skip=0, fetch=5", - " FilterExec: c3@2 > 0", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn pushes_global_limit_exec_through_projection_exec_and_transforms_coalesce_batches_exec_into_fetching_version( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(Arc::clone(&schema)).unwrap(); - let coalesce_batches = coalesce_batches_exec(streaming_table); - let projection = projection_exec(schema, coalesce_batches)?; - let global_limit = global_limit_exec(projection, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192, fetch=5", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - 
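Each of these relocated tests drives the rule the same way: build a plan, run `LimitPushdown::new().optimize(..)` once with default options, and compare the rendered plan line by line. A minimal sketch of that driver, with crate paths assumed to match the `datafusion-physical-optimizer` layout touched by this patch:

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;
use datafusion_common::Result;
use datafusion_physical_optimizer::limit_pushdown::LimitPushdown;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::{get_plan_string, ExecutionPlan};

/// Apply the limit pushdown rule to a plan and return the rendered plan,
/// one operator per line, exactly as the assertions above compare it.
fn apply_limit_pushdown(plan: Arc<dyn ExecutionPlan>) -> Result<Vec<String>> {
    let optimized = LimitPushdown::new().optimize(plan, &ConfigOptions::new())?;
    Ok(get_plan_string(&optimized))
}
```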
#[test] - fn pushes_global_limit_into_multiple_fetch_plans() -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(Arc::clone(&schema)).unwrap(); - let coalesce_batches = coalesce_batches_exec(streaming_table); - let projection = projection_exec(Arc::clone(&schema), coalesce_batches)?; - let repartition = repartition_exec(projection)?; - let sort = sort_exec( - vec![PhysicalSortExpr { - expr: col("c1", &schema)?, - options: SortOptions::default(), - }], - repartition, - ); - let spm = - sort_preserving_merge_exec(sort.output_ordering().unwrap().to_vec(), sort); - let global_limit = global_limit_exec(spm, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " SortPreservingMergeExec: [c1@0 ASC]", - " SortExec: expr=[c1@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "SortPreservingMergeExec: [c1@0 ASC], fetch=5", - " SortExec: TopK(fetch=5), expr=[c1@0 ASC], preserve_partitioning=[false]", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3]", - " CoalesceBatchesExec: target_batch_size=8192", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn keeps_pushed_local_limit_exec_when_there_are_multiple_input_partitions( - ) -> Result<()> { - let schema = create_schema(); - let streaming_table = streaming_table_exec(Arc::clone(&schema))?; - let repartition = repartition_exec(streaming_table)?; - let filter = filter_exec(schema, repartition)?; - let coalesce_partitions = coalesce_partitions_exec(filter); - let global_limit = global_limit_exec(coalesce_partitions, 0, Some(5)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = [ - "GlobalLimitExec: skip=0, fetch=5", - " CoalescePartitionsExec", - " FilterExec: c3@2 > 0", - " RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1", - " StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true" - ]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_local_limit_with_local_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let child_local_limit = local_limit_exec(empty_exec, 10); - let parent_local_limit = local_limit_exec(child_local_limit, 20); - - let initial = get_plan_string(&parent_local_limit); - let expected_initial = [ - "LocalLimitExec: fetch=20", - " LocalLimitExec: fetch=10", - " EmptyExec", - ]; - - 
assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(parent_local_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=0, fetch=10", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_global_limit_with_global_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let child_global_limit = global_limit_exec(empty_exec, 10, Some(30)); - let parent_global_limit = global_limit_exec(child_global_limit, 10, Some(20)); - - let initial = get_plan_string(&parent_global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=10, fetch=20", - " GlobalLimitExec: skip=10, fetch=30", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(parent_global_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_global_limit_with_local_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let local_limit = local_limit_exec(empty_exec, 40); - let global_limit = global_limit_exec(local_limit, 20, Some(30)); - - let initial = get_plan_string(&global_limit); - let expected_initial = [ - "GlobalLimitExec: skip=20, fetch=30", - " LocalLimitExec: fetch=40", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(global_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - #[test] - fn merges_local_limit_with_global_limit() -> Result<()> { - let schema = create_schema(); - let empty_exec = empty_exec(schema); - let global_limit = global_limit_exec(empty_exec, 20, Some(30)); - let local_limit = local_limit_exec(global_limit, 20); - - let initial = get_plan_string(&local_limit); - let expected_initial = [ - "LocalLimitExec: fetch=20", - " GlobalLimitExec: skip=20, fetch=30", - " EmptyExec", - ]; - - assert_eq!(initial, expected_initial); - - let after_optimize = - LimitPushdown::new().optimize(local_limit, &ConfigOptions::new())?; - - let expected = ["GlobalLimitExec: skip=20, fetch=20", " EmptyExec"]; - assert_eq!(get_plan_string(&after_optimize), expected); - - Ok(()) - } - - fn create_schema() -> SchemaRef { - Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Int32, true), - Field::new("c2", DataType::Int32, true), - Field::new("c3", DataType::Int32, true), - ])) - } - - fn streaming_table_exec(schema: SchemaRef) -> Result> { - Ok(Arc::new(StreamingTableExec::try_new( - Arc::clone(&schema), - vec![Arc::new(DummyStreamPartition { schema }) as _], - None, - None, - true, - None, - )?)) - } - - fn global_limit_exec( - input: Arc, - skip: usize, - fetch: Option, - ) -> Arc { - Arc::new(GlobalLimitExec::new(input, skip, fetch)) - } - - fn local_limit_exec( - input: Arc, - fetch: usize, - ) -> Arc { - Arc::new(LocalLimitExec::new(input, fetch)) - } - - fn sort_exec( - sort_exprs: impl IntoIterator, - input: Arc, - ) -> Arc { - let sort_exprs = sort_exprs.into_iter().collect(); - Arc::new(SortExec::new(sort_exprs, input)) - } - - fn sort_preserving_merge_exec( - sort_exprs: impl IntoIterator, - input: Arc, - ) -> Arc { - let sort_exprs = sort_exprs.into_iter().collect(); - 
Arc::new(SortPreservingMergeExec::new(sort_exprs, input)) - } - - fn projection_exec( - schema: SchemaRef, - input: Arc, - ) -> Result> { - Ok(Arc::new(ProjectionExec::try_new( - vec![ - (col("c1", schema.as_ref()).unwrap(), "c1".to_string()), - (col("c2", schema.as_ref()).unwrap(), "c2".to_string()), - (col("c3", schema.as_ref()).unwrap(), "c3".to_string()), - ], - input, - )?)) - } - - fn filter_exec( - schema: SchemaRef, - input: Arc, - ) -> Result> { - Ok(Arc::new(FilterExec::try_new( - Arc::new(BinaryExpr::new( - col("c3", schema.as_ref()).unwrap(), - Operator::Gt, - lit(0), - )), - input, - )?)) - } - - fn coalesce_batches_exec(input: Arc) -> Arc { - Arc::new(CoalesceBatchesExec::new(input, 8192)) - } - - fn coalesce_partitions_exec( - local_limit: Arc, - ) -> Arc { - Arc::new(CoalescePartitionsExec::new(local_limit)) - } - - fn repartition_exec( - streaming_table: Arc, - ) -> Result> { - Ok(Arc::new(RepartitionExec::try_new( - streaming_table, - Partitioning::RoundRobinBatch(8), - )?)) - } - - fn empty_exec(schema: SchemaRef) -> Arc { - Arc::new(EmptyExec::new(schema)) - } -} diff --git a/datafusion/physical-optimizer/src/optimizer.rs b/datafusion/physical-optimizer/src/optimizer.rs index 609890e2d43f..88f11f53491e 100644 --- a/datafusion/physical-optimizer/src/optimizer.rs +++ b/datafusion/physical-optimizer/src/optimizer.rs @@ -17,11 +17,26 @@ //! Physical optimizer traits +use std::fmt::Debug; +use std::sync::Arc; + +use crate::aggregate_statistics::AggregateStatistics; +use crate::coalesce_batches::CoalesceBatches; +use crate::combine_partial_final_agg::CombinePartialFinalAggregate; +use crate::enforce_distribution::EnforceDistribution; +use crate::enforce_sorting::EnforceSorting; +use crate::join_selection::JoinSelection; +use crate::limit_pushdown::LimitPushdown; +use crate::limited_distinct_aggregation::LimitedDistinctAggregation; +use crate::output_requirements::OutputRequirements; +use crate::projection_pushdown::ProjectionPushdown; +use crate::sanity_checker::SanityCheckPlan; +use crate::topk_aggregation::TopKAggregation; +use crate::update_aggr_exprs::OptimizeAggregateOrder; + use datafusion_common::config::ConfigOptions; use datafusion_common::Result; use datafusion_physical_plan::ExecutionPlan; -use std::fmt::Debug; -use std::sync::Arc; /// `PhysicalOptimizerRule` transforms one ['ExecutionPlan'] into another which /// computes the same results, but in a potentially more efficient way. @@ -47,3 +62,92 @@ pub trait PhysicalOptimizerRule: Debug { /// and should disable the schema check. fn schema_check(&self) -> bool; } + +/// A rule-based physical optimizer. +#[derive(Clone, Debug)] +pub struct PhysicalOptimizer { + /// All rules to apply + pub rules: Vec>, +} + +impl Default for PhysicalOptimizer { + fn default() -> Self { + Self::new() + } +} + +impl PhysicalOptimizer { + /// Create a new optimizer using the recommended list of rules + pub fn new() -> Self { + let rules: Vec> = vec![ + // If there is a output requirement of the query, make sure that + // this information is not lost across different rules during optimization. 
+            Arc::new(OutputRequirements::new_add_mode()),
+            Arc::new(AggregateStatistics::new()),
+            // Statistics-based join selection will change the Auto mode to a real join implementation,
+            // like collect left, or hash join, or future sort merge join, which will influence the
+            // EnforceDistribution and EnforceSorting rules as they decide whether to add additional
+            // repartitioning and local sorting steps to meet distribution and ordering requirements.
+            // Therefore, it should run before EnforceDistribution and EnforceSorting.
+            Arc::new(JoinSelection::new()),
+            // The LimitedDistinctAggregation rule should be applied before the EnforceDistribution rule,
+            // as that rule may inject other operations in between the different AggregateExecs.
+            // Applying the rule early means only directly-connected AggregateExecs must be examined.
+            Arc::new(LimitedDistinctAggregation::new()),
+            // The EnforceDistribution rule is for adding essential repartitioning to satisfy distribution
+            // requirements. Please make sure that the whole plan tree is determined before this rule.
+            // This rule increases parallelism if doing so is beneficial to the physical plan; i.e. at
+            // least one of the operators in the plan benefits from increased parallelism.
+            Arc::new(EnforceDistribution::new()),
+            // The CombinePartialFinalAggregate rule should be applied after the EnforceDistribution rule.
+            Arc::new(CombinePartialFinalAggregate::new()),
+            // The EnforceSorting rule is for adding essential local sorting to satisfy the required
+            // ordering. Please make sure that the whole plan tree is determined before this rule.
+            // Note that one should always run this rule after running the EnforceDistribution rule
+            // as the latter may break local sorting requirements.
+            Arc::new(EnforceSorting::new()),
+            // Run once after the local sorting requirement is changed
+            Arc::new(OptimizeAggregateOrder::new()),
+            // TODO: `try_embed_to_hash_join` in the ProjectionPushdown rule would be blocked by the CoalesceBatches, so add it before CoalesceBatches. Maybe optimize it in the future.
+            Arc::new(ProjectionPushdown::new()),
+            // The CoalesceBatches rule will not influence the distribution and ordering of the
+            // whole plan tree. Therefore, to avoid influencing other rules, it should run last.
+            Arc::new(CoalesceBatches::new()),
+            // Remove the ancillary output requirement operator since we are done with the planning
+            // phase.
+            Arc::new(OutputRequirements::new_remove_mode()),
+            // The aggregation limiter will try to find situations where the accumulator count
+            // is not tied to the cardinality, i.e. when the output of the aggregation is passed
+            // into an `order by max(x) limit y`. In this case it will copy the limit value down
+            // to the aggregation, allowing it to use only y accumulators.
+            Arc::new(TopKAggregation::new()),
+            // The ProjectionPushdown rule tries to push projections towards
+            // the sources in the execution plan. As a result of this process,
+            // a projection can disappear if it reaches the source providers, and
+            // sequential projections can merge into one. Even if these two cases
+            // are not present, the load of executors such as join or union will be
+            // reduced by narrowing their input tables.
+            Arc::new(ProjectionPushdown::new()),
+            // The LimitPushdown rule tries to push limits down as far as possible,
+            // replacing operators with fetching variants, or adding limits
+            // past operators that support limit pushdown.
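The rule list resumes directly below with `LimitPushdown` itself and `SanityCheckPlan`. For the new public API introduced by this hunk, a small usage sketch of assembling a reduced optimizer via `PhysicalOptimizer::with_rules` and applying its rules in order; the module paths and the `optimize_with_subset` helper are assumptions based on the `pub mod` declarations in this patch:

```rust
use std::sync::Arc;

use datafusion_common::config::ConfigOptions;
use datafusion_common::Result;
use datafusion_physical_optimizer::join_selection::JoinSelection;
use datafusion_physical_optimizer::limit_pushdown::LimitPushdown;
use datafusion_physical_optimizer::optimizer::PhysicalOptimizer;
use datafusion_physical_optimizer::PhysicalOptimizerRule;
use datafusion_physical_plan::ExecutionPlan;

/// Apply a hand-picked subset of the recommended rules, in the given order.
fn optimize_with_subset(
    plan: Arc<dyn ExecutionPlan>,
    config: &ConfigOptions,
) -> Result<Arc<dyn ExecutionPlan>> {
    let rules: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>> = vec![
        Arc::new(JoinSelection::new()),
        Arc::new(LimitPushdown::new()),
    ];
    let optimizer = PhysicalOptimizer::with_rules(rules);
    let mut plan = plan;
    for rule in &optimizer.rules {
        // Every rule maps one ExecutionPlan to an equivalent, hopefully cheaper one.
        plan = rule.optimize(plan, config)?;
    }
    Ok(plan)
}
```

Passing the rules explicitly leaves the ordering concerns documented above (for example, JoinSelection before EnforceDistribution) in the caller's hands.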
+ Arc::new(LimitPushdown::new()), + // The SanityCheckPlan rule checks whether the order and + // distribution requirements of each node in the plan + // is satisfied. It will also reject non-runnable query + // plans that use pipeline-breaking operators on infinite + // input(s). The rule generates a diagnostic error + // message for invalid plans. It makes no changes to the + // given query plan; i.e. it only acts as a final + // gatekeeping rule. + Arc::new(SanityCheckPlan::new()), + ]; + + Self::with_rules(rules) + } + + /// Create a new optimizer with the given rules + pub fn with_rules(rules: Vec>) -> Self { + Self { rules } + } +} diff --git a/datafusion/physical-optimizer/src/output_requirements.rs b/datafusion/physical-optimizer/src/output_requirements.rs index e107bb85d7b8..90a570894a44 100644 --- a/datafusion/physical-optimizer/src/output_requirements.rs +++ b/datafusion/physical-optimizer/src/output_requirements.rs @@ -24,21 +24,23 @@ use std::sync::Arc; -use datafusion_execution::TaskContext; -use datafusion_physical_plan::sorts::sort::SortExec; -use datafusion_physical_plan::{ - DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, -}; +use crate::PhysicalOptimizerRule; use datafusion_common::config::ConfigOptions; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{Result, Statistics}; -use datafusion_physical_expr::{Distribution, LexRequirement}; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::{Distribution, LexRequirement, PhysicalSortRequirement}; +use datafusion_physical_plan::projection::{ + make_with_child, update_expr, ProjectionExec, +}; +use datafusion_physical_plan::sorts::sort::SortExec; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; +use datafusion_physical_plan::{ + DisplayAs, DisplayFormatType, ExecutionPlan, SendableRecordBatchStream, +}; use datafusion_physical_plan::{ExecutionPlanProperties, PlanProperties}; -use crate::PhysicalOptimizerRule; - /// This rule either adds or removes [`OutputRequirements`]s to/from the physical /// plan according to its `mode` attribute, which is set by the constructors /// `new_add_mode` and `new_remove_mode`. With this rule, we can keep track of @@ -192,6 +194,56 @@ impl ExecutionPlan for OutputRequirementExec { fn statistics(&self) -> Result { self.input.statistics() } + + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If the projection does not narrow the schema, we should not try to push it down: + if projection.expr().len() >= projection.input().schema().fields().len() { + return Ok(None); + } + + let mut updated_sort_reqs = LexRequirement::new(vec![]); + // None or empty_vec can be treated in the same way. + if let Some(reqs) = &self.required_input_ordering()[0] { + for req in &reqs.inner { + let Some(new_expr) = update_expr(&req.expr, projection.expr(), false)? + else { + return Ok(None); + }; + updated_sort_reqs.push(PhysicalSortRequirement { + expr: new_expr, + options: req.options, + }); + } + } + + let dist_req = match &self.required_input_distribution()[0] { + Distribution::HashPartitioned(exprs) => { + let mut updated_exprs = vec![]; + for expr in exprs { + let Some(new_expr) = update_expr(expr, projection.expr(), false)? 
+ else { + return Ok(None); + }; + updated_exprs.push(new_expr); + } + Distribution::HashPartitioned(updated_exprs) + } + dist => dist.clone(), + }; + + make_with_child(projection, &self.input()) + .map(|input| { + OutputRequirementExec::new( + input, + (!updated_sort_reqs.is_empty()).then_some(updated_sort_reqs), + dist_req, + ) + }) + .map(|e| Some(Arc::new(e) as _)) + } } impl PhysicalOptimizerRule for OutputRequirements { diff --git a/datafusion/physical-optimizer/src/projection_pushdown.rs b/datafusion/physical-optimizer/src/projection_pushdown.rs new file mode 100644 index 000000000000..34affcbd4a19 --- /dev/null +++ b/datafusion/physical-optimizer/src/projection_pushdown.rs @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! This file implements the `ProjectionPushdown` physical optimization rule. +//! The function [`remove_unnecessary_projections`] tries to push down all +//! projections one by one if the operator below is amenable to this. If a +//! projection reaches a source, it can even disappear from the plan entirely. + +use std::sync::Arc; + +use crate::PhysicalOptimizerRule; + +use datafusion_common::config::ConfigOptions; +use datafusion_common::tree_node::{TransformedResult, TreeNode}; +use datafusion_common::Result; +use datafusion_physical_plan::projection::remove_unnecessary_projections; +use datafusion_physical_plan::ExecutionPlan; + +/// This rule inspects `ProjectionExec`'s in the given physical plan and tries to +/// remove or swap with its child. 
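The swap above only succeeds when every column referenced by the required ordering (and by any hash-partitioning expressions) survives the projection; each surviving reference is then rewritten against the projection's output schema. A minimal, self-contained sketch of that remapping, with projections and requirements reduced to plain column indices (`remap_requirement` and its data shapes are illustrative, not part of this patch):

```rust
/// A projection is a list of (source column index, output alias); a
/// requirement is a list of source column indices that must keep a slot.
/// Returns the requirement re-expressed against the projection's output,
/// or `None` if some required column is not projected (pushdown aborts).
fn remap_requirement(
    requirement: &[usize],
    projection: &[(usize, &str)],
) -> Option<Vec<usize>> {
    requirement
        .iter()
        .map(|col| projection.iter().position(|(src, _)| src == col))
        .collect()
}

fn main() {
    // Input schema: a@0, b@1, c@2; the projection keeps only `c` and `a`.
    let projection = [(2, "c"), (0, "a")];
    // A requirement on [a@0, c@2] becomes [1, 0] in the projected schema.
    assert_eq!(remap_requirement(&[0, 2], &projection), Some(vec![1, 0]));
    // A requirement on b@1 cannot be rewritten, so the swap returns Ok(None).
    assert_eq!(remap_requirement(&[1, 2], &projection), None);
}
```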
+#[derive(Default, Debug)] +pub struct ProjectionPushdown {} + +impl ProjectionPushdown { + #[allow(missing_docs)] + pub fn new() -> Self { + Self {} + } +} + +impl PhysicalOptimizerRule for ProjectionPushdown { + fn optimize( + &self, + plan: Arc, + _config: &ConfigOptions, + ) -> Result> { + plan.transform_down(remove_unnecessary_projections).data() + } + + fn name(&self) -> &str { + "ProjectionPushdown" + } + + fn schema_check(&self) -> bool { + true + } +} diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index 8e975e10180f..e75c75a235b7 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -405,7 +405,7 @@ mod tests { use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; use arrow_array::{ArrayRef, StringArray}; - use arrow_buffer::{BooleanBufferBuilder, NullBuffer}; + use arrow_buffer::NullBufferBuilder; use datafusion_physical_expr::binary_map::OutputType; use super::GroupColumn; @@ -602,16 +602,15 @@ mod tests { .into_parts(); // explicitly build a boolean buffer where one of the null values also happens to match - let mut boolean_buffer_builder = BooleanBufferBuilder::new(6); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(false); // this sets Some("bar") to null above - boolean_buffer_builder.append(false); - boolean_buffer_builder.append(false); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - let nulls = NullBuffer::new(boolean_buffer_builder.finish()); + let mut nulls = NullBufferBuilder::new(6); + nulls.append_non_null(); + nulls.append_null(); // this sets Some("bar") to null above + nulls.append_null(); + nulls.append_null(); + nulls.append_non_null(); + nulls.append_non_null(); let input_array = - Arc::new(StringArray::new(offsets, buffer, Some(nulls))) as ArrayRef; + Arc::new(StringArray::new(offsets, buffer, nulls.finish())) as ArrayRef; // Check let mut equal_to_results = vec![true; builder.len()]; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index 811790f4e588..c3d88b894999 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -548,7 +548,7 @@ mod tests { use arrow::array::AsArray; use arrow::datatypes::StringViewType; use arrow_array::{ArrayRef, StringViewArray}; - use arrow_buffer::{BooleanBufferBuilder, NullBuffer}; + use arrow_buffer::NullBufferBuilder; use super::GroupColumn; @@ -751,22 +751,21 @@ mod tests { .into_parts(); // explicitly build a boolean buffer where one of the null values also happens to match - let mut boolean_buffer_builder = BooleanBufferBuilder::new(9); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(false); // this sets Some("bar") to null above - boolean_buffer_builder.append(false); - boolean_buffer_builder.append(false); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - let nulls 
= NullBuffer::new(boolean_buffer_builder.finish()); + let mut nulls = NullBufferBuilder::new(9); + nulls.append_non_null(); + nulls.append_null(); // this sets Some("bar") to null above + nulls.append_null(); + nulls.append_null(); + nulls.append_non_null(); + nulls.append_non_null(); + nulls.append_non_null(); + nulls.append_non_null(); + nulls.append_non_null(); + nulls.append_non_null(); + nulls.append_non_null(); + nulls.append_non_null(); let input_array = - Arc::new(StringViewArray::new(views, buffer, Some(nulls))) as ArrayRef; + Arc::new(StringViewArray::new(views, buffer, nulls.finish())) as ArrayRef; // Check let mut equal_to_results = vec![true; input_array.len()]; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index 4ceeb634bad2..cd5dfae86ee9 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -214,7 +214,7 @@ mod tests { use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; use arrow::datatypes::Int64Type; use arrow_array::{ArrayRef, Int64Array}; - use arrow_buffer::{BooleanBufferBuilder, NullBuffer}; + use arrow_buffer::NullBufferBuilder; use arrow_schema::DataType; use super::GroupColumn; @@ -304,16 +304,15 @@ mod tests { Int64Array::from(vec![Some(1), Some(2), None, None, Some(1), Some(3)]) .into_parts(); - // explicitly build a boolean buffer where one of the null values also happens to match - let mut boolean_buffer_builder = BooleanBufferBuilder::new(6); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(false); // this sets Some(2) to null above - boolean_buffer_builder.append(false); - boolean_buffer_builder.append(false); - boolean_buffer_builder.append(true); - boolean_buffer_builder.append(true); - let nulls = NullBuffer::new(boolean_buffer_builder.finish()); - let input_array = Arc::new(Int64Array::new(values, Some(nulls))) as ArrayRef; + // explicitly build a null buffer where one of the null values also happens to match + let mut nulls = NullBufferBuilder::new(6); + nulls.append_non_null(); + nulls.append_null(); // this sets Some(2) to null above + nulls.append_null(); + nulls.append_null(); + nulls.append_non_null(); + nulls.append_non_null(); + let input_array = Arc::new(Int64Array::new(values, nulls.finish())) as ArrayRef; // Check let mut equal_to_results = vec![true; builder.len()]; diff --git a/datafusion/physical-plan/src/coalesce_partitions.rs b/datafusion/physical-plan/src/coalesce_partitions.rs index 7c1bdba2f339..3900bd1ddca9 100644 --- a/datafusion/physical-plan/src/coalesce_partitions.rs +++ b/datafusion/physical-plan/src/coalesce_partitions.rs @@ -27,10 +27,10 @@ use super::{ DisplayAs, ExecutionPlanProperties, PlanProperties, SendableRecordBatchStream, Statistics, }; - +use crate::execution_plan::CardinalityEffect; +use crate::projection::{make_with_child, ProjectionExec}; use crate::{DisplayFormatType, ExecutionPlan, Partitioning}; -use crate::execution_plan::CardinalityEffect; use datafusion_common::{internal_err, Result}; use datafusion_execution::TaskContext; @@ -184,6 +184,22 @@ impl ExecutionPlan for CoalescePartitionsExec { fn cardinality_effect(&self) -> CardinalityEffect { CardinalityEffect::Equal } + + /// Tries to swap `projection` with its input, which is known to be a + /// [`CoalescePartitionsExec`]. 
If possible, performs the swap and returns + /// [`CoalescePartitionsExec`] as the top plan. Otherwise, returns `None`. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If the projection does not narrow the schema, we should not try to push it down: + if projection.expr().len() >= projection.input().schema().fields().len() { + return Ok(None); + } + // CoalescePartitionsExec always has a single child, so zero indexing is safe. + make_with_child(projection, projection.input().children()[0]) + .map(|e| Some(Arc::new(CoalescePartitionsExec::new(e)) as _)) + } } #[cfg(test)] diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 753234c09994..a54b46111f53 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -15,42 +15,45 @@ // specific language governing permissions and limitations // under the License. -use std::any::Any; -use std::fmt::Debug; -use std::sync::Arc; - -use arrow::datatypes::SchemaRef; -use arrow::record_batch::RecordBatch; -use arrow_array::Array; -use futures::stream::{StreamExt, TryStreamExt}; -use tokio::task::JoinSet; +pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; +pub use crate::metrics::Metric; +pub use crate::ordering::InputOrderMode; +pub use crate::stream::EmptyRecordBatchStream; -use datafusion_common::config::ConfigOptions; pub use datafusion_common::hash_utils; pub use datafusion_common::utils::project_schema; -use datafusion_common::{exec_err, Constraints, Result}; pub use datafusion_common::{internal_err, ColumnStatistics, Statistics}; -use datafusion_execution::TaskContext; pub use datafusion_execution::{RecordBatchStream, SendableRecordBatchStream}; pub use datafusion_expr::{Accumulator, ColumnarValue}; pub use datafusion_physical_expr::window::WindowExpr; pub use datafusion_physical_expr::{ expressions, udf, Distribution, Partitioning, PhysicalExpr, }; -use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; -use datafusion_physical_expr_common::sort_expr::LexRequirement; + +use std::any::Any; +use std::fmt::Debug; +use std::sync::Arc; use crate::coalesce_partitions::CoalescePartitionsExec; use crate::display::DisplayableExecutionPlan; -pub use crate::display::{DefaultDisplay, DisplayAs, DisplayFormatType, VerboseDisplay}; -pub use crate::metrics::Metric; use crate::metrics::MetricsSet; -pub use crate::ordering::InputOrderMode; +use crate::projection::ProjectionExec; use crate::repartition::RepartitionExec; use crate::sorts::sort_preserving_merge::SortPreservingMergeExec; -pub use crate::stream::EmptyRecordBatchStream; use crate::stream::RecordBatchStreamAdapter; +use arrow::datatypes::SchemaRef; +use arrow::record_batch::RecordBatch; +use arrow_array::Array; +use datafusion_common::config::ConfigOptions; +use datafusion_common::{exec_err, Constraints, Result}; +use datafusion_execution::TaskContext; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_expr_common::sort_expr::LexRequirement; + +use futures::stream::{StreamExt, TryStreamExt}; +use tokio::task::JoinSet; + /// Represent nodes in the DataFusion Physical Plan. 
/// /// Calling [`execute`] produces an `async` [`SendableRecordBatchStream`] of @@ -431,6 +434,21 @@ pub trait ExecutionPlan: Debug + DisplayAs + Send + Sync { fn cardinality_effect(&self) -> CardinalityEffect { CardinalityEffect::Unknown } + + /// Attempts to push down the given projection into the input of this `ExecutionPlan`. + /// + /// If the operator supports this optimization, the resulting plan will be: + /// `self_new <- projection <- source`, starting from `projection <- self <- source`. + /// Otherwise, it returns the current `ExecutionPlan` as-is. + /// + /// Returns `Ok(Some(...))` if pushdown is applied, `Ok(None)` if it is not supported + /// or not possible, or `Err` on failure. + fn try_swapping_with_projection( + &self, + _projection: &ProjectionExec, + ) -> Result>> { + Ok(None) + } } /// [`ExecutionPlan`] Invariant Level diff --git a/datafusion/physical-plan/src/filter.rs b/datafusion/physical-plan/src/filter.rs index 8e7c14f0baed..ae4a15ba5249 100644 --- a/datafusion/physical-plan/src/filter.rs +++ b/datafusion/physical-plan/src/filter.rs @@ -25,6 +25,11 @@ use super::{ RecordBatchStream, SendableRecordBatchStream, Statistics, }; use crate::common::can_project; +use crate::execution_plan::CardinalityEffect; +use crate::projection::{ + make_with_child, try_embed_projection, update_expr, EmbeddedProjection, + ProjectionExec, +}; use crate::{ metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}, DisplayFormatType, ExecutionPlan, @@ -49,7 +54,6 @@ use datafusion_physical_expr::{ ExprBoundaries, PhysicalExpr, }; -use crate::execution_plan::CardinalityEffect; use futures::stream::{Stream, StreamExt}; use log::trace; @@ -399,6 +403,38 @@ impl ExecutionPlan for FilterExec { fn cardinality_effect(&self) -> CardinalityEffect { CardinalityEffect::LowerEqual } + + /// Tries to swap `projection` with its input (`filter`). If possible, performs + /// the swap and returns [`FilterExec`] as the top plan. Otherwise, returns `None`. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If the projection does not narrow the schema, we should not try to push it down: + if projection.expr().len() < projection.input().schema().fields().len() { + // Each column in the predicate expression must exist after the projection. + if let Some(new_predicate) = + update_expr(self.predicate(), projection.expr(), false)? + { + return FilterExec::try_new( + new_predicate, + make_with_child(projection, self.input())?, + ) + .and_then(|e| { + let selectivity = self.default_selectivity(); + e.with_default_selectivity(selectivity) + }) + .map(|e| Some(Arc::new(e) as _)); + } + } + try_embed_projection(projection, self) + } +} + +impl EmbeddedProjection for FilterExec { + fn with_projection(&self, projection: Option>) -> Result { + self.with_projection(projection) + } } /// This function ensures that all bounds in the `ExprBoundaries` vector are diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 69300fce7745..87fd0f96586a 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -18,6 +18,8 @@ //! Defines the cross join plan for loading the left side of the cross join //! 
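The design hinges on this new trait method: the default implementation conservatively reports that pushdown is unsupported, and individual operators opt in by overriding it. A toy model of that dispatch, using stand-in types (`Node`, `Proj`, `Filter`, and `Opaque` are illustrative, not DataFusion types):

```rust
// `Node` stands in for `ExecutionPlan` and `Proj` for `ProjectionExec`.
struct Proj {
    columns: Vec<usize>, // projected source column indices
}

trait Node {
    fn name(&self) -> &str;
    /// Default: pushdown unsupported, mirroring the trait's `Ok(None)` default.
    fn try_swapping_with_projection(&self, _p: &Proj) -> Option<String> {
        None
    }
}

struct Filter {
    input_fields: usize,
}
struct Opaque;

impl Node for Filter {
    fn name(&self) -> &str {
        "Filter"
    }
    fn try_swapping_with_projection(&self, p: &Proj) -> Option<String> {
        // Only worth swapping when the projection narrows the schema.
        (p.columns.len() < self.input_fields)
            .then(|| format!("Filter <- Proj({:?})", p.columns))
    }
}

impl Node for Opaque {
    fn name(&self) -> &str {
        "Opaque"
    }
}

fn main() {
    let proj = Proj { columns: vec![0, 2] };
    let nodes: Vec<Box<dyn Node>> =
        vec![Box::new(Filter { input_fields: 5 }), Box::new(Opaque)];
    for node in &nodes {
        match node.try_swapping_with_projection(&proj) {
            Some(plan) => println!("{}: swapped -> {plan}", node.name()),
            None => println!("{}: projection stays on top", node.name()),
        }
    }
}
```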
and producing batches in parallel for the right partitions +use std::{any::Any, sync::Arc, task::Poll}; + use super::utils::{ adjust_right_output_partitioning, reorder_output_after_swap, BatchSplitter, BatchTransformer, BuildProbeJoinMetrics, NoopBatchTransformer, OnceAsync, OnceFut, @@ -26,14 +28,17 @@ use super::utils::{ use crate::coalesce_partitions::CoalescePartitionsExec; use crate::execution_plan::{boundedness_from_children, EmissionType}; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use crate::projection::{ + join_allows_pushdown, join_table_borders, new_join_children, + physical_to_column_exprs, ProjectionExec, +}; use crate::{ handle_state, ColumnStatistics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; -use arrow::compute::concat_batches; -use std::{any::Any, sync::Arc, task::Poll}; +use arrow::compute::concat_batches; use arrow::datatypes::{Fields, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::RecordBatchOptions; @@ -335,6 +340,47 @@ impl ExecutionPlan for CrossJoinExec { self.right.statistics()?, )) } + + /// Tries to swap the projection with its input [`CrossJoinExec`]. If it can be done, + /// it returns the new swapped version having the [`CrossJoinExec`] as the top plan. + /// Otherwise, it returns None. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // Convert projected PhysicalExpr's to columns. If not possible, we cannot proceed. + let Some(projection_as_columns) = physical_to_column_exprs(projection.expr()) + else { + return Ok(None); + }; + + let (far_right_left_col_ind, far_left_right_col_ind) = join_table_borders( + self.left().schema().fields().len(), + &projection_as_columns, + ); + + if !join_allows_pushdown( + &projection_as_columns, + &self.schema(), + far_right_left_col_ind, + far_left_right_col_ind, + ) { + return Ok(None); + } + + let (new_left, new_right) = new_join_children( + &projection_as_columns, + far_right_left_col_ind, + far_left_right_col_ind, + self.left(), + self.right(), + )?; + + Ok(Some(Arc::new(CrossJoinExec::new( + Arc::new(new_left), + Arc::new(new_right), + )))) + } } /// [left/right]_col_count are required in case the column statistics are None diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index 4e224ea65d19..bac72e8a0cc7 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -33,6 +33,11 @@ use super::{ PartitionMode, SharedBitmapBuilder, }; use crate::execution_plan::{boundedness_from_children, EmissionType}; +use crate::projection::{ + try_embed_projection, try_pushdown_through_join, EmbeddedProjection, JoinData, + ProjectionExec, +}; +use crate::spill::get_record_batch_memory_size; use crate::ExecutionPlanProperties; use crate::{ coalesce_partitions::CoalescePartitionsExec, @@ -69,15 +74,14 @@ use datafusion_common::{ }; use datafusion_execution::memory_pool::{MemoryConsumer, MemoryReservation}; use datafusion_execution::TaskContext; +use datafusion_expr::Operator; use datafusion_physical_expr::equivalence::{ join_equivalence_properties, ProjectionMapping, }; use datafusion_physical_expr::PhysicalExprRef; +use datafusion_physical_expr_common::datum::compare_op_for_nested; -use crate::spill::get_record_batch_memory_size; use ahash::RandomState; -use datafusion_expr::Operator; -use 
datafusion_physical_expr_common::datum::compare_op_for_nested; use futures::{ready, Stream, StreamExt, TryStreamExt}; use parking_lot::Mutex; @@ -864,6 +868,47 @@ impl ExecutionPlan for HashJoinExec { // Project statistics if there is a projection Ok(stats.project(self.projection.as_ref())) } + + /// Tries to push `projection` down through `hash_join`. If possible, performs the + /// pushdown and returns a new [`HashJoinExec`] as the top plan which has projections + /// as its children. Otherwise, returns `None`. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // TODO: currently if there is projection in HashJoinExec, we can't push down projection to left or right input. Maybe we can pushdown the mixed projection later. + if self.contains_projection() { + return Ok(None); + } + + if let Some(JoinData { + projected_left_child, + projected_right_child, + join_filter, + join_on, + }) = try_pushdown_through_join( + projection, + self.left(), + self.right(), + self.on(), + self.schema(), + self.filter(), + )? { + Ok(Some(Arc::new(HashJoinExec::try_new( + Arc::new(projected_left_child), + Arc::new(projected_right_child), + join_on, + join_filter, + self.join_type(), + // Returned early if projection is not None + None, + *self.partition_mode(), + self.null_equals_null, + )?))) + } else { + try_embed_projection(projection, self) + } + } } /// Reads the left (build) side of the input, buffering it in memory, to build a @@ -1584,6 +1629,12 @@ impl Stream for HashJoinStream { } } +impl EmbeddedProjection for HashJoinExec { + fn with_projection(&self, projection: Option>) -> Result { + self.with_projection(projection) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/physical-plan/src/joins/nested_loop_join.rs b/datafusion/physical-plan/src/joins/nested_loop_join.rs index d2fa2fdc7b4d..ce960df32ec2 100644 --- a/datafusion/physical-plan/src/joins/nested_loop_join.rs +++ b/datafusion/physical-plan/src/joins/nested_loop_join.rs @@ -36,7 +36,12 @@ use crate::joins::utils::{ build_join_schema, check_join_is_valid, estimate_join_statistics, BuildProbeJoinMetrics, ColumnIndex, JoinFilter, OnceAsync, OnceFut, }; +use crate::joins::SharedBitmapBuilder; use crate::metrics::{ExecutionPlanMetricsSet, MetricsSet}; +use crate::projection::{ + try_embed_projection, try_pushdown_through_join, EmbeddedProjection, JoinData, + ProjectionExec, +}; use crate::{ handle_state, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, PlanProperties, RecordBatchStream, @@ -57,7 +62,6 @@ use datafusion_physical_expr::equivalence::{ join_equivalence_properties, ProjectionMapping, }; -use crate::joins::SharedBitmapBuilder; use futures::{ready, Stream, StreamExt, TryStreamExt}; use parking_lot::Mutex; @@ -555,6 +559,44 @@ impl ExecutionPlan for NestedLoopJoinExec { &self.join_schema, ) } + + /// Tries to push `projection` down through `nested_loop_join`. If possible, performs the + /// pushdown and returns a new [`NestedLoopJoinExec`] as the top plan which has projections + /// as its children. Otherwise, returns `None`. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // TODO: currently if there is projection in NestedLoopJoinExec, we can't push down projection to left or right input. Maybe we can pushdown the mixed projection later. + if self.contains_projection() { + return Ok(None); + } + + if let Some(JoinData { + projected_left_child, + projected_right_child, + join_filter, + .. 
+ }) = try_pushdown_through_join( + projection, + self.left(), + self.right(), + &[], + self.schema(), + self.filter(), + )? { + Ok(Some(Arc::new(NestedLoopJoinExec::try_new( + Arc::new(projected_left_child), + Arc::new(projected_right_child), + join_filter, + self.join_type(), + // Returned early if projection is not None + None, + )?))) + } else { + try_embed_projection(projection, self) + } + } } /// Asynchronously collect input into a single batch, and creates `JoinLeftData` from it @@ -979,6 +1021,12 @@ impl RecordBatchStream for NestedLoopJoinStr } } +impl EmbeddedProjection for NestedLoopJoinExec { + fn with_projection(&self, projection: Option>) -> Result { + self.with_projection(projection) + } +} + #[cfg(test)] pub(crate) mod tests { use super::*; diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 87690c2da23a..6565e6799b83 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -34,6 +34,25 @@ use std::sync::atomic::Ordering::Relaxed; use std::sync::Arc; use std::task::{Context, Poll}; +use crate::execution_plan::{boundedness_from_children, EmissionType}; +use crate::expressions::PhysicalSortExpr; +use crate::joins::utils::{ + build_join_schema, check_join_is_valid, estimate_join_statistics, + reorder_output_after_swap, symmetric_join_output_partitioning, JoinFilter, JoinOn, + JoinOnRef, +}; +use crate::metrics::{Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; +use crate::projection::{ + join_allows_pushdown, join_table_borders, new_join_children, + physical_to_column_exprs, update_join_on, ProjectionExec, +}; +use crate::spill::spill_record_batches; +use crate::{ + metrics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, + ExecutionPlanProperties, PhysicalExpr, PlanProperties, RecordBatchStream, + SendableRecordBatchStream, Statistics, +}; + use arrow::array::*; use arrow::compute::{ self, concat_batches, filter_record_batch, is_not_null, take, SortOptions, @@ -54,21 +73,6 @@ use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::PhysicalExprRef; use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; -use crate::execution_plan::{boundedness_from_children, EmissionType}; -use crate::expressions::PhysicalSortExpr; -use crate::joins::utils::{ - build_join_schema, check_join_is_valid, estimate_join_statistics, - reorder_output_after_swap, symmetric_join_output_partitioning, JoinFilter, JoinOn, - JoinOnRef, -}; -use crate::metrics::{Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet}; -use crate::spill::spill_record_batches; -use crate::{ - metrics, DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, - ExecutionPlanProperties, PhysicalExpr, PlanProperties, RecordBatchStream, - SendableRecordBatchStream, Statistics, -}; - use futures::{Stream, StreamExt}; /// Join execution plan that executes equi-join predicates on multiple partitions using Sort-Merge @@ -504,6 +508,61 @@ impl ExecutionPlan for SortMergeJoinExec { &self.schema, ) } + + /// Tries to swap the projection with its input [`SortMergeJoinExec`]. If it can be done, + /// it returns the new swapped version having the [`SortMergeJoinExec`] as the top plan. + /// Otherwise, it returns None. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // Convert projected PhysicalExpr's to columns. If not possible, we cannot proceed. 
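Both the hash join and the nested-loop join follow the same two-step strategy: first attempt `try_pushdown_through_join`, which requires a pure column projection and produces one narrowing projection per side, and only then fall back to `try_embed_projection`, which bakes the referenced column indices into the join itself. A simplified sketch of that decision cascade over bare column indices (the `Pushdown` enum and `plan_pushdown` helper are illustrative, and the real code additionally checks column ordering via `join_allows_pushdown`):

```rust
/// Illustrative outcome of moving a projection below a join.
#[derive(Debug)]
enum Pushdown {
    /// Each join input gets its own narrowing projection.
    ThroughJoin { left: Vec<usize>, right: Vec<usize> },
    /// The join keeps an embedded output projection instead.
    Embedded { kept: Vec<usize> },
    /// The projection stays above the join.
    Unchanged,
}

fn plan_pushdown(
    projection: &[usize], // indices into the join output (left ++ right)
    left_width: usize,
    right_width: usize,
    pure_columns: bool, // true when every projected expr is a bare column
) -> Pushdown {
    if projection.len() >= left_width + right_width {
        return Pushdown::Unchanged; // does not narrow the schema
    }
    if pure_columns {
        // Split by side; right-side indices are rebased onto the right child.
        let left: Vec<usize> = projection
            .iter()
            .copied()
            .filter(|i| *i < left_width)
            .collect();
        let right: Vec<usize> = projection
            .iter()
            .copied()
            .filter(|i| *i >= left_width)
            .map(|i| i - left_width)
            .collect();
        return Pushdown::ThroughJoin { left, right };
    }
    Pushdown::Embedded { kept: projection.to_vec() }
}

fn main() {
    // Join output: left has 3 columns (0..3), right has 2 (3..5).
    println!("{:?}", plan_pushdown(&[0, 3], 3, 2, true)); // ThroughJoin
    println!("{:?}", plan_pushdown(&[0, 3], 3, 2, false)); // Embedded
    println!("{:?}", plan_pushdown(&[0, 1, 2, 3, 4], 3, 2, true)); // Unchanged
}
```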
+ let Some(projection_as_columns) = physical_to_column_exprs(projection.expr()) + else { + return Ok(None); + }; + + let (far_right_left_col_ind, far_left_right_col_ind) = join_table_borders( + self.left().schema().fields().len(), + &projection_as_columns, + ); + + if !join_allows_pushdown( + &projection_as_columns, + &self.schema(), + far_right_left_col_ind, + far_left_right_col_ind, + ) { + return Ok(None); + } + + let Some(new_on) = update_join_on( + &projection_as_columns[0..=far_right_left_col_ind as _], + &projection_as_columns[far_left_right_col_ind as _..], + self.on(), + self.left().schema().fields().len(), + ) else { + return Ok(None); + }; + + let (new_left, new_right) = new_join_children( + &projection_as_columns, + far_right_left_col_ind, + far_left_right_col_ind, + self.children()[0], + self.children()[1], + )?; + + Ok(Some(Arc::new(SortMergeJoinExec::try_new( + Arc::new(new_left), + Arc::new(new_right), + new_on, + self.filter.clone(), + self.join_type, + self.sort_options.clone(), + self.null_equals_null, + )?))) + } } /// Metrics for SortMergeJoinExec diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index b050d3adfeb5..2dbff25e703e 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -47,6 +47,10 @@ use crate::joins::utils::{ BatchTransformer, ColumnIndex, JoinFilter, JoinHashMapType, JoinOn, JoinOnRef, NoopBatchTransformer, StatefulStreamResult, }; +use crate::projection::{ + join_allows_pushdown, join_table_borders, new_join_children, + physical_to_column_exprs, update_join_filter, update_join_on, ProjectionExec, +}; use crate::{ joins::StreamJoinPartitionMode, metrics::{ExecutionPlanMetricsSet, MetricsSet}, @@ -71,9 +75,9 @@ use datafusion_expr::interval_arithmetic::Interval; use datafusion_physical_expr::equivalence::join_equivalence_properties; use datafusion_physical_expr::intervals::cp_solver::ExprIntervalGraph; use datafusion_physical_expr::PhysicalExprRef; +use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use ahash::RandomState; -use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; use futures::{ready, Stream, StreamExt}; use parking_lot::Mutex; @@ -556,6 +560,81 @@ impl ExecutionPlan for SymmetricHashJoinExec { })) } } + + /// Tries to swap the projection with its input [`SymmetricHashJoinExec`]. If it can be done, + /// it returns the new swapped version having the [`SymmetricHashJoinExec`] as the top plan. + /// Otherwise, it returns None. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // Convert projected PhysicalExpr's to columns. If not possible, we cannot proceed. 
+ let Some(projection_as_columns) = physical_to_column_exprs(projection.expr()) + else { + return Ok(None); + }; + + let (far_right_left_col_ind, far_left_right_col_ind) = join_table_borders( + self.left().schema().fields().len(), + &projection_as_columns, + ); + + if !join_allows_pushdown( + &projection_as_columns, + &self.schema(), + far_right_left_col_ind, + far_left_right_col_ind, + ) { + return Ok(None); + } + + let Some(new_on) = update_join_on( + &projection_as_columns[0..=far_right_left_col_ind as _], + &projection_as_columns[far_left_right_col_ind as _..], + self.on(), + self.left().schema().fields().len(), + ) else { + return Ok(None); + }; + + let new_filter = if let Some(filter) = self.filter() { + match update_join_filter( + &projection_as_columns[0..=far_right_left_col_ind as _], + &projection_as_columns[far_left_right_col_ind as _..], + filter, + self.left().schema().fields().len(), + ) { + Some(updated_filter) => Some(updated_filter), + None => return Ok(None), + } + } else { + None + }; + + let (new_left, new_right) = new_join_children( + &projection_as_columns, + far_right_left_col_ind, + far_left_right_col_ind, + self.left(), + self.right(), + )?; + + Ok(Some(Arc::new(SymmetricHashJoinExec::try_new( + Arc::new(new_left), + Arc::new(new_right), + new_on, + new_filter, + self.join_type(), + self.null_equals_null(), + self.right() + .output_ordering() + .map(|p| LexOrdering::new(p.to_vec())), + self.left() + .output_ordering() + .map(|p| LexOrdering::new(p.to_vec())), + self.partition_mode(), + )?))) + } } /// A stream that issues [RecordBatch]es as they arrive from the right of the join. diff --git a/datafusion/physical-plan/src/memory.rs b/datafusion/physical-plan/src/memory.rs index fb58a04fcc20..5ad3c4881b39 100644 --- a/datafusion/physical-plan/src/memory.rs +++ b/datafusion/physical-plan/src/memory.rs @@ -17,7 +17,6 @@ //! Execution plan for reading in-memory batches of data -use parking_lot::RwLock; use std::any::Any; use std::fmt; use std::sync::Arc; @@ -29,6 +28,9 @@ use super::{ Statistics, }; use crate::execution_plan::{Boundedness, EmissionType}; +use crate::projection::{ + all_alias_free_columns, new_projections_for_columns, ProjectionExec, +}; use arrow::datatypes::SchemaRef; use arrow::record_batch::RecordBatch; @@ -45,6 +47,7 @@ use datafusion_physical_expr::utils::collect_columns; use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; use futures::Stream; +use parking_lot::RwLock; /// Execution plan for reading in-memory batches of data #[derive(Clone)] @@ -166,6 +169,30 @@ impl ExecutionPlan for MemoryExec { self.projection.clone(), )) } + + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If there is any non-column or alias-carrier expression, Projection should not be removed. + // This process can be moved into MemoryExec, but it would be an overlap of their responsibility. 
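For a source that already carries a projection, such as `MemoryExec`, pushing another column-only projection into it is plain index composition: each new output column is looked up in the source's current projection, which is what `new_projections_for_columns` computes. A small sketch with a hypothetical `compose_projections` helper over index vectors:

```rust
/// `source` maps the source's current output columns to original columns;
/// `new` selects columns from that current output. The composition maps the
/// new output directly to original columns, so the extra ProjectionExec can
/// be dropped.
fn compose_projections(source: &[usize], new: &[usize]) -> Vec<usize> {
    new.iter().map(|i| source[*i]).collect()
}

fn main() {
    // Original table columns: 0..=4. The source already projects [4, 0, 2].
    let source = [4, 0, 2];
    // A ProjectionExec above it keeps its columns 2 and 0, in that order.
    let composed = compose_projections(&source, &[2, 0]);
    // The source now reads original columns [2, 4] and the projection goes away.
    assert_eq!(composed, vec![2, 4]);
}
```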
+ all_alias_free_columns(projection.expr()) + .then(|| { + let all_projections = (0..self.schema().fields().len()).collect(); + let new_projections = new_projections_for_columns( + projection, + self.projection().as_ref().unwrap_or(&all_projections), + ); + + MemoryExec::try_new( + self.partitions(), + self.original_schema(), + Some(new_projections), + ) + .map(|e| Arc::new(e) as _) + }) + .transpose() + } } impl MemoryExec { diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index e37a6b0dfb85..b364d4a870e3 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -26,24 +26,30 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use super::expressions::Column; +use super::expressions::{CastExpr, Column, Literal}; use super::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use super::{ DisplayAs, ExecutionPlanProperties, PlanProperties, RecordBatchStream, SendableRecordBatchStream, Statistics, }; +use crate::execution_plan::CardinalityEffect; +use crate::joins::utils::{ColumnIndex, JoinFilter}; use crate::{ColumnStatistics, DisplayFormatType, ExecutionPlan, PhysicalExpr}; use arrow::datatypes::{Field, Schema, SchemaRef}; use arrow::record_batch::{RecordBatch, RecordBatchOptions}; use datafusion_common::stats::Precision; -use datafusion_common::Result; +use datafusion_common::tree_node::{ + Transformed, TransformedResult, TreeNode, TreeNodeRecursion, +}; +use datafusion_common::{internal_err, JoinSide, Result}; use datafusion_execution::TaskContext; use datafusion_physical_expr::equivalence::ProjectionMapping; -use datafusion_physical_expr::expressions::{CastExpr, Literal}; +use datafusion_physical_expr::utils::collect_columns; +use datafusion_physical_expr::PhysicalExprRef; -use crate::execution_plan::CardinalityEffect; use futures::stream::{Stream, StreamExt}; +use itertools::Itertools; use log::trace; /// Execution plan for a projection @@ -239,6 +245,19 @@ impl ExecutionPlan for ProjectionExec { fn cardinality_effect(&self) -> CardinalityEffect { CardinalityEffect::Equal } + + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + let maybe_unified = try_unifying_projections(projection, self)?; + if let Some(new_plan) = maybe_unified { + // To unify 3 or more sequential projections: + remove_unnecessary_projections(new_plan).data().map(Some) + } else { + Ok(Some(Arc::new(projection.clone()))) + } + } } /// If 'e' is a direct column reference, returns the field level @@ -353,15 +372,714 @@ impl RecordBatchStream for ProjectionStream { } } +pub trait EmbeddedProjection: ExecutionPlan + Sized { + fn with_projection(&self, projection: Option>) -> Result; +} + +/// Some projection can't be pushed down left input or right input of hash join because filter or on need may need some columns that won't be used in later. +/// By embed those projection to hash join, we can reduce the cost of build_batch_from_indices in hash join (build_batch_from_indices need to can compute::take() for each column) and avoid unnecessary output creation. +pub fn try_embed_projection( + projection: &ProjectionExec, + execution_plan: &Exec, +) -> Result>> { + // Collect all column indices from the given projection expressions. 
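`try_unifying_projections` (defined further below) declines to merge two stacked projections when the outer one references a non-trivial child expression more than once, since inlining would recompute work the child currently evaluates only once. A compact model of that reference-counting heuristic (`ChildExpr` and `should_unify` are illustrative names):

```rust
use std::collections::HashMap;

/// Child projection outputs, flagged by whether they are trivial
/// (a bare column or literal) or a real computation.
struct ChildExpr {
    trivial: bool,
}

/// `outer_refs` lists, per outer expression, which child outputs it uses.
/// Merging is declined if a non-trivial child output is used more than once.
fn should_unify(child: &[ChildExpr], outer_refs: &[Vec<usize>]) -> bool {
    let mut counts: HashMap<usize, usize> = HashMap::new();
    for refs in outer_refs {
        for &idx in refs {
            *counts.entry(idx).or_default() += 1;
        }
    }
    counts
        .iter()
        .all(|(idx, count)| *count <= 1 || child[*idx].trivial)
}

fn main() {
    // Child computes [a (plain column), a * b + c (expensive)].
    let child = [ChildExpr { trivial: true }, ChildExpr { trivial: false }];
    // Outer uses the expensive expression twice: keep the two projections.
    assert!(!should_unify(&child, &[vec![1], vec![1, 0]]));
    // Outer uses it once: safe to merge into a single projection.
    assert!(should_unify(&child, &[vec![1], vec![0]]));
}
```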
+ let projection_index = collect_column_indices(projection.expr()); + + if projection_index.is_empty() { + return Ok(None); + }; + + // If the projection indices is the same as the input columns, we don't need to embed the projection to hash join. + // Check the projection_index is 0..n-1 and the length of projection_index is the same as the length of execution_plan schema fields. + if projection_index.len() == projection_index.last().unwrap() + 1 + && projection_index.len() == execution_plan.schema().fields().len() + { + return Ok(None); + } + + let new_execution_plan = + Arc::new(execution_plan.with_projection(Some(projection_index.to_vec()))?); + + // Build projection expressions for update_expr. Zip the projection_index with the new_execution_plan output schema fields. + let embed_project_exprs = projection_index + .iter() + .zip(new_execution_plan.schema().fields()) + .map(|(index, field)| { + ( + Arc::new(Column::new(field.name(), *index)) as Arc, + field.name().to_owned(), + ) + }) + .collect::>(); + + let mut new_projection_exprs = Vec::with_capacity(projection.expr().len()); + + for (expr, alias) in projection.expr() { + // update column index for projection expression since the input schema has been changed. + let Some(expr) = update_expr(expr, embed_project_exprs.as_slice(), false)? else { + return Ok(None); + }; + new_projection_exprs.push((expr, alias.clone())); + } + // Old projection may contain some alias or expression such as `a + 1` and `CAST('true' AS BOOLEAN)`, but our projection_exprs in hash join just contain column, so we need to create the new projection to keep the original projection. + let new_projection = Arc::new(ProjectionExec::try_new( + new_projection_exprs, + Arc::clone(&new_execution_plan) as _, + )?); + if is_projection_removable(&new_projection) { + Ok(Some(new_execution_plan)) + } else { + Ok(Some(new_projection)) + } +} + +/// The on clause of the join, as vector of (left, right) columns. +pub type JoinOn = Vec<(PhysicalExprRef, PhysicalExprRef)>; +/// Reference for JoinOn. +pub type JoinOnRef<'a> = &'a [(PhysicalExprRef, PhysicalExprRef)]; + +pub struct JoinData { + pub projected_left_child: ProjectionExec, + pub projected_right_child: ProjectionExec, + pub join_filter: Option, + pub join_on: JoinOn, +} + +pub fn try_pushdown_through_join( + projection: &ProjectionExec, + join_left: &Arc, + join_right: &Arc, + join_on: JoinOnRef, + schema: SchemaRef, + filter: Option<&JoinFilter>, +) -> Result> { + // Convert projected expressions to columns. We can not proceed if this is not possible. 
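The embedding path boils down to index bookkeeping: gather the distinct columns the projection touches, skip the rewrite when they already cover the child's schema in order, and otherwise hand that index list to the join. A self-contained sketch of that bookkeeping, with expressions reduced to the column sets they reference (`embedded_indices` is an illustrative name):

```rust
use std::collections::BTreeSet;

/// Expressions are modelled as the sets of input columns they reference.
/// Returns the sorted, de-duplicated indices the join must still produce,
/// or `None` when embedding would be pointless (identity over the input).
fn embedded_indices(exprs: &[Vec<usize>], input_width: usize) -> Option<Vec<usize>> {
    let indices: Vec<usize> = exprs
        .iter()
        .flatten()
        .copied()
        .collect::<BTreeSet<_>>()
        .into_iter()
        .collect();
    if indices.is_empty() {
        return None;
    }
    // Same early-out as above: indices are exactly 0..n-1 over the full input.
    if indices.len() == *indices.last().unwrap() + 1 && indices.len() == input_width {
        return None;
    }
    Some(indices)
}

fn main() {
    // Projection computes [b@7 - (1 + a@1)] over an 8-column join output.
    assert_eq!(embedded_indices(&[vec![7, 1]], 8), Some(vec![1, 7]));
    // Projection of [a@0, b@1] over a 2-column input: nothing to embed.
    assert_eq!(embedded_indices(&[vec![0], vec![1]], 2), None);
}
```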
+ let Some(projection_as_columns) = physical_to_column_exprs(projection.expr()) else { + return Ok(None); + }; + + let (far_right_left_col_ind, far_left_right_col_ind) = + join_table_borders(join_left.schema().fields().len(), &projection_as_columns); + + if !join_allows_pushdown( + &projection_as_columns, + &schema, + far_right_left_col_ind, + far_left_right_col_ind, + ) { + return Ok(None); + } + + let new_filter = if let Some(filter) = filter { + match update_join_filter( + &projection_as_columns[0..=far_right_left_col_ind as _], + &projection_as_columns[far_left_right_col_ind as _..], + filter, + join_left.schema().fields().len(), + ) { + Some(updated_filter) => Some(updated_filter), + None => return Ok(None), + } + } else { + None + }; + + let Some(new_on) = update_join_on( + &projection_as_columns[0..=far_right_left_col_ind as _], + &projection_as_columns[far_left_right_col_ind as _..], + join_on, + join_left.schema().fields().len(), + ) else { + return Ok(None); + }; + + let (new_left, new_right) = new_join_children( + &projection_as_columns, + far_right_left_col_ind, + far_left_right_col_ind, + join_left, + join_right, + )?; + + Ok(Some(JoinData { + projected_left_child: new_left, + projected_right_child: new_right, + join_filter: new_filter, + join_on: new_on, + })) +} + +/// This function checks if `plan` is a [`ProjectionExec`], and inspects its +/// input(s) to test whether it can push `plan` under its input(s). This function +/// will operate on the entire tree and may ultimately remove `plan` entirely +/// by leveraging source providers with built-in projection capabilities. +pub fn remove_unnecessary_projections( + plan: Arc, +) -> Result>> { + let maybe_modified = + if let Some(projection) = plan.as_any().downcast_ref::() { + // If the projection does not cause any change on the input, we can + // safely remove it: + if is_projection_removable(projection) { + return Ok(Transformed::yes(Arc::clone(projection.input()))); + } + // If it does, check if we can push it under its child(ren): + projection + .input() + .try_swapping_with_projection(projection)? + } else { + return Ok(Transformed::no(plan)); + }; + Ok(maybe_modified.map_or(Transformed::no(plan), Transformed::yes)) +} + +/// Compare the inputs and outputs of the projection. All expressions must be +/// columns without alias, and projection does not change the order of fields. +/// For example, if the input schema is `a, b`, `SELECT a, b` is removable, +/// but `SELECT b, a` and `SELECT a+1, b` and `SELECT a AS c, b` are not. +fn is_projection_removable(projection: &ProjectionExec) -> bool { + let exprs = projection.expr(); + exprs.iter().enumerate().all(|(idx, (expr, alias))| { + let Some(col) = expr.as_any().downcast_ref::() else { + return false; + }; + col.name() == alias && col.index() == idx + }) && exprs.len() == projection.input().schema().fields().len() +} + +/// Given the expression set of a projection, checks if the projection causes +/// any renaming or constructs a non-`Column` physical expression. +pub fn all_alias_free_columns(exprs: &[(Arc, String)]) -> bool { + exprs.iter().all(|(expr, alias)| { + expr.as_any() + .downcast_ref::() + .map(|column| column.name() == alias) + .unwrap_or(false) + }) +} + +/// Updates a source provider's projected columns according to the given +/// projection operator's expressions. To use this function safely, one must +/// ensure that all expressions are `Column` expressions without aliases. 
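`is_projection_removable` below is the identity test that lets a projection vanish outright. A standalone sketch of the same check over (source index, alias) pairs, with a few cases showing why reordering, renaming, or narrowing all block removal:

```rust
/// A projection is removable when it selects every input column, in order,
/// without renaming: `SELECT a, b` over schema `a, b`, but not `SELECT b, a`
/// or `SELECT a AS c, b`.
fn is_projection_removable(
    exprs: &[(usize, &str)], // (input column index, output alias)
    input_fields: &[&str],   // input schema field names
) -> bool {
    exprs.len() == input_fields.len()
        && exprs
            .iter()
            .enumerate()
            .all(|(pos, (idx, alias))| *idx == pos && *alias == input_fields[pos])
}

fn main() {
    let schema = ["a", "b"];
    assert!(is_projection_removable(&[(0, "a"), (1, "b")], &schema));
    assert!(!is_projection_removable(&[(1, "b"), (0, "a")], &schema)); // reorder
    assert!(!is_projection_removable(&[(0, "c"), (1, "b")], &schema)); // rename
    assert!(!is_projection_removable(&[(0, "a")], &schema)); // narrowing
}
```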
+pub fn new_projections_for_columns( + projection: &ProjectionExec, + source: &[usize], +) -> Vec { + projection + .expr() + .iter() + .filter_map(|(expr, _)| { + expr.as_any() + .downcast_ref::() + .map(|expr| source[expr.index()]) + }) + .collect() +} + +/// Creates a new [`ProjectionExec`] instance with the given child plan and +/// projected expressions. +pub fn make_with_child( + projection: &ProjectionExec, + child: &Arc, +) -> Result> { + ProjectionExec::try_new(projection.expr().to_vec(), Arc::clone(child)) + .map(|e| Arc::new(e) as _) +} + +/// Returns `true` if all the expressions in the argument are `Column`s. +pub fn all_columns(exprs: &[(Arc, String)]) -> bool { + exprs.iter().all(|(expr, _)| expr.as_any().is::()) +} + +/// The function operates in two modes: +/// +/// 1) When `sync_with_child` is `true`: +/// +/// The function updates the indices of `expr` if the expression resides +/// in the input plan. For instance, given the expressions `a@1 + b@2` +/// and `c@0` with the input schema `c@2, a@0, b@1`, the expressions are +/// updated to `a@0 + b@1` and `c@2`. +/// +/// 2) When `sync_with_child` is `false`: +/// +/// The function determines how the expression would be updated if a projection +/// was placed before the plan associated with the expression. If the expression +/// cannot be rewritten after the projection, it returns `None`. For example, +/// given the expressions `c@0`, `a@1` and `b@2`, and the [`ProjectionExec`] with +/// an output schema of `a, c_new`, then `c@0` becomes `c_new@1`, `a@1` becomes +/// `a@0`, but `b@2` results in `None` since the projection does not include `b`. +pub fn update_expr( + expr: &Arc, + projected_exprs: &[(Arc, String)], + sync_with_child: bool, +) -> Result>> { + #[derive(Debug, PartialEq)] + enum RewriteState { + /// The expression is unchanged. + Unchanged, + /// Some part of the expression has been rewritten + RewrittenValid, + /// Some part of the expression has been rewritten, but some column + /// references could not be. + RewrittenInvalid, + } + + let mut state = RewriteState::Unchanged; + + let new_expr = Arc::clone(expr) + .transform_up(|expr: Arc| { + if state == RewriteState::RewrittenInvalid { + return Ok(Transformed::no(expr)); + } + + let Some(column) = expr.as_any().downcast_ref::() else { + return Ok(Transformed::no(expr)); + }; + if sync_with_child { + state = RewriteState::RewrittenValid; + // Update the index of `column`: + Ok(Transformed::yes(Arc::clone( + &projected_exprs[column.index()].0, + ))) + } else { + // default to invalid, in case we can't find the relevant column + state = RewriteState::RewrittenInvalid; + // Determine how to update `column` to accommodate `projected_exprs` + projected_exprs + .iter() + .enumerate() + .find_map(|(index, (projected_expr, alias))| { + projected_expr.as_any().downcast_ref::().and_then( + |projected_column| { + (column.name().eq(projected_column.name()) + && column.index() == projected_column.index()) + .then(|| { + state = RewriteState::RewrittenValid; + Arc::new(Column::new(alias, index)) as _ + }) + }, + ) + }) + .map_or_else( + || Ok(Transformed::no(expr)), + |c| Ok(Transformed::yes(c)), + ) + } + }) + .data(); + + new_expr.map(|e| (state == RewriteState::RewrittenValid).then_some(e)) +} + +/// Downcasts all the expressions in `exprs` to `Column`s. If any of the given +/// expressions is not a `Column`, returns `None`. 
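The two modes of `update_expr` above are easiest to see on a toy expression type: `sync_with_child = true` substitutes each output column with the child projection's expression, while `false` re-expresses child columns in terms of the projection's output and gives up when a column is not projected. A sketch reproducing the documented examples, with pure-column projections modelled as index vectors (the `Expr` enum and helper names are illustrative):

```rust
#[derive(Clone, Debug, PartialEq)]
enum Expr {
    Col(usize),
    Add(Box<Expr>, Box<Expr>),
}

/// `sync_with_child = true`: `expr` is written against the projection's
/// output; rewrite it against the projection's input by substituting each
/// column with the projected source column.
fn update_expr_sync(expr: &Expr, proj_sources: &[usize]) -> Expr {
    match expr {
        Expr::Col(i) => Expr::Col(proj_sources[*i]),
        Expr::Add(l, r) => Expr::Add(
            Box::new(update_expr_sync(l, proj_sources)),
            Box::new(update_expr_sync(r, proj_sources)),
        ),
    }
}

/// `sync_with_child = false`: `expr` is written against the projection's
/// input; rewrite it against the projection's output, returning `None`
/// when a referenced column is not projected.
fn update_expr_unsync(expr: &Expr, proj_sources: &[usize]) -> Option<Expr> {
    match expr {
        Expr::Col(i) => proj_sources.iter().position(|s| s == i).map(Expr::Col),
        Expr::Add(l, r) => Some(Expr::Add(
            Box::new(update_expr_unsync(l, proj_sources)?),
            Box::new(update_expr_unsync(r, proj_sources)?),
        )),
    }
}

fn main() {
    // Projection exprs are [c@2, a@0, b@1] over a child schema (a, b, c).
    let proj = [2, 0, 1];
    // a@1 + b@2 (against the projection output) becomes a@0 + b@1.
    let e = Expr::Add(Box::new(Expr::Col(1)), Box::new(Expr::Col(2)));
    assert_eq!(
        update_expr_sync(&e, &proj),
        Expr::Add(Box::new(Expr::Col(0)), Box::new(Expr::Col(1)))
    );
    // Projection keeps only [a@1, c@0]: c@0 -> c_new@1, but b@2 -> None.
    let proj2 = [1, 0];
    assert_eq!(update_expr_unsync(&Expr::Col(0), &proj2), Some(Expr::Col(1)));
    assert_eq!(update_expr_unsync(&Expr::Col(2), &proj2), None);
}
```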
+pub fn physical_to_column_exprs( + exprs: &[(Arc, String)], +) -> Option> { + exprs + .iter() + .map(|(expr, alias)| { + expr.as_any() + .downcast_ref::() + .map(|col| (col.clone(), alias.clone())) + }) + .collect() +} + +/// If pushing down the projection over this join's children seems possible, +/// this function constructs the new [`ProjectionExec`]s that will come on top +/// of the original children of the join. +pub fn new_join_children( + projection_as_columns: &[(Column, String)], + far_right_left_col_ind: i32, + far_left_right_col_ind: i32, + left_child: &Arc, + right_child: &Arc, +) -> Result<(ProjectionExec, ProjectionExec)> { + let new_left = ProjectionExec::try_new( + projection_as_columns[0..=far_right_left_col_ind as _] + .iter() + .map(|(col, alias)| { + ( + Arc::new(Column::new(col.name(), col.index())) as _, + alias.clone(), + ) + }) + .collect_vec(), + Arc::clone(left_child), + )?; + let left_size = left_child.schema().fields().len() as i32; + let new_right = ProjectionExec::try_new( + projection_as_columns[far_left_right_col_ind as _..] + .iter() + .map(|(col, alias)| { + ( + Arc::new(Column::new( + col.name(), + // Align projected expressions coming from the right + // table with the new right child projection: + (col.index() as i32 - left_size) as _, + )) as _, + alias.clone(), + ) + }) + .collect_vec(), + Arc::clone(right_child), + )?; + + Ok((new_left, new_right)) +} + +/// Checks three conditions for pushing a projection down through a join: +/// - Projection must narrow the join output schema. +/// - Columns coming from left/right tables must be collected at the left/right +/// sides of the output table. +/// - Left or right table is not lost after the projection. +pub fn join_allows_pushdown( + projection_as_columns: &[(Column, String)], + join_schema: &SchemaRef, + far_right_left_col_ind: i32, + far_left_right_col_ind: i32, +) -> bool { + // Projection must narrow the join output: + projection_as_columns.len() < join_schema.fields().len() + // Are the columns from different tables mixed? + && (far_right_left_col_ind + 1 == far_left_right_col_ind) + // Left or right table is not lost after the projection. + && far_right_left_col_ind >= 0 + && far_left_right_col_ind < projection_as_columns.len() as i32 +} + +/// Returns the last index before encountering a column coming from the right table when traveling +/// through the projection from left to right, and the last index before encountering a column +/// coming from the left table when traveling through the projection from right to left. +/// If there is no column in the projection coming from the left side, it returns (-1, ...), +/// if there is no column in the projection coming from the right side, it returns (..., projection length). 
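`join_allows_pushdown` above distils the pushdown decision into three guards over the projected columns and the border indices computed by `join_table_borders`. A standalone sketch of those guards, simplified to plain counts and indices (the free-function shape here is illustrative):

```rust
/// The three pushdown guards. `far_right_left` and `far_left_right` are the
/// border positions returned by `join_table_borders`: the last projection
/// slot reading a left-table column and the first slot reading a right-table
/// column, respectively.
fn join_allows_pushdown(
    projection_len: usize,
    join_fields: usize,
    (far_right_left, far_left_right): (i32, i32),
) -> bool {
    // 1) the projection must narrow the join output,
    projection_len < join_fields
        // 2) left and right columns must not be interleaved,
        && far_right_left + 1 == far_left_right
        // 3) and neither side may be projected away entirely.
        && far_right_left >= 0
        && far_left_right < projection_len as i32
}

fn main() {
    // 8-column join output, projection keeps [b@1, c@2, e@4, f@5] with a
    // 3-column left table: borders are (1, 2) and pushdown is allowed.
    assert!(join_allows_pushdown(4, 8, (1, 2)));
    // Keeping only right-table columns gives borders (-1, 0): rejected.
    assert!(!join_allows_pushdown(2, 8, (-1, 0)));
}
```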
+pub fn join_table_borders( + left_table_column_count: usize, + projection_as_columns: &[(Column, String)], +) -> (i32, i32) { + let far_right_left_col_ind = projection_as_columns + .iter() + .enumerate() + .take_while(|(_, (projection_column, _))| { + projection_column.index() < left_table_column_count + }) + .last() + .map(|(index, _)| index as i32) + .unwrap_or(-1); + + let far_left_right_col_ind = projection_as_columns + .iter() + .enumerate() + .rev() + .take_while(|(_, (projection_column, _))| { + projection_column.index() >= left_table_column_count + }) + .last() + .map(|(index, _)| index as i32) + .unwrap_or(projection_as_columns.len() as i32); + + (far_right_left_col_ind, far_left_right_col_ind) +} + +/// Tries to update the equi-join `Column`'s of a join as if the input of +/// the join was replaced by a projection. +pub fn update_join_on( + proj_left_exprs: &[(Column, String)], + proj_right_exprs: &[(Column, String)], + hash_join_on: &[(PhysicalExprRef, PhysicalExprRef)], + left_field_size: usize, +) -> Option> { + // TODO: Clippy wants the "map" call removed, but doing so generates + // a compilation error. Remove the clippy directive once this + // issue is fixed. + #[allow(clippy::map_identity)] + let (left_idx, right_idx): (Vec<_>, Vec<_>) = hash_join_on + .iter() + .map(|(left, right)| (left, right)) + .unzip(); + + let new_left_columns = new_columns_for_join_on(&left_idx, proj_left_exprs, 0); + let new_right_columns = + new_columns_for_join_on(&right_idx, proj_right_exprs, left_field_size); + + match (new_left_columns, new_right_columns) { + (Some(left), Some(right)) => Some(left.into_iter().zip(right).collect()), + _ => None, + } +} + +/// Tries to update the column indices of a [`JoinFilter`] as if the input of +/// the join was replaced by a projection. +pub fn update_join_filter( + projection_left_exprs: &[(Column, String)], + projection_right_exprs: &[(Column, String)], + join_filter: &JoinFilter, + left_field_size: usize, +) -> Option { + let mut new_left_indices = new_indices_for_join_filter( + join_filter, + JoinSide::Left, + projection_left_exprs, + 0, + ) + .into_iter(); + let mut new_right_indices = new_indices_for_join_filter( + join_filter, + JoinSide::Right, + projection_right_exprs, + left_field_size, + ) + .into_iter(); + + // Check if all columns match: + (new_right_indices.len() + new_left_indices.len() + == join_filter.column_indices().len()) + .then(|| { + JoinFilter::new( + Arc::clone(join_filter.expression()), + join_filter + .column_indices() + .iter() + .map(|col_idx| ColumnIndex { + index: if col_idx.side == JoinSide::Left { + new_left_indices.next().unwrap() + } else { + new_right_indices.next().unwrap() + }, + side: col_idx.side, + }) + .collect(), + Arc::clone(join_filter.schema()), + ) + }) +} + +/// Unifies `projection` with its input (which is also a [`ProjectionExec`]). +fn try_unifying_projections( + projection: &ProjectionExec, + child: &ProjectionExec, +) -> Result>> { + let mut projected_exprs = vec![]; + let mut column_ref_map: HashMap = HashMap::new(); + + // Collect the column references usage in the outer projection. 
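`join_table_borders` above can be exercised in isolation by stripping the `Column`/alias plumbing down to bare indices; the figures below mirror the `test_join_table_borders` unit test later in this file (the standalone function is an illustrative re-implementation, not the patch's code):

```rust
/// Travelling left to right, report the last projection slot that still reads
/// a left-table column; travelling right to left, the last slot that still
/// reads a right-table column.
fn join_table_borders(left_width: usize, proj_cols: &[usize]) -> (i32, i32) {
    let far_right_left = proj_cols
        .iter()
        .enumerate()
        .take_while(|(_, col)| **col < left_width)
        .last()
        .map(|(i, _)| i as i32)
        .unwrap_or(-1);
    let far_left_right = proj_cols
        .iter()
        .enumerate()
        .rev()
        .take_while(|(_, col)| **col >= left_width)
        .last()
        .map(|(i, _)| i as i32)
        .unwrap_or(proj_cols.len() as i32);
    (far_right_left, far_left_right)
}

fn main() {
    // Projected columns b, c, e, d, c, f, h, g, as in the unit test below.
    let cols = [1, 2, 4, 3, 2, 5, 7, 6];
    assert_eq!(join_table_borders(5, &cols), (4, 5));
    assert_eq!(join_table_borders(8, &cols), (7, 8));
    assert_eq!(join_table_borders(1, &cols), (-1, 0));
}
```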
+ projection.expr().iter().for_each(|(expr, _)| { + expr.apply(|expr| { + Ok({ + if let Some(column) = expr.as_any().downcast_ref::() { + *column_ref_map.entry(column.clone()).or_default() += 1; + } + TreeNodeRecursion::Continue + }) + }) + .unwrap(); + }); + // Merging these projections is not beneficial, e.g + // If an expression is not trivial and it is referred more than 1, unifies projections will be + // beneficial as caching mechanism for non-trivial computations. + // See discussion in: https://github.com/apache/datafusion/issues/8296 + if column_ref_map.iter().any(|(column, count)| { + *count > 1 && !is_expr_trivial(&Arc::clone(&child.expr()[column.index()].0)) + }) { + return Ok(None); + } + for (expr, alias) in projection.expr() { + // If there is no match in the input projection, we cannot unify these + // projections. This case will arise if the projection expression contains + // a `PhysicalExpr` variant `update_expr` doesn't support. + let Some(expr) = update_expr(expr, child.expr(), true)? else { + return Ok(None); + }; + projected_exprs.push((expr, alias.clone())); + } + ProjectionExec::try_new(projected_exprs, Arc::clone(child.input())) + .map(|e| Some(Arc::new(e) as _)) +} + +/// Collect all column indices from the given projection expressions. +fn collect_column_indices(exprs: &[(Arc, String)]) -> Vec { + // Collect indices and remove duplicates. + let mut indices = exprs + .iter() + .flat_map(|(expr, _)| collect_columns(expr)) + .map(|x| x.index()) + .collect::>() + .into_iter() + .collect::>(); + indices.sort(); + indices +} + +/// This function determines and returns a vector of indices representing the +/// positions of columns in `projection_exprs` that are involved in `join_filter`, +/// and correspond to a particular side (`join_side`) of the join operation. +/// +/// Notes: Column indices in the projection expressions are based on the join schema, +/// whereas the join filter is based on the join child schema. `column_index_offset` +/// represents the offset between them. +fn new_indices_for_join_filter( + join_filter: &JoinFilter, + join_side: JoinSide, + projection_exprs: &[(Column, String)], + column_index_offset: usize, +) -> Vec { + join_filter + .column_indices() + .iter() + .filter(|col_idx| col_idx.side == join_side) + .filter_map(|col_idx| { + projection_exprs + .iter() + .position(|(col, _)| col_idx.index + column_index_offset == col.index()) + }) + .collect() +} + +/// This function generates a new set of columns to be used in a hash join +/// operation based on a set of equi-join conditions (`hash_join_on`) and a +/// list of projection expressions (`projection_exprs`). +/// +/// Notes: Column indices in the projection expressions are based on the join schema, +/// whereas the join on expressions are based on the join child schema. `column_index_offset` +/// represents the offset between them. 
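`new_indices_for_join_filter` above remaps the intermediate-schema indices recorded in a `JoinFilter` onto the projected children; `column_index_offset` bridges the gap between join-schema and child-schema indices, and a filter column that is no longer projected simply yields no entry, which the caller detects by comparing lengths. A simplified sketch with sides and indices as plain data (`Side` and `new_indices_for_side` are illustrative names):

```rust
#[derive(Clone, Copy, PartialEq)]
enum Side {
    Left,
    Right,
}

/// `proj` holds the projected source columns for one side (join-schema
/// indices); `offset` is 0 for the left side and the left schema width for
/// the right side.
fn new_indices_for_side(
    filter_cols: &[(Side, usize)], // (side, child-relative index)
    side: Side,
    proj: &[usize],
    offset: usize,
) -> Vec<usize> {
    filter_cols
        .iter()
        .filter(|(s, _)| *s == side)
        .filter_map(|(_, idx)| proj.iter().position(|col| idx + offset == *col))
        .collect()
}

fn main() {
    // Left child has 3 columns; the filter uses left@1 and right@0.
    let filter_cols = [(Side::Left, 1), (Side::Right, 0)];
    // Projections keep left columns [0, 1] and right columns [3, 4].
    assert_eq!(new_indices_for_side(&filter_cols, Side::Left, &[0, 1], 0), vec![1]);
    assert_eq!(new_indices_for_side(&filter_cols, Side::Right, &[3, 4], 3), vec![0]);
    // If the right projection drops column 3, the filter cannot be rewritten.
    assert_eq!(
        new_indices_for_side(&filter_cols, Side::Right, &[4], 3),
        Vec::<usize>::new()
    );
}
```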
+fn new_columns_for_join_on( + hash_join_on: &[&PhysicalExprRef], + projection_exprs: &[(Column, String)], + column_index_offset: usize, +) -> Option> { + let new_columns = hash_join_on + .iter() + .filter_map(|on| { + // Rewrite all columns in `on` + Arc::clone(*on) + .transform(|expr| { + if let Some(column) = expr.as_any().downcast_ref::() { + // Find the column in the projection expressions + let new_column = projection_exprs + .iter() + .enumerate() + .find(|(_, (proj_column, _))| { + column.name() == proj_column.name() + && column.index() + column_index_offset + == proj_column.index() + }) + .map(|(index, (_, alias))| Column::new(alias, index)); + if let Some(new_column) = new_column { + Ok(Transformed::yes(Arc::new(new_column))) + } else { + // If the column is not found in the projection expressions, + // it means that the column is not projected. In this case, + // we cannot push the projection down. + internal_err!( + "Column {:?} not found in projection expressions", + column + ) + } + } else { + Ok(Transformed::no(expr)) + } + }) + .data() + .ok() + }) + .collect::>(); + (new_columns.len() == hash_join_on.len()).then_some(new_columns) +} + +/// Checks if the given expression is trivial. +/// An expression is considered trivial if it is either a `Column` or a `Literal`. +fn is_expr_trivial(expr: &Arc) -> bool { + expr.as_any().downcast_ref::().is_some() + || expr.as_any().downcast_ref::().is_some() +} + #[cfg(test)] mod tests { use super::*; + use std::sync::Arc; + use crate::common::collect; use crate::test; use arrow_schema::DataType; use datafusion_common::ScalarValue; + use datafusion_expr::Operator; + use datafusion_physical_expr::expressions::{BinaryExpr, Column, Literal}; + + #[test] + fn test_collect_column_indices() -> Result<()> { + let expr = Arc::new(BinaryExpr::new( + Arc::new(Column::new("b", 7)), + Operator::Minus, + Arc::new(BinaryExpr::new( + Arc::new(Literal::new(ScalarValue::Int32(Some(1)))), + Operator::Plus, + Arc::new(Column::new("a", 1)), + )), + )); + let column_indices = collect_column_indices(&[(expr, "b-(1+a)".to_string())]); + assert_eq!(column_indices, vec![1, 7]); + Ok(()) + } + + #[test] + fn test_join_table_borders() -> Result<()> { + let projections = vec![ + (Column::new("b", 1), "b".to_owned()), + (Column::new("c", 2), "c".to_owned()), + (Column::new("e", 4), "e".to_owned()), + (Column::new("d", 3), "d".to_owned()), + (Column::new("c", 2), "c".to_owned()), + (Column::new("f", 5), "f".to_owned()), + (Column::new("h", 7), "h".to_owned()), + (Column::new("g", 6), "g".to_owned()), + ]; + let left_table_column_count = 5; + assert_eq!( + join_table_borders(left_table_column_count, &projections), + (4, 5) + ); + + let left_table_column_count = 8; + assert_eq!( + join_table_borders(left_table_column_count, &projections), + (7, 8) + ); + + let left_table_column_count = 1; + assert_eq!( + join_table_borders(left_table_column_count, &projections), + (-1, 0) + ); + + let projections = vec![ + (Column::new("a", 0), "a".to_owned()), + (Column::new("b", 1), "b".to_owned()), + (Column::new("d", 3), "d".to_owned()), + (Column::new("g", 6), "g".to_owned()), + (Column::new("e", 4), "e".to_owned()), + (Column::new("f", 5), "f".to_owned()), + (Column::new("e", 4), "e".to_owned()), + (Column::new("h", 7), "h".to_owned()), + ]; + let left_table_column_count = 5; + assert_eq!( + join_table_borders(left_table_column_count, &projections), + (2, 7) + ); + + let left_table_column_count = 7; + assert_eq!( + join_table_borders(left_table_column_count, &projections), + (6, 
7) + ); + + Ok(()) + } + #[tokio::test] async fn project_no_column() -> Result<()> { let task_ctx = Arc::new(TaskContext::default()); diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs index 270aabeb553c..8d180c212eba 100644 --- a/datafusion/physical-plan/src/repartition/mod.rs +++ b/datafusion/physical-plan/src/repartition/mod.rs @@ -32,6 +32,7 @@ use super::{ use crate::execution_plan::CardinalityEffect; use crate::hash_utils::create_hashes; use crate::metrics::BaselineMetrics; +use crate::projection::{all_columns, make_with_child, update_expr, ProjectionExec}; use crate::repartition::distributor_channels::{ channels, partition_aware_channels, DistributionReceiver, DistributionSender, }; @@ -44,6 +45,7 @@ use arrow::datatypes::{SchemaRef, UInt32Type}; use arrow::record_batch::RecordBatch; use arrow_array::{PrimitiveArray, RecordBatchOptions}; use datafusion_common::utils::transpose; +use datafusion_common::HashMap; use datafusion_common::{not_impl_err, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; use datafusion_execution::memory_pool::MemoryConsumer; @@ -51,7 +53,6 @@ use datafusion_execution::TaskContext; use datafusion_physical_expr::{EquivalenceProperties, PhysicalExpr}; use datafusion_physical_expr_common::sort_expr::LexOrdering; -use datafusion_common::HashMap; use futures::stream::Stream; use futures::{FutureExt, StreamExt, TryStreamExt}; use log::trace; @@ -672,6 +673,46 @@ impl ExecutionPlan for RepartitionExec { fn cardinality_effect(&self) -> CardinalityEffect { CardinalityEffect::Equal } + + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If the projection does not narrow the schema, we should not try to push it down. + if projection.expr().len() >= projection.input().schema().fields().len() { + return Ok(None); + } + + // If pushdown is not beneficial or applicable, break it. + if projection.benefits_from_input_partitioning()[0] + || !all_columns(projection.expr()) + { + return Ok(None); + } + + let new_projection = make_with_child(projection, self.input())?; + + let new_partitioning = match self.partitioning() { + Partitioning::Hash(partitions, size) => { + let mut new_partitions = vec![]; + for partition in partitions { + let Some(new_partition) = + update_expr(partition, projection.expr(), false)? + else { + return Ok(None); + }; + new_partitions.push(new_partition); + } + Partitioning::Hash(new_partitions, *size) + } + others => others.clone(), + }; + + Ok(Some(Arc::new(RepartitionExec::try_new( + new_projection, + new_partitioning, + )?))) + } } impl RepartitionExec { diff --git a/datafusion/physical-plan/src/sorts/sort.rs b/datafusion/physical-plan/src/sorts/sort.rs index fd7e426a82c5..caa9a754a0d2 100644 --- a/datafusion/physical-plan/src/sorts/sort.rs +++ b/datafusion/physical-plan/src/sorts/sort.rs @@ -31,6 +31,7 @@ use crate::limit::LimitStream; use crate::metrics::{ BaselineMetrics, Count, ExecutionPlanMetricsSet, MetricBuilder, MetricsSet, }; +use crate::projection::{make_with_child, update_expr, ProjectionExec}; use crate::sorts::streaming_merge::StreamingMergeBuilder; use crate::spill::{ get_record_batch_memory_size, read_spill_as_stream, spill_record_batches, @@ -1024,6 +1025,37 @@ impl ExecutionPlan for SortExec { CardinalityEffect::LowerEqual } } + + /// Tries to swap the projection with its input [`SortExec`]. If it can be done, + /// it returns the new swapped version having the [`SortExec`] as the top plan. 
+ /// Otherwise, it returns None. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If the projection does not narrow the schema, we should not try to push it down. + if projection.expr().len() >= projection.input().schema().fields().len() { + return Ok(None); + } + + let mut updated_exprs = LexOrdering::default(); + for sort in self.expr() { + let Some(new_expr) = update_expr(&sort.expr, projection.expr(), false)? + else { + return Ok(None); + }; + updated_exprs.push(PhysicalSortExpr { + expr: new_expr, + options: sort.options, + }); + } + + Ok(Some(Arc::new( + SortExec::new(updated_exprs, make_with_child(projection, self.input())?) + .with_fetch(self.fetch()) + .with_preserve_partitioning(self.preserve_partitioning()), + ))) + } } #[cfg(test)] diff --git a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs index adcb28e538fd..4f4237204fb1 100644 --- a/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs +++ b/datafusion/physical-plan/src/sorts/sort_preserving_merge.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use crate::common::spawn_buffered; use crate::limit::LimitStream; use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use crate::projection::{make_with_child, update_expr, ProjectionExec}; use crate::sorts::streaming_merge::StreamingMergeBuilder; use crate::{ DisplayAs, DisplayFormatType, Distribution, ExecutionPlan, ExecutionPlanProperties, @@ -32,8 +33,9 @@ use crate::{ use datafusion_common::{internal_err, Result}; use datafusion_execution::memory_pool::MemoryConsumer; use datafusion_execution::TaskContext; - +use datafusion_physical_expr::PhysicalSortExpr; use datafusion_physical_expr_common::sort_expr::{LexOrdering, LexRequirement}; + use log::{debug, trace}; /// Sort preserving merge execution plan @@ -334,6 +336,39 @@ impl ExecutionPlan for SortPreservingMergeExec { fn supports_limit_pushdown(&self) -> bool { true } + + /// Tries to swap the projection with its input [`SortPreservingMergeExec`]. + /// If this is possible, it returns the new [`SortPreservingMergeExec`] whose + /// child is a projection. Otherwise, it returns None. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If the projection does not narrow the schema, we should not try to push it down. + if projection.expr().len() >= projection.input().schema().fields().len() { + return Ok(None); + } + + let mut updated_exprs = LexOrdering::default(); + for sort in self.expr() { + let Some(updated_expr) = update_expr(&sort.expr, projection.expr(), false)? 
+ else { + return Ok(None); + }; + updated_exprs.push(PhysicalSortExpr { + expr: updated_expr, + options: sort.options, + }); + } + + Ok(Some(Arc::new( + SortPreservingMergeExec::new( + updated_exprs, + make_with_child(projection, self.input())?, + ) + .with_fetch(self.fetch()), + ))) + } } #[cfg(test)] diff --git a/datafusion/physical-plan/src/streaming.rs b/datafusion/physical-plan/src/streaming.rs index da8b0e877dcc..751af9921448 100644 --- a/datafusion/physical-plan/src/streaming.rs +++ b/datafusion/physical-plan/src/streaming.rs @@ -24,6 +24,11 @@ use std::sync::Arc; use super::{DisplayAs, DisplayFormatType, PlanProperties}; use crate::display::{display_orderings, ProjectSchemaDisplay}; use crate::execution_plan::{Boundedness, EmissionType}; +use crate::limit::LimitStream; +use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; +use crate::projection::{ + all_alias_free_columns, new_projections_for_columns, update_expr, ProjectionExec, +}; use crate::stream::RecordBatchStreamAdapter; use crate::{ExecutionPlan, Partitioning, SendableRecordBatchStream}; @@ -31,10 +36,8 @@ use arrow::datatypes::SchemaRef; use arrow_schema::Schema; use datafusion_common::{internal_err, plan_err, Result}; use datafusion_execution::TaskContext; -use datafusion_physical_expr::{EquivalenceProperties, LexOrdering}; +use datafusion_physical_expr::{EquivalenceProperties, LexOrdering, PhysicalSortExpr}; -use crate::limit::LimitStream; -use crate::metrics::{BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet}; use async_trait::async_trait; use futures::stream::StreamExt; use log::debug; @@ -272,6 +275,53 @@ impl ExecutionPlan for StreamingTableExec { }) } + /// Tries to embed `projection` to its input (`streaming table`). + /// If possible, returns [`StreamingTableExec`] as the top plan. Otherwise, + /// returns `None`. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + if !all_alias_free_columns(projection.expr()) { + return Ok(None); + } + + let streaming_table_projections = + self.projection().as_ref().map(|i| i.as_ref().to_vec()); + let new_projections = new_projections_for_columns( + projection, + &streaming_table_projections + .unwrap_or((0..self.schema().fields().len()).collect()), + ); + + let mut lex_orderings = vec![]; + for lex_ordering in self.projected_output_ordering().into_iter() { + let mut orderings = LexOrdering::default(); + for order in lex_ordering { + let Some(new_ordering) = + update_expr(&order.expr, projection.expr(), false)? 
+ else { + return Ok(None); + }; + orderings.push(PhysicalSortExpr { + expr: new_ordering, + options: order.options, + }); + } + lex_orderings.push(orderings); + } + + StreamingTableExec::try_new( + Arc::clone(self.partition_schema()), + self.partitions().clone(), + Some(new_projections.as_ref()), + lex_orderings, + self.is_infinite(), + self.limit(), + ) + .map(|e| Some(Arc::new(e) as _)) + } + fn metrics(&self) -> Option { Some(self.metrics.clone_inner()) } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index bcd9572f45c7..bacd02398ec0 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -36,6 +36,7 @@ use crate::execution_plan::{ boundedness_from_children, emission_type_from_children, InvariantLevel, }; use crate::metrics::BaselineMetrics; +use crate::projection::{make_with_child, ProjectionExec}; use crate::stream::ObservedStream; use arrow::datatypes::{Field, Schema, SchemaRef}; @@ -271,6 +272,27 @@ impl ExecutionPlan for UnionExec { fn supports_limit_pushdown(&self) -> bool { true } + + /// Tries to push `projection` down through `union`. If possible, performs the + /// pushdown and returns a new [`UnionExec`] as the top plan which has projections + /// as its children. Otherwise, returns `None`. + fn try_swapping_with_projection( + &self, + projection: &ProjectionExec, + ) -> Result>> { + // If the projection doesn't narrow the schema, we shouldn't try to push it down. + if projection.expr().len() >= projection.input().schema().fields().len() { + return Ok(None); + } + + let new_children = self + .children() + .into_iter() + .map(|child| make_with_child(projection, child)) + .collect::>>()?; + + Ok(Some(Arc::new(UnionExec::new(new_children)))) + } } /// Combines multiple input streams by interleaving them. 
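Note on the `try_swapping_with_projection` implementations added above (`RepartitionExec`, `SortExec`, `SortPreservingMergeExec`, `StreamingTableExec`, `UnionExec`): each one first checks that the projection actually narrows the schema, then rewrites every expression the operator keeps (partitioning columns, sort keys, table orderings) against the projection's output, returning `Ok(None)` as soon as any rewrite fails. The following is a minimal standalone sketch of that guard and of the index remapping; `MockProjection` and `remap_index` are hypothetical stand-ins, not DataFusion's `ExecutionPlan` API.

// Minimal, self-contained sketch of the "narrowing projection" guard and the
// column-index remapping used by the try_swapping_with_projection methods above.
// `MockProjection` and `remap_index` are illustrative stand-ins, not DataFusion APIs.

/// A projection described only by the input column index each output column reads.
struct MockProjection {
    /// output position -> input column index
    exprs: Vec<usize>,
    /// number of columns produced by the projection's input
    input_field_count: usize,
}

impl MockProjection {
    /// Mirrors the guard used above: only push the projection down if it
    /// actually narrows the schema.
    fn narrows_schema(&self) -> bool {
        self.exprs.len() < self.input_field_count
    }

    /// Rewrite an input column index into the projection's output index,
    /// returning None when the column is not projected (pushdown must stop).
    fn remap_index(&self, input_index: usize) -> Option<usize> {
        self.exprs.iter().position(|idx| *idx == input_index)
    }
}

fn main() {
    // SELECT c2, c0 FROM a five-column input.
    let projection = MockProjection {
        exprs: vec![2, 0],
        input_field_count: 5,
    };

    assert!(projection.narrows_schema());
    // A sort key on input column 2 becomes a sort key on output column 0.
    assert_eq!(projection.remap_index(2), Some(0));
    // A key on input column 4 is not projected, so the swap is abandoned.
    assert_eq!(projection.remap_index(4), None);

    println!("projection pushdown guard sketch ran successfully");
}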
diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 73ccbf935b2f..d88186fbf366 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -936,6 +936,7 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions { fn try_from( value: &protobuf::ParquetOptions, ) -> datafusion_common::Result { + #[allow(deprecated)] // max_statistics_size Ok(ParquetOptions { enable_page_index: value.enable_page_index, pruning: value.pruning, @@ -1013,6 +1014,7 @@ impl TryFrom<&protobuf::ParquetColumnOptions> for ParquetColumnOptions { fn try_from( value: &protobuf::ParquetColumnOptions, ) -> datafusion_common::Result { + #[allow(deprecated)] // max_statistics_size Ok(ParquetColumnOptions { compression: value.compression_opt.clone().map(|opt| match opt { protobuf::parquet_column_options::CompressionOpt::Compression(v) => Some(v), diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index 83296ca76f0f..88bbbfd60426 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -820,6 +820,7 @@ impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions { dictionary_enabled_opt: value.dictionary_enabled.map(protobuf::parquet_options::DictionaryEnabledOpt::DictionaryEnabled), dictionary_page_size_limit: value.dictionary_page_size_limit as u64, statistics_enabled_opt: value.statistics_enabled.clone().map(protobuf::parquet_options::StatisticsEnabledOpt::StatisticsEnabled), + #[allow(deprecated)] max_statistics_size_opt: value.max_statistics_size.map(|v| protobuf::parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v as u64)), max_row_group_size: value.max_row_group_size as u64, created_by: value.created_by.clone(), @@ -858,6 +859,7 @@ impl TryFrom<&ParquetColumnOptions> for protobuf::ParquetColumnOptions { .statistics_enabled .clone() .map(protobuf::parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled), + #[allow(deprecated)] max_statistics_size_opt: value.max_statistics_size.map(|v| { protobuf::parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize( v as u32, diff --git a/datafusion/proto/src/logical_plan/file_formats.rs b/datafusion/proto/src/logical_plan/file_formats.rs index 772e6d23426a..237e6d2a7137 100644 --- a/datafusion/proto/src/logical_plan/file_formats.rs +++ b/datafusion/proto/src/logical_plan/file_formats.rs @@ -362,6 +362,7 @@ impl TableParquetOptionsProto { }; let column_specific_options = global_options.column_specific_options; + #[allow(deprecated)] // max_statistics_size TableParquetOptionsProto { global: Some(ParquetOptionsProto { enable_page_index: global_options.global.enable_page_index, @@ -455,6 +456,7 @@ impl TableParquetOptionsProto { impl From<&ParquetOptionsProto> for ParquetOptions { fn from(proto: &ParquetOptionsProto) -> Self { + #[allow(deprecated)] // max_statistics_size ParquetOptions { enable_page_index: proto.enable_page_index, pruning: proto.pruning, @@ -509,6 +511,7 @@ impl From<&ParquetOptionsProto> for ParquetOptions { impl From for ParquetColumnOptions { fn from(proto: ParquetColumnOptionsProto) -> Self { + #[allow(deprecated)] // max_statistics_size ParquetColumnOptions { bloom_filter_enabled: proto.bloom_filter_enabled_opt.map( |parquet_column_options::BloomFilterEnabledOpt::BloomFilterEnabled(v)| v, diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index c1dc41196b36..38fe14ab90b7 100644 --- 
a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -53,7 +53,7 @@ object_store = { workspace = true } postgres-protocol = { version = "0.6.7", optional = true } postgres-types = { version = "0.2.8", features = ["derive", "with-chrono-0_4"], optional = true } rust_decimal = { version = "1.36.0", features = ["tokio-pg"] } -sqllogictest = "0.26.0" +sqllogictest = "0.26.4" sqlparser = { workspace = true } tempfile = { workspace = true } testcontainers = { version = "0.23", features = ["default"], optional = true } diff --git a/datafusion/sqllogictest/bin/sqllogictests.rs b/datafusion/sqllogictest/bin/sqllogictests.rs index f6b35bf3771c..2c518217d5ed 100644 --- a/datafusion/sqllogictest/bin/sqllogictests.rs +++ b/datafusion/sqllogictest/bin/sqllogictests.rs @@ -18,16 +18,19 @@ use clap::Parser; use datafusion_common::instant::Instant; use datafusion_common::utils::get_available_parallelism; -use datafusion_common::{exec_datafusion_err, exec_err, DataFusionError, Result}; +use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_common_runtime::SpawnedTask; -use datafusion_sqllogictest::{DataFusion, TestContext}; +use datafusion_sqllogictest::{ + df_value_validator, read_dir_recursive, setup_scratch_dir, value_normalizer, + DataFusion, TestContext, +}; use futures::stream::StreamExt; use indicatif::{ HumanDuration, MultiProgress, ProgressBar, ProgressDrawTarget, ProgressStyle, }; use itertools::Itertools; -use log::Level::{Info, Warn}; -use log::{info, log_enabled, warn}; +use log::Level::Info; +use log::{info, log_enabled}; use sqllogictest::{ parse_file, strict_column_validator, AsyncDB, Condition, Normalizer, Record, Validator, @@ -38,7 +41,6 @@ use crate::postgres_container::{ initialize_postgres_container, terminate_postgres_container, }; use std::ffi::OsStr; -use std::fs; use std::path::{Path, PathBuf}; #[cfg(feature = "postgres")] @@ -56,14 +58,6 @@ pub fn main() -> Result<()> { .block_on(run_tests()) } -// Trailing whitespace from lines in SLT will typically be removed, but do not fail if it is not -// If particular test wants to cover trailing whitespace on a value, -// it should project additional non-whitespace column on the right. -#[allow(clippy::ptr_arg)] -fn value_normalizer(s: &String) -> String { - s.trim_end().to_string() -} - fn sqlite_value_validator( normalizer: Normalizer, actual: &[Vec], @@ -93,54 +87,6 @@ fn sqlite_value_validator( normalized_actual == normalized_expected } -fn df_value_validator( - normalizer: Normalizer, - actual: &[Vec], - expected: &[String], -) -> bool { - let normalized_expected = expected.iter().map(normalizer).collect::>(); - let normalized_actual = actual - .iter() - .map(|strs| strs.iter().join(" ")) - .map(|str| str.trim_end().to_string()) - .collect_vec(); - - if log_enabled!(Warn) && normalized_actual != normalized_expected { - warn!("df validation failed. 
actual vs expected:"); - for i in 0..normalized_actual.len() { - warn!("[{i}] {}", normalized_actual[i]); - warn!( - "[{i}] {}", - if normalized_expected.len() >= i { - &normalized_expected[i] - } else { - "No more results" - } - ); - } - } - - normalized_actual == normalized_expected -} - -/// Sets up an empty directory at test_files/scratch/ -/// creating it if needed and clearing any file contents if it exists -/// This allows tests for inserting to external tables or copy to -/// persist data to disk and have consistent state when running -/// a new test -fn setup_scratch_dir(name: &Path) -> Result<()> { - // go from copy.slt --> copy - let file_stem = name.file_stem().expect("File should have a stem"); - let path = PathBuf::from("test_files").join("scratch").join(file_stem); - - info!("Creating scratch dir in {path:?}"); - if path.exists() { - fs::remove_dir_all(&path)?; - } - fs::create_dir_all(&path)?; - Ok(()) -} - async fn run_tests() -> Result<()> { // Enable logging (e.g. set RUST_LOG=debug to see debug logs) env_logger::init(); @@ -573,33 +519,6 @@ fn read_test_files<'a>( Ok(Box::new(paths.into_iter())) } -fn read_dir_recursive>(path: P) -> Result> { - let mut dst = vec![]; - read_dir_recursive_impl(&mut dst, path.as_ref())?; - Ok(dst) -} - -/// Append all paths recursively to dst -fn read_dir_recursive_impl(dst: &mut Vec, path: &Path) -> Result<()> { - let entries = fs::read_dir(path) - .map_err(|e| exec_datafusion_err!("Error reading directory {path:?}: {e}"))?; - for entry in entries { - let path = entry - .map_err(|e| { - exec_datafusion_err!("Error reading entry in directory {path:?}: {e}") - })? - .path(); - - if path.is_dir() { - read_dir_recursive_impl(dst, &path)?; - } else { - dst.push(path); - } - } - - Ok(()) -} - /// Parsed command line options /// /// This structure attempts to mimic the command line options of the built-in rust test runner diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs index 7421edb87b11..09cf70280e7c 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs @@ -250,7 +250,7 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { } /// Converts columns to a result as expected by sqllogicteset. 
-pub(crate) fn convert_schema_to_types(columns: &Fields) -> Vec { +pub fn convert_schema_to_types(columns: &Fields) -> Vec { columns .iter() .map(|f| f.data_type()) diff --git a/datafusion/sqllogictest/src/engines/mod.rs b/datafusion/sqllogictest/src/engines/mod.rs index 7b65c0aa77cb..3569dea70176 100644 --- a/datafusion/sqllogictest/src/engines/mod.rs +++ b/datafusion/sqllogictest/src/engines/mod.rs @@ -21,6 +21,8 @@ mod datafusion_engine; mod output; pub use datafusion_engine::convert_batches; +pub use datafusion_engine::convert_schema_to_types; +pub use datafusion_engine::DFSqlLogicTestError; pub use datafusion_engine::DataFusion; pub use output::DFColumnType; pub use output::DFOutput; diff --git a/datafusion/sqllogictest/src/lib.rs b/datafusion/sqllogictest/src/lib.rs index 30a882011dd5..0ea55782d34e 100644 --- a/datafusion/sqllogictest/src/lib.rs +++ b/datafusion/sqllogictest/src/lib.rs @@ -20,12 +20,17 @@ mod engines; pub use engines::convert_batches; +pub use engines::convert_schema_to_types; pub use engines::DFColumnType; pub use engines::DFOutput; +pub use engines::DFSqlLogicTestError; pub use engines::DataFusion; #[cfg(feature = "postgres")] pub use engines::Postgres; mod test_context; +mod util; + pub use test_context::TestContext; +pub use util::*; diff --git a/datafusion/sqllogictest/src/util.rs b/datafusion/sqllogictest/src/util.rs new file mode 100644 index 000000000000..1bdfdd03360f --- /dev/null +++ b/datafusion/sqllogictest/src/util.rs @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::{exec_datafusion_err, Result}; +use itertools::Itertools; +use log::Level::Warn; +use log::{info, log_enabled, warn}; +use sqllogictest::Normalizer; +use std::fs; +use std::path::{Path, PathBuf}; + +/// Sets up an empty directory at `test_files/scratch/` +/// creating it if needed and clearing any file contents if it exists +/// This allows tests for inserting to external tables or copy to +/// persist data to disk and have consistent state when running +/// a new test +pub fn setup_scratch_dir(name: &Path) -> Result<()> { + // go from copy.slt --> copy + let file_stem = name.file_stem().expect("File should have a stem"); + let path = PathBuf::from("test_files").join("scratch").join(file_stem); + + info!("Creating scratch dir in {path:?}"); + if path.exists() { + fs::remove_dir_all(&path)?; + } + fs::create_dir_all(&path)?; + Ok(()) +} + +/// Trailing whitespace from lines in SLT will typically be removed, but do not fail if it is not +/// If particular test wants to cover trailing whitespace on a value, +/// it should project additional non-whitespace column on the right. 
+#[allow(clippy::ptr_arg)]
+pub fn value_normalizer(s: &String) -> String {
+    s.trim_end().to_string()
+}
+
+pub fn read_dir_recursive<P: AsRef<Path>>(path: P) -> Result<Vec<PathBuf>> {
+    let mut dst = vec![];
+    read_dir_recursive_impl(&mut dst, path.as_ref())?;
+    Ok(dst)
+}
+
+/// Append all paths recursively to dst
+fn read_dir_recursive_impl(dst: &mut Vec<PathBuf>, path: &Path) -> Result<()> {
+    let entries = fs::read_dir(path)
+        .map_err(|e| exec_datafusion_err!("Error reading directory {path:?}: {e}"))?;
+    for entry in entries {
+        let path = entry
+            .map_err(|e| {
+                exec_datafusion_err!("Error reading entry in directory {path:?}: {e}")
+            })?
+            .path();
+
+        if path.is_dir() {
+            read_dir_recursive_impl(dst, &path)?;
+        } else {
+            dst.push(path);
+        }
+    }
+
+    Ok(())
+}
+
+/// Validate the actual and expected values.
+pub fn df_value_validator(
+    normalizer: Normalizer,
+    actual: &[Vec<String>],
+    expected: &[String],
+) -> bool {
+    let normalized_expected = expected.iter().map(normalizer).collect::<Vec<_>>();
+    let normalized_actual = actual
+        .iter()
+        .map(|strs| strs.iter().join(" "))
+        .map(|str| str.trim_end().to_string())
+        .collect_vec();
+
+    if log_enabled!(Warn) && normalized_actual != normalized_expected {
+        warn!("df validation failed. actual vs expected:");
+        for i in 0..normalized_actual.len() {
+            warn!("[{i}] {}", normalized_actual[i]);
+            warn!(
+                "[{i}] {}",
+                if normalized_expected.len() >= i {
+                    &normalized_expected[i]
+                } else {
+                    "No more results"
+                }
+            );
+        }
+    }
+
+    normalized_actual == normalized_expected
+}
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index 22a85eb15512..baf4ef7795e7 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -2760,6 +2760,30 @@ select
 ----
 [[1], [1], [1], [1], [1]] [[1.1, 2.2, 3.3], [1.1, 2.2, 3.3], [1.1, 2.2, 3.3]] [[NULL, NULL], [NULL, NULL], [NULL, NULL]] [[[1, 2], [3, 4]], [[1, 2], [3, 4]]]
 
+# array_repeat scalar function with count of different integer types
+query ????????
+Select
+    array_repeat(1, arrow_cast(2,'Int8')),
+    array_repeat(2, arrow_cast(2,'Int16')),
+    array_repeat(3, arrow_cast(2,'Int32')),
+    array_repeat(4, arrow_cast(2,'Int64')),
+    array_repeat(1, arrow_cast(2,'UInt8')),
+    array_repeat(2, arrow_cast(2,'UInt16')),
+    array_repeat(3, arrow_cast(2,'UInt32')),
+    array_repeat(4, arrow_cast(2,'UInt64'));
+----
+[1, 1] [2, 2] [3, 3] [4, 4] [1, 1] [2, 2] [3, 3] [4, 4]
+
+# array_repeat scalar function with count of negative integer types
+query ????
+Select + array_repeat(1, arrow_cast(-2,'Int8')), + array_repeat(2, arrow_cast(-2,'Int16')), + array_repeat(3, arrow_cast(-2,'Int32')), + array_repeat(4, arrow_cast(-2,'Int64')); +---- +[] [] [] [] + # array_repeat with columns #1 statement ok diff --git a/datafusion/sqllogictest/test_files/ddl.slt b/datafusion/sqllogictest/test_files/ddl.slt index 02f66f342b8a..0798c8cc1453 100644 --- a/datafusion/sqllogictest/test_files/ddl.slt +++ b/datafusion/sqllogictest/test_files/ddl.slt @@ -840,4 +840,3 @@ DROP TABLE t1; statement ok DROP TABLE t2; - diff --git a/datafusion/sqllogictest/test_files/encoding.slt b/datafusion/sqllogictest/test_files/encoding.slt index be1c5aa40583..960e81f4d14c 100644 --- a/datafusion/sqllogictest/test_files/encoding.slt +++ b/datafusion/sqllogictest/test_files/encoding.slt @@ -125,4 +125,4 @@ NULL NULL NULL R Ug 52 query T select encode(digest('hello', 'sha256'), 'hex'); ---- -2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 \ No newline at end of file +2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index a3592ba159e2..b2aa4b3fac8f 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -2046,4 +2046,4 @@ select 1 where null between null and 2; query T select 'A' where null between 2 and null; ---- +---- diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 46618b32d77a..4653df400080 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -301,7 +301,7 @@ datafusion.execution.parquet.dictionary_page_size_limit 1048576 (writing) Sets b datafusion.execution.parquet.enable_page_index true (reading) If true, reads the Parquet data page level metadata (the Page Index), if present, to reduce the I/O and number of rows decoded. datafusion.execution.parquet.encoding NULL (writing) Sets default encoding for any column. Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting datafusion.execution.parquet.max_row_group_size 1048576 (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. -datafusion.execution.parquet.max_statistics_size 4096 (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting +datafusion.execution.parquet.max_statistics_size 4096 (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting max_statistics_size is deprecated, currently it is not being used datafusion.execution.parquet.maximum_buffered_record_batches_per_stream 2 (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. 
datafusion.execution.parquet.maximum_parallel_row_group_writers 1 (writing) By default parallel parquet writer is tuned for minimum memory usage in a streaming execution plan. You may see a performance benefit when writing large parquet files by increasing maximum_parallel_row_group_writers and maximum_buffered_record_batches_per_stream if your system has idle cores and can tolerate additional memory usage. Boosting these values is likely worthwhile when writing out already in-memory data, such as from a cached data frame. datafusion.execution.parquet.metadata_size_hint NULL (reading) If specified, the parquet reader will try and fetch the last `size_hint` bytes of the parquet file optimistically. If not specified, two reads are required: One read to fetch the 8-byte parquet footer and another to fetch the metadata length encoded in the footer diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt.part similarity index 100% rename from datafusion/sqllogictest/test_files/join.slt rename to datafusion/sqllogictest/test_files/join.slt.part diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index cf897d628da5..0bdf223a11b7 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -23,7 +23,7 @@ statement ok set datafusion.optimizer.repartition_joins = false; -include ./join.slt +include ./join.slt.part statement ok CREATE EXTERNAL TABLE annotated_data ( diff --git a/datafusion/sqllogictest/test_files/join_only.slt b/datafusion/sqllogictest/test_files/join_only.slt new file mode 100644 index 000000000000..b2b6a1fa9b9d --- /dev/null +++ b/datafusion/sqllogictest/test_files/join_only.slt @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +include ./join.slt.part diff --git a/datafusion/sqllogictest/test_files/operator.slt b/datafusion/sqllogictest/test_files/operator.slt index ac977a0c514a..83bd1d7ee3cd 100644 --- a/datafusion/sqllogictest/test_files/operator.slt +++ b/datafusion/sqllogictest/test_files/operator.slt @@ -33,6 +33,8 @@ SELECT arrow_cast(1.25, 'Decimal128(5, 2)') as decimal ; +############### Addition ############### + # Plus with the same operand type, expect the same output type # except for decimal which is promoted to the highest precision query TTTTTTTTTTT @@ -52,6 +54,43 @@ from numeric_types; ---- Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 Decimal128(6, 2) +# Plus with literal integer +query TTTTTTTTTTT +select + arrow_typeof(int8 + 2), + arrow_typeof(int16 + 2), + arrow_typeof(int32 + 2), + arrow_typeof(int64 + 2), + arrow_typeof(uint8 + 2), + arrow_typeof(uint16 + 2), + arrow_typeof(uint32 + 2), + arrow_typeof(uint64 + 2), + arrow_typeof(float32 + 2), + arrow_typeof(float64 + 2), + arrow_typeof(decimal + 2) +from numeric_types; +---- +Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Float32 Float64 Decimal128(23, 2) + +# Plus with literal decimal +query TTTTTTTTTTT +select + arrow_typeof(int8 + 2.0), + arrow_typeof(int16 + 2.0), + arrow_typeof(int32 + 2.0), + arrow_typeof(int64 + 2.0), + arrow_typeof(uint8 + 2.0), + arrow_typeof(uint16 + 2.0), + arrow_typeof(uint32 + 2.0), + arrow_typeof(uint64 + 2.0), + arrow_typeof(float32 + 2.0), + arrow_typeof(float64 + 2.0), + arrow_typeof(decimal + 2.0) +from numeric_types; +---- +Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 + +############### Subtraction ############### # Minus with the same operand type, expect the same output type # except for decimal which is promoted to the highest precision @@ -72,6 +111,44 @@ from numeric_types; ---- Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 Decimal128(6, 2) +# Minus with literal integer +query TTTTTTTTTTT +select + arrow_typeof(int8 - 2), + arrow_typeof(int16 - 2), + arrow_typeof(int32 - 2), + arrow_typeof(int64 - 2), + arrow_typeof(uint8 - 2), + arrow_typeof(uint16 - 2), + arrow_typeof(uint32 - 2), + arrow_typeof(uint64 - 2), + arrow_typeof(float32 - 2), + arrow_typeof(float64 - 2), + arrow_typeof(decimal - 2) +from numeric_types; +---- +Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Float32 Float64 Decimal128(23, 2) + +# Minus with literal decimal +query TTTTTTTTTTT +select + arrow_typeof(int8 - 2.0), + arrow_typeof(int16 - 2.0), + arrow_typeof(int32 - 2.0), + arrow_typeof(int64 - 2.0), + arrow_typeof(uint8 - 2.0), + arrow_typeof(uint16 - 2.0), + arrow_typeof(uint32 - 2.0), + arrow_typeof(uint64 - 2.0), + arrow_typeof(float32 - 2.0), + arrow_typeof(float64 - 2.0), + arrow_typeof(decimal - 2.0) +from numeric_types; +---- +Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 + +############### Multiplication ############### + # Multiply with the same operand type, expect the same output type # except for decimal which is promoted to the highest precision query TTTTTTTTTTT @@ -91,6 +168,45 @@ from numeric_types; ---- Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 Decimal128(11, 4) +# Multiply with literal integer +query TTTTTTTTTTT +select + arrow_typeof(int8 * 2), + arrow_typeof(int16 * 2), + arrow_typeof(int32 * 2), + arrow_typeof(int64 * 2), + arrow_typeof(uint8 * 2), + arrow_typeof(uint16 * 2), + arrow_typeof(uint32 * 2), + arrow_typeof(uint64 * 2), + arrow_typeof(float32 * 
2), + arrow_typeof(float64 * 2), + arrow_typeof(decimal * 2) +from numeric_types; +---- +Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Float32 Float64 Decimal128(26, 2) + +# Multiply with literal decimal +query TTTTTTTTTTT +select + arrow_typeof(int8 * 2.0), + arrow_typeof(int16 * 2.0), + arrow_typeof(int32 * 2.0), + arrow_typeof(int64 * 2.0), + arrow_typeof(uint8 * 2.0), + arrow_typeof(uint16 * 2.0), + arrow_typeof(uint32 * 2.0), + arrow_typeof(uint64 * 2.0), + arrow_typeof(float32 * 2.0), + arrow_typeof(float64 * 2.0), + arrow_typeof(decimal * 2.0) +from numeric_types; +---- +Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 + +############### Division ############### + + # Divide with the same operand type, expect the same output type # except for decimal which is promoted to the highest precision query TTTTTTTTTTT @@ -110,5 +226,139 @@ from numeric_types; ---- Int8 Int16 Int32 Int64 UInt8 UInt16 UInt32 UInt64 Float32 Float64 Decimal128(11, 6) +# Divide with literal integer +query TTTTTTTTTTT +select + arrow_typeof(int8 / 2), + arrow_typeof(int16 / 2), + arrow_typeof(int32 / 2), + arrow_typeof(int64 / 2), + arrow_typeof(uint8 / 2), + arrow_typeof(uint16 / 2), + arrow_typeof(uint32 / 2), + arrow_typeof(uint64 / 2), + arrow_typeof(float32 / 2), + arrow_typeof(float64 / 2), + arrow_typeof(decimal / 2) +from numeric_types; +---- +Int64 Int64 Int64 Int64 Int64 Int64 Int64 Int64 Float32 Float64 Decimal128(9, 6) + +# Divide with literal decimal +query TTTTTTTTTTT +select + arrow_typeof(int8 / 2.0), + arrow_typeof(int16 / 2.0), + arrow_typeof(int32 / 2.0), + arrow_typeof(int64 / 2.0), + arrow_typeof(uint8 / 2.0), + arrow_typeof(uint16 / 2.0), + arrow_typeof(uint32 / 2.0), + arrow_typeof(uint64 / 2.0), + arrow_typeof(float32 / 2.0), + arrow_typeof(float64 / 2.0), + arrow_typeof(decimal / 2.0) +from numeric_types; +---- +Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 Float64 + +############### +# Test for comparison with constants uses efficient types +# Expect the physical plans to compare with constants of the same type +# should have no casts of the column to a different type + +statement ok +set datafusion.explain.physical_plan_only = true; + +############### Less Than ############### + +## < positive integer (expect no casts) +query TT +EXPLAIN SELECT * FROM numeric_types +WHERE int64 < 5 AND uint64 < 5 AND float64 < 5 AND decimal < 5; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: int64@3 < 5 AND uint64@7 < 5 AND float64@9 < 5 AND decimal@10 < Some(500),5,2 +03)----MemoryExec: partitions=1, partition_sizes=[1] + +## < negative integer (expect no casts) +query TT +EXPLAIN SELECT * FROM numeric_types +WHERE int64 < -5 AND uint64 < -5 AND float64 < -5 AND decimal < -5; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: int64@3 < -5 AND CAST(uint64@7 AS Decimal128(20, 0)) < Some(-5),20,0 AND float64@9 < -5 AND decimal@10 < Some(-500),5,2 +03)----MemoryExec: partitions=1, partition_sizes=[1] + +## < decimal (expect casts for integers to float) +query TT +EXPLAIN SELECT * FROM numeric_types +WHERE int64 < 5.1 AND uint64 < 5.1 AND float64 < 5.1 AND decimal < 5.1; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: CAST(int64@3 AS Float64) < 5.1 AND CAST(uint64@7 AS Float64) < 5.1 AND float64@9 < 5.1 AND decimal@10 < Some(510),5,2 +03)----MemoryExec: partitions=1, partition_sizes=[1] + +## < negative decimal (expect 
casts for integers to float) +query TT +EXPLAIN SELECT * FROM numeric_types +WHERE int64 < -5.1 AND uint64 < -5.1 AND float64 < -5.1 AND decimal < -5.1; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: CAST(int64@3 AS Float64) < -5.1 AND CAST(uint64@7 AS Float64) < -5.1 AND float64@9 < -5.1 AND decimal@10 < Some(-510),5,2 +03)----MemoryExec: partitions=1, partition_sizes=[1] + + +############### Equality ############### + +## = positive integer (expect no casts) +query TT +EXPLAIN SELECT * FROM numeric_types +WHERE int64 = 5 AND uint64 = 5 AND float64 = 5 AND decimal = 5; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: int64@3 = 5 AND uint64@7 = 5 AND float64@9 = 5 AND decimal@10 = Some(500),5,2 +03)----MemoryExec: partitions=1, partition_sizes=[1] + +## = negative integer (expect no casts) +query TT +EXPLAIN SELECT * FROM numeric_types +WHERE int64 = -5 AND uint64 = -5 AND float64 = -5 AND decimal = -5; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: int64@3 = -5 AND CAST(uint64@7 AS Decimal128(20, 0)) = Some(-5),20,0 AND float64@9 = -5 AND decimal@10 = Some(-500),5,2 +03)----MemoryExec: partitions=1, partition_sizes=[1] + +## = decimal (expect casts for integers to float) +query TT +EXPLAIN SELECT * FROM numeric_types +WHERE int64 = 5.1 AND uint64 = 5.1 AND float64 = 5.1 AND decimal = 5.1; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: CAST(int64@3 AS Float64) = 5.1 AND CAST(uint64@7 AS Float64) = 5.1 AND float64@9 = 5.1 AND decimal@10 = Some(510),5,2 +03)----MemoryExec: partitions=1, partition_sizes=[1] + +## = negative decimal (expect casts for integers to float) +query TT +EXPLAIN SELECT * FROM numeric_types +WHERE int64 = -5.1 AND uint64 = -5.1 AND float64 = -5.1 AND decimal = -5.1; +---- +physical_plan +01)CoalesceBatchesExec: target_batch_size=8192 +02)--FilterExec: CAST(int64@3 AS Float64) = -5.1 AND CAST(uint64@7 AS Float64) = -5.1 AND float64@9 = -5.1 AND decimal@10 = Some(-510),5,2 +03)----MemoryExec: partitions=1, partition_sizes=[1] + + +statement ok +set datafusion.explain.physical_plan_only = false; + + statement ok drop table numeric_types diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index dce685f5c137..98620bd6c635 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1836,3 +1836,7 @@ DROP TABLE test; # Can't reference an unqualified column by a qualified name query error DataFusion error: Schema error: No field named t1.v1. Column names are case sensitive. You can use double quotes to refer to the "t1.v1" column or set the datafusion.sql_parser.enable_ident_normalization configuration. Valid fields are "t1.v1". 
SELECT t1.v1 FROM (SELECT 1 AS "t1.v1"); + +# Test issue: https://github.com/apache/datafusion/issues/14124 +query error DataFusion error: Arrow error: Arithmetic overflow: Overflow happened on: 10000 \* 100000000000000000000000000000000000 +SELECT ('0.54321543215432154321543215432154321'::DECIMAL(35,35) + 10000)::VARCHAR diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index 6933514951b3..8895a2986103 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -1385,4 +1385,4 @@ WHERE moving_sum > 60; item1 1970-01-01T00:00:03 75 statement ok -drop table source_table; \ No newline at end of file +drop table source_table; diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q1.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q1.slt.part index c60848cb56c1..34d63f67efdf 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q1.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q1.slt.part @@ -57,4 +57,4 @@ physical_plan 08)--------------ProjectionExec: expr=[l_extendedprice@1 * (Some(1),20,0 - l_discount@2) as __common_expr_1, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] 09)----------------CoalesceBatchesExec: target_batch_size=8192 10)------------------FilterExec: l_shipdate@6 <= 1998-09-02, projection=[l_quantity@0, l_extendedprice@1, l_discount@2, l_tax@3, l_returnflag@4, l_linestatus@5] -11)--------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], has_header=false \ No newline at end of file +11)--------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate], has_header=false diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part index 6645ede2a73b..3928684a6824 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q11.slt.part @@ -123,4 +123,3 @@ physical_plan 48)------------------------FilterExec: n_name@1 = GERMANY, projection=[n_nationkey@0] 49)--------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 50)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false - diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q16.slt.part 
b/datafusion/sqllogictest/test_files/tpch/plans/q16.slt.part index e9486c39f08c..93021d371e6f 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q16.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q16.slt.part @@ -97,4 +97,3 @@ physical_plan 30)--------------------------------FilterExec: s_comment@1 LIKE %Customer%Complaints%, projection=[s_suppkey@0] 31)----------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 32)------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_comment], has_header=false - diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q17.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q17.slt.part index f28a5ef54885..5fc2973d6fa2 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q17.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q17.slt.part @@ -74,4 +74,3 @@ physical_plan 21)------------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4 22)--------------------AggregateExec: mode=Partial, gby=[l_partkey@0 as l_partkey], aggr=[avg(lineitem.l_quantity)] 23)----------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_quantity], has_header=false - diff --git a/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part b/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part index e3918ba62b02..4d0cb1bc7e1d 100644 --- a/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part +++ b/datafusion/sqllogictest/test_files/tpch/plans/q20.slt.part @@ -124,4 +124,3 @@ physical_plan 41)--------------------------CoalesceBatchesExec: target_batch_size=8192 42)----------------------------FilterExec: l_shipdate@3 >= 1994-01-01 AND l_shipdate@3 < 1995-01-01, projection=[l_partkey@0, l_suppkey@1, l_quantity@2] 43)------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_partkey, l_suppkey, l_quantity, l_shipdate], has_header=false - diff --git a/datafusion/sqllogictest/test_files/tpch/tpch.slt b/datafusion/sqllogictest/test_files/tpch/tpch.slt index 127152daa580..c6d630997e29 100644 --- a/datafusion/sqllogictest/test_files/tpch/tpch.slt +++ b/datafusion/sqllogictest/test_files/tpch/tpch.slt @@ -28,4 +28,3 @@ set datafusion.optimizer.prefer_hash_join = false; include ./answers/q*.slt.part include ./drop_tables.slt.part - diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 5b3f29abc5c3..e362b5318cbe 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1767,7 +1767,7 @@ logical_plan 01)Projection: count(*) AS global_count 02)--Aggregate: groupBy=[[]], 
aggr=[[count(Int64(1)) AS count(*)]] 03)----SubqueryAlias: a -04)------Projection: +04)------Projection: 05)--------Aggregate: groupBy=[[aggregate_test_100.c1]], aggr=[[]] 06)----------Projection: aggregate_test_100.c1 07)------------Filter: aggregate_test_100.c13 != Utf8("C2GT5KVyOPZpgKVl110TyZO0NcJ434") diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 1c39064c15d7..dd9ce759b28a 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -66,7 +66,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.dictionary_enabled | true | (writing) Sets if dictionary encoding is enabled. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | -| datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting | +| datafusion.execution.parquet.max_statistics_size | 4096 | (writing) Sets max statistics size for any column. If NULL, uses default parquet writer setting max_statistics_size is deprecated, currently it is not being used | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | | datafusion.execution.parquet.created_by | datafusion version 44.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | diff --git a/docs/source/user-guide/introduction.md b/docs/source/user-guide/introduction.md index 9bdb22b7145f..c97042fdc525 100644 --- a/docs/source/user-guide/introduction.md +++ b/docs/source/user-guide/introduction.md @@ -103,6 +103,7 @@ Here are some active projects using DataFusion: - [Dask SQL](https://github.com/dask-contrib/dask-sql) Distributed SQL query engine in Python - [delta-rs](https://github.com/delta-io/delta-rs) Native Rust implementation of Delta Lake - [Exon](https://github.com/wheretrue/exon) Analysis toolkit for life-science applications +- [Funnel](https://funnel.io/) Data Platform powering Marketing Intelligence applications. - [GlareDB](https://github.com/GlareDB/glaredb) Fast SQL database for querying and analyzing distributed data. - [GreptimeDB](https://github.com/GreptimeTeam/greptimedb) Open Source & Cloud Native Distributed Time Series Database - [HoraeDB](https://github.com/apache/incubator-horaedb) Distributed Time-Series Database
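Side note on the updated `datafusion.execution.parquet.max_statistics_size` description: because the option is deprecated but still round-tripped through the protobuf representation for compatibility, the conversion code earlier in this diff has to annotate its struct literals with `#[allow(deprecated)]`. Below is a minimal self-contained sketch of that pattern; the `Options` struct and `from_proto` function are hypothetical stand-ins for `ParquetOptions` and its proto conversion, not the real DataFusion types.

// Sketch of the #[allow(deprecated)] pattern used in the proto conversions above.
// `Options` is a hypothetical stand-in; only the deprecated field matters here.
struct Options {
    row_group_size: usize,
    #[deprecated(note = "max_statistics_size is deprecated, currently it is not being used")]
    max_statistics_size: Option<usize>,
}

fn from_proto(max_statistics_size: Option<usize>) -> Options {
    // Without the attribute, populating the deprecated field in the struct
    // literal triggers the `deprecated` lint (an error under deny(warnings)).
    #[allow(deprecated)]
    let options = Options {
        row_group_size: 1024 * 1024,
        max_statistics_size,
    };
    options
}

fn main() {
    let opts = from_proto(Some(4096));
    // Reading the deprecated field needs the same allowance.
    #[allow(deprecated)]
    let size = opts.max_statistics_size;
    println!(
        "row_group_size = {}, max_statistics_size = {size:?} (deprecated, unused by the writer)",
        opts.row_group_size
    );
}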