Skip to content

Commit

Permalink
Improve Union Equivalence Propagation (apache#11506)
Browse files Browse the repository at this point in the history
* Initial commit

* Fix formatting

* Minor changes

* Fix failing test

* Change union calculation algorithm to make it symmetric

* Minor changes

* Add unit tests

* Simplifications

* Review Part 1

* Move test and union equivalence

* Add new tests

* Support for union with different schema

* Address reviews

* Review Part 2

* Add new tests

* Final Review

---------

Co-authored-by: Mehmet Ozan Kabak <[email protected]>
  • Loading branch information
2 people authored and wiedld committed Aug 8, 2024
1 parent 1648d29 commit 35104d2
Show file tree
Hide file tree
Showing 7 changed files with 647 additions and 511 deletions.
33 changes: 31 additions & 2 deletions datafusion/physical-expr-common/src/physical_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ use std::fmt::{Debug, Display};
use std::hash::{Hash, Hasher};
use std::sync::Arc;

use crate::expressions::column::Column;
use crate::utils::scatter;

use arrow::array::BooleanArray;
use arrow::compute::filter_record_batch;
use arrow::datatypes::{DataType, Schema};
use arrow::datatypes::{DataType, Schema, SchemaRef};
use arrow::record_batch::RecordBatch;
use datafusion_common::{internal_err, not_impl_err, Result};
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::{internal_err, not_impl_err, plan_err, Result};
use datafusion_expr::interval_arithmetic::Interval;
use datafusion_expr::sort_properties::ExprProperties;
use datafusion_expr::ColumnarValue;
Expand Down Expand Up @@ -191,6 +193,33 @@ pub fn with_new_children_if_necessary(
}
}

/// Rebinds every [`Column`] inside `expr` to the field found at the same
/// index in `schema`.
///
/// The expression tree is traversed with `transform_up`. Each `Column` node is
/// replaced by a new `Column` that keeps its original index but takes its name
/// from `schema.fields()[index]`; all other nodes are returned untouched.
///
/// # Errors
///
/// Returns a plan error if a column's index has no matching field in `schema`,
/// i.e. the new schema has fewer columns than the expression requires.
///
/// # Note
///
/// Only column names are rewritten. The resulting expression may not be valid
/// if the data types in the new schema are incompatible with the expression
/// nodes.
pub fn with_new_schema(
    expr: Arc<dyn PhysicalExpr>,
    schema: &SchemaRef,
) -> Result<Arc<dyn PhysicalExpr>> {
    let rewritten = expr.transform_up(|node| {
        // Anything that is not a column reference passes through unchanged.
        let Some(column) = node.as_any().downcast_ref::<Column>() else {
            return Ok(Transformed::no(node));
        };
        let position = column.index();
        match schema.fields().get(position) {
            Some(field) => {
                // Same index, but named after the field in the new schema.
                let replacement = Column::new(field.name(), position);
                Ok(Transformed::yes(Arc::new(replacement) as _))
            }
            None => plan_err!("New schema has fewer columns than original schema"),
        }
    })?;
    Ok(rewritten.data)
}

pub fn down_cast_any_ref(any: &dyn Any) -> &dyn Any {
if any.is::<Arc<dyn PhysicalExpr>>() {
any.downcast_ref::<Arc<dyn PhysicalExpr>>()
Expand Down
4 changes: 3 additions & 1 deletion datafusion/physical-expr/src/equivalence/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ mod properties;
pub use class::{ConstExpr, EquivalenceClass, EquivalenceGroup};
pub use ordering::OrderingEquivalenceClass;
pub use projection::ProjectionMapping;
pub use properties::{join_equivalence_properties, EquivalenceProperties};
pub use properties::{
calculate_union, join_equivalence_properties, EquivalenceProperties,
};

/// This function constructs a duplicate-free `LexOrderingReq` by filtering out
/// duplicate entries that have same physical expression inside. For example,
Expand Down
Loading

0 comments on commit 35104d2

Please sign in to comment.