Skip to content

Commit

Permalink
type coercion: support the boolean op, is_true, is_not_true, is_false,…
Browse files Browse the repository at this point in the history
… is_not_false
  • Loading branch information
liukun4515 committed Sep 16, 2022
1 parent 3319220 commit 5e68b31
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 1 deletion.
20 changes: 20 additions & 0 deletions datafusion/expr/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,26 @@ impl Expr {
}
}

/// Return `IsTrue(Box(self))`.
///
/// Consumes `self` and boxes it as the operand of an [`Expr::IsTrue`] node.
pub fn is_true(self) -> Expr {
Expr::IsTrue(Box::new(self))
}

/// Return `IsNotTrue(Box(self))`.
///
/// Consumes `self` and boxes it as the operand of an [`Expr::IsNotTrue`] node.
pub fn is_not_true(self) -> Expr {
Expr::IsNotTrue(Box::new(self))
}

/// Return `IsFalse(Box(self))`.
///
/// Consumes `self` and boxes it as the operand of an [`Expr::IsFalse`] node.
pub fn is_false(self) -> Expr {
Expr::IsFalse(Box::new(self))
}

/// Return `IsNotFalse(Box(self))`.
///
/// Consumes `self` and boxes it as the operand of an [`Expr::IsNotFalse`] node.
pub fn is_not_false(self) -> Expr {
Expr::IsNotFalse(Box::new(self))
}

pub fn try_into_col(&self) -> Result<Column> {
match self {
Expr::Column(it) => Ok(it.clone()),
Expand Down
20 changes: 20 additions & 0 deletions datafusion/expr/src/expr_fn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,26 @@ pub fn is_null(expr: Expr) -> Expr {
Expr::IsNull(Box::new(expr))
}

/// Create an `IS TRUE` expression.
///
/// Takes ownership of `expr` and boxes it inside [`Expr::IsTrue`].
pub fn is_true(expr: Expr) -> Expr {
Expr::IsTrue(Box::new(expr))
}

/// Create an `IS NOT TRUE` expression.
///
/// Takes ownership of `expr` and boxes it inside [`Expr::IsNotTrue`].
pub fn is_not_true(expr: Expr) -> Expr {
Expr::IsNotTrue(Box::new(expr))
}

/// Create an `IS FALSE` expression.
///
/// Takes ownership of `expr` and boxes it inside [`Expr::IsFalse`].
pub fn is_false(expr: Expr) -> Expr {
Expr::IsFalse(Box::new(expr))
}

/// Create an `IS NOT FALSE` expression.
///
/// Takes ownership of `expr` and boxes it inside [`Expr::IsNotFalse`].
pub fn is_not_false(expr: Expr) -> Expr {
Expr::IsNotFalse(Box::new(expr))
}

/// Create a convenience function representing a unary scalar function
macro_rules! unary_scalar_expr {
($ENUM:ident, $FUNC:ident, $DOC:expr) => {
Expand Down
97 changes: 96 additions & 1 deletion datafusion/optimizer/src/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ use datafusion_expr::binary_rule::{coerce_types, comparison_coercion};
use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecursion};
use datafusion_expr::type_coercion::data_types;
use datafusion_expr::utils::from_plan;
use datafusion_expr::{Expr, LogicalPlan};
use datafusion_expr::{
is_false, is_not_false, is_not_true, is_true, Expr, LogicalPlan, Operator,
};
use datafusion_expr::{ExprSchemable, Signature};
use datafusion_physical_expr::execution_props::ExecutionProps;
use std::sync::Arc;
Expand Down Expand Up @@ -98,6 +100,22 @@ impl ExprRewriter for TypeCoercionRewriter<'_> {

fn mutate(&mut self, expr: Expr) -> Result<Expr> {
match expr {
Expr::IsTrue(expr) => {
let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?;
Ok(is_true(result_expr))
}
Expr::IsNotTrue(expr) => {
let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?;
Ok(is_not_true(result_expr))
}
Expr::IsFalse(expr) => {
let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?;
Ok(is_false(result_expr))
}
Expr::IsNotFalse(expr) => {
let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?;
Ok(is_not_false(result_expr))
}
Expr::BinaryExpr {
ref left,
op,
Expand Down Expand Up @@ -203,6 +221,15 @@ impl ExprRewriter for TypeCoercionRewriter<'_> {
}
}

// Type coercion for the `IsTrue`, `IsNotTrue`, `IsFalse` and `IsNotFalse` expressions.
//
// These operators are rewritten to a binary op when the physical op is created,
// so the operand is coerced as if it were the left side of
// `<operand> IS DISTINCT FROM <boolean>`: the common type of the operand's type
// and `Boolean` is resolved and a clone of the operand is cast to it.
//
// Fails if the operand's type cannot be determined from `schema`, if no common
// type exists, or if the cast itself is rejected.
fn get_casted_expr_for_bool_op(expr: &Expr, schema: &DFSchemaRef) -> Result<Expr> {
    let operand_type = expr.get_type(schema)?;
    let target_type =
        coerce_types(&operand_type, &Operator::IsDistinctFrom, &DataType::Boolean)?;
    expr.clone().cast_to(&target_type, schema)
}

/// Attempts to coerce the types of `list_types` to be comparable with the
/// `expr_type`.
/// Returns the common data type for `expr_type` and `list_types`
Expand Down Expand Up @@ -440,10 +467,78 @@ mod test {
Ok(())
}

/// Checks how `TypeCoercion` treats `IS [NOT] TRUE` / `IS [NOT] FALSE`:
/// a Boolean operand passes through with the same rendering, while an
/// `Int64` operand under `IS TRUE` is rejected with a coercion error.
#[test]
fn is_bool_for_type_coercion() -> Result<()> {
    // is true, Boolean operand
    let is_true_expr = col("a").is_true();
    let projection = LogicalPlan::Projection(Projection::try_new(
        vec![is_true_expr.clone()],
        empty_with_type(DataType::Boolean),
        None,
    )?);
    let rule = TypeCoercion::new();
    let mut config = OptimizerConfig::default();
    let optimized = rule.optimize(&projection, &mut config).unwrap();
    assert_eq!(
        "Projection: #a IS TRUE\n EmptyRelation",
        &format!("{:?}", optimized)
    );

    // is true, Int64 operand: no common type with Boolean, so the rule must fail
    let projection = LogicalPlan::Projection(Projection::try_new(
        vec![is_true_expr],
        empty_with_type(DataType::Int64),
        None,
    )?);
    let outcome = rule.optimize(&projection, &mut config);
    assert!(outcome.is_err());
    assert!(outcome.unwrap_err().to_string().contains("'Int64 IS DISTINCT FROM Boolean' can't be evaluated because there isn't a common type to coerce the types to"));

    // is not true / is false / is not false, each over a Boolean operand
    let boolean_cases = vec![
        (
            col("a").is_not_true(),
            "Projection: #a IS NOT TRUE\n EmptyRelation",
        ),
        (
            col("a").is_false(),
            "Projection: #a IS FALSE\n EmptyRelation",
        ),
        (
            col("a").is_not_false(),
            "Projection: #a IS NOT FALSE\n EmptyRelation",
        ),
    ];
    for (expr, expected) in boolean_cases {
        let projection = LogicalPlan::Projection(Projection::try_new(
            vec![expr],
            empty_with_type(DataType::Boolean),
            None,
        )?);
        let optimized = rule.optimize(&projection, &mut config).unwrap();
        assert_eq!(expected, &format!("{:?}", optimized));
    }
    Ok(())
}

/// Test helper: a shared `EmptyRelation` plan over an empty schema that
/// is configured not to produce a row.
fn empty() -> Arc<LogicalPlan> {
    let relation = EmptyRelation {
        produce_one_row: false,
        schema: Arc::new(DFSchema::empty()),
    };
    Arc::new(LogicalPlan::EmptyRelation(relation))
}

/// Test helper: a shared `EmptyRelation` plan whose schema holds a single
/// unqualified field named `a` of type `data_type`, with no schema metadata.
///
/// Panics if the schema cannot be constructed.
fn empty_with_type(data_type: DataType) -> Arc<LogicalPlan> {
    // The trailing `true` is presumably the nullable flag; confirm against `DFField::new`.
    let fields = vec![DFField::new(None, "a", data_type, true)];
    let schema =
        DFSchema::new_with_metadata(fields, std::collections::HashMap::new()).unwrap();
    let relation = EmptyRelation {
        produce_one_row: false,
        schema: Arc::new(schema),
    };
    Arc::new(LogicalPlan::EmptyRelation(relation))
}
}

0 comments on commit 5e68b31

Please sign in to comment.