From 67696c294b6e632a50eb6b3c2666b4aacc4b2c3c Mon Sep 17 00:00:00 2001 From: liukun4515 Date: Sat, 17 Sep 2022 19:21:55 +0800 Subject: [PATCH] support like, unknown for type coercion --- datafusion/expr/src/expr.rs | 10 ++ datafusion/expr/src/expr_fn.rs | 10 ++ datafusion/optimizer/src/type_coercion.rs | 202 ++++++++++++++++++++-- 3 files changed, 209 insertions(+), 13 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index cd72392f6aa3..8cdf53c26ed3 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -517,6 +517,16 @@ impl Expr { Expr::IsNotFalse(Box::new(self)) } + /// Return `IsUnknown(Box(self))` + pub fn is_unknown(self) -> Expr { + Expr::IsUnknown(Box::new(self)) + } + + /// Return `IsNotUnknown(Box(self))` + pub fn is_not_unknown(self) -> Expr { + Expr::IsNotUnknown(Box::new(self)) + } + pub fn try_into_col(&self) -> Result { match self { Expr::Column(it) => Ok(it.clone()), diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index c131890224a4..7b11b630464a 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -302,6 +302,16 @@ pub fn is_not_false(expr: Expr) -> Expr { Expr::IsNotFalse(Box::new(expr)) } +/// Create is unknown expression +pub fn is_unknown(expr: Expr) -> Expr { + Expr::IsUnknown(Box::new(expr)) +} + +/// Create is not unknown expression +pub fn is_not_unknown(expr: Expr) -> Expr { + Expr::IsNotUnknown(Box::new(expr)) +} + /// Create an convenience function representing a unary scalar function macro_rules!
unary_scalar_expr { ($ENUM:ident, $FUNC:ident, $DOC:expr) => { diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index a4cdbcec038e..1d32eaa9c2a4 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -26,7 +26,8 @@ use datafusion_expr::expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecurs use datafusion_expr::type_coercion::data_types; use datafusion_expr::utils::from_plan; use datafusion_expr::{ - is_false, is_not_false, is_not_true, is_true, Expr, LogicalPlan, Operator, + is_false, is_not_false, is_not_true, is_not_unknown, is_true, is_unknown, Expr, + LogicalPlan, Operator, }; use datafusion_expr::{ExprSchemable, Signature}; use datafusion_physical_expr::execution_props::ExecutionProps; @@ -101,20 +102,79 @@ impl ExprRewriter for TypeCoercionRewriter<'_> { fn mutate(&mut self, expr: Expr) -> Result { match expr { Expr::IsTrue(expr) => { - let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?; - Ok(is_true(result_expr)) + let expr = is_true(get_casted_expr_for_bool_op(&expr, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) } Expr::IsNotTrue(expr) => { - let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?; - Ok(is_not_true(result_expr)) + let expr = is_not_true(get_casted_expr_for_bool_op(&expr, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) } Expr::IsFalse(expr) => { - let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?; - Ok(is_false(result_expr)) + let expr = is_false(get_casted_expr_for_bool_op(&expr, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) } Expr::IsNotFalse(expr) => { - let result_expr = get_casted_expr_for_bool_op(&expr, &self.schema)?; - Ok(is_not_false(result_expr)) + let expr = + is_not_false(get_casted_expr_for_bool_op(&expr, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) + } + Expr::Like { + negated, + expr, + pattern, + escape_char, + } => { + let 
left_type = expr.get_type(&self.schema)?; + let right_type = pattern.get_type(&self.schema)?; + let coerced_type = + coerce_types(&left_type, &Operator::Like, &right_type)?; + let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?); + let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?); + let expr = Expr::Like { + negated, + expr, + pattern, + escape_char, + }; + expr.rewrite(&mut self.const_evaluator) + } + Expr::ILike { + negated, + expr, + pattern, + escape_char, + } => { + let left_type = expr.get_type(&self.schema)?; + let right_type = pattern.get_type(&self.schema)?; + let coerced_type = + coerce_types(&left_type, &Operator::Like, &right_type)?; + let expr = Box::new(expr.cast_to(&coerced_type, &self.schema)?); + let pattern = Box::new(pattern.cast_to(&coerced_type, &self.schema)?); + let expr = Expr::ILike { + negated, + expr, + pattern, + escape_char, + }; + expr.rewrite(&mut self.const_evaluator) + } + Expr::IsUnknown(expr) => { + // use the coercion rule of `expr IS NOT DISTINCT FROM Boolean` + let left_type = expr.get_type(&self.schema)?; + let right_type = DataType::Boolean; + let coerced_type = + coerce_types(&left_type, &Operator::IsNotDistinctFrom, &right_type)?; + let expr = is_unknown(expr.cast_to(&coerced_type, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) + } + Expr::IsNotUnknown(expr) => { + // use the coercion rule of `expr IS DISTINCT FROM Boolean` + let left_type = expr.get_type(&self.schema)?; + let right_type = DataType::Boolean; + let coerced_type = + coerce_types(&left_type, &Operator::IsDistinctFrom, &right_type)?; + let expr = is_not_unknown(expr.cast_to(&coerced_type, &self.schema)?); + expr.rewrite(&mut self.const_evaluator) } Expr::BinaryExpr { ref left, @@ -154,18 +214,34 @@ impl ExprRewriter for TypeCoercionRewriter<'_> { } => { let expr_type = expr.get_type(&self.schema)?; let low_type = low.get_type(&self.schema)?; - let coerced_type = comparison_coercion(&expr_type, &low_type)
+ let low_coerced_type = comparison_coercion(&expr_type, &low_type) .ok_or_else(|| { DataFusionError::Internal(format!( "Failed to coerce types {} and {} in BETWEEN expression", expr_type, low_type )) })?; + let high_type = high.get_type(&self.schema)?; + let high_coerced_type = comparison_coercion(&expr_type, &high_type) + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Failed to coerce types {} and {} in BETWEEN expression", + expr_type, high_type + )) + })?; + let coercion_type = + comparison_coercion(&low_coerced_type, &high_coerced_type) + .ok_or_else(|| { + DataFusionError::Internal(format!( + "Failed to coerce types {} and {} in BETWEEN expression", + low_coerced_type, high_coerced_type + )) + })?; let expr = Expr::Between { - expr: Box::new(expr.cast_to(&coerced_type, &self.schema)?), + expr: Box::new(expr.cast_to(&coercion_type, &self.schema)?), negated, - low: Box::new(low.cast_to(&coerced_type, &self.schema)?), - high: Box::new(high.cast_to(&coerced_type, &self.schema)?), + low: Box::new(low.cast_to(&coercion_type, &self.schema)?), + high: Box::new(high.cast_to(&coercion_type, &self.schema)?), }; expr.rewrite(&mut self.const_evaluator) } @@ -522,6 +598,106 @@ mod test { Ok(()) } + #[test] + fn like_for_type_coercion() -> Result<()> { + // like : utf8 like "abc" + let expr = Box::new(col("a")); + let pattern = Box::new(lit(ScalarValue::Utf8(Some("abc".to_string())))); + let like_expr = Expr::Like { + negated: false,
expr, + pattern, + escape_char: None, + }; + let empty = empty_with_type(DataType::Utf8); + let plan = + LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a LIKE Utf8(NULL)\n EmptyRelation", + &format!("{:?}", plan) + ); + + let expr = Box::new(col("a")); + let pattern = Box::new(lit(ScalarValue::Utf8(Some("abc".to_string())))); + let like_expr = Expr::Like { + negated: false, + expr, + pattern, + escape_char: None, + }; + let empty = empty_with_type(DataType::Int64); + let plan = + LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config); + assert!(plan.is_err()); + assert!(plan.unwrap_err().to_string().contains("'Int64 LIKE Utf8' can't be evaluated because there isn't a common type to coerce the types to")); + Ok(()) + } + + #[test] + fn unknown_for_type_coercion() -> Result<()> { + // unknown + let expr = col("a").is_unknown(); + let empty = empty_with_type(DataType::Boolean); + let plan = LogicalPlan::Projection(Projection::try_new( + vec![expr.clone()], + empty, + None, + )?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a IS UNKNOWN\n EmptyRelation", + &format!("{:?}", plan) + ); + + let empty = empty_with_type(DataType::Utf8); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config); + assert!(plan.is_err()); + assert!(plan.unwrap_err().to_string().contains("'Utf8 IS NOT DISTINCT FROM Boolean' can't be evaluated because there isn't a common type to 
coerce the types to")); + + // is not unknown + let expr = col("a").is_not_unknown(); + let empty = empty_with_type(DataType::Boolean); + let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty, None)?); + let rule = TypeCoercion::new(); + let mut config = OptimizerConfig::default(); + let plan = rule.optimize(&plan, &mut config).unwrap(); + assert_eq!( + "Projection: #a IS NOT UNKNOWN\n EmptyRelation", + &format!("{:?}", plan) + ); + Ok(()) + } + fn empty() -> Arc { Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false,