From 8b244eee3a731bc37d0a87efe455e47d58e68846 Mon Sep 17 00:00:00 2001 From: Lorrens Pantelis <100197010+LorrensP-2158466@users.noreply.github.com> Date: Tue, 25 Jun 2024 04:14:55 +0200 Subject: [PATCH] Resolve empty relation opt for join types (#11066) * handle left/right anti with empty join_table & added tests for those * add tests and only resolve cases 1,2,3 of issue * forgot to change a test * Update datafusion/optimizer/src/propagate_empty_relation.rs Co-authored-by: Jonah Gao --------- Co-authored-by: Andrew Lamb Co-authored-by: Jonah Gao --- .../optimizer/src/propagate_empty_relation.rs | 57 +++++++++++++++++-- datafusion/sqllogictest/test_files/joins.slt | 30 ++++++++++ .../sqllogictest/test_files/subquery.slt | 5 +- 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/datafusion/optimizer/src/propagate_empty_relation.rs b/datafusion/optimizer/src/propagate_empty_relation.rs index 6d7521dac777..63b357510f2f 100644 --- a/datafusion/optimizer/src/propagate_empty_relation.rs +++ b/datafusion/optimizer/src/propagate_empty_relation.rs @@ -88,9 +88,6 @@ impl OptimizerRule for PropagateEmptyRelation { LogicalPlan::Join(ref join) => { // TODO: For Join, more join type need to be careful: - // For LeftAnti Join, if the right side is empty, the Join result is left side(should exclude null ??). - // For RightAnti Join, if the left side is empty, the Join result is right side(should exclude null ??). - // For Full Join, only both sides are empty, the Join result is empty. // For LeftOut/Full Join, if the right side is empty, the Join can be eliminated with a Projection with left side // columns + right side columns replaced with null values. // For RightOut/Full Join, if the left side is empty, the Join can be eliminated with a Projection with right side @@ -98,6 +95,13 @@ impl OptimizerRule for PropagateEmptyRelation { let (left_empty, right_empty) = binary_plan_children_is_empty(&plan)?; match join.join_type { + // For Full Join, only both sides are empty, the Join result is empty. + JoinType::Full if left_empty && right_empty => Ok(Transformed::yes( + LogicalPlan::EmptyRelation(EmptyRelation { + produce_one_row: false, + schema: join.schema.clone(), + }), + )), JoinType::Inner if left_empty || right_empty => Ok(Transformed::yes( LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, @@ -134,13 +138,19 @@ impl OptimizerRule for PropagateEmptyRelation { schema: join.schema.clone(), }), )), + JoinType::LeftAnti if right_empty => { + Ok(Transformed::yes((*join.left).clone())) + } + JoinType::RightAnti if left_empty => { + Ok(Transformed::yes((*join.right).clone())) + } JoinType::RightAnti if right_empty => Ok(Transformed::yes( LogicalPlan::EmptyRelation(EmptyRelation { produce_one_row: false, schema: join.schema.clone(), }), )), - _ => Ok(Transformed::no(LogicalPlan::Join(join.clone()))), + _ => Ok(Transformed::no(plan)), } } LogicalPlan::Aggregate(ref agg) => { @@ -467,8 +477,39 @@ mod tests { assert_together_optimized_plan(plan, expected, eq) } + // TODO: fix this long name + fn assert_anti_join_empty_join_table_is_base_table( + anti_left_join: bool, + ) -> Result<()> { + // if we have an anti join with an empty join table, then the result is the base_table + let (left, right, join_type, expected) = if anti_left_join { + let left = test_table_scan()?; + let right = LogicalPlanBuilder::from(test_table_scan()?) + .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .build()?; + let expected = left.display_indent().to_string(); + (left, right, JoinType::LeftAnti, expected) + } else { + let right = test_table_scan()?; + let left = LogicalPlanBuilder::from(test_table_scan()?) + .filter(Expr::Literal(ScalarValue::Boolean(Some(false))))? + .build()?; + let expected = right.display_indent().to_string(); + (left, right, JoinType::RightAnti, expected) + }; + + let plan = LogicalPlanBuilder::from(left) + .join_using(right, join_type, vec![Column::from_name("a".to_string())])? + .build()?; + + assert_together_optimized_plan(plan, &expected, true) + } + #[test] fn test_join_empty_propagation_rules() -> Result<()> { + // test full join with empty left and empty right + assert_empty_left_empty_right_lp(true, true, JoinType::Full, true)?; + // test left join with empty left assert_empty_left_empty_right_lp(true, false, JoinType::Left, true)?; @@ -491,7 +532,13 @@ mod tests { assert_empty_left_empty_right_lp(true, false, JoinType::LeftAnti, true)?; // test right anti join empty right - assert_empty_left_empty_right_lp(false, true, JoinType::RightAnti, true) + assert_empty_left_empty_right_lp(false, true, JoinType::RightAnti, true)?; + + // test left anti join empty right + assert_anti_join_empty_join_table_is_base_table(true)?; + + // test right anti join empty left + assert_anti_join_empty_join_table_is_base_table(false) } #[test] diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index c2949b98aa77..501ae497745b 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3781,3 +3781,33 @@ EXPLAIN SELECT * FROM ( ) AS a RIGHT ANTI JOIN (SELECT 1 AS a WHERE 1=0) AS b ON a.a=b.a; ---- logical_plan EmptyRelation + +# FULL OUTER join with empty left and empty right table +query TT +EXPLAIN SELECT * FROM ( + SELECT 1 as a WHERE 1=0 +) AS a FULL JOIN (SELECT 1 AS a WHERE 1=0) AS b ON a.a=b.a; +---- +logical_plan EmptyRelation + +# Left ANTI join with empty right table +query TT +EXPLAIN SELECT * FROM ( + SELECT 1 as a +) AS a LEFT ANTI JOIN (SELECT 1 AS a WHERE 1=0) as b ON a.a=b.a; +---- +logical_plan +01)SubqueryAlias: a +02)--Projection: Int64(1) AS a +03)----EmptyRelation + +# Right ANTI join with empty left table +query TT +EXPLAIN SELECT * FROM ( + SELECT 1 as a WHERE 1=0 +) AS a RIGHT ANTI JOIN (SELECT 1 AS a) as b ON a.a=b.a; +---- +logical_plan +01)SubqueryAlias: b +02)--Projection: Int64(1) AS a +03)----EmptyRelation diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt index f56729f85e1c..30b3631681e7 100644 --- a/datafusion/sqllogictest/test_files/subquery.slt +++ b/datafusion/sqllogictest/test_files/subquery.slt @@ -638,10 +638,7 @@ SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id query TT explain SELECT t1_id, t1_name FROM t1 WHERE NOT EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 0) ---- -logical_plan -01)LeftAnti Join: t1.t1_id = __correlated_sq_1.t2_id -02)--TableScan: t1 projection=[t1_id, t1_name] -03)--EmptyRelation +logical_plan TableScan: t1 projection=[t1_id, t1_name] query IT rowsort SELECT t1_id, t1_name FROM t1 WHERE NOT EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 0)