Skip to content

Commit

Permalink
Resolve empty relation opt for join types (apache#11066)
Browse files Browse the repository at this point in the history
* handle left/right anti with empty join_table & added tests for those

* add tests and only resolve cases 1,2,3 of issue

* forgot to change a test

* Update datafusion/optimizer/src/propagate_empty_relation.rs

Co-authored-by: Jonah Gao <[email protected]>

---------

Co-authored-by: Andrew Lamb <[email protected]>
Co-authored-by: Jonah Gao <[email protected]>
  • Loading branch information
3 people authored Jun 25, 2024
1 parent 31daf25 commit 8b244ee
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 9 deletions.
57 changes: 52 additions & 5 deletions datafusion/optimizer/src/propagate_empty_relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,20 @@ impl OptimizerRule for PropagateEmptyRelation {

LogicalPlan::Join(ref join) => {
// TODO: For Join, more join type need to be careful:
// For LeftAnti Join, if the right side is empty, the Join result is left side(should exclude null ??).
// For RightAnti Join, if the left side is empty, the Join result is right side(should exclude null ??).
// For Full Join, only both sides are empty, the Join result is empty.
// For LeftOut/Full Join, if the right side is empty, the Join can be eliminated with a Projection with left side
// columns + right side columns replaced with null values.
// For RightOut/Full Join, if the left side is empty, the Join can be eliminated with a Projection with right side
// columns + left side columns replaced with null values.
let (left_empty, right_empty) = binary_plan_children_is_empty(&plan)?;

match join.join_type {
// For Full Join, only both sides are empty, the Join result is empty.
JoinType::Full if left_empty && right_empty => Ok(Transformed::yes(
LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
schema: join.schema.clone(),
}),
)),
JoinType::Inner if left_empty || right_empty => Ok(Transformed::yes(
LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
Expand Down Expand Up @@ -134,13 +138,19 @@ impl OptimizerRule for PropagateEmptyRelation {
schema: join.schema.clone(),
}),
)),
JoinType::LeftAnti if right_empty => {
Ok(Transformed::yes((*join.left).clone()))
}
JoinType::RightAnti if left_empty => {
Ok(Transformed::yes((*join.right).clone()))
}
JoinType::RightAnti if right_empty => Ok(Transformed::yes(
LogicalPlan::EmptyRelation(EmptyRelation {
produce_one_row: false,
schema: join.schema.clone(),
}),
)),
_ => Ok(Transformed::no(LogicalPlan::Join(join.clone()))),
_ => Ok(Transformed::no(plan)),
}
}
LogicalPlan::Aggregate(ref agg) => {
Expand Down Expand Up @@ -467,8 +477,39 @@ mod tests {
assert_together_optimized_plan(plan, expected, eq)
}

// TODO: fix this long name
fn assert_anti_join_empty_join_table_is_base_table(
anti_left_join: bool,
) -> Result<()> {
// if we have an anti join with an empty join table, then the result is the base_table
let (left, right, join_type, expected) = if anti_left_join {
let left = test_table_scan()?;
let right = LogicalPlanBuilder::from(test_table_scan()?)
.filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
.build()?;
let expected = left.display_indent().to_string();
(left, right, JoinType::LeftAnti, expected)
} else {
let right = test_table_scan()?;
let left = LogicalPlanBuilder::from(test_table_scan()?)
.filter(Expr::Literal(ScalarValue::Boolean(Some(false))))?
.build()?;
let expected = right.display_indent().to_string();
(left, right, JoinType::RightAnti, expected)
};

let plan = LogicalPlanBuilder::from(left)
.join_using(right, join_type, vec![Column::from_name("a".to_string())])?
.build()?;

assert_together_optimized_plan(plan, &expected, true)
}

#[test]
fn test_join_empty_propagation_rules() -> Result<()> {
// test full join with empty left and empty right
assert_empty_left_empty_right_lp(true, true, JoinType::Full, true)?;

// test left join with empty left
assert_empty_left_empty_right_lp(true, false, JoinType::Left, true)?;

Expand All @@ -491,7 +532,13 @@ mod tests {
assert_empty_left_empty_right_lp(true, false, JoinType::LeftAnti, true)?;

// test right anti join empty right
assert_empty_left_empty_right_lp(false, true, JoinType::RightAnti, true)
assert_empty_left_empty_right_lp(false, true, JoinType::RightAnti, true)?;

// test left anti join empty right
assert_anti_join_empty_join_table_is_base_table(true)?;

// test right anti join empty left
assert_anti_join_empty_join_table_is_base_table(false)
}

#[test]
Expand Down
30 changes: 30 additions & 0 deletions datafusion/sqllogictest/test_files/joins.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3781,3 +3781,33 @@ EXPLAIN SELECT * FROM (
) AS a RIGHT ANTI JOIN (SELECT 1 AS a WHERE 1=0) AS b ON a.a=b.a;
----
logical_plan EmptyRelation

# FULL OUTER join with empty left and empty right table
query TT
EXPLAIN SELECT * FROM (
SELECT 1 as a WHERE 1=0
) AS a FULL JOIN (SELECT 1 AS a WHERE 1=0) AS b ON a.a=b.a;
----
logical_plan EmptyRelation

# Left ANTI join with empty right table
query TT
EXPLAIN SELECT * FROM (
SELECT 1 as a
) AS a LEFT ANTI JOIN (SELECT 1 AS a WHERE 1=0) as b ON a.a=b.a;
----
logical_plan
01)SubqueryAlias: a
02)--Projection: Int64(1) AS a
03)----EmptyRelation

# Right ANTI join with empty left table
query TT
EXPLAIN SELECT * FROM (
SELECT 1 as a WHERE 1=0
) AS a RIGHT ANTI JOIN (SELECT 1 AS a) as b ON a.a=b.a;
----
logical_plan
01)SubqueryAlias: b
02)--Projection: Int64(1) AS a
03)----EmptyRelation
5 changes: 1 addition & 4 deletions datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,7 @@ SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id
query TT
explain SELECT t1_id, t1_name FROM t1 WHERE NOT EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 0)
----
logical_plan
01)LeftAnti Join: t1.t1_id = __correlated_sq_1.t2_id
02)--TableScan: t1 projection=[t1_id, t1_name]
03)--EmptyRelation
logical_plan TableScan: t1 projection=[t1_id, t1_name]

query IT rowsort
SELECT t1_id, t1_name FROM t1 WHERE NOT EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 0)
Expand Down

0 comments on commit 8b244ee

Please sign in to comment.