From 0e6e5a2cd46e6745a62c2014760b8acd6c57a649 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 25 Aug 2024 15:13:45 -0700 Subject: [PATCH] fix: Skip buffered rows which are not joined with streamed side when checking join filter results --- datafusion/physical-plan/src/joins/sort_merge_join.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/physical-plan/src/joins/sort_merge_join.rs b/datafusion/physical-plan/src/joins/sort_merge_join.rs index 96d5ba728a30b..6a4b49c1e6c44 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join.rs @@ -1460,6 +1460,12 @@ impl SMJStream { // we need to output it with nulls as streamed side. if matches!(self.join_type, JoinType::Full) { for i in 0..pre_mask.len() { + // If the buffered row is not joined with streamed side, + // skip it. + if buffered_indices.is_null(i) { + continue; + } + let buffered_batch = &mut self.buffered_data.batches [chunk.buffered_batch_idx.unwrap()]; let buffered_index = buffered_indices.value(i);