Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADBDEV-5705: Fix direct dispatch behavior with ShareInputScan #1160

Merged
merged 16 commits into from
Dec 23, 2024
Merged
6 changes: 6 additions & 0 deletions src/backend/cdb/cdbmutate.c
Original file line number Diff line number Diff line change
Expand Up @@ -942,6 +942,11 @@ shareinput_mutator_xslice_1(Node *node, PlannerInfo *root, bool fPop)
if (shared)
ctxt->shared_inputs[sisc->share_id].producer_slice_id = motId;
share_info->participant_slices = bms_add_member(share_info->participant_slices, motId);
if (currentSlice->directDispatch.haveProcessedAnyCalculations)
MergeDirectDispatchCalculationInfo(
&share_info->directDispatch,
&currentSlice->directDispatch
);

sisc->this_slice_id = motId;
}
Expand Down Expand Up @@ -993,6 +998,7 @@ shareinput_mutator_xslice_2(Node *node, PlannerInfo *root, bool fPop)

sisc->producer_slice_id = pershare->producer_slice_id;
sisc->nconsumers = bms_num_members(pershare->participant_slices) - 1;
currentSlice->directDispatch = pershare->directDispatch;
Stolb27 marked this conversation as resolved.
Show resolved Hide resolved

/*
* The SISC's plan contains modifying operation, which
Expand Down
1 change: 1 addition & 0 deletions src/include/nodes/pathnodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ typedef struct ApplyShareInputContextPerShare
{
int producer_slice_id;
Bitmapset *participant_slices;
DirectDispatchInfo directDispatch;
} ApplyShareInputContextPerShare;

typedef struct ApplyShareInputContext
Expand Down
84 changes: 84 additions & 0 deletions src/test/regress/expected/with.out
Original file line number Diff line number Diff line change
Expand Up @@ -2660,3 +2660,87 @@ SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte U

DROP TABLE d;
DROP TABLE r;
-- Test that planner doesn't use direct dispatch in case of shared CTE.
Stolb27 marked this conversation as resolved.
Show resolved Hide resolved
CREATE TABLE with_test (i int) DISTRIBUTED BY (i);
INSERT INTO with_test VALUES (1), (2), (3);
EXPLAIN (slicetable, costs off)
WITH cte AS (SELECT * FROM with_test WHERE i = 1)
SELECT * FROM (SELECT a.i AS i, b.i AS j FROM cte a JOIN with_test b ON a.i + 1 = b.i) AS a
JOIN (SELECT a.i AS i, b.i AS k FROM cte a JOIN with_test b ON a.i + 2 = b.i) AS b USING (i);
QUERY PLAN
------------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Join
Hash Cond: (share0_ref2.i = b.i)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: share0_ref2.i
-> Hash Join
Hash Cond: (b_1.i = (share0_ref2.i + 1))
-> Seq Scan on with_test b_1
-> Hash
-> Redistribute Motion 1:3 (slice3; segments: 1)
Hash Key: (share0_ref2.i + 1)
-> Shared Scan (share slice:id 3:0)
-> Hash
-> Redistribute Motion 3:3 (slice4; segments: 3)
Hash Key: b.i
-> Subquery Scan on b
-> Hash Join
Hash Cond: (b_2.i = (share0_ref1.i + 2))
-> Seq Scan on with_test b_2
-> Hash
-> Redistribute Motion 1:3 (slice5; segments: 1)
Hash Key: (share0_ref1.i + 2)
-> Shared Scan (share slice:id 5:0)
-> Seq Scan on with_test
Filter: (i = 1)
Optimizer: Postgres-based planner
Slice 0: Dispatcher; root 0; parent -1; gang size 0
Slice 1: Reader; root 0; parent 0; gang size 3
Slice 2: Reader; root 0; parent 1; gang size 3
Slice 3: Reader; root 0; parent 2; gang size 1
Slice 4: Reader; root 0; parent 1; gang size 3
Slice 5: Reader; root 0; parent 4; gang size 1
(32 rows)

WITH cte AS (SELECT * FROM with_test WHERE i = 1)
SELECT * FROM (SELECT a.i AS i, b.i AS j FROM cte a JOIN with_test b ON a.i + 1 = b.i) AS a
JOIN (SELECT a.i AS i, b.i AS k FROM cte a JOIN with_test b ON a.i + 2 = b.i) AS b USING (i);
i | j | k
---+---+---
1 | 2 | 3
(1 row)

-- same but with modifying CTE
EXPLAIN (slicetable, costs off)
WITH cte AS (INSERT INTO with_test SELECT 4 RETURNING *)
SELECT * FROM cte AS a JOIN cte AS b USING (i);
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Join
Hash Cond: (share0_ref2.i = share0_ref1.i)
-> Redistribute Motion 1:3 (slice2; segments: 1)
Hash Key: share0_ref2.i
-> Shared Scan (share slice:id 2:0)
-> Hash
-> Redistribute Motion 1:3 (slice3; segments: 1)
Hash Key: share0_ref1.i
-> Shared Scan (share slice:id 3:0)
-> Insert on with_test
-> Result
Optimizer: Postgres-based planner
Slice 0: Dispatcher; root 0; parent -1; gang size 0
Slice 1: Reader; root 0; parent 0; gang size 3
Slice 2: Reader; root 0; parent 1; gang size 1
Slice 3: Primary Writer; root 0; parent 1; gang size 1
(17 rows)

WITH cte AS (INSERT INTO with_test SELECT 4 RETURNING *)
SELECT * FROM cte AS a JOIN cte AS b USING (i);
i
---
4
(1 row)

DROP TABLE with_test;
83 changes: 83 additions & 0 deletions src/test/regress/expected/with_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -2694,3 +2694,86 @@ SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte U

DROP TABLE d;
DROP TABLE r;
-- Test that planner doesn't use direct dispatch in case of shared CTE.
CREATE TABLE with_test (i int) DISTRIBUTED BY (i);
INSERT INTO with_test VALUES (1), (2), (3);
EXPLAIN (slicetable, costs off)
WITH cte AS (SELECT * FROM with_test WHERE i = 1)
SELECT * FROM (SELECT a.i AS i, b.i AS j FROM cte a JOIN with_test b ON a.i + 1 = b.i) AS a
JOIN (SELECT a.i AS i, b.i AS k FROM cte a JOIN with_test b ON a.i + 2 = b.i) AS b USING (i);
QUERY PLAN
----------------------------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Sequence
-> Shared Scan (share slice:id 1:0)
-> Seq Scan on with_test
Filter: (i = 1)
-> Hash Join
Hash Cond: ((share0_ref3.i + 1) = b_1.i)
-> Redistribute Motion 3:3 (slice2; segments: 3)
Hash Key: (share0_ref3.i + 1)
-> Hash Join
Hash Cond: ((share0_ref2.i + 2) = b.i)
-> Redistribute Motion 3:3 (slice3; segments: 3)
Hash Key: (share0_ref2.i + 2)
-> Hash Join
Hash Cond: (share0_ref3.i = share0_ref2.i)
-> Result
Filter: (share0_ref3.i = 1)
-> Shared Scan (share slice:id 3:0)
-> Hash
-> Result
Filter: (share0_ref2.i = 1)
-> Shared Scan (share slice:id 3:0)
-> Hash
-> Seq Scan on with_test b
-> Hash
-> Seq Scan on with_test b_1
Optimizer: GPORCA
Slice 0: Dispatcher; root 0; parent -1; gang size 0
Slice 1: Reader; root 0; parent 0; gang size 3
Slice 2: Reader; root 0; parent 1; gang size 3
Slice 3: Reader; root 0; parent 2; gang size 3
(31 rows)

WITH cte AS (SELECT * FROM with_test WHERE i = 1)
SELECT * FROM (SELECT a.i AS i, b.i AS j FROM cte a JOIN with_test b ON a.i + 1 = b.i) AS a
JOIN (SELECT a.i AS i, b.i AS k FROM cte a JOIN with_test b ON a.i + 2 = b.i) AS b USING (i);
i | j | k
---+---+---
1 | 2 | 3
(1 row)

-- same but with modifying CTE
EXPLAIN (slicetable, costs off)
WITH cte AS (INSERT INTO with_test SELECT 4 RETURNING *)
SELECT * FROM cte AS a JOIN cte AS b USING (i);
QUERY PLAN
------------------------------------------------------------------
Gather Motion 3:1 (slice1; segments: 3)
-> Hash Join
Hash Cond: (share0_ref2.i = share0_ref1.i)
-> Redistribute Motion 1:3 (slice2; segments: 1)
Hash Key: share0_ref2.i
-> Shared Scan (share slice:id 2:0)
-> Hash
-> Redistribute Motion 1:3 (slice3; segments: 1)
Hash Key: share0_ref1.i
-> Shared Scan (share slice:id 3:0)
-> Insert on with_test
-> Result
Optimizer: Postgres-based planner
Slice 0: Dispatcher; root 0; parent -1; gang size 0
Slice 1: Reader; root 0; parent 0; gang size 3
Slice 2: Reader; root 0; parent 1; gang size 1
Slice 3: Primary Writer; root 0; parent 1; gang size 1
(17 rows)

WITH cte AS (INSERT INTO with_test SELECT 4 RETURNING *)
SELECT * FROM cte AS a JOIN cte AS b USING (i);
i
---
4
(1 row)

DROP TABLE with_test;
23 changes: 23 additions & 0 deletions src/test/regress/sql/with.sql
Original file line number Diff line number Diff line change
Expand Up @@ -1290,3 +1290,26 @@ SELECT * FROM cte JOIN (SELECT * FROM d JOIN cte USING (a) LIMIT 1) d_join_cte U

DROP TABLE d;
DROP TABLE r;

-- Test that planner doesn't use direct dispatch in case of shared CTE.
CREATE TABLE with_test (i int) DISTRIBUTED BY (i);
RekGRpth marked this conversation as resolved.
Show resolved Hide resolved
INSERT INTO with_test VALUES (1), (2), (3);

EXPLAIN (slicetable, costs off)
WITH cte AS (SELECT * FROM with_test WHERE i = 1)
SELECT * FROM (SELECT a.i AS i, b.i AS j FROM cte a JOIN with_test b ON a.i + 1 = b.i) AS a
JOIN (SELECT a.i AS i, b.i AS k FROM cte a JOIN with_test b ON a.i + 2 = b.i) AS b USING (i);

WITH cte AS (SELECT * FROM with_test WHERE i = 1)
SELECT * FROM (SELECT a.i AS i, b.i AS j FROM cte a JOIN with_test b ON a.i + 1 = b.i) AS a
JOIN (SELECT a.i AS i, b.i AS k FROM cte a JOIN with_test b ON a.i + 2 = b.i) AS b USING (i);

-- same but with modifying CTE
EXPLAIN (slicetable, costs off)
WITH cte AS (INSERT INTO with_test SELECT 4 RETURNING *)
SELECT * FROM cte AS a JOIN cte AS b USING (i);

WITH cte AS (INSERT INTO with_test SELECT 4 RETURNING *)
SELECT * FROM cte AS a JOIN cte AS b USING (i);

DROP TABLE with_test;
Loading