diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index c15dcd8e9437..0b1b6feeff5e 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -790,13 +790,21 @@ cdbllize_decorate_subplans_with_motions(PlannerInfo *root, Plan *plan) if (!subplan->flow) elog(ERROR, "subplan is missing Flow information"); + /* + * Broadcasting Replicated locus leads to data duplicates. + */ + if (subplan->flow->locustype == CdbLocusType_Replicated && + subplan->flow->numsegments != context.currentPlanFlow->numsegments) + elog(ERROR, "could not parallelize SubPlan"); + /* * If the subquery result is not available where the outer query needs it, * we have to add a Motion node to redistribute it. */ if (subplan->flow->locustype != CdbLocusType_OuterQuery && subplan->flow->locustype != CdbLocusType_SegmentGeneral && - subplan->flow->locustype != CdbLocusType_General) + subplan->flow->locustype != CdbLocusType_General && + subplan->flow->locustype != CdbLocusType_Replicated) { subplan = fix_subplan_motion(root, subplan, context.currentPlanFlow); @@ -878,6 +886,7 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte if (IsA(plan, Motion)) { Motion *motion = (Motion *) plan; + bool shouldOmit = false; /* sanity check: Sub plan must have flow */ Assert(motion->plan.lefttree->flow); @@ -942,14 +951,35 @@ fix_outer_query_motions_mutator(Node *node, decorate_subplans_with_motions_conte elog(ERROR, "unexpected Flow type in parent of a SubPlan"); } + if (plan->lefttree->flow->flotype == FLOW_REPLICATED) + { + /* + * Broadcasting Replicated locus leads to data duplication. + * We can only Explicit Gather it to a single QE or we can + * omit this motion if number of segments is equal. + */ + if (context->currentPlanFlow->flotype == FLOW_SINGLETON) + { + motion->motionType = MOTIONTYPE_GATHER_SINGLE; + } + else if (plan->lefttree->flow->numsegments == context->currentPlanFlow->numsegments) + { + shouldOmit = true; + } + else + elog(ERROR, "could not parallelize SubPlan"); + } + /* * For non-top slice, if this motion is QE singleton and subplan's locus * is CdbLocusType_SegmentGeneral, omit this motion. */ - if (context->sliceDepth > 0 && - context->currentPlanFlow->flotype == FLOW_SINGLETON && - context->currentPlanFlow->segindex == 0 && - motion->plan.lefttree->flow->locustype == CdbLocusType_SegmentGeneral) + shouldOmit |= context->sliceDepth > 0 && + context->currentPlanFlow->flotype == FLOW_SINGLETON && + context->currentPlanFlow->segindex == 0 && + motion->plan.lefttree->flow->locustype == CdbLocusType_SegmentGeneral; + + if (shouldOmit) { /* * Omit this motion. If there were any InitPlans attached to it, diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index 58e0570d16a9..7ed72f8cb8e4 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -330,6 +330,10 @@ cdbpath_create_motion_path(PlannerInfo *root, if (!bms_is_empty(PATH_REQ_OUTER(subpath))) return NULL; + if (CdbPathLocus_IsReplicated(subpath->locus) && + contain_volatile_functions((Node *) subpath->pathtarget->exprs)) + goto invalid_motion_request; + /* * Data is only available on segments, to distingush it with * CdbLocusType_General, adding a motion to indicated this @@ -1347,21 +1351,6 @@ cdbpath_motion_for_join(PlannerInfo *root, } } - /* - * Locus type Replicated can only be generated by join operation. - * And in the function cdbpathlocus_join there is a rule: - * join => any locus type - * Proof by contradiction, it shows that when code arrives here, - * it is impossible that any of the two input paths' locus - * is Replicated. So we add two asserts here. - */ - Assert(!CdbPathLocus_IsReplicated(outer.locus)); - Assert(!CdbPathLocus_IsReplicated(inner.locus)); - - if (CdbPathLocus_IsReplicated(outer.locus) || - CdbPathLocus_IsReplicated(inner.locus)) - goto fail; - outer.has_wts = cdbpath_contains_wts(outer.path); inner.has_wts = cdbpath_contains_wts(inner.path); @@ -1743,100 +1732,182 @@ cdbpath_motion_for_join(PlannerInfo *root, other = &outer; } - Assert(CdbPathLocus_IsBottleneck(other->locus) || - CdbPathLocus_IsPartitioned(other->locus)); - - /* - * For UPDATE/DELETE, replicated table can't guarantee a logic row has - * same ctid or item pointer on each copy. If we broadcast matched tuples - * to all segments, the segments may update the wrong tuples or can't - * find a valid tuple according to ctid or item pointer. - * - * So For UPDATE/DELETE on replicated table, we broadcast other path so - * all target tuples can be selected on all copys and then be updated - * locally. - */ - if (root->upd_del_replicated_table > 0 && - bms_is_member(root->upd_del_replicated_table, - segGeneral->path->parent->relids)) + if (CdbPathLocus_IsReplicated(other->locus)) { /* - * For UPDATE on a replicated table, we have to do it - * everywhere so that for each segment, we have to collect - * all the information of other that is we should broadcast it + * The case, when UPDATE/DELETE operation on a replicated table + * also has join operand with Replicated locus, is unknown. */ - - /* - * FIXME: do we need to test other's movable? - */ - CdbPathLocus_MakeReplicated(&other->move_to, - CdbPathLocus_NumSegments(segGeneral->locus)); - } - else if (CdbPathLocus_IsBottleneck(other->locus)) - { + Assert(root->upd_del_replicated_table == 0); + + int numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, + other->locus); + /* - * if the locus type is equal and segment count is unequal, - * we will dispatch the one on more segments to the other + * Replicated locus corresponds to the result of + * the CTE with modifying DML operation over a replicated + * table inside. In case when SegmentGeneral locus is + * propagated at more number of segments than Replicated locus + * is, it is appropriate to reduce SegmentGeneral's number to + * perform join on segments. Otherwise, perform join at + * SingleQE. */ - numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, - other->locus); - segGeneral->move_to = other->locus; - segGeneral->move_to.numsegments = numsegments; + if (segGeneral->locus.numsegments >= other->locus.numsegments) + { + segGeneral->locus.numsegments = numsegments; + return other->locus; + } + else + { + CdbPathLocus_MakeSingleQE(&segGeneral->move_to, numsegments); + CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); + } } else { - /* - * This branch handles for partitioned other locus - * hashed, hashoj, strewn - */ - Assert(CdbPathLocus_IsPartitioned(other->locus)); + + Assert(CdbPathLocus_IsBottleneck(other->locus) || + CdbPathLocus_IsPartitioned(other->locus)); - if (!segGeneral->ok_to_replicate) + /* + * For UPDATE/DELETE, replicated table can't guarantee a logic row has + * same ctid or item pointer on each copy. If we broadcast matched tuples + * to all segments, the segments may update the wrong tuples or can't + * find a valid tuple according to ctid or item pointer. + * + * So For UPDATE/DELETE on replicated table, we broadcast other path so + * all target tuples can be selected on all copys and then be updated + * locally. + */ + if (root->upd_del_replicated_table > 0 && + bms_is_member(root->upd_del_replicated_table, + segGeneral->path->parent->relids)) { - if (!try_redistribute(root, segGeneral, - other, redistribution_clauses)) - { - /* - * FIXME: do we need to test movable? - */ - CdbPathLocus_MakeSingleQE(&segGeneral->move_to, - CdbPathLocus_NumSegments(segGeneral->locus)); - CdbPathLocus_MakeSingleQE(&other->move_to, - CdbPathLocus_NumSegments(other->locus)); - } + /* + * For UPDATE on a replicated table, we have to do it + * everywhere so that for each segment, we have to collect + * all the information of other that is we should broadcast it + */ + + /* + * FIXME: do we need to test other's movable? + */ + CdbPathLocus_MakeReplicated(&other->move_to, + CdbPathLocus_NumSegments(segGeneral->locus)); + } + else if (CdbPathLocus_IsBottleneck(other->locus)) + { + /* + * if the locus type is equal and segment count is unequal, + * we will dispatch the one on more segments to the other + */ + numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, + other->locus); + segGeneral->move_to = other->locus; + segGeneral->move_to.numsegments = numsegments; } else { /* - * If all other's segments have segGeneral stored, then no motion - * is needed. - * - * A sql to reach here: - * select * from d2 a join r1 b using (c1); - * where d2 is a replicated table on 2 segment, - * r1 is a random table on 1 segments. - */ - if (CdbPathLocus_NumSegments(segGeneral->locus) >= - CdbPathLocus_NumSegments(other->locus)) - return other->locus; - else + * This branch handles for partitioned other locus + * hashed, hashoj, strewn + */ + Assert(CdbPathLocus_IsPartitioned(other->locus)); + + if (!segGeneral->ok_to_replicate) { if (!try_redistribute(root, segGeneral, - other, redistribution_clauses)) + other, redistribution_clauses)) { - numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, - other->locus); /* - * FIXME: do we need to test movable? - */ - CdbPathLocus_MakeSingleQE(&segGeneral->move_to, numsegments); - CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); + * FIXME: do we need to test movable? + */ + CdbPathLocus_MakeSingleQE(&segGeneral->move_to, + CdbPathLocus_NumSegments(segGeneral->locus)); + CdbPathLocus_MakeSingleQE(&other->move_to, + CdbPathLocus_NumSegments(other->locus)); + } + } + else + { + /* + * If all other's segments have segGeneral stored, then no motion + * is needed. + * + * A sql to reach here: + * select * from d2 a join r1 b using (c1); + * where d2 is a replicated table on 2 segment, + * r1 is a random table on 1 segments. + */ + if (CdbPathLocus_NumSegments(segGeneral->locus) >= + CdbPathLocus_NumSegments(other->locus)) + return other->locus; + else + { + if (!try_redistribute(root, segGeneral, + other, redistribution_clauses)) + { + numsegments = CdbPathLocus_CommonSegments(segGeneral->locus, + other->locus); + /* + * FIXME: do we need to test movable? + */ + CdbPathLocus_MakeSingleQE(&segGeneral->move_to, numsegments); + CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); + } } } } } } } + else if (CdbPathLocus_IsReplicated(outer.locus) || + CdbPathLocus_IsReplicated(inner.locus)) + { + /* + * Replicated paths shouldn't occur except ones including + * modifying CTEs with DML operations on replicated table. + */ + Assert(root->upd_del_replicated_table == 0); + + CdbpathMfjRel *replicated; + CdbpathMfjRel *other; + + if (CdbPathLocus_IsReplicated(outer.locus)) + { + replicated = &outer; + other = &inner; + } + else + { + replicated = &inner; + other = &outer; + } + + int numsegments = CdbPathLocus_CommonSegments(replicated->locus, + other->locus); + + /* + * If Replicated locus is joined with Partitioned locus group + * it will be possible to perform join locally (if number of segments + * is equal). Otherwise, join must be performed at single segment. + */ + if (CdbPathLocus_IsBottleneck(other->locus)) + CdbPathLocus_MakeSimple(&replicated->move_to, + other->locus.locustype, numsegments); + else if (CdbPathLocus_IsPartitioned(other->locus)) + { + if (replicated->ok_to_replicate && + CdbPathLocus_NumSegments(replicated->locus) == + CdbPathLocus_NumSegments(other->locus)) + return other->locus; + else + { + CdbPathLocus_MakeSingleQE(&replicated->move_to, numsegments); + CdbPathLocus_MakeSingleQE(&other->move_to, numsegments); + } + } + } /* * Is either source confined to a single process? NB: Motion to a single * process (qDisp or qExec) is the only motion in which we may use Merge @@ -2629,7 +2700,9 @@ create_split_update_path(PlannerInfo *root, Index rti, GpPolicy *policy, Path *s Path * turn_volatile_seggen_to_singleqe(PlannerInfo *root, Path *path, Node *node) { - if ((CdbPathLocus_IsSegmentGeneral(path->locus) || CdbPathLocus_IsGeneral(path->locus)) && + if ((CdbPathLocus_IsSegmentGeneral(path->locus) || + CdbPathLocus_IsGeneral(path->locus) || + CdbPathLocus_IsReplicated(path->locus)) && (contain_volatile_functions(node) || IsA(path, LimitPath))) { CdbPathLocus singleQE; @@ -2647,6 +2720,14 @@ turn_volatile_seggen_to_singleqe(PlannerInfo *root, Path *path, Node *node) getgpsegmentCount()); return path; } + else if (CdbPathLocus_IsReplicated(path->locus)) + { + /* + * Replicated locus is not supported yet in context of + * volatile functions handling. + */ + elog(ERROR, "could not devise a plan"); + } CdbPathLocus_MakeSingleQE(&singleQE, CdbPathLocus_NumSegments(path->locus)); diff --git a/src/backend/executor/nodeMotion.c b/src/backend/executor/nodeMotion.c index 7c0c86f5a0b3..a4dd0dc59e8e 100644 --- a/src/backend/executor/nodeMotion.c +++ b/src/backend/executor/nodeMotion.c @@ -433,7 +433,8 @@ execMotionSortedReceiver(MotionState *node) Motion *motion = (Motion *) node->ps.plan; EState *estate = node->ps.state; - AssertState(motion->motionType == MOTIONTYPE_GATHER && + AssertState((motion->motionType == MOTIONTYPE_GATHER || + motion->motionType == MOTIONTYPE_GATHER_SINGLE) && motion->sendSorted && hp != NULL); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 69bbb8dce7d7..b331246df35c 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -2269,6 +2269,16 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path) List *tlist; List *quals; + if (CdbPathLocus_IsReplicated(best_path->path.locus) && + contain_volatile_functions((Node *) best_path->qual)) + { + /* + * Replicated locus is not supported yet in context of volatile + * functions handling. + */ + elog(ERROR, "could not devise a plan"); + } + /* * Agg can project, so no need to be terribly picky about child tlist, but * we do need grouping columns to be available @@ -2404,6 +2414,16 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path) Assert(root->parse->groupingSets); Assert(rollups != NIL); + if (CdbPathLocus_IsReplicated(best_path->path.locus) && + contain_volatile_functions((Node *) best_path->qual)) + { + /* + * Replicated locus is not supported yet in context of volatile + * functions handling. + */ + elog(ERROR, "could not devise a plan"); + } + /* * Agg can project, so no need to be terribly picky about child tlist, but * we do need grouping columns to be available @@ -2934,6 +2954,20 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) best_path->onconflict, best_path->epqParam); + /* + * Currently, we prohibit applying volatile functions + * to the result of modifying CTE with locus Replicated. + * + * Assumption: we only create subroots for subqueries and CTEs, + * and only CTEs can have ModifyTable. We are creating a + * ModifyTable, therefore if we are a subroot we are inside a + * modifying CTE. + */ + if (root && root->parent_root && + CdbPathLocus_IsReplicated(best_path->path.locus) && + contain_volatile_functions((Node *) plan->returningLists)) + elog(ERROR, "could not devise a plan"); + copy_generic_path_info(&plan->plan, &best_path->path); if (list_length(plan->resultRelations) > 0 && Gp_role == GP_ROLE_DISPATCH) @@ -3164,7 +3198,8 @@ create_motion_plan(PlannerInfo *root, CdbMotionPath *path) motion = cdbpathtoplan_create_motion_plan(root, path, subplan); motion->senderSliceInfo = sendSlice; - if (subpath->locus.locustype == CdbLocusType_Replicated) + if (subpath->locus.locustype == CdbLocusType_Replicated && + motion->motionType != MOTIONTYPE_OUTER_QUERY) motion->motionType = MOTIONTYPE_GATHER_SINGLE; /* The topmost Plan in the sender slice must have 'flow' set correctly. */ diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 8559eb27c869..7adb741f324c 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -433,6 +433,16 @@ make_subplan(PlannerInfo *root, Query *orig_subquery, (contain_volatile_functions((Node *) subroot->parse->havingQual) || contain_volatile_functions((Node *) best_path->pathtarget->exprs))) CdbPathLocus_MakeSingleQE(&(best_path->locus), getgpsegmentCount()); + else if (CdbPathLocus_IsReplicated(best_path->locus) && + (contain_volatile_functions((Node *) subroot->parse->havingQual) || + contain_volatile_functions((Node *) best_path->pathtarget->exprs))) + { + /* + * Replicated locus is not supported yet in context of volatile + * functions handling. + */ + elog(ERROR, "could not devise a plan"); + } best_path = cdbllize_adjust_init_plan_path(root, best_path); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index f6e26365b7fa..490c3679bb0c 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1793,8 +1793,18 @@ set_append_path_locus(PlannerInfo *root, Path *pathnode, RelOptInfo *rel, CdbPathLocus_IsSegmentGeneral(subpath->locus) || CdbPathLocus_IsReplicated(subpath->locus)) { - numsegments = Min(numsegments, - CdbPathLocus_NumSegments(subpath->locus)); + /* + * If target locus type is Replicated, we can allow to align + * numsegments only to subpath with locus Replicated, because + * locus Replicated is executed strictly on its number of + * segments. + */ + if (targetlocustype != CdbLocusType_Replicated || + CdbPathLocus_IsReplicated(subpath->locus)) + { + numsegments = Min(numsegments, + CdbPathLocus_NumSegments(subpath->locus)); + } } } CdbPathLocus_MakeSimple(&targetlocus, targetlocustype, numsegments); diff --git a/src/test/regress/expected/with_clause.out b/src/test/regress/expected/with_clause.out index 98c03999e136..1f6b51dd1c30 100644 --- a/src/test/regress/expected/with_clause.out +++ b/src/test/regress/expected/with_clause.out @@ -1,3 +1,12 @@ +-- start_matchsubs +-- +-- m/ERROR: could not devise a plan \([a-z]+\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- m/ERROR: could not parallelize SubPlan \([a-z]+\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- end_matchsubs drop table if exists with_test1 cascade; NOTICE: table "with_test1" does not exist, skipping create table with_test1 (i int, t text, value int) distributed by (i); @@ -3098,3 +3107,959 @@ cte2 as (10 rows) drop table with_dml; +-- Test various SELECT statements from CTE with +-- modifying DML operations over replicated tables +--start_ignore +drop table if exists with_dml_dr; +NOTICE: table "with_dml_dr" does not exist, skipping +--end_ignore +create table with_dml_dr(i int, j int) distributed replicated; +-- Test plain SELECT from CTE with modifying DML queries on replicated table. +-- Explicit Gather Motion should present at the top of the plan. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(7 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Update on with_dml_dr + -> Seq Scan on with_dml_dr + Filter: (i <= 5) + Optimizer: Postgres-based planner +(8 rows) + +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Delete on with_dml_dr + -> Seq Scan on with_dml_dr + Filter: (i > 0) + Optimizer: Postgres-based planner +(8 rows) + +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + count +------- + 5 +(1 row) + +-- Test ORDER BY clause is applied correctly to the result of modifying +-- CTE over replicated table. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + QUERY PLAN +------------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: share0_ref1.i + -> Sort + Sort Key: share0_ref1.i + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(8 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + i +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- Test join operations between CTE conaining various modifying DML operations +-- over replicated table and other tables. Ensure that CdbLocusType_Replicated +-- is compatible with other type of locuses during joins. +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +insert into t_repl values (1, 1), (2, 2), (3, 3); +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (t_repl.i = share0_ref1.i) + -> Seq Scan on t_repl + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + count +------- + 3 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral +-- in case when relations are propagated on different number of segments. +--start_ignore +drop table if exists with_dml_dr_seg2; +NOTICE: table "with_dml_dr_seg2" does not exist, skipping +--end_ignore +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +-- SegmentGeneral's number of segments is larger than Replicated's, +-- the join is performed at number of segments of Replicated locus. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Explicit Gather Motion 2:1 (slice1; segments: 2) + -> Hash Join + Hash Cond: (t_repl.i = share0_ref1.i) + -> Seq Scan on t_repl + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr_seg2 + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + count +------- + 3 +(1 row) + +-- SegmentGeneral's number of segments is less than Replicated's, +-- the join is performed at SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (with_dml_dr_seg2.i = share0_ref1.i) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on with_dml_dr_seg2 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + count +------- + 5 +(1 row) + +drop table with_dml_dr_seg2; +drop table t_repl; +-- Test join CdbLocusType_Replicated with CdbLocusType_SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate + -> Hash Right Join + Hash Cond: (((random() * '0'::double precision)) = (share0_ref1.i)::double precision) + -> Function Scan on generate_series + -> Hash + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_Entry. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Right Join + Hash Cond: (gp_segment_configuration.port = share0_ref1.i) + -> Seq Scan on gp_segment_configuration + -> Hash + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_General. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (share0_ref1.i = a.a) + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Function Scan on generate_series a + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed +-- and CdbLocusType_Strewn. +--start_ignore +drop table if exists t_hashed; +NOTICE: table "t_hashed" does not exist, skipping +drop table if exists t_strewn; +NOTICE: table "t_strewn" does not exist, skipping +--end_ignore +create table t_hashed (i int, j int) distributed by (i); +create table t_strewn (i int, j int) distributed randomly; +insert into t_hashed select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn select i, i * 2 from generate_series(1, 10) i; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + QUERY PLAN +------------------------------------------------------------------------------ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Hash Join + Hash Cond: (t_hashed.i = share0_ref1.i) + -> Seq Scan on t_hashed + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Right Join + Hash Cond: (t_hashed.i = share0_ref1.i) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t_hashed + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + QUERY PLAN +------------------------------------------------------------------------------ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Hash Join + Hash Cond: (t_strewn.i = share0_ref1.i) + -> Seq Scan on t_strewn + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Right Join + Hash Cond: (t_strewn.i = share0_ref1.i) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t_strewn + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + count +------- + 5 +(1 row) + +drop table t_strewn; +drop table t_hashed; +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed and +-- CdbLocusType_Strewn in case when relations are propagated on +-- different number of segments. +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table t_hashed_seg2 (i int, j int) distributed by (i); +create table t_strewn_seg2 (i int, j int) distributed randomly; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t_hashed_seg2 select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn_seg2 select i, i * 2 from generate_series(1, 10) i; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (t_hashed_seg2.i = share0_ref1.i) + -> Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t_hashed_seg2 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (t_strewn_seg2.i = share0_ref1.i) + -> Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t_strewn_seg2 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + count +------- + 5 +(1 row) + +drop table t_strewn_seg2; +drop table t_hashed_seg2; +-- Test join CdbLocusType_Replicated with CdbLocusType_Replicated. +-- Join can be performed correctly only when CTE is shared. +set gp_cte_sharing = 1; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte a join cte b using (i); + QUERY PLAN +-------------------------------------------------------------------------- + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (share0_ref2.i = share0_ref1.i) + -> Shared Scan (share slice:id 1:0) + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i_1 + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte a join cte b using (i); + count +------- + 5 +(1 row) + +reset gp_cte_sharing; +-- Test prohibition of volatile functions applied to the +-- locus Replicated. The appropriate error should be thrown. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +-- Prohibit volatile qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select * from cte where cte.j > random(); +ERROR: could not devise a plan (cdbpath.c:2725) +-- Prohibit volatile returning list +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j * random() +) select * from cte; +ERROR: could not devise a plan (createplan.c:2949) +-- Prohibit volatile targetlist. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select i, j * random() from cte; +ERROR: could not build Motion path (cdbpath.c:637) +-- Prohibit volatile having qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select i, sum(j) from cte group by i having sum(j) > random(); +ERROR: could not devise a plan (createplan.c:2279) +-- Prohibit volatile join qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select * from cte join t_repl on cte.i = t_repl.j * random(); +ERROR: could not devise a plan (cdbpath.c:2725) +drop table t_repl; +-- Test that node with locus Replicated is not boradcasted inside +-- a correlated/uncorrlated SubPlan. In case of different number of +-- segments between replicated node inside the SubPlan and main plan +-- the proper error should be thrown. +--start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists with_dml_dr_seg2; +NOTICE: table "with_dml_dr_seg2" does not exist, skipping +--end_ignore +create table t1 (i int, j int) distributed by (i); +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t1 select i, i from generate_series(1, 6) i; +-- Case when number of segments is equal, no Broadcast at the top of CTE plan. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.i + -> Sort + Sort Key: t1.i + -> Hash Semi Join + Hash Cond: (t1.i = cte.i) + -> Seq Scan on t1 + -> Hash + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(13 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.i + -> Sort + Sort Key: t1.i + -> Seq Scan on t1 + Filter: (SubPlan 1) + SubPlan 1 + -> Result + Filter: (cte.i = t1.j) + -> Materialize + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(15 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +-- Case with unequal number of segments between replicated node inside the +-- SubPlan and main plan, can be handled by Explicit Gather Motion. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + QUERY PLAN +------------------------------------------------------------------------------ + Sort + Sort Key: t1.i + -> Hash Semi Join + Hash Cond: (t1.i = cte.i) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1 + -> Hash + -> Explicit Gather Motion 2:1 (slice2; segments: 2) + -> Subquery Scan on cte + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr_seg2 + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(13 rows) + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +-- Case with unequal number of segments between replicated node inside the +-- SubPlan and main plan, the error should be thrown. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:598) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:598) +drop table t1; +-- Test UNION ALL command when combining SegmentGeneral locus and Replicated. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +drop table if exists t_repl_seg2; +NOTICE: table "t_repl_seg2" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table t_repl_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t_repl values (2, 2); +insert into t_repl_seg2 values (2, 2); +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + QUERY PLAN +---------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: share0_ref1.i + -> Sort + Sort Key: share0_ref1.i + -> Append + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Result + -> Seq Scan on t_repl + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- Case when SegmentGeneral is originally propagated at less number +-- of segments. +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + QUERY PLAN +--------------------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: share0_ref1.i + -> Sort + Sort Key: share0_ref1.i + -> Append + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Result + -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Seq Scan on t_repl_seg2 + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- Case when final number of segments is aligned to Replicated subplan. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + QUERY PLAN +---------------------------------------------------- + Explicit Gather Motion 2:1 (slice1; segments: 2) + Merge Key: share0_ref1.i + -> Sort + Sort Key: share0_ref1.i + -> Append + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr_seg2 + -> Result + -> Seq Scan on t_repl + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +drop table t_repl_seg2; +drop table t_repl; +drop table with_dml_dr_seg2; +drop table with_dml_dr; diff --git a/src/test/regress/expected/with_clause_optimizer.out b/src/test/regress/expected/with_clause_optimizer.out index a623153fc1e6..ed6ce8a5e3a1 100644 --- a/src/test/regress/expected/with_clause_optimizer.out +++ b/src/test/regress/expected/with_clause_optimizer.out @@ -1,3 +1,12 @@ +-- start_matchsubs +-- +-- m/ERROR: could not devise a plan \([a-z]+\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- m/ERROR: could not parallelize SubPlan \([a-z]+\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- end_matchsubs drop table if exists with_test1 cascade; NOTICE: table "with_test1" does not exist, skipping create table with_test1 (i int, t text, value int) distributed by (i); @@ -3107,3 +3116,959 @@ cte2 as (10 rows) drop table with_dml; +-- Test various SELECT statements from CTE with +-- modifying DML operations over replicated tables +--start_ignore +drop table if exists with_dml_dr; +NOTICE: table "with_dml_dr" does not exist, skipping +--end_ignore +create table with_dml_dr(i int, j int) distributed replicated; +-- Test plain SELECT from CTE with modifying DML queries on replicated table. +-- Explicit Gather Motion should present at the top of the plan. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(7 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Update on with_dml_dr + -> Seq Scan on with_dml_dr + Filter: (i <= 5) + Optimizer: Postgres-based planner +(8 rows) + +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + QUERY PLAN +--------------------------------------------------------- + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Delete on with_dml_dr + -> Seq Scan on with_dml_dr + Filter: (i > 0) + Optimizer: Postgres-based planner +(8 rows) + +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + count +------- + 5 +(1 row) + +-- Test ORDER BY clause is applied correctly to the result of modifying +-- CTE over replicated table. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + QUERY PLAN +------------------------------------------------------------ + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: share0_ref1.i + -> Sort + Sort Key: share0_ref1.i + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(8 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + i +--- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +-- Test join operations between CTE conaining various modifying DML operations +-- over replicated table and other tables. Ensure that CdbLocusType_Replicated +-- is compatible with other type of locuses during joins. +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +insert into t_repl values (1, 1), (2, 2), (3, 3); +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (t_repl.i = share0_ref1.i) + -> Seq Scan on t_repl + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + count +------- + 3 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral +-- in case when relations are propagated on different number of segments. +--start_ignore +drop table if exists with_dml_dr_seg2; +NOTICE: table "with_dml_dr_seg2" does not exist, skipping +--end_ignore +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +-- SegmentGeneral's number of segments is larger than Replicated's, +-- the join is performed at number of segments of Replicated locus. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Explicit Gather Motion 2:1 (slice1; segments: 2) + -> Hash Join + Hash Cond: (t_repl.i = share0_ref1.i) + -> Seq Scan on t_repl + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr_seg2 + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + count +------- + 3 +(1 row) + +-- SegmentGeneral's number of segments is less than Replicated's, +-- the join is performed at SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (with_dml_dr_seg2.i = share0_ref1.i) + -> Gather Motion 1:1 (slice1; segments: 1) + -> Seq Scan on with_dml_dr_seg2 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + count +------- + 5 +(1 row) + +drop table with_dml_dr_seg2; +drop table t_repl; +-- Test join CdbLocusType_Replicated with CdbLocusType_SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Aggregate + -> Hash Right Join + Hash Cond: (((random() * '0'::double precision)) = (share0_ref1.i)::double precision) + -> Function Scan on generate_series + -> Hash + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_Entry. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Right Join + Hash Cond: (gp_segment_configuration.port = share0_ref1.i) + -> Seq Scan on gp_segment_configuration + -> Hash + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_General. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + QUERY PLAN +------------------------------------------------------------------ + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (share0_ref1.i = a.a) + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + -> Hash + -> Function Scan on generate_series a + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + count +------- + 5 +(1 row) + +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed +-- and CdbLocusType_Strewn. +--start_ignore +drop table if exists t_hashed; +NOTICE: table "t_hashed" does not exist, skipping +drop table if exists t_strewn; +NOTICE: table "t_strewn" does not exist, skipping +--end_ignore +create table t_hashed (i int, j int) distributed by (i); +create table t_strewn (i int, j int) distributed randomly; +insert into t_hashed select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn select i, i * 2 from generate_series(1, 10) i; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + QUERY PLAN +------------------------------------------------------------------------------ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Hash Join + Hash Cond: (t_hashed.i = share0_ref1.i) + -> Seq Scan on t_hashed + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Right Join + Hash Cond: (t_hashed.i = share0_ref1.i) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t_hashed + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + QUERY PLAN +------------------------------------------------------------------------------ + Finalize Aggregate + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate + -> Hash Join + Hash Cond: (t_strewn.i = share0_ref1.i) + -> Seq Scan on t_strewn + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Right Join + Hash Cond: (t_strewn.i = share0_ref1.i) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t_strewn + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + count +------- + 5 +(1 row) + +drop table t_strewn; +drop table t_hashed; +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed and +-- CdbLocusType_Strewn in case when relations are propagated on +-- different number of segments. +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table t_hashed_seg2 (i int, j int) distributed by (i); +create table t_strewn_seg2 (i int, j int) distributed randomly; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t_hashed_seg2 select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn_seg2 select i, i * 2 from generate_series(1, 10) i; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (t_hashed_seg2.i = share0_ref1.i) + -> Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t_hashed_seg2 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + count +------- + 5 +(1 row) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + QUERY PLAN +------------------------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (t_strewn_seg2.i = share0_ref1.i) + -> Gather Motion 2:1 (slice1; segments: 2) + -> Seq Scan on t_strewn_seg2 + -> Hash + -> Explicit Gather Motion 3:1 (slice2; segments: 3) + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + count +------- + 5 +(1 row) + +drop table t_strewn_seg2; +drop table t_hashed_seg2; +-- Test join CdbLocusType_Replicated with CdbLocusType_Replicated. +-- Join can be performed correctly only when CTE is shared. +set gp_cte_sharing = 1; +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte a join cte b using (i); + QUERY PLAN +-------------------------------------------------------------------------- + Aggregate + -> Explicit Gather Motion 3:1 (slice1; segments: 3) + -> Hash Join + Hash Cond: (share0_ref2.i = share0_ref1.i) + -> Shared Scan (share slice:id 1:0) + -> Hash + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i_1 + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte a join cte b using (i); + count +------- + 5 +(1 row) + +reset gp_cte_sharing; +-- Test prohibition of volatile functions applied to the +-- locus Replicated. The appropriate error should be thrown. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +-- Prohibit volatile qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select * from cte where cte.j > random(); +ERROR: could not devise a plan (cdbpath.c:2725) +-- Prohibit volatile returning list +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j * random() +) select * from cte; +ERROR: could not devise a plan (createplan.c:2949) +-- Prohibit volatile targetlist. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select i, j * random() from cte; +ERROR: could not build Motion path (cdbpath.c:637) +-- Prohibit volatile having qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select i, sum(j) from cte group by i having sum(j) > random(); +ERROR: could not devise a plan (createplan.c:2279) +-- Prohibit volatile join qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select * from cte join t_repl on cte.i = t_repl.j * random(); +ERROR: could not devise a plan (cdbpath.c:2725) +drop table t_repl; +-- Test that node with locus Replicated is not boradcasted inside +-- a correlated/uncorrlated SubPlan. In case of different number of +-- segments between replicated node inside the SubPlan and main plan +-- the proper error should be thrown. +--start_ignore +drop table if exists t1; +NOTICE: table "t1" does not exist, skipping +drop table if exists with_dml_dr_seg2; +NOTICE: table "with_dml_dr_seg2" does not exist, skipping +--end_ignore +create table t1 (i int, j int) distributed by (i); +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t1 select i, i from generate_series(1, 6) i; +-- Case when number of segments is equal, no Broadcast at the top of CTE plan. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + QUERY PLAN +------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.i + -> Sort + Sort Key: t1.i + -> Hash Semi Join + Hash Cond: (t1.i = cte.i) + -> Seq Scan on t1 + -> Hash + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(13 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + QUERY PLAN +-------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: t1.i + -> Sort + Sort Key: t1.i + -> Seq Scan on t1 + Filter: (SubPlan 1) + SubPlan 1 + -> Result + Filter: (cte.i = t1.j) + -> Materialize + -> Subquery Scan on cte + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(15 rows) + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +-- Case with unequal number of segments between replicated node inside the +-- SubPlan and main plan, can be handled by Explicit Gather Motion. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + QUERY PLAN +------------------------------------------------------------------------------ + Sort + Sort Key: t1.i + -> Hash Semi Join + Hash Cond: (t1.i = cte.i) + -> Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on t1 + -> Hash + -> Explicit Gather Motion 2:1 (slice2; segments: 2) + -> Subquery Scan on cte + -> Shared Scan (share slice:id 2:0) + -> Insert on with_dml_dr_seg2 + -> Function Scan on generate_series i + Optimizer: Postgres-based planner +(13 rows) + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +-- Case with unequal number of segments between replicated node inside the +-- SubPlan and main plan, the error should be thrown. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:598) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; +ERROR: could not parallelize SubPlan (cdbllize.c:598) +drop table t1; +-- Test UNION ALL command when combining SegmentGeneral locus and Replicated. +--start_ignore +drop table if exists t_repl; +NOTICE: table "t_repl" does not exist, skipping +drop table if exists t_repl_seg2; +NOTICE: table "t_repl_seg2" does not exist, skipping +--end_ignore +create table t_repl (i int, j int) distributed replicated; +select gp_debug_set_create_table_default_numsegments(2); + gp_debug_set_create_table_default_numsegments +----------------------------------------------- + 2 +(1 row) + +create table t_repl_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + gp_debug_reset_create_table_default_numsegments +------------------------------------------------- + +(1 row) + +insert into t_repl values (2, 2); +insert into t_repl_seg2 values (2, 2); +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + QUERY PLAN +---------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: share0_ref1.i + -> Sort + Sort Key: share0_ref1.i + -> Append + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Result + -> Seq Scan on t_repl + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- Case when SegmentGeneral is originally propagated at less number +-- of segments. +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + QUERY PLAN +--------------------------------------------------------------- + Explicit Gather Motion 3:1 (slice1; segments: 3) + Merge Key: share0_ref1.i + -> Sort + Sort Key: share0_ref1.i + -> Append + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr + -> Result + -> Broadcast Motion 1:3 (slice2; segments: 1) + -> Seq Scan on t_repl_seg2 + Optimizer: Postgres-based planner +(11 rows) + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +-- Case when final number of segments is aligned to Replicated subplan. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + QUERY PLAN +---------------------------------------------------- + Explicit Gather Motion 2:1 (slice1; segments: 2) + Merge Key: share0_ref1.i + -> Sort + Sort Key: share0_ref1.i + -> Append + -> Shared Scan (share slice:id 1:0) + -> Insert on with_dml_dr_seg2 + -> Result + -> Seq Scan on t_repl + Optimizer: Postgres-based planner +(10 rows) + +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + i | j +---+--- + 1 | 1 + 2 | 2 +(2 rows) + +drop table t_repl_seg2; +drop table t_repl; +drop table with_dml_dr_seg2; +drop table with_dml_dr; diff --git a/src/test/regress/sql/with_clause.sql b/src/test/regress/sql/with_clause.sql index 8bf83de7d8ff..db7fc8952f4d 100644 --- a/src/test/regress/sql/with_clause.sql +++ b/src/test/regress/sql/with_clause.sql @@ -1,3 +1,16 @@ +-- start_matchsubs +-- +-- m/ERROR: could not devise a plan \([a-z]+\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- m/ERROR: could not parallelize SubPlan \([a-z]+\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- end_matchsubs +-- start_ignore +create extension if not exists gp_debug_numsegments; +-- end_ignore + drop table if exists with_test1 cascade; create table with_test1 (i int, t text, value int) distributed by (i); insert into with_test1 select i%10, 'text' || i%20, i%30 from generate_series(0, 99) i; @@ -828,3 +841,509 @@ cte2 as ) select * from cte2; drop table with_dml; + +-- Test various SELECT statements from CTE with +-- modifying DML operations over replicated tables +--start_ignore +drop table if exists with_dml_dr; +--end_ignore +create table with_dml_dr(i int, j int) distributed replicated; + +-- Test plain SELECT from CTE with modifying DML queries on replicated table. +-- Explicit Gather Motion should present at the top of the plan. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte; + +explain (costs off) +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + +with cte as ( + update with_dml_dr + set j = j + 1 where i <= 5 + returning j +) select count(*) from cte; + +explain (costs off) +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + +with cte as ( + delete from with_dml_dr where i > 0 + returning i +) select count(*) from cte; + +-- Test ORDER BY clause is applied correctly to the result of modifying +-- CTE over replicated table. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select * from cte order by i; + +-- Test join operations between CTE conaining various modifying DML operations +-- over replicated table and other tables. Ensure that CdbLocusType_Replicated +-- is compatible with other type of locuses during joins. +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral. +--start_ignore +drop table if exists t_repl; +--end_ignore +create table t_repl (i int, j int) distributed replicated; + +insert into t_repl values (1, 1), (2, 2), (3, 3); + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + +-- Test join CdbLocusType_Replicated with CdbLocusType_SegmentGeneral +-- in case when relations are propagated on different number of segments. +--start_ignore +drop table if exists with_dml_dr_seg2; +--end_ignore +select gp_debug_set_create_table_default_numsegments(2); +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + + +-- SegmentGeneral's number of segments is larger than Replicated's, +-- the join is performed at number of segments of Replicated locus. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_repl using (i); + +-- SegmentGeneral's number of segments is less than Replicated's, +-- the join is performed at SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join with_dml_dr_seg2 using (i); + +drop table with_dml_dr_seg2; +drop table t_repl; + +-- Test join CdbLocusType_Replicated with CdbLocusType_SingleQE. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join + (select random() * 0 v from generate_series(1,5)) x on cte.i = x.v; + +-- Test join CdbLocusType_Replicated with CdbLocusType_Entry. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(-5,-1) i + returning i +) select count(*) from cte left join gp_segment_configuration on cte.i = port; + +-- Test join CdbLocusType_Replicated with CdbLocusType_General. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i) +select count(*) from cte join +(select a from generate_series(1,5) a) x on cte.i = x.a; + +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed +-- and CdbLocusType_Strewn. +--start_ignore +drop table if exists t_hashed; +drop table if exists t_strewn; +--end_ignore +create table t_hashed (i int, j int) distributed by (i); +create table t_strewn (i int, j int) distributed randomly; +insert into t_hashed select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn select i, i * 2 from generate_series(1, 10) i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed on cte.i = t_hashed.i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_hashed on cte.i = t_hashed.i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn on cte.i = t_strewn.i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte left join t_strewn on cte.i = t_strewn.i; + +drop table t_strewn; +drop table t_hashed; + +-- Test join CdbLocusType_Replicated with CdbLocusType_Hashed and +-- CdbLocusType_Strewn in case when relations are propagated on +-- different number of segments. +select gp_debug_set_create_table_default_numsegments(2); +create table t_hashed_seg2 (i int, j int) distributed by (i); +create table t_strewn_seg2 (i int, j int) distributed randomly; +select gp_debug_reset_create_table_default_numsegments(); + +insert into t_hashed_seg2 select i, i * 2 from generate_series(1, 10) i; +insert into t_strewn_seg2 select i, i * 2 from generate_series(1, 10) i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_hashed_seg2 on cte.i = t_hashed_seg2.i; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte join t_strewn_seg2 on cte.i = t_strewn_seg2.i; + +drop table t_strewn_seg2; +drop table t_hashed_seg2; + +-- Test join CdbLocusType_Replicated with CdbLocusType_Replicated. +-- Join can be performed correctly only when CTE is shared. +set gp_cte_sharing = 1; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte a join cte b using (i); + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i +) select count(*) from cte a join cte b using (i); + +reset gp_cte_sharing; + +-- Test prohibition of volatile functions applied to the +-- locus Replicated. The appropriate error should be thrown. +--start_ignore +drop table if exists t_repl; +--end_ignore +create table t_repl (i int, j int) distributed replicated; + +-- Prohibit volatile qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select * from cte where cte.j > random(); + +-- Prohibit volatile returning list +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j * random() +) select * from cte; + +-- Prohibit volatile targetlist. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select i, j * random() from cte; + +-- Prohibit volatile having qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select i, sum(j) from cte group by i having sum(j) > random(); + +-- Prohibit volatile join qualifications. +explain (costs off, verbose) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,5) i + returning i, j +) select * from cte join t_repl on cte.i = t_repl.j * random(); + +drop table t_repl; + +-- Test that node with locus Replicated is not boradcasted inside +-- a correlated/uncorrlated SubPlan. In case of different number of +-- segments between replicated node inside the SubPlan and main plan +-- the proper error should be thrown. +--start_ignore +drop table if exists t1; +drop table if exists with_dml_dr_seg2; +--end_ignore + +create table t1 (i int, j int) distributed by (i); +select gp_debug_set_create_table_default_numsegments(2); +create table with_dml_dr_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + +insert into t1 select i, i from generate_series(1, 6) i; + +-- Case when number of segments is equal, no Broadcast at the top of CTE plan. +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + +explain (costs off) +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + +with cte as ( + insert into with_dml_dr + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + +-- Case with unequal number of segments between replicated node inside the +-- SubPlan and main plan, can be handled by Explicit Gather Motion. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte) +order by 1; + +-- Case with unequal number of segments between replicated node inside the +-- SubPlan and main plan, the error should be thrown. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + +with cte as ( + insert into with_dml_dr_seg2 + select i, i * 100 from generate_series(1,6) i + returning i, j +) select * from t1 +where t1.i in (select i from cte where cte.i = t1.j) +order by 1; + +drop table t1; + +-- Test UNION ALL command when combining SegmentGeneral locus and Replicated. +--start_ignore +drop table if exists t_repl; +drop table if exists t_repl_seg2; +--end_ignore +create table t_repl (i int, j int) distributed replicated; + +select gp_debug_set_create_table_default_numsegments(2); +create table t_repl_seg2 (i int, j int) distributed replicated; +select gp_debug_reset_create_table_default_numsegments(); + +insert into t_repl values (2, 2); +insert into t_repl_seg2 values (2, 2); + +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + +-- Case when SegmentGeneral is originally propagated at less number +-- of segments. +explain (costs off) +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + +with cte as ( + insert into with_dml_dr + values (1,1) + returning i, j +) select * from cte union all select * from t_repl_seg2 +order by 1; + +-- Case when final number of segments is aligned to Replicated subplan. +explain (costs off) +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + +with cte as ( + insert into with_dml_dr_seg2 + values (1,1) + returning i, j +) select * from cte union all select * from t_repl +order by 1; + +drop table t_repl_seg2; +drop table t_repl; +drop table with_dml_dr_seg2; +drop table with_dml_dr;