diff --git a/benchmarks/expected-plans/q1.txt b/benchmarks/expected-plans/q1.txt index e02c1402a39f..c890796699f8 100644 --- a/benchmarks/expected-plans/q1.txt +++ b/benchmarks/expected-plans/q1.txt @@ -1,23 +1,23 @@ -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | 
-+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST | -| | Projection: lineitem.l_returnflag, lineitem.l_linestatus, SUM(lineitem.l_quantity) AS sum_qty, SUM(lineitem.l_extendedprice) AS sum_base_price, SUM(lineitem.l_extendedprice * 
Int64(1) - lineitem.l_discount) AS sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, AVG(lineitem.l_quantity) AS avg_qty, AVG(lineitem.l_extendedprice) AS avg_price, AVG(lineitem.l_discount) AS avg_disc, COUNT(UInt8(1)) AS count_order | -| | Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(CAST(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount AS Decimal128(38, 6)) * CAST(Decimal128(Some(100),23,2) + CAST(lineitem.l_tax AS Decimal128(23, 2)) AS Decimal128(38, 6))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), 
COUNT(UInt8(1))]] | -| | Projection: CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus | -| | Filter: lineitem.l_shipdate <= Date32("10471") | -| | TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate] | -| physical_plan | SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] | -| | SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] | -| | ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, SUM(lineitem.l_quantity)@2 as sum_qty, SUM(lineitem.l_extendedprice)@3 as sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@4 as sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, AVG(lineitem.l_quantity)@6 as avg_qty, AVG(lineitem.l_extendedprice)@7 as avg_price, AVG(lineitem.l_discount)@8 as avg_disc, COUNT(UInt8(1))@9 as count_order] | -| | AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), 
SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))] | -| | ProjectionExec: expr=[CAST(l_extendedprice@1 AS Decimal128(38, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(38, 4)) as CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_shipdate@6 <= 10471 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | 
-+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file 
++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: lineitem.l_returnflag ASC NULLS LAST, lineitem.l_linestatus ASC NULLS LAST | +| | Projection: lineitem.l_returnflag, lineitem.l_linestatus, SUM(lineitem.l_quantity) AS sum_qty, SUM(lineitem.l_extendedprice) AS sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax) AS sum_charge, AVG(lineitem.l_quantity) AS avg_qty, AVG(lineitem.l_extendedprice) AS avg_price, AVG(lineitem.l_discount) AS avg_disc, COUNT(UInt8(1)) AS count_order | +| | Aggregate: groupBy=[[lineitem.l_returnflag, lineitem.l_linestatus]], aggr=[[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 
2)))PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2)))CAST(lineitem.l_extendedprice AS Decimal128(23, 2))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(PROMOTE_PRECISION(CAST(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2)))CAST(lineitem.l_extendedprice AS Decimal128(23, 2))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount AS Decimal128(38, 4))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) + PROMOTE_PRECISION(CAST(lineitem.l_tax AS Decimal128(23, 2))) AS Decimal128(38, 4)))) AS SUM(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))]] | +| | Projection: PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))) AS PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2)))CAST(lineitem.l_extendedprice AS Decimal128(23, 2))lineitem.l_extendedprice, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_tax, lineitem.l_returnflag, lineitem.l_linestatus | +| | Filter: lineitem.l_shipdate <= Date32("10471") | +| | TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag, l_linestatus, l_shipdate] | +| physical_plan | SortPreservingMergeExec: [l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] | +| | SortExec: expr=[l_returnflag@0 ASC NULLS LAST,l_linestatus@1 ASC NULLS LAST] | +| | ProjectionExec: expr=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus, SUM(lineitem.l_quantity)@2 as sum_qty, SUM(lineitem.l_extendedprice)@3 as sum_base_price, SUM(lineitem.l_extendedprice * Int64(1) - 
lineitem.l_discount)@4 as sum_disc_price, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax)@5 as sum_charge, AVG(lineitem.l_quantity)@6 as avg_qty, AVG(lineitem.l_extendedprice)@7 as avg_price, AVG(lineitem.l_discount)@8 as avg_disc, COUNT(UInt8(1))@9 as count_order] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_returnflag@0 as l_returnflag, l_linestatus@1 as l_linestatus], aggr=[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_returnflag", index: 0 }, Column { name: "l_linestatus", index: 1 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[l_returnflag@5 as l_returnflag, l_linestatus@6 as l_linestatus], aggr=[SUM(lineitem.l_quantity), SUM(lineitem.l_extendedprice), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount * Int64(1) + lineitem.l_tax), AVG(lineitem.l_quantity), AVG(lineitem.l_extendedprice), AVG(lineitem.l_discount), COUNT(UInt8(1))] | +| | ProjectionExec: expr=[PROMOTE_PRECISION(CAST(l_extendedprice@1 AS Decimal128(23, 2))) * PROMOTE_PRECISION(Some(100),23,2 - PROMOTE_PRECISION(CAST(l_discount@2 AS Decimal128(23, 2)))) as PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS 
Decimal128(23, 2))) AS Decimal128(23, 2))Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2)))CAST(lineitem.l_extendedprice AS Decimal128(23, 2))lineitem.l_extendedprice, l_quantity@0 as l_quantity, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_tax@3 as l_tax, l_returnflag@4 as l_returnflag, l_linestatus@5 as l_linestatus] | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@6 <= 10471 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q10.txt b/benchmarks/expected-plans/q10.txt index 794ab50938ef..e9aa5271070a 100644 --- a/benchmarks/expected-plans/q10.txt +++ b/benchmarks/expected-plans/q10.txt @@ -1,63 +1,63 @@ 
-+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: revenue DESC NULLS FIRST | -| | Projection: customer.c_custkey, customer.c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment | -| | Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name | -| | Inner Join: customer.c_nationkey = nation.n_nationkey | -| | Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount | -| | Inner Join: 
orders.o_orderkey = lineitem.l_orderkey | -| | Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey | -| | Inner Join: customer.c_custkey = orders.o_custkey | -| | TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] | -| | Projection: orders.o_orderkey, orders.o_custkey | -| | Filter: orders.o_orderdate >= Date32("8674") AND orders.o_orderdate < Date32("8766") | -| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate] | -| | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Filter: lineitem.l_returnflag = Utf8("R") | -| | TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag] | -| | TableScan: nation projection=[n_nationkey, n_name] | -| physical_plan | SortPreservingMergeExec: [revenue@2 DESC] | -| | SortExec: expr=[revenue@2 DESC] | -| | ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] | -| | AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: "n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[c_custkey@0 as 
c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@10 as n_name] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_nationkey@3 as c_nationkey, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@9 as l_extendedprice, l_discount@10 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 7 }, Column { name: "l_orderkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_nationkey@3 as c_nationkey, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, o_orderkey@7 as o_orderkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] | -| | CoalesceBatchesExec: 
target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: o_orderdate@2 >= 8674 AND o_orderdate@2 < 8766 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_returnflag@3 = R | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file 
++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: revenue DESC NULLS FIRST | +| | Projection: customer.c_custkey, customer.c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, customer.c_acctbal, nation.n_name, customer.c_address, customer.c_phone, customer.c_comment | +| | Aggregate: groupBy=[[customer.c_custkey, customer.c_name, customer.c_acctbal, customer.c_phone, nation.n_name, customer.c_address, customer.c_comment]], aggr=[[SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount, nation.n_name | +| | Inner Join: customer.c_nationkey = nation.n_nationkey | +| | Projection: customer.c_custkey, 
customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, lineitem.l_extendedprice, lineitem.l_discount | +| | Inner Join: orders.o_orderkey = lineitem.l_orderkey | +| | Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey | +| | Inner Join: customer.c_custkey = orders.o_custkey | +| | TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment] | +| | Projection: orders.o_orderkey, orders.o_custkey | +| | Filter: orders.o_orderdate >= Date32("8674") AND orders.o_orderdate < Date32("8766") | +| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate] | +| | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Filter: lineitem.l_returnflag = Utf8("R") | +| | TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag] | +| | TableScan: nation projection=[n_nationkey, n_name] | +| physical_plan | SortPreservingMergeExec: [revenue@2 DESC] | +| | SortExec: expr=[revenue@2 DESC] | +| | ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment] | +| | AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }, Column { name: "c_name", index: 1 }, Column { name: "c_acctbal", index: 2 }, Column { name: "c_phone", index: 3 }, Column { name: 
"n_name", index: 4 }, Column { name: "c_address", index: 5 }, Column { name: "c_comment", index: 6 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@7 as l_extendedprice, l_discount@8 as l_discount, n_name@10 as n_name] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_nationkey@3 as c_nationkey, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, l_extendedprice@9 as l_extendedprice, l_discount@10 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 7 }, Column { name: "l_orderkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 7 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_address@2 as c_address, c_nationkey@3 as c_nationkey, c_phone@4 as c_phone, c_acctbal@5 as c_acctbal, c_comment@6 as c_comment, o_orderkey@7 as o_orderkey] | +| | CoalesceBatchesExec: 
target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: o_orderdate@2 >= 8674 AND o_orderdate@2 < 8766 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_returnflag@3 = R | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q11.txt b/benchmarks/expected-plans/q11.txt index a403a31e8b2d..0a6627494431 100644 --- a/benchmarks/expected-plans/q11.txt +++ b/benchmarks/expected-plans/q11.txt @@ -1,89 +1,89 @@ -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: value DESC NULLS FIRST | -| | Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value | -| | Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__scalar_sq_1.__value AS Decimal128(38, 15)) | -| | CrossJoin: | -| | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]] | -| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, 
ps_availqty, ps_supplycost] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = Utf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64) * Float64(0.0001) AS __value | -| | Aggregate: groupBy=[[]], aggr=[[SUM(CAST(partsupp.ps_supplycost AS Decimal128(26, 2)) * CAST(partsupp.ps_availqty AS Decimal128(26, 2)))]] | -| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | -| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | -| | TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = Utf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| physical_plan | SortExec: expr=[value@1 DESC] | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) > CAST(__value@2 AS Decimal128(38, 15)) | -| | CrossJoinExec | -| | CoalescePartitionsExec | -| | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as 
ps_availqty, ps_supplycost@2 as ps_supplycost] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = GERMANY | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64) * 0.0001 as __value] | -| | AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], 
aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | -| | ProjectionExec: expr=[ps_availqty@0 as ps_availqty, ps_supplycost@1 as ps_supplycost] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = GERMANY | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file 
++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: value DESC NULLS FIRST | +| | Projection: partsupp.ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS value | +| | Filter: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Decimal128(38, 15)) > CAST(__scalar_sq_1.__value AS Decimal128(38, 15)) | +| | CrossJoin: | +| | Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[SUM(PROMOTE_PRECISION(CAST(partsupp.ps_supplycost AS Decimal128(15, 2))) * PROMOTE_PRECISION(CAST(partsupp.ps_availqty AS Decimal128(15, 2))))]] | +| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: partsupp.ps_partkey, partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_availqty, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = Utf8("GERMANY") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| | SubqueryAlias: __scalar_sq_1 | +| | Projection: PROMOTE_PRECISION(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS Float64)) * Float64(0.0001) AS __value | +| | Aggregate: groupBy=[[]], aggr=[[SUM(PROMOTE_PRECISION(CAST(partsupp.ps_supplycost AS Decimal128(15, 2))) * PROMOTE_PRECISION(CAST(partsupp.ps_availqty AS Decimal128(15, 2))))]] | +| | Projection: partsupp.ps_availqty, 
partsupp.ps_supplycost | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: partsupp.ps_availqty, partsupp.ps_supplycost, supplier.s_nationkey | +| | Inner Join: partsupp.ps_suppkey = supplier.s_suppkey | +| | TableScan: partsupp projection=[ps_suppkey, ps_availqty, ps_supplycost] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = Utf8("GERMANY") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| physical_plan | SortExec: expr=[value@1 DESC] | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 as value] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@1 AS Decimal128(38, 15)) > CAST(__value@2 AS Decimal128(38, 15)) | +| | CrossJoinExec | +| | CoalescePartitionsExec | +| | AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_partkey", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[ps_partkey@0 as ps_partkey, ps_availqty@2 as ps_availqty, 
ps_supplycost@3 as ps_supplycost, s_nationkey@5 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = GERMANY | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | ProjectionExec: expr=[PROMOTE_PRECISION(CAST(SUM(partsupp.ps_supplycost * partsupp.ps_availqty)@0 AS Float64)) * 0.0001 as __value] | +| | AggregateExec: mode=Final, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(partsupp.ps_supplycost * partsupp.ps_availqty)] | +| | ProjectionExec: expr=[ps_availqty@0 as ps_availqty, ps_supplycost@1 as ps_supplycost] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), 
input_partitions=2 | +| | ProjectionExec: expr=[ps_availqty@1 as ps_availqty, ps_supplycost@2 as ps_supplycost, s_nationkey@4 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "ps_suppkey", index: 0 }, Column { name: "s_suppkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = GERMANY | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q14.txt b/benchmarks/expected-plans/q14.txt index b1b8a423d0b1..7713f20d54d5 100644 --- a/benchmarks/expected-plans/q14.txt +++ b/benchmarks/expected-plans/q14.txt @@ -1,30 +1,30 @@ 
-+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | 
-+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: Float64(100) * CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64) / CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64) AS promo_revenue | -| | Aggregate: groupBy=[[]], aggr=[[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS 
Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice, part.p_type | -| | Inner Join: 
lineitem.l_partkey = part.p_partkey | -| | Projection: lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Filter: lineitem.l_shipdate >= Date32("9374") AND lineitem.l_shipdate < Date32("9404") | -| | TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate] | -| | TableScan: part projection=[p_partkey, p_type] | -| physical_plan | ProjectionExec: expr=[100 * CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64) / CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64) as promo_revenue] | -| | AggregateExec: mode=Final, gby=[], aggr=[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | ProjectionExec: expr=[CAST(l_extendedprice@1 AS Decimal128(38, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(38, 4)) as CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)CAST(lineitem.l_extendedprice AS Decimal128(38, 4))lineitem.l_extendedprice, p_type@4 as p_type] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", 
index: 0 }, Column { name: "p_partkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_shipdate@3 >= 9374 AND l_shipdate@3 < 9404 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Projection: PROMOTE_PRECISION(CAST(Float64(100) * PROMOTE_PRECISION(CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END) AS Float64)) AS Float64)) / PROMOTE_PRECISION(CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS Float64)) AS promo_revenue | +| | Aggregate: groupBy=[[]], aggr=[[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))CAST(lineitem.l_discount AS Decimal128(23, 
2))lineitem.l_discountDecimal128(Some(100),23,2)PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2)))CAST(lineitem.l_extendedprice AS Decimal128(23, 2))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2)))CAST(lineitem.l_extendedprice AS Decimal128(23, 2))lineitem.l_extendedprice AS lineitem.l_extendedprice * Decimal128(Some(100),23,2) - lineitem.l_discount) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))) AS PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - 
PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2)))CAST(lineitem.l_extendedprice AS Decimal128(23, 2))lineitem.l_extendedprice, part.p_type | +| | Inner Join: lineitem.l_partkey = part.p_partkey | +| | Projection: lineitem.l_partkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Filter: lineitem.l_shipdate >= Date32("9374") AND lineitem.l_shipdate < Date32("9404") | +| | TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount, l_shipdate] | +| | TableScan: part projection=[p_partkey, p_type] | +| physical_plan | ProjectionExec: expr=[PROMOTE_PRECISION(100 * PROMOTE_PRECISION(CAST(SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END)@0 AS Float64))) / PROMOTE_PRECISION(CAST(SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 AS Float64)) as promo_revenue] | +| | AggregateExec: mode=Final, gby=[], aggr=[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[SUM(CASE WHEN part.p_type LIKE Utf8("PROMO%") THEN lineitem.l_extendedprice * Int64(1) - lineitem.l_discount ELSE Int64(0) END), SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | ProjectionExec: expr=[PROMOTE_PRECISION(CAST(l_extendedprice@1 AS Decimal128(23, 2))) * PROMOTE_PRECISION(Some(100),23,2 - 
PROMOTE_PRECISION(CAST(l_discount@2 AS Decimal128(23, 2)))) as PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2)))CAST(lineitem.l_discount AS Decimal128(23, 2))lineitem.l_discountDecimal128(Some(100),23,2)PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2)))CAST(lineitem.l_extendedprice AS Decimal128(23, 2))lineitem.l_extendedprice, p_type@4 as p_type] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_partkey", index: 0 }, Column { name: "p_partkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@3 >= 9374 AND l_shipdate@3 < 9404 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q15.txt b/benchmarks/expected-plans/q15.txt index 50cfe85418e1..73feaa008b58 100644 --- a/benchmarks/expected-plans/q15.txt +++ b/benchmarks/expected-plans/q15.txt @@ -1,66 +1,66 @@ -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: supplier.s_suppkey ASC NULLS LAST | -| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue | -| | Inner Join: revenue0.total_revenue = __scalar_sq_1.__value | -| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue | -| | Inner Join: supplier.s_suppkey = revenue0.supplier_no | -| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone] | -| | SubqueryAlias: revenue0 | -| | Projection: lineitem.l_suppkey AS supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue | -| | Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS 
Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") | -| | TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] | -| | SubqueryAlias: __scalar_sq_1 | -| | Projection: MAX(revenue0.total_revenue) AS __value | -| | Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]] | -| | SubqueryAlias: revenue0 | -| | Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue | -| | Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") | -| | TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] | -| physical_plan | SortPreservingMergeExec: [s_suppkey@0 ASC NULLS LAST] | -| | SortExec: expr=[s_suppkey@0 ASC NULLS LAST] | -| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@4 as total_revenue] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "total_revenue", index: 4 }, Column { name: "__value", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "total_revenue", index: 4 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[s_suppkey@0 as 
s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@5 as total_revenue] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "supplier_no", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[l_suppkey@0 as supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] | -| | AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "__value", index: 0 }], 2), input_partitions=1 | -| | ProjectionExec: expr=[MAX(revenue0.total_revenue)@0 as __value] | -| | AggregateExec: mode=Final, gby=[], aggr=[MAX(revenue0.total_revenue)] | -| | CoalescePartitionsExec | -| | AggregateExec: mode=Partial, gby=[], aggr=[MAX(revenue0.total_revenue)] | -| | ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] | -| | 
AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: supplier.s_suppkey ASC NULLS LAST | +| | 
Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue | +| | Inner Join: revenue0.total_revenue = __scalar_sq_1.__value | +| | Projection: supplier.s_suppkey, supplier.s_name, supplier.s_address, supplier.s_phone, revenue0.total_revenue | +| | Inner Join: supplier.s_suppkey = revenue0.supplier_no | +| | TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone] | +| | SubqueryAlias: revenue0 | +| | Projection: lineitem.l_suppkey AS supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue | +| | Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Filter: lineitem.l_shipdate >= Date32("9496") AND lineitem.l_shipdate < Date32("9587") | +| | TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] | +| | SubqueryAlias: __scalar_sq_1 | +| | Projection: MAX(revenue0.total_revenue) AS __value | +| | Aggregate: groupBy=[[]], aggr=[[MAX(revenue0.total_revenue)]] | +| | SubqueryAlias: revenue0 | +| | Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue | +| | Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Filter: lineitem.l_shipdate >= Date32("9496") AND 
lineitem.l_shipdate < Date32("9587") | +| | TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate] | +| physical_plan | SortPreservingMergeExec: [s_suppkey@0 ASC NULLS LAST] | +| | SortExec: expr=[s_suppkey@0 ASC NULLS LAST] | +| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@4 as total_revenue] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "total_revenue", index: 4 }, Column { name: "__value", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "total_revenue", index: 4 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[s_suppkey@0 as s_suppkey, s_name@1 as s_name, s_address@2 as s_address, s_phone@3 as s_phone, total_revenue@5 as total_revenue] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "supplier_no", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | ProjectionExec: expr=[l_suppkey@0 as supplier_no, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | 
RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "__value", index: 0 }], 2), input_partitions=1 | +| | ProjectionExec: expr=[MAX(revenue0.total_revenue)@0 as __value] | +| | AggregateExec: mode=Final, gby=[], aggr=[MAX(revenue0.total_revenue)] | +| | CoalescePartitionsExec | +| | AggregateExec: mode=Partial, gby=[], aggr=[MAX(revenue0.total_revenue)] | +| | ProjectionExec: expr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_suppkey@0 as l_suppkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@3 >= 9496 AND l_shipdate@3 < 9587 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q17.txt b/benchmarks/expected-plans/q17.txt index 998450328cfa..688bde9bd2e4 100644 --- a/benchmarks/expected-plans/q17.txt +++ b/benchmarks/expected-plans/q17.txt @@ -1,7 +1,7 @@ +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | plan_type | plan | +---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Projection: CAST(SUM(lineitem.l_extendedprice) AS Float64) / Float64(7) AS avg_yearly | +| logical_plan | Projection: PROMOTE_PRECISION(CAST(SUM(lineitem.l_extendedprice) AS Float64)) / Float64(7) AS avg_yearly | | | Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice)]] | | | Projection: lineitem.l_extendedprice | | | Filter: CAST(lineitem.l_quantity AS Decimal128(30, 15)) < CAST(__scalar_sq_1.__value AS Decimal128(30, 15)) AND __scalar_sq_1.l_partkey = lineitem.l_partkey | @@ -14,10 +14,10 @@ | | Filter: part.p_brand = Utf8("Brand#23") AND part.p_container = Utf8("MED BOX") | | | TableScan: part projection=[p_partkey, p_brand, p_container] | | | SubqueryAlias: __scalar_sq_1 | -| | Projection: lineitem.l_partkey, Float64(0.2) * CAST(AVG(lineitem.l_quantity) AS Float64) AS __value | +| | Projection: lineitem.l_partkey, Float64(0.2) * PROMOTE_PRECISION(CAST(AVG(lineitem.l_quantity) AS Float64)) AS __value | | | Aggregate: 
groupBy=[[lineitem.l_partkey]], aggr=[[AVG(lineitem.l_quantity)]] | | | TableScan: lineitem projection=[l_partkey, l_quantity] | -| physical_plan | ProjectionExec: expr=[CAST(SUM(lineitem.l_extendedprice)@0 AS Float64) / 7 as avg_yearly] | +| physical_plan | ProjectionExec: expr=[PROMOTE_PRECISION(CAST(SUM(lineitem.l_extendedprice)@0 AS Float64)) / 7 as avg_yearly] | | | AggregateExec: mode=Final, gby=[], aggr=[SUM(lineitem.l_extendedprice)] | | | CoalescePartitionsExec | | | AggregateExec: mode=Partial, gby=[], aggr=[SUM(lineitem.l_extendedprice)] | @@ -44,7 +44,7 @@ | | CoalesceBatchesExec: target_batch_size=8192 | | | FilterExec: p_brand@1 = Brand#23 AND p_container@2 = MED BOX | | | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[l_partkey@0 as l_partkey, 0.2 * CAST(AVG(lineitem.l_quantity)@1 AS Float64) as __value] | +| | ProjectionExec: expr=[l_partkey@0 as l_partkey, 0.2 * PROMOTE_PRECISION(CAST(AVG(lineitem.l_quantity)@1 AS Float64)) as __value] | | | AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey], aggr=[AVG(lineitem.l_quantity)] | | | CoalesceBatchesExec: target_batch_size=8192 | | | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }], 2), input_partitions=2 | diff --git a/benchmarks/expected-plans/q19.txt b/benchmarks/expected-plans/q19.txt index 610238203282..d2f66885ce83 100644 --- a/benchmarks/expected-plans/q19.txt +++ b/benchmarks/expected-plans/q19.txt @@ -2,7 +2,7 @@ | plan_type | plan | 
+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | logical_plan | Projection: SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue | -| | Aggregate: groupBy=[[]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Aggregate: groupBy=[[]], aggr=[[SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | | | Projection: lineitem.l_extendedprice, lineitem.l_discount | | | Filter: part.p_brand = Utf8("Brand#12") AND part.p_container IN ([Utf8("SM CASE"), Utf8("SM BOX"), Utf8("SM PACK"), Utf8("SM PKG")]) AND lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND 
part.p_container IN ([Utf8("MED BAG"), Utf8("MED BOX"), Utf8("MED PKG"), Utf8("MED PACK")]) AND lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_container IN ([Utf8("LG CASE"), Utf8("LG BOX"), Utf8("LG PACK"), Utf8("LG PKG")]) AND lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2) AND part.p_size <= Int32(15) | | | Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, part.p_brand, part.p_size, part.p_container | diff --git a/benchmarks/expected-plans/q20.txt b/benchmarks/expected-plans/q20.txt index a3209f3e2d7d..cb78a4e0e05d 100644 --- a/benchmarks/expected-plans/q20.txt +++ b/benchmarks/expected-plans/q20.txt @@ -22,7 +22,7 @@ | | Filter: part.p_name LIKE Utf8("forest%") | | | TableScan: part projection=[p_partkey, p_name] | | | SubqueryAlias: __scalar_sq_1 | -| | Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * CAST(SUM(lineitem.l_quantity) AS Float64) AS __value | +| | Projection: lineitem.l_partkey, lineitem.l_suppkey, Float64(0.5) * PROMOTE_PRECISION(CAST(SUM(lineitem.l_quantity) AS Float64)) AS __value | | | Aggregate: groupBy=[[lineitem.l_partkey, lineitem.l_suppkey]], aggr=[[SUM(lineitem.l_quantity)]] | | | Projection: lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity | | | Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") | @@ -71,7 +71,7 @@ | | CoalesceBatchesExec: target_batch_size=8192 | | | FilterExec: p_name@1 LIKE forest% | | | MemoryExec: partitions=0, partition_sizes=[] | -| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, 0.5 * CAST(SUM(lineitem.l_quantity)@2 AS Float64) as __value] | +| | ProjectionExec: expr=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey, 0.5 * PROMOTE_PRECISION(CAST(SUM(lineitem.l_quantity)@2 AS Float64)) as __value] | | 
| AggregateExec: mode=FinalPartitioned, gby=[l_partkey@0 as l_partkey, l_suppkey@1 as l_suppkey], aggr=[SUM(lineitem.l_quantity)] | | | CoalesceBatchesExec: target_batch_size=8192 | | | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 0 }, Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | diff --git a/benchmarks/expected-plans/q21.txt b/benchmarks/expected-plans/q21.txt index 47285e1c16e5..d91666018b82 100644 --- a/benchmarks/expected-plans/q21.txt +++ b/benchmarks/expected-plans/q21.txt @@ -1,96 +1,96 @@ -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST | -| | Projection: supplier.s_name, COUNT(UInt8(1)) AS numwait | -| | Aggregate: groupBy=[[supplier.s_name]], aggr=[[COUNT(UInt8(1))]] | -| | Projection: supplier.s_name | -| | LeftAnti Join: l1.l_orderkey = l3.l_orderkey Filter: l3.l_suppkey != l1.l_suppkey | -| | LeftSemi Join: l1.l_orderkey = l2.l_orderkey Filter: l2.l_suppkey != l1.l_suppkey | -| | Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey | -| | Inner Join: l1.l_orderkey = orders.o_orderkey | -| | Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey | -| | Inner Join: 
supplier.s_suppkey = l1.l_suppkey | -| | TableScan: supplier projection=[s_suppkey, s_name, s_nationkey] | -| | SubqueryAlias: l1 | -| | Projection: lineitem.l_orderkey, lineitem.l_suppkey | -| | Filter: lineitem.l_receiptdate > lineitem.l_commitdate | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate] | -| | Projection: orders.o_orderkey | -| | Filter: orders.o_orderstatus = Utf8("F") | -| | TableScan: orders projection=[o_orderkey, o_orderstatus] | -| | Projection: nation.n_nationkey | -| | Filter: nation.n_name = Utf8("SAUDI ARABIA") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| | SubqueryAlias: l2 | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey] | -| | SubqueryAlias: l3 | -| | Projection: lineitem.l_orderkey, lineitem.l_suppkey | -| | Filter: lineitem.l_receiptdate > lineitem.l_commitdate | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate] | -| physical_plan | SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST] | -| | SortExec: expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST] | -| | ProjectionExec: expr=[s_name@0 as s_name, COUNT(UInt8(1))@1 as numwait] | -| | AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[COUNT(UInt8(1))] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_name", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[COUNT(UInt8(1))] | -| | ProjectionExec: expr=[s_name@0 as s_name] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })], filter=BinaryExpr { left: Column { name: "l_suppkey", index: 1 }, op: NotEq, right: Column { name: "l_suppkey", index: 0 } } | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, 
join_type=LeftSemi, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })], filter=BinaryExpr { left: Column { name: "l_suppkey", index: 1 }, op: NotEq, right: Column { name: "l_suppkey", index: 0 } } | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[s_name@0 as s_name, l_orderkey@2 as l_orderkey, l_suppkey@3 as l_suppkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 1 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[s_name@0 as s_name, s_nationkey@1 as s_nationkey, l_orderkey@2 as l_orderkey, l_suppkey@3 as l_suppkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 2 }, Column { name: "o_orderkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[s_name@1 as s_name, s_nationkey@2 as s_nationkey, l_orderkey@3 as l_orderkey, l_suppkey@4 as l_suppkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "l_suppkey", index: 1 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: 
target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_receiptdate@3 > l_commitdate@2 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: o_orderstatus@1 = F | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = SAUDI ARABIA | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_receiptdate@3 > l_commitdate@2 | -| | MemoryExec: partitions=0, 
partition_sizes=[] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: numwait DESC NULLS FIRST, supplier.s_name ASC NULLS LAST | +| | Projection: supplier.s_name, COUNT(UInt8(1)) AS numwait | +| | Aggregate: groupBy=[[supplier.s_name]], aggr=[[COUNT(UInt8(1))]] | +| | Projection: supplier.s_name | +| | LeftAnti Join: l1.l_orderkey = l3.l_orderkey Filter: l3.l_suppkey != l1.l_suppkey | +| | LeftSemi Join: l1.l_orderkey = l2.l_orderkey Filter: l2.l_suppkey != l1.l_suppkey | +| | Projection: supplier.s_name, l1.l_orderkey, l1.l_suppkey | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey | +| | Inner Join: l1.l_orderkey = orders.o_orderkey | +| | Projection: supplier.s_name, supplier.s_nationkey, l1.l_orderkey, l1.l_suppkey | +| | Inner Join: supplier.s_suppkey = l1.l_suppkey | +| | TableScan: supplier projection=[s_suppkey, s_name, s_nationkey] | +| | SubqueryAlias: l1 | +| | Projection: 
lineitem.l_orderkey, lineitem.l_suppkey | +| | Filter: lineitem.l_receiptdate > lineitem.l_commitdate | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate] | +| | Projection: orders.o_orderkey | +| | Filter: orders.o_orderstatus = Utf8("F") | +| | TableScan: orders projection=[o_orderkey, o_orderstatus] | +| | Projection: nation.n_nationkey | +| | Filter: nation.n_name = Utf8("SAUDI ARABIA") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| | SubqueryAlias: l2 | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey] | +| | SubqueryAlias: l3 | +| | Projection: lineitem.l_orderkey, lineitem.l_suppkey | +| | Filter: lineitem.l_receiptdate > lineitem.l_commitdate | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_commitdate, l_receiptdate] | +| physical_plan | SortPreservingMergeExec: [numwait@1 DESC,s_name@0 ASC NULLS LAST] | +| | SortExec: expr=[numwait@1 DESC,s_name@0 ASC NULLS LAST] | +| | ProjectionExec: expr=[s_name@0 as s_name, COUNT(UInt8(1))@1 as numwait] | +| | AggregateExec: mode=FinalPartitioned, gby=[s_name@0 as s_name], aggr=[COUNT(UInt8(1))] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_name", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[s_name@0 as s_name], aggr=[COUNT(UInt8(1))] | +| | ProjectionExec: expr=[s_name@0 as s_name] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=LeftAnti, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })], filter=BinaryExpr { left: Column { name: "l_suppkey", index: 1 }, op: NotEq, right: Column { name: "l_suppkey", index: 0 }, data_type: None } | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=LeftSemi, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })], filter=BinaryExpr { left: Column 
{ name: "l_suppkey", index: 1 }, op: NotEq, right: Column { name: "l_suppkey", index: 0 }, data_type: None } | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[s_name@0 as s_name, l_orderkey@2 as l_orderkey, l_suppkey@3 as l_suppkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 1 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 1 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[s_name@0 as s_name, s_nationkey@1 as s_nationkey, l_orderkey@2 as l_orderkey, l_suppkey@3 as l_suppkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 2 }, Column { name: "o_orderkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 2 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[s_name@1 as s_name, s_nationkey@2 as s_nationkey, l_orderkey@3 as l_orderkey, l_suppkey@4 as l_suppkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "l_suppkey", index: 1 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | +| | 
RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_receiptdate@3 > l_commitdate@2 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: o_orderstatus@1 = F | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[n_nationkey@0 as n_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = SAUDI ARABIA | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_suppkey@1 as l_suppkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_receiptdate@3 > l_commitdate@2 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q3.txt b/benchmarks/expected-plans/q3.txt index 36ddcc8fdd15..7a4818686413 100644 --- a/benchmarks/expected-plans/q3.txt +++ b/benchmarks/expected-plans/q3.txt @@ -1,55 +1,55 @@ -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST | -| | Projection: lineitem.l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority | -| | Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount | -| | 
Inner Join: orders.o_orderkey = lineitem.l_orderkey | -| | Projection: orders.o_orderkey, orders.o_orderdate, orders.o_shippriority | -| | Inner Join: customer.c_custkey = orders.o_custkey | -| | Projection: customer.c_custkey | -| | Filter: customer.c_mktsegment = Utf8("BUILDING") | -| | TableScan: customer projection=[c_custkey, c_mktsegment] | -| | Filter: orders.o_orderdate < Date32("9204") | -| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority] | -| | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Filter: lineitem.l_shipdate > Date32("9204") | -| | TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate] | -| physical_plan | SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST] | -| | SortExec: expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST] | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] | -| | AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | ProjectionExec: expr=[o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority, l_orderkey@3 as l_orderkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | 
HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 0 }, Column { name: "l_orderkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[o_orderkey@1 as o_orderkey, o_orderdate@3 as o_orderdate, o_shippriority@4 as o_shippriority] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[c_custkey@0 as c_custkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: c_mktsegment@1 = BUILDING | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: o_orderdate@2 < 9204 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_shipdate@3 > 9204 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | 
-+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: revenue DESC NULLS FIRST, orders.o_orderdate ASC NULLS LAST | +| | Projection: lineitem.l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue, orders.o_orderdate, orders.o_shippriority | +| | Aggregate: groupBy=[[lineitem.l_orderkey, orders.o_orderdate, orders.o_shippriority]], aggr=[[SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: orders.o_orderdate, orders.o_shippriority, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Inner Join: 
orders.o_orderkey = lineitem.l_orderkey | +| | Projection: orders.o_orderkey, orders.o_orderdate, orders.o_shippriority | +| | Inner Join: customer.c_custkey = orders.o_custkey | +| | Projection: customer.c_custkey | +| | Filter: customer.c_mktsegment = Utf8("BUILDING") | +| | TableScan: customer projection=[c_custkey, c_mktsegment] | +| | Filter: orders.o_orderdate < Date32("9204") | +| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate, o_shippriority] | +| | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Filter: lineitem.l_shipdate > Date32("9204") | +| | TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_shipdate] | +| physical_plan | SortPreservingMergeExec: [revenue@1 DESC,o_orderdate@2 ASC NULLS LAST] | +| | SortExec: expr=[revenue@1 DESC,o_orderdate@2 ASC NULLS LAST] | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@3 as revenue, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority] | +| | AggregateExec: mode=FinalPartitioned, gby=[l_orderkey@0 as l_orderkey, o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderdate", index: 1 }, Column { name: "o_shippriority", index: 2 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[l_orderkey@2 as l_orderkey, o_orderdate@0 as o_orderdate, o_shippriority@1 as o_shippriority], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | ProjectionExec: expr=[o_orderdate@1 as o_orderdate, o_shippriority@2 as o_shippriority, l_orderkey@3 as l_orderkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 0 }, Column { name: "l_orderkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[o_orderkey@1 as o_orderkey, o_orderdate@3 as o_orderdate, o_shippriority@4 as o_shippriority] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[c_custkey@0 as c_custkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: c_mktsegment@1 = BUILDING | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: o_orderdate@2 < 9204 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@3 > 9204 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q5.txt b/benchmarks/expected-plans/q5.txt index de6854eecd51..820048269633 100644 --- a/benchmarks/expected-plans/q5.txt +++ b/benchmarks/expected-plans/q5.txt @@ -1,85 +1,85 @@ -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: revenue DESC NULLS FIRST | -| | Projection: nation.n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue | -| | Aggregate: groupBy=[[nation.n_name]], aggr=[[SUM(CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name | -| | Inner Join: nation.n_regionkey = region.r_regionkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey | -| | Inner Join: supplier.s_nationkey = 
nation.n_nationkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | -| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey, customer.c_nationkey = supplier.s_nationkey | -| | Projection: customer.c_nationkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Inner Join: orders.o_orderkey = lineitem.l_orderkey | -| | Projection: customer.c_nationkey, orders.o_orderkey | -| | Inner Join: customer.c_custkey = orders.o_custkey | -| | TableScan: customer projection=[c_custkey, c_nationkey] | -| | Projection: orders.o_orderkey, orders.o_custkey | -| | Filter: orders.o_orderdate >= Date32("8766") AND orders.o_orderdate < Date32("9131") | -| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate] | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | TableScan: nation projection=[n_nationkey, n_name, n_regionkey] | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = Utf8("ASIA") | -| | TableScan: region projection=[r_regionkey, r_name] | -| physical_plan | SortPreservingMergeExec: [revenue@1 DESC] | -| | SortExec: expr=[revenue@1 DESC] | -| | ProjectionExec: expr=[n_name@0 as n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] | -| | AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_name", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | -| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, n_name@2 as n_name] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: 
mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 3 }, Column { name: "r_regionkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, n_name@4 as n_name, n_regionkey@5 as n_regionkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@5 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 }), (Column { name: "c_nationkey", index: 0 }, Column { name: "s_nationkey", index: 1 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[c_nationkey@0 as c_nationkey, l_suppkey@3 as l_suppkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: 
expr=[c_nationkey@1 as c_nationkey, o_orderkey@2 as o_orderkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: o_orderdate@2 >= 8766 AND o_orderdate@2 < 9131 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: 
r_name@1 = ASIA | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: revenue DESC NULLS FIRST | +| | Projection: nation.n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS revenue | +| | Aggregate: groupBy=[[nation.n_name]], aggr=[[SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2)))) AS SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]] | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name | +| | Inner Join: nation.n_regionkey = region.r_regionkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, nation.n_name, nation.n_regionkey | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: lineitem.l_extendedprice, 
lineitem.l_discount, supplier.s_nationkey | +| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey, customer.c_nationkey = supplier.s_nationkey | +| | Projection: customer.c_nationkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Inner Join: orders.o_orderkey = lineitem.l_orderkey | +| | Projection: customer.c_nationkey, orders.o_orderkey | +| | Inner Join: customer.c_custkey = orders.o_custkey | +| | TableScan: customer projection=[c_custkey, c_nationkey] | +| | Projection: orders.o_orderkey, orders.o_custkey | +| | Filter: orders.o_orderdate >= Date32("8766") AND orders.o_orderdate < Date32("9131") | +| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate] | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | TableScan: nation projection=[n_nationkey, n_name, n_regionkey] | +| | Projection: region.r_regionkey | +| | Filter: region.r_name = Utf8("ASIA") | +| | TableScan: region projection=[r_regionkey, r_name] | +| physical_plan | SortPreservingMergeExec: [revenue@1 DESC] | +| | SortExec: expr=[revenue@1 DESC] | +| | ProjectionExec: expr=[n_name@0 as n_name, SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as revenue] | +| | AggregateExec: mode=FinalPartitioned, gby=[n_name@0 as n_name], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_name", index: 0 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[n_name@2 as n_name], aggr=[SUM(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)] | +| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, n_name@2 as n_name] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 3 
}, Column { name: "r_regionkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, n_name@4 as n_name, n_regionkey@5 as n_regionkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@5 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 }), (Column { name: "c_nationkey", index: 0 }, Column { name: "s_nationkey", index: 1 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }, Column { name: "c_nationkey", index: 0 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[c_nationkey@0 as c_nationkey, l_suppkey@3 as l_suppkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_orderkey", index: 1 }, Column { name: "l_orderkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 1 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[c_nationkey@1 as c_nationkey, o_orderkey@2 as o_orderkey] | +| | 
CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_custkey", index: 0 }, Column { name: "o_custkey", index: 1 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 1 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[o_orderkey@0 as o_orderkey, o_custkey@1 as o_custkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: o_orderdate@2 >= 8766 AND o_orderdate@2 < 9131 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }, Column { name: "s_nationkey", index: 1 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: r_name@1 = ASIA | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | 
++---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q6.txt b/benchmarks/expected-plans/q6.txt index d42b08f962ae..3319e3726c0e 100644 --- a/benchmarks/expected-plans/q6.txt +++ b/benchmarks/expected-plans/q6.txt @@ -2,7 +2,7 @@ | plan_type | plan | +---------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ | logical_plan | Projection: SUM(lineitem.l_extendedprice * lineitem.l_discount) AS revenue | -| | Aggregate: groupBy=[[]], aggr=[[SUM(lineitem.l_extendedprice * lineitem.l_discount)]] | +| | Aggregate: groupBy=[[]], aggr=[[SUM(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(15, 2))) * PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(15, 2))))]] | | | Projection: lineitem.l_extendedprice, lineitem.l_discount | | | Filter: lineitem.l_shipdate >= Date32("8766") AND lineitem.l_shipdate < Date32("9131") AND lineitem.l_discount >= Decimal128(Some(5),15,2) AND lineitem.l_discount <= Decimal128(Some(7),15,2) AND lineitem.l_quantity < Decimal128(Some(2400),15,2) | | | TableScan: lineitem projection=[l_quantity, l_extendedprice, l_discount, l_shipdate] | diff --git a/benchmarks/expected-plans/q7.txt b/benchmarks/expected-plans/q7.txt index 2f5d7fc663e9..e36e6f175c2c 100644 --- a/benchmarks/expected-plans/q7.txt +++ b/benchmarks/expected-plans/q7.txt @@ -1,93 +1,93 @@ 
-+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, shipping.l_year ASC NULLS LAST | -| | Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, SUM(shipping.volume) AS revenue | -| | Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[SUM(shipping.volume)]] | -| | SubqueryAlias: shipping | -| | Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, datepart(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS volume | -| | Filter: n1.n_name = Utf8("FRANCE") AND n2.n_name = Utf8("GERMANY") OR n1.n_name = Utf8("GERMANY") AND n2.n_name = Utf8("FRANCE") | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, n1.n_name, n2.n_name | -| | Inner Join: customer.c_nationkey = n2.n_nationkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey, n1.n_name | -| | Inner Join: supplier.s_nationkey = n1.n_nationkey | -| | Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey | -| | Inner Join: orders.o_custkey = 
customer.c_custkey | -| | Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, orders.o_custkey | -| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | -| | Projection: supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate | -| | Inner Join: supplier.s_suppkey = lineitem.l_suppkey | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Filter: lineitem.l_shipdate >= Date32("9131") AND lineitem.l_shipdate <= Date32("9861") | -| | TableScan: lineitem projection=[l_orderkey, l_suppkey, l_extendedprice, l_discount, l_shipdate] | -| | TableScan: orders projection=[o_orderkey, o_custkey] | -| | TableScan: customer projection=[c_custkey, c_nationkey] | -| | SubqueryAlias: n1 | -| | Filter: nation.n_name = Utf8("FRANCE") OR nation.n_name = Utf8("GERMANY") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| | SubqueryAlias: n2 | -| | Filter: nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("FRANCE") | -| | TableScan: nation projection=[n_nationkey, n_name] | -| physical_plan | SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] | -| | SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] | -| | ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, SUM(shipping.volume)@3 as revenue] | -| | AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[SUM(shipping.volume)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as 
l_year], aggr=[SUM(shipping.volume)] | -| | ProjectionExec: expr=[n_name@3 as supp_nation, n_name@4 as cust_nation, datepart(YEAR, l_shipdate@2) as l_year, CAST(l_extendedprice@0 AS Decimal128(38, 4)) * CAST(Some(100),23,2 - CAST(l_discount@1 AS Decimal128(23, 2)) AS Decimal128(38, 4)) as volume] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@3 = FRANCE AND n_name@4 = GERMANY OR n_name@3 = GERMANY AND n_name@4 = FRANCE | -| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, l_shipdate@2 as l_shipdate, n_name@4 as n_name, n_name@6 as n_name] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@6 as n_name] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 0 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[s_nationkey@0 as s_nationkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@6 as c_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_custkey", index: 4 }, Column { name: "c_custkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: 
"o_custkey", index: 4 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[s_nationkey@0 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@6 as o_custkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "o_orderkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | -| | ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_orderkey@2 as l_orderkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, l_shipdate@6 as l_shipdate] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "l_suppkey", index: 1 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: l_shipdate@4 >= 9131 AND l_shipdate@4 <= 9861 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), 
input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = FRANCE OR n_name@1 = GERMANY | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: n_name@1 = GERMANY OR n_name@1 = FRANCE | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| 
logical_plan | Sort: shipping.supp_nation ASC NULLS LAST, shipping.cust_nation ASC NULLS LAST, shipping.l_year ASC NULLS LAST | +| | Projection: shipping.supp_nation, shipping.cust_nation, shipping.l_year, SUM(shipping.volume) AS revenue | +| | Aggregate: groupBy=[[shipping.supp_nation, shipping.cust_nation, shipping.l_year]], aggr=[[SUM(shipping.volume)]] | +| | SubqueryAlias: shipping | +| | Projection: n1.n_name AS supp_nation, n2.n_name AS cust_nation, datepart(Utf8("YEAR"), lineitem.l_shipdate) AS l_year, PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))) AS volume | +| | Filter: n1.n_name = Utf8("FRANCE") AND n2.n_name = Utf8("GERMANY") OR n1.n_name = Utf8("GERMANY") AND n2.n_name = Utf8("FRANCE") | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, n1.n_name, n2.n_name | +| | Inner Join: customer.c_nationkey = n2.n_nationkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey, n1.n_name | +| | Inner Join: supplier.s_nationkey = n1.n_nationkey | +| | Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, customer.c_nationkey | +| | Inner Join: orders.o_custkey = customer.c_custkey | +| | Projection: supplier.s_nationkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate, orders.o_custkey | +| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | +| | Projection: supplier.s_nationkey, lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, lineitem.l_shipdate | +| | Inner Join: supplier.s_suppkey = lineitem.l_suppkey | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Filter: lineitem.l_shipdate >= Date32("9131") AND lineitem.l_shipdate <= Date32("9861") | +| | TableScan: lineitem projection=[l_orderkey, l_suppkey, 
l_extendedprice, l_discount, l_shipdate] | +| | TableScan: orders projection=[o_orderkey, o_custkey] | +| | TableScan: customer projection=[c_custkey, c_nationkey] | +| | SubqueryAlias: n1 | +| | Filter: nation.n_name = Utf8("FRANCE") OR nation.n_name = Utf8("GERMANY") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| | SubqueryAlias: n2 | +| | Filter: nation.n_name = Utf8("GERMANY") OR nation.n_name = Utf8("FRANCE") | +| | TableScan: nation projection=[n_nationkey, n_name] | +| physical_plan | SortPreservingMergeExec: [supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] | +| | SortExec: expr=[supp_nation@0 ASC NULLS LAST,cust_nation@1 ASC NULLS LAST,l_year@2 ASC NULLS LAST] | +| | ProjectionExec: expr=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year, SUM(shipping.volume)@3 as revenue] | +| | AggregateExec: mode=FinalPartitioned, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[SUM(shipping.volume)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "supp_nation", index: 0 }, Column { name: "cust_nation", index: 1 }, Column { name: "l_year", index: 2 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[supp_nation@0 as supp_nation, cust_nation@1 as cust_nation, l_year@2 as l_year], aggr=[SUM(shipping.volume)] | +| | ProjectionExec: expr=[n_name@3 as supp_nation, n_name@4 as cust_nation, datepart(YEAR, l_shipdate@2) as l_year, PROMOTE_PRECISION(CAST(l_extendedprice@0 AS Decimal128(23, 2))) * PROMOTE_PRECISION(Some(100),23,2 - PROMOTE_PRECISION(CAST(l_discount@1 AS Decimal128(23, 2)))) as volume] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@3 = FRANCE AND n_name@4 = GERMANY OR n_name@3 = GERMANY AND n_name@4 = FRANCE | +| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, l_shipdate@2 as l_shipdate, n_name@4 
as n_name, n_name@6 as n_name] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 3 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@4 as c_nationkey, n_name@6 as n_name] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 0 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 0 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[s_nationkey@0 as s_nationkey, l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, l_shipdate@3 as l_shipdate, c_nationkey@6 as c_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_custkey", index: 4 }, Column { name: "c_custkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 4 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[s_nationkey@0 as s_nationkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, l_shipdate@4 as l_shipdate, o_custkey@6 as o_custkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 1 }, Column { name: "o_orderkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 1 }], 2), input_partitions=2 | +| | 
RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=2 | +| | ProjectionExec: expr=[s_nationkey@1 as s_nationkey, l_orderkey@2 as l_orderkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, l_shipdate@6 as l_shipdate] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_suppkey", index: 0 }, Column { name: "l_suppkey", index: 1 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: l_shipdate@4 >= 9131 AND l_shipdate@4 <= 9861 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = FRANCE OR n_name@1 = GERMANY | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", 
index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: n_name@1 = GERMANY OR n_name@1 = FRANCE | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q8.txt b/benchmarks/expected-plans/q8.txt index d196bcc21486..0c7106f26514 100644 --- a/benchmarks/expected-plans/q8.txt +++ b/benchmarks/expected-plans/q8.txt @@ -1,113 +1,113 @@ -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: all_nations.o_year ASC NULLS LAST | -| | Projection: all_nations.o_year, SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END) / SUM(all_nations.volume) AS mkt_share | -| | Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN 
all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)]] | -| | SubqueryAlias: all_nations | -| | Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) AS volume, n2.n_name AS nation | -| | Inner Join: n1.n_regionkey = region.r_regionkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n1.n_regionkey, n2.n_name | -| | Inner Join: supplier.s_nationkey = n2.n_nationkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, n1.n_regionkey | -| | Inner Join: customer.c_nationkey = n1.n_nationkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, customer.c_nationkey | -| | Inner Join: orders.o_custkey = customer.c_custkey | -| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_custkey, orders.o_orderdate | -| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | -| | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | -| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey | -| | Projection: lineitem.l_orderkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | -| | Inner Join: part.p_partkey = lineitem.l_partkey | -| | Projection: part.p_partkey | -| | Filter: part.p_type = Utf8("ECONOMY ANODIZED STEEL") | -| | TableScan: part projection=[p_partkey, p_type] | -| | TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | Filter: orders.o_orderdate >= Date32("9131") AND orders.o_orderdate <= Date32("9861") | -| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate] | -| | TableScan: customer projection=[c_custkey, 
c_nationkey] | -| | SubqueryAlias: n1 | -| | TableScan: nation projection=[n_nationkey, n_regionkey] | -| | SubqueryAlias: n2 | -| | TableScan: nation projection=[n_nationkey, n_name] | -| | Projection: region.r_regionkey | -| | Filter: region.r_name = Utf8("AMERICA") | -| | TableScan: region projection=[r_regionkey, r_name] | -| physical_plan | SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] | -| | SortExec: expr=[o_year@0 ASC NULLS LAST] | -| | ProjectionExec: expr=[o_year@0 as o_year, SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END)@1 / SUM(all_nations.volume)@2 as mkt_share] | -| | AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_year", index: 0 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)] | -| | ProjectionExec: expr=[datepart(YEAR, o_orderdate@2) as o_year, CAST(l_extendedprice@0 AS Decimal128(38, 4)) * CAST(Some(100),23,2 - CAST(l_discount@1 AS Decimal128(23, 2)) AS Decimal128(38, 4)) as volume, n_name@4 as nation] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 3 }, Column { name: "r_regionkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@6 as n_name] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | 
HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, s_nationkey@2 as s_nationkey, o_orderdate@3 as o_orderdate, n_regionkey@6 as n_regionkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 4 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, s_nationkey@2 as s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@6 as c_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_custkey", index: 3 }, Column { name: "c_custkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 3 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_custkey@5 as o_custkey, o_orderdate@6 as o_orderdate] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@2 as 
l_extendedprice, l_discount@3 as l_discount, s_nationkey@5 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_suppkey@3 as l_suppkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "l_partkey", index: 1 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: p_type@1 = ECONOMY ANODIZED STEEL | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 1 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: o_orderdate@2 >= 9131 AND o_orderdate@2 <= 9861 | -| | 
MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[r_regionkey@0 as r_regionkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: r_name@1 = AMERICA | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | 
++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: all_nations.o_year ASC NULLS LAST | +| | Projection: all_nations.o_year, CAST(CAST(SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END) AS Decimal128(12, 2)) / CAST(SUM(all_nations.volume) AS Decimal128(12, 2)) AS Decimal128(15, 2)) AS mkt_share | +| | Aggregate: groupBy=[[all_nations.o_year]], aggr=[[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Decimal128(Some(0),38,4) END) AS SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)]] | +| | SubqueryAlias: all_nations | +| | Projection: datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))) AS volume, n2.n_name AS nation | +| | Inner Join: n1.n_regionkey = region.r_regionkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, orders.o_orderdate, n1.n_regionkey, n2.n_name | +| | Inner Join: supplier.s_nationkey = n2.n_nationkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, n1.n_regionkey | +| | Inner Join: customer.c_nationkey = n1.n_nationkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_orderdate, customer.c_nationkey | +| | Inner Join: orders.o_custkey = customer.c_custkey | +| | Projection: lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, orders.o_custkey, 
orders.o_orderdate | +| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | +| | Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | +| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey | +| | Projection: lineitem.l_orderkey, lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount | +| | Inner Join: part.p_partkey = lineitem.l_partkey | +| | Projection: part.p_partkey | +| | Filter: part.p_type = Utf8("ECONOMY ANODIZED STEEL") | +| | TableScan: part projection=[p_partkey, p_type] | +| | TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_extendedprice, l_discount] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | Filter: orders.o_orderdate >= Date32("9131") AND orders.o_orderdate <= Date32("9861") | +| | TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate] | +| | TableScan: customer projection=[c_custkey, c_nationkey] | +| | SubqueryAlias: n1 | +| | TableScan: nation projection=[n_nationkey, n_regionkey] | +| | SubqueryAlias: n2 | +| | TableScan: nation projection=[n_nationkey, n_name] | +| | Projection: region.r_regionkey | +| | Filter: region.r_name = Utf8("AMERICA") | +| | TableScan: region projection=[r_regionkey, r_name] | +| physical_plan | SortPreservingMergeExec: [o_year@0 ASC NULLS LAST] | +| | SortExec: expr=[o_year@0 ASC NULLS LAST] | +| | ProjectionExec: expr=[o_year@0 as o_year, CAST(CAST(SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END)@1 AS Decimal128(12, 2)) / CAST(SUM(all_nations.volume)@2 AS Decimal128(12, 2)) AS Decimal128(15, 2)) as mkt_share] | +| | AggregateExec: mode=FinalPartitioned, gby=[o_year@0 as o_year], aggr=[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_year", index: 0 }], 2), input_partitions=2 
| +| | AggregateExec: mode=Partial, gby=[o_year@0 as o_year], aggr=[SUM(CASE WHEN all_nations.nation = Utf8("BRAZIL") THEN all_nations.volume ELSE Int64(0) END), SUM(all_nations.volume)] | +| | ProjectionExec: expr=[datepart(YEAR, o_orderdate@2) as o_year, PROMOTE_PRECISION(CAST(l_extendedprice@0 AS Decimal128(23, 2))) * PROMOTE_PRECISION(Some(100),23,2 - PROMOTE_PRECISION(CAST(l_discount@1 AS Decimal128(23, 2)))) as volume, n_name@4 as nation] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "n_regionkey", index: 3 }, Column { name: "r_regionkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_regionkey", index: 3 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, o_orderdate@3 as o_orderdate, n_regionkey@4 as n_regionkey, n_name@6 as n_name] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 2 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 2 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, s_nationkey@2 as s_nationkey, o_orderdate@3 as o_orderdate, n_regionkey@6 as n_regionkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "c_nationkey", index: 4 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_nationkey", index: 4 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_extendedprice@0 as l_extendedprice, l_discount@1 as l_discount, s_nationkey@2 as 
s_nationkey, o_orderdate@4 as o_orderdate, c_nationkey@6 as c_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "o_custkey", index: 3 }, Column { name: "c_custkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_custkey", index: 3 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_extendedprice@1 as l_extendedprice, l_discount@2 as l_discount, s_nationkey@3 as s_nationkey, o_custkey@5 as o_custkey, o_orderdate@6 as o_orderdate] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@5 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 1 }, Column { name: "s_suppkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 1 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_suppkey@3 as l_suppkey, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "l_partkey", index: 1 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: 
partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: p_type@1 = ECONOMY ANODIZED STEEL | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 1 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: o_orderdate@2 >= 9131 AND o_orderdate@2 <= 9861 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "c_custkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "r_regionkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: 
expr=[r_regionkey@0 as r_regionkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: r_name@1 = AMERICA | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/expected-plans/q9.txt b/benchmarks/expected-plans/q9.txt index 414ecf804a99..4a7a9ac8c41e 100644 --- a/benchmarks/expected-plans/q9.txt +++ b/benchmarks/expected-plans/q9.txt @@ -1,79 +1,79 @@ -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| plan_type | plan | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| logical_plan | Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST | -| | Projection: profit.nation, profit.o_year, SUM(profit.amount) AS sum_profit | -| | Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[SUM(profit.amount)]] | -| | SubqueryAlias: profit | -| | Projection: nation.n_name AS nation, datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, CAST(lineitem.l_extendedprice AS Decimal128(38, 4)) * CAST(Decimal128(Some(100),23,2) - 
CAST(lineitem.l_discount AS Decimal128(23, 2)) AS Decimal128(38, 4)) - CAST(partsupp.ps_supplycost * lineitem.l_quantity AS Decimal128(38, 4)) AS amount | -| | Inner Join: supplier.s_nationkey = nation.n_nationkey | -| | Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate | -| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | -| | Projection: lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost | -| | Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey | -| | Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | -| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey | -| | Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount | -| | Inner Join: part.p_partkey = lineitem.l_partkey | -| | Projection: part.p_partkey | -| | Filter: part.p_name LIKE Utf8("%green%") | -| | TableScan: part projection=[p_partkey, p_name] | -| | TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] | -| | TableScan: supplier projection=[s_suppkey, s_nationkey] | -| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] | -| | TableScan: orders projection=[o_orderkey, o_orderdate] | -| | TableScan: nation projection=[n_nationkey, n_name] | -| physical_plan | SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] | -| | SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC] | -| | ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, SUM(profit.amount)@2 as sum_profit] | -| | AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[SUM(profit.amount)] | -| | 
CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 2), input_partitions=2 | -| | AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[SUM(profit.amount)] | -| | ProjectionExec: expr=[n_name@7 as nation, datepart(YEAR, o_orderdate@5) as o_year, CAST(l_extendedprice@1 AS Decimal128(38, 4)) * CAST(Some(100),23,2 - CAST(l_discount@2 AS Decimal128(23, 2)) AS Decimal128(38, 4)) - CAST(ps_supplycost@4 * l_quantity@0 AS Decimal128(38, 4)) as amount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@7 as o_orderdate] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_quantity@3 as l_quantity, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, s_nationkey@6 as s_nationkey, ps_supplycost@9 as ps_supplycost] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 2 }, Column { name: "ps_suppkey", index: 1 }), (Column { name: "l_partkey", index: 1 }, Column { name: "ps_partkey", index: 
0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_partkey@1 as l_partkey, l_suppkey@2 as l_suppkey, l_quantity@3 as l_quantity, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, s_nationkey@7 as s_nationkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 2 }, Column { name: "s_suppkey", index: 0 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 2), input_partitions=2 | -| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "l_partkey", index: 1 })] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=2 | -| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | -| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | FilterExec: p_name@1 LIKE %green% | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 1 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | -| | 
MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | CoalesceBatchesExec: target_batch_size=8192 | -| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 2), input_partitions=0 | -| | MemoryExec: partitions=0, partition_sizes=[] | -| | | -+---------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| plan_type | plan | 
++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| logical_plan | Sort: profit.nation ASC NULLS LAST, profit.o_year DESC NULLS FIRST | +| | Projection: profit.nation, profit.o_year, SUM(profit.amount) AS sum_profit | +| | Aggregate: groupBy=[[profit.nation, profit.o_year]], aggr=[[SUM(profit.amount)]] | +| | SubqueryAlias: profit | +| | Projection: nation.n_name AS nation, datepart(Utf8("YEAR"), orders.o_orderdate) AS o_year, PROMOTE_PRECISION(CAST(PROMOTE_PRECISION(CAST(lineitem.l_extendedprice AS Decimal128(23, 2))) * PROMOTE_PRECISION(CAST(Decimal128(Some(100),23,2) - PROMOTE_PRECISION(CAST(lineitem.l_discount AS Decimal128(23, 2))) AS Decimal128(23, 2))) AS Decimal128(38, 4))) - PROMOTE_PRECISION(CAST(PROMOTE_PRECISION(CAST(partsupp.ps_supplycost AS Decimal128(15, 2))) * PROMOTE_PRECISION(CAST(lineitem.l_quantity AS Decimal128(15, 2))) AS Decimal128(38, 4))) AS amount | +| | Inner Join: supplier.s_nationkey = nation.n_nationkey | +| | Projection: lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost, orders.o_orderdate | +| | Inner Join: lineitem.l_orderkey = orders.o_orderkey | +| | Projection: lineitem.l_orderkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey, partsupp.ps_supplycost | +| | Inner Join: lineitem.l_suppkey = partsupp.ps_suppkey, lineitem.l_partkey = partsupp.ps_partkey | +| | Projection: lineitem.l_orderkey, lineitem.l_partkey, 
lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount, supplier.s_nationkey | +| | Inner Join: lineitem.l_suppkey = supplier.s_suppkey | +| | Projection: lineitem.l_orderkey, lineitem.l_partkey, lineitem.l_suppkey, lineitem.l_quantity, lineitem.l_extendedprice, lineitem.l_discount | +| | Inner Join: part.p_partkey = lineitem.l_partkey | +| | Projection: part.p_partkey | +| | Filter: part.p_name LIKE Utf8("%green%") | +| | TableScan: part projection=[p_partkey, p_name] | +| | TableScan: lineitem projection=[l_orderkey, l_partkey, l_suppkey, l_quantity, l_extendedprice, l_discount] | +| | TableScan: supplier projection=[s_suppkey, s_nationkey] | +| | TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost] | +| | TableScan: orders projection=[o_orderkey, o_orderdate] | +| | TableScan: nation projection=[n_nationkey, n_name] | +| physical_plan | SortPreservingMergeExec: [nation@0 ASC NULLS LAST,o_year@1 DESC] | +| | SortExec: expr=[nation@0 ASC NULLS LAST,o_year@1 DESC] | +| | ProjectionExec: expr=[nation@0 as nation, o_year@1 as o_year, SUM(profit.amount)@2 as sum_profit] | +| | AggregateExec: mode=FinalPartitioned, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[SUM(profit.amount)] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "nation", index: 0 }, Column { name: "o_year", index: 1 }], 2), input_partitions=2 | +| | AggregateExec: mode=Partial, gby=[nation@0 as nation, o_year@1 as o_year], aggr=[SUM(profit.amount)] | +| | ProjectionExec: expr=[n_name@7 as nation, datepart(YEAR, o_orderdate@5) as o_year, PROMOTE_PRECISION(PROMOTE_PRECISION(CAST(l_extendedprice@1 AS Decimal128(23, 2))) * PROMOTE_PRECISION(Some(100),23,2 - PROMOTE_PRECISION(CAST(l_discount@2 AS Decimal128(23, 2))))) - PROMOTE_PRECISION(CAST(PROMOTE_PRECISION(ps_supplycost@4) * PROMOTE_PRECISION(l_quantity@0) AS Decimal128(38, 4))) as amount] | +| | CoalesceBatchesExec: target_batch_size=8192 
| +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "s_nationkey", index: 3 }, Column { name: "n_nationkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_nationkey", index: 3 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_quantity@1 as l_quantity, l_extendedprice@2 as l_extendedprice, l_discount@3 as l_discount, s_nationkey@4 as s_nationkey, ps_supplycost@5 as ps_supplycost, o_orderdate@7 as o_orderdate] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_orderkey", index: 0 }, Column { name: "o_orderkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_orderkey", index: 0 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_quantity@3 as l_quantity, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, s_nationkey@6 as s_nationkey, ps_supplycost@9 as ps_supplycost] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 2 }, Column { name: "ps_suppkey", index: 1 }), (Column { name: "l_partkey", index: 1 }, Column { name: "ps_partkey", index: 0 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 2 }, Column { name: "l_partkey", index: 1 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_orderkey@0 as l_orderkey, l_partkey@1 as l_partkey, l_suppkey@2 as l_suppkey, l_quantity@3 as l_quantity, l_extendedprice@4 as l_extendedprice, l_discount@5 as l_discount, s_nationkey@7 as s_nationkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "l_suppkey", index: 2 }, Column { name: "s_suppkey", index: 0 })] | +| | 
CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_suppkey", index: 2 }], 2), input_partitions=2 | +| | ProjectionExec: expr=[l_orderkey@1 as l_orderkey, l_partkey@2 as l_partkey, l_suppkey@3 as l_suppkey, l_quantity@4 as l_quantity, l_extendedprice@5 as l_extendedprice, l_discount@6 as l_discount] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | HashJoinExec: mode=Partitioned, join_type=Inner, on=[(Column { name: "p_partkey", index: 0 }, Column { name: "l_partkey", index: 1 })] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "p_partkey", index: 0 }], 2), input_partitions=2 | +| | RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=0 | +| | ProjectionExec: expr=[p_partkey@0 as p_partkey] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | FilterExec: p_name@1 LIKE %green% | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "l_partkey", index: 1 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "s_suppkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "ps_suppkey", index: 1 }, Column { name: "ps_partkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "o_orderkey", index: 0 }], 2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | CoalesceBatchesExec: target_batch_size=8192 | +| | RepartitionExec: partitioning=Hash([Column { name: "n_nationkey", index: 0 }], 
2), input_partitions=0 | +| | MemoryExec: partitions=0, partition_sizes=[] | +| | | ++---------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ \ No newline at end of file diff --git a/benchmarks/queries/q8.sql b/benchmarks/queries/q8.sql index 6ddb2a674758..3c2971a6b057 100644 --- a/benchmarks/queries/q8.sql +++ b/benchmarks/queries/q8.sql @@ -1,9 +1,9 @@ select o_year, - sum(case + cast(cast(sum(case when nation = 'BRAZIL' then volume else 0 - end) / sum(volume) as mkt_share + end) as decimal(12,2)) / cast(sum(volume) as decimal(12,2)) as decimal(15,2)) as mkt_share from ( select diff --git a/datafusion/core/src/datasource/listing/helpers.rs b/datafusion/core/src/datasource/listing/helpers.rs index 184b9cd1a31c..23e6d833630a 100644 --- a/datafusion/core/src/datasource/listing/helpers.rs +++ b/datafusion/core/src/datasource/listing/helpers.rs @@ -81,6 +81,7 @@ pub fn expr_applicable_for_cols(col_names: &[String], expr: &Expr) -> bool { | Expr::IsNotUnknown(_) | Expr::Negative(_) | Expr::Cast { .. } + | Expr::PromotePrecision { .. } | Expr::TryCast { .. } | Expr::BinaryExpr { .. } | Expr::Between { .. 
} diff --git a/datafusion/core/src/physical_plan/planner.rs b/datafusion/core/src/physical_plan/planner.rs index 4b21a9bd735e..66ea721858a4 100644 --- a/datafusion/core/src/physical_plan/planner.rs +++ b/datafusion/core/src/physical_plan/planner.rs @@ -61,7 +61,7 @@ use async_trait::async_trait; use datafusion_common::{DFSchema, ScalarValue}; use datafusion_expr::expr::{ self, AggregateFunction, Between, BinaryExpr, Cast, GetIndexedField, GroupingSet, - Like, TryCast, WindowFunction, + Like, PromotePrecision, TryCast, WindowFunction, }; use datafusion_expr::expr_rewriter::unnormalize_cols; use datafusion_expr::logical_plan::builder::wrap_projection_for_join_if_necessary; @@ -111,7 +111,9 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { Expr::Alias(_, name) => Ok(name.clone()), Expr::ScalarVariable(_, variable_names) => Ok(variable_names.join(".")), Expr::Literal(value) => Ok(format!("{value:?}")), - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => { let left = create_physical_name(left, false)?; let right = create_physical_name(right, false)?; Ok(format!("{left} {op} {right}")) @@ -134,6 +136,10 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { // CAST does not change the expression name create_physical_name(expr, false) } + Expr::PromotePrecision(PromotePrecision { expr }) => { + // PromotePrecision does not change the expression name + create_physical_name(expr, false) + } Expr::TryCast(TryCast { expr, .. 
}) => { // CAST does not change the expression name create_physical_name(expr, false) @@ -1895,7 +1901,7 @@ mod tests { // verify that the plan correctly casts u8 to i64 // the cast from u8 to i64 for literal will be simplified, and get lit(int64(5)) // the cast here is implicit so has CastOptions with safe=true - let expected = "BinaryExpr { left: Column { name: \"c7\", index: 2 }, op: Lt, right: Literal { value: Int64(5) } }"; + let expected = "BinaryExpr { left: Column { name: \"c7\", index: 2 }, op: Lt, right: Literal { value: Int64(5) }, data_type: None }"; assert!(format!("{exec_plan:?}").contains(expected)); Ok(()) } @@ -2141,7 +2147,7 @@ mod tests { let execution_plan = plan(&logical_plan).await?; // verify that the plan correctly adds cast from Int64(1) to Utf8, and the const will be evaluated. - let expected = "expr: [(BinaryExpr { left: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"1\") } }, op: Or, right: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"a\") } } }"; + let expected = "expr: [(BinaryExpr { left: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"1\") }, data_type: None }, op: Or, right: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"a\") }, data_type: None }, data_type: None }"; let actual = format!("{execution_plan:?}"); assert!(actual.contains(expected), "{}", actual); diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index dc6f741c5b47..73b92afa1097 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -1690,7 +1690,7 @@ async fn right_semi_join() -> Result<()> { vec!["SortPreservingMergeExec: [t1_id@0 ASC NULLS LAST]", " SortExec: expr=[t1_id@0 ASC NULLS LAST]", " CoalesceBatchesExec: target_batch_size=4096", - " HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(Column { name: 
\"t2_id\", index: 0 }, Column { name: \"t1_id\", index: 0 })], filter=BinaryExpr { left: Column { name: \"t2_name\", index: 1 }, op: NotEq, right: Column { name: \"t1_name\", index: 0 } }", + " HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(Column { name: \"t2_id\", index: 0 }, Column { name: \"t1_id\", index: 0 })], filter=BinaryExpr { left: Column { name: \"t2_name\", index: 1 }, op: NotEq, right: Column { name: \"t1_name\", index: 0 }, data_type: None }", " CoalesceBatchesExec: target_batch_size=4096", " RepartitionExec: partitioning=Hash([Column { name: \"t2_id\", index: 0 }], 2), input_partitions=2", " RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1", @@ -1704,7 +1704,7 @@ async fn right_semi_join() -> Result<()> { vec![ "SortExec: expr=[t1_id@0 ASC NULLS LAST]", " CoalesceBatchesExec: target_batch_size=4096", - " HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(Column { name: \"t2_id\", index: 0 }, Column { name: \"t1_id\", index: 0 })], filter=BinaryExpr { left: Column { name: \"t2_name\", index: 1 }, op: NotEq, right: Column { name: \"t1_name\", index: 0 } }", + " HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(Column { name: \"t2_id\", index: 0 }, Column { name: \"t1_id\", index: 0 })], filter=BinaryExpr { left: Column { name: \"t2_name\", index: 1 }, op: NotEq, right: Column { name: \"t1_name\", index: 0 }, data_type: None }", " MemoryExec: partitions=1, partition_sizes=[1]", " MemoryExec: partitions=1, partition_sizes=[1]", ] @@ -1734,7 +1734,7 @@ async fn right_semi_join() -> Result<()> { vec!["SortPreservingMergeExec: [t1_id@0 ASC NULLS LAST]", " SortExec: expr=[t1_id@0 ASC NULLS LAST]", " CoalesceBatchesExec: target_batch_size=4096", - " HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(Column { name: \"t2_id\", index: 0 }, Column { name: \"t1_id\", index: 0 })], filter=BinaryExpr { left: Column { name: \"t2_name\", index: 0 }, op: NotEq, right: Column { name: \"t1_name\", index: 1 } }", + " 
HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(Column { name: \"t2_id\", index: 0 }, Column { name: \"t1_id\", index: 0 })], filter=BinaryExpr { left: Column { name: \"t2_name\", index: 0 }, op: NotEq, right: Column { name: \"t1_name\", index: 1 }, data_type: None }", " CoalesceBatchesExec: target_batch_size=4096", " RepartitionExec: partitioning=Hash([Column { name: \"t2_id\", index: 0 }], 2), input_partitions=2", " RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1", @@ -1748,7 +1748,7 @@ async fn right_semi_join() -> Result<()> { vec![ "SortExec: expr=[t1_id@0 ASC NULLS LAST]", " CoalesceBatchesExec: target_batch_size=4096", - " HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(Column { name: \"t2_id\", index: 0 }, Column { name: \"t1_id\", index: 0 })], filter=BinaryExpr { left: Column { name: \"t2_name\", index: 0 }, op: NotEq, right: Column { name: \"t1_name\", index: 1 } }", + " HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(Column { name: \"t2_id\", index: 0 }, Column { name: \"t1_id\", index: 0 })], filter=BinaryExpr { left: Column { name: \"t2_name\", index: 0 }, op: NotEq, right: Column { name: \"t1_name\", index: 1 }, data_type: None }", " MemoryExec: partitions=1, partition_sizes=[1]", " MemoryExec: partitions=1, partition_sizes=[1]", ] @@ -2564,7 +2564,7 @@ async fn right_as_inner_table_nested_loop_join() -> Result<()> { // right is single partition side, so it will be visited many times. 
let expected = vec![ - "NestedLoopJoinExec: join_type=Inner, filter=BinaryExpr { left: Column { name: \"t1_id\", index: 0 }, op: Gt, right: Column { name: \"t2_id\", index: 1 } }", + "NestedLoopJoinExec: join_type=Inner, filter=BinaryExpr { left: Column { name: \"t1_id\", index: 0 }, op: Gt, right: Column { name: \"t2_id\", index: 1 }, data_type: None }", " CoalesceBatchesExec: target_batch_size=4096", " FilterExec: t1_id@0 > 10", " RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1", @@ -2614,7 +2614,7 @@ async fn left_as_inner_table_nested_loop_join() -> Result<()> { // left is single partition side, so it will be visited many times. let expected = vec![ - "NestedLoopJoinExec: join_type=Right, filter=BinaryExpr { left: Column { name: \"t1_id\", index: 0 }, op: Lt, right: Column { name: \"t2_id\", index: 1 } }", + "NestedLoopJoinExec: join_type=Right, filter=BinaryExpr { left: Column { name: \"t1_id\", index: 0 }, op: Lt, right: Column { name: \"t2_id\", index: 1 }, data_type: None }", " CoalescePartitionsExec", " CoalesceBatchesExec: target_batch_size=4096", " FilterExec: t1_id@0 > 22", diff --git a/datafusion/core/tests/sqllogictests/src/engines/conversion.rs b/datafusion/core/tests/sqllogictests/src/engines/conversion.rs index 0d013c47b348..7ab1c8330bab 100644 --- a/datafusion/core/tests/sqllogictests/src/engines/conversion.rs +++ b/datafusion/core/tests/sqllogictests/src/engines/conversion.rs @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. 
+use arrow::datatypes::{Decimal128Type, DecimalType}; use bigdecimal::BigDecimal; use half::f16; use rust_decimal::prelude::*; -use rust_decimal::Decimal; pub const NULL_STR: &str = "NULL"; @@ -74,9 +74,9 @@ pub fn f64_to_str(value: f64) -> String { } } -pub fn i128_to_str(value: i128, scale: u32) -> String { +pub fn i128_to_str(value: i128, precision: &u8, scale: &i8) -> String { big_decimal_to_str( - BigDecimal::from_str(&Decimal::from_i128_with_scale(value, scale).to_string()) + BigDecimal::from_str(&Decimal128Type::format_decimal(value, *precision, *scale)) .unwrap(), ) } diff --git a/datafusion/core/tests/sqllogictests/src/engines/datafusion/normalize.rs b/datafusion/core/tests/sqllogictests/src/engines/datafusion/normalize.rs index 69b6c54c2fec..f8b2fff18129 100644 --- a/datafusion/core/tests/sqllogictests/src/engines/datafusion/normalize.rs +++ b/datafusion/core/tests/sqllogictests/src/engines/datafusion/normalize.rs @@ -192,10 +192,9 @@ pub fn cell_to_string(col: &ArrayRef, row: usize) -> Result { DataType::Float64 => { Ok(f64_to_str(get_row_value!(array::Float64Array, col, row))) } - DataType::Decimal128(_, scale) => { + DataType::Decimal128(precision, scale) => { let value = get_row_value!(array::Decimal128Array, col, row); - let decimal_scale = u32::try_from((*scale).max(0)).unwrap(); - Ok(i128_to_str(value, decimal_scale)) + Ok(i128_to_str(value, precision, scale)) } DataType::LargeUtf8 => Ok(varchar_to_str(get_row_value!( array::LargeStringArray, diff --git a/datafusion/core/tests/sqllogictests/test_files/dates.slt b/datafusion/core/tests/sqllogictests/test_files/dates.slt index ccd07e14b637..7f1e9390a55a 100644 --- a/datafusion/core/tests/sqllogictests/test_files/dates.slt +++ b/datafusion/core/tests/sqllogictests/test_files/dates.slt @@ -85,6 +85,6 @@ g h ## Plan error when compare Utf8 and timestamp in where clause -statement error DataFusion error: Error during planning: The type of Timestamp\(Nanosecond, Some\("\+00:00"\)\) Plus Utf8 of binary 
physical should be same +statement error DataFusion error: Internal error: The type of Timestamp\(Nanosecond, Some\("\+00:00"\)\) Plus Utf8 of binary physical should be same\. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker select i_item_desc from test where d3_date > now() + '5 days'; diff --git a/datafusion/core/tests/sqllogictests/test_files/tpch.slt b/datafusion/core/tests/sqllogictests/test_files/tpch.slt index ee9b8d92d5ed..af4d2353cb87 100644 --- a/datafusion/core/tests/sqllogictests/test_files/tpch.slt +++ b/datafusion/core/tests/sqllogictests/test_files/tpch.slt @@ -125,7 +125,7 @@ select sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, - sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, + sum(cast(l_extendedprice as decimal(12,2)) * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 2806683ab87b..1ef92df78165 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -146,6 +146,8 @@ pub enum Expr { /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast. /// This expression is guaranteed to have a fixed type. Cast(Cast), + /// Wraps the child expression when promoting the precision of DecimalType to avoid promoting it multiple times. + PromotePrecision(PromotePrecision), /// Casts the expression to a given type and will return a null value if the expression cannot be cast. /// This expression is guaranteed to have a fixed type. 
TryCast(TryCast), @@ -234,12 +236,33 @@ pub struct BinaryExpr { pub op: Operator, /// Right-hand side of the expression pub right: Box, + /// The data type of the expression, if known + pub data_type: Option, } impl BinaryExpr { /// Create a new binary expression pub fn new(left: Box, op: Operator, right: Box) -> Self { - Self { left, op, right } + Self { + left, + op, + right, + data_type: None, + } + } + + pub fn new_with_data_type( + left: Box, + op: Operator, + right: Box, + data_type: Option, + ) -> Self { + Self { + left, + op, + right, + data_type, + } } } @@ -385,6 +408,20 @@ impl Cast { } } +/// PromotePrecision expression +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct PromotePrecision { + /// The expression being promoted + pub expr: Box, +} + +impl PromotePrecision { + /// Create a new PromotePrecision expression + pub fn new(expr: Box) -> Self { + Self { expr } + } +} + /// TryCast Expression #[derive(Clone, PartialEq, Eq, Hash)] pub struct TryCast { @@ -569,6 +606,7 @@ impl Expr { Expr::BinaryExpr { .. } => "BinaryExpr", Expr::Case { .. } => "Case", Expr::Cast { .. } => "Cast", + Expr::PromotePrecision { .. } => "PromotePrecision", Expr::Column(..) => "Column", Expr::OuterReferenceColumn(_, _) => "Outer", Expr::Exists { .. } => "Exists", @@ -858,6 +896,9 @@ impl fmt::Debug for Expr { Expr::Cast(Cast { expr, data_type }) => { write!(f, "CAST({expr:?} AS {data_type:?})") } + Expr::PromotePrecision(PromotePrecision { expr }) => { + write!(f, "PROMOTE_PRECISION({expr:?})") + } Expr::TryCast(TryCast { expr, data_type }) => { write!(f, "TRY_CAST({expr:?} AS {data_type:?})") } @@ -1211,6 +1252,7 @@ fn create_name(e: &Expr) -> Result { // CAST does not change the expression name create_name(expr) } + Expr::PromotePrecision(PromotePrecision { expr }) => create_name(expr), Expr::TryCast(TryCast { expr, .. 
}) => { // CAST does not change the expression name create_name(expr) diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index b20629946b01..b4edf5f7cc3c 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -74,6 +74,21 @@ pub fn binary_expr(left: Expr, op: Operator, right: Expr) -> Expr { Expr::BinaryExpr(BinaryExpr::new(Box::new(left), op, Box::new(right))) } +/// Return a new expression `left op right` carrying the given optional result data type +pub fn binary_expr_with_data_type( + left: Expr, + op: Operator, + right: Expr, + data_type: Option, +) -> Expr { + Expr::BinaryExpr(BinaryExpr::new_with_data_type( + Box::new(left), + op, + Box::new(right), + data_type, + )) +} + /// Return a new expression with a logical AND pub fn and(left: Expr, right: Expr) -> Expr { Expr::BinaryExpr(BinaryExpr::new( diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index fafda79a6f61..3c1d8f17c984 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -17,7 +17,8 @@ use super::{Between, Expr, Like}; use crate::expr::{ - AggregateFunction, BinaryExpr, Cast, GetIndexedField, Sort, TryCast, WindowFunction, + AggregateFunction, BinaryExpr, Cast, GetIndexedField, PromotePrecision, Sort, + TryCast, WindowFunction, }; use crate::field_util::get_indexed_field; use crate::type_coercion::binary::binary_operator_data_type; @@ -39,6 +40,13 @@ pub trait ExprSchemable { /// cast to a type with respect to a schema fn cast_to(self, cast_to_type: &DataType, schema: &S) -> Result; + + /// promote to a type with respect to a schema + fn promote_to( + self, + promote_to_type: &DataType, + schema: &S, + ) -> Result; } impl ExprSchemable for Expr { @@ -71,6 +79,7 @@ impl ExprSchemable for Expr { Expr::Case(case) => case.when_then_expr[0].1.get_type(schema), Expr::Cast(Cast { data_type, .. }) | Expr::TryCast(TryCast { data_type, .. 
}) => Ok(data_type.clone()), + Expr::PromotePrecision(PromotePrecision { expr }) => expr.get_type(schema), Expr::ScalarUDF { fun, args } => { let data_types = args .iter() @@ -126,11 +135,18 @@ impl ExprSchemable for Expr { ref left, ref right, ref op, - }) => binary_operator_data_type( - &left.get_type(schema)?, - op, - &right.get_type(schema)?, - ), + ref data_type, + }) => { + if let Some(dt) = data_type { + Ok(dt.clone()) + } else { + binary_operator_data_type( + &left.get_type(schema)?, + op, + &right.get_type(schema)?, + ) + } + } Expr::Like { .. } | Expr::ILike { .. } | Expr::SimilarTo { .. } => { Ok(DataType::Boolean) } @@ -195,6 +211,9 @@ impl ExprSchemable for Expr { } } Expr::Cast(Cast { expr, .. }) => expr.nullable(input_schema), + Expr::PromotePrecision(PromotePrecision { expr }) => { + expr.nullable(input_schema) + } Expr::ScalarVariable(_, _) | Expr::TryCast { .. } | Expr::ScalarFunction { .. } @@ -284,6 +303,41 @@ impl ExprSchemable for Expr { ))) } } + + /// Wraps this expression in a promote precision to a target [arrow::datatypes::DataType]. + /// + /// # Errors + /// + /// This function errors when it is impossible to cast the + /// expression to the target [arrow::datatypes::DataType]. 
+ fn promote_to( + self, + promote_to_type: &DataType, + schema: &S, + ) -> Result { + let this_type = self.get_type(schema)?; + + match self { + Expr::Cast(_) if this_type == *promote_to_type => return Ok(self), + Expr::PromotePrecision(_) => { + return Err(DataFusionError::Plan( + "Cannot promote precision of a promote precision expression" + .to_string(), + )) + } + _ => {} + } + + if can_cast_types(&this_type, promote_to_type) { + Ok(Expr::PromotePrecision(PromotePrecision::new(Box::new( + Expr::Cast(Cast::new(Box::new(self), promote_to_type.clone())), + )))) + } else { + Err(DataFusionError::Plan(format!( + "Cannot automatically convert {this_type:?} to {promote_to_type:?}" + ))) + } + } } #[cfg(test)] diff --git a/datafusion/expr/src/tree_node/expr.rs b/datafusion/expr/src/tree_node/expr.rs index 61a5c91fec09..fae4bc01b8da 100644 --- a/datafusion/expr/src/tree_node/expr.rs +++ b/datafusion/expr/src/tree_node/expr.rs @@ -19,7 +19,7 @@ use crate::expr::{ AggregateFunction, Between, BinaryExpr, Case, Cast, GetIndexedField, GroupingSet, - Like, Sort, TryCast, WindowFunction, + Like, PromotePrecision, Sort, TryCast, WindowFunction, }; use crate::Expr; use datafusion_common::tree_node::VisitRecursion; @@ -44,6 +44,7 @@ impl TreeNode for Expr { | Expr::Negative(expr) | Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) + | Expr::PromotePrecision(PromotePrecision { expr, .. }) | Expr::Sort(Sort { expr, .. }) | Expr::InSubquery { expr, .. } => vec![expr.as_ref().clone()], Expr::GetIndexedField(GetIndexedField { expr, .. 
}) => { @@ -161,13 +162,17 @@ impl TreeNode for Expr { Expr::ScalarSubquery(_) => self, Expr::ScalarVariable(ty, names) => Expr::ScalarVariable(ty, names), Expr::Literal(value) => Expr::Literal(value), - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - Expr::BinaryExpr(BinaryExpr::new( - transform_boxed(left, &mut transform)?, - op, - transform_boxed(right, &mut transform)?, - )) - } + Expr::BinaryExpr(BinaryExpr { + left, + op, + right, + data_type, + }) => Expr::BinaryExpr(BinaryExpr::new_with_data_type( + transform_boxed(left, &mut transform)?, + op, + transform_boxed(right, &mut transform)?, + data_type, + )), Expr::Like(Like { negated, expr, @@ -258,6 +263,9 @@ impl TreeNode for Expr { transform_boxed(expr, &mut transform)?, data_type, )), + Expr::PromotePrecision(PromotePrecision { expr }) => Expr::PromotePrecision( + PromotePrecision::new(transform_boxed(expr, &mut transform)?), + ), Expr::Sort(Sort { expr, asc, diff --git a/datafusion/expr/src/type_coercion/binary.rs b/datafusion/expr/src/type_coercion/binary.rs index 9c6eefe9e7c9..b853d6f43926 100644 --- a/datafusion/expr/src/type_coercion/binary.rs +++ b/datafusion/expr/src/type_coercion/binary.rs @@ -35,9 +35,52 @@ pub fn binary_operator_data_type( op: &Operator, rhs_type: &DataType, ) -> Result { + let coerced_type = coerce_types(lhs_type, op, rhs_type)?; // validate that it is possible to perform the operation on incoming types. 
// (or the return datatype cannot be inferred) - let result_type = coerce_types(lhs_type, op, rhs_type)?; + let result_type = if !matches!(coerced_type, DataType::Decimal128(_, _)) { + coerced_type + } else { + let lhs_type = match lhs_type { + DataType::Decimal128(_, _) | DataType::Null => lhs_type.clone(), + DataType::Dictionary(_, value_type) + if matches!(**value_type, DataType::Decimal128(_, _)) => + { + lhs_type.clone() + } + _ => coerce_numeric_type_to_decimal(lhs_type).ok_or_else(|| { + DataFusionError::Internal(format!( + "Could not coerce numeric type to decimal: {:?}", + lhs_type + )) + })?, + }; + + let rhs_type = match rhs_type { + DataType::Decimal128(_, _) | DataType::Null => rhs_type.clone(), + DataType::Dictionary(_, value_type) + if matches!(**value_type, DataType::Decimal128(_, _)) => + { + rhs_type.clone() + } + _ => coerce_numeric_type_to_decimal(rhs_type).ok_or_else(|| { + DataFusionError::Internal(format!( + "Could not coerce numeric type to decimal: {:?}", + rhs_type + )) + })?, + }; + + match op { + // For Plus and Minus, the result type is the same as the input type which is already promoted + Operator::Plus | Operator::Minus => coerced_type, + Operator::Divide | Operator::Multiply | Operator::Modulo => { + decimal_op_mathematics_type(op, &lhs_type, &rhs_type) + .unwrap_or(coerced_type) + } + _ => coerced_type, + } + }; match op { // operators that return a boolean @@ -447,6 +490,8 @@ fn mathematics_numerical_coercion( if lhs_type == rhs_type && !(matches!(lhs_type, DataType::Dictionary(_, _)) || matches!(rhs_type, DataType::Dictionary(_, _))) + // For decimal, we always need to coerce/promote the decimal types. + && !matches!(lhs_type, DataType::Decimal128(_, _)) { return Some(lhs_type.clone()); } @@ -517,10 +562,39 @@ fn create_decimal_type(precision: u8, scale: i8) -> DataType { ) } +/// Returns the promotion type of applying mathematics operations on decimal types. 
+/// Two sides of the mathematics operation will be promoted to the same type. fn coercion_decimal_mathematics_type( mathematics_op: &Operator, left_decimal_type: &DataType, right_decimal_type: &DataType, +) -> Option { + use arrow::datatypes::DataType::*; + match (left_decimal_type, right_decimal_type) { + // The promotion rule from spark + // https://github.com/apache/spark/blob/c20af535803a7250fef047c2bf0fe30be242369d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala#L35 + (Decimal128(_, _), Decimal128(_, _)) => match mathematics_op { + Operator::Plus | Operator::Minus => decimal_op_mathematics_type( + mathematics_op, + left_decimal_type, + right_decimal_type, + ), + Operator::Multiply | Operator::Divide | Operator::Modulo => { + get_wider_decimal_type(left_decimal_type, right_decimal_type) + } + _ => None, + }, + _ => None, + } +} + +/// Returns the output type of applying mathematics operations on decimal types. +/// The rule is from spark. Note that this is different to the promoted type applied +/// to two sides of the arithmetic operation. 
+fn decimal_op_mathematics_type( + mathematics_op: &Operator, + left_decimal_type: &DataType, + right_decimal_type: &DataType, ) -> Option { use arrow::datatypes::DataType::*; match (left_decimal_type, right_decimal_type) { @@ -932,6 +1006,9 @@ mod tests { &left_decimal_type, &right_decimal_type, ); + assert_eq!(DataType::Decimal128(20, 4), result.unwrap()); + let result = + decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); assert_eq!(DataType::Decimal128(31, 7), result.unwrap()); let op = Operator::Divide; let result = coercion_decimal_mathematics_type( @@ -939,6 +1016,9 @@ mod tests { &left_decimal_type, &right_decimal_type, ); + assert_eq!(DataType::Decimal128(20, 4), result.unwrap()); + let result = + decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); assert_eq!(DataType::Decimal128(35, 24), result.unwrap()); let op = Operator::Modulo; let result = coercion_decimal_mathematics_type( @@ -946,6 +1026,9 @@ mod tests { &left_decimal_type, &right_decimal_type, ); + assert_eq!(DataType::Decimal128(20, 4), result.unwrap()); + let result = + decimal_op_mathematics_type(&op, &left_decimal_type, &right_decimal_type); assert_eq!(DataType::Decimal128(11, 4), result.unwrap()); } @@ -1225,19 +1308,19 @@ mod tests { DataType::Int32, DataType::Decimal128(10, 2), Operator::Multiply, - DataType::Decimal128(21, 2) + DataType::Decimal128(12, 2) ); test_coercion_binary_rule!( DataType::Int32, DataType::Decimal128(10, 2), Operator::Divide, - DataType::Decimal128(23, 11) + DataType::Decimal128(12, 2) ); test_coercion_binary_rule!( DataType::Int32, DataType::Decimal128(10, 2), Operator::Modulo, - DataType::Decimal128(10, 2) + DataType::Decimal128(12, 2) ); // TODO add other data type Ok(()) diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index bfcadd25ea9d..2b7c1d8508e9 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -294,6 +294,7 @@ pub fn expr_to_columns(expr: &Expr, accum: &mut 
HashSet) -> Result<()> { | Expr::Between { .. } | Expr::Case { .. } | Expr::Cast { .. } + | Expr::PromotePrecision { .. } | Expr::TryCast { .. } | Expr::Sort { .. } | Expr::ScalarFunction { .. } @@ -786,7 +787,7 @@ pub fn from_plan( let new_on:Vec<(Expr,Expr)> = expr.iter().take(equi_expr_count).map(|equi_expr| { // SimplifyExpression rule may add alias to the equi_expr. let unalias_expr = equi_expr.clone().unalias(); - if let Expr::BinaryExpr(BinaryExpr { left, op:Operator::Eq, right }) = unalias_expr { + if let Expr::BinaryExpr(BinaryExpr { left, op:Operator::Eq, right , .. }) = unalias_expr { Ok((*left, *right)) } else { Err(DataFusionError::Internal(format!( diff --git a/datafusion/jit/src/ast.rs b/datafusion/jit/src/ast.rs index 36741432ec25..e3f9a0f38480 100644 --- a/datafusion/jit/src/ast.rs +++ b/datafusion/jit/src/ast.rs @@ -156,6 +156,7 @@ impl TryFrom<(datafusion_expr::Expr, DFSchemaRef)> for Expr { left, op, right, + .. }) => { let op = match op { datafusion_expr::Operator::Eq => BinaryExpr::Eq, diff --git a/datafusion/optimizer/src/eliminate_cross_join.rs b/datafusion/optimizer/src/eliminate_cross_join.rs index 533566a0bf69..d97de49198c0 100644 --- a/datafusion/optimizer/src/eliminate_cross_join.rs +++ b/datafusion/optimizer/src/eliminate_cross_join.rs @@ -260,7 +260,10 @@ fn intersect( /// Extract join keys from a WHERE clause fn extract_possible_join_keys(expr: &Expr, accum: &mut Vec<(Expr, Expr)>) -> Result<()> { - if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr { + if let Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) = expr + { match op { Operator::Eq => { // Ensure that we don't add the same Join keys multiple times @@ -298,7 +301,9 @@ fn remove_join_expressions( join_keys: &HashSet<(Expr, Expr)>, ) -> Result> { match expr { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. 
+ }) => match op { Operator::Eq => { if join_keys.contains(&(*left.clone(), *right.clone())) || join_keys.contains(&(*right.clone(), *left.clone())) diff --git a/datafusion/optimizer/src/eliminate_outer_join.rs b/datafusion/optimizer/src/eliminate_outer_join.rs index 8dfdfae035a1..7fde5c767bef 100644 --- a/datafusion/optimizer/src/eliminate_outer_join.rs +++ b/datafusion/optimizer/src/eliminate_outer_join.rs @@ -178,7 +178,9 @@ fn extract_non_nullable_columns( non_nullable_cols.push(col.clone()); Ok(()) } - Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => match op { // If one of the inputs are null for these operators, the results should be false. Operator::Eq | Operator::NotEq diff --git a/datafusion/optimizer/src/extract_equijoin_predicate.rs b/datafusion/optimizer/src/extract_equijoin_predicate.rs index 20b9c629712c..594e65a95a17 100644 --- a/datafusion/optimizer/src/extract_equijoin_predicate.rs +++ b/datafusion/optimizer/src/extract_equijoin_predicate.rs @@ -114,6 +114,7 @@ fn split_eq_and_noneq_join_predicate( left, op: Operator::Eq, right, + .. }) => { let left = left.as_ref(); let right = right.as_ref(); diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 2a78551ea131..2a0ee3a9e5b3 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -209,6 +209,7 @@ fn extract_or_clauses_for_join( left, op: Operator::Or, right, + .. 
}) = expr { let left_expr = extract_or_clause(left.as_ref(), &schema_columns); @@ -244,6 +245,7 @@ fn extract_or_clause(expr: &Expr, schema_columns: &HashSet) -> Option { let l_expr = extract_or_clause(l_expr, schema_columns); let r_expr = extract_or_clause(r_expr, schema_columns); @@ -256,6 +258,7 @@ fn extract_or_clause(expr: &Expr, schema_columns: &HashSet) -> Option { let l_expr = extract_or_clause(l_expr, schema_columns); let r_expr = extract_or_clause(r_expr, schema_columns); diff --git a/datafusion/optimizer/src/rewrite_disjunctive_predicate.rs b/datafusion/optimizer/src/rewrite_disjunctive_predicate.rs index 57513fa4fff4..4ab841fde15e 100644 --- a/datafusion/optimizer/src/rewrite_disjunctive_predicate.rs +++ b/datafusion/optimizer/src/rewrite_disjunctive_predicate.rs @@ -161,7 +161,9 @@ enum Predicate { fn predicate(expr: &Expr) -> Result { match expr { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => match op { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => match op { Operator::And => { let args = vec![predicate(left)?, predicate(right)?]; Ok(Predicate::And { args }) diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index df0b9245faec..79b23ef7d426 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -59,7 +59,9 @@ impl ScalarSubqueryToJoin { let mut others = vec![]; for it in filters.iter() { match it { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. 
+ }) => { let l_query = Subquery::try_from_expr(left); let r_query = Subquery::try_from_expr(right); if l_query.is_err() && r_query.is_err() { diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index e904f895e12a..67eb58861a3d 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -285,6 +285,7 @@ impl<'a> ConstEvaluator<'a> { | Expr::SimilarTo { .. } | Expr::Case(_) | Expr::Cast { .. } + | Expr::PromotePrecision { .. } | Expr::TryCast { .. } | Expr::InList { .. } | Expr::GetIndexedField { .. } => true, @@ -363,6 +364,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Eq, right, + .. }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => { match as_bool_lit(*left)? { Some(true) => *right, @@ -377,6 +379,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Eq, right, + .. }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => { match as_bool_lit(*right)? { Some(true) => *left, @@ -453,6 +456,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: NotEq, right, + .. }) if is_bool_lit(&left) && info.is_boolean_type(&right)? => { match as_bool_lit(*left)? { Some(true) => Expr::Not(right), @@ -467,6 +471,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: NotEq, right, + .. }) if is_bool_lit(&right) && info.is_boolean_type(&left)? => { match as_bool_lit(*right)? { Some(true) => Expr::Not(left), @@ -484,30 +489,35 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Or, right: _, + .. }) if is_true(&left) => *left, // false OR A --> A Expr::BinaryExpr(BinaryExpr { left, op: Or, right, + .. }) if is_false(&left) => *right, // A OR true --> true (even if A is null) Expr::BinaryExpr(BinaryExpr { left: _, op: Or, right, + .. 
}) if is_true(&right) => *right, // A OR false --> A Expr::BinaryExpr(BinaryExpr { left, op: Or, right, + .. }) if is_false(&right) => *left, // A OR !A ---> true (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: Or, right, + .. }) if is_not_of(&right, &left) && !info.nullable(&left)? => { Expr::Literal(ScalarValue::Boolean(Some(true))) } @@ -516,6 +526,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Or, right, + .. }) if is_not_of(&left, &right) && !info.nullable(&right)? => { Expr::Literal(ScalarValue::Boolean(Some(true))) } @@ -524,24 +535,28 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Or, right, + .. }) if expr_contains(&left, &right, Or) => *left, // A OR (..A..) --> (..A..) Expr::BinaryExpr(BinaryExpr { left, op: Or, right, + .. }) if expr_contains(&right, &left, Or) => *right, // A OR (A AND B) --> A (if B not null) Expr::BinaryExpr(BinaryExpr { left, op: Or, right, + .. }) if !info.nullable(&right)? && is_op_with(And, &right, &left) => *left, // (A AND B) OR A --> A (if B not null) Expr::BinaryExpr(BinaryExpr { left, op: Or, right, + .. }) if !info.nullable(&left)? && is_op_with(And, &left, &right) => *right, // @@ -553,30 +568,35 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: And, right, + .. }) if is_true(&left) => *right, // false AND A --> false (even if A is null) Expr::BinaryExpr(BinaryExpr { left, op: And, right: _, + .. }) if is_false(&left) => *left, // A AND true --> A Expr::BinaryExpr(BinaryExpr { left, op: And, right, + .. }) if is_true(&right) => *left, // A AND false --> false (even if A is null) Expr::BinaryExpr(BinaryExpr { left: _, op: And, right, + .. }) if is_false(&right) => *right, // A AND !A ---> false (if A not nullable) Expr::BinaryExpr(BinaryExpr { left, op: And, right, + .. }) if is_not_of(&right, &left) && !info.nullable(&left)? 
=> { Expr::Literal(ScalarValue::Boolean(Some(false))) } @@ -585,6 +605,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: And, right, + .. }) if is_not_of(&left, &right) && !info.nullable(&right)? => { Expr::Literal(ScalarValue::Boolean(Some(false))) } @@ -593,24 +614,28 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: And, right, + .. }) if expr_contains(&left, &right, And) => *left, // A AND (..A..) --> (..A..) Expr::BinaryExpr(BinaryExpr { left, op: And, right, + .. }) if expr_contains(&right, &left, And) => *right, // A AND (A OR B) --> A (if B not null) Expr::BinaryExpr(BinaryExpr { left, op: And, right, + .. }) if !info.nullable(&right)? && is_op_with(Or, &right, &left) => *left, // (A OR B) AND A --> A (if B not null) Expr::BinaryExpr(BinaryExpr { left, op: And, right, + .. }) if !info.nullable(&left)? && is_op_with(Or, &left, &right) => *right, // @@ -622,24 +647,28 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Multiply, right, + .. }) if is_one(&right) => *left, // 1 * A --> A Expr::BinaryExpr(BinaryExpr { left, op: Multiply, right, + .. }) if is_one(&left) => *right, // A * null --> null Expr::BinaryExpr(BinaryExpr { left: _, op: Multiply, right, + .. }) if is_null(&right) => *right, // null * A --> null Expr::BinaryExpr(BinaryExpr { left, op: Multiply, right: _, + .. }) if is_null(&left) => *left, // A * 0 --> 0 (if A is not null) @@ -647,12 +676,14 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Multiply, right, + .. }) if !info.nullable(&left)? && is_zero(&right) => *right, // 0 * A --> 0 (if A is not null) Expr::BinaryExpr(BinaryExpr { left, op: Multiply, right, + .. }) if !info.nullable(&right)? && is_zero(&left) => *left, // @@ -664,24 +695,28 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Divide, right, + .. 
}) if is_one(&right) => *left, // null / A --> null Expr::BinaryExpr(BinaryExpr { left, op: Divide, right: _, + .. }) if is_null(&left) => *left, // A / null --> null Expr::BinaryExpr(BinaryExpr { left: _, op: Divide, right, + .. }) if is_null(&right) => *right, // 0 / 0 -> null Expr::BinaryExpr(BinaryExpr { left, op: Divide, right, + .. }) if is_zero(&left) && is_zero(&right) => { Expr::Literal(ScalarValue::Int32(None)) } @@ -690,6 +725,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: Divide, right, + .. }) if !info.nullable(&left)? && is_zero(&right) => { return Err(DataFusionError::ArrowError(ArrowError::DivideByZero)); } @@ -703,24 +739,28 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left: _, op: Modulo, right, + .. }) if is_null(&right) => *right, // null % A --> null Expr::BinaryExpr(BinaryExpr { left, op: Modulo, right: _, + .. }) if is_null(&left) => *left, // A % 1 --> 0 Expr::BinaryExpr(BinaryExpr { left, op: Modulo, right, + .. }) if !info.nullable(&left)? && is_one(&right) => lit(0), // A % 0 --> DivideByZero Error Expr::BinaryExpr(BinaryExpr { left, op: Modulo, right, + .. }) if !info.nullable(&left)? && is_zero(&right) => { return Err(DataFusionError::ArrowError(ArrowError::DivideByZero)); } @@ -734,6 +774,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left: _, op: BitwiseAnd, right, + .. }) if is_null(&right) => *right, // null & A -> null @@ -741,6 +782,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right: _, + .. }) if is_null(&left) => *left, // A & 0 -> 0 (if A not nullable) @@ -748,6 +790,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, + .. }) if !info.nullable(&left)? && is_zero(&right) => *right, // 0 & A -> 0 (if A not nullable) @@ -755,6 +798,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, + .. 
}) if !info.nullable(&right)? && is_zero(&left) => *left, // !A & A -> 0 (if A not nullable) @@ -762,6 +806,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, + .. }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?) } @@ -771,6 +816,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, + .. }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { Expr::Literal(ScalarValue::new_zero(&info.get_data_type(&left)?)?) } @@ -780,6 +826,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, + .. }) if expr_contains(&left, &right, BitwiseAnd) => *left, // A & (..A..) --> (..A..) @@ -787,6 +834,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, + .. }) if expr_contains(&right, &left, BitwiseAnd) => *right, // A & (A | B) --> A (if B not null) @@ -794,6 +842,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, + .. }) if !info.nullable(&right)? && is_op_with(BitwiseOr, &right, &left) => { *left } @@ -803,6 +852,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseAnd, right, + .. }) if !info.nullable(&left)? && is_op_with(BitwiseOr, &left, &right) => { *right } @@ -816,6 +866,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left: _, op: BitwiseOr, right, + .. }) if is_null(&right) => *right, // null | A -> null @@ -823,6 +874,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right: _, + .. }) if is_null(&left) => *left, // A | 0 -> A (even if A is null) @@ -830,6 +882,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, + .. 
}) if is_zero(&right) => *left, // 0 | A -> A (even if A is null) @@ -837,6 +890,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, + .. }) if is_zero(&left) => *right, // !A | A -> -1 (if A not nullable) @@ -844,6 +898,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, + .. }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?) } @@ -853,6 +908,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, + .. }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?) } @@ -862,6 +918,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, + .. }) if expr_contains(&left, &right, BitwiseOr) => *left, // A | (..A..) --> (..A..) @@ -869,6 +926,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, + .. }) if expr_contains(&right, &left, BitwiseOr) => *right, // A | (A & B) --> A (if B not null) @@ -876,6 +934,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, + .. }) if !info.nullable(&right)? && is_op_with(BitwiseAnd, &right, &left) => { *left } @@ -885,6 +944,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseOr, right, + .. }) if !info.nullable(&left)? && is_op_with(BitwiseAnd, &left, &right) => { *right } @@ -898,6 +958,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left: _, op: BitwiseXor, right, + .. }) if is_null(&right) => *right, // null ^ A -> null @@ -905,6 +966,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseXor, right: _, + .. 
}) if is_null(&left) => *left, // A ^ 0 -> A (if A not nullable) @@ -912,6 +974,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseXor, right, + .. }) if !info.nullable(&left)? && is_zero(&right) => *left, // 0 ^ A -> A (if A not nullable) @@ -919,6 +982,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseXor, right, + .. }) if !info.nullable(&right)? && is_zero(&left) => *right, // !A ^ A -> -1 (if A not nullable) @@ -926,6 +990,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseXor, right, + .. }) if is_negative_of(&left, &right) && !info.nullable(&right)? => { Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?) } @@ -935,6 +1000,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseXor, right, + .. }) if is_negative_of(&right, &left) && !info.nullable(&left)? => { Expr::Literal(ScalarValue::new_negative_one(&info.get_data_type(&left)?)?) } @@ -944,6 +1010,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseXor, right, + .. }) if expr_contains(&left, &right, BitwiseXor) => { let expr = delete_xor_in_complex_expr(&left, &right, false); if expr == *right { @@ -958,6 +1025,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseXor, right, + .. }) if expr_contains(&right, &left, BitwiseXor) => { let expr = delete_xor_in_complex_expr(&right, &left, true); if expr == *left { @@ -976,6 +1044,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left: _, op: BitwiseShiftRight, right, + .. }) if is_null(&right) => *right, // null >> A -> null @@ -983,6 +1052,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseShiftRight, right: _, + .. 
}) if is_null(&left) => *left, // A >> 0 -> A (even if A is null) @@ -990,6 +1060,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseShiftRight, right, + .. }) if is_zero(&right) => *left, // @@ -1001,6 +1072,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left: _, op: BitwiseShiftLeft, right, + .. }) if is_null(&right) => *right, // null << A -> null @@ -1008,6 +1080,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseShiftLeft, right: _, + .. }) if is_null(&left) => *left, // A << 0 -> A (even if A is null) @@ -1015,6 +1088,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: BitwiseShiftLeft, right, + .. }) if is_zero(&right) => *left, // @@ -1116,6 +1190,7 @@ impl<'a, S: SimplifyInfo> TreeNodeRewriter for Simplifier<'a, S> { left, op: op @ (RegexMatch | RegexNotMatch | RegexIMatch | RegexNotIMatch), right, + .. }) => simplify_regex_expr(left, op, right)?, // no additional rewrites possible @@ -2383,6 +2458,7 @@ mod tests { left: Box::new(left), op: Operator::RegexMatch, right: Box::new(right), + data_type: None, }) } @@ -2391,6 +2467,7 @@ mod tests { left: Box::new(left), op: Operator::RegexNotMatch, right: Box::new(right), + data_type: None, }) } @@ -2399,6 +2476,7 @@ mod tests { left: Box::new(left), op: Operator::RegexIMatch, right: Box::new(right), + data_type: None, }) } @@ -2407,6 +2485,7 @@ mod tests { left: Box::new(left), op: Operator::RegexNotIMatch, right: Box::new(right), + data_type: None, }) } diff --git a/datafusion/optimizer/src/simplify_expressions/regex.rs b/datafusion/optimizer/src/simplify_expressions/regex.rs index 13d170fd886f..ae36c2bb9ab6 100644 --- a/datafusion/optimizer/src/simplify_expressions/regex.rs +++ b/datafusion/optimizer/src/simplify_expressions/regex.rs @@ -55,7 +55,7 @@ pub fn simplify_regex_expr( } // leave untouched if optimization didn't work - Ok(Expr::BinaryExpr(BinaryExpr { left, op, right })) 
+ Ok(Expr::BinaryExpr(BinaryExpr::new(left, op, right))) } struct OperatorMode { diff --git a/datafusion/optimizer/src/simplify_expressions/utils.rs b/datafusion/optimizer/src/simplify_expressions/utils.rs index 8b3f437dc233..1bf975a18f66 100644 --- a/datafusion/optimizer/src/simplify_expressions/utils.rs +++ b/datafusion/optimizer/src/simplify_expressions/utils.rs @@ -69,7 +69,9 @@ pub static POWS_OF_TEN: [i128; 38] = [ /// expressions. Such as: (A AND B) AND C pub fn expr_contains(expr: &Expr, needle: &Expr, search_op: Operator) -> bool { match expr { - Expr::BinaryExpr(BinaryExpr { left, op, right }) if *op == search_op => { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) if *op == search_op => { expr_contains(left, needle, search_op) || expr_contains(right, needle, search_op) } @@ -87,9 +89,9 @@ pub fn delete_xor_in_complex_expr(expr: &Expr, needle: &Expr, is_left: bool) -> xor_counter: &mut i32, ) -> Expr { match expr { - Expr::BinaryExpr(BinaryExpr { left, op, right }) - if *op == Operator::BitwiseXor => - { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) if *op == Operator::BitwiseXor => { let left_expr = recursive_delete_xor_in_expr(left, needle, xor_counter); let right_expr = recursive_delete_xor_in_expr(right, needle, xor_counter); if left_expr == *needle { @@ -206,7 +208,7 @@ pub fn is_false(expr: &Expr) -> bool { /// returns true if `haystack` looks like (needle OP X) or (X OP needle) pub fn is_op_with(target_op: Operator, haystack: &Expr, needle: &Expr) -> bool { - matches!(haystack, Expr::BinaryExpr(BinaryExpr { left, op, right }) if op == &target_op && (needle == left.as_ref() || needle == right.as_ref())) + matches!(haystack, Expr::BinaryExpr(BinaryExpr { left, op, right, .. 
}) if op == &target_op && (needle == left.as_ref() || needle == right.as_ref())) } /// returns true if `not_expr` is !`expr` (not) @@ -246,7 +248,9 @@ pub fn as_bool_lit(expr: Expr) -> Result> { /// For others, use Not clause pub fn negate_clause(expr: Expr) -> Expr { match expr { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => { if let Some(negated_op) = op.negate() { return Expr::BinaryExpr(BinaryExpr::new(left, negated_op, right)); } @@ -321,7 +325,9 @@ pub fn negate_clause(expr: Expr) -> Expr { /// For others, use Negative clause pub fn distribute_negation(expr: Expr) -> Expr { match expr { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => { match op { // ~(A & B) ===> ~A | ~B Operator::BitwiseAnd => { diff --git a/datafusion/optimizer/src/type_coercion.rs b/datafusion/optimizer/src/type_coercion.rs index 0be9c89b6ccb..aac6354453b8 100644 --- a/datafusion/optimizer/src/type_coercion.rs +++ b/datafusion/optimizer/src/type_coercion.rs @@ -28,7 +28,7 @@ use datafusion_common::{ use datafusion_expr::expr::{self, Between, BinaryExpr, Case, Like, WindowFunction}; use datafusion_expr::logical_plan::Subquery; use datafusion_expr::type_coercion::binary::{ - coerce_types, comparison_coercion, like_coercion, + binary_operator_data_type, coerce_types, comparison_coercion, like_coercion, }; use datafusion_expr::type_coercion::functions::data_types; use datafusion_expr::type_coercion::other::{ @@ -237,6 +237,7 @@ impl TreeNodeRewriter for TypeCoercionRewriter { ref left, op, ref right, + .. 
}) => { let left_type = left.get_type(&self.schema)?; let right_type = right.get_type(&self.schema)?; @@ -248,6 +249,42 @@ impl TreeNodeRewriter for TypeCoercionRewriter { // this is a workaround for https://github.com/apache/arrow-datafusion/issues/3419 Ok(expr.clone()) } + (DataType::Decimal128(_, _), _) | (_, DataType::Decimal128(_, _)) + if matches!( + op, + Operator::Plus + | Operator::Minus + | Operator::Divide + | Operator::Multiply + | Operator::Modulo + ) => + { + if !matches!(left.as_ref(), &Expr::PromotePrecision(_)) + && !matches!(right.as_ref(), &Expr::PromotePrecision(_)) + { + // Promote decimal types if they are not already promoted + let coerced_type = + coerce_types(&left_type, &op, &right_type)?; + let result_type = + binary_operator_data_type(&left_type, &op, &right_type)?; + let expr = Expr::BinaryExpr(BinaryExpr::new_with_data_type( + Box::new( + left.clone() + .promote_to(&coerced_type, &self.schema)?, + ), + op, + Box::new( + right + .clone() + .promote_to(&coerced_type, &self.schema)?, + ), + Some(result_type), + )); + Ok(expr) + } else { + Ok(expr.clone()) + } + } _ => { let coerced_type = coerce_types(&left_type, &op, &right_type)?; let expr = Expr::BinaryExpr(BinaryExpr::new( diff --git a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs index 296b3b33c960..ac4926770b24 100644 --- a/datafusion/optimizer/src/unwrap_cast_in_comparison.rs +++ b/datafusion/optimizer/src/unwrap_cast_in_comparison.rs @@ -30,7 +30,7 @@ use datafusion_common::{DFSchemaRef, DataFusionError, Result, ScalarValue}; use datafusion_expr::expr::{BinaryExpr, Cast, TryCast}; use datafusion_expr::utils::from_plan; use datafusion_expr::{ - binary_expr, in_list, lit, Expr, ExprSchemable, LogicalPlan, Operator, + binary_expr_with_data_type, in_list, lit, Expr, ExprSchemable, LogicalPlan, Operator, }; use std::cmp::Ordering; use std::sync::Arc; @@ -134,7 +134,12 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter { 
// For case: // try_cast/cast(expr as data_type) op literal // literal op try_cast/cast(expr as data_type) - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + Expr::BinaryExpr(BinaryExpr { + left, + op, + right, + data_type, + }) => { let left = left.as_ref().clone(); let right = right.as_ref().clone(); let left_type = left.get_type(&self.schema)?; @@ -157,10 +162,11 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter { try_cast_literal_to_type(left_lit_value, &expr_type)?; if let Some(value) = casted_scalar_value { // unwrap the cast/try_cast for the right expr - return Ok(binary_expr( + return Ok(binary_expr_with_data_type( lit(value), *op, expr.as_ref().clone(), + data_type.clone(), )); } } @@ -176,10 +182,11 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter { try_cast_literal_to_type(right_lit_value, &expr_type)?; if let Some(value) = casted_scalar_value { // unwrap the cast/try_cast for the left expr - return Ok(binary_expr( + return Ok(binary_expr_with_data_type( expr.as_ref().clone(), *op, lit(value), + data_type.clone(), )); } } @@ -189,7 +196,12 @@ impl TreeNodeRewriter for UnwrapCastExprRewriter { }; } // return the new binary op - Ok(binary_expr(left, *op, right)) + Ok(binary_expr_with_data_type( + left, + *op, + right, + data_type.clone(), + )) } // For case: // try_cast/cast(expr as left_type) in (expr1,expr2,expr3) diff --git a/datafusion/optimizer/src/utils.rs b/datafusion/optimizer/src/utils.rs index e1a266dbab3c..6031da7b9a24 100644 --- a/datafusion/optimizer/src/utils.rs +++ b/datafusion/optimizer/src/utils.rs @@ -75,6 +75,7 @@ fn split_conjunction_impl<'a>(expr: &'a Expr, mut exprs: Vec<&'a Expr>) -> Vec<& right, op: Operator::And, left, + .. 
}) => { let exprs = split_conjunction_impl(left, exprs); split_conjunction_impl(right, exprs) @@ -144,7 +145,9 @@ fn split_binary_owned_impl( mut exprs: Vec, ) -> Vec { match expr { - Expr::BinaryExpr(BinaryExpr { right, op, left }) if op == operator => { + Expr::BinaryExpr(BinaryExpr { + right, op, left, .. + }) if op == operator => { let exprs = split_binary_owned_impl(*left, operator, exprs); split_binary_owned_impl(*right, operator, exprs) } @@ -169,7 +172,9 @@ fn split_binary_impl<'a>( mut exprs: Vec<&'a Expr>, ) -> Vec<&'a Expr> { match expr { - Expr::BinaryExpr(BinaryExpr { right, op, left }) if *op == operator => { + Expr::BinaryExpr(BinaryExpr { + right, op, left, .. + }) if *op == operator => { let exprs = split_binary_impl(left, operator, exprs); split_binary_impl(right, operator, exprs) } @@ -242,6 +247,7 @@ pub fn verify_not_disjunction(predicates: &[&Expr]) -> Result<()> { left: _, op: Operator::Or, right: _, + .. }) => { plan_err!("Optimizing disjunctions not supported!") } @@ -299,9 +305,9 @@ pub fn find_join_exprs( } else { // TODO remove the logic let (left, op, right) = match filter { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - (*left.clone(), *op, *right.clone()) - } + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => (*left.clone(), *op, *right.clone()), _ => { others.push((*filter).clone()); continue; @@ -370,9 +376,9 @@ pub fn exprs_to_join_cols( let mut others: Vec = vec![]; for filter in exprs.iter() { let (left, op, right) = match filter { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - (*left.clone(), *op, *right.clone()) - } + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. 
+ }) => (*left.clone(), *op, *right.clone()), _ => plan_err!("Invalid correlation expression!")?, }; match op { diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 4e65a9fdd539..97e3bd809fe3 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -89,6 +89,7 @@ pub struct BinaryExpr { left: Arc, op: Operator, right: Arc, + data_type: Option, } impl BinaryExpr { @@ -98,7 +99,27 @@ impl BinaryExpr { op: Operator, right: Arc, ) -> Self { - Self { left, op, right } + Self { + left, + op, + right, + data_type: None, + } + } + + /// Create new binary expression + pub fn new_with_data_type( + left: Arc, + op: Operator, + right: Arc, + data_type: Option, + ) -> Self { + Self { + left, + op, + right, + data_type, + } } /// Get the left side of the binary expression @@ -366,12 +387,14 @@ macro_rules! compute_primitive_op_dyn_scalar { /// LEFT is Decimal or Dictionary array of decimal values, RIGHT is scalar value /// OP_TYPE is the return type of scalar function macro_rules! compute_primitive_decimal_op_dyn_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr) => {{ + ($LEFT:expr, $RIGHT:expr, $OP:ident, $OP_TYPE:expr, $RET_TYPE:expr) => {{ // generate the scalar function name, such as add_decimal_dyn_scalar, // from the $OP parameter (which could have a value of add) and the // suffix _decimal_dyn_scalar if let Some(value) = $RIGHT { - Ok(paste::expr! {[<$OP _decimal_dyn_scalar>]}($LEFT, value)?) + Ok(paste::expr! {[<$OP _decimal_dyn_scalar>]}( + $LEFT, value, $RET_TYPE, + )?) } else { // when the $RIGHT is a NULL, generate a NULL array of $OP_TYPE Ok(Arc::new(new_null_array($OP_TYPE, $LEFT.len()))) @@ -419,15 +442,15 @@ macro_rules! binary_string_array_op { /// The binary_primitive_array_op macro only evaluates for primitive types /// like integers and floats. macro_rules! 
binary_primitive_array_op_dyn { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ + ($LEFT:expr, $RIGHT:expr, $OP:ident, $RET_TYPE:expr) => {{ match $LEFT.data_type() { DataType::Decimal128(_, _) => { - Ok(paste::expr! {[<$OP _decimal>]}(&$LEFT, &$RIGHT)?) + Ok(paste::expr! {[<$OP _decimal>]}(&$LEFT, &$RIGHT, $RET_TYPE)?) } DataType::Dictionary(_, value_type) if matches!(value_type.as_ref(), &DataType::Decimal128(_, _)) => { - Ok(paste::expr! {[<$OP _decimal>]}(&$LEFT, &$RIGHT)?) + Ok(paste::expr! {[<$OP _decimal>]}(&$LEFT, &$RIGHT, $RET_TYPE)?) } _ => Ok(Arc::new( $OP(&$LEFT, &$RIGHT).map_err(|err| DataFusionError::ArrowError(err))?, @@ -440,13 +463,13 @@ macro_rules! binary_primitive_array_op_dyn { /// The binary_primitive_array_op_dyn_scalar macro only evaluates for primitive /// types like integers and floats. macro_rules! binary_primitive_array_op_dyn_scalar { - ($LEFT:expr, $RIGHT:expr, $OP:ident) => {{ + ($LEFT:expr, $RIGHT:expr, $OP:ident, $RET_TYPE:expr) => {{ // unwrap underlying (non dictionary) value let right = unwrap_dict_value($RIGHT); let op_type = $LEFT.data_type(); let result: Result> = match right { - ScalarValue::Decimal128(v, _, _) => compute_primitive_decimal_op_dyn_scalar!($LEFT, v, $OP, op_type), + ScalarValue::Decimal128(v, _, _) => compute_primitive_decimal_op_dyn_scalar!($LEFT, v, $OP, op_type, $RET_TYPE), ScalarValue::Int8(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int8Type), ScalarValue::Int16(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int16Type), ScalarValue::Int32(v) => compute_primitive_op_dyn_scalar!($LEFT, v, $OP, op_type, Int32Type), @@ -626,11 +649,15 @@ impl PhysicalExpr for BinaryExpr { } fn data_type(&self, input_schema: &Schema) -> Result { - binary_operator_data_type( - &self.left.data_type(input_schema)?, - &self.op, - &self.right.data_type(input_schema)?, - ) + if self.data_type.is_some() { + Ok(self.data_type.as_ref().unwrap().clone()) + } else { + binary_operator_data_type( + 
&self.left.data_type(input_schema)?, + &self.op, + &self.right.data_type(input_schema)?, + ) + } } fn nullable(&self, input_schema: &Schema) -> Result { @@ -1012,6 +1039,7 @@ impl BinaryExpr { scalar: ScalarValue, ) -> Result>> { let bool_type = &DataType::Boolean; + let result_type = &self.data_type; let scalar_result = match &self.op { Operator::Lt => { binary_array_op_dyn_scalar!(array, scalar, lt, bool_type) @@ -1032,19 +1060,29 @@ impl BinaryExpr { binary_array_op_dyn_scalar!(array, scalar, neq, bool_type) } Operator::Plus => { - binary_primitive_array_op_dyn_scalar!(array, scalar, add) + binary_primitive_array_op_dyn_scalar!(array, scalar, add, result_type) } Operator::Minus => { - binary_primitive_array_op_dyn_scalar!(array, scalar, subtract) + binary_primitive_array_op_dyn_scalar!( + array, + scalar, + subtract, + result_type + ) } Operator::Multiply => { - binary_primitive_array_op_dyn_scalar!(array, scalar, multiply) + binary_primitive_array_op_dyn_scalar!( + array, + scalar, + multiply, + result_type + ) } Operator::Divide => { - binary_primitive_array_op_dyn_scalar!(array, scalar, divide) + binary_primitive_array_op_dyn_scalar!(array, scalar, divide, result_type) } Operator::Modulo => { - binary_primitive_array_op_dyn_scalar!(array, scalar, modulus) + binary_primitive_array_op_dyn_scalar!(array, scalar, modulus, result_type) } Operator::RegexMatch => binary_string_array_flag_op_scalar!( array, @@ -1126,6 +1164,7 @@ impl BinaryExpr { right: Arc, right_data_type: &DataType, ) -> Result { + let result_type = &self.data_type; match &self.op { Operator::Lt => lt_dyn(&left, &right), Operator::LtEq => lt_eq_dyn(&left, &right), @@ -1146,16 +1185,20 @@ impl BinaryExpr { Operator::IsNotDistinctFrom => { binary_array_op!(left, right, is_not_distinct_from) } - Operator::Plus => binary_primitive_array_op_dyn!(left, right, add_dyn), - Operator::Minus => binary_primitive_array_op_dyn!(left, right, subtract_dyn), + Operator::Plus => { + 
binary_primitive_array_op_dyn!(left, right, add_dyn, result_type) + } + Operator::Minus => { + binary_primitive_array_op_dyn!(left, right, subtract_dyn, result_type) + } Operator::Multiply => { - binary_primitive_array_op_dyn!(left, right, multiply_dyn) + binary_primitive_array_op_dyn!(left, right, multiply_dyn, result_type) } Operator::Divide => { - binary_primitive_array_op_dyn!(left, right, divide_dyn_opt) + binary_primitive_array_op_dyn!(left, right, divide_dyn_opt, result_type) } Operator::Modulo => { - binary_primitive_array_op_dyn!(left, right, modulus_dyn) + binary_primitive_array_op_dyn!(left, right, modulus_dyn, result_type) } Operator::And => { if left_data_type == &DataType::Boolean { @@ -1229,6 +1272,28 @@ pub fn binary( Ok(Arc::new(BinaryExpr::new(lhs, op, rhs))) } +/// Create a binary expression whose arguments are correctly coerced. +/// This function errors if it is not possible to coerce the arguments +/// to computational types supported by the operator. +pub fn binary_with_data_type( + lhs: Arc, + op: Operator, + rhs: Arc, + input_schema: &Schema, + data_type: Option, +) -> Result> { + let lhs_type = &lhs.data_type(input_schema)?; + let rhs_type = &rhs.data_type(input_schema)?; + if !lhs_type.eq(rhs_type) { + return Err(DataFusionError::Internal(format!( + "The type of {lhs_type} {op:?} {rhs_type} of binary physical should be same" + ))); + } + Ok(Arc::new(BinaryExpr::new_with_data_type( + lhs, op, rhs, data_type, + ))) +} + #[cfg(test)] mod tests { use super::*; @@ -1247,8 +1312,9 @@ mod tests { op: Operator, r: Arc, input_schema: &Schema, + x: &DataType, ) -> Arc { - binary(l, op, r, input_schema).unwrap() + binary_with_data_type(l, op, r, input_schema, Some(x.clone())).unwrap() } #[test] @@ -1266,6 +1332,7 @@ mod tests { Operator::Lt, col("b", &schema)?, &schema, + &DataType::Boolean, ); let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])?; @@ -1299,6 +1366,7 @@ mod tests { Operator::Lt, col("b", &schema)?, 
&schema, + &DataType::Boolean, ), Operator::Or, binary_simple( @@ -1306,8 +1374,10 @@ mod tests { Operator::Eq, col("b", &schema)?, &schema, + &DataType::Boolean, ), &schema, + &DataType::Boolean, ); let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)])?; @@ -2763,8 +2833,13 @@ mod tests { op: Operator, expected: PrimitiveArray, ) -> Result<()> { - let arithmetic_op = - binary_simple(col("a", &schema)?, op, col("b", &schema)?, &schema); + let arithmetic_op = binary_simple( + col("a", &schema)?, + op, + col("b", &schema)?, + &schema, + expected.data_type(), + ); let batch = RecordBatch::try_new(schema, data)?; let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); @@ -2780,7 +2855,8 @@ mod tests { expected: ArrayRef, ) -> Result<()> { let lit = Arc::new(Literal::new(literal)); - let arithmetic_op = binary_simple(col("a", &schema)?, op, lit, &schema); + let arithmetic_op = + binary_simple(col("a", &schema)?, op, lit, &schema, expected.data_type()); let batch = RecordBatch::try_new(schema, data)?; let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); @@ -2801,7 +2877,8 @@ mod tests { let left_expr = try_cast(col("a", schema)?, schema, result_type.clone())?; let right_expr = try_cast(col("b", schema)?, schema, result_type)?; - let arithmetic_op = binary_simple(left_expr, op, right_expr, schema); + let arithmetic_op = + binary_simple(left_expr, op, right_expr, schema, &DataType::Boolean); let data: Vec = vec![left.clone(), right.clone()]; let batch = RecordBatch::try_new(schema.clone(), data)?; let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); @@ -2831,7 +2908,8 @@ mod tests { try_cast(col("a", schema)?, schema, op_type)? 
}; - let arithmetic_op = binary_simple(left_expr, op, right_expr, schema); + let arithmetic_op = + binary_simple(left_expr, op, right_expr, schema, &DataType::Boolean); let batch = RecordBatch::try_new(Arc::clone(schema), vec![Arc::clone(arr)])?; let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); assert_eq!(result.as_ref(), expected); @@ -2860,7 +2938,8 @@ mod tests { try_cast(col("a", schema)?, schema, op_type)? }; - let arithmetic_op = binary_simple(left_expr, op, right_expr, schema); + let arithmetic_op = + binary_simple(left_expr, op, right_expr, schema, &DataType::Boolean); let batch = RecordBatch::try_new(Arc::clone(schema), vec![Arc::clone(arr)])?; let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); assert_eq!(result.as_ref(), expected); @@ -3428,7 +3507,7 @@ mod tests { let tree_depth: i32 = 100; let expr = (0..tree_depth) .map(|_| col("a", schema.as_ref()).unwrap()) - .reduce(|l, r| binary_simple(l, Operator::Plus, r, &schema)) + .reduce(|l, r| binary_simple(l, Operator::Plus, r, &schema, &DataType::Int32)) .unwrap(); let result = expr @@ -3935,7 +4014,13 @@ mod tests { schema.field(1).is_nullable(), ), ]); - let arithmetic_op = binary_simple(left_expr, op, right_expr, &coerced_schema); + let arithmetic_op = binary_simple( + left_expr, + op, + right_expr, + &coerced_schema, + expected.data_type(), + ); let data: Vec = vec![left.clone(), right.clone()]; let batch = RecordBatch::try_new(schema.clone(), data)?; let result = arithmetic_op.evaluate(&batch)?.into_array(batch.num_rows()); @@ -4004,9 +4089,9 @@ mod tests { // multiply: decimal array multiply int32 array let expect = Arc::new(create_decimal_array( - &[Some(15129), None, Some(15006), Some(15376)], - 21, - 2, + &[Some(1512900), None, Some(1500600), Some(1537600)], + 38, + 4, )) as ArrayRef; apply_arithmetic_op( &schema, @@ -4597,6 +4682,7 @@ mod tests { Operator::GtEq, lit(ScalarValue::from(25)), &schema, + &DataType::Boolean, ); let context = 
AnalysisContext::from_statistics(&schema, &statistics); @@ -4626,6 +4712,7 @@ mod tests { Operator::GtEq, a.clone(), &schema, + &DataType::Boolean, ); let context = AnalysisContext::from_statistics(&schema, &statistics); diff --git a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs index 57cf6a1cf80d..70f91d53eb06 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels_arrow.rs @@ -28,6 +28,8 @@ use arrow::{array::*, datatypes::ArrowNumericType, downcast_dictionary_array}; use arrow_schema::DataType; use datafusion_common::cast::as_decimal128_array; use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::type_coercion::binary::binary_operator_data_type; +use datafusion_expr::Operator; use std::sync::Arc; // Simple (low performance) kernels until optimized kernels are added to arrow @@ -257,14 +259,22 @@ pub(crate) fn is_not_distinct_from_decimal( .collect()) } -pub(crate) fn add_dyn_decimal(left: &dyn Array, right: &dyn Array) -> Result { - let (precision, scale) = get_precision_scale(left)?; +pub(crate) fn add_dyn_decimal( + left: &dyn Array, + right: &dyn Array, + result_type: &Option, +) -> Result { + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; let array = add_dyn(left, right)?; decimal_array_with_precision_scale(array, precision, scale) } -pub(crate) fn add_decimal_dyn_scalar(left: &dyn Array, right: i128) -> Result { - let (precision, scale) = get_precision_scale(left)?; +pub(crate) fn add_decimal_dyn_scalar( + left: &dyn Array, + right: i128, + result_type: &Option, +) -> Result { + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; let array = add_scalar_dyn::(left, right)?; decimal_array_with_precision_scale(array, precision, scale) @@ -273,25 +283,28 @@ pub(crate) fn add_decimal_dyn_scalar(left: &dyn Array, right: 
i128) -> Result, ) -> Result { - let (precision, scale) = get_precision_scale(left)?; + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; let array = subtract_scalar_dyn::(left, right)?; decimal_array_with_precision_scale(array, precision, scale) } -fn get_precision_scale(left: &dyn Array) -> Result<(u8, i8)> { - match left.data_type() { +fn get_precision_scale(data_type: &DataType) -> Result<(u8, i8)> { + match data_type { DataType::Decimal128(precision, scale) => Ok((*precision, *scale)), DataType::Dictionary(_, value_type) => match value_type.as_ref() { DataType::Decimal128(precision, scale) => Ok((*precision, *scale)), - _ => Err(DataFusionError::Internal( - "Unexpected data type".to_string(), - )), + _ => Err(DataFusionError::Internal(format!( + "Unexpected data type: {}", + data_type + ))), }, - _ => Err(DataFusionError::Internal( - "Unexpected data type".to_string(), - )), + _ => Err(DataFusionError::Internal(format!( + "Unexpected data type: {}", + data_type + ))), } } @@ -333,23 +346,34 @@ fn decimal_array_with_precision_scale( pub(crate) fn multiply_decimal_dyn_scalar( left: &dyn Array, right: i128, + result_type: &Option, ) -> Result { - let (precision, scale) = get_precision_scale(left)?; + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; - let array = multiply_scalar_dyn::(left, right)?; + let op_type = binary_operator_data_type( + left.data_type(), + &Operator::Multiply, + left.data_type(), + )?; + let (_, op_scale) = get_precision_scale(&op_type)?; - let divide = 10_i128.pow(scale as u32); - let array = divide_scalar_dyn::(&array, divide)?; + let array = multiply_scalar_dyn::(left, right)?; - decimal_array_with_precision_scale(array, precision, scale) + if op_scale > scale { + let div = 10_i128.pow((op_scale - scale) as u32); + let array = divide_scalar_dyn::(&array, div)?; + decimal_array_with_precision_scale(array, precision, scale) + } else { + decimal_array_with_precision_scale(array, 
precision, scale) + } } pub(crate) fn divide_decimal_dyn_scalar( left: &dyn Array, right: i128, + result_type: &Option, ) -> Result { - let (precision, scale) = get_precision_scale(left)?; - + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; let mul = 10_i128.pow(scale as u32); let array = multiply_scalar_dyn::(left, mul)?; @@ -360,8 +384,9 @@ pub(crate) fn divide_decimal_dyn_scalar( pub(crate) fn subtract_dyn_decimal( left: &dyn Array, right: &dyn Array, + result_type: &Option, ) -> Result { - let (precision, scale) = get_precision_scale(left)?; + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; let array = subtract_dyn(left, right)?; decimal_array_with_precision_scale(array, precision, scale) } @@ -369,33 +394,50 @@ pub(crate) fn subtract_dyn_decimal( pub(crate) fn multiply_dyn_decimal( left: &dyn Array, right: &dyn Array, + result_type: &Option, ) -> Result { - let (precision, scale) = get_precision_scale(left)?; + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; + + let op_type = binary_operator_data_type( + left.data_type(), + &Operator::Multiply, + right.data_type(), + )?; + let (_, op_scale) = get_precision_scale(&op_type)?; - let divide = 10_i128.pow(scale as u32); let array = multiply_dyn(left, right)?; - let array = divide_scalar_dyn::(&array, divide)?; - decimal_array_with_precision_scale(array, precision, scale) + + if op_scale > scale { + let div = 10_i128.pow((op_scale - scale) as u32); + let array = divide_scalar_dyn::(&array, div)?; + decimal_array_with_precision_scale(array, precision, scale) + } else { + decimal_array_with_precision_scale(array, precision, scale) + } } pub(crate) fn divide_dyn_opt_decimal( left: &dyn Array, right: &dyn Array, + result_type: &Option, ) -> Result { - let (precision, scale) = get_precision_scale(left)?; + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; let mul = 10_i128.pow(scale as u32); + let array = 
multiply_scalar_dyn::(left, mul)?; let array = decimal_array_with_precision_scale(array, precision, scale)?; let array = divide_dyn_opt(&array, right)?; + decimal_array_with_precision_scale(array, precision, scale) } pub(crate) fn modulus_dyn_decimal( left: &dyn Array, right: &dyn Array, + result_type: &Option, ) -> Result { - let (precision, scale) = get_precision_scale(left)?; + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; let array = modulus_dyn(left, right)?; decimal_array_with_precision_scale(array, precision, scale) } @@ -403,9 +445,9 @@ pub(crate) fn modulus_dyn_decimal( pub(crate) fn modulus_decimal_dyn_scalar( left: &dyn Array, right: i128, + result_type: &Option, ) -> Result { - let (precision, scale) = get_precision_scale(left)?; - + let (precision, scale) = get_precision_scale(&result_type.clone().unwrap())?; let array = modulus_scalar_dyn::(left, right)?; decimal_array_with_precision_scale(array, precision, scale) } @@ -503,34 +545,69 @@ mod tests { 3, ); // add - let result = add_dyn_decimal(&left_decimal_array, &right_decimal_array)?; + let result_type = Some( + binary_operator_data_type( + left_decimal_array.data_type(), + &Operator::Plus, + right_decimal_array.data_type(), + ) + .unwrap(), + ); + let result = + add_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type)?; let result = as_decimal128_array(&result)?; let expect = - create_decimal_array(&[Some(246), None, Some(245), Some(247)], 25, 3); + create_decimal_array(&[Some(246), None, Some(245), Some(247)], 26, 3); assert_eq!(&expect, result); - let result = add_decimal_dyn_scalar(&left_decimal_array, 10)?; + let result = add_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; let result = as_decimal128_array(&result)?; let expect = - create_decimal_array(&[Some(133), None, Some(132), Some(134)], 25, 3); + create_decimal_array(&[Some(133), None, Some(132), Some(134)], 26, 3); assert_eq!(&expect, result); // subtract - let result = 
subtract_dyn_decimal(&left_decimal_array, &right_decimal_array)?; + let result_type = Some( + binary_operator_data_type( + left_decimal_array.data_type(), + &Operator::Minus, + right_decimal_array.data_type(), + ) + .unwrap(), + ); + let result = subtract_dyn_decimal( + &left_decimal_array, + &right_decimal_array, + &result_type, + )?; let result = as_decimal128_array(&result)?; - let expect = create_decimal_array(&[Some(0), None, Some(-1), Some(1)], 25, 3); + let expect = create_decimal_array(&[Some(0), None, Some(-1), Some(1)], 26, 3); assert_eq!(&expect, result); - let result = subtract_decimal_dyn_scalar(&left_decimal_array, 10)?; + let result = subtract_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; let result = as_decimal128_array(&result)?; let expect = - create_decimal_array(&[Some(113), None, Some(112), Some(114)], 25, 3); + create_decimal_array(&[Some(113), None, Some(112), Some(114)], 26, 3); assert_eq!(&expect, result); // multiply - let result = multiply_dyn_decimal(&left_decimal_array, &right_decimal_array)?; + let result_type = Some( + binary_operator_data_type( + left_decimal_array.data_type(), + &Operator::Multiply, + right_decimal_array.data_type(), + ) + .unwrap(), + ); + let result = multiply_dyn_decimal( + &left_decimal_array, + &right_decimal_array, + &result_type, + )?; let result = as_decimal128_array(&result)?; - let expect = create_decimal_array(&[Some(15), None, Some(15), Some(15)], 25, 3); + let expect = + create_decimal_array(&[Some(15129), None, Some(15006), Some(15252)], 38, 6); assert_eq!(&expect, result); - let result = multiply_decimal_dyn_scalar(&left_decimal_array, 10)?; + let result = multiply_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; let result = as_decimal128_array(&result)?; - let expect = create_decimal_array(&[Some(1), None, Some(1), Some(1)], 25, 3); + let expect = + create_decimal_array(&[Some(1230), None, Some(1220), Some(1240)], 38, 6); assert_eq!(&expect, result); // divide let 
left_decimal_array = create_decimal_array( @@ -549,34 +626,62 @@ mod tests { 25, 3, ); - let result = divide_dyn_opt_decimal(&left_decimal_array, &right_decimal_array)?; + let result_type = Some( + binary_operator_data_type( + left_decimal_array.data_type(), + &Operator::Divide, + right_decimal_array.data_type(), + ) + .unwrap(), + ); + let result = divide_dyn_opt_decimal( + &left_decimal_array, + &right_decimal_array, + &result_type, + )?; let result = as_decimal128_array(&result)?; let expect = create_decimal_array( - &[Some(123456700), None, Some(22446672), Some(-10037130), None], - 25, - 3, + &[ + Some(12345670000000000000000000000000000), + None, + Some(2244667272727272727272727272727272), + Some(-1003713008130081300813008130081300), + None, + ], + 38, + 29, ); assert_eq!(&expect, result); - let result = divide_decimal_dyn_scalar(&left_decimal_array, 10)?; + let result = divide_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; let result = as_decimal128_array(&result)?; let expect = create_decimal_array( &[ - Some(123456700), + Some(12345670000000000000000000000000000), None, - Some(123456700), - Some(123456700), - Some(123456700), + Some(12345670000000000000000000000000000), + Some(12345670000000000000000000000000000), + Some(12345670000000000000000000000000000), ], - 25, - 3, + 38, + 29, ); assert_eq!(&expect, result); - let result = modulus_dyn_decimal(&left_decimal_array, &right_decimal_array)?; + // modulus + let result_type = Some( + binary_operator_data_type( + left_decimal_array.data_type(), + &Operator::Modulo, + right_decimal_array.data_type(), + ) + .unwrap(), + ); + let result = + modulus_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type)?; let result = as_decimal128_array(&result)?; let expect = create_decimal_array(&[Some(7), None, Some(37), Some(16), None], 25, 3); assert_eq!(&expect, result); - let result = modulus_decimal_dyn_scalar(&left_decimal_array, 10)?; + let result = 
modulus_decimal_dyn_scalar(&left_decimal_array, 10, &result_type)?; let result = as_decimal128_array(&result)?; let expect = create_decimal_array(&[Some(7), None, Some(7), Some(7), Some(7)], 25, 3); @@ -590,12 +695,31 @@ mod tests { let left_decimal_array = create_decimal_array(&[Some(101)], 10, 1); let right_decimal_array = create_decimal_array(&[Some(0)], 1, 1); - let err = divide_decimal_dyn_scalar(&left_decimal_array, 0).unwrap_err(); + let result_type = Some( + binary_operator_data_type( + left_decimal_array.data_type(), + &Operator::Divide, + right_decimal_array.data_type(), + ) + .unwrap(), + ); + let err = + divide_decimal_dyn_scalar(&left_decimal_array, 0, &result_type).unwrap_err(); assert_eq!("Arrow error: Divide by zero error", err.to_string()); + let result_type = Some( + binary_operator_data_type( + left_decimal_array.data_type(), + &Operator::Modulo, + right_decimal_array.data_type(), + ) + .unwrap(), + ); let err = - modulus_dyn_decimal(&left_decimal_array, &right_decimal_array).unwrap_err(); + modulus_dyn_decimal(&left_decimal_array, &right_decimal_array, &result_type) + .unwrap_err(); assert_eq!("Arrow error: Divide by zero error", err.to_string()); - let err = modulus_decimal_dyn_scalar(&left_decimal_array, 0).unwrap_err(); + let err = + modulus_decimal_dyn_scalar(&left_decimal_array, 0, &result_type).unwrap_err(); assert_eq!("Arrow error: Divide by zero error", err.to_string()); } diff --git a/datafusion/physical-expr/src/expressions/mod.rs b/datafusion/physical-expr/src/expressions/mod.rs index 63fb7b7d37ad..3f94d4fff627 100644 --- a/datafusion/physical-expr/src/expressions/mod.rs +++ b/datafusion/physical-expr/src/expressions/mod.rs @@ -33,6 +33,7 @@ mod negative; mod no_op; mod not; mod nullif; +mod promote_precision; mod try_cast; /// Module with some convenient methods used in expression building @@ -72,7 +73,7 @@ pub use crate::window::rank::{dense_rank, percent_rank, rank}; pub use crate::window::rank::{Rank, RankType}; pub use 
crate::window::row_number::RowNumber; -pub use binary::{binary, BinaryExpr}; +pub use binary::{binary, binary_with_data_type, BinaryExpr}; pub use case::{case, CaseExpr}; pub use cast::{ cast, cast_column, cast_with_options, CastExpr, DEFAULT_DATAFUSION_CAST_OPTIONS, @@ -89,6 +90,7 @@ pub use negative::{negative, NegativeExpr}; pub use no_op::NoOp; pub use not::{not, NotExpr}; pub use nullif::nullif_func; +pub use promote_precision::promote_precision; pub use try_cast::{try_cast, TryCastExpr}; /// returns the name of the state diff --git a/datafusion/physical-expr/src/expressions/promote_precision.rs b/datafusion/physical-expr/src/expressions/promote_precision.rs new file mode 100644 index 000000000000..0d9a7144e5f4 --- /dev/null +++ b/datafusion/physical-expr/src/expressions/promote_precision.rs @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::physical_expr::down_cast_any_ref; +use crate::PhysicalExpr; +use arrow::record_batch::RecordBatch; +use arrow_schema::{DataType, Schema}; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use std::any::Any; +use std::fmt; +use std::sync::Arc; + +/// PromotePrecision expression wraps an expression which was promoted to a specific data type +#[derive(Debug)] +pub struct PromotePrecisionExpr { + /// The expression to be promoted + expr: Arc, +} + +impl PromotePrecisionExpr { + /// Create a new PromotePrecisionExpr + pub fn new(expr: Arc) -> Self { + Self { expr } + } +} + +impl fmt::Display for PromotePrecisionExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "PROMOTE_PRECISION({})", self.expr) + } +} + +impl PhysicalExpr for PromotePrecisionExpr { + /// Return a reference to Any that can be used for downcasting + fn as_any(&self) -> &dyn Any { + self + } + + fn data_type(&self, _input_schema: &Schema) -> Result { + self.expr.data_type(_input_schema) + } + + fn nullable(&self, input_schema: &Schema) -> Result { + self.expr.nullable(input_schema) + } + + fn evaluate(&self, batch: &RecordBatch) -> Result { + self.expr.evaluate(batch) + } + + fn children(&self) -> Vec> { + vec![self.expr.clone()] + } + + fn with_new_children( + self: Arc, + children: Vec>, + ) -> Result> { + Ok(Arc::new(PromotePrecisionExpr::new(children[0].clone()))) + } +} + +impl PartialEq for PromotePrecisionExpr { + fn eq(&self, other: &dyn Any) -> bool { + down_cast_any_ref(other) + .downcast_ref::() + .map(|x| self.expr.eq(&x.expr)) + .unwrap_or(false) + } +} + +/// Creates a unary expression PromotePrecisionExpr +pub fn promote_precision(arg: Arc) -> Result> { + Ok(Arc::new(PromotePrecisionExpr::new(arg))) +} diff --git a/datafusion/physical-expr/src/planner.rs b/datafusion/physical-expr/src/planner.rs index 0266ecfd2e51..592a2e963da1 100644 --- a/datafusion/physical-expr/src/planner.rs +++ b/datafusion/physical-expr/src/planner.rs @@ 
-15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use crate::expressions::binary_with_data_type; use crate::var_provider::is_system_variables; use crate::{ execution_props::ExecutionProps, @@ -27,7 +28,7 @@ use crate::{ }; use arrow::datatypes::{DataType, Schema}; use datafusion_common::{DFSchema, DataFusionError, Result, ScalarValue}; -use datafusion_expr::expr::Cast; +use datafusion_expr::expr::{Cast, PromotePrecision}; use datafusion_expr::{ binary_expr, Between, BinaryExpr, Expr, GetIndexedField, Like, Operator, TryCast, }; @@ -169,7 +170,12 @@ pub fn create_physical_expr( execution_props, ) } - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + Expr::BinaryExpr(BinaryExpr { + left, + op, + right, + data_type, + }) => { // Create physical expressions for left and right operands let lhs = create_physical_expr( left, @@ -239,7 +245,7 @@ pub fn create_physical_expr( // // There should be no coercion during physical // planning. - binary(lhs, *op, rhs, input_schema) + binary_with_data_type(lhs, *op, rhs, input_schema, data_type.clone()) } } } @@ -364,6 +370,14 @@ pub fn create_physical_expr( input_schema, data_type.clone(), ), + Expr::PromotePrecision(PromotePrecision { expr }) => { + expressions::promote_precision(create_physical_expr( + expr, + input_dfschema, + input_schema, + execution_props, + )?) 
+ } Expr::TryCast(TryCast { expr, data_type }) => expressions::try_cast( create_physical_expr(expr, input_dfschema, input_schema, execution_props)?, input_schema, diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index 76ec5b001708..094e7fa1b2c3 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -358,7 +358,7 @@ message LogicalExprNode { SimilarToNode similar_to = 33; PlaceholderNode placeholder = 34; - + PromotePrecisionNode promote_precision = 35; } } @@ -367,6 +367,10 @@ message PlaceholderNode { ArrowType data_type = 2; } +message PromotePrecisionNode { + LogicalExprNode expr = 1; +} + message LogicalExprList { repeated LogicalExprNode expr = 1; } diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 406d9ee27aa5..1224f8f939e7 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -10213,6 +10213,9 @@ impl serde::Serialize for LogicalExprNode { logical_expr_node::ExprType::Placeholder(v) => { struct_ser.serialize_field("placeholder", v)?; } + logical_expr_node::ExprType::PromotePrecision(v) => { + struct_ser.serialize_field("promotePrecision", v)?; + } } } struct_ser.end() @@ -10280,6 +10283,8 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { "similar_to", "similarTo", "placeholder", + "promote_precision", + "promotePrecision", ]; #[allow(clippy::enum_variant_names)] @@ -10318,6 +10323,7 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { Ilike, SimilarTo, Placeholder, + PromotePrecision, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -10373,6 +10379,7 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { "ilike" => Ok(GeneratedField::Ilike), "similarTo" | "similar_to" => Ok(GeneratedField::SimilarTo), "placeholder" => Ok(GeneratedField::Placeholder), + "promotePrecision" | 
"promote_precision" => Ok(GeneratedField::PromotePrecision), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -10630,6 +10637,13 @@ impl<'de> serde::Deserialize<'de> for LogicalExprNode { return Err(serde::de::Error::duplicate_field("placeholder")); } expr_type__ = map.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::Placeholder) +; + } + GeneratedField::PromotePrecision => { + if expr_type__.is_some() { + return Err(serde::de::Error::duplicate_field("promotePrecision")); + } + expr_type__ = map.next_value::<::std::option::Option<_>>()?.map(logical_expr_node::ExprType::PromotePrecision) ; } } @@ -16427,6 +16441,97 @@ impl<'de> serde::Deserialize<'de> for ProjectionNode { deserializer.deserialize_struct("datafusion.ProjectionNode", FIELDS, GeneratedVisitor) } } +impl serde::Serialize for PromotePrecisionNode { + #[allow(deprecated)] + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + let mut len = 0; + if self.expr.is_some() { + len += 1; + } + let mut struct_ser = serializer.serialize_struct("datafusion.PromotePrecisionNode", len)?; + if let Some(v) = self.expr.as_ref() { + struct_ser.serialize_field("expr", v)?; + } + struct_ser.end() + } +} +impl<'de> serde::Deserialize<'de> for PromotePrecisionNode { + #[allow(deprecated)] + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + const FIELDS: &[&str] = &[ + "expr", + ]; + + #[allow(clippy::enum_variant_names)] + enum GeneratedField { + Expr, + } + impl<'de> serde::Deserialize<'de> for GeneratedField { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + struct GeneratedVisitor; + + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = GeneratedField; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "expected one of: {:?}", 
&FIELDS) + } + + #[allow(unused_variables)] + fn visit_str(self, value: &str) -> std::result::Result + where + E: serde::de::Error, + { + match value { + "expr" => Ok(GeneratedField::Expr), + _ => Err(serde::de::Error::unknown_field(value, FIELDS)), + } + } + } + deserializer.deserialize_identifier(GeneratedVisitor) + } + } + struct GeneratedVisitor; + impl<'de> serde::de::Visitor<'de> for GeneratedVisitor { + type Value = PromotePrecisionNode; + + fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + formatter.write_str("struct datafusion.PromotePrecisionNode") + } + + fn visit_map(self, mut map: V) -> std::result::Result + where + V: serde::de::MapAccess<'de>, + { + let mut expr__ = None; + while let Some(k) = map.next_key()? { + match k { + GeneratedField::Expr => { + if expr__.is_some() { + return Err(serde::de::Error::duplicate_field("expr")); + } + expr__ = map.next_value()?; + } + } + } + Ok(PromotePrecisionNode { + expr: expr__, + }) + } + } + deserializer.deserialize_struct("datafusion.PromotePrecisionNode", FIELDS, GeneratedVisitor) + } +} impl serde::Serialize for RepartitionExecNode { #[allow(deprecated)] fn serialize(&self, serializer: S) -> std::result::Result diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index e5b4534f60a0..03f3c6bd2375 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -457,7 +457,7 @@ pub struct SubqueryAliasNode { pub struct LogicalExprNode { #[prost( oneof = "logical_expr_node::ExprType", - tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34" + tags = "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35" )] pub expr_type: ::core::option::Option, } @@ -542,6 +542,8 @@ pub mod logical_expr_node { SimilarTo(::prost::alloc::boxed::Box), 
#[prost(message, tag = "34")] Placeholder(super::PlaceholderNode), + #[prost(message, tag = "35")] + PromotePrecision(::prost::alloc::boxed::Box), } } #[allow(clippy::derive_partial_eq_without_eq)] @@ -554,6 +556,12 @@ pub struct PlaceholderNode { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct PromotePrecisionNode { + #[prost(message, optional, boxed, tag = "1")] + pub expr: ::core::option::Option<::prost::alloc::boxed::Box>, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct LogicalExprList { #[prost(message, repeated, tag = "1")] pub expr: ::prost::alloc::vec::Vec, diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 845cb60d17a9..47c8e71298c0 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -32,6 +32,7 @@ use datafusion_common::{ Column, DFField, DFSchema, DFSchemaRef, DataFusionError, OwnedTableReference, Result, ScalarValue, }; +use datafusion_expr::expr::PromotePrecision; use datafusion_expr::{ abs, acos, array, ascii, asin, atan, atan2, bit_length, btrim, ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, date_bin, @@ -1378,6 +1379,14 @@ pub fn parse_expr( data_type: Some(data_type.try_into()?), }), }, + ExprType::PromotePrecision(promote) => { + let expr = Box::new(parse_required_expr( + promote.expr.as_deref(), + registry, + "expr", + )?); + Ok(Expr::PromotePrecision(PromotePrecision::new(expr))) + } } } diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index e8570cf3c7e4..a80c1d4b17bf 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -35,7 +35,8 @@ use arrow::datatypes::{ }; use datafusion_common::{Column, DFField, DFSchemaRef, OwnedTableReference, ScalarValue}; use 
datafusion_expr::expr::{ - self, Between, BinaryExpr, Cast, GetIndexedField, GroupingSet, Like, Sort, + self, Between, BinaryExpr, Cast, GetIndexedField, GroupingSet, Like, + PromotePrecision, Sort, }; use datafusion_expr::{ logical_plan::PlanType, logical_plan::StringifiedPlan, AggregateFunction, @@ -464,7 +465,9 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { expr_type: Some(ExprType::Literal(pb_value)), } } - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => { // Try to linerize a nested binary expression tree of the same operator // into a flat vector of expressions. let mut exprs = vec![right.as_ref()]; @@ -473,6 +476,7 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { left, op: current_op, right, + .. }) = current_expr { if current_op == op { @@ -944,7 +948,14 @@ impl TryFrom<&Expr> for protobuf::LogicalExprNode { })), } } - + Expr::PromotePrecision(PromotePrecision { expr }) => { + let expr = Box::new(protobuf::PromotePrecisionNode { + expr: Some(Box::new(expr.as_ref().try_into()?)), + }); + Self { + expr_type: Some(ExprType::PromotePrecision(expr)), + } + } Expr::QualifiedWildcard { .. } => return Err(Error::General( "Proto serialization error: Expr::QualifiedWildcard { .. } not supported" .to_string(), diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 846d44382b19..f8d7f78f79f2 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -523,7 +523,10 @@ fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Resu fn infer_placeholder_types(expr: Expr, schema: &DFSchema) -> Result { expr.transform(&|mut expr| { // Default to assuming the arguments are the same type - if let Expr::BinaryExpr(BinaryExpr { left, op: _, right }) = &mut expr { + if let Expr::BinaryExpr(BinaryExpr { + left, op: _, right, .. 
+ }) = &mut expr + { rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?; rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?; }; diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 91cef6d4712e..61017d1f6d3e 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -23,7 +23,7 @@ use sqlparser::ast::Ident; use datafusion_common::{DataFusionError, Result, ScalarValue}; use datafusion_expr::expr::{ AggregateFunction, Between, BinaryExpr, Case, GetIndexedField, GroupingSet, Like, - WindowFunction, + PromotePrecision, WindowFunction, }; use datafusion_expr::expr::{Cast, Sort}; use datafusion_expr::utils::{expr_as_column_expr, find_column_exprs}; @@ -232,13 +232,17 @@ where .collect::>>()?, negated: *negated, }), - Expr::BinaryExpr(BinaryExpr { left, right, op }) => { - Ok(Expr::BinaryExpr(BinaryExpr::new( - Box::new(clone_with_replacement(left, replacement_fn)?), - *op, - Box::new(clone_with_replacement(right, replacement_fn)?), - ))) - } + Expr::BinaryExpr(BinaryExpr { + left, + right, + op, + data_type, + }) => Ok(Expr::BinaryExpr(BinaryExpr::new_with_data_type( + Box::new(clone_with_replacement(left, replacement_fn)?), + *op, + Box::new(clone_with_replacement(right, replacement_fn)?), + data_type.clone(), + ))), Expr::Like(Like { negated, expr, @@ -344,6 +348,11 @@ where Box::new(clone_with_replacement(expr, replacement_fn)?), data_type.clone(), ))), + Expr::PromotePrecision(PromotePrecision { expr }) => { + Ok(Expr::PromotePrecision(PromotePrecision::new(Box::new( + clone_with_replacement(expr, replacement_fn)?, + )))) + } Expr::TryCast(TryCast { expr: nested_expr, data_type, diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 767c4a39375a..86965fa4aa00 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -310,22 +310,22 @@ pub async fn from_substrait_rel( let 
join_exprs: Vec<(Column, Column, bool)> = predicates .iter() .map(|p| match p { - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { - match (left.as_ref(), right.as_ref()) { - (Expr::Column(l), Expr::Column(r)) => match op { - Operator::Eq => Ok((l.clone(), r.clone(), false)), - Operator::IsNotDistinctFrom => { - Ok((l.clone(), r.clone(), true)) - } - _ => Err(DataFusionError::Internal( - "invalid join condition op".to_string(), - )), - }, + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => match (left.as_ref(), right.as_ref()) { + (Expr::Column(l), Expr::Column(r)) => match op { + Operator::Eq => Ok((l.clone(), r.clone(), false)), + Operator::IsNotDistinctFrom => { + Ok((l.clone(), r.clone(), true)) + } _ => Err(DataFusionError::Internal( - "invalid join condition expresssion".to_string(), + "invalid join condition op".to_string(), )), - } - } + }, + _ => Err(DataFusionError::Internal( + "invalid join condition expresssion".to_string(), + )), + }, _ => Err(DataFusionError::Internal( "Non-binary expression is not supported in join condition" .to_string(), @@ -674,6 +674,7 @@ pub async fn from_substrait_rex( .as_ref() .clone(), ), + data_type: None, }))) } (l, r) => Err(DataFusionError::NotImplemented(format!( diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index c0a4dd04f33a..5ce4eb5fbd51 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -584,7 +584,9 @@ pub fn to_substrait_rex( let index = schema.index_of_column(col)?; substrait_field_ref(index) } - Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + Expr::BinaryExpr(BinaryExpr { + left, op, right, .. + }) => { let l = to_substrait_rex(left, schema, extension_info)?; let r = to_substrait_rex(right, schema, extension_info)?;