Skip to content

Commit

Permalink
Add Casts and Date Intervals (hyrise#2411)
Browse files Browse the repository at this point in the history
This PR,enables Hyrise to resolve SQL's `CAST` and `INTERVAL` statements. Intervals are implemented only for dates.
This is achieved by a new `IntervalExpression` and date utilities. Anyway, this PR does not privode a full support of a Date type. Furthermore, the existing implementation of casts was revisited to match the SQL standard.
  • Loading branch information
dey4ss authored Oct 26, 2021
1 parent 8d42292 commit 7683162
Show file tree
Hide file tree
Showing 31 changed files with 595 additions and 177 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,6 @@ cmake-build-*
default.profdata
default.profraw

tpcc_cached_tables/
tpcds_cached_tables/
tpch_cached_tables/
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
url = https://github.com/Tessil/robin-map.git
[submodule "third_party/sql-parser"]
path = third_party/sql-parser
url = https://github.com/hyrise/sql-parser
url = https://github.com/hyrise/sql-parser.git
[submodule "third_party/tpcds-kit"]
path = third_party/tpcds-kit
url = https://github.com/hyrise-mp/tpcds-kit.git
Expand Down
14 changes: 7 additions & 7 deletions resources/benchmark/tpcds/query_blacklist.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
14a.sql
14b.sql
#15.sql
16.sql
#16.sql
#17.sql
18.sql
#19.sql
Expand All @@ -32,12 +32,12 @@
#29.sql
30.sql
#31.sql
32.sql
#32.sql
33.sql
#34.sql
#35.sql
36.sql
37.sql
#37.sql
38.sql
#39a.sql
#39b.sql
Expand Down Expand Up @@ -83,7 +83,7 @@
#79.sql
80.sql
#81.sql
82.sql
#82.sql
#83.sql
84.sql
#85.sql
Expand All @@ -93,10 +93,10 @@
89.sql
90.sql
#91.sql
92.sql
#92.sql
#93.sql
94.sql
95.sql
#94.sql
#95.sql
#96.sql
#97.sql
98.sql
Expand Down
24 changes: 12 additions & 12 deletions resources/tpch_validation_queries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
-- l_returnflag, l_linestatus
--
-- Changes:
-- 1. dates are not supported
-- 1. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
-- 2. implicit type conversions for arithmetic operations are not supported
Expand Down Expand Up @@ -116,7 +116,7 @@ SELECT l_orderkey, SUM(l_extendedprice*(1.0-l_discount)) as revenue, o_orderdate
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
SELECT o_orderpriority, count(*) as order_count FROM orders WHERE o_orderdate >= '1996-07-01' AND o_orderdate < '1996-10-01' AND exists (SELECT *FROM lineitem WHERE l_orderkey = o_orderkey AND l_commitdate < l_receiptdate) GROUP BY o_orderpriority ORDER BY o_orderpriority;
Expand Down Expand Up @@ -153,7 +153,7 @@ SELECT o_orderpriority, count(*) as order_count FROM orders WHERE o_orderdate >=
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
-- 3. implicit type conversions for arithmetic operations are not supported
Expand All @@ -171,7 +171,7 @@ SELECT n_name, SUM(l_extendedprice * (1.0 - l_discount)) as revenue FROM custome
-- AND L_DISCOUNT BETWEEN .06 - 0.01 AND .06 + 0.01 AND L_QUANTITY < 24
--
-- Changes:
-- 1. dates are not supported
-- 1. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
-- 2. arithmetic expressions with constants are not resolved automatically yet, so pre-calculate them as well
Expand Down Expand Up @@ -207,7 +207,7 @@ SELECT sum(l_extendedprice*l_discount) AS REVENUE FROM lineitem WHERE l_shipdate
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
-- 3. Extract is not supported
Expand Down Expand Up @@ -259,7 +259,7 @@ SELECT supp_nation, cust_nation, l_year, SUM(volume) as revenue FROM (SELECT n1.
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- 3. Extract is not supported
-- a. Use SUBSTR instead (because our date columns are strings AND SQLite doesn't support EXTRACT)
Expand Down Expand Up @@ -342,7 +342,7 @@ SELECT nation, o_year, SUM(amount) as sum_profit FROM (SELECT n_name as nation,
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
-- 3. implicit type conversions for arithmetic operations are not supported
Expand Down Expand Up @@ -408,7 +408,7 @@ SELECT ps_partkey, SUM(ps_supplycost * ps_availqty) as value FROM partsupp, supp
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
SELECT l_shipmode, SUM(case when o_orderpriority ='1-URGENT' or o_orderpriority ='2-HIGH' then 1 else 0 end) as high_line_count, SUM(case when o_orderpriority <> '1-URGENT' AND o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count FROM orders, lineitem WHERE o_orderkey = l_orderkey AND l_shipmode IN ('MAIL','SHIP') AND l_commitdate < l_receiptdate AND l_shipdate < l_commitdate AND l_receiptdate >= '1994-01-01' AND l_receiptdate < '1995-01-01' GROUP BY l_shipmode ORDER BY l_shipmode;
Expand Down Expand Up @@ -452,7 +452,7 @@ SELECT c_count, count(*) as custdist FROM (SELECT c_custkey, count(o_orderkey) a
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
-- 3. implicit type conversions for arithmetic operations are not supported
Expand Down Expand Up @@ -487,7 +487,7 @@ SELECT 100.00 * SUM(case when p_type like 'PROMO%' then l_extendedprice*(1.0-l_d
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. "revenue[STREAM_ID]" renamed to "revenue"
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
-- 3. implicit type conversions for arithmetic operations are not supported
Expand Down Expand Up @@ -642,7 +642,7 @@ SELECT SUM(l_extendedprice * (1.0 - l_discount)) as revenue FROM lineitem, "part
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
SELECT s_name, s_address FROM supplier, nation WHERE s_suppkey in (SELECT ps_suppkey FROM partsupp WHERE ps_partkey in (SELECT p_partkey FROM "part" WHERE p_name like 'forest%') AND ps_availqty > (SELECT 0.5 * SUM(l_quantity) FROM lineitem WHERE l_partkey = ps_partkey AND l_suppkey = ps_suppkey AND l_shipdate >= '1994-01-01' AND l_shipdate < '1995-01-01')) AND s_nationkey = n_nationkey AND n_name = 'CANADA' ORDER BY s_name;
Expand Down Expand Up @@ -683,7 +683,7 @@ SELECT s_name, s_address FROM supplier, nation WHERE s_suppkey in (SELECT ps_sup
--
-- Changes:
-- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console
-- 2. dates are not supported
-- 2. SQLite does not support dates and intervals
-- a. use strings as data type for now
-- b. pre-calculate date operation
SELECT s_name, count(*) as numwait FROM supplier, lineitem l1, orders, nation WHERE s_suppkey = l1.l_suppkey AND o_orderkey = l1.l_orderkey AND o_orderstatus = 'F' AND l1.l_receiptdate > l1.l_commitdate AND exists (SELECT * FROM lineitem l2 WHERE l2.l_orderkey = l1.l_orderkey AND l2.l_suppkey <> l1.l_suppkey) AND not exists (SELECT * FROM lineitem l3 WHERE l3.l_orderkey = l1.l_orderkey AND l3.l_suppkey <> l1.l_suppkey AND l3.l_receiptdate > l3.l_commitdate) AND s_nationkey = n_nationkey AND n_name = 'SAUDI ARABIA' GROUP BY s_name ORDER BY numwait DESC, s_name LIMIT 100;
Expand Down
52 changes: 27 additions & 25 deletions src/benchmarklib/jcch/jcch_benchmark_item_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <random>

#include "tpch/tpch_queries.hpp"
#include "utils/date_utils.hpp"
#include "utils/string_utils.hpp"
#include "utils/timer.hpp"

Expand Down Expand Up @@ -113,9 +114,11 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be
switch (item_id) {
// Writing `1-1` to make people aware that this is zero-indexed while TPC-H query names are not
case 1 - 1: {
// In some cases, we still need to do the date calculations that Hyrise does not support yet
const auto date = _calculate_date(boost::gregorian::date{1998, 12, 01}, 0, -std::stoi(raw_params_iter->at(0)));
parameters.emplace_back("'"s + date + "'");
// In some cases, we still need to do the date calculations that SQLite (used for verification) does not
// support yet. When parsing a date, we expect the generator to provide sound date strings and omit checks.
const auto date = date_interval(boost::gregorian::date{1998, 12, 01}, -std::stoi(raw_params_iter->at(0)),
DatetimeComponent::Day);
parameters.emplace_back("'"s + date_to_string(date) + "'");
break;
}

Expand All @@ -135,31 +138,30 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be
}

case 4 - 1: {
const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(0));
const auto end_date_str = _calculate_date(begin_date, 3);
const auto begin_date = *string_to_date(raw_params_iter->at(0));
const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month);

// Cannot use begin_date here, as we would have to convert it into a string first.
parameters.emplace_back("'"s + raw_params_iter->at(0) + "'");
parameters.emplace_back("'"s + end_date_str + "'");
parameters.emplace_back("'"s + date_to_string(end_date) + "'");
break;
}

case 5 - 1: {
const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(1));
const auto end_date_str = _calculate_date(begin_date, 12);
const auto begin_date = *string_to_date(raw_params_iter->at(1));
const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year);

parameters.emplace_back("'"s + raw_params_iter->at(0) + "'");
parameters.emplace_back("'"s + raw_params_iter->at(1) + "'");
parameters.emplace_back("'"s + end_date_str + "'");
parameters.emplace_back("'"s + date_to_string(end_date) + "'");
break;
}

case 6 - 1: {
const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(0));
const auto end_date_str = _calculate_date(begin_date, 12);
const auto begin_date = *string_to_date(raw_params_iter->at(0));
const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year);

parameters.emplace_back("'"s + raw_params_iter->at(0) + "'");
parameters.emplace_back("'"s + end_date_str + "'");
parameters.emplace_back("'"s + date_to_string(end_date) + "'");
parameters.emplace_back(raw_params_iter->at(1));
parameters.emplace_back(raw_params_iter->at(1));
parameters.emplace_back(raw_params_iter->at(2));
Expand Down Expand Up @@ -210,13 +212,13 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be
}

case 12 - 1: {
const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(2));
const auto end_date_str = _calculate_date(begin_date, 12);
const auto begin_date = *string_to_date(raw_params_iter->at(2));
const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year);

parameters.emplace_back("'"s + raw_params_iter->at(0) + "'");
parameters.emplace_back("'"s + raw_params_iter->at(1) + "'");
parameters.emplace_back("'"s + raw_params_iter->at(2) + "'");
parameters.emplace_back("'"s + end_date_str + "'");
parameters.emplace_back("'"s + date_to_string(end_date) + "'");
break;
}

Expand All @@ -226,19 +228,19 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be
}

case 14 - 1: {
const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(0));
const auto end_date_str = _calculate_date(begin_date, 1);
const auto begin_date = *string_to_date(raw_params_iter->at(0));
const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Month);

parameters.emplace_back("'"s + raw_params_iter->at(0) + "'");
parameters.emplace_back("'"s + end_date_str + "'");
parameters.emplace_back("'"s + date_to_string(end_date) + "'");
break;
}

case 15 - 1: {
auto query_15 = std::string{tpch_queries.at(15)};

const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(0));
const auto end_date_str = _calculate_date(begin_date, 3);
const auto begin_date = *string_to_date(raw_params_iter->at(0));
const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month);

// Hack: We cannot use prepared statements in TPC-H 15. Thus, we need to build the SQL string by hand.
// By manually replacing the `?` from tpch_queries.cpp, we can keep all queries in a readable form there.
Expand All @@ -249,7 +251,7 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be
std::string_view{&query_15[END_DATE_OFFSET], 10} == "1996-04-01"),
"TPC-H 15 string has been modified");
query_15.replace(BEGIN_DATE_OFFSET, 10, raw_params_iter->at(0));
query_15.replace(END_DATE_OFFSET, 10, end_date_str);
query_15.replace(END_DATE_OFFSET, 10, date_to_string(end_date));

const auto view_id = std::atomic_fetch_add(&_q15_view_id, size_t{1});
boost::replace_all(query_15, std::string("revenue_view"), std::string("revenue") + std::to_string(view_id));
Expand Down Expand Up @@ -303,12 +305,12 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be
}

case 20 - 1: {
const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(1));
const auto end_date_str = _calculate_date(begin_date, 12);
const auto begin_date = *string_to_date(raw_params_iter->at(1));
const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year);

parameters.emplace_back("'"s + raw_params_iter->at(0) + "%'");
parameters.emplace_back("'"s + raw_params_iter->at(1) + "'");
parameters.emplace_back("'"s + end_date_str + "'");
parameters.emplace_back("'"s + date_to_string(end_date) + "'");
parameters.emplace_back("'"s + raw_params_iter->at(2) + "'");
break;
}
Expand Down
Loading

0 comments on commit 7683162

Please sign in to comment.