From 7683162bf4748eb1f0776418e6573f4eceac181f Mon Sep 17 00:00:00 2001 From: Daniel Lindner <27929897+dey4ss@users.noreply.github.com> Date: Tue, 26 Oct 2021 11:49:33 +0200 Subject: [PATCH] Add Casts and Date Intervals (#2411) This PR,enables Hyrise to resolve SQL's `CAST` and `INTERVAL` statements. Intervals are implemented only for dates. This is achieved by a new `IntervalExpression` and date utilities. Anyway, this PR does not privode a full support of a Date type. Furthermore, the existing implementation of casts was revisited to match the SQL standard. --- .gitignore | 2 + .gitmodules | 2 +- resources/benchmark/tpcds/query_blacklist.cfg | 14 +- .../benchmark/tpcds/tpcds-result-reproduction | 2 +- resources/tpch_validation_queries.sql | 24 ++-- .../jcch/jcch_benchmark_item_runner.cpp | 52 ++++---- .../tpch/tpch_benchmark_item_runner.cpp | 87 ++++++------ .../tpch/tpch_benchmark_item_runner.hpp | 5 - src/benchmarklib/tpch/tpch_queries.cpp | 26 ++-- src/lib/CMakeLists.txt | 5 + src/lib/all_type_variant.hpp | 2 +- src/lib/expression/abstract_expression.hpp | 1 + .../evaluation/expression_evaluator.cpp | 41 +++--- src/lib/expression/expression_functional.cpp | 4 + src/lib/expression/expression_functional.hpp | 5 +- src/lib/expression/expression_utils.cpp | 26 +++- src/lib/expression/extract_expression.hpp | 2 - src/lib/expression/interval_expression.cpp | 38 ++++++ src/lib/expression/interval_expression.hpp | 31 +++++ src/lib/operators/sort.cpp | 2 +- src/lib/sql/sql_translator.cpp | 126 +++++++++++++----- src/lib/types.hpp | 2 + src/lib/utils/date_utils.cpp | 43 ++++++ src/lib/utils/date_utils.hpp | 25 ++++ src/test/CMakeLists.txt | 1 + .../expression_evaluator_to_values_test.cpp | 11 +- src/test/lib/expression/expression_test.cpp | 11 ++ .../lib/expression/expression_utils_test.cpp | 43 ++++++ src/test/lib/sql/sql_translator_test.cpp | 88 +++++++++++- src/test/lib/utils/date_utils_test.cpp | 49 +++++++ third_party/sql-parser | 2 +- 31 files changed, 595 insertions(+), 177 deletions(-) create mode 100644 src/lib/expression/interval_expression.cpp create mode 100755 src/lib/expression/interval_expression.hpp create mode 100644 src/lib/utils/date_utils.cpp create mode 100755 src/lib/utils/date_utils.hpp create mode 100755 src/test/lib/utils/date_utils_test.cpp diff --git a/.gitignore b/.gitignore index 351ad524da..9fa94d78ea 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,6 @@ cmake-build-* default.profdata default.profraw +tpcc_cached_tables/ tpcds_cached_tables/ +tpch_cached_tables/ diff --git a/.gitmodules b/.gitmodules index 37046b3579..b78b588227 100644 --- a/.gitmodules +++ b/.gitmodules @@ -49,7 +49,7 @@ url = https://github.com/Tessil/robin-map.git [submodule "third_party/sql-parser"] path = third_party/sql-parser - url = https://github.com/hyrise/sql-parser + url = https://github.com/hyrise/sql-parser.git [submodule "third_party/tpcds-kit"] path = third_party/tpcds-kit url = https://github.com/hyrise-mp/tpcds-kit.git diff --git a/resources/benchmark/tpcds/query_blacklist.cfg b/resources/benchmark/tpcds/query_blacklist.cfg index 7d77180dff..8be118ce3a 100644 --- a/resources/benchmark/tpcds/query_blacklist.cfg +++ b/resources/benchmark/tpcds/query_blacklist.cfg @@ -14,7 +14,7 @@ 14a.sql 14b.sql #15.sql -16.sql +#16.sql #17.sql 18.sql #19.sql @@ -32,12 +32,12 @@ #29.sql 30.sql #31.sql -32.sql +#32.sql 33.sql #34.sql #35.sql 36.sql -37.sql +#37.sql 38.sql #39a.sql #39b.sql @@ -83,7 +83,7 @@ #79.sql 80.sql #81.sql -82.sql +#82.sql #83.sql 84.sql #85.sql @@ -93,10 +93,10 @@ 89.sql 90.sql #91.sql -92.sql +#92.sql #93.sql -94.sql -95.sql +#94.sql +#95.sql #96.sql #97.sql 98.sql diff --git a/resources/benchmark/tpcds/tpcds-result-reproduction b/resources/benchmark/tpcds/tpcds-result-reproduction index 0b1d7f9043..81b1f12857 160000 --- a/resources/benchmark/tpcds/tpcds-result-reproduction +++ b/resources/benchmark/tpcds/tpcds-result-reproduction @@ -1 +1 @@ -Subproject commit 0b1d7f9043c4d4b5a9fa835cff2d5115f2267301 +Subproject commit 81b1f128575534a40e93a6cbcc1680561fa7812a diff --git a/resources/tpch_validation_queries.sql b/resources/tpch_validation_queries.sql index 55642602ff..0d1898278f 100644 --- a/resources/tpch_validation_queries.sql +++ b/resources/tpch_validation_queries.sql @@ -28,7 +28,7 @@ -- l_returnflag, l_linestatus -- -- Changes: --- 1. dates are not supported +-- 1. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation -- 2. implicit type conversions for arithmetic operations are not supported @@ -116,7 +116,7 @@ SELECT l_orderkey, SUM(l_extendedprice*(1.0-l_discount)) as revenue, o_orderdate -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation SELECT o_orderpriority, count(*) as order_count FROM orders WHERE o_orderdate >= '1996-07-01' AND o_orderdate < '1996-10-01' AND exists (SELECT *FROM lineitem WHERE l_orderkey = o_orderkey AND l_commitdate < l_receiptdate) GROUP BY o_orderpriority ORDER BY o_orderpriority; @@ -153,7 +153,7 @@ SELECT o_orderpriority, count(*) as order_count FROM orders WHERE o_orderdate >= -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation -- 3. implicit type conversions for arithmetic operations are not supported @@ -171,7 +171,7 @@ SELECT n_name, SUM(l_extendedprice * (1.0 - l_discount)) as revenue FROM custome -- AND L_DISCOUNT BETWEEN .06 - 0.01 AND .06 + 0.01 AND L_QUANTITY < 24 -- -- Changes: --- 1. dates are not supported +-- 1. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation -- 2. arithmetic expressions with constants are not resolved automatically yet, so pre-calculate them as well @@ -207,7 +207,7 @@ SELECT sum(l_extendedprice*l_discount) AS REVENUE FROM lineitem WHERE l_shipdate -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation -- 3. Extract is not supported @@ -259,7 +259,7 @@ SELECT supp_nation, cust_nation, l_year, SUM(volume) as revenue FROM (SELECT n1. -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- 3. Extract is not supported -- a. Use SUBSTR instead (because our date columns are strings AND SQLite doesn't support EXTRACT) @@ -342,7 +342,7 @@ SELECT nation, o_year, SUM(amount) as sum_profit FROM (SELECT n_name as nation, -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation -- 3. implicit type conversions for arithmetic operations are not supported @@ -408,7 +408,7 @@ SELECT ps_partkey, SUM(ps_supplycost * ps_availqty) as value FROM partsupp, supp -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation SELECT l_shipmode, SUM(case when o_orderpriority ='1-URGENT' or o_orderpriority ='2-HIGH' then 1 else 0 end) as high_line_count, SUM(case when o_orderpriority <> '1-URGENT' AND o_orderpriority <> '2-HIGH' then 1 else 0 end) as low_line_count FROM orders, lineitem WHERE o_orderkey = l_orderkey AND l_shipmode IN ('MAIL','SHIP') AND l_commitdate < l_receiptdate AND l_shipdate < l_commitdate AND l_receiptdate >= '1994-01-01' AND l_receiptdate < '1995-01-01' GROUP BY l_shipmode ORDER BY l_shipmode; @@ -452,7 +452,7 @@ SELECT c_count, count(*) as custdist FROM (SELECT c_custkey, count(o_orderkey) a -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation -- 3. implicit type conversions for arithmetic operations are not supported @@ -487,7 +487,7 @@ SELECT 100.00 * SUM(case when p_type like 'PROMO%' then l_extendedprice*(1.0-l_d -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console -- 2. "revenue[STREAM_ID]" renamed to "revenue" --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation -- 3. implicit type conversions for arithmetic operations are not supported @@ -642,7 +642,7 @@ SELECT SUM(l_extendedprice * (1.0 - l_discount)) as revenue FROM lineitem, "part -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation SELECT s_name, s_address FROM supplier, nation WHERE s_suppkey in (SELECT ps_suppkey FROM partsupp WHERE ps_partkey in (SELECT p_partkey FROM "part" WHERE p_name like 'forest%') AND ps_availqty > (SELECT 0.5 * SUM(l_quantity) FROM lineitem WHERE l_partkey = ps_partkey AND l_suppkey = ps_suppkey AND l_shipdate >= '1994-01-01' AND l_shipdate < '1995-01-01')) AND s_nationkey = n_nationkey AND n_name = 'CANADA' ORDER BY s_name; @@ -683,7 +683,7 @@ SELECT s_name, s_address FROM supplier, nation WHERE s_suppkey in (SELECT ps_sup -- -- Changes: -- 1. This file contains hard-coded parameters to facilitate copy-and-paste into the console --- 2. dates are not supported +-- 2. SQLite does not support dates and intervals -- a. use strings as data type for now -- b. pre-calculate date operation SELECT s_name, count(*) as numwait FROM supplier, lineitem l1, orders, nation WHERE s_suppkey = l1.l_suppkey AND o_orderkey = l1.l_orderkey AND o_orderstatus = 'F' AND l1.l_receiptdate > l1.l_commitdate AND exists (SELECT * FROM lineitem l2 WHERE l2.l_orderkey = l1.l_orderkey AND l2.l_suppkey <> l1.l_suppkey) AND not exists (SELECT * FROM lineitem l3 WHERE l3.l_orderkey = l1.l_orderkey AND l3.l_suppkey <> l1.l_suppkey AND l3.l_receiptdate > l3.l_commitdate) AND s_nationkey = n_nationkey AND n_name = 'SAUDI ARABIA' GROUP BY s_name ORDER BY numwait DESC, s_name LIMIT 100; diff --git a/src/benchmarklib/jcch/jcch_benchmark_item_runner.cpp b/src/benchmarklib/jcch/jcch_benchmark_item_runner.cpp index 74d545f89d..3f93405911 100644 --- a/src/benchmarklib/jcch/jcch_benchmark_item_runner.cpp +++ b/src/benchmarklib/jcch/jcch_benchmark_item_runner.cpp @@ -4,6 +4,7 @@ #include #include "tpch/tpch_queries.hpp" +#include "utils/date_utils.hpp" #include "utils/string_utils.hpp" #include "utils/timer.hpp" @@ -113,9 +114,11 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be switch (item_id) { // Writing `1-1` to make people aware that this is zero-indexed while TPC-H query names are not case 1 - 1: { - // In some cases, we still need to do the date calculations that Hyrise does not support yet - const auto date = _calculate_date(boost::gregorian::date{1998, 12, 01}, 0, -std::stoi(raw_params_iter->at(0))); - parameters.emplace_back("'"s + date + "'"); + // In some cases, we still need to do the date calculations that SQLite (used for verification) does not + // support yet. When parsing a date, we expect the generator to provide sound date strings and omit checks. + const auto date = date_interval(boost::gregorian::date{1998, 12, 01}, -std::stoi(raw_params_iter->at(0)), + DatetimeComponent::Day); + parameters.emplace_back("'"s + date_to_string(date) + "'"); break; } @@ -135,31 +138,30 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be } case 4 - 1: { - const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(0)); - const auto end_date_str = _calculate_date(begin_date, 3); + const auto begin_date = *string_to_date(raw_params_iter->at(0)); + const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month); - // Cannot use begin_date here, as we would have to convert it into a string first. parameters.emplace_back("'"s + raw_params_iter->at(0) + "'"); - parameters.emplace_back("'"s + end_date_str + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } case 5 - 1: { - const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(1)); - const auto end_date_str = _calculate_date(begin_date, 12); + const auto begin_date = *string_to_date(raw_params_iter->at(1)); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year); parameters.emplace_back("'"s + raw_params_iter->at(0) + "'"); parameters.emplace_back("'"s + raw_params_iter->at(1) + "'"); - parameters.emplace_back("'"s + end_date_str + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } case 6 - 1: { - const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(0)); - const auto end_date_str = _calculate_date(begin_date, 12); + const auto begin_date = *string_to_date(raw_params_iter->at(0)); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year); parameters.emplace_back("'"s + raw_params_iter->at(0) + "'"); - parameters.emplace_back("'"s + end_date_str + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); parameters.emplace_back(raw_params_iter->at(1)); parameters.emplace_back(raw_params_iter->at(1)); parameters.emplace_back(raw_params_iter->at(2)); @@ -210,13 +212,13 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be } case 12 - 1: { - const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(2)); - const auto end_date_str = _calculate_date(begin_date, 12); + const auto begin_date = *string_to_date(raw_params_iter->at(2)); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year); parameters.emplace_back("'"s + raw_params_iter->at(0) + "'"); parameters.emplace_back("'"s + raw_params_iter->at(1) + "'"); parameters.emplace_back("'"s + raw_params_iter->at(2) + "'"); - parameters.emplace_back("'"s + end_date_str + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } @@ -226,19 +228,19 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be } case 14 - 1: { - const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(0)); - const auto end_date_str = _calculate_date(begin_date, 1); + const auto begin_date = *string_to_date(raw_params_iter->at(0)); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Month); parameters.emplace_back("'"s + raw_params_iter->at(0) + "'"); - parameters.emplace_back("'"s + end_date_str + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } case 15 - 1: { auto query_15 = std::string{tpch_queries.at(15)}; - const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(0)); - const auto end_date_str = _calculate_date(begin_date, 3); + const auto begin_date = *string_to_date(raw_params_iter->at(0)); + const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month); // Hack: We cannot use prepared statements in TPC-H 15. Thus, we need to build the SQL string by hand. // By manually replacing the `?` from tpch_queries.cpp, we can keep all queries in a readable form there. @@ -249,7 +251,7 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be std::string_view{&query_15[END_DATE_OFFSET], 10} == "1996-04-01"), "TPC-H 15 string has been modified"); query_15.replace(BEGIN_DATE_OFFSET, 10, raw_params_iter->at(0)); - query_15.replace(END_DATE_OFFSET, 10, end_date_str); + query_15.replace(END_DATE_OFFSET, 10, date_to_string(end_date)); const auto view_id = std::atomic_fetch_add(&_q15_view_id, size_t{1}); boost::replace_all(query_15, std::string("revenue_view"), std::string("revenue") + std::to_string(view_id)); @@ -303,12 +305,12 @@ bool JCCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be } case 20 - 1: { - const auto begin_date = boost::gregorian::from_string(raw_params_iter->at(1)); - const auto end_date_str = _calculate_date(begin_date, 12); + const auto begin_date = *string_to_date(raw_params_iter->at(1)); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year); parameters.emplace_back("'"s + raw_params_iter->at(0) + "%'"); parameters.emplace_back("'"s + raw_params_iter->at(1) + "'"); - parameters.emplace_back("'"s + end_date_str + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); parameters.emplace_back("'"s + raw_params_iter->at(2) + "'"); break; } diff --git a/src/benchmarklib/tpch/tpch_benchmark_item_runner.cpp b/src/benchmarklib/tpch/tpch_benchmark_item_runner.cpp index e411d049c3..30734b93a9 100644 --- a/src/benchmarklib/tpch/tpch_benchmark_item_runner.cpp +++ b/src/benchmarklib/tpch/tpch_benchmark_item_runner.cpp @@ -17,6 +17,7 @@ extern "C" { #include "sql/sql_pipeline_builder.hpp" #include "tpch_queries.hpp" #include "utils/assert.hpp" +#include "utils/date_utils.hpp" namespace opossum { @@ -61,16 +62,6 @@ bool TPCHBenchmarkItemRunner::_on_execute_item(const BenchmarkItemID item_id, Be return true; } -std::string TPCHBenchmarkItemRunner::_calculate_date(boost::gregorian::date date, int months, int days) { - date = date + boost::gregorian::months(months) + boost::gregorian::days(days); - - std::stringstream output; - output << static_cast(date.year()) << "-" << std::setw(2) << std::setfill('0') // NOLINT - << static_cast(date.month()) // NOLINT - << "-" << std::setw(2) << std::setfill('0') << static_cast(date.day()); // NOLINT - return output.str(); -} - void TPCHBenchmarkItemRunner::on_tables_loaded() { // Make sure that clustering, indexes, and constraints have made it all the way up to here const auto orders_table = Hyrise::get().storage_manager.get_table("orders"); @@ -150,9 +141,10 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) // Writing `1-1` to make people aware that this is zero-indexed while TPC-H query names are not case 1 - 1: { std::uniform_int_distribution<> date_diff_dist{60, 120}; - const auto date = _calculate_date(boost::gregorian::date{1998, 12, 01}, 0, -date_diff_dist(random_engine)); + const auto date = + date_interval(boost::gregorian::date{1998, 12, 01}, -date_diff_dist(random_engine), DatetimeComponent::Day); - parameters.emplace_back("'"s + date + "'"); + parameters.emplace_back("'"s + date_to_string(date) + "'"); break; } @@ -172,22 +164,23 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) case 3 - 1: { const auto* const segment = c_mseg_set.list[segment_dist(random_engine)].text; std::uniform_int_distribution<> date_diff_dist{0, 30}; - const auto date = _calculate_date(boost::gregorian::date{1995, 03, 01}, 0, date_diff_dist(random_engine)); + const auto date = + date_interval(boost::gregorian::date{1995, 03, 01}, date_diff_dist(random_engine), DatetimeComponent::Day); parameters.emplace_back("'"s + segment + "'"); - parameters.emplace_back("'"s + date + "'"); - parameters.emplace_back("'"s + date + "'"); + parameters.emplace_back("'"s + date_to_string(date) + "'"); + parameters.emplace_back("'"s + date_to_string(date) + "'"); break; } case 4 - 1: { std::uniform_int_distribution<> date_diff_dist{0, 4 * 12 + 9}; const auto diff = date_diff_dist(random_engine); - const auto begin_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff); - const auto end_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff + 3); + const auto begin_date = date_interval(boost::gregorian::date{1993, 01, 01}, diff, DatetimeComponent::Month); + const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month); - parameters.emplace_back("'"s + begin_date + "'"); - parameters.emplace_back("'"s + end_date + "'"); + parameters.emplace_back("'"s + date_to_string(begin_date) + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } @@ -196,20 +189,20 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) std::uniform_int_distribution<> date_diff_dist{0, 4}; const auto diff = date_diff_dist(random_engine); - const auto begin_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff * 12); - const auto end_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, (diff + 1) * 12); + const auto begin_date = date_interval(boost::gregorian::date{1993, 01, 01}, diff, DatetimeComponent::Year); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year); parameters.emplace_back("'"s + region + "'"); - parameters.emplace_back("'"s + begin_date + "'"); - parameters.emplace_back("'"s + end_date + "'"); + parameters.emplace_back("'"s + date_to_string(begin_date) + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } case 6 - 1: { std::uniform_int_distribution<> date_diff_dist{0, 4}; const auto diff = date_diff_dist(random_engine); - const auto begin_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff * 12); - const auto end_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, (diff + 1) * 12); + const auto begin_date = date_interval(boost::gregorian::date{1993, 01, 01}, diff, DatetimeComponent::Year); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year); static std::uniform_int_distribution<> discount_dist{2, 9}; const auto discount = 0.01f * static_cast(discount_dist(random_engine)); @@ -217,8 +210,8 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) std::uniform_int_distribution<> quantity_dist{24, 25}; const auto quantity = quantity_dist(random_engine); - parameters.emplace_back("'"s + begin_date + "'"); - parameters.emplace_back("'"s + end_date + "'"); + parameters.emplace_back("'"s + date_to_string(begin_date) + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); parameters.emplace_back(std::to_string(discount)); parameters.emplace_back(std::to_string(discount)); parameters.emplace_back(std::to_string(quantity)); @@ -275,11 +268,11 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) case 10 - 1: { std::uniform_int_distribution<> date_diff_dist{0, 23}; const auto diff = date_diff_dist(random_engine); - const auto begin_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff); - const auto end_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, (diff + 3)); + const auto begin_date = date_interval(boost::gregorian::date{1993, 01, 01}, diff, DatetimeComponent::Month); + const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month); - parameters.emplace_back("'"s + begin_date + "'"); - parameters.emplace_back("'"s + end_date + "'"); + parameters.emplace_back("'"s + date_to_string(begin_date) + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } @@ -302,13 +295,13 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) std::uniform_int_distribution<> date_diff_dist{0, 4}; const auto diff = date_diff_dist(random_engine); - const auto begin_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff * 12); - const auto end_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, (diff + 1) * 12); + const auto begin_date = date_interval(boost::gregorian::date{1993, 01, 01}, diff, DatetimeComponent::Year); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year); parameters.emplace_back("'"s + shipmode1 + "'"); parameters.emplace_back("'"s + shipmode2 + "'"); - parameters.emplace_back("'"s + begin_date + "'"); - parameters.emplace_back("'"s + end_date + "'"); + parameters.emplace_back("'"s + date_to_string(begin_date) + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } @@ -325,11 +318,11 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) case 14 - 1: { std::uniform_int_distribution<> date_diff_dist{0, 5 * 12}; const auto diff = date_diff_dist(random_engine); - const auto begin_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff); - const auto end_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff + 1); + const auto begin_date = date_interval(boost::gregorian::date{1993, 01, 01}, diff, DatetimeComponent::Month); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Month); - parameters.emplace_back("'"s + begin_date + "'"); - parameters.emplace_back("'"s + end_date + "'"); + parameters.emplace_back("'"s + date_to_string(begin_date) + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); break; } @@ -338,8 +331,8 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) std::uniform_int_distribution<> date_diff_dist{0, 4 * 12 + 9}; const auto diff = date_diff_dist(random_engine); - const auto begin_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff); - const auto end_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff + 3); + const auto begin_date = date_interval(boost::gregorian::date{1993, 01, 01}, diff, DatetimeComponent::Month); + const auto end_date = date_interval(begin_date, 3, DatetimeComponent::Month); // Hack: We cannot use prepared statements in TPC-H 15. Thus, we need to build the SQL string by hand. // By manually replacing the `?` from tpch_queries.cpp, we can keep all queries in a readable form there. @@ -349,8 +342,8 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) DebugAssert((std::string_view{&query_15[BEGIN_DATE_OFFSET], 10} == "1996-01-01" && std::string_view{&query_15[END_DATE_OFFSET], 10} == "1996-04-01"), "TPC-H 15 string has been modified"); - query_15.replace(BEGIN_DATE_OFFSET, 10, begin_date); - query_15.replace(END_DATE_OFFSET, 10, end_date); + query_15.replace(BEGIN_DATE_OFFSET, 10, date_to_string(begin_date)); + query_15.replace(END_DATE_OFFSET, 10, date_to_string(end_date)); const auto view_id = std::atomic_fetch_add(&_q15_view_id, size_t{1}); boost::replace_all(query_15, std::string("revenue_view"), std::string("revenue") + std::to_string(view_id)); @@ -419,13 +412,13 @@ std::string TPCHBenchmarkItemRunner::_build_query(const BenchmarkItemID item_id) const auto* const color = colors.list[color_dist(random_engine)].text; std::uniform_int_distribution<> date_diff_dist{0, 4}; const auto diff = date_diff_dist(random_engine); - const auto begin_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, diff * 12); - const auto end_date = _calculate_date(boost::gregorian::date{1993, 01, 01}, (diff + 1) * 12); + const auto begin_date = date_interval(boost::gregorian::date{1993, 01, 01}, diff, DatetimeComponent::Year); + const auto end_date = date_interval(begin_date, 1, DatetimeComponent::Year); const auto* const nation = nations.list[nation_dist(random_engine)].text; parameters.emplace_back("'"s + color + "%'"); - parameters.emplace_back("'"s + begin_date + "'"); - parameters.emplace_back("'"s + end_date + "'"); + parameters.emplace_back("'"s + date_to_string(begin_date) + "'"); + parameters.emplace_back("'"s + date_to_string(end_date) + "'"); parameters.emplace_back("'"s + nation + "'"); break; } diff --git a/src/benchmarklib/tpch/tpch_benchmark_item_runner.hpp b/src/benchmarklib/tpch/tpch_benchmark_item_runner.hpp index 6e03fd46b7..2e3ad8878d 100644 --- a/src/benchmarklib/tpch/tpch_benchmark_item_runner.hpp +++ b/src/benchmarklib/tpch/tpch_benchmark_item_runner.hpp @@ -2,8 +2,6 @@ #include -#include - #include "abstract_benchmark_item_runner.hpp" #include "tpch_constants.hpp" @@ -34,9 +32,6 @@ class TPCHBenchmarkItemRunner : public AbstractBenchmarkItemRunner { // Runs the PREPARE queries if _use_prepared_statements is set, otherwise does nothing void _prepare_queries() const; - // Adds (or subtracts) specified number of months and days - static std::string _calculate_date(boost::gregorian::date date, int months, int days = 0); - // Returns an SQL query with random parameters for a given (zero-indexed) benchmark item (i.e., 0 -> TPC-H 1) std::string _build_query(const BenchmarkItemID item_id); diff --git a/src/benchmarklib/tpch/tpch_queries.cpp b/src/benchmarklib/tpch/tpch_queries.cpp index 6e9f21321c..7d3a5a0ecf 100644 --- a/src/benchmarklib/tpch/tpch_queries.cpp +++ b/src/benchmarklib/tpch/tpch_queries.cpp @@ -28,7 +28,7 @@ namespace { * l_returnflag, l_linestatus * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation */ @@ -128,7 +128,7 @@ const char* const tpch_query_3 = * ORDER BY o_orderpriority; * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation */ @@ -169,7 +169,7 @@ const char* const tpch_query_4 = * revenue DESC; * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation */ @@ -193,7 +193,7 @@ const char* const tpch_query_5 = * AND L_DISCOUNT BETWEEN [DISCOUNT] - 0.01 AND [DISCOUNT] + 0.01 AND L_QUANTITY < [QUANTITY] * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation * 2. ".06 + 0.01" is less than "0.07" in sqlite, but >= "0.07" in hyrise. @@ -235,10 +235,10 @@ const char* const tpch_query_6 = * ORDER BY supp_nation, cust_nation, l_year; * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation - * 2. Extract is not supported + * 2. SQLite does not support extract * a. Use SUBSTR instead (because our date columns are strings AND SQLite doesn't support EXTRACT) */ const char* const tpch_query_7 = @@ -317,7 +317,7 @@ const char* const tpch_query_7 = * ORDER BY o_year; * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * 2. Extract is not supported * a. Use SUBSTR instead (because our date columns are strings AND SQLite doesn't support EXTRACT) @@ -411,7 +411,7 @@ const char* const tpch_query_9 = * The limit is not part of the printed query but hidden in the specification text. * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation */ @@ -484,7 +484,7 @@ const char* const tpch_query_11 = * ORDER BY l_shipmode; * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation */ @@ -539,7 +539,7 @@ const char* const tpch_query_13 = * AND l_shipdate < date '[DATE]' + interval '1' month; * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation */ @@ -575,7 +575,7 @@ const char* const tpch_query_14 = * * Changes: * 1. "revenue[STREAM_ID]" renamed to "revenue_view" - * 2. dates are not supported + * 2. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation */ @@ -746,7 +746,7 @@ const char* const tpch_query_19 = * ORDER BY s_name; * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation */ @@ -792,7 +792,7 @@ const char* const tpch_query_20 = * The limit is not part of the printed query but hidden in the specification text. * * Changes: - * 1. dates are not supported + * 1. SQLite does not support dates and intervals * a. use strings as data type for now * b. pre-calculate date operation diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 92dc84e3cb..15147d9008 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -61,6 +61,8 @@ set( expression/function_expression.hpp expression/in_expression.cpp expression/in_expression.hpp + expression/interval_expression.cpp + expression/interval_expression.hpp expression/is_null_expression.cpp expression/is_null_expression.hpp expression/list_expression.cpp @@ -564,6 +566,8 @@ set( utils/column_ids_after_pruning.cpp utils/column_ids_after_pruning.hpp utils/copyable_atomic.hpp + utils/date_utils.cpp + utils/date_utils.hpp utils/enum_constant.hpp utils/format_bytes.cpp utils/format_bytes.hpp @@ -652,6 +656,7 @@ set( ${Boost_CONTAINER_LIBRARY} ${Boost_SYSTEM_LIBRARY} ${Boost_THREAD_LIBRARY} + ${Boost_DATE_TIME_LIBRARY} ${TBB_LIBRARY} ${SQLITE3_LIBRARY} ${CMAKE_DL_LIBS} diff --git a/src/lib/all_type_variant.hpp b/src/lib/all_type_variant.hpp index 62796a6c37..ffac6d6aa2 100644 --- a/src/lib/all_type_variant.hpp +++ b/src/lib/all_type_variant.hpp @@ -32,7 +32,7 @@ namespace hana = boost::hana; namespace detail { // clang-format off -#define DATA_TYPE_INFO \ +#define DATA_TYPE_INFO \ ((int32_t, Int, "int")) \ ((int64_t, Long, "long")) \ ((float, Float, "float")) \ diff --git a/src/lib/expression/abstract_expression.hpp b/src/lib/expression/abstract_expression.hpp index 724430728a..fd05dfe4fe 100644 --- a/src/lib/expression/abstract_expression.hpp +++ b/src/lib/expression/abstract_expression.hpp @@ -25,6 +25,7 @@ enum class ExpressionType { Exists, Extract, Function, + Interval, List, Logical, Placeholder, diff --git a/src/lib/expression/evaluation/expression_evaluator.cpp b/src/lib/expression/evaluation/expression_evaluator.cpp index d78bde1e53..25e0fa3ac3 100644 --- a/src/lib/expression/evaluation/expression_evaluator.cpp +++ b/src/lib/expression/evaluation/expression_evaluator.cpp @@ -27,6 +27,7 @@ #include "expression_functors.hpp" #include "hyrise.hpp" #include "like_matcher.hpp" +#include "lossy_cast.hpp" #include "operators/abstract_operator.hpp" #include "resolve_type.hpp" #include "scheduler/operator_task.hpp" @@ -227,6 +228,9 @@ std::shared_ptr> ExpressionEvaluator::evaluate_expressi Fail( "Can't evaluate an expressions still containing placeholders. Are you trying to execute a PreparedPlan " "without instantiating it first?"); + + case ExpressionType::Interval: + Fail("IntervalExpression should have been resolved by SQLTranslator"); } // Store the result in the cache @@ -664,6 +668,13 @@ std::shared_ptr> ExpressionEvaluator::_evaluate_case_ex template std::shared_ptr> ExpressionEvaluator::_evaluate_cast_expression( const CastExpression& cast_expression) { + Assert(cast_expression.data_type() != DataType::Null, "Cast as NULL is undefined"); + resolve_data_type(cast_expression.data_type(), [](auto type) { + using CastDataType = typename decltype(type)::type; + if constexpr (!std::is_same_v) { + Fail("Cast data types are ambiguous"); + } + }); /** * Implements SQL's CAST with the following semantics * Float/Double -> Int/Long: Value gets floor()ed @@ -679,35 +690,23 @@ std::shared_ptr> ExpressionEvaluator::_evaluate_cast_ex using ArgumentDataType = typename std::decay_t::Type; const auto result_size = _result_size(argument_result.size()); - values.resize(result_size); for (auto chunk_offset = ChunkOffset{0}; chunk_offset < static_cast(result_size); ++chunk_offset) { const auto& argument_value = argument_result.value(chunk_offset); - // NOLINTNEXTLINE(bugprone-branch-clone) - if constexpr (std::is_same_v || std::is_same_v) { - // " to Null" cast. Do nothing, this is handled by the `nulls` vector - } else if constexpr (std::is_same_v) { - // " to String" cast. Sould never fail, thus boost::lexical_cast (which throws on error) is fine - values[chunk_offset] = boost::lexical_cast(argument_value); - } else { - if constexpr (std::is_same_v) { - // "String to Numeric" cast - // Same as in SQLite, an illegal conversion (e.g. CAST("Hello" AS INT)) yields zero - // Does NOT use boost::lexical_cast() as that would throw on error - and we do not do the - // exception-as-flow-control thing. - if (!boost::conversion::try_lexical_convert(argument_value, values[chunk_offset])) { - values[chunk_offset] = 0; - } - } else { - // "Numeric to Numeric" cast. Use static_cast<> as boost::conversion::try_lexical_convert() would fail for - // CAST(5.5 AS INT) - values[chunk_offset] = static_cast(argument_value); + // "NULL to " cast is handled by the `nulls` vector + if constexpr (!std::is_same_v) { + try { + values[chunk_offset] = *lossy_variant_cast(argument_value); + } catch (boost::bad_lexical_cast&) { + std::stringstream error_message; + error_message << "Cannot cast '" << argument_value << "' as " + << magic_enum::enum_name(cast_expression.data_type()); + Fail(error_message.str()); } } } - nulls = argument_result.nulls; }); diff --git a/src/lib/expression/expression_functional.cpp b/src/lib/expression/expression_functional.cpp index 5f8f0569a7..4219e78ecc 100644 --- a/src/lib/expression/expression_functional.cpp +++ b/src/lib/expression/expression_functional.cpp @@ -45,6 +45,10 @@ std::shared_ptr exists_(const std::shared_ptr not_exists_(const std::shared_ptr& subquery_expression) { // NOLINT - clang-tidy doesn't like the suffix return std::make_shared(subquery_expression, ExistsExpressionType::NotExists); } + +std::shared_ptr interval_(const int64_t duration, const DatetimeComponent unit) { // NOLINT - clang-tidy doesn't like the suffix + return std::make_shared(duration, unit); +} // clang-format on } // namespace opossum::expression_functional diff --git a/src/lib/expression/expression_functional.hpp b/src/lib/expression/expression_functional.hpp index 1837039dcd..bb9a460deb 100644 --- a/src/lib/expression/expression_functional.hpp +++ b/src/lib/expression/expression_functional.hpp @@ -14,6 +14,7 @@ #include "extract_expression.hpp" #include "function_expression.hpp" #include "in_expression.hpp" +#include "interval_expression.hpp" #include "is_null_expression.hpp" #include "list_expression.hpp" #include "logical_expression.hpp" @@ -47,7 +48,7 @@ * const auto case_a_eq_1234 = std::make_shared(a_eq_1234, int_float_a_expression, null_value); * const auto case_a_eq_123 = std::make_shared(a_eq_123, int_float_b_expression, case_a_eq_1234); * - * ...and I think that's beautiful. + * ...and I think that's beautiful. */ namespace opossum { @@ -242,6 +243,8 @@ std::shared_ptr cast_(const Argument& argument, const DataType d return std::make_shared(to_expression(argument), data_type); } +std::shared_ptr interval_(const int64_t duration, const DatetimeComponent unit); + } // namespace expression_functional } // namespace opossum diff --git a/src/lib/expression/expression_utils.cpp b/src/lib/expression/expression_utils.cpp index 1d654cfbb9..6bc95804ee 100644 --- a/src/lib/expression/expression_utils.cpp +++ b/src/lib/expression/expression_utils.cpp @@ -6,6 +6,7 @@ #include "expression_functional.hpp" #include "logical_expression.hpp" +#include "lossy_cast.hpp" #include "lqp_column_expression.hpp" #include "lqp_subquery_expression.hpp" #include "operators/abstract_operator.hpp" @@ -300,9 +301,30 @@ std::optional expression_get_value_or_parameter(const AbstractEx return *correlated_parameter_expression->value(); } else if (expression.type == ExpressionType::Value) { return static_cast(expression).value; - } else { - return std::nullopt; + } else if (expression.type == ExpressionType::Cast) { + const auto& cast_expression = static_cast(expression); + Assert(expression.data_type() != DataType::Null, "Cast as NULL is undefined"); + // More complicated casts should be resolved by ExpressionEvaluator. + // E.g., CAST(any_column AS INT) cannot and should not be evaluated here. + if (cast_expression.argument()->type != ExpressionType::Value) return std::nullopt; + const auto& value_expression = static_cast(*cast_expression.argument()); + + // Casts from NULL are NULL + if (variant_is_null(value_expression.value)) return NULL_VALUE; + std::optional result; + resolve_data_type(expression.data_type(), [&](auto type) { + using TargetDataType = typename decltype(type)::type; + try { + // lossy_variant_cast returns std::nullopt when it casts from a NULL value. We have handled this above. + result = *lossy_variant_cast(value_expression.value); + } catch (boost::bad_lexical_cast&) { + Fail("Cannot cast " + cast_expression.argument()->as_column_name() + " as " + + std::string{magic_enum::enum_name(expression.data_type())}); + } + }); + return result; } + return std::nullopt; } std::vector> find_pqp_subquery_expressions( diff --git a/src/lib/expression/extract_expression.hpp b/src/lib/expression/extract_expression.hpp index caa7d4e679..b7345aded1 100644 --- a/src/lib/expression/extract_expression.hpp +++ b/src/lib/expression/extract_expression.hpp @@ -6,8 +6,6 @@ namespace opossum { -enum class DatetimeComponent { Year, Month, Day, Hour, Minute, Second }; - std::ostream& operator<<(std::ostream& stream, const DatetimeComponent datetime_component); /** diff --git a/src/lib/expression/interval_expression.cpp b/src/lib/expression/interval_expression.cpp new file mode 100644 index 0000000000..501485bc7e --- /dev/null +++ b/src/lib/expression/interval_expression.cpp @@ -0,0 +1,38 @@ +#include "interval_expression.hpp" + +#include + +#include + +namespace opossum { + +IntervalExpression::IntervalExpression(const int64_t init_duration, const DatetimeComponent init_unit) + : AbstractExpression(ExpressionType::Interval, {}), duration(init_duration), unit(init_unit) {} + +DataType IntervalExpression::data_type() const { return DataType::String; } + +std::shared_ptr IntervalExpression::_on_deep_copy( + std::unordered_map>& copied_ops) const { + return std::make_shared(duration, unit); +} + +std::string IntervalExpression::description(const DescriptionMode mode) const { + std::stringstream stream; + stream << "INTERVAL '" << duration << "' " << magic_enum::enum_name(unit); + return stream.str(); +} + +bool IntervalExpression::_shallow_equals(const AbstractExpression& expression) const { + DebugAssert(dynamic_cast(&expression), + "Different expression type should have been caught by AbstractExpression::operator=="); + const auto& other_interval_expression = static_cast(expression); + return duration == other_interval_expression.duration && unit == other_interval_expression.unit; +} + +size_t IntervalExpression::_shallow_hash() const { + auto hash = boost::hash_value(static_cast(unit)); + boost::hash_combine(hash, duration); + return hash; +} + +} // namespace opossum diff --git a/src/lib/expression/interval_expression.hpp b/src/lib/expression/interval_expression.hpp new file mode 100755 index 0000000000..66abf5b73f --- /dev/null +++ b/src/lib/expression/interval_expression.hpp @@ -0,0 +1,31 @@ +#pragma once + +#include + +#include "abstract_expression.hpp" + +namespace opossum { + +/** + * SQL's INTERVAL + */ +class IntervalExpression : public AbstractExpression { + public: + IntervalExpression(const int64_t init_duration, const DatetimeComponent init_unit); + + std::shared_ptr _on_deep_copy( + std::unordered_map>& copied_ops) const override; + std::string description(const DescriptionMode mode) const override; + + DataType data_type() const override; + + const int64_t duration; + + const DatetimeComponent unit; + + protected: + bool _shallow_equals(const AbstractExpression& expression) const override; + size_t _shallow_hash() const override; +}; + +} // namespace opossum diff --git a/src/lib/operators/sort.cpp b/src/lib/operators/sort.cpp index 8f79c1d8ae..aa7d858a7f 100644 --- a/src/lib/operators/sort.cpp +++ b/src/lib/operators/sort.cpp @@ -391,7 +391,7 @@ class Sort::SortImpl { // NULLs come before all values. The SQL standard allows for this to be implementation-defined. We used to have // a NULLS LAST mode, but never used it over multiple years. Different databases have different behaviors, and // storing NULLs first even for descending orders is somewhat uncommon: - // https://docs.mendix.com/refguide/null-ordering-behavior + // https://docs.mendix.com/refguide/ordering-behavior#null-ordering-behavior // For Hyrise, we found that storing NULLs first is the method that requires the least amount of code. _row_id_value_vector.insert(_row_id_value_vector.begin(), _null_value_rows.begin(), _null_value_rows.end()); } diff --git a/src/lib/sql/sql_translator.cpp b/src/lib/sql/sql_translator.cpp index 82c50d43e0..cf22cb4c11 100644 --- a/src/lib/sql/sql_translator.cpp +++ b/src/lib/sql/sql_translator.cpp @@ -22,6 +22,7 @@ #include "expression/expression_functional.hpp" #include "expression/expression_utils.hpp" #include "expression/in_expression.hpp" +#include "expression/interval_expression.hpp" #include "expression/is_null_expression.hpp" #include "expression/list_expression.hpp" #include "expression/logical_expression.hpp" @@ -60,6 +61,7 @@ #include "logical_query_plan/validate_node.hpp" #include "storage/lqp_view.hpp" #include "storage/table.hpp" +#include "utils/date_utils.hpp" #include "utils/meta_table_manager.hpp" #include "SQLParser.h" @@ -103,6 +105,12 @@ const std::unordered_map order_type_to_sort_mode = { {hsql::kOrderDesc, SortMode::Descending}, }; +const std::unordered_map supported_hsql_data_types = { + {hsql::DataType::INT, DataType::Int}, {hsql::DataType::LONG, DataType::Long}, + {hsql::DataType::FLOAT, DataType::Float}, {hsql::DataType::DOUBLE, DataType::Double}, + {hsql::DataType::TEXT, DataType::String}, +}; + JoinMode translate_join_mode(const hsql::JoinType join_type) { static const std::unordered_map join_type_to_mode = { {hsql::kJoinInner, JoinMode::Inner}, {hsql::kJoinFull, JoinMode::FullOuter}, {hsql::kJoinLeft, JoinMode::Left}, @@ -726,7 +734,7 @@ std::shared_ptr SQLTranslator::_translate_stored_table( // Publish the columns of the table in the SQLIdentifierResolver for (auto column_id = ColumnID{0}; column_id < table->column_count(); ++column_id) { const auto& column_definition = table->column_definitions()[column_id]; - const auto column_expression = std::make_shared(stored_table_node, column_id); + const auto column_expression = lqp_column_(stored_table_node, column_id); sql_identifier_resolver->add_column_name(column_expression, column_definition.name); sql_identifier_resolver->set_table_name(column_expression, name); } @@ -759,7 +767,7 @@ std::shared_ptr SQLTranslator::_translate_meta_table( // Publish the columns of the table in the SQLIdentifierResolver for (auto column_id = ColumnID{0}; column_id < meta_table->column_count(); ++column_id) { const auto& column_definition = meta_table->column_definitions()[column_id]; - const auto column_expression = std::make_shared(static_table_node, column_id); + const auto column_expression = lqp_column_(static_table_node, column_id); sql_identifier_resolver->add_column_name(column_expression, column_definition.name); sql_identifier_resolver->set_table_name(column_expression, name); } @@ -892,8 +900,7 @@ SQLTranslator::TableSourceState SQLTranslator::_translate_natural_join(const hsq if (left_expression) { // Two columns match, let's join on them. - join_predicates.emplace_back( - std::make_shared(PredicateCondition::Equals, left_expression, right_expression)); + join_predicates.emplace_back(equals_(left_expression, right_expression)); continue; } @@ -958,6 +965,8 @@ std::vector SQLTranslator::_translate_select_l for (const auto& hsql_select_expr : select_list) { if (hsql_select_expr->type == hsql::kExprStar) { select_list_elements.emplace_back(SelectListElement{nullptr}); + } else if (hsql_select_expr->type == hsql::kExprLiteralInterval) { + FailInput("Interval can only be added to or substracted from a date"); } else { auto expression = _translate_hsql_expr(*hsql_select_expr, _sql_identifier_resolver); select_list_elements.emplace_back(SelectListElement{expression}); @@ -1239,8 +1248,7 @@ std::shared_ptr SQLTranslator::_translate_show(const hsql::Show const auto columns_meta_table = Hyrise::get().meta_table_manager.generate_table("columns"); const auto static_table_node = StaticTableNode::make(columns_meta_table); const auto table_name_column = lqp_column_(static_table_node, ColumnID{0}); - const auto predicate = std::make_shared(PredicateCondition::Equals, table_name_column, - value_(show_statement.name)); + const auto predicate = equals_(table_name_column, value_(show_statement.name)); return PredicateNode::make(predicate, static_table_node); } } @@ -1316,7 +1324,7 @@ std::shared_ptr SQLTranslator::_translate_create_table(const hs case hsql::DataType::CHAR: case hsql::DataType::VARCHAR: case hsql::DataType::TEXT: - // Ignoring the length of CHAR and VARCHAR columns for now as Hyrise as no way of working with these + // Ignoring the length of CHAR and VARCHAR columns for now as Hyrise has no way of working with these column_definition.data_type = DataType::String; break; case hsql::DataType::DATE: @@ -1474,6 +1482,14 @@ std::shared_ptr SQLTranslator::_translate_hsql_expr( const auto left = expr.expr ? _translate_hsql_expr(*expr.expr, sql_identifier_resolver) : nullptr; const auto right = expr.expr2 ? _translate_hsql_expr(*expr.expr2, sql_identifier_resolver) : nullptr; + if (left) { + AssertInput(left->type != ExpressionType::Interval, "IntervalExpression must follow another expression"); + } + if (right && right->type == ExpressionType::Interval) { + AssertInput(expr.type == hsql::kExprOperator && (expr.opType == hsql::kOpPlus || expr.opType == hsql::kOpMinus), + "Intervals can only be added or substracted"); + } + switch (expr.type) { case hsql::kExprColumnRef: { const auto table_name = expr.table ? std::optional(std::string(expr.table)) : std::nullopt; @@ -1490,35 +1506,40 @@ std::shared_ptr SQLTranslator::_translate_hsql_expr( } case hsql::kExprLiteralFloat: - return std::make_shared(expr.fval); + return value_(expr.fval); case hsql::kExprLiteralString: AssertInput(expr.name, "No value given for string literal"); - return std::make_shared(pmr_string{name}); + return value_(pmr_string{name}); case hsql::kExprLiteralInt: if (static_cast(expr.ival) == expr.ival) { - return std::make_shared(static_cast(expr.ival)); + return value_(static_cast(expr.ival)); } else { - return std::make_shared(expr.ival); + return value_(expr.ival); } case hsql::kExprLiteralNull: - return std::make_shared(NullValue{}); + return null_(); + + case hsql::kExprLiteralDate: { + const auto date = string_to_date(name); + if (date) return value_(pmr_string{name}); + FailInput("'" + name + "' is not a valid date"); + } case hsql::kExprParameter: { Assert(expr.ival >= 0 && expr.ival <= std::numeric_limits::max(), "ValuePlaceholderID out of range"); auto value_placeholder_id = ValuePlaceholderID{static_cast(expr.ival)}; - return std::make_shared( - _parameter_id_allocator->allocate_for_value_placeholder(value_placeholder_id)); + return placeholder_(_parameter_id_allocator->allocate_for_value_placeholder(value_placeholder_id)); } case hsql::kExprExtract: { Assert(expr.datetimeField != hsql::kDatetimeNone, "No DatetimeField specified in EXTRACT. Bug in sqlparser?"); auto datetime_component = hsql_datetime_field.at(expr.datetimeField); - return std::make_shared(datetime_component, left); + return extract_(datetime_component, left); } case hsql::kExprFunctionRef: { @@ -1577,7 +1598,7 @@ std::shared_ptr SQLTranslator::_translate_hsql_expr( }); Assert(leaf_node, "No leaf node found below COUNT(*)"); - const auto column_expression = std::make_shared(leaf_node, INVALID_COLUMN_ID); + const auto column_expression = lqp_column_(leaf_node, INVALID_COLUMN_ID); aggregate_expression = std::make_shared(aggregate_function, column_expression); } else { @@ -1639,7 +1660,24 @@ std::shared_ptr SQLTranslator::_translate_hsql_expr( const auto arithmetic_operators_iter = hsql_arithmetic_operators.find(expr.opType); if (arithmetic_operators_iter != hsql_arithmetic_operators.end()) { Assert(left && right, "Unexpected SQLParserResult. Didn't receive two arguments for binary expression."); - return std::make_shared(arithmetic_operators_iter->second, left, right); + const auto arithmetic_operator = arithmetic_operators_iter->second; + + // Handle intervals + if (right->type == ExpressionType::Interval) { + AssertInput(left->type == ExpressionType::Value && left->data_type() == DataType::String, + "Interval can only be applied to ValueExpression with String value"); + const auto start_date_string = + std::string{boost::get(static_cast(*left).value)}; + const auto start_date = string_to_date(start_date_string); + AssertInput(start_date, "'" + start_date_string + "' is not a valid date"); + const auto& interval_expression = static_cast(*right); + // We already ensured to have either Addition or Substraction right at the beginning + const auto duration = arithmetic_operator == ArithmeticOperator::Addition ? interval_expression.duration + : -interval_expression.duration; + const auto end_date = date_interval(*start_date, duration, interval_expression.unit); + return value_(pmr_string{date_to_string(end_date)}); + } + return std::make_shared(arithmetic_operator, left, right); } // Translate PredicateExpression @@ -1652,28 +1690,26 @@ std::shared_ptr SQLTranslator::_translate_hsql_expr( return std::make_shared(predicate_condition, left, right); } else if (predicate_condition == PredicateCondition::BetweenInclusive) { Assert(expr.exprList && expr.exprList->size() == 2, "Expected two arguments for BETWEEN"); - return std::make_shared( - PredicateCondition::BetweenInclusive, left, - _translate_hsql_expr(*(*expr.exprList)[0], sql_identifier_resolver), - _translate_hsql_expr(*(*expr.exprList)[1], sql_identifier_resolver)); + return between_inclusive_(left, _translate_hsql_expr(*(*expr.exprList)[0], sql_identifier_resolver), + _translate_hsql_expr(*(*expr.exprList)[1], sql_identifier_resolver)); } } // Translate other expression types that can be expected at this point switch (expr.opType) { case hsql::kOpUnaryMinus: - return std::make_shared(left); + return unary_minus_(left); case hsql::kOpCase: return _translate_hsql_case(expr, sql_identifier_resolver); case hsql::kOpOr: - return std::make_shared(LogicalOperator::Or, left, right); + return or_(left, right); case hsql::kOpAnd: - return std::make_shared(LogicalOperator::And, left, right); + return and_(left, right); case hsql::kOpIn: { if (expr.select) { // `a IN (SELECT ...)` const auto subquery = _translate_hsql_subquery(*expr.select, sql_identifier_resolver); - return std::make_shared(PredicateCondition::In, left, subquery); + return in_(left, subquery); } else { // `a IN (x, y, z)` @@ -1687,7 +1723,7 @@ std::shared_ptr SQLTranslator::_translate_hsql_expr( } const auto array = std::make_shared(arguments); - return std::make_shared(PredicateCondition::In, left, array); + return in_(left, array); } } @@ -1699,8 +1735,7 @@ std::shared_ptr SQLTranslator::_translate_hsql_expr( case hsql::kOpExists: AssertInput(expr.select, "Expected SELECT argument for EXISTS"); - return std::make_shared(_translate_hsql_subquery(*expr.select, sql_identifier_resolver), - ExistsExpressionType::Exists); + return exists_(_translate_hsql_subquery(*expr.select, sql_identifier_resolver)); default: Fail("Unexpected expression type"); // There are 19 of these, so we make an exception here and use default @@ -1722,8 +1757,33 @@ std::shared_ptr SQLTranslator::_translate_hsql_expr( case hsql::kExprHint: FailInput("Hints are not yet supported"); - case hsql::kExprCast: - FailInput("Explicit casts are not yet supported"); + case hsql::kExprCast: { + const auto source_data_type = left->data_type(); + if (expr.columnType.data_type == hsql::DataType::DATE) { + AssertInput(source_data_type == DataType::String, "Cannot cast " + left->as_column_name() + " as Date"); + // We do not know if an expression to be casted other than a ValueExpression actually contains dates, and we + // cannot check this later due to the lack of a Date data type + AssertInput(left->type == ExpressionType::Value, "Only ValueExpressions can be casted as Date"); + const auto date_string = boost::get(static_cast(*left).value); + const auto date = string_to_date(std::string{date_string}); + if (date) return std::const_pointer_cast(left); + FailInput("'" + std::string{date_string} + "' is not a valid date"); + } + const auto data_type_iter = supported_hsql_data_types.find(expr.columnType.data_type); + AssertInput(data_type_iter != supported_hsql_data_types.cend(), + "CAST as " + std::string{magic_enum::enum_name(expr.columnType.data_type)} + " is not supported"); + const auto target_data_type = data_type_iter->second; + // Omit redundant casts + if (source_data_type == target_data_type) return std::const_pointer_cast(left); + return cast_(left, target_data_type); + } + + case hsql::kExprLiteralInterval: { + const auto unit = hsql_datetime_field.at(expr.datetimeField); + AssertInput(unit == DatetimeComponent::Day || unit == DatetimeComponent::Month || unit == DatetimeComponent::Year, + "Only date intervals are supported yet"); + return interval_(expr.ival, unit); + } } Fail("Invalid enum value"); } @@ -1777,7 +1837,7 @@ std::shared_ptr SQLTranslator::_translate_hsql_case( current_case_expression = _translate_hsql_expr(*expr.expr2, sql_identifier_resolver); } else { // No ELSE specified, use NULL - current_case_expression = std::make_shared(NullValue{}); + current_case_expression = null_(); } for (auto case_reverse_idx = size_t{0}; case_reverse_idx < expr.exprList->size(); ++case_reverse_idx) { @@ -1786,11 +1846,11 @@ std::shared_ptr SQLTranslator::_translate_hsql_case( auto when = _translate_hsql_expr(*case_clause->expr, sql_identifier_resolver); if (simple_case_left_operand) { - when = std::make_shared(PredicateCondition::Equals, simple_case_left_operand, when); + when = equals_(simple_case_left_operand, when); } const auto then = _translate_hsql_expr(*case_clause->expr2, sql_identifier_resolver); - current_case_expression = std::make_shared(when, then, current_case_expression); + current_case_expression = case_(when, then, current_case_expression); } return current_case_expression; diff --git a/src/lib/types.hpp b/src/lib/types.hpp index d371c2f57f..808199f0c3 100644 --- a/src/lib/types.hpp +++ b/src/lib/types.hpp @@ -240,6 +240,8 @@ enum class AutoCommit : bool { Yes = true, No = false }; enum class LogLevel { Debug, Info, Warning }; +enum class DatetimeComponent { Year, Month, Day, Hour, Minute, Second }; + // Used as a template parameter that is passed whenever we conditionally erase the type of a template. This is done to // reduce the compile time at the cost of the runtime performance. Examples are iterators, which are replaced by // AnySegmentIterators that use virtual method calls. diff --git a/src/lib/utils/date_utils.cpp b/src/lib/utils/date_utils.cpp new file mode 100644 index 0000000000..cdbb0d919f --- /dev/null +++ b/src/lib/utils/date_utils.cpp @@ -0,0 +1,43 @@ +#include "date_utils.hpp" + +#include + +namespace opossum { + +std::optional string_to_date(const std::string& date_string) { + try { + const auto date = boost::gregorian::from_string(date_string); + if (date.is_not_a_date()) return std::nullopt; + return date; + } catch (const boost::wrapexcept&) { + } catch (const boost::wrapexcept&) { + } catch (const boost::wrapexcept&) { + } catch (const boost::wrapexcept&) { + } + return std::nullopt; +} + +boost::gregorian::date date_interval(const boost::gregorian::date& start_date, int64_t offset, DatetimeComponent unit) { + switch (unit) { + case DatetimeComponent::Year: { + const boost::date_time::year_functor interval(offset); + return start_date + interval.get_offset(start_date); + } + case DatetimeComponent::Month: { + const boost::date_time::month_functor interval(offset); + return start_date + interval.get_offset(start_date); + } + case DatetimeComponent::Day: { + const boost::date_time::day_functor interval(offset); + return start_date + interval.get_offset(start_date); + } + default: + Fail("Invalid time unit for date interval: " + std::string{magic_enum::enum_name(unit)}); + } +} + +std::string date_to_string(const boost::gregorian::date& date) { + return boost::gregorian::to_iso_extended_string(date); +} + +} // namespace opossum diff --git a/src/lib/utils/date_utils.hpp b/src/lib/utils/date_utils.hpp new file mode 100755 index 0000000000..f5bac18625 --- /dev/null +++ b/src/lib/utils/date_utils.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include "types.hpp" + +#include + +namespace opossum { + +/** + * Takes delimited date string with order year-month-day (ISO 8601 extended format), e.g., "2001-01-01". + * Notably, Boost's gregorian dates do not support years < 1400 or > 9999. + */ +std::optional string_to_date(const std::string& date_string); + +/** + * This also handles edge cases with days that are the end of a month. + * E.g., March 31 + one month == April 30, and vice versa. + * This also applies to leap years. + */ +boost::gregorian::date date_interval(const boost::gregorian::date& start_date, int64_t offset, DatetimeComponent unit); + +// ISO 8601 extended format representation of the date +std::string date_to_string(const boost::gregorian::date& date); + +} // namespace opossum diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 460e33a592..4b99627f28 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -204,6 +204,7 @@ set( lib/storage/value_segment_test.cpp lib/tasks/chunk_compression_task_test.cpp lib/utils/check_table_equal_test.cpp + lib/utils/date_utils_test.cpp lib/utils/column_ids_after_pruning_test.cpp lib/utils/format_bytes_test.cpp lib/utils/format_duration_test.cpp diff --git a/src/test/lib/expression/expression_evaluator_to_values_test.cpp b/src/test/lib/expression/expression_evaluator_to_values_test.cpp index 86f6308851..1a621a4cd4 100644 --- a/src/test/lib/expression/expression_evaluator_to_values_test.cpp +++ b/src/test/lib/expression/expression_evaluator_to_values_test.cpp @@ -794,9 +794,14 @@ TEST_F(ExpressionEvaluatorToValuesTest, CastLiterals) { EXPECT_TRUE(test_expression(*cast_(5.5, DataType::String), {"5.5"})); EXPECT_TRUE(test_expression(*cast_(null_(), DataType::Int), {std::nullopt})); - // Following SQLite, CAST("Hello" AS INT) yields zero - EXPECT_TRUE(test_expression(*cast_("Hello", DataType::Int), {0})); - EXPECT_TRUE(test_expression(*cast_("Hello", DataType::Float), {0.0f})); + // Ensure requested data type is cast data type + EXPECT_THROW(test_expression(*cast_("1.2", DataType::Float), {}), std::logic_error); + // Following SQL standard, CAST("Hello" AS INT) errors + EXPECT_THROW(test_expression(*cast_("Hello", DataType::Int), {}), std::logic_error); + EXPECT_THROW(test_expression(*cast_("Hello", DataType::Float), {}), std::logic_error); + EXPECT_THROW(test_expression(*cast_("1.2", DataType::Int), {}), std::logic_error); + // Cast as Null is undefined + EXPECT_THROW(test_expression(*cast_("Hello", DataType::Null), {}), std::logic_error); } TEST_F(ExpressionEvaluatorToValuesTest, CastSeries) { diff --git a/src/test/lib/expression/expression_test.cpp b/src/test/lib/expression/expression_test.cpp index a730202660..be98a8d307 100644 --- a/src/test/lib/expression/expression_test.cpp +++ b/src/test/lib/expression/expression_test.cpp @@ -77,6 +77,9 @@ TEST_F(ExpressionTest, Equals) { EXPECT_NE(*unary_minus_(6), *unary_minus_(6.5)); EXPECT_EQ(*cast_(6.5, DataType::Int), *cast_(6.5, DataType::Int)); EXPECT_NE(*cast_(6.5, DataType::Int), *cast_(6.5, DataType::Float)); + EXPECT_EQ(*interval_(3, DatetimeComponent::Day), *interval_(3, DatetimeComponent::Day)); + EXPECT_NE(*interval_(3, DatetimeComponent::Day), *interval_(3, DatetimeComponent::Month)); + EXPECT_NE(*interval_(3, DatetimeComponent::Day), *interval_(4, DatetimeComponent::Day)); } TEST_F(ExpressionTest, DeepEquals) { @@ -164,6 +167,7 @@ TEST_F(ExpressionTest, DeepCopySubplanDeduplication) { expressions.emplace_back(extract_(DatetimeComponent::Day, pqp_subquery_expression_int1)); // Function Expression expressions.emplace_back(substr_(pqp_subquery_expression_string, 0, 1)); + // Interval Expression [never contains PQPSubqueryExpression] // List Expression expressions.emplace_back( list_(pqp_subquery_expression_int1, pqp_subquery_expression_int2, pqp_subquery_expression_int3)); @@ -276,6 +280,7 @@ TEST_F(ExpressionTest, AsColumnName) { EXPECT_EQ(correlated_parameter_(ParameterID{0}, a)->as_column_name(), "Parameter[name=a; ParameterID=0]"); EXPECT_EQ(in_(5, list_(1, 2, 3))->as_column_name(), "(5) IN (1, 2, 3)"); EXPECT_EQ(not_in_(5, list_(1, 2, 3))->as_column_name(), "(5) NOT IN (1, 2, 3)"); + EXPECT_EQ(interval_(30, DatetimeComponent::Day)->as_column_name(), "INTERVAL '30' Day"); } TEST_F(ExpressionTest, AsColumnNameNested) { @@ -331,6 +336,7 @@ TEST_F(ExpressionTest, DataType) { EXPECT_EQ(null_()->data_type(), DataType::Null); EXPECT_EQ(cast_(36.5, DataType::Int)->data_type(), DataType::Int); EXPECT_EQ(cast_(null_(), DataType::Float)->data_type(), DataType::Float); + EXPECT_EQ(interval_(30, DatetimeComponent::Day)->data_type(), DataType::String); EXPECT_EQ(less_than_(1, 2)->data_type(), DataType::Int); EXPECT_EQ(less_than_(1.5, 2)->data_type(), DataType::Int); @@ -462,6 +468,11 @@ TEST_F(ExpressionTest, EqualsAndHash) { expressions.emplace_back(__LINE__, not_in_(5, list_(1, 2, 3))); expressions.emplace_back(__LINE__, in_(5, lqp_subquery_(int_float_node))); + // IntervalExpression + expressions.emplace_back(__LINE__, interval_(1, DatetimeComponent::Day)); + expressions.emplace_back(__LINE__, interval_(6, DatetimeComponent::Day)); + expressions.emplace_back(__LINE__, interval_(6, DatetimeComponent::Month)); + // IsNullExpression expressions.emplace_back(__LINE__, is_null_(1)); expressions.emplace_back(__LINE__, is_null_(2)); diff --git a/src/test/lib/expression/expression_utils_test.cpp b/src/test/lib/expression/expression_utils_test.cpp index 708f2865a5..a7bd6a7ee0 100644 --- a/src/test/lib/expression/expression_utils_test.cpp +++ b/src/test/lib/expression/expression_utils_test.cpp @@ -191,4 +191,47 @@ TEST_F(ExpressionUtilsTest, CollectPQPSubqueryExpressionsIgnoreNested) { } } +TEST_F(ExpressionUtilsTest, GetValueOrParameter) { + const auto expected_value = AllTypeVariant{int64_t{1}}; + const auto value_expression = value_(expected_value); + const auto correlated_parameter_expression = correlated_parameter_(ParameterID{0}, value_expression); + correlated_parameter_expression->set_value(expected_value); + const auto invalid_cast = cast_(value_(pmr_string{"1.2"}), DataType::Int); + const auto cast_as_null = cast_(expected_value, DataType::Null); + const auto cast_as_float = cast_(value_expression, DataType::Float); + const auto cast_from_null = cast_(null_(), DataType::Int); + const auto cast_column = cast_(a_a, DataType::Float); + + EXPECT_THROW(expression_get_value_or_parameter(*invalid_cast), std::logic_error); + // Casts as NULL are undefined + EXPECT_THROW(expression_get_value_or_parameter(*cast_as_null), std::logic_error); + + { + const auto actual_value = expression_get_value_or_parameter(*value_expression); + EXPECT_NE(actual_value, std::nullopt); + EXPECT_EQ(*actual_value, expected_value); + } + { + const auto actual_value = expression_get_value_or_parameter(*correlated_parameter_expression); + EXPECT_NE(actual_value, std::nullopt); + EXPECT_EQ(*actual_value, expected_value); + } + { + const auto actual_value = expression_get_value_or_parameter(*cast_as_float); + EXPECT_NE(actual_value, std::nullopt); + EXPECT_FLOAT_EQ(boost::get(*actual_value), 1.0); + } + { + // Casts from NULL should return a NULL value + const auto actual_value = expression_get_value_or_parameter(*cast_from_null); + EXPECT_NE(actual_value, std::nullopt); + EXPECT_TRUE(variant_is_null(*actual_value)); + } + { + // More complicated casts should be evaluated by ExpressionEvaluator + const auto actual_value = expression_get_value_or_parameter(*cast_column); + EXPECT_EQ(actual_value, std::nullopt); + } +} + } // namespace opossum diff --git a/src/test/lib/sql/sql_translator_test.cpp b/src/test/lib/sql/sql_translator_test.cpp index bbfea61c97..08f68d5c5d 100644 --- a/src/test/lib/sql/sql_translator_test.cpp +++ b/src/test/lib/sql/sql_translator_test.cpp @@ -1656,7 +1656,6 @@ TEST_F(SQLTranslatorTest, Extract) { std::shared_ptr actual_lqp; std::shared_ptr expected_lqp; - ProjectionNode::make(expression_vector(extract_(DatetimeComponent::Year, "1993-08-01")), DummyTableNode::make()); for (const auto& component : components) { std::stringstream query_str; @@ -3013,4 +3012,91 @@ TEST_F(SQLTranslatorTest, ImportStatement) { } } +TEST_F(SQLTranslatorTest, DateLiteral) { + EXPECT_THROW(sql_to_lqp_helper("SELECT DATE '2001-01-35';"), InvalidInputException); + + const auto value_expression = expression_vector(value_(pmr_string{"2000-01-31"})); + // clang-format off + const auto expected_lqp = + AliasNode::make(value_expression, std::vector{"2000-01-31"}, + ProjectionNode::make(value_expression, + DummyTableNode::make())); + // clang-format on + const auto [actual_lqp, translation_info] = sql_to_lqp_helper("SELECT DATE '2000-01-31';"); + EXPECT_LQP_EQ(actual_lqp, expected_lqp); +} + +TEST_F(SQLTranslatorTest, IntervalLiteral) { + // Though most of these queries are valid SQL, we want to ensure to reject expressions Hyrise cannot handle + EXPECT_THROW(sql_to_lqp_helper("SELECT INTERVAL '3' day from int_string;"), InvalidInputException); + EXPECT_THROW(sql_to_lqp_helper("SELECT * from int_string WHERE b = INTERVAL '3' day;"), InvalidInputException); + EXPECT_THROW(sql_to_lqp_helper("SELECT 'abc' + 3 days;"), InvalidInputException); + EXPECT_THROW(sql_to_lqp_helper("SELECT 1 + 3 days;"), InvalidInputException); + EXPECT_THROW(sql_to_lqp_helper("SELECT DATE '2001-01-01' / 3 days;"), InvalidInputException); + EXPECT_THROW(sql_to_lqp_helper("SELECT DATE '2001-01-01' + 1 second;"), InvalidInputException); + EXPECT_THROW(sql_to_lqp_helper("SELECT INTERVAL '1' day + INTERVAL '1' day;"), InvalidInputException); + + // clang-format off + const auto expected_lqp = + ProjectionNode::make(expression_vector(value_(pmr_string{"2000-01-31"})), + DummyTableNode::make()); + // clang-format on + + { + const auto [actual_lqp, translation_info] = sql_to_lqp_helper("SELECT '2000-01-01' + INTERVAL '30' day;"); + EXPECT_LQP_EQ(actual_lqp, expected_lqp); + } + { + const auto [actual_lqp, translation_info] = sql_to_lqp_helper("SELECT '2002-01-31' - INTERVAL '2 years';"); + EXPECT_LQP_EQ(actual_lqp, expected_lqp); + } + { + const auto [actual_lqp, translation_info] = sql_to_lqp_helper("SELECT '1999-12-31' + 1 month;"); + EXPECT_LQP_EQ(actual_lqp, expected_lqp); + } +} + +TEST_F(SQLTranslatorTest, CastStatement) { + EXPECT_THROW(sql_to_lqp_helper("SELECT CAST('abc' AS DATE)"), InvalidInputException); + EXPECT_THROW(sql_to_lqp_helper("SELECT CAST(1 AS DATE)"), InvalidInputException); + EXPECT_THROW(sql_to_lqp_helper("SELECT CAST(a AS DATE) FROM int_string"), InvalidInputException); + // We do not have a real Date data type, so we cannot pass this cast anywhere else. + EXPECT_THROW(sql_to_lqp_helper("SELECT CAST(b AS DATE) FROM int_string"), InvalidInputException); + // This is valid SQL, but we do not have a DateTime data type + EXPECT_THROW(sql_to_lqp_helper("SELECT CAST('2000-01-01 00:00:00' AS DATETIME)"), InvalidInputException); + + { + const auto cast_expression = expression_vector(cast_(value_(pmr_string{'1'}), DataType::Int)); + // clang-format off + const auto expected_lqp = + ProjectionNode::make(cast_expression, + DummyTableNode::make()); + // clang-format on + const auto [actual_lqp, translation_info] = sql_to_lqp_helper("SELECT CAST('1' as INT);"); + EXPECT_LQP_EQ(actual_lqp, expected_lqp); + } + { + // Omit the cast when source and target data type are identical + const auto value_expression = expression_vector(value_(pmr_string{'1'})); + // clang-format off + const auto expected_lqp = + ProjectionNode::make(value_expression, + DummyTableNode::make()); + // clang-format on + const auto [actual_lqp, translation_info] = sql_to_lqp_helper("SELECT CAST('1' as TEXT);"); + EXPECT_LQP_EQ(actual_lqp, expected_lqp); + } + { + // Dates need to be resolved, as we do not have a Date DataType + const auto value_expression = expression_vector(value_(pmr_string{"2000-01-01"})); + // clang-format off + const auto expected_lqp = + ProjectionNode::make(value_expression, + DummyTableNode::make()); + // clang-format on + const auto [actual_lqp, translation_info] = sql_to_lqp_helper("SELECT CAST('2000-01-01' as DATE);"); + EXPECT_LQP_EQ(actual_lqp, expected_lqp); + } +} + } // namespace opossum diff --git a/src/test/lib/utils/date_utils_test.cpp b/src/test/lib/utils/date_utils_test.cpp new file mode 100755 index 0000000000..a0541ed6e7 --- /dev/null +++ b/src/test/lib/utils/date_utils_test.cpp @@ -0,0 +1,49 @@ +#include "base_test.hpp" + +#include "utils/date_utils.hpp" + +namespace opossum { + +class DateUtilsTest : public BaseTest {}; + +TEST_F(DateUtilsTest, StringToDate) { + EXPECT_EQ(string_to_date("2000-01-45"), std::nullopt); + EXPECT_EQ(string_to_date("2000-13-01"), std::nullopt); + EXPECT_EQ(string_to_date("2000-04-31"), std::nullopt); + EXPECT_EQ(string_to_date("2001-02-29"), std::nullopt); + EXPECT_EQ(string_to_date("-1-02-29"), std::nullopt); + EXPECT_EQ(string_to_date("foo"), std::nullopt); + + const auto parsed_date = string_to_date("2000-01-31"); + EXPECT_NE(parsed_date, std::nullopt); + EXPECT_EQ(parsed_date->year(), 2000); + EXPECT_EQ(parsed_date->month(), 1); + EXPECT_EQ(parsed_date->day(), 31); + + const auto leap_year_date = string_to_date("2000-02-29"); + EXPECT_NE(leap_year_date, std::nullopt); + EXPECT_EQ(leap_year_date->year(), 2000); + EXPECT_EQ(leap_year_date->month(), 2); + EXPECT_EQ(leap_year_date->day(), 29); +} + +TEST_F(DateUtilsTest, DateInterval) { + const boost::gregorian::date date{2000, 1, 31}; + const boost::gregorian::date leap_year_date{2000, 2, 29}; + EXPECT_THROW(date_interval(date, 0, DatetimeComponent::Second), std::logic_error); + EXPECT_THROW(date_interval(date, 0, DatetimeComponent::Minute), std::logic_error); + EXPECT_THROW(date_interval(date, 0, DatetimeComponent::Hour), std::logic_error); + EXPECT_EQ(date_interval(date, 1, DatetimeComponent::Day), (boost::gregorian::date{2000, 2, 1})); + EXPECT_EQ(date_interval(date, 1, DatetimeComponent::Month), leap_year_date); + EXPECT_EQ(date_interval(boost::gregorian::date{1999, 11, 30}, 2, DatetimeComponent::Month), date); + EXPECT_EQ(date_interval(date, 1, DatetimeComponent::Year), (boost::gregorian::date{2001, 1, 31})); + EXPECT_EQ(date_interval(leap_year_date, 1, DatetimeComponent::Year), (boost::gregorian::date{2001, 2, 28})); + EXPECT_EQ(date_interval(boost::gregorian::date{1999, 2, 28}, 1, DatetimeComponent::Year), leap_year_date); +} + +TEST_F(DateUtilsTest, DateToString) { + const boost::gregorian::date date{2000, 1, 31}; + EXPECT_EQ(date_to_string(date), "2000-01-31"); +} + +} // namespace opossum diff --git a/third_party/sql-parser b/third_party/sql-parser index 6f5a7645fe..f1afb9aa15 160000 --- a/third_party/sql-parser +++ b/third_party/sql-parser @@ -1 +1 @@ -Subproject commit 6f5a7645fe3e0fc7f945f86ccecbdcc7dee68a3d +Subproject commit f1afb9aa15ff4a76a691adea09a781ae345206ef