From 72956467958b4b79353e55c1096eff575033060d Mon Sep 17 00:00:00 2001 From: Pedro Flemming Date: Mon, 29 May 2017 14:39:50 +0200 Subject: [PATCH] Adds basic SQL capabilities (#6) --- .gitmodules | 3 + README.md | 1 + premake4.lua | 36 +- src/benchmark/operators/sql_benchmark.cpp | 52 +++ src/lib/operators/get_table.cpp | 4 +- src/lib/operators/get_table.hpp | 4 +- src/lib/sql/sql_query_operator.cpp | 57 ++++ src/lib/sql/sql_query_operator.hpp | 41 +++ src/lib/sql/sql_query_translator.cpp | 398 ++++++++++++++++++++++ src/lib/sql/sql_query_translator.hpp | 71 ++++ src/lib/sql/sql_result_operator.cpp | 24 ++ src/lib/sql/sql_result_operator.hpp | 36 ++ src/test/sql/sql_query_operator_test.cpp | 116 +++++++ src/test/sql/sql_select_test.cpp | 216 ++++++++++++ third_party/sql-parser | 1 + 15 files changed, 1047 insertions(+), 13 deletions(-) create mode 100644 src/benchmark/operators/sql_benchmark.cpp create mode 100644 src/lib/sql/sql_query_operator.cpp create mode 100644 src/lib/sql/sql_query_operator.hpp create mode 100644 src/lib/sql/sql_query_translator.cpp create mode 100644 src/lib/sql/sql_query_translator.hpp create mode 100644 src/lib/sql/sql_result_operator.cpp create mode 100644 src/lib/sql/sql_result_operator.hpp create mode 100644 src/test/sql/sql_query_operator_test.cpp create mode 100644 src/test/sql/sql_select_test.cpp create mode 160000 third_party/sql-parser diff --git a/.gitmodules b/.gitmodules index 39d7350151..aa86f763c4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "third_party/benchmark"] path = third_party/benchmark url = https://github.com/google/benchmark.git +[submodule "third_party/sql-parser"] + path = third_party/sql-parser + url = https://github.com/hyrise/sql-parser diff --git a/README.md b/README.md index 7b41c725ae..906fe02801 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,7 @@ Contact: firstname.lastname@hpi.de - Timo Djürken - Moritz Eyssen - Martin Fischer +- Pedro Flemming - Michael Janke - Max Jendruk - Marvin Keller diff --git a/premake4.lua b/premake4.lua index 543bac72ae..562d337cca 100644 --- a/premake4.lua +++ b/premake4.lua @@ -97,6 +97,8 @@ solution "opossum" defines { "OPOSSUM_NUMA_SUPPORT=0" } end + libs[#libs+1] = "sqlparser" + configuration "Debug" defines { "IS_DEBUG=1" } flags { "Symbols" } @@ -124,44 +126,56 @@ project "googlebenchmark" configuration "Debug or Release" defines {"NDEBUG", "HAVE_STD_REGEX"} +project "sqlparser" + kind "StaticLib" + buildoptions { "-O3 -Wno-sign-compare" } + + -- clang throws unneeded-internal-declaration for parser generated code. + -- This warning does not exist in gcc, so we have to make a special fork here. + if _OPTIONS["compiler"] == "clang" then + buildoptions { "-O3 -Wno-sign-compare -Wno-unneeded-internal-declaration" } + end + + files { "third_party/sql-parser/src/**.cpp" } + project "opossum" kind "StaticLib" - includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } files { "src/lib/**.hpp", "src/lib/**.cpp" } project "opossum-asan" kind "StaticLib" buildoptions {"-fsanitize=address -fno-omit-frame-pointer"} linkoptions {"-fsanitize=address"} - includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } files { "src/lib/**.hpp", "src/lib/**.cpp", "src/bin/server_main.cpp" } project "opossumCoverage" kind "StaticLib" buildoptions { "-fprofile-arcs -ftest-coverage" } linkoptions { "-lgcov --coverage" } - includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } files { "src/lib/**.hpp", "src/lib/**.cpp" } -- Static lib for the opossum protobuf and grpc code generated from opossum.proto (see action 'protoc' below) project "opossumProtobuf" kind "StaticLib" buildoptions ("-Wno-unused-parameter -Wno-deprecated-declarations") - includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } files { "src/lib/network/generated/**.pb.cc" } -- Exemplary opossum client, showing how to use grpc and protobuf at client-side project "client" kind "ConsoleApp" links { "opossumProtobuf", "protobuf", "grpc++", "grpc", "z", "boost_program_options" } - includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } libdirs { "third_party/grpc/libs/opt/", "third_party/grpc/libs/opt/protobuf" } files { "src/bin/client.cpp" } project "server" kind "ConsoleApp" links { "opossum", "opossumProtobuf", "protobuf", "grpc++", "grpc", "z", "boost_program_options" } -- z is needed on macos to link grpc - includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } libdirs { "third_party/grpc/libs/opt/", "third_party/grpc/libs/opt/protobuf" } links(libs) files { "src/bin/server_main.cpp" } @@ -176,7 +190,7 @@ project "test" kind "ConsoleApp" links { "opossum", "googletest", "opossumProtobuf", "protobuf", "grpc++", "grpc", "z" } - includedirs { "third_party/googletest/googletest/include", "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/googletest/googletest/include", "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } libdirs { "third_party/grpc/libs/opt/", "third_party/grpc/libs/opt/protobuf" } links(libs) files { "src/test/**.hpp", "src/test/**.cpp" } @@ -188,7 +202,7 @@ project "asan" links { "opossum-asan", "googletest", "opossumProtobuf", "protobuf", "grpc++", "grpc", "z" } links(libs) files { "src/test/**.hpp", "src/test/**.cpp" } - includedirs { "third_party/googletest/googletest/include", "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/googletest/googletest/include", "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } libdirs { "third_party/grpc/libs/opt/", "third_party/grpc/libs/opt/protobuf" } buildoptions {"-fsanitize=address -fno-omit-frame-pointer"} linkoptions { "-fsanitize=address" } @@ -197,9 +211,9 @@ project "asan" project "benchmark" kind "ConsoleApp" - links { "opossum", "googlebenchmark" } + links { "opossum", "googlebenchmark", "sqlparser" } files { "src/benchmark/**.hpp", "src/benchmark/**.cpp" } - includedirs { "third_party/benchmark/include" } + includedirs { "third_party/benchmark/include", "third_party/sql-parser/src/" } postbuildcommands { "./build/benchmark --benchmark_format=json > benchmark.json" } project "coverage" @@ -210,7 +224,7 @@ project "coverage" linkoptions {"--coverage"} files { "src/test/**.hpp", "src/test/**.cpp" } buildoptions { "-fprofile-arcs -ftest-coverage" } - includedirs { "third_party/googletest/googletest/include", "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/" } + includedirs { "third_party/googletest/googletest/include", "third_party/grpc/include/", "third_party/grpc/third_party/protobuf/src/", "third_party/sql-parser/src/" } libdirs { "third_party/grpc/libs/opt/", "third_party/grpc/libs/opt/protobuf" } postbuildcommands { "./build/coverage && rm -fr coverage; mkdir coverage && gcovr -s -r . --exclude=\"(.*types*.|.*test*.|.*\.pb\.|third_party)\" --html --html-details -o coverage/index.html" } diff --git a/src/benchmark/operators/sql_benchmark.cpp b/src/benchmark/operators/sql_benchmark.cpp new file mode 100644 index 0000000000..c48f409d0a --- /dev/null +++ b/src/benchmark/operators/sql_benchmark.cpp @@ -0,0 +1,52 @@ +#include +#include +#include + +#include "benchmark/benchmark.h" + +#include "../../lib/sql/sql_query_translator.hpp" +#include "../base_fixture.cpp" +#include "SQLParser.h" + +namespace opossum { + +class SQLBenchmark : public BenchmarkBasicFixture { + public: + virtual void SetUp(const ::benchmark::State& state) {} + + virtual void TearDown(const ::benchmark::State& state) {} +}; + +BENCHMARK_F(SQLBenchmark, BM_SQLTranslationTotal)(benchmark::State& state) { + clear_cache(); + const std::string query = "SELECT * FROM benchmark_table_one WHERE a >= 7;"; + + while (state.KeepRunning()) { + SQLQueryTranslator translator; + translator.translate_query(query); + } +} + +BENCHMARK_F(SQLBenchmark, BM_SQLTranslationOnlyParsing)(benchmark::State& state) { + clear_cache(); + const std::string query = "SELECT * FROM benchmark_table_one WHERE a >= 7;"; + + while (state.KeepRunning()) { + hsql::SQLParserResult result; + hsql::SQLParser::parseSQLString(query, &result); + } +} + +BENCHMARK_F(SQLBenchmark, BM_SQLTranslationOnlyTransation)(benchmark::State& state) { + clear_cache(); + const std::string query = "SELECT * FROM benchmark_table_one WHERE a >= 7;"; + hsql::SQLParserResult result; + hsql::SQLParser::parseSQLString(query, &result); + + while (state.KeepRunning()) { + SQLQueryTranslator translator; + translator.translate_statement(*result.getStatement(0)); + } +} + +} // namespace opossum diff --git a/src/lib/operators/get_table.cpp b/src/lib/operators/get_table.cpp index 131ea31648..6fb6783b02 100644 --- a/src/lib/operators/get_table.cpp +++ b/src/lib/operators/get_table.cpp @@ -7,7 +7,7 @@ namespace opossum { -GetTable::GetTable(const std::string &name) : _name(name) {} +GetTable::GetTable(const std::string& name) : _name(name) {} const std::string GetTable::name() const { return "GetTable"; } @@ -15,5 +15,7 @@ uint8_t GetTable::num_in_tables() const { return 0; } uint8_t GetTable::num_out_tables() const { return 1; } +const std::string& GetTable::table_name() const { return _name; } + std::shared_ptr GetTable::on_execute() { return StorageManager::get().get_table(_name); } } // namespace opossum diff --git a/src/lib/operators/get_table.hpp b/src/lib/operators/get_table.hpp index 0bcb622957..2d8599df1c 100644 --- a/src/lib/operators/get_table.hpp +++ b/src/lib/operators/get_table.hpp @@ -10,12 +10,14 @@ namespace opossum { // operator to retrieve a table from the StorageManager by specifying its name class GetTable : public AbstractReadOnlyOperator { public: - explicit GetTable(const std::string &name); + explicit GetTable(const std::string& name); const std::string name() const override; uint8_t num_in_tables() const override; uint8_t num_out_tables() const override; + const std::string& table_name() const; + protected: std::shared_ptr on_execute() override; diff --git a/src/lib/sql/sql_query_operator.cpp b/src/lib/sql/sql_query_operator.cpp new file mode 100644 index 0000000000..b815964d41 --- /dev/null +++ b/src/lib/sql/sql_query_operator.cpp @@ -0,0 +1,57 @@ +#include "sql_query_operator.hpp" + +#include +#include + +#include "sql_query_translator.hpp" + +#include "SQLParser.h" + +namespace opossum { + +SQLQueryOperator::SQLQueryOperator(const std::string& query) : _query(query) { + _result_op = std::make_shared(); + _result_task = std::make_shared(_result_op); +} + +const std::string SQLQueryOperator::name() const { return "SQLQueryOperator"; } + +uint8_t SQLQueryOperator::num_in_tables() const { return 0; } + +uint8_t SQLQueryOperator::num_out_tables() const { return 0; } + +const std::shared_ptr& SQLQueryOperator::get_result_task() const { return _result_task; } + +std::shared_ptr SQLQueryOperator::on_execute(std::shared_ptr context) { + // TODO(torpedro): Check query cache for execution plan. + + // TODO(torpedro): Check query cache for syntax tree. + + SQLQueryTranslator translator; + + hsql::SQLParserResult result; + + if (!translator.parse_query(_query, &result)) { + throw translator.get_error_msg(); + } + + // Translate the query. + if (!translator.translate_parse_result(result)) { + throw translator.get_error_msg(); + } + + // Schedule all tasks. + auto tasks = translator.get_tasks(); + + tasks.back()->set_as_predecessor_of(_result_task); + _result_op->set_input_operator(tasks.back()->get_operator()); + + for (const auto& task : tasks) { + task->schedule(); + } + _result_task->schedule(); + + return nullptr; +} + +} // namespace opossum diff --git a/src/lib/sql/sql_query_operator.hpp b/src/lib/sql/sql_query_operator.hpp new file mode 100644 index 0000000000..e63a4f13bb --- /dev/null +++ b/src/lib/sql/sql_query_operator.hpp @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +#include "operators/abstract_operator.hpp" +#include "operators/abstract_read_only_operator.hpp" +#include "scheduler/operator_task.hpp" +#include "sql/sql_result_operator.hpp" + +namespace opossum { + +// The SQLQueryOperator takes a SQL query, parses and transforms it. +// The it schedules the resulting execution plan. To get the result +// of the execution plan, it exposes an SQLResultOperator task, which +// will upon completion contain the result table of the query. +class SQLQueryOperator : public AbstractOperator { + public: + explicit SQLQueryOperator(const std::string& query); + + const std::string name() const override; + + uint8_t num_in_tables() const override; + + uint8_t num_out_tables() const override; + + const std::shared_ptr& get_result_task() const; + + protected: + std::shared_ptr on_execute(std::shared_ptr context) override; + + // Raw SQL query string. + const std::string _query; + + // Result operator, which will be dependent on the full execution of the exec plan. + std::shared_ptr _result_op; + + std::shared_ptr _result_task; +}; + +} // namespace opossum diff --git a/src/lib/sql/sql_query_translator.cpp b/src/lib/sql/sql_query_translator.cpp new file mode 100644 index 0000000000..e70807ab28 --- /dev/null +++ b/src/lib/sql/sql_query_translator.cpp @@ -0,0 +1,398 @@ +#include "sql_query_translator.hpp" + +#include +#include +#include +#include +#include + +#include "../operators/abstract_join_operator.hpp" +#include "../operators/abstract_operator.hpp" +#include "../operators/difference.hpp" +#include "../operators/export_binary.hpp" +#include "../operators/export_csv.hpp" +#include "../operators/get_table.hpp" +#include "../operators/import_csv.hpp" +#include "../operators/index_column_scan.hpp" +#include "../operators/join_nested_loop_a.hpp" +#include "../operators/print.hpp" +#include "../operators/product.hpp" +#include "../operators/projection.hpp" +#include "../operators/sort.hpp" +#include "../operators/table_scan.hpp" +#include "../operators/union_all.hpp" + +using hsql::Expr; +using hsql::SQLParser; +using hsql::SQLParserResult; +using hsql::SQLStatement; +using hsql::SelectStatement; +using hsql::TableRef; +using hsql::JoinDefinition; + +namespace opossum { + +SQLQueryTranslator::SQLQueryTranslator() {} + +SQLQueryTranslator::~SQLQueryTranslator() {} + +const std::vector>& SQLQueryTranslator::get_tasks() { return _tasks; } + +const std::string& SQLQueryTranslator::get_error_msg() { return _error_msg; } + +void SQLQueryTranslator::reset() { + _tasks.clear(); + _error_msg = ""; +} + +bool SQLQueryTranslator::translate_query(const std::string& query) { + hsql::SQLParserResult result; + + // Parse the query. + if (!parse_query(query, &result)) { + return false; + } + + // Translate into execution plan. + if (!translate_parse_result(result)) { + return false; + } + + return true; +} + +bool SQLQueryTranslator::parse_query(const std::string& query, hsql::SQLParserResult* result) { + SQLParser::parseSQLString(query, result); + + if (!result->isValid()) { + std::stringstream ss; + ss << "SQL Parsing failed: " << result->errorMsg(); + ss << " (L" << result->errorLine() << ":" << result->errorColumn() << ")"; + _error_msg = ss.str(); + return false; + } + + return true; +} + +bool SQLQueryTranslator::translate_parse_result(const hsql::SQLParserResult& result) { + const std::vector& statements = result.getStatements(); + + for (const SQLStatement* stmt : statements) { + if (!translate_statement(*stmt)) { + return false; + } + } + + return true; +} + +bool SQLQueryTranslator::translate_statement(const SQLStatement& statement) { + switch (statement.type()) { + case hsql::kStmtSelect: { + const SelectStatement& select = (const SelectStatement&)statement; + return _translate_select(select); + } + + default: + _error_msg = "Can only translate SELECT queries at the moment!"; + return false; + } +} + +bool SQLQueryTranslator::_translate_select(const SelectStatement& select) { + // SQL Order of Operations: http://www.bennadel.com/blog/70-sql-query-order-of-operations.htm + // 1. FROM clause + // 2. WHERE clause + // 3. GROUP BY clause + // 4. HAVING clause + // 5. SELECT clause + // 6. ORDER BY clause + + // Translate FROM. + if (!_translate_table_ref(*select.fromTable)) { + return false; + } + + // Translate WHERE. + // Add table scan if applicable. + if (select.whereClause != nullptr) { + Expr& where = *select.whereClause; + auto input_task = _tasks.back(); + + if (!_translate_filter_expr(where, input_task)) { + return false; + } + } + + // TODO(torpedro): Transform GROUP BY. + // TODO(torpedro): Transform HAVING. + + // Translate SELECT list. + // Add projection for select list. + // TODO(torpedro): Handle DISTINCT. + if (!_translate_projection(*select.selectList, _tasks.back())) { + return false; + } + + // Translate ORDER BY. + if (select.order != nullptr) { + if (!_translate_order_by(*select.order, _tasks.back())) { + return false; + } + } + + // TODO(torpedro): Translate LIMIT/TOP. + + return true; +} + +bool SQLQueryTranslator::_translate_filter_expr(const hsql::Expr& expr, + const std::shared_ptr& input_task) { + if (!expr.isType(hsql::kExprOperator)) { + _error_msg = "Filter expression clause has to be of type operator!"; + return false; + } + + // Handle operation types and get the filter op string.. + std::string filter_op = ""; + switch (expr.opType) { + case hsql::kOpAnd: + // Recursively translate the child expressions. + // This will chain TableScans. + if (!_translate_filter_expr(*expr.expr, input_task)) { + return false; + } + if (!_translate_filter_expr(*expr.expr2, _tasks.back())) { + return false; + } + return true; + + default: + // Get the operation string, if possible. + if (!_translate_filter_op(expr, &filter_op)) { + _error_msg = "Filter expression clause operator is not supported yet!"; + return false; + } + } + + // TODO(torpedro): Handle BETWEEN. + + // Get the column_name. + Expr* column_expr = (expr.expr->isType(hsql::kExprColumnRef)) ? expr.expr : expr.expr2; + + if (!column_expr->isType(hsql::kExprColumnRef)) { + _error_msg = "Unsupported filter expression!"; + return false; + } + std::string column_name = _get_column_name(*column_expr); + + // Get the value. + // At this moment the value is expected to be a literal. + Expr* other_expr = (column_expr == expr.expr) ? expr.expr2 : expr.expr; + AllTypeVariant value; + if (!_translate_literal(*other_expr, &value)) { + _error_msg = "Expected literal in WHERE condition."; + return false; + } + + if (filter_op.length() == 0 || column_name.length() == 0) { + _error_msg = "Unsupported filter expression!"; + return false; + } + + auto table_scan = std::make_shared(input_task->get_operator(), ColumnName(column_name), filter_op, value); + auto scan_task = std::make_shared(table_scan); + input_task->set_as_predecessor_of(scan_task); + _tasks.push_back(scan_task); + return true; +} + +bool SQLQueryTranslator::_translate_projection(const std::vector& expr_list, + const std::shared_ptr& input_task) { + std::vector columns; + for (const Expr* expr : expr_list) { + // At this moment we only support selecting columns in the projection. + if (!expr->isType(hsql::kExprColumnRef) && !expr->isType(hsql::kExprStar)) { + _error_msg = "Projection only supports columns to be selected."; + return false; + } + + if (expr->isType(hsql::kExprStar)) { + columns.push_back("*"); + continue; + } + + columns.push_back(_get_column_name(*expr)); + } + + // If only * is selected, no projection operator is needed. + if (columns.size() == 1 && columns[0].compare("*") == 0) { + return true; + } + + auto projection = std::make_shared(input_task->get_operator(), columns); + auto projection_task = std::make_shared(projection); + input_task->set_as_predecessor_of(projection_task); + _tasks.push_back(projection_task); + return true; +} + +bool SQLQueryTranslator::_translate_order_by(const std::vector order_list, + const std::shared_ptr& input_task) { + // Make mutable copy. + std::shared_ptr prev_task = input_task; + + // Go through all the order descriptions and create sort task for each. + for (const hsql::OrderDescription* order_desc : order_list) { + const Expr& expr = *order_desc->expr; + + // TODO(torpedro): Check that Expr is actual column ref. + const std::string name = _get_column_name(expr); + const bool asc = (order_desc->type == hsql::kOrderAsc); + auto sort = std::make_shared(prev_task->get_operator(), name, asc); + auto sort_task = std::make_shared(sort); + prev_task->set_as_predecessor_of(sort_task); + _tasks.push_back(sort_task); + + prev_task = sort_task; + } + + return true; +} + +bool SQLQueryTranslator::_translate_table_ref(const hsql::TableRef& table) { + switch (table.type) { + case hsql::kTableName: { + auto get_table = std::make_shared(table.name); + auto task = std::make_shared(get_table); + _tasks.push_back(task); + return true; + } + case hsql::kTableSelect: { + return _translate_select(*table.select); + } + case hsql::kTableJoin: { + // TODO(torpedro): Split into method. + const JoinDefinition& join_def = *table.join; + + // Get left and right sub tables. + if (!_translate_table_ref(*join_def.left)) { + return false; + } + auto left_task = _tasks.back(); + + if (!_translate_table_ref(*join_def.right)) { + return false; + } + auto right_task = _tasks.back(); + + // Determine join condition. + const Expr& condition = *join_def.condition; + std::pair columns(condition.expr->name, condition.expr2->name); + std::string op; + if (!_translate_filter_op(condition, &op)) { + _error_msg = "Can not handle JOIN condition."; + return false; + } + + // Determine join mode. + JoinMode mode; + switch (join_def.type) { + case hsql::kJoinInner: + mode = Inner; + break; + case hsql::kJoinOuter: + mode = Outer; + break; + case hsql::kJoinLeft: + mode = Left; + break; + case hsql::kJoinRight: + mode = Right; + break; + case hsql::kJoinNatural: + mode = Natural; + break; + case hsql::kJoinCross: + mode = Cross; + break; + default: + _error_msg = "Unable to handle join type."; + return false; + } + + // In Opossum, the join requires a prefix. + std::string prefix_left = std::string(join_def.left->getName()) + "."; + std::string prefix_right = std::string(join_def.right->getName()) + "."; + + // TODO(torpedro): Optimize join type selection. + auto join = std::make_shared(left_task->get_operator(), right_task->get_operator(), columns, op, + mode, prefix_left, prefix_right); + auto task = std::make_shared(join); + left_task->set_as_predecessor_of(task); + right_task->set_as_predecessor_of(task); + _tasks.push_back(task); + return true; + } + case hsql::kTableCrossProduct: { + _error_msg = "Unable to translate table cross product."; + return false; + } + } + _error_msg = "Unable to translate source table."; + return false; +} + +// static +bool SQLQueryTranslator::_translate_literal(const hsql::Expr& expr, AllTypeVariant* output) { + switch (expr.type) { + case hsql::kExprLiteralInt: + *output = expr.ival; + return true; + case hsql::kExprLiteralFloat: + *output = expr.fval; + return true; + case hsql::kExprLiteralString: + *output = expr.name; + return true; + default: + return false; + } +} + +// static +bool SQLQueryTranslator::_translate_filter_op(const hsql::Expr& expr, std::string* output) { + switch (expr.opType) { + case hsql::kOpSimple: + if (expr.isSimpleOp('=')) *output = "="; + if (expr.isSimpleOp('<')) *output = "<"; + if (expr.isSimpleOp('>')) *output = ">"; + return true; + case hsql::kOpGreaterEq: + *output = ">="; + return true; + case hsql::kOpLessEq: + *output = "<="; + return true; + case hsql::kOpNotEquals: + *output = "!="; + return true; + case hsql::kOpBetween: + *output = "BETWEEN"; + return true; + default: + return false; + } + return false; +} + +// static +std::string SQLQueryTranslator::_get_column_name(const hsql::Expr& expr) { + std::string name = ""; + if (expr.hasTable()) name += std::string(expr.table) + "."; + name += expr.name; + return name; +} + +} // namespace opossum diff --git a/src/lib/sql/sql_query_translator.hpp b/src/lib/sql/sql_query_translator.hpp new file mode 100644 index 0000000000..fd2e9a76e0 --- /dev/null +++ b/src/lib/sql/sql_query_translator.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include +#include +#include + +#include "SQLParser.h" +#include "scheduler/operator_task.hpp" + +namespace opossum { + +// The SQLQueryTranslator offers functionality to parse a query string and +// transform it into an execution plan. This object should not be called +// concurrently. +class SQLQueryTranslator { + public: + SQLQueryTranslator(); + virtual ~SQLQueryTranslator(); + + // Returns the list of tasks that were created during translation. + const std::vector>& get_tasks(); + + // Get the error message, if any exists. + const std::string& get_error_msg(); + + // Destroy the currently stored execution plan and state. + void reset(); + + // Parses the given query into a C++ object representation. + bool parse_query(const std::string& query, hsql::SQLParserResult* result); + + // Translates the give SQL result. Adds the generated execution plan to _tasks. + bool translate_parse_result(const hsql::SQLParserResult& result); + + // Translates the give SQL query. Adds the generated execution plan to _tasks. + // Calls parse_query and translate_parse_result to get the result. + bool translate_query(const std::string& query); + + // Translates the single given SQL statement. Adds the generated execution plan to _tasks. + bool translate_statement(const hsql::SQLStatement& statement); + + protected: + bool _translate_select(const hsql::SelectStatement& select); + + // Evaluates the expression and pushes one or more TableScans onto + // the tasks list. AND expressions are chained TableScans. + // OR expressions are not supported yet. + bool _translate_filter_expr(const hsql::Expr& expr, const std::shared_ptr& input_task); + + bool _translate_projection(const std::vector& expr_list, + const std::shared_ptr& input_task); + + bool _translate_order_by(const std::vector order_list, + const std::shared_ptr& input_task); + + bool _translate_table_ref(const hsql::TableRef& table); + + static bool _translate_literal(const hsql::Expr& expr, AllTypeVariant* output); + + static bool _translate_filter_op(const hsql::Expr& expr, std::string* output); + + static std::string _get_column_name(const hsql::Expr& expr); + + // Generated execution plan. + std::vector> _tasks; + + // Details about the error, if one occurred. + std::string _error_msg; +}; + +} // namespace opossum diff --git a/src/lib/sql/sql_result_operator.cpp b/src/lib/sql/sql_result_operator.cpp new file mode 100644 index 0000000000..9f7f7fc4de --- /dev/null +++ b/src/lib/sql/sql_result_operator.cpp @@ -0,0 +1,24 @@ +#include "sql_query_operator.hpp" + +#include +#include + +#include "sql_query_translator.hpp" + +#include "SQLParser.h" + +namespace opossum { + +SQLResultOperator::SQLResultOperator() {} + +const std::string SQLResultOperator::name() const { return "SQLResultOperator"; } + +uint8_t SQLResultOperator::num_in_tables() const { return 1; } + +uint8_t SQLResultOperator::num_out_tables() const { return 1; } + +void SQLResultOperator::set_input_operator(const std::shared_ptr input) { _input = input; } + +std::shared_ptr SQLResultOperator::on_execute() { return _input->get_output(); } + +} // namespace opossum diff --git a/src/lib/sql/sql_result_operator.hpp b/src/lib/sql/sql_result_operator.hpp new file mode 100644 index 0000000000..aa4d2a746c --- /dev/null +++ b/src/lib/sql/sql_result_operator.hpp @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +#include "operators/abstract_operator.hpp" +#include "operators/abstract_read_only_operator.hpp" +#include "scheduler/operator_task.hpp" + +namespace opossum { + +// The SQLTaskOperator creates this task to be executed after the execution +// of a query plan. This operator only passes through the result of the previous +// operator. This is useful to bind to the result of this operator, before +// the translation of an SQL query has occurred. +class SQLResultOperator : public AbstractReadOnlyOperator { + public: + SQLResultOperator(); + + const std::string name() const override; + + uint8_t num_in_tables() const override; + + uint8_t num_out_tables() const override; + + std::shared_ptr on_execute() override; + + // Called by SQLQueryOperator to dynamically set the input operator. + // Most common operators require the input to be given at construction. + void set_input_operator(const std::shared_ptr input); + + protected: + std::shared_ptr _input; +}; + +} // namespace opossum diff --git a/src/test/sql/sql_query_operator_test.cpp b/src/test/sql/sql_query_operator_test.cpp new file mode 100644 index 0000000000..9859021099 --- /dev/null +++ b/src/test/sql/sql_query_operator_test.cpp @@ -0,0 +1,116 @@ +#include +#include +#include + +#include "../base_test.hpp" +#include "gtest/gtest.h" + +#include "network/response_builder.hpp" +#include "operators/sort.hpp" +#include "scheduler/current_scheduler.hpp" +#include "scheduler/job_task.hpp" +#include "scheduler/node_queue_scheduler.hpp" +#include "scheduler/topology.hpp" +#include "sql/sql_query_operator.hpp" +#include "storage/storage_manager.hpp" +#include "storage/table.hpp" + +namespace opossum { + +// The fixture for testing class GetTable. +class SQLQueryOperatorTest : public BaseTest { + protected: + void SetUp() override { + CurrentScheduler::set(std::make_shared(Topology::create_fake_numa_topology(8, 4))); + + std::shared_ptr table_a = load_table("src/test/tables/int_float.tbl", 2); + StorageManager::get().add_table("table_a", std::move(table_a)); + + std::shared_ptr
table_b = load_table("src/test/tables/int_float2.tbl", 2); + StorageManager::get().add_table("table_b", std::move(table_b)); + } + + void TearDown() override { + CurrentScheduler::set(nullptr); // Make sure there is no Scheduler anymore + } +}; + +TEST_F(SQLQueryOperatorTest, BasicTest) { + const std::string query = "SELECT * FROM table_a;"; + auto sql_op = std::make_shared(query); + auto sql_task = std::make_shared(sql_op); + sql_task->schedule(); + + CurrentScheduler::get()->finish(); + + auto sql_result_task = sql_op->get_result_task(); + auto expected_result = load_table("src/test/tables/int_float.tbl", 2); + EXPECT_TABLE_EQ(sql_result_task->get_operator()->get_output(), expected_result); +} + +TEST_F(SQLQueryOperatorTest, ComplexQueryTest) { + const std::string query = + "SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" INNER JOIN table_b AS " + "\"right\" ON a = a"; + auto sql_op = std::make_shared(query); + auto sql_task = std::make_shared(sql_op); + sql_task->schedule(); + + CurrentScheduler::get()->finish(); + + auto sql_result_task = sql_op->get_result_task(); + auto expected_result = load_table("src/test/tables/joinoperators/int_inner_join.tbl", 1); + EXPECT_TABLE_EQ(sql_result_task->get_operator()->get_output(), expected_result); +} + +TEST_F(SQLQueryOperatorTest, NextTaskTest) { + const std::string query = "SELECT a, b FROM table_a;"; + + auto sql_op = std::make_shared(query); + auto sql_task = std::make_shared(sql_op); + auto sql_result_task = sql_op->get_result_task(); + + // Add sort to the result of the SQL query. + auto sort = std::make_shared(sql_result_task->get_operator(), "a", true); + auto sort_task = std::make_shared(sort); + sql_result_task->set_as_predecessor_of(sort_task); + + // Schedule. + sort_task->schedule(); + sql_task->schedule(); + + CurrentScheduler::get()->finish(); + + auto expected_result = load_table("src/test/tables/int_float_sorted.tbl", 2); + EXPECT_TABLE_EQ(sort->get_output(), expected_result, true); +} + +// Similar to how it's done in request_handler.cpp +TEST_F(SQLQueryOperatorTest, NextAdHocTaskTest) { + const std::string query = "SELECT a, b FROM table_a;"; + + auto sql_op = std::make_shared(query); + auto sql_task = std::make_shared(sql_op); + auto result_task = sql_op->get_result_task(); + auto result_operator = result_task->get_operator(); + + auto materialize_job = std::make_shared([this, result_operator]() { + // These lines are executed by the opossum scheduler + auto table = result_operator->get_output(); + // Materialize and fill response + proto::Response response; + ResponseBuilder response_builder; + response_builder.build_response(response, std::move(table)); + + // send_response(); + }); + result_task->set_as_predecessor_of(materialize_job); + + // Schedule. + materialize_job->schedule(); + sql_task->schedule(); + + CurrentScheduler::get()->finish(); +} + +} // namespace opossum diff --git a/src/test/sql/sql_select_test.cpp b/src/test/sql/sql_select_test.cpp new file mode 100644 index 0000000000..b24fb07f7b --- /dev/null +++ b/src/test/sql/sql_select_test.cpp @@ -0,0 +1,216 @@ + +#include +#include +#include + +#include "../base_test.hpp" +#include "gtest/gtest.h" + +#include "operators/get_table.hpp" +#include "operators/table_scan.hpp" +#include "scheduler/node_queue_scheduler.hpp" +#include "scheduler/topology.hpp" +#include "sql/sql_query_translator.hpp" +#include "storage/storage_manager.hpp" + +namespace opossum { + +class SQLSelectTest : public BaseTest { + protected: + void SetUp() override { + std::shared_ptr
table_a = load_table("src/test/tables/int_float.tbl", 2); + StorageManager::get().add_table("table_a", std::move(table_a)); + + std::shared_ptr
table_b = load_table("src/test/tables/int_float2.tbl", 2); + StorageManager::get().add_table("table_b", std::move(table_b)); + + std::shared_ptr
table_c = load_table("src/test/tables/int_string.tbl", 4); + StorageManager::get().add_table("table_c", std::move(table_c)); + + std::shared_ptr
table_d = load_table("src/test/tables/string_int.tbl", 3); + StorageManager::get().add_table("table_d", std::move(table_d)); + + std::shared_ptr
test_table2 = load_table("src/test/tables/int_string2.tbl", 2); + StorageManager::get().add_table("TestTable", test_table2); + } + + SQLQueryTranslator _translator; +}; + +TEST_F(SQLSelectTest, BasicSuccessTest) { + const std::string query = "SELECT * FROM test;"; + ASSERT_TRUE(_translator.translate_query(query)); + + const std::string faulty_query = "SELECT * WHERE test;"; + ASSERT_FALSE(_translator.translate_query(faulty_query)); +} + +TEST_F(SQLSelectTest, SelectStarAllTest) { + const std::string query = "SELECT * FROM table_a;"; + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + ASSERT_EQ(1u, tasks.size()); + + // Check GetTable task. + auto get_table_task = tasks[0]; + auto get_table = (const std::shared_ptr&)get_table_task->get_operator(); + ASSERT_EQ("table_a", get_table->table_name()); + + // Execute GetTable and check result. + auto expected_result = load_table("src/test/tables/int_float.tbl", 1); + get_table->execute(); + + EXPECT_TABLE_EQ(get_table->get_output(), expected_result); +} + +TEST_F(SQLSelectTest, SelectWithSingleCondition) { + const std::string query = "SELECT * FROM table_a WHERE a >= 1234;"; + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + ASSERT_EQ(2u, tasks.size()); + + auto get_table = (const std::shared_ptr&)tasks[0]->get_operator(); + auto table_scan = (const std::shared_ptr&)tasks[1]->get_operator(); + + get_table->execute(); + table_scan->execute(); + + std::shared_ptr
expected_result = load_table("src/test/tables/int_float_filtered2.tbl", 1); + EXPECT_TABLE_EQ(table_scan->get_output(), expected_result); +} + +TEST_F(SQLSelectTest, SelectWithAndCondition) { + const std::string query = "SELECT * FROM table_a WHERE a >= 1234 AND b < 457.9"; + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + ASSERT_EQ(3u, tasks.size()); + + for (const auto task : tasks) { + task->get_operator()->execute(); + } + + std::shared_ptr
expected_result = load_table("src/test/tables/int_float_filtered.tbl", 2); + EXPECT_TABLE_EQ(tasks.back()->get_operator()->get_output(), expected_result); +} + +// TEST_F(SQLSelectTest, SelectWithBetween) { +// const std::string query = "SELECT * FROM TestTable WHERE a BETWEEN 122 AND 124"; +// ASSERT_TRUE(_translator.translate_query(query)); + +// auto tasks = _translator.get_tasks(); +// ASSERT_EQ(2u, tasks.size()); + +// for (const auto task : tasks) { +// task->get_operator()->execute(); +// } + +// std::shared_ptr
expected_result = load_table("src/test/tables/int_string_filtered.tbl", 2); +// EXPECT_TABLE_EQ(tasks.back()->get_operator()->get_output(), expected_result); +// } + +TEST_F(SQLSelectTest, SimpleProjectionTest) { + const std::string query = "SELECT a FROM table_a;"; + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + ASSERT_EQ(2u, tasks.size()); + + for (const auto task : tasks) { + task->get_operator()->execute(); + } + + std::shared_ptr
expected_result = load_table("src/test/tables/int.tbl", 2); + EXPECT_TABLE_EQ(tasks.back()->get_operator()->get_output(), expected_result); +} + +TEST_F(SQLSelectTest, SelectSingleOrderByTest) { + const std::string query = "SELECT a, b FROM table_a ORDER BY a;"; + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + ASSERT_EQ(3u, tasks.size()); + + for (const auto task : tasks) { + task->get_operator()->execute(); + } + + std::shared_ptr
expected_result = load_table("src/test/tables/int_float_sorted.tbl", 2); + EXPECT_TABLE_EQ(tasks.back()->get_operator()->get_output(), expected_result, true); +} + +TEST_F(SQLSelectTest, SelectFromSubSelect) { + const std::string query = "SELECT a FROM (SELECT a, b FROM table_a WHERE a > 1 ORDER BY b) WHERE a > 0 ORDER BY a;"; + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + ASSERT_EQ(7u, tasks.size()); + + for (const auto task : tasks) { + task->get_operator()->execute(); + } + + std::shared_ptr
expected_result = load_table("src/test/tables/int.tbl", 2); + EXPECT_TABLE_EQ(tasks.back()->get_operator()->get_output(), expected_result, true); +} + +TEST_F(SQLSelectTest, SelectBasicInnerJoinTest) { + const std::string query = + "SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" JOIN table_b AS \"right\" ON a " + "= a;"; + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + ASSERT_EQ(4u, tasks.size()); + + for (const auto task : tasks) { + task->get_operator()->execute(); + } + + std::shared_ptr
expected_result = load_table("src/test/tables/joinoperators/int_inner_join.tbl", 1); + EXPECT_TABLE_EQ(tasks.back()->get_operator()->get_output(), expected_result); +} + +TEST_F(SQLSelectTest, SelectBasicLeftJoinTest) { + const std::string query = "SELECT * FROM table_a AS \"left\" LEFT JOIN table_b AS \"right\" ON a = a;"; + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + ASSERT_EQ(3u, tasks.size()); + + for (const auto task : tasks) { + task->get_operator()->execute(); + } + + std::shared_ptr
expected_result = load_table("src/test/tables/joinoperators/int_left_join.tbl", 1); + EXPECT_TABLE_EQ(tasks.back()->get_operator()->get_output(), expected_result); +} + +TEST_F(SQLSelectTest, SelectWithSchedulerTest) { + CurrentScheduler::set(std::make_shared(Topology::create_fake_numa_topology(8, 4))); + + const std::string query = + "SELECT \"left\".a, \"left\".b, \"right\".a, \"right\".b FROM table_a AS \"left\" INNER JOIN table_b AS " + "\"right\" ON a = a"; + + // TODO(torpedro): Adding 'WHERE \"left\".a >= 0;' causes wrong data. Investigate. + // Probable bug in TableScan. + + ASSERT_TRUE(_translator.translate_query(query)); + + auto tasks = _translator.get_tasks(); + + for (const auto& task : tasks) { + task->schedule(); + } + + CurrentScheduler::get()->finish(); + CurrentScheduler::set(nullptr); + + std::shared_ptr
expected_result = load_table("src/test/tables/joinoperators/int_inner_join.tbl", 1); + EXPECT_TABLE_EQ(tasks.back()->get_operator()->get_output(), expected_result, true); +} + +} // namespace opossum diff --git a/third_party/sql-parser b/third_party/sql-parser new file mode 160000 index 0000000000..128cd74670 --- /dev/null +++ b/third_party/sql-parser @@ -0,0 +1 @@ +Subproject commit 128cd746708664147e534ff891816c3f990774c4