Skip to content

Commit

Permalink
JIT Improve JitExpression Evaluation (hyrise#1496)
Browse files Browse the repository at this point in the history
* Use less context in JitExpressions

* Fix tests

* Lint

* Correct DebugAssert

* Update print

* Fix test

* Improve jit_is_null

* Update comments

* Add extra variant class

* Reduce change in jit_filter and jit_compute

* Make JitVariant a struct

* Add comments

* refactor

* Update comment

* Add comments

* update comment in JitFilter

* Update comments and lint

* use optional

* Fix

* Remove unnecessary code

* Only use optional

* remove semicolon

* Address comments

* Fix linting
Add explicit keyword

* Fix clang tidy

* Rename JitExpression::resultI() to JitExpression::result_value_type()

* [2/2] Rename JitExpression::resultI() to JitExpression::result_value_type()

* Rename JitTupleValue to JitTupleEntry
and JitHashmapValue to JitHashmapEntry
  • Loading branch information
FabianWiebe authored and mrks committed Feb 26, 2019
1 parent be0436a commit ca2d139
Show file tree
Hide file tree
Showing 30 changed files with 1,409 additions and 1,153 deletions.
27 changes: 12 additions & 15 deletions src/lib/logical_query_plan/jit_aware_lqp_translator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,7 @@ std::shared_ptr<JitOperatorWrapper> JitAwareLQPTranslator::_try_translate_sub_pl
_try_translate_expression_to_jit_expression(boolean_expression, *read_tuples, input_node);
if (!jit_boolean_expression) return nullptr;

// make sure that the expression gets computed ...
jit_operator->add_jit_operator(std::make_shared<JitCompute>(jit_boolean_expression));
// and then filter on the resulting boolean.
jit_operator->add_jit_operator(std::make_shared<JitFilter>(jit_boolean_expression->result()));
jit_operator->add_jit_operator(std::make_shared<JitFilter>(jit_boolean_expression));
}

if (use_validate && validate_after_filter) jit_operator->add_jit_operator(std::make_shared<JitValidate>());
Expand All @@ -180,7 +177,7 @@ std::shared_ptr<JitOperatorWrapper> JitAwareLQPTranslator::_try_translate_sub_pl
jit_operator->add_jit_operator(std::make_shared<JitCompute>(jit_expression));
}
// ... and add the column to the JitAggregate operator.
aggregate->add_groupby_column(groupby_expression->as_column_name(), jit_expression->result());
aggregate->add_groupby_column(groupby_expression->as_column_name(), jit_expression->result_entry());
}

for (auto expression_idx = aggregate_node->aggregate_expressions_begin_idx;
Expand All @@ -207,7 +204,7 @@ std::shared_ptr<JitOperatorWrapper> JitAwareLQPTranslator::_try_translate_sub_pl
jit_operator->add_jit_operator(std::make_shared<JitCompute>(jit_expression));
}
// ... and add the aggregate expression to the JitAggregate operator.
aggregate->add_aggregate_column(aggregate_expression->as_column_name(), jit_expression->result(),
aggregate->add_aggregate_column(aggregate_expression->as_column_name(), jit_expression->result_entry(),
aggregate_expression->aggregate_function);
}
}
Expand Down Expand Up @@ -236,7 +233,7 @@ std::shared_ptr<JitOperatorWrapper> JitAwareLQPTranslator::_try_translate_sub_pl
jit_operator->add_jit_operator(std::make_shared<JitCompute>(jit_expression));
}

write_table->add_output_column_definition(column_expression->as_column_name(), jit_expression->result());
write_table->add_output_column_definition(column_expression->as_column_name(), jit_expression->result_entry());
}

jit_operator->add_jit_operator(write_table);
Expand All @@ -262,24 +259,24 @@ std::shared_ptr<const JitExpression> JitAwareLQPTranslator::_try_translate_expre
const std::shared_ptr<AbstractLQPNode>& input_node) const {
const auto input_node_column_id = input_node->find_column_id(*expression);
if (input_node_column_id) {
const auto tuple_value = jit_source.add_input_column(
const auto tuple_entry = jit_source.add_input_column(
expression->data_type(), input_node->is_column_nullable(input_node->get_column_id(*expression)),
*input_node_column_id);
return std::make_shared<JitExpression>(tuple_value);
return std::make_shared<JitExpression>(tuple_entry);
}

std::shared_ptr<const JitExpression> left, right;
switch (expression->type) {
case ExpressionType::Value: {
const auto value_expression = std::dynamic_pointer_cast<const ValueExpression>(expression);
const auto tuple_value = jit_source.add_literal_value(value_expression->value);
return std::make_shared<JitExpression>(tuple_value);
const auto tuple_entry = jit_source.add_literal_value(value_expression->value);
return std::make_shared<JitExpression>(tuple_entry, value_expression->value);
}

case ExpressionType::CorrelatedParameter: {
const auto parameter = std::dynamic_pointer_cast<const CorrelatedParameterExpression>(expression);
const auto tuple_value = jit_source.add_parameter(parameter->data_type(), parameter->parameter_id);
return std::make_shared<JitExpression>(tuple_value);
const auto tuple_entry = jit_source.add_parameter(parameter->data_type(), parameter->parameter_id);
return std::make_shared<JitExpression>(tuple_entry);
}

case ExpressionType::LQPColumn:
Expand All @@ -303,8 +300,8 @@ std::shared_ptr<const JitExpression> JitAwareLQPTranslator::_try_translate_expre
jit_source.add_temporary_value());
} else if (jit_expression_arguments.size() == 2) {
// An expression can only handle strings if both of its arguments are strings
if ((jit_expression_arguments[0]->result().data_type() == DataType::String) !=
(jit_expression_arguments[1]->result().data_type() == DataType::String)) {
if ((jit_expression_arguments[0]->result_entry().data_type() == DataType::String) !=
(jit_expression_arguments[1]->result_entry().data_type() == DataType::String)) {
return nullptr;
}
return std::make_shared<JitExpression>(jit_expression_arguments[0], jit_expression_type,
Expand Down
4 changes: 2 additions & 2 deletions src/lib/logical_query_plan/jit_aware_lqp_translator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ namespace opossum {
* 2) Once we know which nodes we want to jit, we can start building out JitOperatorWrapper:
* We start by adding a JitReadTuples node. This node is passed to all translation functions during the construction
* of further operators. If any jit operator depends on a column or literal value, this value is registered with the
* JitReadTuples operator. The operator returns a JitTupleValue that serves as a placeholder in the requesting
* JitReadTuples operator. The operator returns a JitTupleEntry that serves as a placeholder in the requesting
* operator. The JitReadTuples operator will make sure that the actual value is then accessible through the
* JitTupleValue at runtime.
* JitTupleEntry at runtime.
* The output columns are determined by the top-most ProjectionNode. If there is no ProjectionNode, all columns from
* the input node are considered as outputs.
* In case we find any PredicateNode or UnionNode during our traversal, we need to create a JitFilter operator.
Expand Down
5 changes: 3 additions & 2 deletions src/lib/operators/jit_operator/jit_constant_mappings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
namespace opossum {

const boost::bimap<JitExpressionType, std::string> jit_expression_type_to_string =
make_bimap<JitExpressionType, std::string>({{JitExpressionType::Addition, "+"},
{JitExpressionType::Column, "<COLUMN>"},
make_bimap<JitExpressionType, std::string>({{JitExpressionType::Column, "<COLUMN>"},
{JitExpressionType::Value, "<VALUE>"},
{JitExpressionType::Addition, "+"},
{JitExpressionType::Subtraction, "-"},
{JitExpressionType::Multiplication, "*"},
{JitExpressionType::Division, "/"},
Expand Down
175 changes: 122 additions & 53 deletions src/lib/operators/jit_operator/jit_operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,10 @@

namespace opossum {

// Returns the enum value (e.g., DataType::Int, DataType::String) of a data type defined in the DATA_TYPE_INFO sequence
#define JIT_GET_ENUM_VALUE(index, s) APPEND_ENUM_NAMESPACE(_, _, BOOST_PP_TUPLE_ELEM(3, 1, BOOST_PP_SEQ_ELEM(index, s)))

// Returns the data type (e.g., int32_t, std::string) of a data type defined in the DATA_TYPE_INFO sequence
#define JIT_GET_DATA_TYPE(index, s) BOOST_PP_TUPLE_ELEM(3, 0, BOOST_PP_SEQ_ELEM(index, s))

#define JIT_HASH_CASE(r, types) \
case JIT_GET_ENUM_VALUE(0, types): \
return std::hash<JIT_GET_DATA_TYPE(0, types)>()( \
context.tuple.get<JIT_GET_DATA_TYPE(0, types)>(value.tuple_index()));
context.tuple.get<JIT_GET_DATA_TYPE(0, types)>(tuple_entry.tuple_index()));

#define JIT_AGGREGATE_EQUALS_CASE(r, types) \
case JIT_GET_ENUM_VALUE(0, types): \
Expand All @@ -21,52 +15,117 @@ namespace opossum {
case JIT_GET_ENUM_VALUE(0, types): \
return to.set<JIT_GET_DATA_TYPE(0, types)>(from.get<JIT_GET_DATA_TYPE(0, types)>(context), to_index, context);

#define JIT_GROW_BY_ONE_CASE(r, types) \
case JIT_GET_ENUM_VALUE(0, types): \
return context.hashmap.columns[value.column_index()].grow_by_one<JIT_GET_DATA_TYPE(0, types)>(initial_value);
#define JIT_GROW_BY_ONE_CASE(r, types) \
case JIT_GET_ENUM_VALUE(0, types): \
return context.hashmap.columns[hashmap_entry.column_index()].grow_by_one<JIT_GET_DATA_TYPE(0, types)>( \
initial_value);

// Expands to a single switch case for the data type selected by JIT_GET_ENUM_VALUE(0, types).
// The case calls left_side.compute<T>(context) with the matching data type T and returns true if the
// computed result holds no value (i.e., the operand evaluated to NULL), false otherwise.
// The names `left_side` and `context` must be in scope where the macro is expanded.
#define JIT_IS_NULL_CASE(r, types) \
case JIT_GET_ENUM_VALUE(0, types): { \
const auto result = left_side.compute<JIT_GET_DATA_TYPE(0, types)>(context); \
return !result.has_value(); \
}

// Expands to a single switch case for the data type selected by JIT_GET_ENUM_VALUE(0, types).
// The case calls left_side.compute<T>(context) with the matching data type T and returns true if the
// computed result holds a value (i.e., the operand did not evaluate to NULL), false otherwise.
// The names `left_side` and `context` must be in scope where the macro is expanded.
#define JIT_IS_NOT_NULL_CASE(r, types) \
case JIT_GET_ENUM_VALUE(0, types): { \
const auto result = left_side.compute<JIT_GET_DATA_TYPE(0, types)>(context); \
return result.has_value(); \
}

void jit_not(const JitTupleValue& lhs, const JitTupleValue& result, JitRuntimeContext& context) {
std::optional<bool> jit_not(const JitExpression& left_side, JitRuntimeContext& context) {
// If the input value is computed by a non-jit operator, its data type is int but it can be read as a bool value.
DebugAssert(
(lhs.data_type() == DataType::Bool || lhs.data_type() == DataType::Int) && result.data_type() == DataType::Bool,
(left_side.result_entry().data_type() == DataType::Bool || left_side.result_entry().data_type() == DataType::Int),
"invalid type for jit operation not");
result.set<bool>(!lhs.get<bool>(context), context);
result.set_is_null(lhs.is_null(context), context);
const auto value = left_side.compute<bool>(context);
if (left_side.result_entry().is_nullable() && !value.has_value()) {
return std::nullopt;
} else {
return !value.value();
}
}

void jit_and(const JitTupleValue& lhs, const JitTupleValue& rhs, const JitTupleValue& result,
JitRuntimeContext& context) {
// If the input values are computed by non-jit operators, their data type is int but they can be read as bool values.
DebugAssert((lhs.data_type() == DataType::Bool || lhs.data_type() == DataType::Int) &&
(rhs.data_type() == DataType::Bool || rhs.data_type() == DataType::Int) &&
result.data_type() == DataType::Bool,
std::optional<bool> jit_and(const JitExpression& left_side, const JitExpression& right_side,
JitRuntimeContext& context) {
// three-valued logic AND
// Short-circuit evaluation is used to skip the evaluation of the right side if the value of the left side is not null
// and false.

// Get left and right operand types, the actual operand values are computed later
const auto left_entry = left_side.result_entry();
const auto right_entry = right_side.result_entry();

// If the input value is computed by a non-jit operator, its data type is int but it can be read as a bool value.
DebugAssert((left_entry.data_type() == DataType::Bool || left_entry.data_type() == DataType::Int) &&
(right_entry.data_type() == DataType::Bool || right_entry.data_type() == DataType::Int),
"invalid type for jit operation and");

// three-valued logic AND
if (lhs.is_null(context)) {
result.set<bool>(false, context);
result.set_is_null(rhs.is_null(context) || rhs.get<bool>(context), context);
const auto left_result = left_side.compute<bool>(context);
// Computation of right hand side can be pruned if left result is false and not null
if (!left_entry.is_nullable() || left_result.has_value()) { // Left result is not null
if (!left_result.value()) { // Left result is false
return false;
}
}

// Left result is null or true
const auto right_result = right_side.compute<bool>(context);
if (left_entry.is_nullable() && !left_result.has_value()) { // Left result is null
// Right result is null or true
if ((right_entry.is_nullable() && !right_result.has_value()) || right_result.value()) {
return std::nullopt;
} else { // Right result is false
return false;
}
}

// Left result is true and not null
if (right_entry.is_nullable() && !right_result.has_value()) {
return std::nullopt;
} else {
result.set<bool>(lhs.get<bool>(context) && rhs.get<bool>(context), context);
result.set_is_null(lhs.get<bool>(context) && rhs.is_null(context), context);
return right_result.value();
}
}

void jit_or(const JitTupleValue& lhs, const JitTupleValue& rhs, const JitTupleValue& result,
JitRuntimeContext& context) {
// If the input values are computed by non-jit operators, their data type is int but they can be read as bool values.
DebugAssert((lhs.data_type() == DataType::Bool || lhs.data_type() == DataType::Int) &&
(rhs.data_type() == DataType::Bool || rhs.data_type() == DataType::Int) &&
result.data_type() == DataType::Bool,
std::optional<bool> jit_or(const JitExpression& left_side, const JitExpression& right_side,
JitRuntimeContext& context) {
// three-valued logic OR
// Short-circuit evaluation is used to skip the evaluation of the right side if the value of the left side is not null
// and true.

// Get left and right operand types, the actual operand values are computed later
const auto left_entry = left_side.result_entry();
const auto right_entry = right_side.result_entry();

// If the input value is computed by a non-jit operator, its data type is int but it can be read as a bool value.
DebugAssert((left_entry.data_type() == DataType::Bool || left_entry.data_type() == DataType::Int) &&
(right_entry.data_type() == DataType::Bool || right_entry.data_type() == DataType::Int),
"invalid type for jit operation or");

// three-valued logic OR
if (lhs.is_null(context)) {
result.set<bool>(true, context);
result.set_is_null(rhs.is_null(context) || !rhs.get<bool>(context), context);
const auto left_result = left_side.compute<bool>(context);
// Computation of right hand side can be pruned if left result is true and not null
if (!left_entry.is_nullable() || left_result.has_value()) { // Left result is not null
if (left_result.value()) { // Left result is true
return true;
}
}

// Left result is null or false
const auto right_result = right_side.compute<bool>(context);
if (left_entry.is_nullable() && !left_result.has_value()) { // Left result is null
// Right result is null or false
if ((right_entry.is_nullable() && !right_result.has_value()) || !right_result.value()) {
return std::nullopt;
} else { // Right result is true
return true;
}
}

// Left result is false and not null
if (right_entry.is_nullable() && !right_result.has_value()) {
return std::nullopt;
} else {
result.set<bool>(lhs.get<bool>(context) || rhs.get<bool>(context), context);
result.set_is_null(!lhs.get<bool>(context) && rhs.is_null(context), context);
return right_result.value();
}
}

Expand All @@ -84,31 +143,41 @@ bool jit_not_like(const std::string& a, const std::string& b) {
return !std::regex_match(a, regex);
}

void jit_is_null(const JitTupleValue& lhs, const JitTupleValue& result, JitRuntimeContext& context) {
result.set_is_null(false, context);
result.set<bool>(lhs.is_null(context), context);
std::optional<bool> jit_is_null(const JitExpression& left_side, JitRuntimeContext& context) {
// switch and macros required to call compute<ResultValueType>() on left_side with the correct ResultValueType
// template parameter for each data type.
switch (left_side.result_entry().data_type()) {
BOOST_PP_SEQ_FOR_EACH_PRODUCT(JIT_IS_NULL_CASE, (JIT_DATA_TYPE_INFO))
case DataType::Null:
return true;
}
}

void jit_is_not_null(const JitTupleValue& lhs, const JitTupleValue& result, JitRuntimeContext& context) {
result.set_is_null(false, context);
result.set<bool>(!lhs.is_null(context), context);
std::optional<bool> jit_is_not_null(const JitExpression& left_side, JitRuntimeContext& context) {
// switch and macros required to call compute<ResultValueType>() on left_side with the correct ResultValueType
// template parameter for each data type.
switch (left_side.result_entry().data_type()) {
BOOST_PP_SEQ_FOR_EACH_PRODUCT(JIT_IS_NOT_NULL_CASE, (JIT_DATA_TYPE_INFO))
case DataType::Null:
return false;
}
}

uint64_t jit_hash(const JitTupleValue& value, JitRuntimeContext& context) {
uint64_t jit_hash(const JitTupleEntry& tuple_entry, JitRuntimeContext& context) {
// NULL values hash to 0.
if (value.is_null(context)) {
if (tuple_entry.is_null(context)) {
return 0;
}

// For all other values the hash is computed by the corresponding std::hash function
switch (value.data_type()) {
switch (tuple_entry.data_type()) {
BOOST_PP_SEQ_FOR_EACH_PRODUCT(JIT_HASH_CASE, (JIT_DATA_TYPE_INFO))
default:
Fail("unreachable");
}
}

bool jit_aggregate_equals(const JitTupleValue& lhs, const JitHashmapValue& rhs, const size_t rhs_index,
bool jit_aggregate_equals(const JitTupleEntry& lhs, const JitHashmapEntry& rhs, const size_t rhs_index,
JitRuntimeContext& context) {
// NULL == NULL when grouping tuples in the aggregate operator
if (lhs.is_null(context) && rhs.is_null(rhs_index, context)) {
Expand All @@ -128,7 +197,7 @@ bool jit_aggregate_equals(const JitTupleValue& lhs, const JitHashmapValue& rhs,
}
}

void jit_assign(const JitTupleValue& from, const JitHashmapValue& to, const size_t to_index,
void jit_assign(const JitTupleEntry& from, const JitHashmapEntry& to, const size_t to_index,
JitRuntimeContext& context) {
// jit_assign only supports identical data types. This is sufficient for the current JitAggregate implementation.
// However, this function could easily be extended to support cross-data type assignment in a fashion similar to the
Expand All @@ -151,21 +220,21 @@ void jit_assign(const JitTupleValue& from, const JitHashmapValue& to, const size
}
}

size_t jit_grow_by_one(const JitHashmapValue& value, const JitVariantVector::InitialValue initial_value,
size_t jit_grow_by_one(const JitHashmapEntry& hashmap_entry, const JitVariantVector::InitialValue initial_value,
JitRuntimeContext& context) {
switch (value.data_type()) {
switch (hashmap_entry.data_type()) {
BOOST_PP_SEQ_FOR_EACH_PRODUCT(JIT_GROW_BY_ONE_CASE, (JIT_DATA_TYPE_INFO))
default:
return 0;
}
}

// cleanup
#undef JIT_GET_ENUM_VALUE
#undef JIT_GET_DATA_TYPE
#undef JIT_HASH_CASE
#undef JIT_AGGREGATE_EQUALS_CASE
#undef JIT_ASSIGN_CASE
#undef JIT_GROW_BY_ONE_CASE
#undef JIT_IS_NULL_CASE
#undef JIT_IS_NOT_NULL_CASE

} // namespace opossum
Loading

0 comments on commit ca2d139

Please sign in to comment.