Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] Add ColumnAndPredicate (backport #53635) #54875

Merged
merged 2 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion be/src/exprs/min_max_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "column/chunk.h"
#include "column/column_helper.h"
#include "column/type_traits.h"
Expand Down Expand Up @@ -145,4 +147,4 @@ class MinMaxPredicateBuilder {
const JoinRuntimeFilter* _filter;
};

} // namespace starrocks
} // namespace starrocks
1 change: 1 addition & 0 deletions be/src/storage/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ add_library(Storage STATIC
column_not_in_predicate.cpp
column_null_predicate.cpp
column_or_predicate.cpp
column_and_predicate.cpp
column_expr_predicate.cpp
conjunctive_predicates.cpp
predicate_tree/predicate_tree.cpp
Expand Down
80 changes: 80 additions & 0 deletions be/src/storage/column_and_predicate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "storage/column_and_predicate.h"

namespace starrocks {
// Evaluates the conjunction of all children over rows [from, to) of `column`,
// writing the outcome into `selection`. All of the work is done by _evaluate().
Status ColumnAndPredicate::evaluate(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const {
    Status status = _evaluate(column, selection, from, to);
    return status;
}

// ANDs this predicate into an existing `selection` for rows [from, to).
// Because the predicate is itself a conjunction, this is simply every child's
// evaluate_and() applied in order; a failing child status is propagated by
// RETURN_IF_ERROR.
Status ColumnAndPredicate::evaluate_and(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const {
    const size_t child_count = _child.size();
    for (size_t idx = 0; idx < child_count; ++idx) {
        RETURN_IF_ERROR(_child[idx]->evaluate_and(column, selection, from, to));
    }
    return Status::OK();
}

// ORs this predicate into an existing `selection` for rows [from, to).
// The conjunction is first computed into the scratch buffer `_buff` (resized to
// the column's row count) so that `selection` is not disturbed, and is then
// merged in with a bitwise OR.
Status ColumnAndPredicate::evaluate_or(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const {
    _buff.resize(column->size());
    RETURN_IF_ERROR(_evaluate(column, _buff.data(), from, to));

    const uint8_t* conjunction = _buff.data();
    for (size_t row = from; row < to; ++row) {
        selection[row] |= conjunction[row];
    }
    return Status::OK();
}

std::string ColumnAndPredicate::debug_string() const {
std::stringstream ss;
ss << "AND(";
for (size_t i = 0; i < _child.size(); i++) {
if (i != 0) {
ss << ", ";
}
ss << i << ":" << _child[i]->debug_string();
}
ss << ")";
return ss.str();
}

// Computes the conjunction of all children for rows [from, to) into `selection`.
//
// The first child is run through evaluate() and every remaining child is folded
// in through evaluate_and(); a failing child status is propagated by
// RETURN_IF_ERROR.
//
// A predicate constructed without children is an empty conjunction, which is
// vacuously true: every row in the range is marked as selected. This matches
// evaluate_and(), which leaves `selection` untouched when there are no children,
// and avoids indexing _child[0] on an empty vector (undefined behavior).
Status ColumnAndPredicate::_evaluate(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const {
    if (_child.empty()) {
        for (uint16_t i = from; i < to; i++) {
            selection[i] = 1;
        }
        return Status::OK();
    }
    RETURN_IF_ERROR(_child[0]->evaluate(column, selection, from, to));
    for (size_t i = 1; i < _child.size(); i++) {
        RETURN_IF_ERROR(_child[i]->evaluate_and(column, selection, from, to));
    }
    return Status::OK();
}

// return false if page not satisfied
bool ColumnAndPredicate::zone_map_filter(const ZoneMapDetail& detail) const {
for (const ColumnPredicate* child : _child) {
RETURN_IF(!child->zone_map_filter(detail), false);
}
return true;
}

// Builds an equivalent AND predicate for `target_type_ptr`.
// A new ColumnAndPredicate is registered in `obj_pool`, each child is converted
// through its own convert_to() and appended in the original order, and the
// result is handed back through `*output`. If a child conversion fails, its
// status is propagated by RETURN_IF_ERROR and `*output` is left unassigned.
Status ColumnAndPredicate::convert_to(const ColumnPredicate** output, const TypeInfoPtr& target_type_ptr,
                                      ObjectPool* obj_pool) const {
    ColumnAndPredicate* converted =
            obj_pool->add(new ColumnAndPredicate(get_type_info(target_type_ptr.get()), _column_id));
    for (const ColumnPredicate* child : _child) {
        const ColumnPredicate* converted_child = nullptr;
        RETURN_IF_ERROR(child->convert_to(&converted_child, get_type_info(target_type_ptr.get()), obj_pool));
        converted->_child.push_back(converted_child);
    }
    *output = converted;
    return Status::OK();
}
} // namespace starrocks
58 changes: 58 additions & 0 deletions be/src/storage/column_and_predicate.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "storage/column_predicate.h"

namespace starrocks {

class ColumnAndPredicate final : public ColumnPredicate {
public:
explicit ColumnAndPredicate(const TypeInfoPtr& type_info, ColumnId cid) : ColumnPredicate(type_info, cid) {}

template <typename Container>
ColumnAndPredicate(const TypeInfoPtr& type_info, ColumnId cid, const Container& c)
: ColumnPredicate(type_info, cid), _child(c.begin(), c.end()) {}

void add_child(ColumnPredicate* child) { _child.emplace_back(child); }

template <typename Iterator>
void add_child(Iterator begin, Iterator end) {
_child.insert(_child.end(), begin, end);
}

Status evaluate(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const override;
Status evaluate_and(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const override;
Status evaluate_or(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const override;

bool filter(const BloomFilter& bf) const override { return true; }
bool zone_map_filter(const ZoneMapDetail& detail) const override;

bool can_vectorized() const override { return false; }
PredicateType type() const override { return PredicateType::kAnd; }
Datum value() const override { return {}; }
std::vector<Datum> values() const override { return std::vector<Datum>{}; }

Status convert_to(const ColumnPredicate** output, const TypeInfoPtr& target_type_info,
ObjectPool* obj_pool) const override;
std::string debug_string() const override;

private:
Status _evaluate(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const;

std::vector<const ColumnPredicate*> _child;
mutable std::vector<uint8_t> _buff;
};
} // namespace starrocks
4 changes: 2 additions & 2 deletions be/src/storage/column_expr_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Column;
// This class has a significant limitation: it does not support range evaluation. In other words, `from` is always expected to be 0.
// The fundamental reason is that `ExprContext` requires the `Column*` as a whole, unless we create a class to represent a `ColumnSlice`.
// And that task is almost impossible.
class ColumnExprPredicate : public ColumnPredicate {
class ColumnExprPredicate final : public ColumnPredicate {
public:
static StatusOr<ColumnExprPredicate*> make_column_expr_predicate(TypeInfoPtr type_info, ColumnId column_id,
RuntimeState* state, ExprContext* expr_ctx,
Expand Down Expand Up @@ -86,7 +86,7 @@ class ColumnExprPredicate : public ColumnPredicate {
mutable std::vector<uint8_t> _tmp_select;
};

class ColumnTruePredicate : public ColumnPredicate {
class ColumnTruePredicate final : public ColumnPredicate {
public:
ColumnTruePredicate(TypeInfoPtr type_info, ColumnId column_id) : ColumnPredicate(std::move(type_info), column_id) {}
~ColumnTruePredicate() override = default;
Expand Down
4 changes: 2 additions & 2 deletions be/src/storage/column_in_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
namespace starrocks {

template <LogicalType field_type, typename ItemSet>
class ColumnInPredicate : public ColumnPredicate {
class ColumnInPredicate final : public ColumnPredicate {
using ValueType = typename CppTypeTraits<field_type>::CppType;
static_assert(std::is_same_v<ValueType, typename ItemSet::value_type>);

Expand Down Expand Up @@ -190,7 +190,7 @@ class ColumnInPredicate : public ColumnPredicate {

// Template specialization for binary column
template <LogicalType field_type>
class BinaryColumnInPredicate : public ColumnPredicate {
class BinaryColumnInPredicate final : public ColumnPredicate {
public:
BinaryColumnInPredicate(const TypeInfoPtr& type_info, ColumnId id, std::vector<std::string> strings)
: ColumnPredicate(type_info, id), _zero_padded_strs(std::move(strings)) {
Expand Down
4 changes: 2 additions & 2 deletions be/src/storage/column_not_in_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
namespace starrocks {

template <LogicalType field_type>
class ColumnNotInPredicate : public ColumnPredicate {
class ColumnNotInPredicate final : public ColumnPredicate {
using ValueType = typename CppTypeTraits<field_type>::CppType;

public:
Expand Down Expand Up @@ -161,7 +161,7 @@ class ColumnNotInPredicate : public ColumnPredicate {

// Template specialization for binary column
template <LogicalType field_type>
class BinaryColumnNotInPredicate : public ColumnPredicate {
class BinaryColumnNotInPredicate final : public ColumnPredicate {
public:
BinaryColumnNotInPredicate(const TypeInfoPtr& type_info, ColumnId id, std::vector<std::string> strings)
: ColumnPredicate(type_info, id), _zero_padded_strs(std::move(strings)) {
Expand Down
4 changes: 2 additions & 2 deletions be/src/storage/column_null_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

namespace starrocks {

class ColumnIsNullPredicate : public ColumnPredicate {
class ColumnIsNullPredicate final : public ColumnPredicate {
public:
explicit ColumnIsNullPredicate(const TypeInfoPtr& type_info, ColumnId id) : ColumnPredicate(type_info, id) {}

Expand Down Expand Up @@ -99,7 +99,7 @@ class ColumnIsNullPredicate : public ColumnPredicate {
}
};

class ColumnNotNullPredicate : public ColumnPredicate {
class ColumnNotNullPredicate final : public ColumnPredicate {
public:
explicit ColumnNotNullPredicate(const TypeInfoPtr& type_info, ColumnId id) : ColumnPredicate(type_info, id) {}

Expand Down
13 changes: 13 additions & 0 deletions be/src/storage/column_or_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,17 @@ Status ColumnOrPredicate::convert_to(const ColumnPredicate** output, const TypeI
return Status::OK();
}

// Renders the predicate as "OR(0:<child0>, 1:<child1>, ...)", where each
// <childN> is that child's own debug_string().
std::string ColumnOrPredicate::debug_string() const {
    std::stringstream out;
    out << "OR(";
    const char* separator = "";
    for (size_t index = 0; index < _child.size(); ++index) {
        out << separator << index << ":" << _child[index]->debug_string();
        separator = ", ";
    }
    out << ")";
    return out.str();
}

} // namespace starrocks
2 changes: 2 additions & 0 deletions be/src/storage/column_or_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class ColumnOrPredicate : public ColumnPredicate {
Status convert_to(const ColumnPredicate** output, const TypeInfoPtr& target_type_info,
ObjectPool* obj_pool) const override;

std::string debug_string() const override;

private:
Status _evaluate(const Column* column, uint8_t* selection, uint16_t from, uint16_t to) const;

Expand Down
2 changes: 2 additions & 0 deletions be/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,8 @@ set(EXEC_FILES
./storage/persistent_index_consistency_test.cpp
./storage/meta_reader_test.cpp
./storage/dictionary_cache_manager_test.cpp
./storage/column_or_predicate_test.cpp
./storage/column_and_predicate_test.cpp
./runtime/buffer_control_block_test.cpp
./runtime/data_stream_mgr_test.cpp
./runtime/datetime_value_test.cpp
Expand Down
121 changes: 121 additions & 0 deletions be/test/storage/column_and_predicate_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "storage/column_and_predicate.h"

#include <gtest/gtest.h>

#include "storage/rowset/block_split_bloom_filter.h"
#include "testutil/column_test_helper.h"

namespace starrocks {
// Fixture for ColumnAndPredicate tests.
// The predicate under test is `col >= 10 AND col <= 20` on INT column id 0, and
// `_col` is a nullable INT column of six rows whose last two rows carry a 1 in
// the null-flag vector.
class ColumnAndPredicateTest : public ::testing::Test {
public:
    void SetUp() override {
        // Children are owned by the fixture; the AND predicate only keeps raw pointers to them.
        _left.reset(new_column_ge_predicate(get_type_info(TYPE_INT), 0, "10"));
        _right.reset(new_column_le_predicate(get_type_info(TYPE_INT), 0, "20"));

        _pred = std::make_unique<ColumnAndPredicate>(get_type_info(TYPE_INT), 0);
        _pred->add_child(_left.get());
        _pred->add_child(_right.get());

        std::vector<int32_t> row_values{5, 15, 17, 25, 0, 0};
        std::vector<uint8_t> row_null_flags{0, 0, 0, 0, 1, 1};
        _col = ColumnTestHelper::build_nullable_column<int32_t>(row_values, row_null_flags);
    }

protected:
    std::unique_ptr<ColumnPredicate> _left;
    std::unique_ptr<ColumnPredicate> _right;
    std::unique_ptr<ColumnAndPredicate> _pred;
    ColumnPtr _col;
    BlockSplitBloomFilter _bf;
    ObjectPool _pool;
};

// Checks the fixed, data-independent properties of the AND predicate.
TEST_F(ColumnAndPredicateTest, basic) {
    // Metadata accessors.
    ASSERT_EQ(_pred->type(), PredicateType::kAnd);
    ASSERT_FALSE(_pred->can_vectorized());
    ASSERT_TRUE(_pred->filter(_bf));

    // A compound predicate exposes no operand values.
    ASSERT_TRUE(_pred->value().is_null());
    ASSERT_TRUE(_pred->values().empty());

    ASSERT_EQ(_pred->debug_string(), "AND(0:(columnId(0)>=10), 1:(columnId(0)<=20))");
}

// evaluate() on an all-zero selection: only rows 1 and 2 (values 15 and 17) end up selected.
TEST_F(ColumnAndPredicateTest, evaluate) {
    std::vector<uint8_t> selection(6, 0);
    ASSERT_TRUE(_pred->evaluate(_col.get(), selection.data(), 0, 6).ok());

    const std::vector<uint8_t> expected{0, 1, 1, 0, 0, 0};
    ASSERT_EQ(selection, expected);
}

// evaluate_and() narrows a pre-populated selection: row 2 was already
// deselected on input, so only row 1 remains selected.
TEST_F(ColumnAndPredicateTest, evaluate_and) {
    std::vector<uint8_t> selection{1, 1, 0, 1, 1, 1};
    ASSERT_TRUE(_pred->evaluate_and(_col.get(), selection.data(), 0, 6).ok());

    const std::vector<uint8_t> expected{0, 1, 0, 0, 0, 0};
    ASSERT_EQ(selection, expected);
}

// evaluate_or() widens a pre-populated selection: row 4 stays selected from the
// input, and rows 1 and 2 are added by the predicate.
TEST_F(ColumnAndPredicateTest, evaluate_or) {
    std::vector<uint8_t> selection{0, 0, 0, 0, 1, 0};
    ASSERT_TRUE(_pred->evaluate_or(_col.get(), selection.data(), 0, 6).ok());

    const std::vector<uint8_t> expected{0, 1, 1, 0, 1, 0};
    ASSERT_EQ(selection, expected);
}

// Exercises zone_map_filter() for `col >= 10 AND col <= 20` against zone maps
// without nulls. A zone map must pass exactly when its [min, max] range
// overlaps [10, 20].
TEST_F(ColumnAndPredicateTest, zonemap_filter) {
    // Builds a null-free zone map for [min_v, max_v] and runs the predicate's filter on it.
    const auto passes = [this](int32_t min_v, int32_t max_v) {
        Datum min_datum(min_v);
        Datum max_datum(max_v);
        ZoneMapDetail zone_map(min_datum, max_datum, false);
        return _pred->zone_map_filter(zone_map);
    };

    // Zone map equal to the predicate range.
    ASSERT_TRUE(passes(10, 20));
    // Zone map strictly containing the predicate range.
    ASSERT_TRUE(passes(5, 25));
    // Zone map entirely below the predicate range.
    ASSERT_FALSE(passes(1, 5));
    // Zone map entirely above the predicate range.
    ASSERT_FALSE(passes(30, 40));
    // Zone map straddling the lower bound (previously a duplicate of the [5, 25] case).
    ASSERT_TRUE(passes(5, 15));
    // Zone map straddling the upper bound.
    ASSERT_TRUE(passes(15, 40));
}

// Converting to the same INT type must yield a predicate with an identical debug rendering.
TEST_F(ColumnAndPredicateTest, convert_to) {
    const ColumnPredicate* converted = nullptr;
    const Status status = _pred->convert_to(&converted, get_type_info(TYPE_INT), &_pool);
    ASSERT_TRUE(status.ok());

    const std::string rendered = converted->debug_string();
    ASSERT_EQ(rendered, "AND(0:(columnId(0)>=10), 1:(columnId(0)<=20))");
}

} // namespace starrocks
Loading
Loading