Skip to content

Commit

Permalink
B-Tree Index (hyrise#929)
Browse files Browse the repository at this point in the history
* add btree index

* Btree index now implements BaseIndex

* add cpp-btree submodule

* btree include

* btree include

* started refactoring

* btree refactoring

* remove trailing whitespace

* fixed linter issues

* add cpp-btree subdirectory

* clang disable werror test

* another test

* disable btree warnings for clang

* fix gcc

* Incorporated PR comments

* Added a test for BTreeIndex

* lint

* Added BTreeIndex to IndexScanTest and IndexJoinTest
  • Loading branch information
ArneMayer authored and mrks committed Jun 29, 2018
1 parent 1b99106 commit 9286ce2
Show file tree
Hide file tree
Showing 17 changed files with 339 additions and 12 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,6 @@
[submodule "third_party/libpqxx"]
path = third_party/libpqxx
url = https://github.com/jtv/libpqxx.git
[submodule "third_party/cpp-btree"]
path = third_party/cpp-btree
url = https://github.com/algorithm-ninja/cpp-btree
1 change: 1 addition & 0 deletions DEPENDENCIES.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@
- libpqxx (https://github.com/jtv/libpqxx)
- sql-parser (https://github.com/hyrise/sql-parser)
- pgasus (https://github.com/kateyy/pgasus)
- cpp-btree (https://github.com/algorithm-ninja/cpp-btree)
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ include_directories(
${PROJECT_SOURCE_DIR}/third_party/cxxopts/include
${PROJECT_SOURCE_DIR}/third_party/json
${PROJECT_SOURCE_DIR}/third_party/sql-parser/src
${PROJECT_SOURCE_DIR}/third_party/cpp-btree

${PROJECT_SOURCE_DIR}/src/benchmarklib/
${PROJECT_SOURCE_DIR}/src/lib/
Expand Down
4 changes: 4 additions & 0 deletions src/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ set(
storage/index/adaptive_radix_tree/adaptive_radix_tree_index.hpp
storage/index/adaptive_radix_tree/adaptive_radix_tree_nodes.cpp
storage/index/adaptive_radix_tree/adaptive_radix_tree_nodes.hpp
storage/index/b_tree/b_tree_index.cpp
storage/index/b_tree/b_tree_index.hpp
storage/index/b_tree/b_tree_index_impl.cpp
storage/index/b_tree/b_tree_index_impl.hpp
storage/index/base_index.cpp
storage/index/base_index.hpp
storage/index/column_index_type.hpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ namespace opossum {
AdaptiveRadixTreeIndex::AdaptiveRadixTreeIndex(const std::vector<std::shared_ptr<const BaseColumn>>& index_columns)
: BaseIndex{get_index_type_of<AdaptiveRadixTreeIndex>()},
_index_column(std::dynamic_pointer_cast<const BaseDictionaryColumn>(index_columns.front())) {
DebugAssert(static_cast<bool>(_index_column), "AdaptiveRadixTree only works with dictionary columns for now");
DebugAssert((index_columns.size() == 1), "AdaptiveRadixTree only works with a single column");
Assert(static_cast<bool>(_index_column), "AdaptiveRadixTree only works with dictionary columns for now");
Assert((index_columns.size() == 1), "AdaptiveRadixTree only works with a single column");

// For each value ID in the attribute vector, create a pair consisting of a BinaryComparable of
// this value ID and its ChunkOffset (needed for bulk-inserting).
Expand Down
36 changes: 36 additions & 0 deletions src/lib/storage/index/b_tree/b_tree_index.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include "b_tree_index.hpp"

#include "storage/index/column_index_type.hpp"
#include "resolve_type.hpp"

namespace opossum {

BTreeIndex::BTreeIndex(const std::vector<std::shared_ptr<const BaseColumn>> index_columns)
: BaseIndex{get_index_type_of<BTreeIndex>()}, _index_column(index_columns[0]) {
Assert((index_columns.size() == 1), "BTreeIndex only works with a single column.");
_impl = make_shared_by_data_type<BaseBTreeIndexImpl, BTreeIndexImpl>(_index_column->data_type(), _index_column);
}

uint64_t BTreeIndex::memory_consumption() const {
return _impl->memory_consumption();
}

// Forwards the lower-bound lookup for `values` to the data-type-specific implementation.
BTreeIndex::Iterator BTreeIndex::_lower_bound(const std::vector<AllTypeVariant>& values) const {
  const auto& impl = *_impl;
  return impl.lower_bound(values);
}

// Forwards the upper-bound lookup for `values` to the data-type-specific implementation.
BTreeIndex::Iterator BTreeIndex::_upper_bound(const std::vector<AllTypeVariant>& values) const {
  const auto& impl = *_impl;
  return impl.upper_bound(values);
}

// Returns the implementation's begin iterator.
BTreeIndex::Iterator BTreeIndex::_cbegin() const {
  const auto& impl = *_impl;
  return impl.cbegin();
}

// Returns the implementation's end iterator.
BTreeIndex::Iterator BTreeIndex::_cend() const {
  const auto& impl = *_impl;
  return impl.cend();
}

// Returns a one-element vector holding the indexed column.
std::vector<std::shared_ptr<const BaseColumn>> BTreeIndex::_get_index_columns() const {
  return std::vector<std::shared_ptr<const BaseColumn>>{_index_column};
}

} // namespace opossum
35 changes: 35 additions & 0 deletions src/lib/storage/index/b_tree/b_tree_index.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#pragma once

#include "types.hpp"
#include "all_type_variant.hpp"
#include "storage/index/base_index.hpp"
#include "storage/base_column.hpp"
#include "b_tree_index_impl.hpp"

namespace opossum {

class BTreeIndexTest;

// Index over a single column. All lookups and the memory accounting are forwarded to an
// implementation object (_impl) that is created in the constructor for the data type of
// the indexed column.
class BTreeIndex : public BaseIndex {
// Gives the test fixture access to the protected members below.
friend BTreeIndexTest;

public:
// Iterator type returned by the bound/begin/end functions; it iterates over chunk offsets.
using Iterator = std::vector<ChunkOffset>::const_iterator;

BTreeIndex() = delete;
// Creates the index for index_columns. The constructor asserts that exactly one column is passed.
explicit BTreeIndex(const std::vector<std::shared_ptr<const BaseColumn>> index_columns);

// Returns the value reported by the implementation object's memory_consumption().
virtual uint64_t memory_consumption() const;

protected:
// The four iterator functions below forward to the same-named functions of _impl.
Iterator _lower_bound(const std::vector<AllTypeVariant>&) const override;
Iterator _upper_bound(const std::vector<AllTypeVariant>&) const override;
Iterator _cbegin() const override;
Iterator _cend() const override;
// Returns a vector containing only _index_column.
std::vector<std::shared_ptr<const BaseColumn>> _get_index_columns() const override;

// The indexed column, i.e. the first entry of the constructor argument.
std::shared_ptr<const BaseColumn> _index_column;
// Data-type-specific implementation that owns the actual index data.
std::shared_ptr<BaseBTreeIndexImpl> _impl;
};

} // namespace opossum
96 changes: 96 additions & 0 deletions src/lib/storage/index/b_tree/b_tree_index_impl.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#include "b_tree_index_impl.hpp"

#include "storage/index/base_index.hpp"
#include "types.hpp"
#include "resolve_type.hpp"
#include "utils/assert.hpp"
#include "storage/create_iterable_from_column.hpp"

namespace opossum {

// Constructs the implementation by bulk-loading the contents of index_column.
template <typename DataType>
BTreeIndexImpl<DataType>::BTreeIndexImpl(std::shared_ptr<const BaseColumn> index_column) {
  this->_bulk_insert(index_column);
}

// Variant-based lower bound: converts the first entry of `values` to DataType and
// delegates to the typed overload. Only the first entry is looked at.
template <typename DataType>
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::lower_bound(const std::vector<AllTypeVariant>& values) const {
  const auto& search_value = values[0];
  return lower_bound(type_cast<DataType>(search_value));
}

// Variant-based upper bound: converts the first entry of `values` to DataType and
// delegates to the typed overload. Only the first entry is looked at.
template <typename DataType>
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::upper_bound(const std::vector<AllTypeVariant>& values) const {
  const auto& search_value = values[0];
  return upper_bound(type_cast<DataType>(search_value));
}

// Iterator to the first stored chunk offset.
template <typename DataType>
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::cbegin() const {
  return _chunk_offsets.cbegin();
}

// Iterator one past the last stored chunk offset.
template <typename DataType>
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::cend() const {
  return _chunk_offsets.cend();
}

// Typed lower bound: looks up `value` in the B-tree via its lower_bound() and translates
// the position stored for the found key into an iterator into _chunk_offsets. If the
// B-tree lookup returns end(), the end of _chunk_offsets is returned.
template <typename DataType>
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::lower_bound(DataType value) const {
  const auto tree_entry = _btree.lower_bound(value);
  if (tree_entry != _btree.end()) {
    const auto first_position = tree_entry->second;
    return _chunk_offsets.begin() + first_position;
  }
  return _chunk_offsets.end();
}

// Typed upper bound: looks up `value` in the B-tree via its upper_bound() and translates
// the position stored for the found key into an iterator into _chunk_offsets. If the
// B-tree lookup returns end(), the end of _chunk_offsets is returned.
template <typename DataType>
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::upper_bound(DataType value) const {
  const auto tree_entry = _btree.upper_bound(value);
  const bool no_such_key = (tree_entry == _btree.end());
  return no_such_key ? _chunk_offsets.end() : _chunk_offsets.begin() + tree_entry->second;
}

// Sums the size of the chunk-offset vector object, the bytes taken by its elements
// (counted by size(), not capacity()), and the bytes the B-tree reports as used.
template <typename DataType>
uint64_t BTreeIndexImpl<DataType>::memory_consumption() const {
  const auto vector_object_bytes = sizeof(std::vector<ChunkOffset>);
  const auto offset_bytes = sizeof(ChunkOffset) * _chunk_offsets.size();
  return vector_object_bytes + offset_bytes + _btree.bytes_used();
}

// Fills the index from `column`.
//
// Steps:
//  1. Materialize every non-NULL entry of the column as a (chunk offset, value) pair.
//  2. Sort the pairs by value and store the chunk offsets in that order in _chunk_offsets.
//  3. For every distinct value, store in _btree the position in _chunk_offsets at which
//     the run of that value starts.
//
// A column without any non-NULL entry (empty or all NULL) leaves both _chunk_offsets and
// _btree empty.
template <typename DataType>
void BTreeIndexImpl<DataType>::_bulk_insert(const std::shared_ptr<const BaseColumn> column) {
  std::vector<std::pair<ChunkOffset, DataType>> values;

  // Materialize
  resolve_column_type<DataType>(*column, [&](const auto& typed_column) {
    auto iterable = create_iterable_from_column<DataType>(typed_column);
    iterable.for_each([&](const auto& value) {
      // NULLs are not indexed.
      if (value.is_null()) return;
      values.push_back(std::make_pair(value.chunk_offset(), value.value()));
    });
  });

  // Nothing to index; also avoids touching values[0] on an empty vector below.
  if (values.empty()) return;

  // Sort
  std::sort(values.begin(), values.end(), [](const auto& a, const auto& b) { return a.second < b.second; });
  _chunk_offsets.resize(values.size());
  for (size_t i = 0; i < values.size(); ++i) {
    _chunk_offsets[i] = values[i].first;
  }

  // Build index: record the first position of every run of equal values.
  for (size_t i = 0; i < values.size(); ++i) {
    if (i == 0 || values[i].second != values[i - 1].second) {
      _btree[values[i].second] = i;
    }
  }
}

EXPLICITLY_INSTANTIATE_DATA_TYPES(BTreeIndexImpl);

} // namespace opossum
75 changes: 75 additions & 0 deletions src/lib/storage/index/b_tree/b_tree_index_impl.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#pragma once

// Include the third-party B-tree header with its compiler warnings suppressed.
#if defined(__clang__)
// The suppression is bracketed by push/pop so that it only applies to the third-party
// header and the previous diagnostic state is restored afterwards.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wall"
#include <btree_map.h>
#pragma clang diagnostic pop
#elif defined(__GNUC__)
// GCC: treat the rest of this header (including the B-tree include) as a system header.
#pragma GCC system_header
#include <btree_map.h>
#else
// Other compilers: include the header without any warning suppression.
#include <btree_map.h>
#endif


#include "types.hpp"
#include "all_type_variant.hpp"
#include "storage/base_column.hpp"

namespace opossum {

class BTreeIndexTest;

// Data-type-independent interface of the B-tree index implementation. It lets callers hold
// an implementation without knowing the DataType template argument of the derived class.
class BaseBTreeIndexImpl {
// Gives the test fixture access to the protected members below.
friend BTreeIndexTest;

public:
BaseBTreeIndexImpl() = default;
// Move operations are defaulted; no copy operations are declared here.
BaseBTreeIndexImpl(BaseBTreeIndexImpl&&) = default;
BaseBTreeIndexImpl& operator=(BaseBTreeIndexImpl&&) = default;
virtual ~BaseBTreeIndexImpl() = default;

// Iterator over the stored chunk offsets.
using Iterator = std::vector<ChunkOffset>::const_iterator;
// Interface implemented by the data-type-specific subclass.
virtual uint64_t memory_consumption() const = 0;
virtual Iterator lower_bound(const std::vector<AllTypeVariant>&) const = 0;
virtual Iterator upper_bound(const std::vector<AllTypeVariant>&) const = 0;
virtual Iterator cbegin() const = 0;
virtual Iterator cend() const = 0;

protected:
// Chunk offsets that the iterators returned by the functions above point into.
std::vector<ChunkOffset> _chunk_offsets;
};

/**
* Data-type-specific part of the B-tree index, backed by btree::btree_map.
* Implementation of the B-tree: https://code.google.com/archive/p/cpp-btree/
* Note: does not support null values right now.
*/
template <typename DataType>
class BTreeIndexImpl : public BaseBTreeIndexImpl {
// Gives the test fixture access to the protected members below.
friend BTreeIndexTest;

public:
BTreeIndexImpl() = delete;
// Builds the index from index_column.
explicit BTreeIndexImpl(std::shared_ptr<const BaseColumn> index_column);

// Not copyable, but movable.
BTreeIndexImpl(const BTreeIndexImpl&) = delete;
BTreeIndexImpl& operator=(const BTreeIndexImpl&) = delete;

BTreeIndexImpl(BTreeIndexImpl&&) = default;
BTreeIndexImpl& operator=(BTreeIndexImpl&&) = default;

uint64_t memory_consumption() const override;

// Typed lookups taking the search value directly.
Iterator lower_bound(DataType value) const;
Iterator upper_bound(DataType value) const;

// Variant-based lookups required by BaseBTreeIndexImpl.
Iterator lower_bound(const std::vector<AllTypeVariant>&) const override;
Iterator upper_bound(const std::vector<AllTypeVariant>&) const override;
Iterator cbegin() const override;
Iterator cend() const override;

protected:
// Fills _chunk_offsets and _btree from the given column; called by the constructor.
void _bulk_insert(const std::shared_ptr<const BaseColumn>);

// Maps a value to a position (size_t) in _chunk_offsets.
btree::btree_map<DataType, size_t> _btree;
};

} // namespace opossum
6 changes: 4 additions & 2 deletions src/lib/storage/index/column_index_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,20 @@ namespace opossum {

namespace hana = boost::hana;

enum class ColumnIndexType : uint8_t { Invalid, GroupKey, CompositeGroupKey, AdaptiveRadixTree };
enum class ColumnIndexType : uint8_t { Invalid, GroupKey, CompositeGroupKey, AdaptiveRadixTree, BTree };

class GroupKeyIndex;
class CompositeGroupKeyIndex;
class AdaptiveRadixTreeIndex;
class BTreeIndex;

namespace detail {

constexpr auto column_index_map =
hana::make_map(hana::make_pair(hana::type_c<GroupKeyIndex>, ColumnIndexType::GroupKey),
hana::make_pair(hana::type_c<CompositeGroupKeyIndex>, ColumnIndexType::CompositeGroupKey),
hana::make_pair(hana::type_c<AdaptiveRadixTreeIndex>, ColumnIndexType::AdaptiveRadixTree));
hana::make_pair(hana::type_c<AdaptiveRadixTreeIndex>, ColumnIndexType::AdaptiveRadixTree),
hana::make_pair(hana::type_c<BTreeIndex>, ColumnIndexType::BTree));

} // namespace detail

Expand Down
6 changes: 3 additions & 3 deletions src/lib/storage/index/group_key/composite_group_key_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace opossum {

CompositeGroupKeyIndex::CompositeGroupKeyIndex(const std::vector<std::shared_ptr<const BaseColumn>>& indexed_columns)
: BaseIndex{get_index_type_of<CompositeGroupKeyIndex>()} {
DebugAssert(!indexed_columns.empty(), "CompositeGroupKeyIndex requires at least one column to be indexed.");
Assert(!indexed_columns.empty(), "CompositeGroupKeyIndex requires at least one column to be indexed.");

if (IS_DEBUG) {
auto first_size = indexed_columns.front()->size();
Expand All @@ -37,8 +37,8 @@ CompositeGroupKeyIndex::CompositeGroupKeyIndex(const std::vector<std::shared_ptr
_indexed_columns.reserve(indexed_columns.size());
for (const auto& column : indexed_columns) {
auto dict_column = std::dynamic_pointer_cast<const BaseDictionaryColumn>(column);
DebugAssert(static_cast<bool>(dict_column), "CompositeGroupKeyIndex only works with dictionary columns.");
DebugAssert(is_fixed_size_byte_aligned(dict_column->compressed_vector_type()),
Assert(static_cast<bool>(dict_column), "CompositeGroupKeyIndex only works with dictionary columns.");
Assert(is_fixed_size_byte_aligned(dict_column->compressed_vector_type()),
"CompositeGroupKeyIndex only works with fixed-size byte-aligned compressed attribute vectors.");
_indexed_columns.emplace_back(dict_column);
}
Expand Down
4 changes: 2 additions & 2 deletions src/lib/storage/index/group_key/group_key_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ namespace opossum {
GroupKeyIndex::GroupKeyIndex(const std::vector<std::shared_ptr<const BaseColumn>> index_columns)
: BaseIndex{get_index_type_of<GroupKeyIndex>()},
_index_column(std::dynamic_pointer_cast<const BaseDictionaryColumn>(index_columns[0])) {
DebugAssert(static_cast<bool>(_index_column), "GroupKeyIndex only works with dictionary columns.");
DebugAssert((index_columns.size() == 1), "GroupKeyIndex only works with a single column.");
Assert(static_cast<bool>(_index_column), "GroupKeyIndex only works with dictionary columns.");
Assert((index_columns.size() == 1), "GroupKeyIndex only works with a single column.");

// 1) Initialize the index structures
// 1a) Set the index_offset to size of the dictionary + 1 (plus one to mark the ending position)
Expand Down
1 change: 1 addition & 0 deletions src/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ set(
storage/encoding_test.hpp
storage/encoded_column_test.cpp
storage/group_key_index_test.cpp
storage/btree_index_test.cpp
storage/iterables_test.cpp
storage/materialize_test.cpp
storage/multi_column_index_test.cpp
Expand Down
6 changes: 4 additions & 2 deletions src/test/operators/index_scan_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "storage/index/adaptive_radix_tree/adaptive_radix_tree_index.hpp"
#include "storage/index/group_key/composite_group_key_index.hpp"
#include "storage/index/group_key/group_key_index.hpp"
#include "storage/index/b_tree/b_tree_index.hpp"
#include "storage/table.hpp"
#include "types.hpp"

Expand Down Expand Up @@ -87,8 +88,9 @@ class OperatorsIndexScanTest : public BaseTest {
ColumnIndexType _index_type;
};

typedef ::testing::Types<GroupKeyIndex, AdaptiveRadixTreeIndex, CompositeGroupKeyIndex /* add further indices */>
DerivedIndices;
typedef ::testing::Types<GroupKeyIndex, AdaptiveRadixTreeIndex, CompositeGroupKeyIndex,
BTreeIndex /* add further indices */> DerivedIndices;

TYPED_TEST_CASE(OperatorsIndexScanTest, DerivedIndices);

TYPED_TEST(OperatorsIndexScanTest, SingleColumnScanOnDataTable) {
Expand Down
4 changes: 3 additions & 1 deletion src/test/operators/join_index_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "storage/index/adaptive_radix_tree/adaptive_radix_tree_index.hpp"
#include "storage/index/group_key/composite_group_key_index.hpp"
#include "storage/index/group_key/group_key_index.hpp"
#include "storage/index/b_tree/b_tree_index.hpp"
#include "storage/table.hpp"
#include "types.hpp"

Expand Down Expand Up @@ -100,7 +101,8 @@ class JoinIndexTest : public BaseTest {
_table_wrapper_k, _table_wrapper_l, _table_wrapper_m, _table_wrapper_n;
};

typedef ::testing::Types<AdaptiveRadixTreeIndex, CompositeGroupKeyIndex /* , GroupKeyIndex */> DerivedIndices;
typedef ::testing::Types<AdaptiveRadixTreeIndex, CompositeGroupKeyIndex,
BTreeIndex /* , GroupKeyIndex */> DerivedIndices;

TYPED_TEST_CASE(JoinIndexTest, DerivedIndices);

Expand Down
Loading

0 comments on commit 9286ce2

Please sign in to comment.