forked from hyrise/hyrise
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add btree index * Btree index now implements BaseIndex * add cpp-btree submodule * btree include * btree include * started refactoring * btree refactoring * remove trailing whitespace * fixed linter issues * add cpp-btree subdirectory * clang disable werror test * another test * disable btree warnings for clang * fix gcc * Incorporated PR comments * Added a test for BTreeIndex * lint * Added BTreeIndex to IndexScanTest and IndexJoinTest
- Loading branch information
Showing
17 changed files
with
339 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#include "b_tree_index.hpp" | ||
|
||
#include "storage/index/column_index_type.hpp" | ||
#include "resolve_type.hpp" | ||
|
||
namespace opossum { | ||
|
||
BTreeIndex::BTreeIndex(const std::vector<std::shared_ptr<const BaseColumn>> index_columns) | ||
: BaseIndex{get_index_type_of<BTreeIndex>()}, _index_column(index_columns[0]) { | ||
Assert((index_columns.size() == 1), "BTreeIndex only works with a single column."); | ||
_impl = make_shared_by_data_type<BaseBTreeIndexImpl, BTreeIndexImpl>(_index_column->data_type(), _index_column); | ||
} | ||
|
||
uint64_t BTreeIndex::memory_consumption() const { | ||
return _impl->memory_consumption(); | ||
} | ||
|
||
// Forwards the lower-bound lookup for `values` to the type-specific implementation.
BTreeIndex::Iterator BTreeIndex::_lower_bound(const std::vector<AllTypeVariant>& values) const {
  const auto& impl = *_impl;
  return impl.lower_bound(values);
}
|
||
// Forwards the upper-bound lookup for `values` to the type-specific implementation.
BTreeIndex::Iterator BTreeIndex::_upper_bound(const std::vector<AllTypeVariant>& values) const {
  const auto& impl = *_impl;
  return impl.upper_bound(values);
}
|
||
// Returns the implementation's iterator to the first indexed chunk offset.
BTreeIndex::Iterator BTreeIndex::_cbegin() const {
  const auto& impl = *_impl;
  return impl.cbegin();
}
|
||
// Returns the implementation's past-the-end iterator over the indexed chunk offsets.
BTreeIndex::Iterator BTreeIndex::_cend() const {
  const auto& impl = *_impl;
  return impl.cend();
}
|
||
// Returns the indexed column wrapped in a one-element vector.
std::vector<std::shared_ptr<const BaseColumn>> BTreeIndex::_get_index_columns() const {
  std::vector<std::shared_ptr<const BaseColumn>> columns;
  columns.push_back(_index_column);
  return columns;
}
|
||
} // namespace opossum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#pragma once | ||
|
||
#include "types.hpp" | ||
#include "all_type_variant.hpp" | ||
#include "storage/index/base_index.hpp" | ||
#include "storage/base_column.hpp" | ||
#include "b_tree_index_impl.hpp" | ||
|
||
namespace opossum { | ||
|
||
class BTreeIndexTest; | ||
|
||
class BTreeIndex : public BaseIndex { | ||
friend BTreeIndexTest; | ||
|
||
public: | ||
using Iterator = std::vector<ChunkOffset>::const_iterator; | ||
|
||
BTreeIndex() = delete; | ||
explicit BTreeIndex(const std::vector<std::shared_ptr<const BaseColumn>> index_columns); | ||
|
||
virtual uint64_t memory_consumption() const; | ||
|
||
protected: | ||
Iterator _lower_bound(const std::vector<AllTypeVariant>&) const override; | ||
Iterator _upper_bound(const std::vector<AllTypeVariant>&) const override; | ||
Iterator _cbegin() const override; | ||
Iterator _cend() const override; | ||
std::vector<std::shared_ptr<const BaseColumn>> _get_index_columns() const override; | ||
|
||
std::shared_ptr<const BaseColumn> _index_column; | ||
std::shared_ptr<BaseBTreeIndexImpl> _impl; | ||
}; | ||
|
||
} // namespace opossum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#include "b_tree_index_impl.hpp" | ||
|
||
#include "storage/index/base_index.hpp" | ||
#include "types.hpp" | ||
#include "resolve_type.hpp" | ||
#include "utils/assert.hpp" | ||
#include "storage/create_iterable_from_column.hpp" | ||
|
||
namespace opossum { | ||
|
||
template <typename DataType> | ||
BTreeIndexImpl<DataType>::BTreeIndexImpl(std::shared_ptr<const BaseColumn> index_column) { | ||
_bulk_insert(index_column); | ||
} | ||
|
||
template <typename DataType> | ||
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::lower_bound(const std::vector<AllTypeVariant>& values) const { | ||
return lower_bound(type_cast<DataType>(values[0])); | ||
} | ||
|
||
template <typename DataType> | ||
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::upper_bound(const std::vector<AllTypeVariant>& values) const { | ||
return upper_bound(type_cast<DataType>(values[0])); | ||
} | ||
|
||
template <typename DataType> | ||
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::cbegin() const { | ||
return _chunk_offsets.begin(); | ||
} | ||
|
||
template <typename DataType> | ||
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::cend() const { | ||
return _chunk_offsets.end(); | ||
} | ||
|
||
template <typename DataType> | ||
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::lower_bound(DataType value) const { | ||
auto result = _btree.lower_bound(value); | ||
if (result == _btree.end()) { | ||
return _chunk_offsets.end(); | ||
} else { | ||
return _chunk_offsets.begin() + result->second; | ||
} | ||
} | ||
|
||
template <typename DataType> | ||
BaseBTreeIndexImpl::Iterator BTreeIndexImpl<DataType>::upper_bound(DataType value) const { | ||
auto result = _btree.upper_bound(value); | ||
if (result == _btree.end()) { | ||
return _chunk_offsets.end(); | ||
} else { | ||
return _chunk_offsets.begin() + result->second; | ||
} | ||
} | ||
|
||
template <typename DataType> | ||
uint64_t BTreeIndexImpl<DataType>::memory_consumption() const { | ||
return sizeof(std::vector<ChunkOffset>) + | ||
sizeof(ChunkOffset) * _chunk_offsets.size() + | ||
_btree.bytes_used(); | ||
} | ||
|
||
template <typename DataType> | ||
void BTreeIndexImpl<DataType>::_bulk_insert(const std::shared_ptr<const BaseColumn> column) { | ||
std::vector<std::pair<ChunkOffset, DataType>> values; | ||
|
||
// Materialize | ||
resolve_column_type<DataType>(*column, [&](const auto& typed_column) { | ||
auto iterable_left = create_iterable_from_column<DataType>(typed_column); | ||
iterable_left.for_each([&](const auto& value) { | ||
if (value.is_null()) return; | ||
values.push_back(std::make_pair(value.chunk_offset(), value.value())); | ||
}); | ||
}); | ||
|
||
// Sort | ||
std::sort(values.begin(), values.end(), [](const auto& a, const auto& b){ return a.second < b.second; }); | ||
_chunk_offsets.resize(values.size()); | ||
for (size_t i = 0; i < values.size(); i++) { | ||
_chunk_offsets[i] = values[i].first; | ||
} | ||
|
||
// Build index | ||
DataType current_value = values[0].second; | ||
_btree[current_value] = 0; | ||
for (size_t i = 0; i < values.size(); i++) { | ||
if (values[i].second != current_value) { | ||
current_value = values[i].second; | ||
_btree[current_value] = i; | ||
} | ||
} | ||
} | ||
|
||
EXPLICITLY_INSTANTIATE_DATA_TYPES(BTreeIndexImpl); | ||
|
||
} // namespace opossum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#pragma once | ||
|
||
#ifdef __clang__ | ||
#pragma clang diagnostic ignored "-Wall" | ||
#include <btree_map.h> | ||
#pragma clang diagnostic pop | ||
#elif __GNUC__ | ||
#pragma GCC system_header | ||
#include <btree_map.h> | ||
#endif | ||
|
||
|
||
#include "types.hpp" | ||
#include "all_type_variant.hpp" | ||
#include "storage/base_column.hpp" | ||
|
||
namespace opossum { | ||
|
||
class BTreeIndexTest; | ||
|
||
class BaseBTreeIndexImpl { | ||
friend BTreeIndexTest; | ||
|
||
public: | ||
BaseBTreeIndexImpl() = default; | ||
BaseBTreeIndexImpl(BaseBTreeIndexImpl&&) = default; | ||
BaseBTreeIndexImpl& operator=(BaseBTreeIndexImpl&&) = default; | ||
virtual ~BaseBTreeIndexImpl() = default; | ||
|
||
using Iterator = std::vector<ChunkOffset>::const_iterator; | ||
virtual uint64_t memory_consumption() const = 0; | ||
virtual Iterator lower_bound(const std::vector<AllTypeVariant>&) const = 0; | ||
virtual Iterator upper_bound(const std::vector<AllTypeVariant>&) const = 0; | ||
virtual Iterator cbegin() const = 0; | ||
virtual Iterator cend() const = 0; | ||
|
||
protected: | ||
std::vector<ChunkOffset> _chunk_offsets; | ||
}; | ||
|
||
/** | ||
* Implementation: https://code.google.com/archive/p/cpp-btree/ | ||
* Note: does not support null values right now. | ||
*/ | ||
template <typename DataType> | ||
class BTreeIndexImpl : public BaseBTreeIndexImpl { | ||
friend BTreeIndexTest; | ||
|
||
public: | ||
BTreeIndexImpl() = delete; | ||
explicit BTreeIndexImpl(std::shared_ptr<const BaseColumn> index_column); | ||
|
||
BTreeIndexImpl(const BTreeIndexImpl&) = delete; | ||
BTreeIndexImpl& operator=(const BTreeIndexImpl&) = delete; | ||
|
||
BTreeIndexImpl(BTreeIndexImpl&&) = default; | ||
BTreeIndexImpl& operator=(BTreeIndexImpl&&) = default; | ||
|
||
uint64_t memory_consumption() const override; | ||
|
||
Iterator lower_bound(DataType value) const; | ||
Iterator upper_bound(DataType value) const; | ||
|
||
Iterator lower_bound(const std::vector<AllTypeVariant>&) const override; | ||
Iterator upper_bound(const std::vector<AllTypeVariant>&) const override; | ||
Iterator cbegin() const override; | ||
Iterator cend() const override; | ||
|
||
protected: | ||
void _bulk_insert(const std::shared_ptr<const BaseColumn>); | ||
|
||
btree::btree_map<DataType, size_t> _btree; | ||
}; | ||
|
||
} // namespace opossum |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.