Skip to content

Commit

Permalink
Make the augmented metadata smaller.
Browse files Browse the repository at this point in the history
Signed-off-by: Johannes Kalmbach <[email protected]>
  • Loading branch information
joka921 committed Jan 15, 2025
1 parent acb6633 commit f4b10d6
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 83 deletions.
37 changes: 20 additions & 17 deletions src/index/CompressedRelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,8 @@ CompressedRelationReader::IdTableGenerator CompressedRelationReader::lazyScan(
ScanSpecification scanSpec,
std::vector<CompressedBlockMetadata> blockMetadata,
ColumnIndices additionalColumns, CancellationHandle cancellationHandle,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock,
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock,
LimitOffsetClause limitOffset) const {
AD_CONTRACT_CHECK(cancellationHandle);

Expand Down Expand Up @@ -497,7 +498,8 @@ IdTable CompressedRelationReader::scan(
std::span<const CompressedBlockMetadata> blocks,
ColumnIndicesRef additionalColumns,
const CancellationHandle& cancellationHandle,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock,
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock,
const LimitOffsetClause& limitOffset) const {
auto columnIndices = prepareColumnIndices(scanSpec, additionalColumns);
IdTable result(columnIndices.size(), allocator_);
Expand Down Expand Up @@ -529,7 +531,7 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock(
const ScanSpecification& scanSpec, const ScanImplConfig& scanConfig,
const CompressedBlockMetadata& blockMetadata,
std::optional<std::reference_wrapper<LazyScanMetadata>> scanMetadata,
const LocatedTriplesPerBlock& locatedTriples) const {
const LocatedTriplesPerBlockReadOnly& locatedTriples) const {
AD_CORRECTNESS_CHECK(ADDITIONAL_COLUMN_GRAPH_ID <
blockMetadata.offsetsAndCompressedSize_.size());

Expand Down Expand Up @@ -618,8 +620,8 @@ template <bool exactSize>
std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock)
const {
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock) const {
// Get all the blocks that possibly might contain our pair of col0Id and
// col1Id
auto relevantBlocks = getRelevantBlocks(scanSpec, blocks);
Expand Down Expand Up @@ -679,16 +681,16 @@ std::pair<size_t, size_t> CompressedRelationReader::getResultSizeImpl(
std::pair<size_t, size_t> CompressedRelationReader::getSizeEstimateForScan(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const {
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock) const {
return getResultSizeImpl<false>(scanSpec, blocks, locatedTriplesPerBlock);
}

// ____________________________________________________________________________
size_t CompressedRelationReader::getResultSizeOfScan(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock)
const {
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock) const {
auto [lower, upper] =
getResultSizeImpl<true>(scanSpec, blocks, locatedTriplesPerBlock);
AD_CORRECTNESS_CHECK(lower == upper);
Expand All @@ -702,7 +704,7 @@ IdTable CompressedRelationReader::getDistinctColIdsAndCountsImpl(
const ScanSpecification& scanSpec,
const std::vector<CompressedBlockMetadata>& allBlocksMetadata,
const CancellationHandle& cancellationHandle,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const {
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock) const {
// The result has two columns: one for the distinct `Id`s and one for their
// counts.
IdTableStatic<2> table(allocator_);
Expand Down Expand Up @@ -783,8 +785,8 @@ IdTable CompressedRelationReader::getDistinctColIdsAndCountsImpl(
IdTable CompressedRelationReader::getDistinctCol0IdsAndCounts(
const std::vector<CompressedBlockMetadata>& allBlocksMetadata,
const CancellationHandle& cancellationHandle,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock)
const {
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock) const {
ScanSpecification scanSpec{std::nullopt, std::nullopt, std::nullopt};
return getDistinctColIdsAndCountsImpl(
&CompressedBlockMetadata::PermutedTriple::col0Id_, scanSpec,
Expand All @@ -795,8 +797,8 @@ IdTable CompressedRelationReader::getDistinctCol0IdsAndCounts(
IdTable CompressedRelationReader::getDistinctCol1IdsAndCounts(
Id col0Id, const std::vector<CompressedBlockMetadata>& allBlocksMetadata,
const CancellationHandle& cancellationHandle,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock)
const {
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock) const {
ScanSpecification scanSpec{col0Id, std::nullopt, std::nullopt};

return getDistinctColIdsAndCountsImpl(
Expand Down Expand Up @@ -1044,7 +1046,8 @@ CompressedRelationReader::getBlocksFromMetadata(
// _____________________________________________________________________________
auto CompressedRelationReader::getFirstAndLastTriple(
const CompressedRelationReader::ScanSpecAndBlocks& metadataAndBlocks,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock) const
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock) const
-> std::optional<ScanSpecAndBlocksAndBounds::FirstAndLastTriple> {
auto relevantBlocks = getBlocksFromMetadata(metadataAndBlocks);
if (relevantBlocks.empty()) {
Expand Down Expand Up @@ -1502,8 +1505,8 @@ auto CompressedRelationWriter::createPermutationPair(
std::optional<CompressedRelationMetadata>
CompressedRelationReader::getMetadataForSmallRelation(
const std::vector<CompressedBlockMetadata>& allBlocksMetadata, Id col0Id,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock)
const {
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock) const {
CompressedRelationMetadata metadata;
metadata.col0Id_ = col0Id;
metadata.offsetInBlock_ = 0;
Expand Down Expand Up @@ -1542,7 +1545,7 @@ CompressedRelationReader::getMetadataForSmallRelation(
auto CompressedRelationReader::getScanConfig(
const ScanSpecification& scanSpec,
CompressedRelationReader::ColumnIndicesRef additionalColumns,
const LocatedTriplesPerBlock& locatedTriples) -> ScanImplConfig {
const LocatedTriplesPerBlockReadOnly& locatedTriples) -> ScanImplConfig {
auto columnIndices = prepareColumnIndices(scanSpec, additionalColumns);
// Determine the index of the graph column (which we need either for
// filtering or for the output or both) and whether we need it for
Expand Down
32 changes: 16 additions & 16 deletions src/index/CompressedRelation.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
// Forward declarations
class IdTable;

class LocatedTriplesPerBlock;
class LocatedTriplesPerBlockReadOnly;

// This type is used to buffer small relations that will be stored in the same
// block.
Expand Down Expand Up @@ -459,7 +459,7 @@ class CompressedRelationReader {
struct ScanImplConfig {
ColumnIndices scanColumns_;
FilterDuplicatesAndGraphs graphFilter_;
const LocatedTriplesPerBlock& locatedTriples_;
const LocatedTriplesPerBlockReadOnly& locatedTriples_;
};

// The specification of scan, together with the blocks on which this scan is
Expand Down Expand Up @@ -577,7 +577,7 @@ class CompressedRelationReader {
std::span<const CompressedBlockMetadata> blocks,
ColumnIndicesRef additionalColumns,
const CancellationHandle& cancellationHandle,
const LocatedTriplesPerBlock& locatedTriplesPerBlock,
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock,
const LimitOffsetClause& limitOffset = {}) const;

// Similar to `scan` (directly above), but the result of the scan is lazily
Expand All @@ -587,7 +587,7 @@ class CompressedRelationReader {
ScanSpecification scanSpec,
std::vector<CompressedBlockMetadata> blockMetadata,
ColumnIndices additionalColumns, CancellationHandle cancellationHandle,
const LocatedTriplesPerBlock& locatedTriplesPerBlock,
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock,
LimitOffsetClause limitOffset = {}) const;

// Get the exact size of the result of the scan, taking the given located
Expand All @@ -596,15 +596,15 @@ class CompressedRelationReader {
size_t getResultSizeOfScan(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const;
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock) const;

// Get a lower and an upper bound for the size of the result of the scan. For
// this call, it is enough that each located triple knows the block to which
// it belongs (which is the case for `LocatedTriplesPerBlock`).
// it belongs (which is the case for `LocatedTriplesPerBlockReadOnly`).
std::pair<size_t, size_t> getSizeEstimateForScan(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const;
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock) const;

private:
// Common implementation of `getResultSizeOfScan` and `getSizeEstimateForScan`
Expand All @@ -613,27 +613,27 @@ class CompressedRelationReader {
std::pair<size_t, size_t> getResultSizeImpl(
const ScanSpecification& scanSpec,
const vector<CompressedBlockMetadata>& blocks,
[[maybe_unused]] const LocatedTriplesPerBlock& locatedTriplesPerBlock)
const;
[[maybe_unused]] const LocatedTriplesPerBlockReadOnly&
locatedTriplesPerBlock) const;

public:
// For a given relation, determine the `col1Id`s and their counts. This is
// used for `computeGroupByObjectWithCount`.
IdTable getDistinctCol1IdsAndCounts(
Id col0Id, const std::vector<CompressedBlockMetadata>& allBlocksMetadata,
const CancellationHandle& cancellationHandle,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const;
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock) const;

// For all `col0Ids` determine their counts. This is
// used for `computeGroupByForFullScan`.
IdTable getDistinctCol0IdsAndCounts(
const std::vector<CompressedBlockMetadata>& allBlocksMetadata,
const CancellationHandle& cancellationHandle,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const;
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock) const;

std::optional<CompressedRelationMetadata> getMetadataForSmallRelation(
const std::vector<CompressedBlockMetadata>& allBlocksMetadata, Id col0Id,
const LocatedTriplesPerBlock&) const;
const LocatedTriplesPerBlockReadOnly&) const;

// Get the contiguous subrange of the given `blockB` for the blocks
// that contain the triples that have the relationId/col0Id that was specified
Expand All @@ -656,7 +656,7 @@ class CompressedRelationReader {
std::optional<ScanSpecAndBlocksAndBounds::FirstAndLastTriple>
getFirstAndLastTriple(
const ScanSpecAndBlocks& metadataAndBlocks,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const;
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock) const;

// Get access to the underlying allocator
const Allocator& allocator() const { return allocator_; }
Expand Down Expand Up @@ -709,7 +709,7 @@ class CompressedRelationReader {
const ScanSpecification& scanSpec, const ScanImplConfig& scanConfig,
const CompressedBlockMetadata& blockMetadata,
std::optional<std::reference_wrapper<LazyScanMetadata>> scanMetadata,
const LocatedTriplesPerBlock&) const;
const LocatedTriplesPerBlockReadOnly&) const;

// Yield all the blocks in the range `[beginBlock, endBlock)`. If the
// `columnIndices` are set, only the specified columns from the blocks
Expand Down Expand Up @@ -742,7 +742,7 @@ class CompressedRelationReader {

static ScanImplConfig getScanConfig(
const ScanSpecification& scanSpec, ColumnIndicesRef additionalColumns,
const LocatedTriplesPerBlock& locatedTriples);
const LocatedTriplesPerBlockReadOnly& locatedTriples);

// The common implementation for `getDistinctCol0IdsAndCounts` and
// `getDistinctCol1IdsAndCounts`.
Expand All @@ -752,7 +752,7 @@ class CompressedRelationReader {
const ScanSpecification& scanSpec,
const std::vector<CompressedBlockMetadata>& allBlocksMetadata,
const CancellationHandle& cancellationHandle,
const LocatedTriplesPerBlock& locatedTriplesPerBlock) const;
const LocatedTriplesPerBlockReadOnly& locatedTriplesPerBlock) const;
};

// TODO<joka921>
Expand Down
9 changes: 7 additions & 2 deletions src/index/DeltaTriples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ void DeltaTriples::modifyTriplesImpl(CancellationHandle cancellationHandle,
}

// ____________________________________________________________________________
const LocatedTriplesPerBlock&
const LocatedTriplesPerBlockReadOnly&
LocatedTriplesSnapshot::getLocatedTriplesForPermutation(
Permutation::Enum permutation) const {
return locatedTriplesPerBlock_[static_cast<int>(permutation)];
Expand All @@ -183,8 +183,13 @@ SharedLocatedTriplesSnapshot DeltaTriples::getSnapshot() {
// copies), hence the explicit `clone`.
auto snapshotIndex = nextSnapshotIndex_;
++nextSnapshotIndex_;
auto makeReadOnly = [this]() {
LocatedTriplesPerBlockAllPermutationsReadOnly res;
ql::ranges::copy(locatedTriples(), res.begin());
return res;
};
return SharedLocatedTriplesSnapshot{std::make_shared<LocatedTriplesSnapshot>(
locatedTriples(), localVocab_.clone(), snapshotIndex)};
makeReadOnly(), localVocab_.clone(), snapshotIndex)};
}

// ____________________________________________________________________________
Expand Down
8 changes: 5 additions & 3 deletions src/index/DeltaTriples.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,20 @@
// permutations.
using LocatedTriplesPerBlockAllPermutations =
std::array<LocatedTriplesPerBlock, Permutation::ALL.size()>;
using LocatedTriplesPerBlockAllPermutationsReadOnly =
std::array<LocatedTriplesPerBlockReadOnly, Permutation::ALL.size()>;

// The locations of a set of delta triples (triples that were inserted or
// deleted since the index was built) in each of the six permutations, and a
// local vocab. This is all the information that is required to perform a query
// that correctly respects these delta triples, hence the name.
struct LocatedTriplesSnapshot {
LocatedTriplesPerBlockAllPermutations locatedTriplesPerBlock_;
LocatedTriplesPerBlockAllPermutationsReadOnly locatedTriplesPerBlock_;
LocalVocab localVocab_;
// A unique index for this snapshot that is used in the query cache.
size_t index_;
// Get `TripleWithPosition` objects for given permutation.
const LocatedTriplesPerBlock& getLocatedTriplesForPermutation(
const LocatedTriplesPerBlockReadOnly& getLocatedTriplesForPermutation(
Permutation::Enum permutation) const;
};

Expand Down Expand Up @@ -121,7 +123,7 @@ class DeltaTriples {
public:
const LocalVocab& localVocab() const { return localVocab_; }

const LocatedTriplesPerBlock& getLocatedTriplesForPermutation(
const LocatedTriplesPerBlockReadOnly& getLocatedTriplesForPermutation(
Permutation::Enum permutation) const {
return locatedTriples_.at(static_cast<size_t>(permutation));
}
Expand Down
31 changes: 18 additions & 13 deletions src/index/LocatedTriples.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,13 @@ std::vector<LocatedTriple> LocatedTriple::locateTriplesInPermutation(
}

// ____________________________________________________________________________
bool LocatedTriplesPerBlock::hasUpdates(size_t blockIndex) const {
bool LocatedTriplesPerBlockReadOnly::hasUpdates(size_t blockIndex) const {
return map_.contains(blockIndex);
}

// ____________________________________________________________________________
NumAddedAndDeleted LocatedTriplesPerBlock::numTriples(size_t blockIndex) const {
NumAddedAndDeleted LocatedTriplesPerBlockReadOnly::numTriples(
size_t blockIndex) const {
// If no located triples for `blockIndex_` exist, there is no entry in `map_`.
if (!hasUpdates(blockIndex)) {
return {0, 0};
Expand Down Expand Up @@ -99,8 +100,8 @@ auto tieLocatedTriple(auto& lt) {

// ____________________________________________________________________________
template <size_t numIndexColumns, bool includeGraphColumn>
IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex,
const IdTable& block) const {
IdTable LocatedTriplesPerBlockReadOnly::mergeTriplesImpl(
size_t blockIndex, const IdTable& block) const {
// This method should only be called if there are located triples in the
// specified block.
AD_CONTRACT_CHECK(map_.contains(blockIndex));
Expand Down Expand Up @@ -185,10 +186,9 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex,
}

// ____________________________________________________________________________
IdTable LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
const IdTable& block,
size_t numIndexColumns,
bool includeGraphColumn) const {
IdTable LocatedTriplesPerBlockReadOnly::mergeTriples(
size_t blockIndex, const IdTable& block, size_t numIndexColumns,
bool includeGraphColumn) const {
// The following code does nothing more than turn `numIndexColumns` and
// `includeGraphColumn` into template parameters of `mergeTriplesImpl`.
auto mergeTriplesImplHelper = [numIndexColumns, blockIndex, &block,
Expand Down Expand Up @@ -246,8 +246,8 @@ void LocatedTriplesPerBlock::erase(size_t blockIndex,
// ____________________________________________________________________________
void LocatedTriplesPerBlock::setOriginalMetadata(
std::vector<CompressedBlockMetadata> metadata) {
originalMetadata_ = std::move(metadata);
updateAugmentedMetadata();
augmentedMetadata_ = std::move(metadata);
originalMetadata_.reset();
}

// Update the `blockMetadata`, such that its graph info is consistent with the
Expand Down Expand Up @@ -297,7 +297,12 @@ void LocatedTriplesPerBlock::updateAugmentedMetadata() {
// TODO<C++23> use view::enumerate
size_t blockIndex = 0;
// Copy to preserve originalMetadata_.
augmentedMetadata_ = originalMetadata_;
AD_CONTRACT_CHECK(augmentedMetadata_.has_value());
if (!originalMetadata_.has_value()) {
originalMetadata_ = augmentedMetadata_;
} else {
augmentedMetadata_ = originalMetadata_;
}
for (auto& blockMetadata : augmentedMetadata_.value()) {
if (hasUpdates(blockIndex)) {
const auto& blockUpdates = map_.at(blockIndex);
Expand Down Expand Up @@ -352,8 +357,8 @@ std::ostream& operator<<(std::ostream& os, const std::vector<IdTriple<0>>& v) {
}

// ____________________________________________________________________________
bool LocatedTriplesPerBlock::isLocatedTriple(const IdTriple<0>& triple,
bool isInsertion) const {
bool LocatedTriplesPerBlockReadOnly::isLocatedTriple(const IdTriple<0>& triple,
bool isInsertion) const {
auto blockContains = [&triple, isInsertion](const LocatedTriples& lt,
size_t blockIndex) {
LocatedTriple locatedTriple{blockIndex, triple, isInsertion};
Expand Down
Loading

0 comments on commit f4b10d6

Please sign in to comment.