From 6757a237c59248b13f6c3f991c5841d0cd8c7d0d Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 19 Feb 2025 10:25:02 +0100 Subject: [PATCH] Address PR comments --- src/engine/Bind.h | 2 +- src/engine/CartesianProductJoin.h | 4 ++-- src/engine/CountAvailablePredicates.h | 4 ++-- src/engine/Describe.h | 3 +-- src/engine/Distinct.h | 3 +-- src/engine/ExistsJoin.h | 2 +- src/engine/Filter.h | 2 +- src/engine/GroupBy.h | 1 + src/engine/HasPredicateScan.h | 2 +- src/engine/IndexScan.h | 4 ++-- src/engine/Join.h | 4 ++-- src/engine/Minus.h | 4 ++-- src/engine/MultiColumnJoin.h | 2 +- src/engine/Operation.cpp | 2 ++ src/engine/Operation.h | 2 ++ src/engine/OptionalJoin.h | 2 +- src/engine/OrderBy.h | 2 +- src/engine/PathSearch.cpp | 17 ++++++++--------- src/engine/PathSearch.h | 2 +- src/engine/Service.h | 2 +- src/engine/Sort.h | 2 +- src/engine/SpatialJoin.h | 2 +- src/engine/TextIndexScanForEntity.h | 2 +- src/engine/TextIndexScanForWord.h | 2 +- src/engine/TextLimit.h | 2 +- src/engine/TransitivePathBase.h | 8 ++++++++ src/engine/TransitivePathBinSearch.cpp | 10 ++-------- src/engine/TransitivePathBinSearch.h | 2 +- src/engine/TransitivePathHashMap.cpp | 10 ++-------- src/engine/TransitivePathHashMap.h | 4 ++-- src/engine/Union.h | 2 +- src/engine/Values.h | 2 +- 32 files changed, 56 insertions(+), 58 deletions(-) diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 87e09101be..86f271d3ec 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -31,9 +31,9 @@ class Bind : public Operation { std::vector getChildren() override; size_t getCostEstimate() override; bool supportsLimit() const override; - std::unique_ptr cloneImpl() const override; private: + std::unique_ptr cloneImpl() const override; uint64_t getSizeEstimateBeforeLimit() override; public: diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index 01a332b61f..817c620ddb 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -61,6 +61,8 @@ class CartesianProductJoin : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; + std::unique_ptr cloneImpl() const override; + public: float getMultiplicity([[maybe_unused]] size_t col) override; @@ -69,8 +71,6 @@ class CartesianProductJoin : public Operation { // The Cartesian product join can efficiently evaluate a limited result. [[nodiscard]] bool supportsLimit() const override { return true; } - std::unique_ptr cloneImpl() const override; - protected: // Don't promise any sorting of the result. // TODO Depending on the implementation we could propagate sorted diff --git a/src/engine/CountAvailablePredicates.h b/src/engine/CountAvailablePredicates.h index dd8626ee7c..38d5a4a022 100644 --- a/src/engine/CountAvailablePredicates.h +++ b/src/engine/CountAvailablePredicates.h @@ -64,6 +64,8 @@ class CountAvailablePredicates : public Operation { private: uint64_t getSizeEstimateBeforeLimit() override; + std::unique_ptr cloneImpl() const override; + public: size_t getCostEstimate() override; @@ -72,8 +74,6 @@ class CountAvailablePredicates : public Operation { const Variable& predicateVariable() const { return predicateVariable_; } const Variable& countVariable() const { return countVariable_; } - std::unique_ptr cloneImpl() const override; - private: /** * @brief Computes all relations that have one of input[inputCol]'s entities diff --git a/src/engine/Describe.h b/src/engine/Describe.h index 1d7647f3bb..ab566ed286 100644 --- a/src/engine/Describe.h +++ b/src/engine/Describe.h @@ -46,9 +46,8 @@ class Describe : public Operation { float getMultiplicity(size_t col) override; bool knownEmptyResult() override; - std::unique_ptr cloneImpl() const override; - private: + std::unique_ptr cloneImpl() const override; [[nodiscard]] vector resultSortedOn() const override; ProtoResult computeResult(bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Distinct.h b/src/engine/Distinct.h index 5da4a8f807..8257980b0b 100644 --- a/src/engine/Distinct.h +++ b/src/engine/Distinct.h @@ -48,12 +48,11 @@ class Distinct : public Operation { return {subtree_.get()}; } - std::unique_ptr cloneImpl() const override; - protected: [[nodiscard]] string getCacheKeyImpl() const override; private: + std::unique_ptr cloneImpl() const override; ProtoResult computeResult(bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/ExistsJoin.h b/src/engine/ExistsJoin.h index c6db23d432..82c0e5a318 100644 --- a/src/engine/ExistsJoin.h +++ b/src/engine/ExistsJoin.h @@ -75,9 +75,9 @@ class ExistsJoin : public Operation { return {left_.get(), right_.get()}; } + private: std::unique_ptr cloneImpl() const override; - private: ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Filter.h b/src/engine/Filter.h index 56bf3657fb..c8e55c195b 100644 --- a/src/engine/Filter.h +++ b/src/engine/Filter.h @@ -53,9 +53,9 @@ class Filter : public Operation { return _subtree->getMultiplicity(col); } + private: std::unique_ptr cloneImpl() const override; - private: VariableToColumnMap computeVariableToColumnMap() const override { return _subtree->getVariableColumns(); } diff --git a/src/engine/GroupBy.h b/src/engine/GroupBy.h index 2afbe1fa22..f1c5e44f74 100644 --- a/src/engine/GroupBy.h +++ b/src/engine/GroupBy.h @@ -576,6 +576,7 @@ class GroupBy : public Operation { // GROUP BY. This is used by some of the optimizations above. bool isVariableBoundInSubtree(const Variable& variable) const; + private: std::unique_ptr cloneImpl() const override; // TODO implement optimization when *additional* Variables are diff --git a/src/engine/HasPredicateScan.h b/src/engine/HasPredicateScan.h index e4332d38a7..6668c6fcdb 100644 --- a/src/engine/HasPredicateScan.h +++ b/src/engine/HasPredicateScan.h @@ -108,9 +108,9 @@ class HasPredicateScan : public Operation { ProtoResult computeSubqueryS(IdTable* result, const CompactVectorOfStrings& patterns); + private: std::unique_ptr cloneImpl() const override; - private: ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; [[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index 9c93af8bfa..13e1a9955e 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -161,8 +161,6 @@ class IndexScan final : public Operation { // An index scan can directly and efficiently support LIMIT and OFFSET [[nodiscard]] bool supportsLimit() const override { return true; } - std::unique_ptr cloneImpl() const override; - Permutation::Enum permutation() const { return permutation_; } // Return the stored triple in the order that corresponds to the @@ -178,6 +176,8 @@ class IndexScan final : public Operation { const CompressedRelationReader::LazyScanMetadata& metadata); private: + std::unique_ptr cloneImpl() const override; + ProtoResult computeResult(bool requestLaziness) override; vector getChildren() override { return {}; } diff --git a/src/engine/Join.h b/src/engine/Join.h index b08e6e079e..6a90a1f438 100644 --- a/src/engine/Join.h +++ b/src/engine/Join.h @@ -142,12 +142,12 @@ class Join : public Operation { static void hashJoin(const IdTable& dynA, ColumnIndex jc1, const IdTable& dynB, ColumnIndex jc2, IdTable* dynRes); - std::unique_ptr cloneImpl() const override; - protected: virtual string getCacheKeyImpl() const override; private: + std::unique_ptr cloneImpl() const override; + ProtoResult computeResult(bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Minus.h b/src/engine/Minus.h index abce4d9ec6..6a7964d274 100644 --- a/src/engine/Minus.h +++ b/src/engine/Minus.h @@ -25,7 +25,7 @@ class Minus : public Operation { // Uninitialized Object for testing the computeMinus method struct OnlyForTestingTag {}; - explicit Minus(OnlyForTestingTag){}; + explicit Minus(OnlyForTestingTag) {}; protected: string getCacheKeyImpl() const override; @@ -62,9 +62,9 @@ class Minus : public Operation { const vector>& matchedColumns, IdTable* result) const; + private: std::unique_ptr cloneImpl() const override; - private: /** * @brief Compares the two rows under the assumption that the first * entries of the rows are equal. diff --git a/src/engine/MultiColumnJoin.h b/src/engine/MultiColumnJoin.h index fc31b06199..6ea855c721 100644 --- a/src/engine/MultiColumnJoin.h +++ b/src/engine/MultiColumnJoin.h @@ -62,9 +62,9 @@ class MultiColumnJoin : public Operation { const std::vector>& joinColumns, IdTable* resultMightBeUnsorted); + private: std::unique_ptr cloneImpl() const override; - private: ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index b5ef712043..771c3c4b51 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -649,5 +649,7 @@ std::unique_ptr Operation::clone() const { return true; }; AD_CORRECTNESS_CHECK(areChildrenDifferent()); + AD_CORRECTNESS_CHECK(variableToColumnMap_ == result->variableToColumnMap_); + AD_EXPENSIVE_CHECK(getCacheKey() == result->getCacheKey()); return result; } diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 1a3d7332a7..5ef65ae0f0 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -316,9 +316,11 @@ class Operation { const auto& getLimit() const { return _limit; } + private: // Actual implementation of `clone()` without extra checks. virtual std::unique_ptr cloneImpl() const = 0; + public: // Create a deep copy of this operation. std::unique_ptr clone() const; diff --git a/src/engine/OptionalJoin.h b/src/engine/OptionalJoin.h index f6d3dfe214..1771db1f37 100644 --- a/src/engine/OptionalJoin.h +++ b/src/engine/OptionalJoin.h @@ -66,9 +66,9 @@ class OptionalJoin : public Operation { IdTable* dynResult, Implementation implementation = Implementation::GeneralCase); + private: std::unique_ptr cloneImpl() const override; - private: void computeSizeEstimateAndMultiplicities(); ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; diff --git a/src/engine/OrderBy.h b/src/engine/OrderBy.h index 854e9ae032..c7ed7c27bf 100644 --- a/src/engine/OrderBy.h +++ b/src/engine/OrderBy.h @@ -77,9 +77,9 @@ class OrderBy : public Operation { return {subtree_.get()}; } + private: std::unique_ptr cloneImpl() const override; - private: ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override { diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 0fe615b2fa..8acdd2c2fd 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -476,14 +476,13 @@ void PathSearch::pathsToResultTable(IdTable& tableDyn, PathsLimited& paths, std::unique_ptr PathSearch::cloneImpl() const { auto copy = std::make_unique(*this); copy->subtree_ = subtree_->clone(); - if (sourceTree_.has_value()) { - copy->sourceTree_ = sourceTree_.value()->clone(); - } - if (targetTree_.has_value()) { - copy->targetTree_ = targetTree_.value()->clone(); - } - if (sourceAndTargetTree_.has_value()) { - copy->sourceAndTargetTree_ = sourceAndTargetTree_.value()->clone(); - } + auto cloneIfNonEmpty = [](auto& tree) { + if (tree.has_value()) { + tree = tree.value()->clone(); + } + }; + cloneIfNonEmpty(copy->sourceTree_); + cloneIfNonEmpty(copy->targetTree_); + cloneIfNonEmpty(copy->sourceAndTargetTree_); return copy; } diff --git a/src/engine/PathSearch.h b/src/engine/PathSearch.h index 45b64ee08b..e9dc8b537f 100644 --- a/src/engine/PathSearch.h +++ b/src/engine/PathSearch.h @@ -252,9 +252,9 @@ class PathSearch : public Operation { Result computeResult([[maybe_unused]] bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; + private: std::unique_ptr cloneImpl() const override; - private: std::pair, std::span> handleSearchSides() const; /** diff --git a/src/engine/Service.h b/src/engine/Service.h index 475db7da54..6a914f66c6 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -102,9 +102,9 @@ class Service : public Operation { std::shared_ptr right, bool rightOnly, bool requestLaziness); + private: std::unique_ptr cloneImpl() const override; - private: // The string returned by this function is used as cache key. std::string getCacheKeyImpl() const override; diff --git a/src/engine/Sort.h b/src/engine/Sort.h index f1a98219e9..5075d79c46 100644 --- a/src/engine/Sort.h +++ b/src/engine/Sort.h @@ -66,9 +66,9 @@ class Sort : public Operation { return {subtree_.get()}; } + private: std::unique_ptr cloneImpl() const override; - private: virtual ProtoResult computeResult( [[maybe_unused]] bool requestLaziness) override; diff --git a/src/engine/SpatialJoin.h b/src/engine/SpatialJoin.h index fbdec20dd3..fc94a51866 100644 --- a/src/engine/SpatialJoin.h +++ b/src/engine/SpatialJoin.h @@ -174,9 +174,9 @@ class SpatialJoin : public Operation { return childRight_; } + private: std::unique_ptr cloneImpl() const override; - private: // helper function to generate a variable to column map from `childRight_` // that only contains the columns selected by `config_.payloadVariables_` // and (automatically added) the `config_.right_` variable. diff --git a/src/engine/TextIndexScanForEntity.h b/src/engine/TextIndexScanForEntity.h index 3c8e1e500e..fd68377c33 100644 --- a/src/engine/TextIndexScanForEntity.h +++ b/src/engine/TextIndexScanForEntity.h @@ -92,9 +92,9 @@ class TextIndexScanForEntity : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; + private: std::unique_ptr cloneImpl() const override; - private: const VocabIndex& getVocabIndexOfFixedEntity() const { AD_CONTRACT_CHECK(hasFixedEntity()); return std::get(varOrFixed_.entity_).second; diff --git a/src/engine/TextIndexScanForWord.h b/src/engine/TextIndexScanForWord.h index 0a855c78b3..cfe5dff415 100644 --- a/src/engine/TextIndexScanForWord.h +++ b/src/engine/TextIndexScanForWord.h @@ -44,9 +44,9 @@ class TextIndexScanForWord : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; + private: std::unique_ptr cloneImpl() const override; - private: // Returns a Result containing an IdTable with the columns being // the text variable and the completed word (if it was prefixed) ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; diff --git a/src/engine/TextLimit.h b/src/engine/TextLimit.h index 57f94867a1..552e19923a 100644 --- a/src/engine/TextLimit.h +++ b/src/engine/TextLimit.h @@ -61,9 +61,9 @@ class TextLimit : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; + private: std::unique_ptr cloneImpl() const override; - private: ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override; vector getChildren() override { return {child_.get()}; } diff --git a/src/engine/TransitivePathBase.h b/src/engine/TransitivePathBase.h index 71607eed5e..633d465cf8 100644 --- a/src/engine/TransitivePathBase.h +++ b/src/engine/TransitivePathBase.h @@ -55,6 +55,14 @@ struct TransitivePathSide { // TODO use ql::ranges::starts_with return (!sortedOn.empty() && sortedOn[0] == col); } + + TransitivePathSide clone() const { + TransitivePathSide copy = *this; + if (copy.treeAndCol_.has_value()) { + copy.treeAndCol_.value().first = copy.treeAndCol_.value().first->clone(); + } + return copy; + } }; // We deliberately use the `std::` variants of a hash set and hash map because diff --git a/src/engine/TransitivePathBinSearch.cpp b/src/engine/TransitivePathBinSearch.cpp index e7da7a5e5a..970f5ee940 100644 --- a/src/engine/TransitivePathBinSearch.cpp +++ b/src/engine/TransitivePathBinSearch.cpp @@ -36,13 +36,7 @@ BinSearchMap TransitivePathBinSearch::setupEdgesMap( std::unique_ptr TransitivePathBinSearch::cloneImpl() const { auto copy = std::make_unique(*this); copy->subtree_ = subtree_->clone(); - if (copy->lhs_.treeAndCol_.has_value()) { - copy->lhs_.treeAndCol_.value().first = - lhs_.treeAndCol_.value().first->clone(); - } - if (copy->rhs_.treeAndCol_.has_value()) { - copy->rhs_.treeAndCol_.value().first = - rhs_.treeAndCol_.value().first->clone(); - } + copy->lhs_ = lhs_.clone(); + copy->rhs_ = rhs_.clone(); return copy; } diff --git a/src/engine/TransitivePathBinSearch.h b/src/engine/TransitivePathBinSearch.h index 1fc7532fd8..faeda642ce 100644 --- a/src/engine/TransitivePathBinSearch.h +++ b/src/engine/TransitivePathBinSearch.h @@ -69,9 +69,9 @@ class TransitivePathBinSearch : public TransitivePathImpl { TransitivePathSide rightSide, size_t minDist, size_t maxDist); + private: std::unique_ptr cloneImpl() const override; - private: // initialize the map from the subresult BinSearchMap setupEdgesMap( const IdTable& dynSub, const TransitivePathSide& startSide, diff --git a/src/engine/TransitivePathHashMap.cpp b/src/engine/TransitivePathHashMap.cpp index 033fd6a006..7d03f751e8 100644 --- a/src/engine/TransitivePathHashMap.cpp +++ b/src/engine/TransitivePathHashMap.cpp @@ -49,13 +49,7 @@ HashMapWrapper TransitivePathHashMap::setupEdgesMap( std::unique_ptr TransitivePathHashMap::cloneImpl() const { auto copy = std::make_unique(*this); copy->subtree_ = subtree_->clone(); - if (copy->lhs_.treeAndCol_.has_value()) { - copy->lhs_.treeAndCol_.value().first = - lhs_.treeAndCol_.value().first->clone(); - } - if (copy->rhs_.treeAndCol_.has_value()) { - copy->rhs_.treeAndCol_.value().first = - rhs_.treeAndCol_.value().first->clone(); - } + copy->lhs_ = lhs_.clone(); + copy->rhs_ = rhs_.clone(); return copy; } diff --git a/src/engine/TransitivePathHashMap.h b/src/engine/TransitivePathHashMap.h index f871de6794..45f9832b0a 100644 --- a/src/engine/TransitivePathHashMap.h +++ b/src/engine/TransitivePathHashMap.h @@ -23,7 +23,7 @@ struct HashMapWrapper { Set emptySet_; HashMapWrapper(Map map, ad_utility::AllocatorWithLimit allocator) - : map_(std::move(map)), emptySet_(allocator){}; + : map_(std::move(map)), emptySet_(allocator) {}; /** * @brief Return the successors for the given Id. The successors are all ids, @@ -58,9 +58,9 @@ class TransitivePathHashMap : public TransitivePathImpl { TransitivePathSide rightSide, size_t minDist, size_t maxDist); + private: std::unique_ptr cloneImpl() const override; - private: /** * @brief Prepare a Map and a nodes vector for the transitive hull * computation. diff --git a/src/engine/Union.h b/src/engine/Union.h index ab00df222c..3d87664380 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -61,9 +61,9 @@ class Union : public Operation { return {_subtrees[0].get(), _subtrees[1].get()}; } + private: std::unique_ptr cloneImpl() const override; - private: ProtoResult computeResult(bool requestLaziness) override; VariableToColumnMap computeVariableToColumnMap() const override; diff --git a/src/engine/Values.h b/src/engine/Values.h index 372cae490d..f490fa4993 100644 --- a/src/engine/Values.h +++ b/src/engine/Values.h @@ -53,9 +53,9 @@ class Values : public Operation { VariableToColumnMap computeVariableToColumnMap() const override; + private: std::unique_ptr cloneImpl() const override; - private: // Compute the per-column multiplicity of the parsed values. void computeMultiplicities();