From c5a397ca8d5639b6d412b1289ff7a8b2d7bc7fa8 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 13 Feb 2025 12:54:47 +0100 Subject: [PATCH 1/3] This seems to work. Signed-off-by: Johannes Kalmbach --- src/engine/Bind.cpp | 59 +++++++++++++++++++++++++++++++++---------- src/engine/Bind.h | 4 +++ src/engine/Result.cpp | 4 +++ src/engine/Result.h | 2 ++ src/util/Iterators.h | 3 +++ 5 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index a0e7e78c9a..181a3af339 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -94,6 +94,45 @@ IdTable Bind::cloneSubView(const IdTable& idTable, return result; } +// Anonymous namespace: This class is not exposed outside this translation unit. +namespace { +// An input range that lazily applies a `BIND` operation to a fully materialized +// subresult by splitting the subresult into chunks of size `chunkSize_`. +template +struct LazyBindForMaterializedInput + : ad_utility::InputRangeFromGet { + ApplyBind applyBind_; + std::shared_ptr result_; + size_t chunkSize_; + size_t size_ = result_->idTable().size(); + size_t offset_ = 0; + + // Constructor. The `ApplyBind` function performs the actual `Bind`. + LazyBindForMaterializedInput(ApplyBind applyBind, + std::shared_ptr result, + size_t chunkSize_) + : applyBind_(std::move(applyBind)), + result_{std::move(result)}, + chunkSize_{chunkSize_} {} + + // The `get` function that is needed for the `InputRangeFromGet`. + std::optional get() { + if (offset_ >= size_) { + return std::nullopt; + } + auto curOffset = offset_; + offset_ += chunkSize_; + LocalVocab outVocab = result_->getCopyOfLocalVocab(); + IdTable idTable = + applyBind_(Bind::cloneSubView( + result_->idTable(), + {curOffset, std::min(size_, curOffset + chunkSize_)}), + &outVocab); + return Result::IdTableVocabPair{std::move(idTable), std::move(outVocab)}; + } +}; +} // namespace + // _____________________________________________________________________________ ProtoResult Bind::computeResult(bool requestLaziness) { _subtree->setLimit(getLimit()); @@ -108,21 +147,13 @@ ProtoResult Bind::computeResult(bool requestLaziness) { if (subRes->isFullyMaterialized()) { if (requestLaziness && subRes->idTable().size() > CHUNK_SIZE) { - return { - [](auto applyBind, - std::shared_ptr result) -> Result::Generator { - size_t size = result->idTable().size(); - for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) { - LocalVocab outVocab = result->getCopyOfLocalVocab(); - IdTable idTable = applyBind( - cloneSubView(result->idTable(), - {offset, std::min(size, offset + CHUNK_SIZE)}), - &outVocab); - co_yield {std::move(idTable), std::move(outVocab)}; - } - }(std::move(applyBind), std::move(subRes)), - resultSortedOn()}; + // The `LazyBindFor...` is the actual implementation, the `LazyResult` + // wraps it in a type-erased way. + return {Result::LazyResult{LazyBindForMaterializedInput{ + std::move(applyBind), std::move(subRes), CHUNK_SIZE}}, + resultSortedOn()}; } + // Make a deep copy of the local vocab from `subRes` and then add to it (in // case BIND adds a new word or words). // diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 0abd5b2cec..d76e8da794 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -45,9 +45,13 @@ class Bind : public Operation { private: ProtoResult computeResult(bool requestLaziness) override; + public: + // This function has to be public, as a free struct in the `.cpp` file uses + // it. static IdTable cloneSubView(const IdTable& idTable, const std::pair& subrange); + private: // Implementation for the binding of arbitrary expressions. IdTable computeExpressionBind( LocalVocab* localVocab, IdTable idTable, diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 3b476777bb..11ce5324fc 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -72,6 +72,10 @@ Result::Result(IdTableVocabPair pair, std::vector sortedBy) // _____________________________________________________________________________ Result::Result(Generator idTables, std::vector sortedBy) + : Result{LazyResult{std::move(idTables)}, std::move(sortedBy)} {} + +// _____________________________________________________________________________ +Result::Result(LazyResult idTables, std::vector sortedBy) : data_{GenContainer{[](auto idTables, auto sortedBy) -> Generator { std::optional previousId = std::nullopt; for (IdTableVocabPair& pair : idTables) { diff --git a/src/engine/Result.h b/src/engine/Result.h index c372cf7102..18d92e5806 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -117,6 +117,8 @@ class Result { LocalVocab&& localVocab); Result(IdTableVocabPair pair, std::vector sortedBy); Result(Generator idTables, std::vector sortedBy); + Result(LazyResult idTables, std::vector sortedBy); + // Prevent accidental copying of a result table. Result(const Result& other) = delete; Result& operator=(const Result& other) = delete; diff --git a/src/util/Iterators.h b/src/util/Iterators.h index 93ae1b51f4..c2d9209abd 100644 --- a/src/util/Iterators.h +++ b/src/util/Iterators.h @@ -276,6 +276,9 @@ class InputRangeFromGet { public: virtual ~InputRangeFromGet() = default; + InputRangeFromGet() = default; + InputRangeFromGet(InputRangeFromGet&&) = default; + InputRangeFromGet& operator=(InputRangeFromGet&&) = default; // Get the next value and store it. void getNextAndStore() { storage_ = get(); } From 63e68a2a77d2688659bda1964a7b5983588eea94 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 13 Feb 2025 12:58:35 +0100 Subject: [PATCH 2/3] Only the changes for this PR Signed-off-by: Johannes Kalmbach --- src/engine/Result.cpp | 4 ++++ src/engine/Result.h | 2 ++ src/util/Iterators.h | 3 +++ 3 files changed, 9 insertions(+) diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index 3b476777bb..11ce5324fc 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -72,6 +72,10 @@ Result::Result(IdTableVocabPair pair, std::vector sortedBy) // _____________________________________________________________________________ Result::Result(Generator idTables, std::vector sortedBy) + : Result{LazyResult{std::move(idTables)}, std::move(sortedBy)} {} + +// _____________________________________________________________________________ +Result::Result(LazyResult idTables, std::vector sortedBy) : data_{GenContainer{[](auto idTables, auto sortedBy) -> Generator { std::optional previousId = std::nullopt; for (IdTableVocabPair& pair : idTables) { diff --git a/src/engine/Result.h b/src/engine/Result.h index c372cf7102..18d92e5806 100644 --- a/src/engine/Result.h +++ b/src/engine/Result.h @@ -117,6 +117,8 @@ class Result { LocalVocab&& localVocab); Result(IdTableVocabPair pair, std::vector sortedBy); Result(Generator idTables, std::vector sortedBy); + Result(LazyResult idTables, std::vector sortedBy); + // Prevent accidental copying of a result table. Result(const Result& other) = delete; Result& operator=(const Result& other) = delete; diff --git a/src/util/Iterators.h b/src/util/Iterators.h index 93ae1b51f4..c2d9209abd 100644 --- a/src/util/Iterators.h +++ b/src/util/Iterators.h @@ -276,6 +276,9 @@ class InputRangeFromGet { public: virtual ~InputRangeFromGet() = default; + InputRangeFromGet() = default; + InputRangeFromGet(InputRangeFromGet&&) = default; + InputRangeFromGet& operator=(InputRangeFromGet&&) = default; // Get the next value and store it. void getNextAndStore() { storage_ = get(); } From 181941244d89aa1f8533c3ec62b1a1a2997bd656 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 13 Feb 2025 13:03:04 +0100 Subject: [PATCH 3/3] Make the constructors really explicit. Signed-off-by: Johannes Kalmbach --- src/util/Iterators.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/util/Iterators.h b/src/util/Iterators.h index c2d9209abd..57c5c66ea8 100644 --- a/src/util/Iterators.h +++ b/src/util/Iterators.h @@ -279,6 +279,8 @@ class InputRangeFromGet { InputRangeFromGet() = default; InputRangeFromGet(InputRangeFromGet&&) = default; InputRangeFromGet& operator=(InputRangeFromGet&&) = default; + InputRangeFromGet(const InputRangeFromGet&) = default; + InputRangeFromGet& operator=(const InputRangeFromGet&) = default; // Get the next value and store it. void getNextAndStore() { storage_ = get(); }