diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index 58428a5f3a..cc43379d9c 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -366,25 +366,23 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) { } if (useHashMapOptimization) { - if (subresult->isFullyMaterialized()) { - using Pair = std::pair, LocalVocab>; - auto gen = [](const Result& input) -> cppcoro::generator { - Pair p{input.idTable(), input.getCopyOfLocalVocab()}; - co_yield p; - }(*subresult); + auto computeWithHashMap = [this, &metadataForUnsequentialData, + &groupByCols](auto&& subresults) { auto doCompute = [&] { return computeGroupByForHashMapOptimization( - metadataForUnsequentialData->aggregateAliases_, std::move(gen), + metadataForUnsequentialData->aggregateAliases_, AD_FWD(subresults), groupByCols); }; return ad_utility::callFixedSize(groupByCols.size(), doCompute); + }; + + if (subresult->isFullyMaterialized()) { + // `computeWithHashMap` takes a range, so we artificially create one with + // a single input. + return computeWithHashMap(std::array{std::pair{ + std::ref(subresult->idTable()), subresult->getCopyOfLocalVocab()}}); } else { - auto doCompute = [&] { - return computeGroupByForHashMapOptimization( - metadataForUnsequentialData->aggregateAliases_, - std::move(subresult->idTables()), groupByCols); - }; - return ad_utility::callFixedSize(groupByCols.size(), doCompute); + return computeWithHashMap(std::move(subresult->idTables())); } } @@ -1512,6 +1510,8 @@ Result GroupBy::computeGroupByForHashMapOptimization( getExecutionContext()->getAllocator(), aggregateAliases, columnIndices.size()); + ad_utility::Timer lookupTimer{ad_utility::Timer::Stopped}; + ad_utility::Timer aggregationTimer{ad_utility::Timer::Stopped}; for (const auto& [inputTableRef, inputLocalVocab] : subresults) { const IdTable& inputTable = inputTableRef; localVocab.mergeWith(std::span{&inputLocalVocab, 1}); @@ -1525,8 +1525,6 @@ Result GroupBy::computeGroupByForHashMapOptimization( _groupByVariables.begin(), _groupByVariables.end()}; evaluationContext._isPartOfGroupBy = true; - ad_utility::Timer lookupTimer{ad_utility::Timer::Stopped}; - ad_utility::Timer aggregationTimer{ad_utility::Timer::Stopped}; for (size_t i = 0; i < inputTable.size(); i += GROUP_BY_HASH_MAP_BLOCK_SIZE) { checkCancellation(); @@ -1572,10 +1570,10 @@ Result GroupBy::computeGroupByForHashMapOptimization( } aggregationTimer.stop(); } - runtimeInfo().addDetail("timeMapLookup", lookupTimer.msecs()); - runtimeInfo().addDetail("timeAggregation", aggregationTimer.msecs()); } + runtimeInfo().addDetail("timeMapLookup", lookupTimer.msecs()); + runtimeInfo().addDetail("timeAggregation", aggregationTimer.msecs()); IdTable resultTable = createResultFromHashMap(aggregationData, aggregateAliases, &localVocab); return {std::move(resultTable), resultSortedOn(), std::move(localVocab)}; diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index 0c2e314e46..5aa0b1bc1b 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -3,6 +3,7 @@ // Authors: Florian Kramer (florian.kramer@mail.uni-freiburg.de) // Johannes Kalmbach (kalmbach@cs.uni-freiburg.de) +#include #include #include @@ -36,6 +37,7 @@ using ::testing::Optional; namespace { auto I = IntId; +auto D = DoubleId; // Return a matcher that checks, whether a given `std::optionalasDebugString()); } +// _____________________________________________________________________________ +TEST_F(GroupByOptimizations, hashMapOptimizationLazyAndMaterializedInputs) { + /* Setup query: + SELECT ?x (AVG(?y) as ?avg) WHERE { + # explicitly defined subresult. + } GROUP BY ?x + */ + // Setup three unsorted input blocks. The first column will be the grouped + // `?x`, and the second column the variable `?y` of which we compute the + // average. + auto runTest = [this](bool inputIsLazy) { + std::vector tables; + tables.push_back(makeIdTableFromVector({{3, 6}, {8, 27}, {5, 7}}, I)); + tables.push_back(makeIdTableFromVector({{8, 27}, {5, 9}}, I)); + tables.push_back(makeIdTableFromVector({{5, 2}, {3, 4}}, I)); + // The expected averages are as follows: (3 -> 5.0), (5 -> 6.0), (8 + // -> 27.0). + auto subtree = ad_utility::makeExecutionTree( + qec, std::move(tables), + std::vector>{Variable{"?x"}, Variable{"?y"}}); + auto& values = + dynamic_cast(*subtree->getRootOperation()); + values.forceFullyMaterialized() = !inputIsLazy; + + SparqlExpressionPimpl avgYPimpl = makeAvgPimpl(varY); + std::vector aliasesAvgY{Alias{avgYPimpl, Variable{"?avg"}}}; + + // Calculate result with optimization + qec->getQueryTreeCache().clearAll(); + RuntimeParameters().set<"group-by-hash-map-enabled">(true); + GroupBy groupBy{qec, variablesOnlyX, aliasesAvgY, std::move(subtree)}; + auto result = groupBy.computeResultOnlyForTesting(); + ASSERT_TRUE(result.isFullyMaterialized()); + EXPECT_THAT( + result.idTable(), + matchesIdTableFromVector({{I(3), D(5)}, {I(5), D(6)}, {I(8), D(27)}})); + }; + runTest(true); + runTest(false); +} + // _____________________________________________________________________________ TEST_F(GroupByOptimizations, correctResultForHashMapOptimizationForCountStar) { /* Setup query: diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index c02a9826bc..097ccd9c78 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -120,6 +120,8 @@ class ValuesForTesting : public Operation { } bool supportsLimit() const override { return supportsLimit_; } + bool& forceFullyMaterialized() { return forceFullyMaterialized_; } + private: // ___________________________________________________________________________ string getCacheKeyImpl() const override {