Skip to content

Commit

Permalink
Add unit tests.
Browse files Browse the repository at this point in the history
Signed-off-by: Johannes Kalmbach <[email protected]>
  • Loading branch information
joka921 committed Dec 18, 2024
1 parent 5510bb9 commit ecb6a2f
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 17 deletions.
32 changes: 15 additions & 17 deletions src/engine/GroupBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,25 +366,23 @@ ProtoResult GroupBy::computeResult(bool requestLaziness) {
}

if (useHashMapOptimization) {
if (subresult->isFullyMaterialized()) {
using Pair = std::pair<std::reference_wrapper<const IdTable>, LocalVocab>;
auto gen = [](const Result& input) -> cppcoro::generator<Pair> {
Pair p{input.idTable(), input.getCopyOfLocalVocab()};
co_yield p;
}(*subresult);
auto computeWithHashMap = [this, &metadataForUnsequentialData,
&groupByCols](auto&& subresults) {
auto doCompute = [&]<int NumCols> {
return computeGroupByForHashMapOptimization<NumCols>(
metadataForUnsequentialData->aggregateAliases_, std::move(gen),
metadataForUnsequentialData->aggregateAliases_, AD_FWD(subresults),
groupByCols);
};
return ad_utility::callFixedSize(groupByCols.size(), doCompute);
};

if (subresult->isFullyMaterialized()) {
// `computeWithHashMap` takes a range, so we artificially create one with
// a single input.
return computeWithHashMap(std::array{std::pair{
std::ref(subresult->idTable()), subresult->getCopyOfLocalVocab()}});
} else {
auto doCompute = [&]<int NumCols> {
return computeGroupByForHashMapOptimization<NumCols>(
metadataForUnsequentialData->aggregateAliases_,
std::move(subresult->idTables()), groupByCols);
};
return ad_utility::callFixedSize(groupByCols.size(), doCompute);
return computeWithHashMap(std::move(subresult->idTables()));
}
}

Expand Down Expand Up @@ -1512,6 +1510,8 @@ Result GroupBy::computeGroupByForHashMapOptimization(
getExecutionContext()->getAllocator(), aggregateAliases,
columnIndices.size());

ad_utility::Timer lookupTimer{ad_utility::Timer::Stopped};
ad_utility::Timer aggregationTimer{ad_utility::Timer::Stopped};
for (const auto& [inputTableRef, inputLocalVocab] : subresults) {
const IdTable& inputTable = inputTableRef;
localVocab.mergeWith(std::span{&inputLocalVocab, 1});
Expand All @@ -1525,8 +1525,6 @@ Result GroupBy::computeGroupByForHashMapOptimization(
_groupByVariables.begin(), _groupByVariables.end()};
evaluationContext._isPartOfGroupBy = true;

ad_utility::Timer lookupTimer{ad_utility::Timer::Stopped};
ad_utility::Timer aggregationTimer{ad_utility::Timer::Stopped};
for (size_t i = 0; i < inputTable.size();
i += GROUP_BY_HASH_MAP_BLOCK_SIZE) {
checkCancellation();
Expand Down Expand Up @@ -1572,10 +1570,10 @@ Result GroupBy::computeGroupByForHashMapOptimization(
}
aggregationTimer.stop();
}
runtimeInfo().addDetail("timeMapLookup", lookupTimer.msecs());
runtimeInfo().addDetail("timeAggregation", aggregationTimer.msecs());
}

runtimeInfo().addDetail("timeMapLookup", lookupTimer.msecs());
runtimeInfo().addDetail("timeAggregation", aggregationTimer.msecs());
IdTable resultTable =
createResultFromHashMap(aggregationData, aggregateAliases, &localVocab);
return {std::move(resultTable), resultSortedOn(), std::move(localVocab)};
Expand Down
43 changes: 43 additions & 0 deletions test/GroupByTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// Authors: Florian Kramer ([email protected])
// Johannes Kalmbach ([email protected])

#include <engine/SpatialJoinAlgorithms.h>
#include <gmock/gmock.h>

#include <cstdio>
Expand Down Expand Up @@ -36,6 +37,7 @@ using ::testing::Optional;

namespace {
auto I = IntId;
auto D = DoubleId;

// Return a matcher that checks, whether a given `std::optional<IdTable` has a
// value and that value is equal to `makeIdTableFromVector(table)`.
Expand Down Expand Up @@ -747,6 +749,47 @@ TEST_F(GroupByOptimizations, correctResultForHashMapOptimization) {
resultWithoutOptimization->asDebugString());
}

// _____________________________________________________________________________
TEST_F(GroupByOptimizations, hashMapOptimizationLazyAndMaterializedInputs) {
/* Setup query:
SELECT ?x (AVG(?y) as ?avg) WHERE {
# explicitly defined subresult.
} GROUP BY ?x
*/
// Setup three unsorted input blocks. The first column will be the grouped
// `?x`, and the second column the variable `?y` of which we compute the
// average.
auto runTest = [this](bool inputIsLazy) {
std::vector<IdTable> tables;
tables.push_back(makeIdTableFromVector({{3, 6}, {8, 27}, {5, 7}}, I));
tables.push_back(makeIdTableFromVector({{8, 27}, {5, 9}}, I));
tables.push_back(makeIdTableFromVector({{5, 2}, {3, 4}}, I));
// The expected averages are as follows: (3 -> 5.0), (5 -> 6.0), (8
// -> 27.0).
auto subtree = ad_utility::makeExecutionTree<ValuesForTesting>(
qec, std::move(tables),
std::vector<std::optional<Variable>>{Variable{"?x"}, Variable{"?y"}});
auto& values =
dynamic_cast<ValuesForTesting&>(*subtree->getRootOperation());
values.forceFullyMaterialized() = !inputIsLazy;

SparqlExpressionPimpl avgYPimpl = makeAvgPimpl(varY);
std::vector<Alias> aliasesAvgY{Alias{avgYPimpl, Variable{"?avg"}}};

// Calculate result with optimization
qec->getQueryTreeCache().clearAll();
RuntimeParameters().set<"group-by-hash-map-enabled">(true);
GroupBy groupBy{qec, variablesOnlyX, aliasesAvgY, std::move(subtree)};
auto result = groupBy.computeResultOnlyForTesting();
ASSERT_TRUE(result.isFullyMaterialized());
EXPECT_THAT(
result.idTable(),
matchesIdTableFromVector({{I(3), D(5)}, {I(5), D(6)}, {I(8), D(27)}}));
};
runTest(true);
runTest(false);
}

// _____________________________________________________________________________
TEST_F(GroupByOptimizations, correctResultForHashMapOptimizationForCountStar) {
/* Setup query:
Expand Down
2 changes: 2 additions & 0 deletions test/engine/ValuesForTesting.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ class ValuesForTesting : public Operation {
}
bool supportsLimit() const override { return supportsLimit_; }

bool& forceFullyMaterialized() { return forceFullyMaterialized_; }

private:
// ___________________________________________________________________________
string getCacheKeyImpl() const override {
Expand Down

0 comments on commit ecb6a2f

Please sign in to comment.