diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index 79a3783f86..f7bc2b335f 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -12,6 +12,7 @@ #include #include "engine/Sort.h" +#include "engine/Union.h" #include "global/RuntimeParameters.h" using std::string; @@ -164,7 +165,14 @@ std::shared_ptr QueryExecutionTree::createSortedTree( return qet; } + // Push down sort into Union. QueryExecutionContext* qec = qet->getRootOperation()->getExecutionContext(); + if (auto unionOperation = + std::dynamic_pointer_cast(qet->getRootOperation())) { + return std::make_shared( + qec, unionOperation->createSortedVariant(sortColumns)); + } + auto sort = std::make_shared(qec, std::move(qet), sortColumns); return std::make_shared(qec, std::move(sort)); } diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index fa5a07427b..7e484d6fbe 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -13,8 +13,9 @@ const size_t Union::NO_COLUMN = std::numeric_limits::max(); Union::Union(QueryExecutionContext* qec, const std::shared_ptr& t1, - const std::shared_ptr& t2) - : Operation(qec) { + const std::shared_ptr& t2, + std::vector targetOrder) + : Operation(qec), targetOrder_{std::move(targetOrder)} { AD_CONTRACT_CHECK(t1 && t2); _subtrees[0] = t1; _subtrees[1] = t2; @@ -45,6 +46,31 @@ Union::Union(QueryExecutionContext* qec, AD_CORRECTNESS_CHECK(ql::ranges::all_of(_columnOrigins, [](const auto& el) { return el[0] != NO_COLUMN || el[1] != NO_COLUMN; })); + + if (!targetOrder_.empty()) { + auto computeSortOrder = [this](bool left) { + vector specificSortOrder; + for (ColumnIndex index : targetOrder_) { + ColumnIndex realIndex = _columnOrigins.at(index).at(!left); + if (realIndex != NO_COLUMN) { + specificSortOrder.push_back(realIndex); + } + } + return specificSortOrder; + }; + + _subtrees[0] = QueryExecutionTree::createSortedTree(std::move(_subtrees[0]), + computeSortOrder(true)); + _subtrees[1] = QueryExecutionTree::createSortedTree( + std::move(_subtrees[1]), computeSortOrder(false)); + + // Swap children to get cheaper computation + if (_columnOrigins.at(targetOrder_.at(0)).at(1) == NO_COLUMN) { + std::swap(_subtrees[0], _subtrees[1]); + ql::ranges::for_each(_columnOrigins, + [](auto& el) { std::swap(el[0], el[1]); }); + } + } } string Union::getCacheKeyImpl() const { @@ -52,6 +78,10 @@ string Union::getCacheKeyImpl() const { os << _subtrees[0]->getCacheKey() << "\n"; os << "UNION\n"; os << _subtrees[1]->getCacheKey() << "\n"; + os << "sort order: "; + for (size_t i : targetOrder_) { + os << i << " "; + } return std::move(os).str(); } @@ -64,7 +94,7 @@ size_t Union::getResultWidth() const { return _columnOrigins.size(); } -vector Union::resultSortedOn() const { return {}; } +vector Union::resultSortedOn() const { return targetOrder_; } // _____________________________________________________________________________ VariableToColumnMap Union::computeVariableToColumnMap() const { @@ -165,6 +195,18 @@ ProtoResult Union::computeResult(bool requestLaziness) { std::shared_ptr subRes2 = _subtrees[1]->getResult(requestLaziness); + // If first sort column is not present in left child, we can fall back to the + // cheap computation because it orders the left child first. + if (!targetOrder_.empty() && + _columnOrigins.at(targetOrder_.at(0)).at(0) != NO_COLUMN) { + auto generator = computeResultKeepOrder(requestLaziness, std::move(subRes1), + std::move(subRes2)); + return requestLaziness + ? ProtoResult{std::move(generator), resultSortedOn()} + : ProtoResult{cppcoro::getSingleElement(std::move(generator)), + resultSortedOn()}; + } + if (requestLaziness) { return {computeResultLazily(std::move(subRes1), std::move(subRes2)), resultSortedOn()}; @@ -292,3 +334,168 @@ std::unique_ptr Union::cloneImpl() const { } return copy; } + +// _____________________________________________________________________________ +std::shared_ptr Union::createSortedVariant( + const vector& sortOrder) const { + return std::make_shared(_executionContext, _subtrees.at(0), + _subtrees.at(1), sortOrder); +} + +namespace { +struct Wrapper { + const IdTable& idTable_; + const LocalVocab& localVocab_; +}; +Result::IdTableVocabPair& moveOrCopy(Result::IdTableVocabPair& element) { + return element; +} +Result::IdTableVocabPair moveOrCopy(const Wrapper& element) { + return {element.idTable_.clone(), element.localVocab_.clone()}; +} +} // namespace + +// _____________________________________________________________________________ +bool Union::isSmaller(const auto& row1, const auto& row2) const { + for (auto& col : targetOrder_) { + ColumnIndex index1 = _columnOrigins.at(col).at(0); + ColumnIndex index2 = _columnOrigins.at(col).at(1); + if (index1 == NO_COLUMN) { + return true; + } + if (index2 == NO_COLUMN) { + return false; + } + if (row1[index1] != row2[index2]) { + return row1[index1] < row2[index2]; + } + } + return false; +} + +// _____________________________________________________________________________ +Result::Generator Union::processRemaining(std::vector permutation, + auto& it, auto end, + bool requestLaziness, size_t index, + IdTable& resultTable, + LocalVocab& localVocab) const { + // append the remaining elements + while (it != end) { + if (requestLaziness) { + if (index != 0) { + resultTable.insertAtEnd(it->idTable_, index, std::nullopt, permutation, + Id::makeUndefined()); + localVocab.mergeWith(std::span{&it->localVocab_, 1}); + co_yield Result::IdTableVocabPair{std::move(resultTable), + std::move(localVocab)}; + } else { + if (resultTable.size() != 0) { + co_yield Result::IdTableVocabPair{std::move(resultTable), + std::move(localVocab)}; + } + auto&& pair = moveOrCopy(*it); + pair.idTable_ = transformToCorrectColumnFormat(std::move(pair.idTable_), + permutation); + co_yield pair; + } + } else { + resultTable.insertAtEnd(it->idTable_, index, std::nullopt, permutation, + Id::makeUndefined()); + } + index = 0; + ++it; + } +} + +// _____________________________________________________________________________ +Result::Generator Union::computeResultKeepOrderImpl( + bool requestLaziness, auto range1, auto range2, + std::pair, std::shared_ptr>) + const { + IdTable resultTable{getResultWidth(), allocator()}; + if (requestLaziness) { + resultTable.reserve(chunkSize); + } + LocalVocab localVocab; + auto it1 = range1.begin(); + auto it2 = range2.begin(); + size_t index1 = 0; + size_t index2 = 0; + auto pushRow = [this, &resultTable](bool left, const auto& row) { + resultTable.emplace_back(); + for (size_t column = 0; column < resultTable.numColumns(); column++) { + ColumnIndex origin = _columnOrigins.at(column).at(!left); + resultTable.at(resultTable.size() - 1, column) = + origin == NO_COLUMN ? Id::makeUndefined() : row[origin]; + } + }; + while (it1 != range1.end() && it2 != range2.end()) { + localVocab.mergeWith(std::span{&it1->localVocab_, 1}); + localVocab.mergeWith(std::span{&it2->localVocab_, 1}); + while (index1 < it1->idTable_.size() && index2 < it2->idTable_.size()) { + if (isSmaller(it1->idTable_.at(index1), it2->idTable_.at(index2))) { + pushRow(true, it1->idTable_.at(index1)); + index1++; + } else { + pushRow(false, it2->idTable_.at(index2)); + index2++; + } + if (requestLaziness && resultTable.size() >= chunkSize) { + co_yield Result::IdTableVocabPair{std::move(resultTable), + std::move(localVocab)}; + resultTable = IdTable{getResultWidth(), allocator()}; + resultTable.reserve(chunkSize); + localVocab = LocalVocab{}; + } + } + if (index1 == it1->idTable_.size()) { + ++it1; + index1 = 0; + } + if (index2 == it2->idTable_.size()) { + ++it2; + index2 = 0; + } + } + + // append the remaining elements + for (auto& pair : + processRemaining(computePermutation(), it1, range1.end(), + requestLaziness, index1, resultTable, localVocab)) { + AD_CORRECTNESS_CHECK(requestLaziness); + co_yield pair; + } + for (auto& pair : + processRemaining(computePermutation(), it2, range2.end(), + requestLaziness, index2, resultTable, localVocab)) { + AD_CORRECTNESS_CHECK(requestLaziness); + co_yield pair; + } + if (!requestLaziness) { + co_yield Result::IdTableVocabPair{std::move(resultTable), + std::move(localVocab)}; + } +} + +// _____________________________________________________________________________ +Result::Generator Union::computeResultKeepOrder( + bool requestLaziness, std::shared_ptr result1, + std::shared_ptr result2) const { + using Range = std::variant>; + Range leftRange = result1->isFullyMaterialized() + ? Range{std::array{Wrapper{result1->idTable(), + result1->localVocab()}}} + : Range{std::move(result1->idTables())}; + Range rightRange = result2->isFullyMaterialized() + ? Range{std::array{Wrapper{result2->idTable(), + result2->localVocab()}}} + : Range{std::move(result2->idTables())}; + return std::visit( + [this, requestLaziness, &result1, &result2](auto leftRange, + auto rightRange) { + return computeResultKeepOrderImpl( + requestLaziness, std::move(leftRange), std::move(rightRange), + std::pair{std::move(result1), std::move(result2)}); + }, + std::move(leftRange), std::move(rightRange)); +} diff --git a/src/engine/Union.h b/src/engine/Union.h index 3d87664380..3915be2aa8 100644 --- a/src/engine/Union.h +++ b/src/engine/Union.h @@ -22,11 +22,13 @@ class Union : public Operation { */ std::vector> _columnOrigins; std::array, 2> _subtrees; + std::vector targetOrder_; public: Union(QueryExecutionContext* qec, const std::shared_ptr& t1, - const std::shared_ptr& t2); + const std::shared_ptr& t2, + std::vector targetOrder = {}); protected: virtual string getCacheKeyImpl() const override; @@ -61,6 +63,14 @@ class Union : public Operation { return {_subtrees[0].get(), _subtrees[1].get()}; } + // Create a sorted variant of this operation. This can be more efficient than + // stacking a `Sort` operation on top of this one because Union can simply + // push the sort down to its children. If one of the children is already + // sorted properly then it is way cheaper to sort the other child and then + // merge the two sorted results. + std::shared_ptr createSortedVariant( + const vector& sortColumns) const; + private: std::unique_ptr cloneImpl() const override; @@ -85,4 +95,25 @@ class Union : public Operation { Result::Generator computeResultLazily( std::shared_ptr result1, std::shared_ptr result2) const; + + // Compares two rows with respect to the columns that the result is sorted on. + bool isSmaller(const auto& row1, const auto& row2) const; + + // Helper function for `computeResultKeepOrderImpl` that processes any + // remaining elements once one side is exhausted. + Result::Generator processRemaining(std::vector permutation, + auto& it, auto end, bool requestLaziness, + size_t index, IdTable& resultTable, + LocalVocab& localVocab) const; + + // Actual implementation of `computeResultKeepOrder`. + Result::Generator computeResultKeepOrderImpl( + bool requestLaziness, auto range1, auto range2, + std::pair, std::shared_ptr> + lifetimeExtension) const; + + // Similar to `computeResultLazily` but it keeps the order of the results. + Result::Generator computeResultKeepOrder( + bool requestLaziness, std::shared_ptr result1, + std::shared_ptr result2) const; }; diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h index c06c16d7f2..d4f46e88b0 100644 --- a/src/engine/idTable/IdTable.h +++ b/src/engine/idTable/IdTable.h @@ -736,22 +736,35 @@ class IdTable { // The input must be some kind of `IdTable`. // TODO Can/should we constraint this functions by a concept? template - void insertAtEnd(const Table& table, - std::optional beginIdx = std::nullopt, - std::optional endIdx = std::nullopt) { - AD_CORRECTNESS_CHECK(table.numColumns() == numColumns()); + void insertAtEnd( + const Table& table, std::optional beginIdx = std::nullopt, + std::optional endIdx = std::nullopt, + std::optional> permutation = std::nullopt, + typename Table::single_value_type defaultValue = {}) { + AD_CORRECTNESS_CHECK( + table.numColumns() == numColumns() || + (permutation.has_value() && numColumns() == permutation->size())); auto begin = beginIdx.value_or(0); auto end = endIdx.value_or(table.size()); AD_CORRECTNESS_CHECK(begin <= end && end <= table.size()); auto numInserted = end - begin; auto oldSize = size(); resize(numRows() + numInserted); - ql::ranges::for_each(ad_utility::integerRange(numColumns()), - [this, &table, oldSize, begin, numInserted](size_t i) { - ql::ranges::copy( - table.getColumn(i).subspan(begin, numInserted), - getColumn(i).begin() + oldSize); - }); + ql::ranges::for_each( + ad_utility::integerRange(numColumns()), + [this, &table, oldSize, begin, numInserted, &permutation, + &defaultValue](size_t i) { + size_t mappedIndex = + permutation.has_value() ? permutation.value()[i] : i; + // Map out of index column indices from the default value. + if (mappedIndex >= table.numColumns()) { + ql::ranges::fill(getColumn(i).subspan(oldSize), defaultValue); + return; + } + ql::ranges::copy( + table.getColumn(mappedIndex).subspan(begin, numInserted), + getColumn(i).begin() + oldSize); + }); } // Check whether two `IdTables` have the same content. Mostly used for unit diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp index 438a7fdad8..034cdd3e5a 100644 --- a/test/IdTableTest.cpp +++ b/test/IdTableTest.cpp @@ -369,6 +369,45 @@ TEST(IdTable, insertAtEnd) { runTestForDifferentTypes<3>(runTestForIdTable, "idTableTest.insertAtEnd"); } +// _____________________________________________________________________________ +TEST(IdTable, insertAtEndWithPermutationAndLimit) { + // A lambda that is used as the `testCase` argument to the + // `runTestForDifferentTypes` function (see above for details). + auto runTestForIdTable = [](auto make, + auto... additionalArgs) { + Table init{4, std::move(additionalArgs.at(0))...}; + init.push_back({make(7), make(2), make(4), make(1)}); + init.push_back({make(0), make(22), make(1), make(4)}); + + Table t1{3, std::move(additionalArgs.at(1))...}; + t1.push_back({make(1), make(0), make(6)}); + t1.push_back({make(3), make(1), make(8)}); + t1.push_back({make(0), make(6), make(8)}); + t1.push_back({make(9), make(2), make(6)}); + + Table t2 = clone(init, std::move(additionalArgs.at(2))...); + std::vector permutation{2, 1, 0, 3}; + // Test inserting at the end + t2.insertAtEnd(t1, 1, 3, permutation, make(1337)); + for (size_t i = 0; i < init.size(); i++) { + ASSERT_EQ(init[i], t2[i]) << i; + } + ASSERT_EQ(t2.size(), init.size() + 2); + + EXPECT_EQ(t2.at(2, 0), make(8)); + EXPECT_EQ(t2.at(2, 1), make(1)); + EXPECT_EQ(t2.at(2, 2), make(3)); + EXPECT_EQ(t2.at(2, 3), make(1337)); + + EXPECT_EQ(t2.at(3, 0), make(8)); + EXPECT_EQ(t2.at(3, 1), make(6)); + EXPECT_EQ(t2.at(3, 2), make(0)); + EXPECT_EQ(t2.at(3, 3), make(1337)); + }; + runTestForDifferentTypes<3>(runTestForIdTable, + "IdTable.insertAtEndWithPermutationAndLimit"); +} + TEST(IdTable, reserve_and_resize) { // A lambda that is used as the `testCase` argument to the // `runTestForDifferentTypes` function (see above for details). diff --git a/test/UnionTest.cpp b/test/UnionTest.cpp index 5a2036a9a1..8f5be48a3d 100644 --- a/test/UnionTest.cpp +++ b/test/UnionTest.cpp @@ -196,3 +196,353 @@ TEST(Union, clone) { EXPECT_THAT(unionOperation, IsDeepCopy(*clone)); EXPECT_EQ(clone->getDescriptor(), unionOperation.getDescriptor()); } + +// _____________________________________________________________________________ +TEST(Union, cheapMergeIfOrderNotImportant) { + using Var = Variable; + auto* qec = ad_utility::testing::getQec(); + + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1, 2}}), Vars{Var{"?a"}, Var{"?b"}}, false, + std::vector{0, 1}); + + auto rightT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{0, 0}, {2, 4}}), Vars{Var{"?a"}, Var{"?b"}}, + false, std::vector{0, 1}); + Union unionOperation{qec, std::move(leftT), std::move(rightT), {}}; + EXPECT_TRUE(unionOperation.resultSortedOn().empty()); + { + qec->getQueryTreeCache().clearAll(); + auto result = + unionOperation.getResult(true, ComputationMode::LAZY_IF_SUPPORTED); + EXPECT_FALSE(result->isFullyMaterialized()); + auto& idTables = result->idTables(); + auto expected1 = makeIdTableFromVector({{1, 2}}); + auto expected2 = makeIdTableFromVector({{0, 0}, {2, 4}}); + + auto iterator = idTables.begin(); + ASSERT_NE(iterator, idTables.end()); + ASSERT_EQ(iterator->idTable_, expected1); + + ++iterator; + ASSERT_NE(iterator, idTables.end()); + ASSERT_EQ(iterator->idTable_, expected2); + + ASSERT_EQ(++iterator, idTables.end()); + } +} + +// _____________________________________________________________________________ +TEST(Union, sortedMerge) { + using Var = Variable; + auto* qec = ad_utility::testing::getQec(); + auto U = Id::makeUndefined(); + + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1, 2, 4}}), + Vars{Var{"?a"}, Var{"?b"}, Var{"?c"}}, false, + std::vector{0, 1, 2}); + + auto rightT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{4, 1}, {8, 2}}), Vars{Var{"?c"}, Var{"?a"}}, + false, std::vector{1, 0}); + Union unionOperation{qec, std::move(leftT), std::move(rightT), {0, 1, 2}}; + EXPECT_EQ(unionOperation.resultSortedOn(), + (std::vector{0, 1, 2})); + { + qec->getQueryTreeCache().clearAll(); + auto result = + unionOperation.getResult(true, ComputationMode::FULLY_MATERIALIZED); + auto expected = makeIdTableFromVector({{1, U, 4}, {1, 2, 4}, {2, U, 8}}); + EXPECT_EQ(result->idTable(), expected); + } + { + qec->getQueryTreeCache().clearAll(); + auto result = + unionOperation.getResult(true, ComputationMode::LAZY_IF_SUPPORTED); + auto expected = makeIdTableFromVector({{1, U, 4}, {1, 2, 4}, {2, U, 8}}); + auto& idTables = result->idTables(); + auto it = idTables.begin(); + ASSERT_NE(it, idTables.end()); + EXPECT_EQ(it->idTable_, expected); + + ASSERT_EQ(++it, idTables.end()); + } +} + +// _____________________________________________________________________________ +TEST(Union, sortedMergeWithOneSideNonLazy) { + using Var = Variable; + auto* qec = ad_utility::testing::getQec(); + + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1}}), Vars{Var{"?a"}}, false, + std::vector{0}, LocalVocab{}, std::nullopt, true); + + auto rightT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{0}, {2}}), Vars{Var{"?a"}}, false, + std::vector{0}); + Union unionOperation{qec, std::move(leftT), std::move(rightT), {0}}; + auto expected = makeIdTableFromVector({{0}, {1}, {2}}); + { + qec->getQueryTreeCache().clearAll(); + auto result = + unionOperation.getResult(true, ComputationMode::FULLY_MATERIALIZED); + EXPECT_EQ(result->idTable(), expected); + } + { + qec->getQueryTreeCache().clearAll(); + auto result = + unionOperation.getResult(true, ComputationMode::LAZY_IF_SUPPORTED); + auto& idTables = result->idTables(); + auto it = idTables.begin(); + ASSERT_NE(it, idTables.end()); + EXPECT_EQ(it->idTable_, expected); + + ASSERT_EQ(++it, idTables.end()); + } +} + +// _____________________________________________________________________________ +TEST(Union, sortedMergeWithLocalVocab) { + using Var = Variable; + auto* qec = ad_utility::testing::getQec(); + + LocalVocab vocab1; + vocab1.getIndexAndAddIfNotContained( + LocalVocabEntry::fromStringRepresentation("\"Test1\"")); + + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1}, {2}, {4}}), Vars{Var{"?a"}}, false, + std::vector{0}, vocab1.clone()); + + LocalVocab vocab2; + vocab2.getIndexAndAddIfNotContained( + LocalVocabEntry::fromStringRepresentation("\"Test2\"")); + std::vector tables; + tables.push_back(makeIdTableFromVector({{0}})); + tables.push_back(makeIdTableFromVector({{3}})); + tables.push_back(makeIdTableFromVector({{5}})); + + auto rightT = ad_utility::makeExecutionTree( + qec, std::move(tables), Vars{Var{"?a"}}, false, + std::vector{0}, vocab2.clone()); + { + qec->getQueryTreeCache().clearAll(); + Union unionOperation{qec, leftT, rightT, {0}}; + auto result = + unionOperation.getResult(true, ComputationMode::FULLY_MATERIALIZED); + auto expected = makeIdTableFromVector({{0}, {1}, {2}, {3}, {4}, {5}}); + EXPECT_EQ(result->idTable(), expected); + EXPECT_THAT(result->localVocab().getAllWordsForTesting(), + ::testing::IsSupersetOf(vocab1.getAllWordsForTesting())); + EXPECT_THAT(result->localVocab().getAllWordsForTesting(), + ::testing::IsSupersetOf(vocab2.getAllWordsForTesting())); + } + { + qec->getQueryTreeCache().clearAll(); + Union unionOperation{qec, std::move(leftT), std::move(rightT), {0}}; + auto result = + unionOperation.getResult(true, ComputationMode::LAZY_IF_SUPPORTED); + auto& idTables = result->idTables(); + + auto it = idTables.begin(); + ASSERT_NE(it, idTables.end()); + EXPECT_EQ(it->idTable_, makeIdTableFromVector({{0}, {1}, {2}, {3}, {4}})); + EXPECT_THAT(it->localVocab_.getAllWordsForTesting(), + ::testing::IsSupersetOf(vocab1.getAllWordsForTesting())); + EXPECT_THAT(it->localVocab_.getAllWordsForTesting(), + ::testing::IsSupersetOf(vocab2.getAllWordsForTesting())); + + ++it; + ASSERT_NE(it, idTables.end()); + EXPECT_EQ(it->idTable_, makeIdTableFromVector({{5}})); + EXPECT_EQ(it->localVocab_.getAllWordsForTesting(), + vocab2.getAllWordsForTesting()); + + ASSERT_EQ(++it, idTables.end()); + } +} + +// _____________________________________________________________________________ +TEST(Union, cacheKeyDiffersForDifferentOrdering) { + using Var = Variable; + auto* qec = ad_utility::testing::getQec(); + auto U = Id::makeUndefined(); + + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1, 4}}), Vars{Var{"?a"}, Var{"?b"}}, false, + std::vector{0, 1}); + + auto rightT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1, 8}}), Vars{Var{"?a"}, Var{"?c"}}, false, + std::vector{0, 1}); + Union unionOperation1{qec, leftT, rightT, {0, 1, 2}}; + Union unionOperation2{qec, std::move(leftT), std::move(rightT), {0, 2, 1}}; + + EXPECT_NE(unionOperation1.getCacheKey(), unionOperation2.getCacheKey()); + EXPECT_EQ(unionOperation1.getChildren().at(0)->getCacheKey(), + unionOperation2.getChildren().at(0)->getCacheKey()); + EXPECT_EQ(unionOperation1.getChildren().at(1)->getCacheKey(), + unionOperation2.getChildren().at(1)->getCacheKey()); + + qec->getQueryTreeCache().clearAll(); + { + auto result = + unionOperation1.getResult(true, ComputationMode::FULLY_MATERIALIZED); + auto expected = makeIdTableFromVector({{1, U, 8}, {1, 4, U}}); + EXPECT_EQ(result->idTable(), expected); + } + { + auto result = + unionOperation2.getResult(true, ComputationMode::FULLY_MATERIALIZED); + auto expected = makeIdTableFromVector({{1, 4, U}, {1, U, 8}}); + EXPECT_EQ(result->idTable(), expected); + } +} + +// _____________________________________________________________________________ +// We use a trick to merge two children where the first sort column is not +// present in both children. This test checks that the trick works correctly. +TEST(Union, testEfficientMerge) { + using Var = Variable; + auto* qec = ad_utility::testing::getQec(); + auto U = Id::makeUndefined(); + + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1}}), Vars{Var{"?a"}}, false, + std::vector{0}); + + auto rightT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{2}}), Vars{Var{"?b"}}, false, + std::vector{0}); + + { + qec->getQueryTreeCache().clearAll(); + Union unionOperation{qec, leftT, rightT, {0, 1}}; + // Check if children were swapped + EXPECT_EQ(unionOperation.getChildren().at(0)->getCacheKey(), + rightT->getCacheKey()); + EXPECT_EQ(unionOperation.getChildren().at(1)->getCacheKey(), + leftT->getCacheKey()); + + auto result = + unionOperation.getResult(true, ComputationMode::FULLY_MATERIALIZED); + auto expected = makeIdTableFromVector({{U, 2}, {1, U}}); + EXPECT_EQ(result->idTable(), expected); + } + { + qec->getQueryTreeCache().clearAll(); + Union unionOperation{qec, leftT, rightT, {1, 0}}; + // Ensure children were not swapped + EXPECT_EQ(unionOperation.getChildren().at(0)->getCacheKey(), + leftT->getCacheKey()); + EXPECT_EQ(unionOperation.getChildren().at(1)->getCacheKey(), + rightT->getCacheKey()); + + auto result = + unionOperation.getResult(true, ComputationMode::FULLY_MATERIALIZED); + auto expected = makeIdTableFromVector({{1, U}, {U, 2}}); + EXPECT_EQ(result->idTable(), expected); + } +} + +// _____________________________________________________________________________ +TEST(Union, createSortedVariantWorksProperly) { + using Var = Variable; + auto* qec = ad_utility::testing::getQec(); + auto U = Id::makeUndefined(); + + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1, 2, 4}}), + Vars{Var{"?a"}, Var{"?b"}, Var{"?c"}}); + + auto rightT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1, 4}, {2, 8}}), Vars{Var{"?a"}, Var{"?d"}}); + Union unionOperation{qec, std::move(leftT), std::move(rightT), {}}; + EXPECT_TRUE(unionOperation.resultSortedOn().empty()); + + { + qec->getQueryTreeCache().clearAll(); + auto variant = unionOperation.createSortedVariant({0, 1, 2, 3}); + EXPECT_EQ(variant->getResultSortedOn(), + (std::vector{0, 1, 2, 3})); + EXPECT_EQ( + variant->getChildren().at(0)->getRootOperation()->getResultSortedOn(), + (std::vector{0, 1, 2})); + EXPECT_EQ( + variant->getChildren().at(1)->getRootOperation()->getResultSortedOn(), + (std::vector{0, 1})); + auto result = variant->getResult(true, ComputationMode::FULLY_MATERIALIZED); + auto expected = + makeIdTableFromVector({{1, U, U, 4}, {1, 2, 4, U}, {2, U, U, 8}}); + EXPECT_EQ(result->idTable(), expected); + } + { + qec->getQueryTreeCache().clearAll(); + auto variant = unionOperation.createSortedVariant({0, 3, 1, 2}); + EXPECT_EQ(variant->getResultSortedOn(), + (std::vector{0, 3, 1, 2})); + EXPECT_EQ( + variant->getChildren().at(0)->getRootOperation()->getResultSortedOn(), + (std::vector{0, 1, 2})); + EXPECT_EQ( + variant->getChildren().at(1)->getRootOperation()->getResultSortedOn(), + (std::vector{0, 1})); + auto result = variant->getResult(true, ComputationMode::FULLY_MATERIALIZED); + auto expected = + makeIdTableFromVector({{1, 2, 4, U}, {1, U, U, 4}, {2, U, U, 8}}); + EXPECT_EQ(result->idTable(), expected); + } +} + +// _____________________________________________________________________________ +TEST(Union, checkChunkSizeSplitsProperly) { + using Var = Variable; + using ::testing::Each; + auto* qec = ad_utility::testing::getQec(); + + IdTable reference{1, qec->getAllocator()}; + reference.resize(Union::chunkSize + (Union::chunkSize / 2) + 1); + ql::ranges::fill(reference.getColumn(0), Id::makeFromInt(42)); + // Make sure we compute the expensive way + reference.getColumn(0).back() = Id::makeFromInt(1337); + + auto leftT = ad_utility::makeExecutionTree( + qec, reference.clone(), Vars{Var{"?a"}}, false, + std::vector{0}); + + auto rightT = ad_utility::makeExecutionTree( + qec, std::move(reference), Vars{Var{"?a"}}, false, + std::vector{0}); + + Union unionOperation{qec, std::move(leftT), std::move(rightT), {0}}; + + qec->getQueryTreeCache().clearAll(); + auto result = + unionOperation.getResult(true, ComputationMode::LAZY_IF_SUPPORTED); + auto& idTables = result->idTables(); + + auto it = idTables.begin(); + ASSERT_NE(it, idTables.end()); + EXPECT_EQ(it->idTable_.size(), Union::chunkSize); + EXPECT_THAT(it->idTable_.getColumn(0), Each(Id::makeFromInt(42))); + + ++it; + ASSERT_NE(it, idTables.end()); + EXPECT_EQ(it->idTable_.size(), Union::chunkSize); + EXPECT_THAT(it->idTable_.getColumn(0), Each(Id::makeFromInt(42))); + + ++it; + ASSERT_NE(it, idTables.end()); + EXPECT_EQ(it->idTable_.size(), Union::chunkSize); + EXPECT_THAT(it->idTable_.getColumn(0), Each(Id::makeFromInt(42))); + + ++it; + ASSERT_NE(it, idTables.end()); + EXPECT_EQ(it->idTable_.size(), 2); + EXPECT_THAT(it->idTable_.getColumn(0), Each(Id::makeFromInt(1337))); + + ++it; + EXPECT_EQ(it, idTables.end()); +} diff --git a/test/engine/QueryExecutionTreeTest.cpp b/test/engine/QueryExecutionTreeTest.cpp index c67e17202f..0a59b20a57 100644 --- a/test/engine/QueryExecutionTreeTest.cpp +++ b/test/engine/QueryExecutionTreeTest.cpp @@ -8,6 +8,7 @@ #include "../util/IndexTestHelpers.h" #include "./ValuesForTesting.h" #include "engine/QueryExecutionTree.h" +#include "engine/Union.h" using namespace ad_utility::testing; @@ -24,3 +25,26 @@ TEST(QueryExecutionTree, getVariableColumn) { EXPECT_EQ(qet->getVariableColumnOrNullopt(y), std::nullopt); EXPECT_ANY_THROW(qet->getVariableColumn(y)); } + +// _____________________________________________________________________________ +TEST(QueryExecutionTree, sortedUnionSpecialCase) { + using Var = Variable; + using Vars = std::vector>; + auto* qec = ad_utility::testing::getQec(); + + auto leftT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{1}}), Vars{Var{"?a"}}); + + auto rightT = ad_utility::makeExecutionTree( + qec, makeIdTableFromVector({{0}}), Vars{Var{"?a"}}); + + auto sortedTree = QueryExecutionTree::createSortedTree( + ad_utility::makeExecutionTree(qec, leftT, rightT), {0}); + + // Ensure no `Sort` is added on top + EXPECT_TRUE(std::dynamic_pointer_cast(sortedTree->getRootOperation())); + + qec->getQueryTreeCache().clearAll(); + auto result = sortedTree->getResult(false); + EXPECT_EQ(result->idTable(), makeIdTableFromVector({{0}, {1}})); +}