From 0b1c80c08e36c4b02e1366e62071e4de87f9fae0 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:39:13 +0100 Subject: [PATCH] Add new test and fix wrong behaviour for transitive path --- src/engine/TransitivePathBinSearch.h | 6 ++++++ src/engine/TransitivePathHashMap.h | 8 +++++++ src/engine/TransitivePathImpl.h | 31 ++++++++++++++++++++++------ test/TransitivePathTest.cpp | 21 +++++++++++++++++++ 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/src/engine/TransitivePathBinSearch.h b/src/engine/TransitivePathBinSearch.h index 4973d9da5e..52ed1de9fb 100644 --- a/src/engine/TransitivePathBinSearch.h +++ b/src/engine/TransitivePathBinSearch.h @@ -53,6 +53,12 @@ struct BinSearchMap { return targetIds_.subspan(startIndex, range.size()); } + + // Linear lookup if the node is in the map, either as a key or as a value. + bool containsNode(const Id node) const { + return ql::ranges::binary_search(startIds_, node) || + ad_utility::contains(targetIds_, node); + } }; /** diff --git a/src/engine/TransitivePathHashMap.h b/src/engine/TransitivePathHashMap.h index 3bda2c2117..77ec88839c 100644 --- a/src/engine/TransitivePathHashMap.h +++ b/src/engine/TransitivePathHashMap.h @@ -40,6 +40,14 @@ struct HashMapWrapper { } return iterator->second; } + + // Linear lookup if the node is in the map, either as a key or as a value. + bool containsNode(const Id node) const { + return map_.contains(node) || + ql::ranges::any_of(map_ | ql::views::values, [node](const Set& set) { + return ad_utility::contains(set, node); + }); + } }; /** diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h index 4a09dd01b0..561240c70e 100644 --- a/src/engine/TransitivePathImpl.h +++ b/src/engine/TransitivePathImpl.h @@ -83,7 +83,7 @@ class TransitivePathImpl : public TransitivePathBase { targetSide.isVariable() ? std::nullopt : std::optional{std::get(targetSide.value_)}, - yieldOnce); + yieldOnce, true); auto result = fillTableWithHull( std::move(hull), startSide.outputCol_, targetSide.outputCol_, @@ -133,7 +133,7 @@ class TransitivePathImpl : public TransitivePathBase { targetSide.isVariable() ? std::nullopt : std::optional{std::get(targetSide.value_)}, - yieldOnce); + yieldOnce, false); auto result = fillTableWithHull(std::move(hull), startSide.outputCol_, targetSide.outputCol_, yieldOnce); @@ -196,10 +196,13 @@ class TransitivePathImpl : public TransitivePathBase { * @param startNode The node to start the search from. * @param target Optional target Id. If supplied, only paths which end in this * Id are added to the result. + * @param checkFirstNode If true and `minDist_` is 0, the first node is + * checked if it is actually found in the graph before being added. * @return A set of connected nodes in the graph. */ Set findConnectedNodes(const T& edges, Id startNode, - const std::optional& target) const { + const std::optional& target, + bool checkFirstNode) const { std::vector> stack; ad_utility::HashSetWithMemoryLimit marks{ getExecutionContext()->getAllocator()}; @@ -214,7 +217,8 @@ class TransitivePathImpl : public TransitivePathBase { if (steps <= maxDist_ && !marks.contains(node)) { if (steps >= minDist_) { marks.insert(node); - if (!target.has_value() || node == target.value()) { + if ((!target.has_value() || node == target.value()) && + (!checkFirstNode || steps > 0)) { connectedNodes.insert(node); } } @@ -225,6 +229,15 @@ class TransitivePathImpl : public TransitivePathBase { } } } + + // If we need to check the first node separately, we try the cheaper lookup + // in connected nodes first, and then fallback to the expensive computation. + if (minDist_ == 0 && checkFirstNode && + (!target.has_value() || startNode == target.value()) && + (connectedNodes.contains(startNode) || edges.containsNode(startNode))) { + connectedNodes.insert(startNode); + } + return connectedNodes; } @@ -234,6 +247,7 @@ class TransitivePathImpl : public TransitivePathBase { * * @param edges Adjacency lists, mapping Ids (nodes) to their connected * Ids. + * @param edgesVocab The `LocalVocab` holding the vocabulary of the edges. * @param startNodes A range that yields an instantiation of * `TableColumnWithVocab` that can be consumed to create a transitive hull. * @param target Optional target Id. If supplied, only paths which end @@ -242,11 +256,15 @@ class TransitivePathImpl : public TransitivePathBase { * code. When set to true, this will prevent yielding the same LocalVocab over * and over again to make merging faster (because merging with an empty * LocalVocab is a no-op). + * @param checkFirstNode If the start nodes are not a subset of the node + * sources, this parameter needs to be set to true so that the first node is + * checked if it is actually found in the graph before being added. * @return Map Maps each Id to its connected Ids in the transitive hull */ CPP_template(typename Node)(requires ql::ranges::range) NodeGenerator transitiveHull(const T& edges, LocalVocab edgesVocab, Node startNodes, - std::optional target, bool yieldOnce) const { + std::optional target, bool yieldOnce, + bool checkFirstNode) const { ad_utility::Timer timer{ad_utility::Timer::Stopped}; for (auto&& tableColumn : startNodes) { timer.cont(); @@ -254,7 +272,8 @@ class TransitivePathImpl : public TransitivePathBase { mergedVocab.mergeWith(std::span{&edgesVocab, 1}); size_t currentRow = 0; for (Id startNode : tableColumn.column_) { - Set connectedNodes = findConnectedNodes(edges, startNode, target); + Set connectedNodes = + findConnectedNodes(edges, startNode, target, checkFirstNode); if (!connectedNodes.empty()) { runtimeInfo().addDetail("Hull time", timer.msecs()); timer.stop(); diff --git a/test/TransitivePathTest.cpp b/test/TransitivePathTest.cpp index aff83e76d6..e984f653da 100644 --- a/test/TransitivePathTest.cpp +++ b/test/TransitivePathTest.cpp @@ -584,6 +584,27 @@ TEST_P(TransitivePathTest, boundForwardValuesOnZeroPath) { assertResultMatchesIdTable(resultTable, expected); } +// _____________________________________________________________________________ +TEST_P(TransitivePathTest, unboundValuesOnZeroPath) { + auto sub = makeIdTableFromVector({ + {1, 2}, + {3, 4}, + }); + + auto leftOpTable = makeIdTableFromVector({ + {5}, + }); + + TransitivePathSide left(std::nullopt, 0, Variable{"?x"}, 0); + TransitivePathSide right(std::nullopt, 1, Variable{"?y"}, 1); + auto T = + makePathBound(true, sub.clone(), {Variable{"?x"}, Variable{"?y"}}, + split(leftOpTable), 0, {Variable{"?x"}}, left, right, 0, 0); + + auto resultTable = T->computeResultOnlyForTesting(requestLaziness()); + assertResultMatchesIdTable(resultTable, IdTable{2, T->allocator()}); +} + // _____________________________________________________________________________ TEST_P(TransitivePathTest, maxLength2FromVariable) { auto sub = makeIdTableFromVector({