Skip to content

Commit

Permalink
Add new test and fix wrong behaviour for transitive path
Browse files Browse the repository at this point in the history
  • Loading branch information
RobinTF committed Feb 14, 2025
1 parent ef831e3 commit 0b1c80c
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 6 deletions.
6 changes: 6 additions & 0 deletions src/engine/TransitivePathBinSearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ struct BinSearchMap {

return targetIds_.subspan(startIndex, range.size());
}

// Linear lookup if the node is in the map, either as a key or as a value.
bool containsNode(const Id node) const {
return ql::ranges::binary_search(startIds_, node) ||
ad_utility::contains(targetIds_, node);
}
};

/**
Expand Down
8 changes: 8 additions & 0 deletions src/engine/TransitivePathHashMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ struct HashMapWrapper {
}
return iterator->second;
}

// Linear lookup if the node is in the map, either as a key or as a value.
bool containsNode(const Id node) const {
return map_.contains(node) ||
ql::ranges::any_of(map_ | ql::views::values, [node](const Set& set) {
return ad_utility::contains(set, node);
});
}
};

/**
Expand Down
31 changes: 25 additions & 6 deletions src/engine/TransitivePathImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class TransitivePathImpl : public TransitivePathBase {
targetSide.isVariable()
? std::nullopt
: std::optional{std::get<Id>(targetSide.value_)},
yieldOnce);
yieldOnce, true);

auto result = fillTableWithHull(
std::move(hull), startSide.outputCol_, targetSide.outputCol_,
Expand Down Expand Up @@ -133,7 +133,7 @@ class TransitivePathImpl : public TransitivePathBase {
targetSide.isVariable()
? std::nullopt
: std::optional{std::get<Id>(targetSide.value_)},
yieldOnce);
yieldOnce, false);

auto result = fillTableWithHull(std::move(hull), startSide.outputCol_,
targetSide.outputCol_, yieldOnce);
Expand Down Expand Up @@ -196,10 +196,13 @@ class TransitivePathImpl : public TransitivePathBase {
* @param startNode The node to start the search from.
* @param target Optional target Id. If supplied, only paths which end in this
* Id are added to the result.
* @param checkFirstNode If true and `minDist_` is 0, the first node is
* checked if it is actually found in the graph before being added.
* @return A set of connected nodes in the graph.
*/
Set findConnectedNodes(const T& edges, Id startNode,
const std::optional<Id>& target) const {
const std::optional<Id>& target,
bool checkFirstNode) const {
std::vector<std::pair<Id, size_t>> stack;
ad_utility::HashSetWithMemoryLimit<Id> marks{
getExecutionContext()->getAllocator()};
Expand All @@ -214,7 +217,8 @@ class TransitivePathImpl : public TransitivePathBase {
if (steps <= maxDist_ && !marks.contains(node)) {
if (steps >= minDist_) {
marks.insert(node);
if (!target.has_value() || node == target.value()) {
if ((!target.has_value() || node == target.value()) &&
(!checkFirstNode || steps > 0)) {
connectedNodes.insert(node);
}
}
Expand All @@ -225,6 +229,15 @@ class TransitivePathImpl : public TransitivePathBase {
}
}
}

// If we need to check the first node separately, we try the cheaper lookup
// in connected nodes first, and then fallback to the expensive computation.
if (minDist_ == 0 && checkFirstNode &&
(!target.has_value() || startNode == target.value()) &&
(connectedNodes.contains(startNode) || edges.containsNode(startNode))) {
connectedNodes.insert(startNode);
}

return connectedNodes;
}

Expand All @@ -234,6 +247,7 @@ class TransitivePathImpl : public TransitivePathBase {
*
* @param edges Adjacency lists, mapping Ids (nodes) to their connected
* Ids.
* @param edgesVocab The `LocalVocab` holding the vocabulary of the edges.
* @param startNodes A range that yields an instantiation of
* `TableColumnWithVocab` that can be consumed to create a transitive hull.
* @param target Optional target Id. If supplied, only paths which end
Expand All @@ -242,19 +256,24 @@ class TransitivePathImpl : public TransitivePathBase {
* code. When set to true, this will prevent yielding the same LocalVocab over
* and over again to make merging faster (because merging with an empty
* LocalVocab is a no-op).
* @param checkFirstNode If the start nodes are not a subset of the node
* sources, this parameter needs to be set to true so that the first node is
* checked if it is actually found in the graph before being added.
* @return Map Maps each Id to its connected Ids in the transitive hull
*/
CPP_template(typename Node)(requires ql::ranges::range<Node>) NodeGenerator
transitiveHull(const T& edges, LocalVocab edgesVocab, Node startNodes,
std::optional<Id> target, bool yieldOnce) const {
std::optional<Id> target, bool yieldOnce,
bool checkFirstNode) const {
ad_utility::Timer timer{ad_utility::Timer::Stopped};
for (auto&& tableColumn : startNodes) {
timer.cont();
LocalVocab mergedVocab = std::move(tableColumn.vocab_);
mergedVocab.mergeWith(std::span{&edgesVocab, 1});
size_t currentRow = 0;
for (Id startNode : tableColumn.column_) {
Set connectedNodes = findConnectedNodes(edges, startNode, target);
Set connectedNodes =
findConnectedNodes(edges, startNode, target, checkFirstNode);
if (!connectedNodes.empty()) {
runtimeInfo().addDetail("Hull time", timer.msecs());
timer.stop();
Expand Down
21 changes: 21 additions & 0 deletions test/TransitivePathTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,27 @@ TEST_P(TransitivePathTest, boundForwardValuesOnZeroPath) {
assertResultMatchesIdTable(resultTable, expected);
}

// _____________________________________________________________________________
TEST_P(TransitivePathTest, unboundValuesOnZeroPath) {
auto sub = makeIdTableFromVector({
{1, 2},
{3, 4},
});

auto leftOpTable = makeIdTableFromVector({
{5},
});

TransitivePathSide left(std::nullopt, 0, Variable{"?x"}, 0);
TransitivePathSide right(std::nullopt, 1, Variable{"?y"}, 1);
auto T =
makePathBound(true, sub.clone(), {Variable{"?x"}, Variable{"?y"}},
split(leftOpTable), 0, {Variable{"?x"}}, left, right, 0, 0);

auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
assertResultMatchesIdTable(resultTable, IdTable{2, T->allocator()});
}

// _____________________________________________________________________________
TEST_P(TransitivePathTest, maxLength2FromVariable) {
auto sub = makeIdTableFromVector({
Expand Down

0 comments on commit 0b1c80c

Please sign in to comment.