Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make empty path return empty set when the graph is empty #1806

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions src/engine/TransitivePathBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,15 @@ std::shared_ptr<TransitivePathBase> TransitivePathBase::bindLeftOrRightSide(
// subtrees. This has the effect that the `TransitivePathBinSearch` will
// never re-sort an index scan (which should not happen because we can just
// take the appropriate index scan in the first place).
bool useBinSearch = dynamic_cast<const TransitivePathBinSearch*>(this);
std::vector<std::shared_ptr<TransitivePathBase>> candidates;
candidates.push_back(TransitivePathBase::makeTransitivePath(
getExecutionContext(), subtree_, lhs, rhs, minDist_, maxDist_));
candidates.push_back(makeTransitivePath(getExecutionContext(), subtree_, lhs,
rhs, minDist_, maxDist_,
useBinSearch));
for (const auto& alternativeSubtree : alternativeSubtrees()) {
candidates.push_back(TransitivePathBase::makeTransitivePath(
getExecutionContext(), alternativeSubtree, lhs, rhs, minDist_,
maxDist_));
candidates.push_back(makeTransitivePath(getExecutionContext(),
alternativeSubtree, lhs, rhs,
minDist_, maxDist_, useBinSearch));
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This fixes a bug in the unit tests, where both implementations are supposed to get tested, but for bound variables it got reset to the configured default regardless of test configuration

}

auto& p = *ql::ranges::min_element(
Expand Down
6 changes: 6 additions & 0 deletions src/engine/TransitivePathBinSearch.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ struct BinSearchMap {

return targetIds_.subspan(startIndex, range.size());
}

// Linear lookup if the node is in the map, either as a key or as a value.
bool containsNode(const Id node) const {
return ql::ranges::binary_search(startIds_, node) ||
ad_utility::contains(targetIds_, node);
}
};

/**
Expand Down
8 changes: 8 additions & 0 deletions src/engine/TransitivePathHashMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ struct HashMapWrapper {
}
return iterator->second;
}

// Linear lookup if the node is in the map, either as a key or as a value.
bool containsNode(const Id node) const {
return map_.contains(node) ||
ql::ranges::any_of(map_ | ql::views::values, [node](const Set& set) {
return ad_utility::contains(set, node);
});
}
};

/**
Expand Down
42 changes: 31 additions & 11 deletions src/engine/TransitivePathImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class TransitivePathImpl : public TransitivePathBase {
targetSide.isVariable()
? std::nullopt
: std::optional{std::get<Id>(targetSide.value_)},
yieldOnce);
yieldOnce, true);

auto result = fillTableWithHull(
std::move(hull), startSide.outputCol_, targetSide.outputCol_,
Expand Down Expand Up @@ -133,7 +133,7 @@ class TransitivePathImpl : public TransitivePathBase {
targetSide.isVariable()
? std::nullopt
: std::optional{std::get<Id>(targetSide.value_)},
yieldOnce);
yieldOnce, false);

auto result = fillTableWithHull(std::move(hull), startSide.outputCol_,
targetSide.outputCol_, yieldOnce);
Expand Down Expand Up @@ -196,26 +196,27 @@ class TransitivePathImpl : public TransitivePathBase {
* @param startNode The node to start the search from.
* @param target Optional target Id. If supplied, only paths which end in this
* Id are added to the result.
* @param checkFirstNode If true and `minDist_` is 0, the first node is
* checked if it is actually found in the graph before being added.
* @return A set of connected nodes in the graph.
*/
Set findConnectedNodes(const T& edges, Id startNode,
const std::optional<Id>& target) const {
const std::optional<Id>& target,
bool checkFirstNode) const {
std::vector<std::pair<Id, size_t>> stack;
ad_utility::HashSetWithMemoryLimit<Id> marks{
getExecutionContext()->getAllocator()};
Set connectedNodes{getExecutionContext()->getAllocator()};
stack.emplace_back(startNode, 0);

if (minDist_ == 0 && (!target.has_value() || startNode == target.value())) {
connectedNodes.insert(startNode);
}
bool startNodeFound = false;

while (!stack.empty()) {
checkCancellation();
auto [node, steps] = stack.back();
stack.pop_back();

if (steps <= maxDist_ && marks.count(node) == 0) {
if (steps <= maxDist_ && !marks.contains(node)) {
if (steps >= minDist_) {
marks.insert(node);
if (!target.has_value() || node == target.value()) {
Expand All @@ -226,9 +227,21 @@ class TransitivePathImpl : public TransitivePathBase {
const auto& successors = edges.successors(node);
for (auto successor : successors) {
stack.emplace_back(successor, steps + 1);
if (successor == startNode) {
startNodeFound = true;
}
}
}
}

// If we need to check the first node separately, we check `startNodeFound`
// first because its cheaper and then fallback to the expensive computation
// that gives the definitive answer.
if (minDist_ == 0 && checkFirstNode && !startNodeFound &&
!edges.containsNode(startNode)) {
connectedNodes.erase(startNode);
}

return connectedNodes;
}

Expand All @@ -238,6 +251,7 @@ class TransitivePathImpl : public TransitivePathBase {
*
* @param edges Adjacency lists, mapping Ids (nodes) to their connected
* Ids.
* @param edgesVocab The `LocalVocab` holding the vocabulary of the edges.
* @param startNodes A range that yields an instantiation of
* `TableColumnWithVocab` that can be consumed to create a transitive hull.
* @param target Optional target Id. If supplied, only paths which end
Expand All @@ -246,19 +260,24 @@ class TransitivePathImpl : public TransitivePathBase {
* code. When set to true, this will prevent yielding the same LocalVocab over
* and over again to make merging faster (because merging with an empty
* LocalVocab is a no-op).
* @param checkFirstNode If the start nodes are not a subset of the node
* sources, this parameter needs to be set to true so that the first node is
* checked if it is actually found in the graph before being added.
* @return Map Maps each Id to its connected Ids in the transitive hull
*/
CPP_template(typename Node)(requires ql::ranges::range<Node>) NodeGenerator
transitiveHull(const T& edges, LocalVocab edgesVocab, Node startNodes,
std::optional<Id> target, bool yieldOnce) const {
std::optional<Id> target, bool yieldOnce,
bool checkFirstNode) const {
ad_utility::Timer timer{ad_utility::Timer::Stopped};
for (auto&& tableColumn : startNodes) {
timer.cont();
LocalVocab mergedVocab = std::move(tableColumn.vocab_);
mergedVocab.mergeWith(std::span{&edgesVocab, 1});
size_t currentRow = 0;
for (Id startNode : tableColumn.column_) {
Set connectedNodes = findConnectedNodes(edges, startNode, target);
Set connectedNodes =
findConnectedNodes(edges, startNode, target, checkFirstNode);
if (!connectedNodes.empty()) {
runtimeInfo().addDetail("Hull time", timer.msecs());
timer.stop();
Expand Down Expand Up @@ -313,7 +332,8 @@ class TransitivePathImpl : public TransitivePathBase {
* computation.
*
* @param startSide The TransitivePathSide where the edges start
* @param startSideTable An IdTable containing the Ids for the startSide
* @param startSideResult A `Result` wrapping an `IdTable` containing the Ids
* for the startSide
* @return cppcoro::generator<TableColumnWithVocab> An generator for
* the transitive hull computation
*/
Expand All @@ -335,7 +355,7 @@ class TransitivePathImpl : public TransitivePathBase {
std::move(localVocab)};
}
}
};
}

virtual T setupEdgesMap(const IdTable& dynSub,
const TransitivePathSide& startSide,
Expand Down
72 changes: 71 additions & 1 deletion test/TransitivePathTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include "engine/ValuesForTesting.h"
#include "util/GTestHelpers.h"
#include "util/IdTableHelpers.h"
#include "util/IndexTestHelpers.h"

using ad_utility::testing::getQec;
namespace {
Expand Down Expand Up @@ -535,6 +534,77 @@ TEST_P(TransitivePathTest, startNodesWithNoMatchesLeftBound) {
assertResultMatchesIdTable(resultTable, expected);
}

// _____________________________________________________________________________
TEST_P(TransitivePathTest, boundReverseConnectedValuesOnZeroPath) {
auto sub = makeIdTableFromVector({
{1, 2},
{3, 4},
});

auto leftOpTable = makeIdTableFromVector({
{2},
});

auto expected = makeIdTableFromVector({
{2, 2},
});

TransitivePathSide left(std::nullopt, 0, Variable{"?x"}, 0);
TransitivePathSide right(std::nullopt, 1, Variable{"?y"}, 1);
auto T =
makePathBound(true, sub.clone(), {Variable{"?x"}, Variable{"?y"}},
split(leftOpTable), 0, {Variable{"?x"}}, left, right, 0, 0);

auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
assertResultMatchesIdTable(resultTable, expected);
}

// _____________________________________________________________________________
TEST_P(TransitivePathTest, boundForwardValuesOnZeroPath) {
auto sub = makeIdTableFromVector({
{1, 2},
{3, 4},
});

auto leftOpTable = makeIdTableFromVector({
{3},
});

auto expected = makeIdTableFromVector({
{3, 3},
});

TransitivePathSide left(std::nullopt, 0, Variable{"?x"}, 0);
TransitivePathSide right(std::nullopt, 1, Variable{"?y"}, 1);
auto T =
makePathBound(true, sub.clone(), {Variable{"?x"}, Variable{"?y"}},
split(leftOpTable), 0, {Variable{"?x"}}, left, right, 0, 0);

auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
assertResultMatchesIdTable(resultTable, expected);
}

// _____________________________________________________________________________
TEST_P(TransitivePathTest, unboundValuesOnZeroPath) {
auto sub = makeIdTableFromVector({
{1, 2},
{3, 4},
});

auto leftOpTable = makeIdTableFromVector({
{5},
});

TransitivePathSide left(std::nullopt, 0, Variable{"?x"}, 0);
TransitivePathSide right(std::nullopt, 1, Variable{"?y"}, 1);
auto T =
makePathBound(true, sub.clone(), {Variable{"?x"}, Variable{"?y"}},
split(leftOpTable), 0, {Variable{"?x"}}, left, right, 0, 0);

auto resultTable = T->computeResultOnlyForTesting(requestLaziness());
assertResultMatchesIdTable(resultTable, IdTable{2, T->allocator()});
}

// _____________________________________________________________________________
TEST_P(TransitivePathTest, maxLength2FromVariable) {
auto sub = makeIdTableFromVector({
Expand Down
Loading