From eef276572d27a06b3d8701e5f8359980e2dbde81 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 12 Feb 2025 16:28:52 +0100 Subject: [PATCH 1/6] Optimize headers even more (#1785) Follow-up to #1781, eliminates some dead code and optimizes includes again by moving the very expensive include of `RuntimeParameters.h` into the cpp files. --- src/engine/CMakeLists.txt | 3 +- src/engine/OptionalJoin.h | 10 +-- src/engine/QueryExecutionContext.cpp | 11 ++++ src/engine/QueryExecutionContext.h | 9 ++- src/engine/QueryExecutionTree.cpp | 2 +- src/global/Id.h | 95 +--------------------------- src/index/VocabularyMerger.h | 4 +- src/util/AllocatorWithLimit.h | 1 - test/CMakeLists.txt | 2 - test/GroupByTest.cpp | 12 ++-- test/MilestoneIdTest.cpp | 90 -------------------------- 11 files changed, 29 insertions(+), 210 deletions(-) create mode 100644 src/engine/QueryExecutionContext.cpp delete mode 100644 test/MilestoneIdTest.cpp diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index e81c834303..98faba8743 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -14,5 +14,6 @@ add_library(engine CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp - Describe.cpp GraphStoreProtocol.cpp) + Describe.cpp GraphStoreProtocol.cpp + QueryExecutionContext.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2) diff --git a/src/engine/OptionalJoin.h b/src/engine/OptionalJoin.h index 91d1a502d9..7685bd8708 100644 --- a/src/engine/OptionalJoin.h +++ b/src/engine/OptionalJoin.h @@ -58,14 +58,8 @@ class OptionalJoin : public Operation { return {_left.get(), _right.get()}; } - /** - * @brief Joins two result tables on any number of columns, inserting the - * special value ID_NO_VALUE for any entries marked as optional. - * @param a - * @param b - * @param joinColumns - * @param result - */ + // Joins two result tables on any number of columns, inserting the special + // value `Id::makeUndefined()` for any entries marked as optional. void optionalJoin( const IdTable& left, const IdTable& right, const std::vector>& joinColumns, diff --git a/src/engine/QueryExecutionContext.cpp b/src/engine/QueryExecutionContext.cpp new file mode 100644 index 0000000000..b458206613 --- /dev/null +++ b/src/engine/QueryExecutionContext.cpp @@ -0,0 +1,11 @@ +// Copyright 2025, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Robin Textor-Falconi + +#include "engine/QueryExecutionContext.h" + +#include "global/RuntimeParameters.h" + +bool QueryExecutionContext::areWebSocketUpdatesEnabled() { + return RuntimeParameters().get<"websocket-updates-enabled">(); +} diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 1e891a398f..501f2ba538 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -5,8 +5,6 @@ #pragma once -#include - #include #include @@ -19,7 +17,6 @@ #include "index/Index.h" #include "util/Cache.h" #include "util/ConcurrentCache.h" -#include "util/Synchronized.h" // The value of the `QueryResultCache` below. It consists of a `Result` together // with its `RuntimeInfo`. @@ -151,6 +148,9 @@ class QueryExecutionContext { return areWebsocketUpdatesEnabled_; } + private: + static bool areWebSocketUpdatesEnabled(); + private: const Index& _index; @@ -168,6 +168,5 @@ class QueryExecutionContext { std::function updateCallback_; // Cache the state of that runtime parameter to reduce the contention of the // mutex. - bool areWebsocketUpdatesEnabled_ = - RuntimeParameters().get<"websocket-updates-enabled">(); + bool areWebsocketUpdatesEnabled_ = areWebSocketUpdatesEnabled(); }; diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index cef01b1129..79a3783f86 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -12,7 +12,7 @@ #include #include "engine/Sort.h" -#include "parser/RdfEscaping.h" +#include "global/RuntimeParameters.h" using std::string; diff --git a/src/global/Id.h b/src/global/Id.h index a62cf36142..af93705ba8 100644 --- a/src/global/Id.h +++ b/src/global/Id.h @@ -3,16 +3,12 @@ // Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) #pragma once -#include - #include -#include #include "global/ValueId.h" -#include "util/Exception.h" using Id = ValueId; -typedef uint16_t Score; +using Score = uint16_t; // TODO Make the following ID and index types strong. using ColumnIndex = uint64_t; @@ -23,92 +19,3 @@ using WordIndex = uint64_t; using WordOrEntityIndex = uint64_t; using TextBlockIndex = uint64_t; using CompressionCode = uint64_t; - -// Integers, that are probably not integers but strong IDs or indices, but their -// true nature is still to be discovered. -using UnknownIndex = uint64_t; - -// A value to use when the result should be empty (e.g. due to an optional join) -// The highest two values are used as sentinels. -static const Id ID_NO_VALUE = Id::makeUndefined(); - -namespace ad_utility { - -// An exception that is thrown when an integer overflow occurs in the -// `MilestoneIdManager` -class MilestoneIdOverflowException : public std::exception { - private: - std::string _message; - - public: - explicit MilestoneIdOverflowException(std::string message) - : _message{std::move(message)} {} - [[nodiscard]] const char* what() const noexcept override { - return _message.c_str(); - } -}; - -// Manages two kinds of IDs: plain IDs (unsigned 64-bit integers, just called -// "IDs" in the following), and milestone IDs (unsigned 64-bit integers that are -// multiples of `distanceBetweenMilestones`. This class has the functionality to -// find the next milestone of plain ID, to check whether an ID is amilestone ID -// and to convert milestone IDs from and to a local ID space. -template -class MilestoneIdManager { - private: - // The next free ID; - uint64_t _nextId{0}; - // The last ID that was assigned. Used for overflow detection. - uint64_t _previousId{0}; - - public: - MilestoneIdManager() = default; - - // The maximum number of milestone Ids. - constexpr static uint64_t numMilestoneIds = - std::numeric_limits::max() / distanceBetweenMilestones; - - // Get the smallest milestone ID that is larger than all (milestone and - // non-milestone) previously obtained IDs. - uint64_t getNextMilestoneId() { - if (!isMilestoneId(_nextId)) { - _nextId = (milestoneIdFromLocal(milestoneIdToLocal(_nextId) + 1)); - } - return getNextId(); - } - - // Get the smallest ID that is larger than all previously obtained IDs. - uint64_t getNextId() { - if (_nextId < _previousId) { - throw MilestoneIdOverflowException{absl::StrCat( - "Overflow while assigning Ids from a MilestoneIdManager. The " - "previous " - "milestone Id was ", - _previousId, " the next id would be ", _nextId, - ". The maximum number of milestones is ", numMilestoneIds, ".")}; - } - _previousId = _nextId; - _nextId++; - return _previousId; - } - - // Is this ID a milestone id, equivalently: Is this ID a multiple of - // `distanceBetweenMilestones`? - constexpr static bool isMilestoneId(uint64_t id) { - return id % distanceBetweenMilestones == 0; - } - - // Convert a milestone ID to its "local" ID by dividing it by - // `distanceBetweenMilestones` (the i-th milestone ID will become `i`). - constexpr static uint64_t milestoneIdToLocal(uint64_t id) { - return id / distanceBetweenMilestones; - } - - // Convert "local" ID to milestone ID by multiplying it with - // `distanceBetweenMilestones`. - constexpr static uint64_t milestoneIdFromLocal(uint64_t id) { - return id * distanceBetweenMilestones; - } -}; - -} // namespace ad_utility diff --git a/src/index/VocabularyMerger.h b/src/index/VocabularyMerger.h index ae72da46c6..0048baea51 100644 --- a/src/index/VocabularyMerger.h +++ b/src/index/VocabularyMerger.h @@ -68,8 +68,8 @@ struct VocabularyMetaData { bool contains(Id id) const { return begin_ <= id && id < end_; } private: - Id begin_ = ID_NO_VALUE; - Id end_ = ID_NO_VALUE; + Id begin_ = Id::makeUndefined(); + Id end_ = Id::makeUndefined(); std::string prefix_; bool beginWasSeen_ = false; }; diff --git a/src/util/AllocatorWithLimit.h b/src/util/AllocatorWithLimit.h index 1a52d0532f..400e30d409 100644 --- a/src/util/AllocatorWithLimit.h +++ b/src/util/AllocatorWithLimit.h @@ -7,7 +7,6 @@ #include -#include #include #include diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 87b80f9fe6..904bd58706 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -254,8 +254,6 @@ addLinkAndDiscoverTest(ContentEncodingHelperTest http) addLinkAndDiscoverTest(PrefixCompressorTest) -addLinkAndDiscoverTest(MilestoneIdTest) - addLinkAndDiscoverTest(VocabularyTest index) addLinkAndDiscoverTestNoLibs(IteratorTest) diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index a23fef2475..8f89f9ff11 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -263,9 +263,9 @@ TEST_F(GroupByTest, doGroupBy) { ASSERT_EQ(123u, outTable._data[1][9]); ASSERT_EQ(0u, outTable._data[2][9]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[0][10]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[1][10]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[2][10]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[0][10]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[1][10]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[2][10]); std::memcpy(&buffer, &outTable._data[0][11], sizeof(float)); ASSERT_FLOAT_EQ(-3, buffer); @@ -283,9 +283,9 @@ TEST_F(GroupByTest, doGroupBy) { ASSERT_EQ(41223u, outTable._data[1][13]); ASSERT_EQ(41223u, outTable._data[2][13]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[0][14]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[1][14]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[2][14]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[0][14]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[1][14]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[2][14]); std::memcpy(&buffer, &outTable._data[0][15], sizeof(float)); ASSERT_FLOAT_EQ(2, buffer); diff --git a/test/MilestoneIdTest.cpp b/test/MilestoneIdTest.cpp deleted file mode 100644 index a13e51bf59..0000000000 --- a/test/MilestoneIdTest.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2022, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach - -#include - -#include "global/Id.h" -#include "util/Log.h" - -template -void testMilestoneIds() { - ad_utility::MilestoneIdManager m; - for (size_t i = 0; i < distance; ++i) { - auto id = m.getNextMilestoneId(); - ASSERT_EQ(i * distance, id); - ASSERT_TRUE(m.isMilestoneId(id)); - ASSERT_EQ(i, m.milestoneIdToLocal(id)); - ASSERT_EQ(id, m.milestoneIdFromLocal(m.milestoneIdToLocal(id))); - } -} - -TEST(MilestoneId, OnlyMilestoneIds) { - testMilestoneIds<256>(); - testMilestoneIds<257>(); -#ifdef QLEVER_RUN_EXPENSIVE_TESTS - testMilestoneIds<1024 * 1024>(); - testMilestoneIds<1024 * 1024 + 53>(); -#endif -} - -template -void testConsecutiveIds() { - ad_utility::MilestoneIdManager m; - for (size_t i = 0; i < 3 * distance; ++i) { - auto id = m.getNextId(); - ASSERT_EQ(i, id); - if (id == 0 || id == distance || id == 2 * distance) { - ASSERT_TRUE(m.isMilestoneId(id)); - ASSERT_EQ(id, m.milestoneIdFromLocal(m.milestoneIdToLocal(id))); - } else { - ASSERT_FALSE(m.isMilestoneId(id)); - ASSERT_NE(id, m.milestoneIdFromLocal(m.milestoneIdToLocal(id))); - } - } -} - -TEST(MilestoneId, ConsecutiveIds) { - testConsecutiveIds<256>(); - testConsecutiveIds<257>(); - -#ifdef QLEVER_RUN_EXPENSIVE_TESTS - testConsecutiveIds<1024 * 1024>(); - testConsecutiveIds<1024 * 1024 + 53>(); -#endif -} - -template -void testMixedIds() { - ad_utility::MilestoneIdManager m; - m.getNextId(); - for (size_t i = 0; i < 680; ++i) { - for (size_t j = 0; j < 123; ++j) { - auto id = m.getNextId(); - ASSERT_EQ(i * distance + j + 1, id); - } - auto id = m.getNextMilestoneId(); - ASSERT_TRUE(m.isMilestoneId(id)); - ASSERT_EQ(id, (i + 1) * distance); - ASSERT_EQ(m.milestoneIdToLocal(id), i + 1); - ASSERT_EQ(id, m.milestoneIdFromLocal(m.milestoneIdToLocal(id))); - } -} - -TEST(MilestoneId, MixedIds) { - testMixedIds<256>(); - testMixedIds<257>(); - testMixedIds<1024 * 1024>(); - testMixedIds<1024 * 1024 + 53>(); -} - -TEST(MilestoneId, Overflow) { - constexpr static uint64_t distance = 1ull << 63; - auto m = ad_utility::MilestoneIdManager{}; - auto id = m.getNextMilestoneId(); - ASSERT_EQ(id, 0u); - id = m.getNextMilestoneId(); - ASSERT_EQ(id, distance); - ASSERT_THROW(m.getNextMilestoneId(), - ad_utility::MilestoneIdOverflowException); -} From 6a571f91b67e6b3ce2e4604fb59480582d6dced5 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 12 Feb 2025 21:09:48 +0100 Subject: [PATCH 2/6] Implement limit support for `Bind` (#1795) Just a small change that allows `Bind` to avoid computing SPARQL expressions when a `LIMIT` or `OFFSET` is present, that would just trim them away anyway. --- src/engine/Bind.cpp | 4 ++++ src/engine/Bind.h | 1 + 2 files changed, 5 insertions(+) diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 95de8a4dfe..a0e7e78c9a 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -27,6 +27,9 @@ size_t Bind::getCostEstimate() { return _subtree->getCostEstimate() + _subtree->getSizeEstimate(); } +// We delegate the limit to the child operation, so we always support it. +bool Bind::supportsLimit() const { return true; } + float Bind::getMultiplicity(size_t col) { // this is the newly added column if (col == getResultWidth() - 1) { @@ -93,6 +96,7 @@ IdTable Bind::cloneSubView(const IdTable& idTable, // _____________________________________________________________________________ ProtoResult Bind::computeResult(bool requestLaziness) { + _subtree->setLimit(getLimit()); LOG(DEBUG) << "Get input to BIND operation..." << std::endl; std::shared_ptr subRes = _subtree->getResult(requestLaziness); LOG(DEBUG) << "Got input to Bind operation." << std::endl; diff --git a/src/engine/Bind.h b/src/engine/Bind.h index 34c515fb54..0abd5b2cec 100644 --- a/src/engine/Bind.h +++ b/src/engine/Bind.h @@ -30,6 +30,7 @@ class Bind : public Operation { [[nodiscard]] size_t getResultWidth() const override; std::vector getChildren() override; size_t getCostEstimate() override; + bool supportsLimit() const override; private: uint64_t getSizeEstimateBeforeLimit() override; From 72973057897738fb8e5943f534791e7cbe4ebf5c Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Wed, 12 Feb 2025 21:18:32 +0100 Subject: [PATCH 3/6] Fix TurtleParser for `a` predicate (#1789) Fixes #1763 Before this fix the Turtle parser was broken for prefixes that starts with `"a"`. --- src/parser/RdfParser.cpp | 7 ++----- src/parser/RdfParser.h | 1 + test/RdfParserTest.cpp | 37 ++++++++++++++++++++++++++++++++++--- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/parser/RdfParser.cpp b/src/parser/RdfParser.cpp index 9cf4bffe93..c100a4b1dc 100644 --- a/src/parser/RdfParser.cpp +++ b/src/parser/RdfParser.cpp @@ -151,16 +151,13 @@ bool TurtleParser::objectList() { // ______________________________________________________ template bool TurtleParser::verb() { - return predicateSpecialA() || predicate(); + return predicate() || predicateSpecialA(); } // ___________________________________________________________________ template bool TurtleParser::predicateSpecialA() { - tok_.skipWhitespaceAndComments(); - if (auto [success, word] = tok_.template getNextToken(); - success) { - (void)word; + if (parseTerminal()) { activePredicate_ = TripleComponent::Iri::fromIriref( ""); return true; diff --git a/src/parser/RdfParser.h b/src/parser/RdfParser.h index d65c05934d..063cfc35e3 100644 --- a/src/parser/RdfParser.h +++ b/src/parser/RdfParser.h @@ -429,6 +429,7 @@ class TurtleParser : public RdfParserBase { FRIEND_TEST(RdfParserTest, booleanLiteralLongForm); FRIEND_TEST(RdfParserTest, collection); FRIEND_TEST(RdfParserTest, iriref); + FRIEND_TEST(RdfParserTest, specialPredicateA); }; template diff --git a/test/RdfParserTest.cpp b/test/RdfParserTest.cpp index 6eba30f45d..780952acfe 100644 --- a/test/RdfParserTest.cpp +++ b/test/RdfParserTest.cpp @@ -63,12 +63,12 @@ auto checkParseResult = [&]() { ASSERT_TRUE(optionalParser.has_value()); auto& parser = optionalParser.value(); - ASSERT_EQ(parser.getPosition(), expectedPosition.value_or(input.size())); + EXPECT_EQ(parser.getPosition(), expectedPosition.value_or(input.size())); if (expectedLastParseResult.has_value()) { - ASSERT_EQ(expectedLastParseResult, parser.getLastParseResult()); + EXPECT_EQ(expectedLastParseResult, parser.getLastParseResult()); } if (expectedTriples.has_value()) { - ASSERT_EQ(expectedTriples, parser.getTriples()); + EXPECT_EQ(expectedTriples, parser.getTriples()); } }(); return std::move(optionalParser.value()); @@ -1145,3 +1145,34 @@ TEST(RdfParserTest, multifileParser) { }; forAllMultifileParsers(impl); } + +// _____________________________________________________________________________ + +TEST(RdfParserTest, specialPredicateA) { + auto runCommonTests = [](const auto& checker) { + checker( + "@prefix a: . a:subject a:predicate " + "\"object\" .", + {}, {}, + std::vector{ + {iri(""), + iri(""), + lit("\"object\"")}}); + checker( + "@prefix a: . a:subject a \"object\" .", + {}, {}, + std::vector{ + {iri(""), + iri(""), + lit("\"object\"")}}); + }; + + auto parseTwoStatements = [](Parser& parser) { + return parser.statement() && parser.statement(); + }; + + auto checkRe2 = checkParseResult; + auto checkCTRE = checkParseResult; + runCommonTests(checkRe2); + runCommonTests(checkCTRE); +} From 2697035347eacdc9b32a57c578f36fad5db2e04e Mon Sep 17 00:00:00 2001 From: Julian <14220769+Qup42@users.noreply.github.com> Date: Wed, 12 Feb 2025 21:31:24 +0100 Subject: [PATCH 4/6] Various smaller improvements for `SPARQL Update` (#1784) - Store the `DeltaTriplesCount` as signed integer. The difference would otherwise underflow. - Add the number of insertions and deletions by an UPDATE operation to the returned metadata. Previously only information on the change in the delta triples was given. This only allowed limited conclusion on the effect of the update as insertions and deletions can cancel out in the number of delta triples over a operation. - Return a JSON from the `clear-delta-triples` command with the number of delta triples after the clear. This is more consistent with the other operations. - The triples to insert/delete are sorted and removed of duplicates earlier in the triple calculation (`ExecuteUpdate` vs. `DeltaTriples`). Triples that are inserted in an operation will not be deleted before that in the same operation. --- src/engine/ExecuteUpdate.cpp | 22 ++++++++++++ src/engine/ExecuteUpdate.h | 13 +++++++ src/engine/Server.cpp | 15 +++++--- src/index/DeltaTriples.cpp | 8 +++-- src/index/DeltaTriples.h | 12 ++++--- test/DeltaTriplesCountTest.cpp | 5 ++- test/DeltaTriplesTest.cpp | 30 ++++++++++------ test/DeltaTriplesTestHelpers.h | 2 +- test/ExecuteUpdateTest.cpp | 66 ++++++++++++++++++++++++++++------ test/ServerTest.cpp | 3 +- 10 files changed, 138 insertions(+), 38 deletions(-) diff --git a/src/engine/ExecuteUpdate.cpp b/src/engine/ExecuteUpdate.cpp index 3637410d22..7e43ae1ef3 100644 --- a/src/engine/ExecuteUpdate.cpp +++ b/src/engine/ExecuteUpdate.cpp @@ -176,9 +176,31 @@ ExecuteUpdate::computeGraphUpdateQuads( cancellationHandle->throwIfCancelled(); } } + sortAndRemoveDuplicates(toInsert); + sortAndRemoveDuplicates(toDelete); + metadata.inUpdate_ = DeltaTriplesCount{static_cast(toInsert.size()), + static_cast(toDelete.size())}; + toDelete = setMinus(toDelete, toInsert); metadata.triplePreparationTime_ = timer.msecs(); return { IdTriplesAndLocalVocab{std::move(toInsert), std::move(localVocabInsert)}, IdTriplesAndLocalVocab{std::move(toDelete), std::move(localVocabDelete)}}; } + +// _____________________________________________________________________________ +void ExecuteUpdate::sortAndRemoveDuplicates( + std::vector>& container) { + ql::ranges::sort(container); + container.erase(std::unique(container.begin(), container.end()), + container.end()); +} + +// _____________________________________________________________________________ +std::vector> ExecuteUpdate::setMinus( + const std::vector>& a, const std::vector>& b) { + std::vector> reducedToDelete; + reducedToDelete.reserve(a.size()); + ql::ranges::set_difference(a, b, std::back_inserter(reducedToDelete)); + return reducedToDelete; +} diff --git a/src/engine/ExecuteUpdate.h b/src/engine/ExecuteUpdate.h index 0f7e462c44..34b6b8177a 100644 --- a/src/engine/ExecuteUpdate.h +++ b/src/engine/ExecuteUpdate.h @@ -17,6 +17,7 @@ struct UpdateMetadata { Milliseconds triplePreparationTime_ = Zero; Milliseconds insertionTime_ = Zero; Milliseconds deletionTime_ = Zero; + std::optional inUpdate_; }; class ExecuteUpdate { @@ -71,4 +72,16 @@ class ExecuteUpdate { const CancellationHandle& cancellationHandle, UpdateMetadata& metadata); FRIEND_TEST(ExecuteUpdate, computeGraphUpdateQuads); + + // After the operation the vector is sorted and contains no duplicate + // elements. + static void sortAndRemoveDuplicates(std::vector>& container); + FRIEND_TEST(ExecuteUpdate, sortAndRemoveDuplicates); + + // For two sorted vectors `A` and `B` return a new vector + // that contains the element of `A\B`. + // Precondition: the inputs must be sorted. + static std::vector> setMinus(const std::vector>& a, + const std::vector>& b); + FRIEND_TEST(ExecuteUpdate, setMinus); }; diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index afb15ac841..3febf6c51a 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -297,12 +297,15 @@ Awaitable Server::process( [this] { // Use `this` explicitly to silence false-positive errors on the // captured `this` being unused. - this->index_.deltaTriplesManager().clear(); + return this->index_.deltaTriplesManager().modify( + [](auto& deltaTriples) { + deltaTriples.clear(); + return deltaTriples.getCounts(); + }); }, handle); - co_await std::move(coroutine); - response = createOkResponse("Delta triples have been cleared", request, - MediaType::textPlain); + auto countAfterClear = co_await std::move(coroutine); + response = createJsonResponse(nlohmann::json{countAfterClear}, request); } else if (auto cmd = checkParameter("cmd", "get-settings")) { logCommand(cmd, "get server settings"); response = createJsonResponse(RuntimeParameters().toMap(), request); @@ -821,6 +824,10 @@ json Server::createResponseMetadataForUpdate( response["delta-triples"]["after"] = nlohmann::json(countAfter); response["delta-triples"]["difference"] = nlohmann::json(countAfter - countBefore); + if (updateMetadata.inUpdate_.has_value()) { + response["delta-triples"]["operation"] = + json(updateMetadata.inUpdate_.value()); + } response["time"]["planning"] = formatTime(runtimeInfoWholeOp.timeQueryPlanning); response["time"]["where"] = diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index e8d35fa3b3..c3ecad2bde 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -150,9 +150,9 @@ void DeltaTriples::modifyTriplesImpl(CancellationHandle cancellationHandle, TriplesToHandlesMap& targetMap, TriplesToHandlesMap& inverseMap) { rewriteLocalVocabEntriesAndBlankNodes(triples); - ql::ranges::sort(triples); - auto first = std::unique(triples.begin(), triples.end()); - triples.erase(first, triples.end()); + AD_EXPENSIVE_CHECK(ql::ranges::is_sorted(triples)); + AD_EXPENSIVE_CHECK(std::unique(triples.begin(), triples.end()) == + triples.end()); std::erase_if(triples, [&targetMap](const IdTriple<0>& triple) { return targetMap.contains(triple); }); @@ -248,6 +248,8 @@ template void DeltaTriplesManager::modify( std::function const&); template nlohmann::json DeltaTriplesManager::modify( const std::function&); +template DeltaTriplesCount DeltaTriplesManager::modify( + const std::function&); // _____________________________________________________________________________ void DeltaTriplesManager::clear() { modify(&DeltaTriples::clear); } diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index 3a2037768c..aa1f3e67e7 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -40,8 +40,8 @@ class SharedLocatedTriplesSnapshot // A class for keeping track of the number of triples of the `DeltaTriples`. struct DeltaTriplesCount { - size_t triplesInserted_; - size_t triplesDeleted_; + int64_t triplesInserted_; + int64_t triplesDeleted_; /// Output as json. The signature of this function is mandated by the json /// library to allow for implicit conversion. @@ -146,8 +146,12 @@ class DeltaTriples { void clear(); // The number of delta triples added and subtracted. - size_t numInserted() const { return triplesInserted_.size(); } - size_t numDeleted() const { return triplesDeleted_.size(); } + int64_t numInserted() const { + return static_cast(triplesInserted_.size()); + } + int64_t numDeleted() const { + return static_cast(triplesDeleted_.size()); + } DeltaTriplesCount getCounts() const; // Insert triples. diff --git a/test/DeltaTriplesCountTest.cpp b/test/DeltaTriplesCountTest.cpp index 1a72f356de..b04c0b4745 100644 --- a/test/DeltaTriplesCountTest.cpp +++ b/test/DeltaTriplesCountTest.cpp @@ -21,7 +21,6 @@ TEST(DeltaTriplesCountTest, toJson) { TEST(DeltaTriplesCountTest, subtractOperator) { constexpr DeltaTriplesCount count1{10, 5}; constexpr DeltaTriplesCount count2{3, 2}; - constexpr DeltaTriplesCount expected{7, 3}; - const DeltaTriplesCount actual = count1 - count2; - EXPECT_THAT(actual, testing::Eq(expected)); + EXPECT_THAT(count1 - count2, testing::Eq(DeltaTriplesCount{7, 3})); + EXPECT_THAT(count2 - count1, testing::Eq(DeltaTriplesCount{-7, -3})); } diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp index e0fe673c39..3b4564f483 100644 --- a/test/DeltaTriplesTest.cpp +++ b/test/DeltaTriplesTest.cpp @@ -149,7 +149,7 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { EXPECT_THAT(deltaTriples, StateIs(0, 0, 0, {}, {})); - // Inserting triples. + // Inserting triples. The triples being inserted must be sorted. deltaTriples.insertTriples( cancellationHandle, makeIdTriples(vocab, localVocab, {" ", " "})); @@ -164,14 +164,14 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { deltaTriples, StateIs(3, 0, 3, {" ", " ", " "}, {})); - // Inserting unsorted triples works. + // Insert more triples. deltaTriples.insertTriples( cancellationHandle, - makeIdTriples(vocab, localVocab, {" ", " "})); + makeIdTriples(vocab, localVocab, {" ", " "})); EXPECT_THAT(deltaTriples, StateIs(5, 0, 5, - {" ", " ", " ", - " ", " "}, + {" ", " ", " ", + " ", " "}, {})); // Inserting already inserted triples has no effect. @@ -212,10 +212,20 @@ TEST_F(DeltaTriplesTest, insertTriplesAndDeleteTriples) { {" ", " ", " ", " "}, {" ", " ", " ", " "})); - // Deleting unsorted triples. + // Unsorted triples are not allowed. + if constexpr (ad_utility::areExpensiveChecksEnabled) { + AD_EXPECT_THROW_WITH_MESSAGE( + deltaTriples.deleteTriples( + cancellationHandle, + makeIdTriples(vocab, localVocab, + {" ", " "})), + testing::_); + } + + // Deleting triples. deltaTriples.deleteTriples( cancellationHandle, - makeIdTriples(vocab, localVocab, {" ", " "})); + makeIdTriples(vocab, localVocab, {" ", " "})); EXPECT_THAT( deltaTriples, StateIs(4, 6, 10, @@ -347,7 +357,7 @@ TEST_F(DeltaTriplesTest, DeltaTriplesManager) { absl::StrCat(" ")}); auto triplesToDelete = makeIdTriples( vocab, localVocab, - {" ", absl::StrCat(" "), + {" ", absl::StrCat(" "), absl::StrCat(" ")}); // Insert the `triplesToInsert`. deltaTriplesManager.modify([&](DeltaTriples& deltaTriples) { @@ -416,10 +426,8 @@ TEST_F(DeltaTriplesTest, DeltaTriplesManager) { // thread-exclusive triple and inserts one thread-exclusive triple that is // deleted right after (This triple is stored as deleted in the `DeltaTriples` // because it might be contained in the original input). Additionally, there - // is one common triple inserted by// all the threads and one common triple + // is one common triple inserted by all the threads and one common triple // that is deleted by all the threads. - // - auto deltaImpl = deltaTriplesManager.deltaTriples_.rlock(); EXPECT_THAT(*deltaImpl, NumTriples(numThreads + 1, 2 * numThreads + 1, 3 * numThreads + 2)); diff --git a/test/DeltaTriplesTestHelpers.h b/test/DeltaTriplesTestHelpers.h index 17d8e994ce..8bbd39ac6b 100644 --- a/test/DeltaTriplesTestHelpers.h +++ b/test/DeltaTriplesTestHelpers.h @@ -41,7 +41,7 @@ inline auto NumTriplesInAllPermutations = // `getCounts()` of a `DeltaTriples` and `numTriples()` for all // `LocatedTriplesPerBlock` of the `DeltaTriples`. inline auto NumTriples = - [](size_t inserted, size_t deleted, + [](int64_t inserted, int64_t deleted, size_t inAllPermutations) -> testing::Matcher { return testing::AllOf( AD_PROPERTY(DeltaTriples, numInserted, testing::Eq(inserted)), diff --git a/test/ExecuteUpdateTest.cpp b/test/ExecuteUpdateTest.cpp index b376660785..a1da7bcd62 100644 --- a/test/ExecuteUpdateTest.cpp +++ b/test/ExecuteUpdateTest.cpp @@ -52,7 +52,9 @@ TEST(ExecuteUpdate, executeUpdate) { auto expectExecuteUpdate = [&index, &expectExecuteUpdateHelper]( const std::string& update, - const testing::Matcher& deltaTriplesMatcher) { + const testing::Matcher& deltaTriplesMatcher, + source_location sourceLocation = source_location::current()) { + auto l = generateLocationTrace(sourceLocation); DeltaTriples deltaTriples{index}; expectExecuteUpdateHelper(update, deltaTriples); EXPECT_THAT(deltaTriples, deltaTriplesMatcher); @@ -127,7 +129,9 @@ TEST(ExecuteUpdate, computeGraphUpdateQuads) { [&executeComputeGraphUpdateQuads]( const std::string& update, const Matcher>&>& toInsertMatcher, - const Matcher>&>& toDeleteMatcher) { + const Matcher>&>& toDeleteMatcher, + source_location sourceLocation = source_location::current()) { + auto l = generateLocationTrace(sourceLocation); EXPECT_THAT(executeComputeGraphUpdateQuads(update), Pair(AD_FIELD(ExecuteUpdate::IdTriplesAndLocalVocab, idTriples_, toInsertMatcher), @@ -137,7 +141,9 @@ TEST(ExecuteUpdate, computeGraphUpdateQuads) { auto expectComputeGraphUpdateQuadsFails = [&executeComputeGraphUpdateQuads]( const std::string& update, - const Matcher& messageMatcher) { + const Matcher& messageMatcher, + source_location sourceLocation = source_location::current()) { + auto l = generateLocationTrace(sourceLocation); AD_EXPECT_THROW_WITH_MESSAGE(executeComputeGraphUpdateQuads(update), messageMatcher); }; @@ -151,22 +157,18 @@ TEST(ExecuteUpdate, computeGraphUpdateQuads) { ElementsAreArray({IdTriple(Id(""), Id("