From 4fb267f4b9d2d4b24ecfd40a6cb88589a74608ed Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 10 Feb 2025 10:39:14 +0100 Subject: [PATCH 1/3] Remove `MilestoneIdManager` --- src/global/Id.h | 89 --------------------------------------- test/CMakeLists.txt | 2 - test/MilestoneIdTest.cpp | 90 ---------------------------------------- 3 files changed, 181 deletions(-) delete mode 100644 test/MilestoneIdTest.cpp diff --git a/src/global/Id.h b/src/global/Id.h index a62cf36142..08ab0b57d4 100644 --- a/src/global/Id.h +++ b/src/global/Id.h @@ -23,92 +23,3 @@ using WordIndex = uint64_t; using WordOrEntityIndex = uint64_t; using TextBlockIndex = uint64_t; using CompressionCode = uint64_t; - -// Integers, that are probably not integers but strong IDs or indices, but their -// true nature is still to be discovered. -using UnknownIndex = uint64_t; - -// A value to use when the result should be empty (e.g. due to an optional join) -// The highest two values are used as sentinels. -static const Id ID_NO_VALUE = Id::makeUndefined(); - -namespace ad_utility { - -// An exception that is thrown when an integer overflow occurs in the -// `MilestoneIdManager` -class MilestoneIdOverflowException : public std::exception { - private: - std::string _message; - - public: - explicit MilestoneIdOverflowException(std::string message) - : _message{std::move(message)} {} - [[nodiscard]] const char* what() const noexcept override { - return _message.c_str(); - } -}; - -// Manages two kinds of IDs: plain IDs (unsigned 64-bit integers, just called -// "IDs" in the following), and milestone IDs (unsigned 64-bit integers that are -// multiples of `distanceBetweenMilestones`. This class has the functionality to -// find the next milestone of plain ID, to check whether an ID is amilestone ID -// and to convert milestone IDs from and to a local ID space. -template -class MilestoneIdManager { - private: - // The next free ID; - uint64_t _nextId{0}; - // The last ID that was assigned. Used for overflow detection. - uint64_t _previousId{0}; - - public: - MilestoneIdManager() = default; - - // The maximum number of milestone Ids. - constexpr static uint64_t numMilestoneIds = - std::numeric_limits::max() / distanceBetweenMilestones; - - // Get the smallest milestone ID that is larger than all (milestone and - // non-milestone) previously obtained IDs. - uint64_t getNextMilestoneId() { - if (!isMilestoneId(_nextId)) { - _nextId = (milestoneIdFromLocal(milestoneIdToLocal(_nextId) + 1)); - } - return getNextId(); - } - - // Get the smallest ID that is larger than all previously obtained IDs. - uint64_t getNextId() { - if (_nextId < _previousId) { - throw MilestoneIdOverflowException{absl::StrCat( - "Overflow while assigning Ids from a MilestoneIdManager. The " - "previous " - "milestone Id was ", - _previousId, " the next id would be ", _nextId, - ". The maximum number of milestones is ", numMilestoneIds, ".")}; - } - _previousId = _nextId; - _nextId++; - return _previousId; - } - - // Is this ID a milestone id, equivalently: Is this ID a multiple of - // `distanceBetweenMilestones`? - constexpr static bool isMilestoneId(uint64_t id) { - return id % distanceBetweenMilestones == 0; - } - - // Convert a milestone ID to its "local" ID by dividing it by - // `distanceBetweenMilestones` (the i-th milestone ID will become `i`). - constexpr static uint64_t milestoneIdToLocal(uint64_t id) { - return id / distanceBetweenMilestones; - } - - // Convert "local" ID to milestone ID by multiplying it with - // `distanceBetweenMilestones`. - constexpr static uint64_t milestoneIdFromLocal(uint64_t id) { - return id * distanceBetweenMilestones; - } -}; - -} // namespace ad_utility diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3a04b9d201..7beef5bcf7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -254,8 +254,6 @@ addLinkAndDiscoverTest(ContentEncodingHelperTest http) addLinkAndDiscoverTest(PrefixCompressorTest) -addLinkAndDiscoverTest(MilestoneIdTest) - addLinkAndDiscoverTest(VocabularyTest index) addLinkAndDiscoverTestNoLibs(IteratorTest) diff --git a/test/MilestoneIdTest.cpp b/test/MilestoneIdTest.cpp deleted file mode 100644 index a13e51bf59..0000000000 --- a/test/MilestoneIdTest.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright 2022, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach - -#include - -#include "global/Id.h" -#include "util/Log.h" - -template -void testMilestoneIds() { - ad_utility::MilestoneIdManager m; - for (size_t i = 0; i < distance; ++i) { - auto id = m.getNextMilestoneId(); - ASSERT_EQ(i * distance, id); - ASSERT_TRUE(m.isMilestoneId(id)); - ASSERT_EQ(i, m.milestoneIdToLocal(id)); - ASSERT_EQ(id, m.milestoneIdFromLocal(m.milestoneIdToLocal(id))); - } -} - -TEST(MilestoneId, OnlyMilestoneIds) { - testMilestoneIds<256>(); - testMilestoneIds<257>(); -#ifdef QLEVER_RUN_EXPENSIVE_TESTS - testMilestoneIds<1024 * 1024>(); - testMilestoneIds<1024 * 1024 + 53>(); -#endif -} - -template -void testConsecutiveIds() { - ad_utility::MilestoneIdManager m; - for (size_t i = 0; i < 3 * distance; ++i) { - auto id = m.getNextId(); - ASSERT_EQ(i, id); - if (id == 0 || id == distance || id == 2 * distance) { - ASSERT_TRUE(m.isMilestoneId(id)); - ASSERT_EQ(id, m.milestoneIdFromLocal(m.milestoneIdToLocal(id))); - } else { - ASSERT_FALSE(m.isMilestoneId(id)); - ASSERT_NE(id, m.milestoneIdFromLocal(m.milestoneIdToLocal(id))); - } - } -} - -TEST(MilestoneId, ConsecutiveIds) { - testConsecutiveIds<256>(); - testConsecutiveIds<257>(); - -#ifdef QLEVER_RUN_EXPENSIVE_TESTS - testConsecutiveIds<1024 * 1024>(); - testConsecutiveIds<1024 * 1024 + 53>(); -#endif -} - -template -void testMixedIds() { - ad_utility::MilestoneIdManager m; - m.getNextId(); - for (size_t i = 0; i < 680; ++i) { - for (size_t j = 0; j < 123; ++j) { - auto id = m.getNextId(); - ASSERT_EQ(i * distance + j + 1, id); - } - auto id = m.getNextMilestoneId(); - ASSERT_TRUE(m.isMilestoneId(id)); - ASSERT_EQ(id, (i + 1) * distance); - ASSERT_EQ(m.milestoneIdToLocal(id), i + 1); - ASSERT_EQ(id, m.milestoneIdFromLocal(m.milestoneIdToLocal(id))); - } -} - -TEST(MilestoneId, MixedIds) { - testMixedIds<256>(); - testMixedIds<257>(); - testMixedIds<1024 * 1024>(); - testMixedIds<1024 * 1024 + 53>(); -} - -TEST(MilestoneId, Overflow) { - constexpr static uint64_t distance = 1ull << 63; - auto m = ad_utility::MilestoneIdManager{}; - auto id = m.getNextMilestoneId(); - ASSERT_EQ(id, 0u); - id = m.getNextMilestoneId(); - ASSERT_EQ(id, distance); - ASSERT_THROW(m.getNextMilestoneId(), - ad_utility::MilestoneIdOverflowException); -} From 1722daa6b0b5f11415cae131d736123fd098623b Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 10 Feb 2025 11:00:52 +0100 Subject: [PATCH 2/3] Optimize additional includes --- src/engine/OptionalJoin.h | 10 ++-------- src/engine/QueryExecutionContext.h | 4 +--- src/global/Id.h | 6 +----- src/index/VocabularyMerger.h | 4 ++-- src/util/AllocatorWithLimit.h | 1 - test/GroupByTest.cpp | 12 ++++++------ 6 files changed, 12 insertions(+), 25 deletions(-) diff --git a/src/engine/OptionalJoin.h b/src/engine/OptionalJoin.h index 91d1a502d9..7685bd8708 100644 --- a/src/engine/OptionalJoin.h +++ b/src/engine/OptionalJoin.h @@ -58,14 +58,8 @@ class OptionalJoin : public Operation { return {_left.get(), _right.get()}; } - /** - * @brief Joins two result tables on any number of columns, inserting the - * special value ID_NO_VALUE for any entries marked as optional. - * @param a - * @param b - * @param joinColumns - * @param result - */ + // Joins two result tables on any number of columns, inserting the special + // value `Id::makeUndefined()` for any entries marked as optional. void optionalJoin( const IdTable& left, const IdTable& right, const std::vector>& joinColumns, diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 1e891a398f..7f1739e0da 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -5,8 +5,6 @@ #pragma once -#include - #include #include @@ -15,11 +13,11 @@ #include "engine/RuntimeInformation.h" #include "engine/SortPerformanceEstimator.h" #include "global/Id.h" +#include "global/RuntimeParameters.h" #include "index/DeltaTriples.h" #include "index/Index.h" #include "util/Cache.h" #include "util/ConcurrentCache.h" -#include "util/Synchronized.h" // The value of the `QueryResultCache` below. It consists of a `Result` together // with its `RuntimeInfo`. diff --git a/src/global/Id.h b/src/global/Id.h index 08ab0b57d4..af93705ba8 100644 --- a/src/global/Id.h +++ b/src/global/Id.h @@ -3,16 +3,12 @@ // Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) #pragma once -#include - #include -#include #include "global/ValueId.h" -#include "util/Exception.h" using Id = ValueId; -typedef uint16_t Score; +using Score = uint16_t; // TODO Make the following ID and index types strong. using ColumnIndex = uint64_t; diff --git a/src/index/VocabularyMerger.h b/src/index/VocabularyMerger.h index ae72da46c6..0048baea51 100644 --- a/src/index/VocabularyMerger.h +++ b/src/index/VocabularyMerger.h @@ -68,8 +68,8 @@ struct VocabularyMetaData { bool contains(Id id) const { return begin_ <= id && id < end_; } private: - Id begin_ = ID_NO_VALUE; - Id end_ = ID_NO_VALUE; + Id begin_ = Id::makeUndefined(); + Id end_ = Id::makeUndefined(); std::string prefix_; bool beginWasSeen_ = false; }; diff --git a/src/util/AllocatorWithLimit.h b/src/util/AllocatorWithLimit.h index 1a52d0532f..400e30d409 100644 --- a/src/util/AllocatorWithLimit.h +++ b/src/util/AllocatorWithLimit.h @@ -7,7 +7,6 @@ #include -#include #include #include diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index a23fef2475..8f89f9ff11 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -263,9 +263,9 @@ TEST_F(GroupByTest, doGroupBy) { ASSERT_EQ(123u, outTable._data[1][9]); ASSERT_EQ(0u, outTable._data[2][9]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[0][10]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[1][10]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[2][10]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[0][10]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[1][10]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[2][10]); std::memcpy(&buffer, &outTable._data[0][11], sizeof(float)); ASSERT_FLOAT_EQ(-3, buffer); @@ -283,9 +283,9 @@ TEST_F(GroupByTest, doGroupBy) { ASSERT_EQ(41223u, outTable._data[1][13]); ASSERT_EQ(41223u, outTable._data[2][13]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[0][14]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[1][14]); - ASSERT_EQ(ID_NO_VALUE, outTable._data[2][14]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[0][14]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[1][14]); + ASSERT_EQ(Id::makeUndefined(), outTable._data[2][14]); std::memcpy(&buffer, &outTable._data[0][15], sizeof(float)); ASSERT_FLOAT_EQ(2, buffer); From 229f7f6943104e61d8055a953f074d28b2e9ec27 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 10 Feb 2025 11:13:43 +0100 Subject: [PATCH 3/3] Avoid `RuntimeParameters()` in header --- src/engine/CMakeLists.txt | 3 ++- src/engine/QueryExecutionContext.cpp | 11 +++++++++++ src/engine/QueryExecutionContext.h | 7 ++++--- src/engine/QueryExecutionTree.cpp | 2 +- 4 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 src/engine/QueryExecutionContext.cpp diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index e81c834303..98faba8743 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -14,5 +14,6 @@ add_library(engine CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp - Describe.cpp GraphStoreProtocol.cpp) + Describe.cpp GraphStoreProtocol.cpp + QueryExecutionContext.cpp) qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2) diff --git a/src/engine/QueryExecutionContext.cpp b/src/engine/QueryExecutionContext.cpp new file mode 100644 index 0000000000..b458206613 --- /dev/null +++ b/src/engine/QueryExecutionContext.cpp @@ -0,0 +1,11 @@ +// Copyright 2025, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Robin Textor-Falconi + +#include "engine/QueryExecutionContext.h" + +#include "global/RuntimeParameters.h" + +bool QueryExecutionContext::areWebSocketUpdatesEnabled() { + return RuntimeParameters().get<"websocket-updates-enabled">(); +} diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 7f1739e0da..501f2ba538 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -13,7 +13,6 @@ #include "engine/RuntimeInformation.h" #include "engine/SortPerformanceEstimator.h" #include "global/Id.h" -#include "global/RuntimeParameters.h" #include "index/DeltaTriples.h" #include "index/Index.h" #include "util/Cache.h" @@ -149,6 +148,9 @@ class QueryExecutionContext { return areWebsocketUpdatesEnabled_; } + private: + static bool areWebSocketUpdatesEnabled(); + private: const Index& _index; @@ -166,6 +168,5 @@ class QueryExecutionContext { std::function updateCallback_; // Cache the state of that runtime parameter to reduce the contention of the // mutex. - bool areWebsocketUpdatesEnabled_ = - RuntimeParameters().get<"websocket-updates-enabled">(); + bool areWebsocketUpdatesEnabled_ = areWebSocketUpdatesEnabled(); }; diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index cef01b1129..79a3783f86 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -12,7 +12,7 @@ #include #include "engine/Sort.h" -#include "parser/RdfEscaping.h" +#include "global/RuntimeParameters.h" using std::string;