Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into small-subresults-to…
Browse files Browse the repository at this point in the history
…-union
  • Loading branch information
Hannah Bast committed Jan 28, 2025
2 parents bef7554 + d060444 commit b9edfbd
Show file tree
Hide file tree
Showing 97 changed files with 2,195 additions and 822 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ jobs:
submodules: 'recursive'
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
with:
# As of Jan 28, 2025, the default value here (`binfmt:latest`)
# downloads a QEMU version that leads to segfaults in the compiler.
# We therefore pin a known-working version.
# TODO<joka921> GitHub Actions now has ARM runners; use them to
# avoid cross-compilation completely.
image : 'tonistiigi/binfmt:desktop-v8.1.5-44'
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to Docker Hub
Expand Down Expand Up @@ -56,7 +63,7 @@ jobs:
tags: adfreiburg/qlever:test

- name: E2E in Docker
run: |
run: |
sudo mkdir ${{github.workspace}}/e2e_data
sudo chmod a+rwx ${{github.workspace}}/e2e_data
sudo docker run -i --rm -v "${{github.workspace}}/e2e_data:/app/e2e_data/" --entrypoint e2e/e2e.sh adfreiburg/qlever:test
Expand Down
12 changes: 6 additions & 6 deletions benchmark/GroupByHashMapBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ class GroupByHashMapBenchmark : public BenchmarkInterface {
} else {
firstColumn = generateRandomGroupVec(numInputRows, numGroups);
}
std::ranges::transform(
ql::ranges::transform(
firstColumn.begin(), firstColumn.end(), groupValues.begin(),
[](size_t value) {
return ValueId::makeFromInt(static_cast<int64_t>(value));
Expand All @@ -375,7 +375,7 @@ class GroupByHashMapBenchmark : public BenchmarkInterface {
auto localVocab = LocalVocab{};
if (valueTypes != ValueIdType::Strings) {
auto secondColumn = generateRandomDoubleVec(numInputRows);
std::ranges::transform(
ql::ranges::transform(
secondColumn.begin(), secondColumn.end(), otherValues.begin(),
[&](double value) {
if (valueTypes == ValueIdType::OnlyDouble)
Expand All @@ -396,10 +396,10 @@ class GroupByHashMapBenchmark : public BenchmarkInterface {
numInputRows, randomStringLength);
localVocab = std::move(newLocalVocab);

std::ranges::transform(indices.begin(), indices.end(),
otherValues.begin(), [&](LocalVocabIndex idx) {
return ValueId::makeFromLocalVocabIndex(idx);
});
ql::ranges::transform(indices.begin(), indices.end(), otherValues.begin(),
[&](LocalVocabIndex idx) {
return ValueId::makeFromLocalVocabIndex(idx);
});
}

std::vector<std::optional<Variable>> variables = {Variable{"?a"},
Expand Down
46 changes: 23 additions & 23 deletions benchmark/JoinAlgorithmBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ struct SetOfIdTableColumnElements {
*/
explicit SetOfIdTableColumnElements(
const std::span<const ValueId>& idTableColumnRef) {
std::ranges::for_each(idTableColumnRef, [this](const ValueId& id) {
ql::ranges::for_each(idTableColumnRef, [this](const ValueId& id) {
if (auto numOccurrencesIterator = numOccurrences_.find(id);
numOccurrencesIterator != numOccurrences_.end()) {
(numOccurrencesIterator->second)++;
Expand Down Expand Up @@ -190,7 +190,7 @@ static size_t createOverlapRandomly(IdTableAndJoinColumn* const smallerTable,
// Create the overlap.
ad_utility::HashMap<ValueId, std::reference_wrapper<const ValueId>>
smallerTableElementToNewElement{};
std::ranges::for_each(
ql::ranges::for_each(
smallerTableJoinColumnRef,
[&randomDouble, &probabilityToCreateOverlap,
&smallerTableElementToNewElement, &randomBiggerTableElement,
Expand Down Expand Up @@ -295,7 +295,7 @@ static size_t createOverlapRandomly(IdTableAndJoinColumn* const smallerTable,
size_t newOverlapMatches{0};
ad_utility::HashMap<ValueId, std::reference_wrapper<const ValueId>>
smallerTableElementToNewElement{};
std::ranges::for_each(
ql::ranges::for_each(
smallerTableJoinColumnSet.uniqueElements_,
[&randomBiggerTableElement, &wantedNumNewOverlapMatches,
&newOverlapMatches, &smallerTableElementToNewElement,
Expand Down Expand Up @@ -326,7 +326,7 @@ static size_t createOverlapRandomly(IdTableAndJoinColumn* const smallerTable,
});

// Overwrite the designated values in the smaller table.
std::ranges::for_each(
ql::ranges::for_each(
smallerTableJoinColumnRef, [&smallerTableElementToNewElement](auto& id) {
if (auto newValueIterator = smallerTableElementToNewElement.find(id);
newValueIterator != smallerTableElementToNewElement.end()) {
Expand Down Expand Up @@ -465,17 +465,17 @@ static std::vector<T> mergeSortedVectors(
std::vector<T> mergedVector{};

// Merge.
std::ranges::for_each(intervals, [&mergedVector](std::vector<T> elements) {
ql::ranges::for_each(intervals, [&mergedVector](std::vector<T> elements) {
if (mergedVector.empty() || elements.empty()) {
std::ranges::copy(elements, std::back_inserter(mergedVector));
ql::ranges::copy(elements, std::back_inserter(mergedVector));
return;
}
const size_t idxOldLastElem = mergedVector.size() - 1;
std::ranges::copy(elements, std::back_inserter(mergedVector));
ql::ranges::copy(elements, std::back_inserter(mergedVector));
if (mergedVector.at(idxOldLastElem) > mergedVector.at(idxOldLastElem + 1)) {
std::ranges::inplace_merge(
ql::ranges::inplace_merge(
mergedVector,
std::ranges::next(mergedVector.begin(), idxOldLastElem + 1));
ql::ranges::next(mergedVector.begin(), idxOldLastElem + 1));
}
});

Expand Down Expand Up @@ -935,7 +935,7 @@ class GeneralInterfaceImplementation : public BenchmarkInterface {
"' must be bigger than, or equal to, 0.")};
config.addValidator(
[](const benchmarkSampleSizeRatiosValueType& vec) {
return std::ranges::all_of(
return ql::ranges::all_of(
vec,
[](const benchmarkSampleSizeRatiosValueType::value_type ratio) {
return ratio >= 0.f;
Expand All @@ -961,7 +961,7 @@ class GeneralInterfaceImplementation : public BenchmarkInterface {
".")};
config.addValidator(
[](const benchmarkSampleSizeRatiosValueType& vec) {
return std::ranges::max(vec) <=
return ql::ranges::max(vec) <=
getMaxValue<benchmarkSampleSizeRatiosValueType::value_type>() -
1.f;
},
Expand Down Expand Up @@ -1056,9 +1056,9 @@ class GeneralInterfaceImplementation : public BenchmarkInterface {
},
descriptor, descriptor, option);
};
std::ranges::for_each(std::vector{minBiggerTableRows, maxBiggerTableRows,
minSmallerTableRows},
addCastableValidator);
ql::ranges::for_each(std::vector{minBiggerTableRows, maxBiggerTableRows,
minSmallerTableRows},
addCastableValidator);
}

/*
Expand Down Expand Up @@ -1303,7 +1303,7 @@ class GeneralInterfaceImplementation : public BenchmarkInterface {
ColumnNumWithType<float>{toUnderlying(TimeForMergeGallopingJoin)});

// Calculate how much of a speedup the hash join algorithm has in
// comparison to the merge/galloping join algrithm.
// comparison to the merge/galloping join algorithm.
calculateSpeedupOfColumn(
table, {toUnderlying(JoinAlgorithmSpeedup)},
{toUnderlying(TimeForHashJoin)},
Expand Down Expand Up @@ -1684,7 +1684,7 @@ class BmOnlyBiggerTableSizeChanges final
static_cast<double>(getConfigVariables().minBiggerTableRows_) /
static_cast<double>(smallerTableNumRows)))};
auto growthFunction = createDefaultGrowthLambda<float>(
10.f, std::ranges::max(minRatio, 10.f),
10.f, ql::ranges::max(minRatio, 10.f),
generateNaturalNumberSequenceInterval(minRatio, 9.f));
ResultTable& table = makeGrowingBenchmarkTable(
&results, tableName, "Row ratio", alwaysFalse,
Expand Down Expand Up @@ -1742,8 +1742,8 @@ class BmOnlySmallerTableSizeChanges final
for (const float ratioRows : mergeSortedVectors<float>(
{generateNaturalNumberSequenceInterval(
getConfigVariables().minRatioRows_,
std::ranges::min(getConfigVariables().maxRatioRows_,
10.f)),
ql::ranges::min(getConfigVariables().maxRatioRows_,
10.f)),
generateExponentInterval(
10.f, getConfigVariables().minRatioRows_,
getConfigVariables().maxRatioRows_)})) {
Expand All @@ -1755,7 +1755,7 @@ class BmOnlySmallerTableSizeChanges final
// Returns the number of rows in the smaller `IdTable`, used for the
// measurements in a given row.
auto growthFunction = createDefaultGrowthLambda(
10UL, std::ranges::max(
10UL, ql::ranges::max(
static_cast<size_t>(
static_cast<double>(
getConfigVariables().minBiggerTableRows_) /
Expand Down Expand Up @@ -1867,7 +1867,7 @@ class BmSampleSizeRatio final : public GeneralInterfaceImplementation {
BenchmarkResults runAllBenchmarks() override {
BenchmarkResults results{};
const auto& ratios{getConfigVariables().benchmarkSampleSizeRatios_};
const float maxSampleSizeRatio{std::ranges::max(ratios)};
const float maxSampleSizeRatio{ql::ranges::max(ratios)};

/*
We work with the biggest possible smaller and bigger table. That should make
Expand Down Expand Up @@ -2097,17 +2097,17 @@ class BmSmallerTableGrowsBiggerTableRemainsSameSize final
static_cast<double>(biggerTableNumRows) /
static_cast<double>(getConfigVariables().minSmallerTableRows_))};
std::vector<size_t> smallerTableRows;
std::ranges::transform(
ql::ranges::transform(
mergeSortedVectors<float>(
{generateNaturalNumberSequenceInterval(
1.f, std::ranges::min(10.f, biggestRowRatio)),
1.f, ql::ranges::min(10.f, biggestRowRatio)),
generateExponentInterval(10.f, 10.f, biggestRowRatio)}),
std::back_inserter(smallerTableRows),
[&biggerTableNumRows](const float ratio) {
return static_cast<size_t>(
static_cast<double>(biggerTableNumRows) / ratio);
});
std::ranges::reverse(smallerTableRows);
ql::ranges::reverse(smallerTableRows);
const size_t lastSmallerTableRow{smallerTableRows.back()};
auto growthFunction = createDefaultGrowthLambda(
10UL, lastSmallerTableRow + 1UL, std::move(smallerTableRows));
Expand Down
4 changes: 2 additions & 2 deletions benchmark/ParallelMergeBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ class IdTableCompressedWriterBenchmark : public BenchmarkInterface {
ad_utility::integerRange(numInputRows)) {
res.push_back(gen());
}
std::ranges::sort(res);
ql::ranges::sort(res);
return res;
};
std::vector<std::vector<size_t>> inputs;
inputs.resize(numInputs);
std::ranges::generate(inputs, generateRandomVec);
ql::ranges::generate(inputs, generateRandomVec);

auto run = [&inputs]() {
auto merger = ad_utility::parallelMultiwayMerge<size_t, false>(
Expand Down
16 changes: 8 additions & 8 deletions benchmark/infrastructure/BenchmarkMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ static void writeBenchmarkClassAndBenchmarkResultsToJsonFile(
Print the configuration documentation of all registered benchmarks.
*/
static __attribute__((noreturn)) void printConfigurationOptionsAndExit() {
std::ranges::for_each(
ql::ranges::for_each(
BenchmarkRegister::getAllRegisteredBenchmarks(),
[](const BenchmarkInterface* bench) {
std::cerr << createCategoryTitle(
Expand Down Expand Up @@ -211,13 +211,13 @@ int main(int argc, char** argv) {
// Actually processing the arguments.
if (vm.count("print")) {
// Print the results and metadata.
std::ranges::for_each(benchmarkClassAndResults,
[](const auto& pair) {
std::cout << benchmarkResultsToString(pair.first,
pair.second)
<< "\n\n";
},
{});
ql::ranges::for_each(benchmarkClassAndResults,
[](const auto& pair) {
std::cout << benchmarkResultsToString(pair.first,
pair.second)
<< "\n\n";
},
{});
}

if (vm.count("write")) {
Expand Down
8 changes: 4 additions & 4 deletions benchmark/infrastructure/BenchmarkMeasurementContainer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ void ResultTable::init(const std::string& descriptor,
descriptorForLog_ = std::move(descriptorForLog);
columnNames_ = columnNames;
entries_.resize(rowNames.size());
std::ranges::fill(entries_, std::vector<EntryType>(columnNames.size()));
ql::ranges::fill(entries_, std::vector<EntryType>(columnNames.size()));

// Setting the row names.
for (size_t row = 0; row < rowNames.size(); row++) {
Expand Down Expand Up @@ -287,10 +287,10 @@ ResultTable::operator std::string() const {
});

// Which of the entries is the longest?
columnMaxStringWidth.at(column) = std::ranges::max(stringWidthOfRow);
columnMaxStringWidth.at(column) = ql::ranges::max(stringWidthOfRow);

// Is the name of the column bigger?
columnMaxStringWidth.at(column) = std::ranges::max(
columnMaxStringWidth.at(column) = ql::ranges::max(
columnMaxStringWidth.at(column), columnNames_.at(column).length());
}

Expand Down Expand Up @@ -384,7 +384,7 @@ void ResultGroup::deleteEntryImpl(T& entry) {
}();

// Delete `entry`.
auto entryIterator{std::ranges::find(
auto entryIterator{ql::ranges::find(
vec, &entry, [](const ad_utility::CopyableUniquePtr<T>& pointer) {
return pointer.get();
})};
Expand Down
4 changes: 2 additions & 2 deletions benchmark/infrastructure/BenchmarkToJson.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ static nlohmann::json transformIntoJsonArray(
*/
nlohmann::ordered_json jsonArray = nlohmann::ordered_json::array();

std::ranges::transform(vec, std::back_inserter(jsonArray),
translationFunction);
ql::ranges::transform(vec, std::back_inserter(jsonArray),
translationFunction);

return jsonArray;
}
Expand Down
4 changes: 2 additions & 2 deletions benchmark/util/ResultTableColumnOperations.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ requires(sizeof...(ColumnInputTypes) > 0) void generateColumnWithColumnInput(
// Using a column more than once is the sign of an error.
std::array<size_t, sizeof...(ColumnInputTypes)> allColumnNums{
{inputColumns.columnNum_...}};
std::ranges::sort(allColumnNums);
AD_CONTRACT_CHECK(std::ranges::adjacent_find(allColumnNums) ==
ql::ranges::sort(allColumnNums);
AD_CONTRACT_CHECK(ql::ranges::adjacent_find(allColumnNums) ==
allColumnNums.end());

// Fill the result column.
Expand Down
4 changes: 2 additions & 2 deletions src/backports/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
#include "backports/concepts.h"

// The following defines namespaces `ql::ranges` and `ql::views` that are almost
// drop-in replacements for `std::ranges` and `std::views`. In C++20 mode (when
// drop-in replacements for `std::ranges` and `std::views`. In C++20 mode (when
// the `QLEVER_CPP_17` macro is not used), these namespaces are simply aliases
// for `std::ranges` and `std::views`. In C++17 mode they contain the ranges and
// for `std::ranges` and `std::views`. In C++17 mode they contain the ranges and
// views from Eric Niebler's `range-v3` library. NOTE: `ql::ranges::unique`
// currently doesn't work, because the interface to this function is different
// in both implementations. NOTE: There might be other caveats which we are
Expand Down
2 changes: 1 addition & 1 deletion src/engine/AddCombinedRowToTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ class AddCombinedRowToIdTable {
// Only merge non-null vocabs.
auto range = currentVocabs_ | ql::views::filter(toBool) |
ql::views::transform(dereference);
mergedVocab_.mergeWith(std::ranges::ref_view{range});
mergedVocab_.mergeWith(ql::ranges::ref_view{range});
}
}
const IdTableView<0>& inputLeft() const {
Expand Down
2 changes: 1 addition & 1 deletion src/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ add_library(engine
CartesianProductJoin.cpp TextIndexScanForWord.cpp TextIndexScanForEntity.cpp
TextLimit.cpp LazyGroupBy.cpp GroupByHashMapOptimization.cpp SpatialJoin.cpp
CountConnectedSubgraphs.cpp SpatialJoinAlgorithms.cpp PathSearch.cpp ExecuteUpdate.cpp
Describe.cpp)
Describe.cpp GraphStoreProtocol.cpp)
qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams s2)
25 changes: 14 additions & 11 deletions src/engine/CartesianProductJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@ VariableToColumnMap CartesianProductJoin::computeVariableToColumnMap() const {
}

// _____________________________________________________________________________
IdTable CartesianProductJoin::writeAllColumns(
std::ranges::random_access_range auto idTables, size_t offset, size_t limit,
size_t lastTableOffset) const {
CPP_template_def(typename R)(requires ql::ranges::random_access_range<R>)
IdTable CartesianProductJoin::writeAllColumns(
R idTables, size_t offset, size_t limit, size_t lastTableOffset) const {
AD_CORRECTNESS_CHECK(offset >= lastTableOffset);
IdTable result{getResultWidth(), getExecutionContext()->getAllocator()};
// TODO<joka921> Find a solution to cheaply handle the case, that only a
Expand Down Expand Up @@ -255,13 +255,14 @@ CartesianProductJoin::calculateSubResults(bool requestLaziness) {
auto children = childView();
AD_CORRECTNESS_CHECK(!ql::ranges::empty(children));
// Get all child results (possibly with limit, see above).
for (Operation& child : children) {
if (limitIfPresent.has_value() && child.supportsLimit()) {
child.setLimit(limitIfPresent.value());
for (std::shared_ptr<QueryExecutionTree>& childTree : children_) {
if (limitIfPresent.has_value() && childTree->supportsLimit()) {
childTree->setLimit(limitIfPresent.value());
}
auto& child = *childTree->getRootOperation();
// To preserve order of the columns we can only consume the first child
// lazily. In the future this restriction may be lifted by permuting the
// columns afterwards.
// columns afterward.
bool isLast = &child == &children.back();
bool requestLazy = requestLaziness && isLast;
auto result = child.getResult(
Expand Down Expand Up @@ -302,12 +303,14 @@ CartesianProductJoin::calculateSubResults(bool requestLaziness) {
}

// _____________________________________________________________________________
Result::Generator CartesianProductJoin::produceTablesLazily(
LocalVocab mergedVocab, std::ranges::range auto idTables, size_t offset,
size_t limit, size_t lastTableOffset) const {
CPP_template_def(typename R)(requires ql::ranges::range<R>) Result::Generator
CartesianProductJoin::produceTablesLazily(LocalVocab mergedVocab,
R idTables, size_t offset,
size_t limit,
size_t lastTableOffset) const {
while (limit > 0) {
uint64_t limitWithChunkSize = std::min(limit, chunkSize_);
IdTable idTable = writeAllColumns(std::ranges::ref_view(idTables), offset,
IdTable idTable = writeAllColumns(ql::ranges::ref_view(idTables), offset,
limitWithChunkSize, lastTableOffset);
size_t tableSize = idTable.size();
AD_CORRECTNESS_CHECK(tableSize <= limit);
Expand Down
Loading

0 comments on commit b9edfbd

Please sign in to comment.