From f5814bad28eb16d4cfbf330e037eebe291d93d4e Mon Sep 17 00:00:00 2001 From: realHannes Date: Wed, 19 Feb 2025 22:29:16 +0100 Subject: [PATCH] apply changes for code review (2) --- .../PrefilterExpressionIndex.cpp | 127 ++++++++++++------ 1 file changed, 87 insertions(+), 40 deletions(-) diff --git a/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp b/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp index 8a6f59bd0f..cd9c772a63 100644 --- a/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp +++ b/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp @@ -134,6 +134,7 @@ static auto getSetUnion(const std::vector& blocks1, //______________________________________________________________________________ // Refers to an `ValueId` vector `::iterator` (index). using RandomIt = std::vector::iterator; + // A pair containing the `start` and `end` (iterator) indices for a specified // single datatype range over a given `ValueId` vector. // For the given cases below, the `ValueId` vector is retrieved from the @@ -141,32 +142,92 @@ using RandomIt = std::vector::iterator; // related `BlockMetadata` values. using RandomItPair = std::pair; +// Refers to a `BlockMetadata` span `::interator` (index). +using BlockSpanIt = + std::span::iterator; + +//______________________________________________________________________________ +static std::pair getInputAdjustedRangeIndices( + const BlockSpanIt& beginInput, const BlockSpanIt& endInput, + const RandomIt& valueIdsInputBegin, const RandomIt& firstIt, + const RandomIt& lastIt) { + return { + beginInput + std::distance(valueIdsInputBegin, firstIt) / 2, + // Round up, for Ids contained within the bounding Ids of firstTriple + // and lastTriple we have to include the respective metadata block + // (that block is partially relevant). + // Remark: Given lastIt refers to the highest possible input index + // (endInput), select endInput to avoid out-of-bound errors later on. + std::min(beginInput + (std::distance(valueIdsInputBegin, lastIt) + 1) / 2, + endInput)}; +} + +//______________________________________________________________________________ +// Similar to `getRelevantBlocksFromIdRanges`, but this function retrieves the +// relevant (and save) complementing `BlockMetadata` values w.r.t. `input` and +// `relevantIdRanges`. +static std::vector getComplementBlocksForRelevantIdRanges( + const std::vector& relevantIdRanges, + std::span input, const RandomIt& valueIdsInputBegin, + const RandomIt& valueIdsInputEnd) { + // If relevantIdRanges is empty, the complement are simply all `BlockMetadata` + // values (input). + if (relevantIdRanges.empty()) { + return std::vector{input.begin(), input.end()}; + } + // Use helper getInputAdjustedRangesIndices (w.r.t. std::span input) and bind relevant constant arguments. + const auto getInputAdjustedRange = std::bind( + getInputAdjustedRangeIndices, input.begin(), input.end(), + valueIdsInputBegin, std::placeholders::_1, std::placeholders::_2); + // Section retrieve complementing BlockMetadata values. + std::vector complementBlocks; + complementBlocks.reserve(input.size()); + // We implicitly handle the first complement range: + // input[input.begin(), (first) inputAdjustedBegin] + RandomIt currentBegin = valueIdsInputBegin; + for (const auto& [firstRelevantIt, lastRelevantIt] : relevantIdRanges) { + const auto& [inputAdjustedBegin, inputAdjustedEnd] = + getInputAdjustedRange(currentBegin, firstRelevantIt); + complementBlocks.insert(complementBlocks.end(), inputAdjustedBegin, + inputAdjustedEnd); + currentBegin = lastRelevantIt; + } + // Handle the complementing range: + // input[(last) inputAdjustedBegin, input.end()] + if (currentBegin < valueIdsInputEnd) { + const auto& [inputAdjustedBegin, inputAdjustedEnd] = + getInputAdjustedRange(currentBegin, valueIdsInputEnd); + complementBlocks.insert(complementBlocks.end(), inputAdjustedBegin, + inputAdjustedEnd); + } + complementBlocks.shrink_to_fit(); + return complementBlocks; +} + //______________________________________________________________________________ static std::vector getRelevantBlocksFromIdRanges( const std::vector& relevantIdRanges, - std::span input, std::vector& valueIdsInput) { + std::span input, const RandomIt& valueIdsInputBegin) { // The vector for relevant BlockMetadata values which contain ValueIds // defined as relevant by relevantIdRanges. std::vector relevantBlocks; // Reserve memory, input.size() is upper bound. relevantBlocks.reserve(input.size()); + const auto getInputAdjustedRange = std::bind( + getInputAdjustedRangeIndices, input.begin(), input.end(), + valueIdsInputBegin, std::placeholders::_1, std::placeholders::_2); // Given the relevant Id ranges, retrieve the corresponding relevant // BlockMetadata values from vector input and add them to the relevantBlocks - // vector. - auto endValueIdsInput = valueIdsInput.end(); + // vector. The direct Id ranges must be adjusted w.r.t. input by making use + // of getInputAdjustedRange. for (const auto& [firstId, secondId] : relevantIdRanges) { - // Ensures that index is within bounds of index vector. - auto secondIdAdjusted = - secondId < endValueIdsInput ? secondId + 1 : secondId; - relevantBlocks.insert( - relevantBlocks.end(), - input.begin() + std::distance(valueIdsInput.begin(), firstId) / 2, - // Round up, for Ids contained within the bounding Ids of firstTriple - // and lastTriple we have to include the respective metadata block - // (that block is partially relevant). - input.begin() + - std::distance(valueIdsInput.begin(), secondIdAdjusted) / 2); + const auto& [inputAdjustedBegin, inputAdjustedEnd] = + getInputAdjustedRange(firstId, secondId); + assert(inputAdjustedEnd >= inputAdjustedBegin); + relevantBlocks.insert(relevantBlocks.end(), inputAdjustedBegin, + inputAdjustedEnd); } relevantBlocks.shrink_to_fit(); return relevantBlocks; @@ -226,27 +287,6 @@ static std::string getLogicalOpStr(const LogicalOperator logOp) { } } -//______________________________________________________________________________ -// Helper to retrieve to complementing ranges. -static std::vector getComplementingRanges( - const std::vector& relevantRanges, const RandomIt& begin, - const RandomIt& end) { - std::vector rangesComplemented; - if (begin < relevantRanges.front().first) { - rangesComplemented.push_back( - RandomItPair{begin, relevantRanges.front().first}); - } - for (size_t i = 1; i < relevantRanges.size(); ++i) { - rangesComplemented.push_back( - RandomItPair{relevantRanges[i - 1].second, relevantRanges[i].first}); - } - if (end > relevantRanges.back().second) { - rangesComplemented.push_back( - RandomItPair{relevantRanges.back().second, end}); - } - return rangesComplemented; -} - // SECTION PREFILTER EXPRESSION (BASE CLASS) //______________________________________________________________________________ std::vector PrefilterExpression::evaluate( @@ -365,9 +405,9 @@ std::vector RelationalExpression::evaluateImpl( : getRangesForId(valueIdsInput.begin(), valueIdsInput.end(), referenceId, Comparison, false); - return getSetUnion( - getRelevantBlocksFromIdRanges(relevantIdRanges, input, valueIdsInput), - mixedDatatypeBlocks); + return getSetUnion(getRelevantBlocksFromIdRanges(relevantIdRanges, input, + valueIdsInput.begin()), + mixedDatatypeBlocks); }; //______________________________________________________________________________ @@ -478,6 +518,11 @@ std::vector IsDatatypeExpression::evaluateImpl( relevantRanges.push_back(valueIdComparators::getRangeForDatatype( idsBegin, idsEnd, Datatype::BlankNodeIndex)); } else if constexpr (Datatype == NUMERIC) { + // Remark: Swapping the defined relational order for Datatype::Int to + // Datatype::Double ValueIds (in ValueId.h) might affect the correctness of + // the following lines! + static_assert(Datatype::Int < Datatype::Double); + const auto& rangeInt = valueIdComparators::getRangeForDatatype( idsBegin, idsEnd, Datatype::Int); const auto& rangeDouble = valueIdComparators::getRangeForDatatype( @@ -511,9 +556,11 @@ std::vector IsDatatypeExpression::evaluateImpl( // If this IsDatatypeExpression expression is negated, retrieve the // corresponding complementing ranges. if (isNegated_) { - relevantRanges = getComplementingRanges(relevantRanges, idsBegin, idsEnd); + return getComplementBlocksForRelevantIdRanges( + relevantRanges, input, valueIdsInput.begin(), valueIdsInput.end()); } - return getRelevantBlocksFromIdRanges(relevantRanges, input, valueIdsInput); + return getRelevantBlocksFromIdRanges(relevantRanges, input, + valueIdsInput.begin()); }; // SECTION LOGICAL OPERATIONS