Skip to content

Commit

Permalink
Merge branch 'master' into variousUpdateImprovements
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/engine/Server.cpp
#	src/engine/Server.h
  • Loading branch information
Qup42 committed Feb 11, 2025
2 parents b735786 + 949e7db commit bece917
Show file tree
Hide file tree
Showing 31 changed files with 1,054 additions and 912 deletions.
15 changes: 13 additions & 2 deletions src/engine/AddCombinedRowToTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,24 @@
#include <cstdint>
#include <vector>

#include "backports/concepts.h"
#include "engine/idTable/IdTable.h"
#include "global/Id.h"
#include "util/CancellationHandle.h"
#include "util/Exception.h"
#include "util/TransparentFunctors.h"

namespace ad_utility {

// Compile-time detection helpers that are queried with `CPP_requires_ref`
// inside `if constexpr` in `AddCombinedRowToIdTable` (see `toView` and
// `mergeVocab`). They are declared with the `CPP_requires` macro instead of an
// inline `requires { ... }` expression.
// NOTE(review): the macro is presumably provided via `backports/concepts.h` so
// that the checks also work without native C++20 concepts -- confirm.
namespace detail::concepts {
// Satisfied if `table.template asStaticView<0>()` is a well-formed expression
// for an lvalue `table` of type `T`.
template <typename T>
CPP_requires(HasAsStaticView,
requires(T& table)(table.template asStaticView<0>()));

// Satisfied if `table.getLocalVocab()` is a well-formed expression for an
// lvalue `table` of type `T`.
template <typename T>
CPP_requires(HasGetLocalVocab, requires(T& table)(table.getLocalVocab()));
} // namespace detail::concepts

// This class handles the efficient writing of the results of a JOIN operation
// to a column-based `IdTable`. The underlying assumption is that in both inputs
// the join columns are the first columns. On each call to `addRow`, we only
Expand Down Expand Up @@ -130,7 +141,7 @@ class AddCombinedRowToIdTable {
// `IdTableView<0>`. Identity for `IdTableView<0>`.
template <typename T>
static IdTableView<0> toView(const T& table) {
if constexpr (requires { table.template asStaticView<0>(); }) {
if constexpr (CPP_requires_ref(detail::concepts::HasAsStaticView, T)) {
return table.template asStaticView<0>();
} else {
return table;
Expand All @@ -142,7 +153,7 @@ class AddCombinedRowToIdTable {
template <typename T>
void mergeVocab(const T& table, const LocalVocab*& currentVocab) {
AD_CORRECTNESS_CHECK(currentVocab == nullptr);
if constexpr (requires { table.getLocalVocab(); }) {
if constexpr (CPP_requires_ref(detail::concepts::HasGetLocalVocab, T)) {
currentVocab = &table.getLocalVocab();
mergedVocab_.mergeWith(std::span{&table.getLocalVocab(), 1});
}
Expand Down
6 changes: 5 additions & 1 deletion src/engine/ExportQueryExecutionTrees.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "ExportQueryExecutionTrees.h"

#include <absl/strings/str_cat.h>
#include <absl/strings/str_replace.h>

#include <ranges>

Expand Down Expand Up @@ -590,8 +591,11 @@ ExportQueryExecutionTrees::selectQueryResultToStream(
co_yield optionalStringAndType.value().first;
}
}
co_yield j + 1 < selectedColumnIndices.size() ? separator : '\n';
if (j + 1 < selectedColumnIndices.size()) {
co_yield separator;
}
}
co_yield '\n';
cancellationHandle->throwIfCancelled();
}
}
Expand Down
9 changes: 7 additions & 2 deletions src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,15 @@ QueryExecutionTree::selectedVariablesToColumnIndices(

// _____________________________________________________________________________
size_t QueryExecutionTree::getCostEstimate() {
if (cachedResult_) {
// result is pinned in cache. Nothing to compute
// If the result is cached and `zero-cost-estimate-for-cached-subtree` is set
// to `true`, we set the cost estimate to zero.
if (cachedResult_ &&
RuntimeParameters().get<"zero-cost-estimate-for-cached-subtree">()) {
return 0;
}

// Otherwise, we return the cost estimate of the root operation. For index
// scans, we assume one unit of work per result row.
if (getRootOperation()->isIndexScanWithNumVariables(1)) {
return getSizeEstimate();
} else {
Expand Down
209 changes: 209 additions & 0 deletions src/engine/SPARQLProtocol.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
// Copyright 2025, University of Freiburg
// Chair of Algorithms and Data Structures
// Authors: Julian Mundhahs <[email protected]>

#pragma once

#include "util/Algorithm.h"
#include "util/TypeIdentity.h"
#include "util/TypeTraits.h"
#include "util/http/HttpUtils.h"
#include "util/http/UrlParser.h"
#include "util/http/beast.h"

// Stateless collection of static helpers that turn an incoming HTTP request
// into an `ad_utility::url_parser::ParsedRequest` (path, optional access token,
// remaining URL parameters and the SPARQL operation, if any).
class SPARQLProtocol {
  FRIEND_TEST(SPARQLProtocolTest, extractAccessToken);

 public:
  /// Parse the path and URL parameters from the given request. Supports both
  /// GET and POST requests according to the SPARQL 1.1 standard.
  ///
  /// Behavior by request method:
  /// - GET: the `query` parameter (if present) becomes a `Query` operation and
  ///   is removed from the parameters. A remaining `update` parameter is an
  ///   error.
  /// - POST: the content type decides where the operation is read from (see
  ///   the comments in the body). Content types are matched by prefix only.
  ///
  /// In all successful cases the dataset clauses that belong to the detected
  /// operation are appended and the access token is extracted. If neither a
  /// query nor an update is given, the operation stays `None`, which is not
  /// an error.
  ///
  /// Throws `std::runtime_error` if
  /// - a GET request carries an `update` parameter,
  /// - a URL-encoded POST request has parameters in the URL, has a body that
  ///   cannot be parsed, or specifies both `query` and `update`,
  /// - a POST request has an unsupported content type,
  /// - the request method is neither GET nor POST,
  /// - the access token is malformed or ambiguous (see `extractAccessToken`).
  /// The `ad_utility::url_parser` helpers that are called here may throw as
  /// well (NOTE(review): presumably when a parameter is given more than once
  /// -- confirm against their implementation).
  static ad_utility::url_parser::ParsedRequest parseHttpRequest(
      const ad_utility::httpUtils::HttpRequest auto& request) {
    using namespace ad_utility::url_parser::sparqlOperation;
    using namespace ad_utility::use_type_identity;
    namespace http = boost::beast::http;
    // For an HTTP request, `request.target()` yields the HTTP Request-URI.
    // This is a concatenation of the URL path and the query strings.
    auto parsedUrl =
        ad_utility::url_parser::parseRequestTarget(request.target());
    // Start with "no access token" and "no operation"; both are filled in
    // below once the request method and content type are known.
    ad_utility::url_parser::ParsedRequest parsedRequest{
        std::move(parsedUrl.path_), std::nullopt,
        std::move(parsedUrl.parameters_), None{}};

    // Some valid requests (e.g. QLever's custom commands like retrieving index
    // statistics) don't have a query. So an empty operation is not necessarily
    // an error.
    // If `paramName` is present, store its value as an `Operation` and remove
    // it from the parameters, so that it is not reported as a plain parameter.
    auto setOperationIfSpecifiedInParams =
        [&parsedRequest]<typename Operation>(TI<Operation>,
                                             string_view paramName) {
          auto operation = ad_utility::url_parser::getParameterCheckAtMostOnce(
              parsedRequest.parameters_, paramName);
          if (operation.has_value()) {
            parsedRequest.operation_ = Operation{operation.value(), {}};
            parsedRequest.parameters_.erase(paramName);
          }
        };
    // Append the dataset clauses stored under `key` to the current operation,
    // but only if the current operation actually is an `Operation`. For any
    // other operation type this is a no-op.
    auto addToDatasetClausesIfOperationIs =
        [&parsedRequest]<typename Operation>(TI<Operation>,
                                             const std::string& key,
                                             bool isNamed) {
          if (Operation* op =
                  std::get_if<Operation>(&parsedRequest.operation_)) {
            ad_utility::appendVector(
                op->datasetClauses_,
                ad_utility::url_parser::parseDatasetClausesFrom(
                    parsedRequest.parameters_, key, isNamed));
          }
        };
    // Queries and updates use different parameter names for their default
    // and named graphs; at most one of the two pairs applies.
    auto addDatasetClauses = [&addToDatasetClausesIfOperationIs] {
      addToDatasetClausesIfOperationIs(ti<Query>, "default-graph-uri", false);
      addToDatasetClausesIfOperationIs(ti<Query>, "named-graph-uri", true);
      addToDatasetClausesIfOperationIs(ti<Update>, "using-graph-uri", false);
      addToDatasetClausesIfOperationIs(ti<Update>, "using-named-graph-uri",
                                       true);
    };
    // Must be called only after `parsedRequest.parameters_` has its final
    // contents, because the token may be passed as a parameter.
    auto extractAccessTokenFromRequest = [&parsedRequest, &request]() {
      parsedRequest.accessToken_ =
          extractAccessToken(request, parsedRequest.parameters_);
    };

    if (request.method() == http::verb::get) {
      setOperationIfSpecifiedInParams(ti<Query>, "query");
      addDatasetClauses();
      extractAccessTokenFromRequest();

      if (parsedRequest.parameters_.contains("update")) {
        throw std::runtime_error(
            "SPARQL Update is not allowed as GET request.");
      }
      return parsedRequest;
    }
    if (request.method() == http::verb::post) {
      // For a POST request, the content type *must* be either
      // "application/x-www-form-urlencoded" (1), "application/sparql-query"
      // (2) or "application/sparql-update" (3).
      //
      // (1) Section 2.1.2: The body of the POST request contains *all*
      // parameters (including the query or update) in an encoded form (just
      // like in the part of a GET request after the "?").
      //
      // (2) Section 2.1.3: The body of the POST request contains *only* the
      // unencoded SPARQL query. There may be additional HTTP query parameters.
      //
      // (3) Section 2.2.2: The body of the POST request contains *only* the
      // unencoded SPARQL update. There may be additional HTTP query parameters.
      //
      // Reference: https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321
      std::string_view contentType = request.base()[http::field::content_type];
      LOG(DEBUG) << "Content-type: \"" << contentType << "\"" << std::endl;
      static constexpr std::string_view contentTypeUrlEncoded =
          "application/x-www-form-urlencoded";
      static constexpr std::string_view contentTypeSparqlQuery =
          "application/sparql-query";
      static constexpr std::string_view contentTypeSparqlUpdate =
          "application/sparql-update";

      // Note: For simplicity we only check via `starts_with`. This ignores
      // additional parameters like `application/sparql-query;charset=utf8`. We
      // currently always expect UTF-8.
      // TODO<joka921> Implement more complete parsing that allows the checking
      // of these parameters.
      if (contentType.starts_with(contentTypeUrlEncoded)) {
        // All parameters must be included in the request body for URL-encoded
        // POST. The HTTP query string parameters must be empty. See SPARQL 1.1
        // Protocol Sections 2.1.2
        if (!parsedRequest.parameters_.empty()) {
          throw std::runtime_error(
              "URL-encoded POST requests must not contain query parameters in "
              "the URL.");
        }

        // Set the url-encoded parameters from the request body.
        // Note: previously we used `boost::urls::parse_query`, but that
        // function doesn't unescape the `+` which encodes a space character.
        // The following workaround of making the url-encoded parameters a
        // complete relative url and parsing this URL seems to work. Note: We
        // have to bind the result of `StrCat` to an explicit variable, as the
        // `boost::urls` parsing routines only give back a view, which otherwise
        // would be dangling.
        auto bodyAsQuery = absl::StrCat("/?", request.body());
        auto query = boost::urls::parse_origin_form(bodyAsQuery);
        if (!query) {
          throw std::runtime_error(
              "Invalid URL-encoded POST request, body was: " + request.body());
        }
        parsedRequest.parameters_ =
            ad_utility::url_parser::paramsToMap(query->params());

        if (parsedRequest.parameters_.contains("query") &&
            parsedRequest.parameters_.contains("update")) {
          throw std::runtime_error(
              R"(Request must only contain one of "query" and "update".)");
        }
        setOperationIfSpecifiedInParams(ti<Query>, "query");
        setOperationIfSpecifiedInParams(ti<Update>, "update");
        addDatasetClauses();
        // We parse the access token from the url-encoded parameters in the
        // body. The URL parameters must be empty for URL-encoded POST (see
        // above).
        extractAccessTokenFromRequest();

        return parsedRequest;
      }
      if (contentType.starts_with(contentTypeSparqlQuery)) {
        parsedRequest.operation_ = Query{request.body(), {}};
        addDatasetClauses();
        extractAccessTokenFromRequest();
        return parsedRequest;
      }
      if (contentType.starts_with(contentTypeSparqlUpdate)) {
        parsedRequest.operation_ = Update{request.body(), {}};
        addDatasetClauses();
        extractAccessTokenFromRequest();
        return parsedRequest;
      }
      throw std::runtime_error(absl::StrCat(
          "POST request with content type \"", contentType,
          "\" not supported (must be \"", contentTypeUrlEncoded, "\", \"",
          contentTypeSparqlQuery, "\" or \"", contentTypeSparqlUpdate, "\")"));
    }
    // Neither GET nor POST: report the method by the name it has when
    // streamed to an `std::ostream`.
    std::ostringstream requestMethodName;
    requestMethodName << request.method();
    throw std::runtime_error(
        absl::StrCat("Request method \"", requestMethodName.str(),
                     "\" not supported (has to be GET or POST)"));
  }

 private:
  /// Extract the access token from the `Authorization` header of `request`
  /// and/or from the `access-token` entry of `params`.
  ///
  /// Returns the token from the header if there is one, otherwise the token
  /// from the parameter, otherwise `std::nullopt`.
  ///
  /// Throws `std::runtime_error` if the `Authorization` header is present but
  /// does not start with "Bearer ", or if both sources specify a token and
  /// the two tokens differ.
  static std::optional<std::string> extractAccessToken(
      const ad_utility::httpUtils::HttpRequest auto& request,
      const ad_utility::url_parser::ParamValueMap& params) {
    namespace http = boost::beast::http;
    std::optional<std::string> tokenFromAuthorizationHeader;
    std::optional<std::string> tokenFromParameter;
    if (request.find(http::field::authorization) != request.end()) {
      string_view authorization = request[http::field::authorization];
      // A compile-time constant view; no `std::string` has to be allocated
      // on every call just to hold the prefix.
      static constexpr std::string_view prefix = "Bearer ";
      if (!authorization.starts_with(prefix)) {
        throw std::runtime_error(absl::StrCat(
            "Authorization header doesn't start with \"", prefix, "\"."));
      }
      // Everything after the prefix is the token.
      authorization.remove_prefix(prefix.length());
      tokenFromAuthorizationHeader = std::string(authorization);
    }
    if (params.contains("access-token")) {
      tokenFromParameter = ad_utility::url_parser::getParameterCheckAtMostOnce(
          params, "access-token");
    }
    // If both are specified, they must be equal. This way there is no hidden
    // precedence.
    if (tokenFromAuthorizationHeader && tokenFromParameter &&
        tokenFromAuthorizationHeader != tokenFromParameter) {
      throw std::runtime_error(
          "Access token is specified both in the `Authorization` header and by "
          "the `access-token` parameter, but they are not the same");
    }
    return tokenFromAuthorizationHeader
               ? std::move(tokenFromAuthorizationHeader)
               : std::move(tokenFromParameter);
  }
};
Loading

0 comments on commit bece917

Please sign in to comment.