diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp index e0808817b1..8bfc1fa356 100644 --- a/src/ServerMain.cpp +++ b/src/ServerMain.cpp @@ -122,6 +122,10 @@ int main(int argc, char** argv) { "variables that are unbound in the query throw an exception. These " "queries technically are allowed by the SPARQL standard, but typically " "are the result of typos and unintended by the user"); + add("request-body-limit", + optionFactory.getProgramOption<"request-body-limit">(), + "Set the maximum size for the body of requests the server will process. " + "Set to zero to disable the limit."); po::variables_map optionsMap; try { diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index f1c80ac15d..77186d86dd 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -24,6 +24,7 @@ #include "util/ParseableDuration.h" #include "util/TypeIdentity.h" #include "util/TypeTraits.h" +#include "util/http/HttpServer.h" #include "util/http/HttpUtils.h" #include "util/http/websocket/MessageSender.h" diff --git a/src/engine/Server.h b/src/engine/Server.h index 8c483c8fbe..02228328eb 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -21,7 +21,7 @@ #include "util/MemorySize/MemorySize.h" #include "util/ParseException.h" #include "util/TypeTraits.h" -#include "util/http/HttpServer.h" +#include "util/http/HttpUtils.h" #include "util/http/streamable_body.h" #include "util/http/websocket/QueryHub.h" #include "util/json.h" @@ -84,11 +84,11 @@ class Server { /// the `WebSocketHandler` created for `HttpServer`. std::weak_ptr queryHub_; - net::static_thread_pool queryThreadPool_; - net::static_thread_pool updateThreadPool_{1}; + boost::asio::static_thread_pool queryThreadPool_; + boost::asio::static_thread_pool updateThreadPool_{1}; /// Executor with a single thread that is used to run timers asynchronously. 
- net::static_thread_pool timerExecutor_{1}; + boost::asio::static_thread_pool timerExecutor_{1}; template using Awaitable = boost::asio::awaitable; @@ -186,12 +186,10 @@ class Server { TimeLimit timeLimit); // Plan a parsed query. - Awaitable planQuery(net::static_thread_pool& thread_pool, - ParsedQuery&& operation, - const ad_utility::Timer& requestTimer, - TimeLimit timeLimit, - QueryExecutionContext& qec, - SharedCancellationHandle handle); + Awaitable planQuery( + boost::asio::static_thread_pool& thread_pool, ParsedQuery&& operation, + const ad_utility::Timer& requestTimer, TimeLimit timeLimit, + QueryExecutionContext& qec, SharedCancellationHandle handle); // Creates a `MessageSender` for the given operation. CPP_template_2(typename RequestT)( requires ad_utility::httpUtils::HttpRequest) @@ -218,7 +216,7 @@ class Server { /// its completion, wrapping the result. template > - Awaitable computeInNewThread(net::static_thread_pool& threadPool, + Awaitable computeInNewThread(boost::asio::static_thread_pool& threadPool, Function function, SharedCancellationHandle handle); @@ -273,7 +271,7 @@ class Server { /// Forbidden HTTP response if the change is not allowed. Return the new /// timeout otherwise. CPP_template_2(typename RequestT, typename ResponseT)( - requires ad_utility::httpUtils::HttpRequest) net:: + requires ad_utility::httpUtils::HttpRequest) boost::asio:: awaitable> verifyUserSubmittedQueryTimeout( std::optional userTimeout, bool accessTokenOk, const RequestT& request, ResponseT& send) const; diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index f05c23b081..858b47c472 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -63,6 +63,8 @@ inline auto& RuntimeParameters() { // Determines whether the cost estimate for a cached subtree should be // set to zero in query planning. 
Bool<"zero-cost-estimate-for-cached-subtree">{false}, + // Maximum size for the body of requests that the server will process. + MemorySizeParameter<"request-body-limit">{100_MB}, }; }(); return params; diff --git a/src/util/http/CMakeLists.txt b/src/util/http/CMakeLists.txt index 37d9a86a76..e2580eb4aa 100644 --- a/src/util/http/CMakeLists.txt +++ b/src/util/http/CMakeLists.txt @@ -2,7 +2,7 @@ add_subdirectory(HttpParser) add_library(mediaTypes MediaTypes.h MediaTypes.cpp) target_compile_options(mediaTypes PUBLIC -Wno-attributes) qlever_target_link_libraries(mediaTypes util) -add_library(http HttpServer.h HttpClient.h HttpClient.cpp HttpUtils.h HttpUtils.cpp UrlParser.h UrlParser.cpp "HttpParser/AcceptHeaderQleverVisitor.h" +add_library(http HttpServer.h HttpServer.cpp HttpClient.h HttpClient.cpp HttpUtils.h HttpUtils.cpp UrlParser.h UrlParser.cpp "HttpParser/AcceptHeaderQleverVisitor.h" websocket/WebSocketSession.cpp websocket/MessageSender.cpp websocket/QueryToSocketDistributor.cpp websocket/QueryHub.cpp websocket/UpdateFetcher.cpp) diff --git a/src/util/http/HttpClient.cpp b/src/util/http/HttpClient.cpp index e09808c3c3..1998d0f947 100644 --- a/src/util/http/HttpClient.cpp +++ b/src/util/http/HttpClient.cpp @@ -119,7 +119,7 @@ HttpOrHttpsResponse HttpClientImpl::sendRequest( beast::flat_buffer buffer; auto responseParser = std::make_unique>(); - responseParser->body_limit(std::numeric_limits::max()); + responseParser->body_limit(boost::none); wait(http::async_read_header(*(client->stream_), buffer, *responseParser, net::use_awaitable)); diff --git a/src/util/http/HttpServer.cpp b/src/util/http/HttpServer.cpp new file mode 100644 index 0000000000..80effe9d84 --- /dev/null +++ b/src/util/http/HttpServer.cpp @@ -0,0 +1,12 @@ +// Copyright 2021-2025, University of Freiburg, +// Chair of Algorithms and Data Structures +// Authors: Johannes Kalmbach +// Julian Mundhahs + +#include "util/http/HttpServer.h" + +#include "global/RuntimeParameters.h" + 
+ad_utility::MemorySize getRequestBodyLimit() { + return RuntimeParameters().get<"request-body-limit">(); +} diff --git a/src/util/http/HttpServer.h b/src/util/http/HttpServer.h index d146fa48c5..238b302d6c 100644 --- a/src/util/http/HttpServer.h +++ b/src/util/http/HttpServer.h @@ -1,7 +1,7 @@ - -// Copyright 2021, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach +// Copyright 2021-2025, University of Freiburg, +// Chair of Algorithms and Data Structures +// Authors: Johannes Kalmbach +// Julian Mundhahs #ifndef QLEVER_HTTPSERVER_H #define QLEVER_HTTPSERVER_H @@ -22,6 +22,10 @@ namespace http = beast::http; // from namespace net = boost::asio; // from using tcp = boost::asio::ip::tcp; // from +// Including the `RuntimeParameters` header is expensive. Move functions that +// require it into an implementation file. +ad_utility::MemorySize getRequestBodyLimit(); + /* * \brief A Simple HttpServer, based on Boost::Beast. It can be configured via * the mandatory HttpHandler parameter. @@ -242,14 +246,25 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( // Sessions might be reused for multiple request/response pairs. while (true) { + // Optional to temporarily store an error response. We cannot `co_await` + // in a `catch` block and thus cannot send the error response directly in + // the `catch`. + std::optional> errorResponse; + try { // Set the timeout for reading the next request. stream.expires_after(std::chrono::seconds(30)); - http::request req; - // Read a request - co_await http::async_read(stream, buffer, req, + // Read a request. Use a parser so that we can control the limit of the + // request size. + http::request_parser requestParser; + auto bodyLimit = getRequestBodyLimit().getBytes(); + requestParser.body_limit(bodyLimit == 0 + ? 
boost::none + : boost::optional(bodyLimit)); + co_await http::async_read(stream, buffer, requestParser, boost::asio::use_awaitable); + http::request req = requestParser.release(); // Let request be handled by `WebSocketSession` if the HTTP // request is a WebSocket handshake @@ -284,6 +299,13 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( // The stream has ended, gracefully close the connection. beast::error_code ec; stream.socket().shutdown(tcp::socket::shutdown_send, ec); + } else if (error.code() == http::error::body_limit) { + errorResponse = ad_utility::httpUtils::createHttpResponseFromString( + absl::StrCat( + R"({"error": "Request body size exceeds the allowed size ()", + getRequestBodyLimit().asString(), + R"(). Send a smaller request or set the allowed size via the "request-body-limit" run-time parameter."})"), + http::status::payload_too_large, ad_utility::MediaType::json); } else { // This is the error "The socket was closed due to a timeout" or if // the client stream ended unexpectedly. @@ -295,8 +317,12 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( logBeastError(error.code(), error.what()); } } - // In case of an error, close the session by returning. - co_return; + // If we have an error response send it outside the `catch` block. (We + // can not `co_await` in the `catch` block) Otherwise close the + // session by returning. + if (!errorResponse) { + co_return; + } } catch (const std::exception& error) { LOG(ERROR) << error.what() << std::endl; co_return; @@ -306,6 +332,12 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( << std::endl; co_return; } + + // If we have an error response, send it and then close the session by + // returning. 
+ if (errorResponse.has_value()) { + co_return co_await sendMessage(std::move(errorResponse).value()); + } } } }; diff --git a/src/util/http/HttpUtils.h b/src/util/http/HttpUtils.h index 0fb2325313..fefc3eb7bf 100644 --- a/src/util/http/HttpUtils.h +++ b/src/util/http/HttpUtils.h @@ -93,7 +93,32 @@ CPP_concept HttpRequest = detail::isHttpRequest; * @brief Create a http::response from a string, which will become the body * @param body The body of the response * @param status The http status. - * @param request The request to which the response belongs. + * @param mediaType The media type of the response. + * @param keepAlive Whether to set the keep-alive header and, if so, to which + * value (default: don't set it). + * @param version The HTTP version (default: HTTP 1.1). + * @return A http::response which is ready to be sent. + */ +inline http::response createHttpResponseFromString( + std::string body, http::status status, MediaType mediaType, + std::optional keepAlive = std::nullopt, unsigned version = 11) { + http::response response{status, version}; + response.set(http::field::content_type, toString(mediaType)); + response.body() = std::move(body); + if (keepAlive.has_value()) { + response.keep_alive(keepAlive.value()); + } + // Set Content-Length and Transfer-Encoding. + response.prepare_payload(); + return response; +} + +/** + * @brief Create a http::response from a string, which will become the body + * @param body The body of the response + * @param status The http status. + * @param request The request to which the response belongs; keep-alive and HTTP + * version are copied from it. * @param mediaType The media type of the response. * @return A http::response which is ready to be sent. 
*/ @@ -104,13 +129,8 @@ CPP_template(typename RequestType)( const RequestType& request, MediaType mediaType) { - http::response response{status, request.version()}; - response.set(http::field::content_type, toString(mediaType)); - response.keep_alive(request.keep_alive()); - response.body() = std::move(body); - // Set Content-Length and Transfer-Encoding. - response.prepare_payload(); - return response; + return createHttpResponseFromString(std::move(body), status, mediaType, + request.keep_alive(), request.version()); } /// Create a HttpResponse from a string with status 200 OK. Otherwise behaves diff --git a/test/HttpTest.cpp b/test/HttpTest.cpp index 217d912f4b..e04e7c76ad 100644 --- a/test/HttpTest.cpp +++ b/test/HttpTest.cpp @@ -8,6 +8,8 @@ #include #include "HttpTestHelpers.h" +#include "global/RuntimeParameters.h" +#include "util/GTestHelpers.h" #include "util/http/HttpClient.h" #include "util/http/HttpServer.h" #include "util/http/HttpUtils.h" @@ -275,3 +277,103 @@ TEST(HttpServer, ErrorHandlingInSession) { EXPECT_THAT(s, HasSubstr("Weird exception not inheriting from std::exception")); } + +// Test the request body size limit +TEST(HttpServer, RequestBodySizeLimit) { + ad_utility::SharedCancellationHandle handle = + std::make_shared>(); + + TestHttpServer httpServer([](auto request, + auto&& send) -> boost::asio::awaitable { + std::string methodName; + switch (request.method()) { + case verb::get: + methodName = "GET"; + break; + case verb::post: + methodName = "POST"; + break; + default: + methodName = "OTHER"; + } + + auto response = [](std::string methodName, + std::string target) -> cppcoro::generator { + co_yield methodName; + co_yield "\n"; + co_yield target; + }(methodName, std::string(toStd(request.target()))); + + // Send a response. 
+ co_await send(createOkResponse(std::move(response), request, + ad_utility::MediaType::textPlain)); + }); + + httpServer.runInOwnThread(); + + auto ResponseMetadata = [](const status status, string_view content_type) { + return AllOf( + AD_FIELD(HttpOrHttpsResponse, status_, testing::Eq(status)), + AD_FIELD(HttpOrHttpsResponse, contentType_, testing::Eq(content_type))); + }; + auto expect_ = [&httpServer](const ad_utility::MemorySize& requestBodySize, + const std::string& expectedBody, + const auto& responseMatcher) { + ad_utility::SharedCancellationHandle handle = + std::make_shared>(); + + auto httpClient = std::make_unique( + "localhost", std::to_string(httpServer.getPort())); + + const std::string body(requestBodySize.getBytes(), 'f'); + + auto response = HttpClient::sendRequest( + std::move(httpClient), verb::post, "localhost", "target", handle, body); + EXPECT_THAT(response, responseMatcher); + EXPECT_THAT(toString(std::move(response.body_)), expectedBody); + }; + + auto expectRequestFails = [&ResponseMetadata, &expect_]( + const ad_utility::MemorySize& requestBodySize) { + const ad_utility::MemorySize currentLimit = + RuntimeParameters().get<"request-body-limit">(); + // For large requests we get an exception while writing to the request + // stream when going over the limit. For small requests we get the response + // normally. We would need the HttpClient to return the response even + // if it couldn't send the request fully in that case. + expect_( + requestBodySize, + R"({"error": "Request body size exceeds the allowed size ()" + + currentLimit.asString() + + R"(). 
Send a smaller request or set the allowed size via the "request-body-limit" run-time parameter."})", + ResponseMetadata(status::payload_too_large, "application/json")); + }; + auto expectRequest = [&expect_, &ResponseMetadata]( + const ad_utility::MemorySize requestBodySize) { + expect_(requestBodySize, "POST\ntarget", + ResponseMetadata(status::ok, "text/plain")); + }; + constexpr auto testingRequestBodyLimit = 50_kB; + + // Set a smaller limit for testing. The default of 100 MB is quite large. + RuntimeParameters().set("request-body-limit", 50_kB .asString()); + // Requests with bodies smaller than the request body limit are processed. + expectRequest(3_B); + // Exactly the limit is allowed. + expectRequest(testingRequestBodyLimit); + // Larger than the limit is forbidden. + expectRequestFails(testingRequestBodyLimit + 1_B); + + // Setting a smaller request-body limit. + RuntimeParameters().set("request-body-limit", 1_B .asString()); + expectRequestFails(3_B); + // Only the request body size counts. The empty body is allowed even if the + // body is limited to 1 byte. + expectRequest(0_B); + + // Disable the request body limit, by setting it to 0. + RuntimeParameters().set("request-body-limit", 0_B .asString()); + // Arbitrarily large requests are now allowed. + expectRequest(10_kB); + expectRequest(5_MB); +}