From d37d1c59f50e32ab5eadce54264ed5005a6eed79 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Tue, 21 Jan 2025 20:09:45 +0100 Subject: [PATCH 1/9] Increase the request size limit --- src/util/http/HttpServer.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/util/http/HttpServer.h b/src/util/http/HttpServer.h index 554714732a..b53339d261 100644 --- a/src/util/http/HttpServer.h +++ b/src/util/http/HttpServer.h @@ -233,11 +233,15 @@ class HttpServer { try { // Set the timeout for reading the next request. stream.expires_after(std::chrono::seconds(30)); - http::request req; - // Read a request - co_await http::async_read(stream, buffer, req, + // Read a request. Use a parser so that we can control the limit of the + // request size. + beast::http::request_parser + requestParser; + requestParser.body_limit(100'000'000); // 100 MB + co_await http::async_read(stream, buffer, requestParser, boost::asio::use_awaitable); + http::request req = requestParser.get(); // Let request be handled by `WebSocketSession` if the HTTP // request is a WebSocket handshake From 39f3775ca00ce0a2a77d6cfe98bcbf90651b067f Mon Sep 17 00:00:00 2001 From: Julian Mundhahs Date: Tue, 18 Feb 2025 19:47:23 +0100 Subject: [PATCH 2/9] set the request body size limit via a runtime parameter --- src/ServerMain.cpp | 4 ++++ src/global/RuntimeParameters.h | 2 ++ src/util/http/HttpServer.h | 10 +++++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp index e0808817b1..8bfc1fa356 100644 --- a/src/ServerMain.cpp +++ b/src/ServerMain.cpp @@ -122,6 +122,10 @@ int main(int argc, char** argv) { "variables that are unbound in the query throw an exception. These " "queries technically are allowed by the SPARQL standard, but typically " "are the result of typos and unintended by the user"); + add("request-body-limit", + optionFactory.getProgramOption<"request-body-limit">(), + "Set the maximum size for the body of requests the server will process. " + "Set to zero to disable the limit."); po::variables_map optionsMap; try { diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index f61705e7fc..1e230eff04 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -60,6 +60,8 @@ inline auto& RuntimeParameters() { // its size estimate will be the size of the block divided by this // value. SizeT<"small-index-scan-size-estimate-divisor">{5}, + // Maximum size for the body of requests that the server will process. + MemorySizeParameter<"request-body-limit">{1_MB}, }; }(); return params; diff --git a/src/util/http/HttpServer.h b/src/util/http/HttpServer.h index b53339d261..92fb0ed352 100644 --- a/src/util/http/HttpServer.h +++ b/src/util/http/HttpServer.h @@ -10,6 +10,7 @@ #include #include "absl/cleanup/cleanup.h" +#include "global/RuntimeParameters.h" #include "util/Exception.h" #include "util/Log.h" #include "util/http/HttpUtils.h" @@ -236,9 +237,12 @@ class HttpServer { // Read a request. Use a parser so that we can control the limit of the // request size. - beast::http::request_parser - requestParser; - requestParser.body_limit(100'000'000); // 100 MB + http::request_parser requestParser; + auto bodyLimit = + RuntimeParameters().get<"request-body-limit">().getBytes(); + requestParser.body_limit(bodyLimit == 0 + ? boost::none + : boost::optional(bodyLimit)); co_await http::async_read(stream, buffer, requestParser, boost::asio::use_awaitable); http::request req = requestParser.get(); From bdb6694a9cb7b96803c6d83fd0213561fff46442 Mon Sep 17 00:00:00 2001 From: Julian Mundhahs Date: Tue, 18 Feb 2025 20:24:10 +0100 Subject: [PATCH 3/9] move the request out from boost's parser --- src/util/http/HttpServer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/http/HttpServer.h b/src/util/http/HttpServer.h index 92fb0ed352..23e2c8742e 100644 --- a/src/util/http/HttpServer.h +++ b/src/util/http/HttpServer.h @@ -245,7 +245,7 @@ class HttpServer { : boost::optional(bodyLimit)); co_await http::async_read(stream, buffer, requestParser, boost::asio::use_awaitable); - http::request req = requestParser.get(); + http::request req = requestParser.release(); // Let request be handled by `WebSocketSession` if the HTTP // request is a WebSocket handshake From dd36afff83193dc1cf420e65f7057d9ed889c4b8 Mon Sep 17 00:00:00 2001 From: Julian Mundhahs Date: Fri, 21 Feb 2025 21:43:06 +0100 Subject: [PATCH 4/9] include HttpServer.h only in Server.cpp --- src/engine/Server.cpp | 1 + src/engine/Server.h | 22 ++++++++++------------ src/util/http/HttpServer.h | 8 ++++---- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/engine/Server.cpp b/src/engine/Server.cpp index f1c80ac15d..77186d86dd 100644 --- a/src/engine/Server.cpp +++ b/src/engine/Server.cpp @@ -24,6 +24,7 @@ #include "util/ParseableDuration.h" #include "util/TypeIdentity.h" #include "util/TypeTraits.h" +#include "util/http/HttpServer.h" #include "util/http/HttpUtils.h" #include "util/http/websocket/MessageSender.h" diff --git a/src/engine/Server.h b/src/engine/Server.h index 8c483c8fbe..02228328eb 100644 --- a/src/engine/Server.h +++ b/src/engine/Server.h @@ -21,7 +21,7 @@ #include "util/MemorySize/MemorySize.h" #include "util/ParseException.h" #include "util/TypeTraits.h" -#include "util/http/HttpServer.h" +#include "util/http/HttpUtils.h" #include "util/http/streamable_body.h" #include "util/http/websocket/QueryHub.h" #include "util/json.h" @@ -84,11 +84,11 @@ class Server { /// the `WebSocketHandler` created for `HttpServer`. std::weak_ptr queryHub_; - net::static_thread_pool queryThreadPool_; - net::static_thread_pool updateThreadPool_{1}; + boost::asio::static_thread_pool queryThreadPool_; + boost::asio::static_thread_pool updateThreadPool_{1}; /// Executor with a single thread that is used to run timers asynchronously. - net::static_thread_pool timerExecutor_{1}; + boost::asio::static_thread_pool timerExecutor_{1}; template using Awaitable = boost::asio::awaitable; @@ -186,12 +186,10 @@ class Server { TimeLimit timeLimit); // Plan a parsed query. - Awaitable planQuery(net::static_thread_pool& thread_pool, - ParsedQuery&& operation, - const ad_utility::Timer& requestTimer, - TimeLimit timeLimit, - QueryExecutionContext& qec, - SharedCancellationHandle handle); + Awaitable planQuery( + boost::asio::static_thread_pool& thread_pool, ParsedQuery&& operation, + const ad_utility::Timer& requestTimer, TimeLimit timeLimit, + QueryExecutionContext& qec, SharedCancellationHandle handle); // Creates a `MessageSender` for the given operation. CPP_template_2(typename RequestT)( requires ad_utility::httpUtils::HttpRequest) @@ -218,7 +216,7 @@ class Server { /// its completion, wrapping the result. template > - Awaitable computeInNewThread(net::static_thread_pool& threadPool, + Awaitable computeInNewThread(boost::asio::static_thread_pool& threadPool, Function function, SharedCancellationHandle handle); @@ -273,7 +271,7 @@ class Server { /// Forbidden HTTP response if the change is not allowed. Return the new /// timeout otherwise. CPP_template_2(typename RequestT, typename ResponseT)( - requires ad_utility::httpUtils::HttpRequest) net:: + requires ad_utility::httpUtils::HttpRequest) boost::asio:: awaitable> verifyUserSubmittedQueryTimeout( std::optional userTimeout, bool accessTokenOk, const RequestT& request, ResponseT& send) const; diff --git a/src/util/http/HttpServer.h b/src/util/http/HttpServer.h index 7530d5c6a5..206339773c 100644 --- a/src/util/http/HttpServer.h +++ b/src/util/http/HttpServer.h @@ -1,7 +1,7 @@ - -// Copyright 2021, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach +// Copyright 2021-2025, University of Freiburg, +// Chair of Algorithms and Data Structures +// Authors: Johannes Kalmbach +// Julian Mundhahs Date: Fri, 21 Feb 2025 21:47:14 +0100 Subject: [PATCH 5/9] set default request-body-limit to 100mb --- src/global/RuntimeParameters.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index 93e328fe66..858b47c472 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -64,7 +64,7 @@ inline auto& RuntimeParameters() { // set to zero in query planning. Bool<"zero-cost-estimate-for-cached-subtree">{false}, // Maximum size for the body of requests that the server will process. - MemorySizeParameter<"request-body-limit">{1_MB}, + MemorySizeParameter<"request-body-limit">{100_MB}, }; }(); return params; From 602895540e83ca0a6a214646343c0dd06e303929 Mon Sep 17 00:00:00 2001 From: Julian Mundhahs Date: Sat, 22 Feb 2025 21:25:24 +0100 Subject: [PATCH 6/9] add error handling for too large request bodies --- src/util/http/HttpServer.h | 26 ++++++++++++++++++++++++-- src/util/http/HttpUtils.h | 36 ++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/util/http/HttpServer.h b/src/util/http/HttpServer.h index 206339773c..795559952e 100644 --- a/src/util/http/HttpServer.h +++ b/src/util/http/HttpServer.h @@ -243,6 +243,11 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( // Sessions might be reused for multiple request/response pairs. while (true) { + // Optional to temporarily store an error response. We can not `co_await` + // in a `catch` block and thus can not send the error response directly in + // the `catch`. + std::optional> errorResponse; + try { // Set the timeout for reading the next request. stream.expires_after(std::chrono::seconds(30)); @@ -292,6 +297,13 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( // The stream has ended, gracefully close the connection. beast::error_code ec; stream.socket().shutdown(tcp::socket::shutdown_send, ec); + } else if (error.code() == http::error::body_limit) { + errorResponse = ad_utility::httpUtils::createHttpResponseFromString( + absl::StrCat( + R"({"error": "Request body size exceeds the allowed size ()", + RuntimeParameters().get<"request-body-limit">().asString(), + R"(). Send a smaller request or set the allowed size via the "request-body-limit" run-time parameter."})"), + http::status::payload_too_large, ad_utility::MediaType::json); } else { // This is the error "The socket was closed due to a timeout" or if // the client stream ended unexpectedly. @@ -303,8 +315,12 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( logBeastError(error.code(), error.what()); } } - // In case of an error, close the session by returning. - co_return; + // If we have an error response send it outside the `catch` block. (We + // can not `co_await` in the `catch` block) Otherwise close the + // session by returning. + if (!errorResponse) { + co_return; + } } catch (const std::exception& error) { LOG(ERROR) << error.what() << std::endl; co_return; @@ -314,6 +330,12 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( << std::endl; co_return; } + + // If we have an error response, send it and then close the session by + // returning. + if (errorResponse.has_value()) { + co_return co_await sendMessage(std::move(errorResponse).value()); + } } } }; diff --git a/src/util/http/HttpUtils.h b/src/util/http/HttpUtils.h index 0fb2325313..fefc3eb7bf 100644 --- a/src/util/http/HttpUtils.h +++ b/src/util/http/HttpUtils.h @@ -93,7 +93,32 @@ CPP_concept HttpRequest = detail::isHttpRequest; * @brief Create a http::response from a string, which will become the body * @param body The body of the response * @param status The http status. - * @param request The request to which the response belongs. + * @param mediaType The media type of the response. + * @param keepAlive Whether to set the keep alive header and if to which value + * (default: don't set it). + * @param version The HTTP version (default: HTTP 1.1). + * @return A http::response which is ready to be sent. + */ +inline http::response createHttpResponseFromString( + std::string body, http::status status, MediaType mediaType, + std::optional keepAlive = std::nullopt, unsigned version = 11) { + http::response response{status, version}; + response.set(http::field::content_type, toString(mediaType)); + response.body() = std::move(body); + if (keepAlive.has_value()) { + response.keep_alive(keepAlive.value()); + } + // Set Content-Length and Transfer-Encoding. + response.prepare_payload(); + return response; +} + +/** + * @brief Create a http::response from a string, which will become the body + * @param body The body of the response + * @param status The http status. + * @param request The request to which the response belongs, keep alive and HTTP + * version are copied from it. * @param mediaType The media type of the response. * @return A http::response which is ready to be sent. */ @@ -104,13 +129,8 @@ CPP_template(typename RequestType)( const RequestType& request, MediaType mediaType) { - http::response response{status, request.version()}; - response.set(http::field::content_type, toString(mediaType)); - response.keep_alive(request.keep_alive()); - response.body() = std::move(body); - // Set Content-Length and Transfer-Encoding. - response.prepare_payload(); - return response; + return createHttpResponseFromString(std::move(body), status, mediaType, + request.keep_alive(), request.version()); } /// Create a HttpResponse from a string with status 200 OK. Otherwise behaves From 74a1e7c910833320ce5c4e599a1283c4fc64e8a6 Mon Sep 17 00:00:00 2001 From: Julian Mundhahs Date: Mon, 24 Feb 2025 17:30:00 +0100 Subject: [PATCH 7/9] fix copyright header --- src/util/http/HttpServer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/http/HttpServer.h b/src/util/http/HttpServer.h index 795559952e..1d8d129844 100644 --- a/src/util/http/HttpServer.h +++ b/src/util/http/HttpServer.h @@ -1,7 +1,7 @@ // Copyright 2021-2025, University of Freiburg, // Chair of Algorithms and Data Structures // Authors: Johannes Kalmbach -// Julian Mundhahs #ifndef QLEVER_HTTPSERVER_H #define QLEVER_HTTPSERVER_H From 45bedbbe87541f451640b18bbacdf8d55b2c5889 Mon Sep 17 00:00:00 2001 From: Julian Mundhahs Date: Mon, 24 Feb 2025 17:30:21 +0100 Subject: [PATCH 8/9] move inclusion of expensive RuntimeParameters header to implementation file --- src/util/http/CMakeLists.txt | 2 +- src/util/http/HttpServer.cpp | 12 ++++++++++++ src/util/http/HttpServer.h | 10 ++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) create mode 100644 src/util/http/HttpServer.cpp diff --git a/src/util/http/CMakeLists.txt b/src/util/http/CMakeLists.txt index 37d9a86a76..e2580eb4aa 100644 --- a/src/util/http/CMakeLists.txt +++ b/src/util/http/CMakeLists.txt @@ -2,7 +2,7 @@ add_subdirectory(HttpParser) add_library(mediaTypes MediaTypes.h MediaTypes.cpp) target_compile_options(mediaTypes PUBLIC -Wno-attributes) qlever_target_link_libraries(mediaTypes util) -add_library(http HttpServer.h HttpClient.h HttpClient.cpp HttpUtils.h HttpUtils.cpp UrlParser.h UrlParser.cpp "HttpParser/AcceptHeaderQleverVisitor.h" +add_library(http HttpServer.h HttpServer.cpp HttpClient.h HttpClient.cpp HttpUtils.h HttpUtils.cpp UrlParser.h UrlParser.cpp "HttpParser/AcceptHeaderQleverVisitor.h" websocket/WebSocketSession.cpp websocket/MessageSender.cpp websocket/QueryToSocketDistributor.cpp websocket/QueryHub.cpp websocket/UpdateFetcher.cpp) diff --git a/src/util/http/HttpServer.cpp b/src/util/http/HttpServer.cpp new file mode 100644 index 0000000000..80effe9d84 --- /dev/null +++ b/src/util/http/HttpServer.cpp @@ -0,0 +1,12 @@ +// Copyright 2021-2025, University of Freiburg, +// Chair of Algorithms and Data Structures +// Authors: Johannes Kalmbach +// Julian Mundhahs + +#include "util/http/HttpServer.h" + +#include "global/RuntimeParameters.h" + +ad_utility::MemorySize getRequestBodyLimit() { + return RuntimeParameters().get<"request-body-limit">(); +} diff --git a/src/util/http/HttpServer.h b/src/util/http/HttpServer.h index 1d8d129844..238b302d6c 100644 --- a/src/util/http/HttpServer.h +++ b/src/util/http/HttpServer.h @@ -10,7 +10,6 @@ #include #include "absl/cleanup/cleanup.h" -#include "global/RuntimeParameters.h" #include "util/Exception.h" #include "util/Log.h" #include "util/http/HttpUtils.h" @@ -23,6 +22,10 @@ namespace http = beast::http; // from namespace net = boost::asio; // from using tcp = boost::asio::ip::tcp; // from +// Including the `RuntimeParameters` header is expensive. Move functions that +// require it into an implementation file. +ad_utility::MemorySize getRequestBodyLimit(); + /* * \brief A Simple HttpServer, based on Boost::Beast. It can be configured via * the mandatory HttpHandler parameter. @@ -255,8 +258,7 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( // Read a request. Use a parser so that we can control the limit of the // request size. http::request_parser requestParser; - auto bodyLimit = - RuntimeParameters().get<"request-body-limit">().getBytes(); + auto bodyLimit = getRequestBodyLimit().getBytes(); requestParser.body_limit(bodyLimit == 0 ? boost::none : boost::optional(bodyLimit)); @@ -301,7 +303,7 @@ CPP_template(typename HttpHandler, typename WebSocketHandler)( errorResponse = ad_utility::httpUtils::createHttpResponseFromString( absl::StrCat( R"({"error": "Request body size exceeds the allowed size ()", - RuntimeParameters().get<"request-body-limit">().asString(), + getRequestBodyLimit().asString(), R"(). Send a smaller request or set the allowed size via the "request-body-limit" run-time parameter."})"), http::status::payload_too_large, ad_utility::MediaType::json); } else { From 48894984f648de1a051ab28e644d52d41fe1aac4 Mon Sep 17 00:00:00 2001 From: Julian Mundhahs Date: Tue, 25 Feb 2025 17:49:57 +0100 Subject: [PATCH 9/9] implement some basic tests --- src/util/http/HttpClient.cpp | 2 +- test/HttpTest.cpp | 102 +++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/src/util/http/HttpClient.cpp b/src/util/http/HttpClient.cpp index e09808c3c3..1998d0f947 100644 --- a/src/util/http/HttpClient.cpp +++ b/src/util/http/HttpClient.cpp @@ -119,7 +119,7 @@ HttpOrHttpsResponse HttpClientImpl::sendRequest( beast::flat_buffer buffer; auto responseParser = std::make_unique>(); - responseParser->body_limit(std::numeric_limits::max()); + responseParser->body_limit(boost::none); wait(http::async_read_header(*(client->stream_), buffer, *responseParser, net::use_awaitable)); diff --git a/test/HttpTest.cpp b/test/HttpTest.cpp index 217d912f4b..e04e7c76ad 100644 --- a/test/HttpTest.cpp +++ b/test/HttpTest.cpp @@ -8,6 +8,8 @@ #include #include "HttpTestHelpers.h" +#include "global/RuntimeParameters.h" +#include "util/GTestHelpers.h" #include "util/http/HttpClient.h" #include "util/http/HttpServer.h" #include "util/http/HttpUtils.h" @@ -275,3 +277,103 @@ TEST(HttpServer, ErrorHandlingInSession) { EXPECT_THAT(s, HasSubstr("Weird exception not inheriting from std::exception")); } + +// Test the request body size limit +TEST(HttpServer, RequestBodySizeLimit) { + ad_utility::SharedCancellationHandle handle = + std::make_shared>(); + + TestHttpServer httpServer([](auto request, + auto&& send) -> boost::asio::awaitable { + std::string methodName; + switch (request.method()) { + case verb::get: + methodName = "GET"; + break; + case verb::post: + methodName = "POST"; + break; + default: + methodName = "OTHER"; + } + + auto response = [](std::string methodName, + std::string target) -> cppcoro::generator { + co_yield methodName; + co_yield "\n"; + co_yield target; + }(methodName, std::string(toStd(request.target()))); + + // Send a response. + co_await send(createOkResponse(std::move(response), request, + ad_utility::MediaType::textPlain)); + }); + + httpServer.runInOwnThread(); + + auto ResponseMetadata = [](const status status, string_view content_type) { + return AllOf( + AD_FIELD(HttpOrHttpsResponse, status_, testing::Eq(status)), + AD_FIELD(HttpOrHttpsResponse, contentType_, testing::Eq(content_type))); + }; + auto expect_ = [&httpServer](const ad_utility::MemorySize& requestBodySize, + const std::string& expectedBody, + const auto& responseMatcher) { + ad_utility::SharedCancellationHandle handle = + std::make_shared>(); + + auto httpClient = std::make_unique( + "localhost", std::to_string(httpServer.getPort())); + + const std::string body(requestBodySize.getBytes(), 'f'); + + auto response = HttpClient::sendRequest( + std::move(httpClient), verb::post, "localhost", "target", handle, body); + EXPECT_THAT(response, responseMatcher); + EXPECT_THAT(toString(std::move(response.body_)), expectedBody); + }; + + auto expectRequestFails = [&ResponseMetadata, &expect_]( + const ad_utility::MemorySize& requestBodySize) { + const ad_utility::MemorySize currentLimit = + RuntimeParameters().get<"request-body-limit">(); + // For large requests we get an exception while writing to the request + // stream when going over the limit. For small requests we get the response + // normally. We would need the HttpClient to return the response even + // if it couldn't send the request fully in that case. + expect_( + requestBodySize, + R"({"error": "Request body size exceeds the allowed size ()" + + currentLimit.asString() + + R"(). Send a smaller request or set the allowed size via the "request-body-limit" run-time parameter."})", + ResponseMetadata(status::payload_too_large, "application/json")); + }; + auto expectRequest = [&expect_, &ResponseMetadata]( + const ad_utility::MemorySize requestBodySize) { + expect_(requestBodySize, "POST\ntarget", + ResponseMetadata(status::ok, "text/plain")); + }; + constexpr auto testingRequestBodyLimit = 50_kB; + + // Set a smaller limit for testing. The default of 100 MB is quite large. + RuntimeParameters().set("request-body-limit", 50_kB .asString()); + // Requests with bodies smaller than the request body limit are processed. + expectRequest(3_B); + // Exactly the limit is allowed. + expectRequest(testingRequestBodyLimit); + // Larger than the limit is forbidden. + expectRequestFails(testingRequestBodyLimit + 1_B); + + // Setting a smaller request-body limit. + RuntimeParameters().set("request-body-limit", 1_B .asString()); + expectRequestFails(3_B); + // Only the request body size counts. The empty body is allowed even if the + // body is limited to 1 byte. + expectRequest(0_B); + + // Disable the request body limit, by setting it to 0. + RuntimeParameters().set("request-body-limit", 0_B .asString()); + // Arbitrarily large requests are now allowed. + expectRequest(10_kB); + expectRequest(5_MB); +}