diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst index 96731979cd2..ef0bc6333ed 100644 --- a/doc/admin-guide/files/records.config.en.rst +++ b/doc/admin-guide/files/records.config.en.rst @@ -1233,8 +1233,10 @@ mptcp .. ts:cv:: CONFIG proxy.config.http.strict_uri_parsing INT 0 - Enables (``1``) or disables (``0``) |TS| to return a 400 Bad Request - if client's request URI includes character which is not RFC 3986 compliant + Takes a value between 0 and 2. ``0`` disables strict_uri_parsing. Any character can appear + in the URI. ``1`` causes |TS| to return 400 Bad Request + if the client's request URI includes a character which is not RFC 3986 compliant. ``2`` directs |TS| + to reject the client's request if it contains whitespace or non-printable characters. .. ts:cv:: CONFIG proxy.config.http.errors.log_error_pages INT 1 :reloadable: diff --git a/mgmt/RecordsConfig.cc b/mgmt/RecordsConfig.cc index d5e089c3a5f..706b94fcee5 100644 --- a/mgmt/RecordsConfig.cc +++ b/mgmt/RecordsConfig.cc @@ -362,7 +362,7 @@ static const RecordElement RecordsConfig[] = , {RECT_CONFIG, "proxy.config.http.post.check.content_length.enabled", RECD_INT, "1", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL} , - {RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL} + {RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-2]", RECA_NULL} , // # Send http11 requests // # diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc index 793f6e84b98..55c1991790d 100644 --- a/proxy/hdrs/HTTP.cc +++ b/proxy/hdrs/HTTP.cc @@ -876,7 +876,7 @@ http_parser_clear(HTTPParser *parser) ParseResult http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end, - bool must_copy_strings, bool eof, bool strict_uri_parsing, size_t max_request_line_size, + bool must_copy_strings, bool eof, int 
strict_uri_parsing, size_t max_request_line_size, size_t max_hdr_field_size) { if (parser->m_parsing_http) { diff --git a/proxy/hdrs/HTTP.h b/proxy/hdrs/HTTP.h index 838f843a7f4..bd57fb836b9 100644 --- a/proxy/hdrs/HTTP.h +++ b/proxy/hdrs/HTTP.h @@ -441,7 +441,7 @@ const char *http_hdr_reason_lookup(unsigned status); void http_parser_init(HTTPParser *parser); void http_parser_clear(HTTPParser *parser); ParseResult http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end, - bool must_copy_strings, bool eof, bool strict_uri_parsing, size_t max_request_line_size, + bool must_copy_strings, bool eof, int strict_uri_parsing, size_t max_request_line_size, size_t max_hdr_field_size); ParseResult validate_hdr_host(HTTPHdrImpl *hh); ParseResult validate_hdr_content_length(HdrHeap *heap, HTTPHdrImpl *hh); @@ -632,11 +632,11 @@ class HTTPHdr : public MIMEHdr void mark_early_data(bool flag = true) const; bool is_early_data() const; - ParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing = false, + ParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, int strict_uri_parsing = 0, size_t max_request_line_size = UINT16_MAX, size_t max_hdr_field_size = 131070); ParseResult parse_resp(HTTPParser *parser, const char **start, const char *end, bool eof); - ParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing = false, + ParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, int strict_uri_parsing = 0, size_t max_request_line_size = UINT16_MAX, size_t max_hdr_field_size = UINT16_MAX); ParseResult parse_resp(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof); @@ -1156,7 +1156,7 @@ HTTPHdr::is_early_data() const -------------------------------------------------------------------------*/ inline ParseResult -HTTPHdr::parse_req(HTTPParser 
*parser, const char **start, const char *end, bool eof, bool strict_uri_parsing, +HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, int strict_uri_parsing, size_t max_request_line_size, size_t max_hdr_field_size) { ink_assert(valid()); diff --git a/proxy/hdrs/HdrTSOnly.cc b/proxy/hdrs/HdrTSOnly.cc index 422cb56aca7..00c536d9781 100644 --- a/proxy/hdrs/HdrTSOnly.cc +++ b/proxy/hdrs/HdrTSOnly.cc @@ -45,7 +45,7 @@ -------------------------------------------------------------------------*/ ParseResult -HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing, +HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, int strict_uri_parsing, size_t max_request_line_size, size_t max_hdr_field_size) { const char *start; diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc index 70fc177b626..e7fa7122205 100644 --- a/proxy/hdrs/URL.cc +++ b/proxy/hdrs/URL.cc @@ -1191,14 +1191,38 @@ url_is_strictly_compliant(const char *start, const char *end) return true; } +/** + * This method will return TRUE if the uri is mostly compliant with + * RFC 3986 and it will return FALSE if not. 
Specifically denying white + space and unprintable characters */ +bool +url_is_mostly_compliant(const char *start, const char *end) +{ + for (const char *i = start; i < end; ++i) { + if (isspace((unsigned char)*i)) { + Debug("http", "Whitespace character [0x%.2X] found in URL", (unsigned char)*i); + return false; + } + if (!isprint((unsigned char)*i)) { + Debug("http", "Non-printable character [0x%.2X] found in URL", (unsigned char)*i); + return false; + } + } + return true; +} + } // namespace UrlImpl using namespace UrlImpl; ParseResult -url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, bool strict_uri_parsing, +url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, int strict_uri_parsing, bool verify_host_characters) { - if (strict_uri_parsing && !url_is_strictly_compliant(*start, end)) { + if (strict_uri_parsing == 1 && !url_is_strictly_compliant(*start, end)) { + return PARSE_RESULT_ERROR; + } + if (strict_uri_parsing == 2 && !url_is_mostly_compliant(*start, end)) { return PARSE_RESULT_ERROR; } diff --git a/proxy/hdrs/URL.h b/proxy/hdrs/URL.h index d31107ee45a..baa35315d9d 100644 --- a/proxy/hdrs/URL.h +++ b/proxy/hdrs/URL.h @@ -209,7 +209,7 @@ void url_fragment_set(HdrHeap *heap, URLImpl *url, const char *value, int length constexpr bool USE_STRICT_URI_PARSING = true; ParseResult url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings, - bool strict_uri_parsing = false, bool verify_host_characters = true); + int strict_uri_parsing = 0, bool verify_host_characters = true); constexpr bool COPY_STRINGS = true; diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc index 2fef10a6068..a39f7c01bff 100644 --- a/proxy/hdrs/unit_tests/test_URL.cc +++ b/proxy/hdrs/unit_tests/test_URL.cc @@ -56,7 +56,8 @@ TEST_CASE("ValidateURL", "[proxy][validurl]") namespace UrlImpl { bool url_is_strictly_compliant(const char *start, const char *end); -}
+bool url_is_mostly_compliant(const char *start, const char *end); +} // namespace UrlImpl using namespace UrlImpl; TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]") @@ -69,6 +70,9 @@ TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]") {"/path/data?key=value#id", true}, {"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true}, {"/abcdefghijklmnopqrstuvwxyz", true}, + {"/abcde fghijklmnopqrstuvwxyz", false}, + {"/abcde\tfghijklmnopqrstuvwxyz", false}, + {"/abcdefghijklmnopqrstuvwxyz", false}, {"/0123456789", true}, {":/?#[]@", true}, {"!$&'()*+,;=", true}, @@ -95,6 +99,45 @@ TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]") } } +TEST_CASE("ParseRulesMostlyStrictURI", "[proxy][parseuri]") +{ + const struct { + const char *const uri; + bool valid; + } http_mostly_strict_uri_parsing_test_case[] = {{"//index.html", true}, + {"/home", true}, + {"/path/data?key=value#id", true}, + {"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true}, + {"/abcdefghijklmnopqrstuvwxyz", true}, + {"/abcde fghijklmnopqrstuvwxyz", false}, + {"/abcde\tfghijklmnopqrstuvwxyz", false}, + {"/abcdefghijklmnopqrstuvwxyz", false}, + {"/0123456789", true}, + {":/?#[]@", true}, + {"!$&'()*+,;=", true}, + {"-._~", true}, + {"%", true}, + {"\n", false}, + {"\"", true}, + {"<", true}, + {">", true}, + {"\\", true}, + {"^", true}, + {"`", true}, + {"{", true}, + {"|", true}, + {"}", true}, + {"é", false}}; // Non-printable ascii + + for (auto i : http_mostly_strict_uri_parsing_test_case) { + const char *const uri = i.uri; + if (url_is_mostly_compliant(uri, uri + strlen(uri)) != i.valid) { + std::printf("Mostly strictly parse URI: \"%s\", expected %s, but not\n", uri, (i.valid ? 
"true" : "false")); + CHECK(false); + } + } +} + struct url_parse_test_case { const std::string input_uri; const std::string expected_printed_url; diff --git a/proxy/http/HttpConfig.cc b/proxy/http/HttpConfig.cc index f93765b7284..b503eea991b 100644 --- a/proxy/http/HttpConfig.cc +++ b/proxy/http/HttpConfig.cc @@ -1637,7 +1637,7 @@ HttpConfig::reconfigure() params->referer_filter_enabled = INT_TO_BOOL(m_master.referer_filter_enabled); params->referer_format_redirect = INT_TO_BOOL(m_master.referer_format_redirect); - params->strict_uri_parsing = INT_TO_BOOL(m_master.strict_uri_parsing); + params->strict_uri_parsing = m_master.strict_uri_parsing; params->oride.down_server_timeout = m_master.oride.down_server_timeout; diff --git a/tests/gold_tests/headers/gold/bad_good_request_http1.gold b/tests/gold_tests/headers/gold/bad_good_request_http1.gold new file mode 100644 index 00000000000..5f31ffa97fe --- /dev/null +++ b/tests/gold_tests/headers/gold/bad_good_request_http1.gold @@ -0,0 +1,8 @@ +``HTTP/1.0 400 Invalid HTTP Request +``Server: ATS/`` +``Content-Length: 219 +`` +Bad Request +``

Bad Request

+``Description: Could not process this request. +`` diff --git a/tests/gold_tests/headers/good_request_after_bad.test.py b/tests/gold_tests/headers/good_request_after_bad.test.py index cfefaf69289..f65d542843e 100644 --- a/tests/gold_tests/headers/good_request_after_bad.test.py +++ b/tests/gold_tests/headers/good_request_after_bad.test.py @@ -27,9 +27,17 @@ Test.ContinueOnFail = True ts.Disk.records_config.update({'proxy.config.diags.debug.tags': 'http', 'proxy.config.diags.debug.enabled': 0, + 'proxy.config.http.strict_uri_parsing': 1 }) -Test.ContinueOnFail = True +ts2 = Test.MakeATSProcess("ts2", enable_cache=True) + +ts2.Disk.records_config.update({'proxy.config.diags.debug.tags': 'http', + 'proxy.config.diags.debug.enabled': 0, + 'proxy.config.http.strict_uri_parsing': 2 + }) + + server = Test.MakeOriginServer("server") request_header = {"headers": "GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n", "timestamp": "1469733493.993", "body": ""} response_header = { @@ -41,6 +49,15 @@ ts.Disk.remap_config.AddLine( 'map / http://127.0.0.1:{0}'.format(server.Variables.Port) ) +ts.Disk.remap_config.AddLine( + 'map /bob<> http://127.0.0.1:{0}'.format(server.Variables.Port) +) +ts2.Disk.remap_config.AddLine( + 'map / http://127.0.0.1:{0}'.format(server.Variables.Port) +) +ts2.Disk.remap_config.AddLine( + 'map /bob<> http://127.0.0.1:{0}'.format(server.Variables.Port) +) trace_out = Test.Disk.File("trace_curl.txt") @@ -51,6 +68,12 @@ tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost: bob\r\n\r\n" | nc 127.0.0.1 {}'.format(ts.Variables.port) tr.Processes.Default.ReturnCode = 0 +tr = Test.AddTestRun("Good control") +tr.Processes.Default.StartBefore(server) +tr.Processes.Default.StartBefore(Test.Processes.ts2) +tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost: bob\r\n\r\n" | nc 127.0.0.1 {}'.format(ts2.Variables.port) +tr.Processes.Default.ReturnCode = 0 + tr = Test.AddTestRun("space after header name") tr.Processes.Default.Command = 'printf "GET / 
HTTP/1.1\r\nHost : bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format( ts.Variables.port) @@ -110,3 +133,44 @@ ts.Variables.port) tr.Processes.Default.ReturnCode = 0 tr.Processes.Default.Streams.stdout = 'gold/bad_good_request.gold' + +tr = Test.AddTestRun("Catch bad URL characters") +tr.Processes.Default.Command = 'printf "GET /bob<> HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format( + ts.Variables.port) +tr.Processes.Default.ReturnCode = 0 +# Since the request line is messed up ATS will reply with HTTP/1.0 +tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold' + +tr = Test.AddTestRun("Catch whitespace in URL") +tr.Processes.Default.Command = 'printf "GET /bob foo HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format( + ts.Variables.port) +tr.Processes.Default.ReturnCode = 0 +# Since the request line is messed up ATS will reply with HTTP/1.0 +tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold' + +tr = Test.AddTestRun("Extra characters in protocol") +tr.Processes.Default.Command = 'printf "GET / HTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format( + ts.Variables.port) +tr.Processes.Default.ReturnCode = 0 +# Since the request line is messed up ATS will reply with HTTP/1.0 +tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold' + +tr = Test.AddTestRun("Characters that are strict but not case 2 bad") +tr.Processes.Default.Command = 'printf "GET /bob<> HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format( + ts2.Variables.port) +tr.Processes.Default.ReturnCode = 0 +tr.Processes.Default.Streams.All = Testers.ContainsExpression("HTTP/1.1 200 OK", "Success") + +tr = Test.AddTestRun("Catch whitespace in URL") +tr.Processes.Default.Command = 'printf "GET /bob foo HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 
127.0.0.1 {}'.format( + ts2.Variables.port) +tr.Processes.Default.ReturnCode = 0 +# Since the request line is messsed up ATS will reply with HTTP/1.0 +tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold' + +tr = Test.AddTestRun("Extra characters in protocol") +tr.Processes.Default.Command = 'printf "GET / HTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format( + ts2.Variables.port) +tr.Processes.Default.ReturnCode = 0 +# Since the request line is messsed up ATS will reply with HTTP/1.0 +tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'