Skip to content

Commit

Permalink
Add option to mostly strictly check URL characters (apache#8012)
Browse files Browse the repository at this point in the history
  • Loading branch information
shinrich authored Jul 12, 2021
1 parent f559a6b commit 284fe0a
Show file tree
Hide file tree
Showing 11 changed files with 156 additions and 15 deletions.
6 changes: 4 additions & 2 deletions doc/admin-guide/files/records.config.en.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1233,8 +1233,10 @@ mptcp

.. ts:cv:: CONFIG proxy.config.http.strict_uri_parsing INT 0
Enables (``1``) or disables (``0``) |TS| to return a 400 Bad Request
if client's request URI includes character which is not RFC 3986 compliant
Takes a value between 0 and 2. ``0`` disables strict_uri_parsing; any character may appear
in the URI. ``1`` causes |TS| to return 400 Bad Request
if the client's request URI includes a character that is not RFC 3986 compliant. ``2`` directs |TS|
to reject the client's request if it contains whitespace or non-printable characters.

.. ts:cv:: CONFIG proxy.config.http.errors.log_error_pages INT 1
:reloadable:
Expand Down
2 changes: 1 addition & 1 deletion mgmt/RecordsConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ static const RecordElement RecordsConfig[] =
,
{RECT_CONFIG, "proxy.config.http.post.check.content_length.enabled", RECD_INT, "1", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
,
{RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-1]", RECA_NULL}
{RECT_CONFIG, "proxy.config.http.strict_uri_parsing", RECD_INT, "0", RECU_DYNAMIC, RR_NULL, RECC_INT, "[0-2]", RECA_NULL}
,
// # Send http11 requests
// #
Expand Down
2 changes: 1 addition & 1 deletion proxy/hdrs/HTTP.cc
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,7 @@ http_parser_clear(HTTPParser *parser)

ParseResult
http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end,
bool must_copy_strings, bool eof, bool strict_uri_parsing, size_t max_request_line_size,
bool must_copy_strings, bool eof, int strict_uri_parsing, size_t max_request_line_size,
size_t max_hdr_field_size)
{
if (parser->m_parsing_http) {
Expand Down
8 changes: 4 additions & 4 deletions proxy/hdrs/HTTP.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ const char *http_hdr_reason_lookup(unsigned status);
void http_parser_init(HTTPParser *parser);
void http_parser_clear(HTTPParser *parser);
ParseResult http_parser_parse_req(HTTPParser *parser, HdrHeap *heap, HTTPHdrImpl *hh, const char **start, const char *end,
bool must_copy_strings, bool eof, bool strict_uri_parsing, size_t max_request_line_size,
bool must_copy_strings, bool eof, int strict_uri_parsing, size_t max_request_line_size,
size_t max_hdr_field_size);
ParseResult validate_hdr_host(HTTPHdrImpl *hh);
ParseResult validate_hdr_content_length(HdrHeap *heap, HTTPHdrImpl *hh);
Expand Down Expand Up @@ -632,11 +632,11 @@ class HTTPHdr : public MIMEHdr
void mark_early_data(bool flag = true) const;
bool is_early_data() const;

ParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing = false,
ParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, int strict_uri_parsing = 0,
size_t max_request_line_size = UINT16_MAX, size_t max_hdr_field_size = 131070);
ParseResult parse_resp(HTTPParser *parser, const char **start, const char *end, bool eof);

ParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing = false,
ParseResult parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, int strict_uri_parsing = 0,
size_t max_request_line_size = UINT16_MAX, size_t max_hdr_field_size = UINT16_MAX);
ParseResult parse_resp(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof);

Expand Down Expand Up @@ -1156,7 +1156,7 @@ HTTPHdr::is_early_data() const
-------------------------------------------------------------------------*/

inline ParseResult
HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, bool strict_uri_parsing,
HTTPHdr::parse_req(HTTPParser *parser, const char **start, const char *end, bool eof, int strict_uri_parsing,
size_t max_request_line_size, size_t max_hdr_field_size)
{
ink_assert(valid());
Expand Down
2 changes: 1 addition & 1 deletion proxy/hdrs/HdrTSOnly.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
-------------------------------------------------------------------------*/

ParseResult
HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, bool strict_uri_parsing,
HTTPHdr::parse_req(HTTPParser *parser, IOBufferReader *r, int *bytes_used, bool eof, int strict_uri_parsing,
size_t max_request_line_size, size_t max_hdr_field_size)
{
const char *start;
Expand Down
28 changes: 26 additions & 2 deletions proxy/hdrs/URL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1191,14 +1191,38 @@ url_is_strictly_compliant(const char *start, const char *end)
return true;
}

/**
 * Returns true if the URI span [start, end) is "mostly" compliant with
 * RFC 3986: it contains no whitespace and no non-printable characters.
 * Unlike url_is_strictly_compliant(), any other byte (e.g. '<', '>',
 * '"') is accepted.
 */
bool
url_is_mostly_compliant(const char *start, const char *end)
{
  for (const char *i = start; i < end; ++i) {
    // Classify through unsigned char: passing a negative char (any byte
    // >= 0x80 where char is signed) to isspace()/isprint() is undefined
    // behavior per the C standard.
    const unsigned char c = static_cast<unsigned char>(*i);
    if (isspace(c)) {
      Debug("http", "Whitespace character [0x%.2X] found in URL", c);
      return false;
    }
    if (!isprint(c)) {
      Debug("http", "Non-printable character [0x%.2X] found in URL", c);
      return false;
    }
  }
  return true;
}

} // namespace UrlImpl
using namespace UrlImpl;

ParseResult
url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, bool strict_uri_parsing,
url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, int strict_uri_parsing,
bool verify_host_characters)
{
if (strict_uri_parsing && !url_is_strictly_compliant(*start, end)) {
if (strict_uri_parsing == 1 && !url_is_strictly_compliant(*start, end)) {
return PARSE_RESULT_ERROR;
}
if (strict_uri_parsing == 2 && !url_is_mostly_compliant(*start, end)) {
return PARSE_RESULT_ERROR;
}

Expand Down
2 changes: 1 addition & 1 deletion proxy/hdrs/URL.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ void url_fragment_set(HdrHeap *heap, URLImpl *url, const char *value, int length
constexpr bool USE_STRICT_URI_PARSING = true;

ParseResult url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings,
bool strict_uri_parsing = false, bool verify_host_characters = true);
int strict_uri_parsing = false, bool verify_host_characters = true);

constexpr bool COPY_STRINGS = true;

Expand Down
45 changes: 44 additions & 1 deletion proxy/hdrs/unit_tests/test_URL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ TEST_CASE("ValidateURL", "[proxy][validurl]")
namespace UrlImpl
{
bool url_is_strictly_compliant(const char *start, const char *end);
}
bool url_is_mostly_compliant(const char *start, const char *end);
} // namespace UrlImpl
using namespace UrlImpl;

TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
Expand All @@ -69,6 +70,9 @@ TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
{"/path/data?key=value#id", true},
{"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true},
{"/abcdefghijklmnopqrstuvwxyz", true},
{"/abcde fghijklmnopqrstuvwxyz", false},
{"/abcde\tfghijklmnopqrstuvwxyz", false},
{"/abcdefghijklmnopqrstuvwxyz", false},
{"/0123456789", true},
{":/?#[]@", true},
{"!$&'()*+,;=", true},
Expand All @@ -95,6 +99,45 @@ TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
}
}

// Exercises url_is_mostly_compliant() (strict_uri_parsing == 2): only
// whitespace and non-printable bytes are rejected; characters that strict
// parsing (mode 1) forbids, such as '"', '<', '>', '{', are accepted here.
TEST_CASE("ParseRulesMostlyStrictURI", "[proxy][parseuri]")
{
const struct {
const char *const uri;   // URI to classify
bool valid;              // expected result of url_is_mostly_compliant()
} http_mostly_strict_uri_parsing_test_case[] = {{"//index.html", true},
{"/home", true},
{"/path/data?key=value#id", true},
{"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true},
{"/abcdefghijklmnopqrstuvwxyz", true},
{"/abcde fghijklmnopqrstuvwxyz", false},
{"/abcde\tfghijklmnopqrstuvwxyz", false},
// NOTE(review): entry below looks identical to the "true" lowercase entry
// above — it presumably carried a non-printable byte that was lost in
// transcription; confirm against the repository source.
{"/abcdefghijklmnopqrstuvwxyz", false},
{"/0123456789", true},
{":/?#[]@", true},
{"!$&'()*+,;=", true},
{"-._~", true},
{"%", true},
{"\n", false},
{"\"", true},
{"<", true},
{">", true},
{"\\", true},
{"^", true},
{"`", true},
{"{", true},
{"|", true},
{"}", true},
{"é", false}}; // "é" is multi-byte UTF-8; its bytes (>= 0x80) are not printable ASCII

// Report any mismatch with the offending URI before failing the check.
for (auto i : http_mostly_strict_uri_parsing_test_case) {
const char *const uri = i.uri;
if (url_is_mostly_compliant(uri, uri + strlen(uri)) != i.valid) {
std::printf("Mostly strictly parse URI: \"%s\", expected %s, but not\n", uri, (i.valid ? "true" : "false"));
CHECK(false);
}
}
}

struct url_parse_test_case {
const std::string input_uri;
const std::string expected_printed_url;
Expand Down
2 changes: 1 addition & 1 deletion proxy/http/HttpConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1637,7 +1637,7 @@ HttpConfig::reconfigure()
params->referer_filter_enabled = INT_TO_BOOL(m_master.referer_filter_enabled);
params->referer_format_redirect = INT_TO_BOOL(m_master.referer_format_redirect);

params->strict_uri_parsing = INT_TO_BOOL(m_master.strict_uri_parsing);
params->strict_uri_parsing = m_master.strict_uri_parsing;

params->oride.down_server_timeout = m_master.oride.down_server_timeout;

Expand Down
8 changes: 8 additions & 0 deletions tests/gold_tests/headers/gold/bad_good_request_http1.gold
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
``HTTP/1.0 400 Invalid HTTP Request
``Server: ATS/``
``Content-Length: 219
``
<TITLE>Bad Request</TITLE>
``<H1>Bad Request</H1>
``Description: Could not process this request.
``
66 changes: 65 additions & 1 deletion tests/gold_tests/headers/good_request_after_bad.test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,17 @@
Test.ContinueOnFail = True
ts.Disk.records_config.update({'proxy.config.diags.debug.tags': 'http',
'proxy.config.diags.debug.enabled': 0,
'proxy.config.http.strict_uri_parsing': 1
})

Test.ContinueOnFail = True
ts2 = Test.MakeATSProcess("ts2", enable_cache=True)

ts2.Disk.records_config.update({'proxy.config.diags.debug.tags': 'http',
'proxy.config.diags.debug.enabled': 0,
'proxy.config.http.strict_uri_parsing': 2
})


server = Test.MakeOriginServer("server")
request_header = {"headers": "GET / HTTP/1.1\r\nHost: www.example.com\r\n\r\n", "timestamp": "1469733493.993", "body": ""}
response_header = {
Expand All @@ -41,6 +49,15 @@
ts.Disk.remap_config.AddLine(
'map / http://127.0.0.1:{0}'.format(server.Variables.Port)
)
ts.Disk.remap_config.AddLine(
'map /bob<> http://127.0.0.1:{0}'.format(server.Variables.Port)
)
ts2.Disk.remap_config.AddLine(
'map / http://127.0.0.1:{0}'.format(server.Variables.Port)
)
ts2.Disk.remap_config.AddLine(
'map /bob<> http://127.0.0.1:{0}'.format(server.Variables.Port)
)

trace_out = Test.Disk.File("trace_curl.txt")

Expand All @@ -51,6 +68,12 @@
tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost: bob\r\n\r\n" | nc 127.0.0.1 {}'.format(ts.Variables.port)
tr.Processes.Default.ReturnCode = 0

tr = Test.AddTestRun("Good control")
tr.Processes.Default.StartBefore(server)
tr.Processes.Default.StartBefore(Test.Processes.ts2)
tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost: bob\r\n\r\n" | nc 127.0.0.1 {}'.format(ts2.Variables.port)
tr.Processes.Default.ReturnCode = 0

tr = Test.AddTestRun("space after header name")
tr.Processes.Default.Command = 'printf "GET / HTTP/1.1\r\nHost : bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
ts.Variables.port)
Expand Down Expand Up @@ -110,3 +133,44 @@
ts.Variables.port)
tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.stdout = 'gold/bad_good_request.gold'

tr = Test.AddTestRun("Catch bad URL characters")
tr.Processes.Default.Command = 'printf "GET /bob<> HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
ts.Variables.port)
tr.Processes.Default.ReturnCode = 0
# Since the request line is messed up ATS will reply with HTTP/1.0
tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'

tr = Test.AddTestRun("Catch whitespace in URL")
tr.Processes.Default.Command = 'printf "GET /bob foo HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
ts.Variables.port)
tr.Processes.Default.ReturnCode = 0
# Since the request line is messed up ATS will reply with HTTP/1.0
tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'

tr = Test.AddTestRun("Extra characters in protocol")
tr.Processes.Default.Command = 'printf "GET / HTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
ts.Variables.port)
tr.Processes.Default.ReturnCode = 0
# Since the request line is messed up ATS will reply with HTTP/1.0
tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'

tr = Test.AddTestRun("Characters that are strict but not case 2 bad")
tr.Processes.Default.Command = 'printf "GET /bob<> HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
ts2.Variables.port)
tr.Processes.Default.ReturnCode = 0
tr.Processes.Default.Streams.All = Testers.ContainsExpression("HTTP/1.1 200 OK", "Success")

tr = Test.AddTestRun("Catch whitespace in URL")
tr.Processes.Default.Command = 'printf "GET /bob foo HTTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
ts2.Variables.port)
tr.Processes.Default.ReturnCode = 0
# Since the request line is messed up ATS will reply with HTTP/1.0
tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'

tr = Test.AddTestRun("Extra characters in protocol")
tr.Processes.Default.Command = 'printf "GET / HTP/1.1\r\nhost: bob\r\n\r\nGET / HTTP/1.1\r\nHost: boa\r\n\r\n" | nc 127.0.0.1 {}'.format(
ts2.Variables.port)
tr.Processes.Default.ReturnCode = 0
# Since the request line is messed up ATS will reply with HTTP/1.0
tr.Processes.Default.Streams.stdout = 'gold/bad_good_request_http1.gold'

0 comments on commit 284fe0a

Please sign in to comment.