From 13a47b16fb9f1b30107e4bb939c82134f1622e54 Mon Sep 17 00:00:00 2001 From: Daniel Vogelheim Date: Fri, 24 Jun 2022 13:18:31 +0000 Subject: [PATCH] [Sanitizer] Remove name normalization and adapt tests. Adapt Sanitizer API to the proposed https://github.com/WICG/sanitizer-api/issues/145 This removes normalization of element and attribute names and makes comparisons case-insensitive. Bug: 1326827 Change-Id: Ib7cebd9e74cec895c2716aa88b4eeb71bb7d169e Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3706742 Commit-Queue: Daniel Vogelheim Reviewed-by: Yifan Luo Cr-Commit-Position: refs/heads/main@{#1017597} --- .../modules/sanitizer_api/builtins.cc | 46 ------------------- .../renderer/modules/sanitizer_api/builtins.h | 5 -- .../modules/sanitizer_api/config_util.cc | 31 ++++--------- .../sanitizer-api/sanitizer-names.https.html | 7 +-- .../sanitizer-query-config.https.html | 2 +- .../sanitizer-api/support/testcases.sub.js | 10 +++- 6 files changed, 20 insertions(+), 81 deletions(-) diff --git a/third_party/blink/renderer/modules/sanitizer_api/builtins.cc b/third_party/blink/renderer/modules/sanitizer_api/builtins.cc index 62f137f5a528f1..7d6ca3b9b65229 100644 --- a/third_party/blink/renderer/modules/sanitizer_api/builtins.cc +++ b/third_party/blink/renderer/modules/sanitizer_api/builtins.cc @@ -21,17 +21,6 @@ namespace { // arguments. Thus we have this typedef as a work-around. typedef HashMap StringMap; -StringMap MixedCaseNames(const char* const* names) { - HashMap map; - for (const char* const* iter = names; *iter; ++iter) { - String name(*iter); - if (!name.IsLowerASCII()) { - map.insert(name.LowerASCII(), name); - } - } - return map; -} - SanitizerConfigImpl::ElementList ElementsFromAPI(const char* const* elements) { SanitizerConfigImpl::ElementList element_list; for (const char* const* elem = elements; *elem; ++elem) { @@ -91,18 +80,6 @@ const SanitizerConfigImpl::AttributeList& GetBaselineAllowAttributes() { return attributes_; } -const HashMap& GetMixedCaseElementNames() { - DEFINE_STATIC_LOCAL(StringMap, element_names_, - (MixedCaseNames(kBaselineElements))); - return element_names_; -} - -const HashMap& GetMixedCaseAttributeNames() { - DEFINE_STATIC_LOCAL(StringMap, attribute_names_, - (MixedCaseNames(kBaselineAttributes))); - return attribute_names_; -} - } // namespace default_config_names namespace with_namespace_names { @@ -126,18 +103,6 @@ const SanitizerConfigImpl::AttributeList& GetBaselineAllowAttributes() { return attributes_; } -const HashMap& GetMixedCaseElementNames() { - DEFINE_STATIC_LOCAL(StringMap, element_names_, - (MixedCaseNames(kBaselineElements))); - return element_names_; -} - -const HashMap& GetMixedCaseAttributeNames() { - DEFINE_STATIC_LOCAL(StringMap, attribute_names_, - (MixedCaseNames(kBaselineAttributes))); - return attribute_names_; -} - } // namespace with_namespace_names bool WithNamespaces() { @@ -161,15 +126,4 @@ const SanitizerConfigImpl::AttributeList& GetBaselineAllowAttributes() { return WithNamespaces() ? with_namespace_names::GetBaselineAllowAttributes() : default_config_names::GetBaselineAllowAttributes(); } - -const HashMap& GetMixedCaseElementNames() { - return WithNamespaces() ? with_namespace_names::GetMixedCaseElementNames() - : default_config_names::GetMixedCaseElementNames(); -} - -const HashMap& GetMixedCaseAttributeNames() { - return WithNamespaces() ? with_namespace_names::GetMixedCaseAttributeNames() - : default_config_names::GetMixedCaseAttributeNames(); -} - } // namespace blink diff --git a/third_party/blink/renderer/modules/sanitizer_api/builtins.h b/third_party/blink/renderer/modules/sanitizer_api/builtins.h index be5ddcd321467a..4b6521da2b2025 100644 --- a/third_party/blink/renderer/modules/sanitizer_api/builtins.h +++ b/third_party/blink/renderer/modules/sanitizer_api/builtins.h @@ -19,11 +19,6 @@ const SanitizerConfigImpl& GetDefaultConfig(); const SanitizerConfigImpl::ElementList& GetBaselineAllowElements(); const SanitizerConfigImpl::AttributeList& GetBaselineAllowAttributes(); -// We derive a map of lower-case to mixed-case names from the built-ins, for -// use in name normalization. -const HashMap& GetMixedCaseElementNames(); -const HashMap& GetMixedCaseAttributeNames(); - } // namespace blink #endif // THIRD_PARTY_BLINK_RENDERER_MODULES_SANITIZER_API_BUILTINS_H_ diff --git a/third_party/blink/renderer/modules/sanitizer_api/config_util.cc b/third_party/blink/renderer/modules/sanitizer_api/config_util.cc index 605a9f23663c27..c103ef55e03874 100644 --- a/third_party/blink/renderer/modules/sanitizer_api/config_util.cc +++ b/third_party/blink/renderer/modules/sanitizer_api/config_util.cc @@ -88,6 +88,7 @@ bool IsValidCharacter(UChar ch) { // obtuse, but it seems to allow all XML names. The HTML parser however // allows only ascii. Here, we settle for the simplest, most restrictive // variant. May it's too restrictive, though. + // TODO(vogelheim): "HTML parser allows only ascii" is no longer true. return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == ':' || ch == '-' || ch == '_'; } @@ -113,22 +114,15 @@ String ElementFromAPI(const String& name) { if (!IsValidName(name)) return Invalid(); - // Normalize element name, using the GetMixedCaseElementNames table. - String normalized = name.LowerASCII(); - const auto& mixed_case_names = GetMixedCaseElementNames(); - const auto iter = mixed_case_names.find(normalized); - if (iter != mixed_case_names.end()) - normalized = iter->value; - // Handle namespace prefixes: - wtf_size_t pos = normalized.find(':'); + wtf_size_t pos = name.find(':'); // Two (or more) colons => invalid. - if (pos != WTF::kNotFound && normalized.find(':', pos + 1) != WTF::kNotFound) + if (pos != WTF::kNotFound && name.find(':', pos + 1) != WTF::kNotFound) return Invalid(); // No prefix, or the ones explicitly allowed by the spec: okay. - if (pos == WTF::kNotFound || normalized.StartsWith("svg:") || - normalized.StartsWith("math:")) { - return normalized; + if (pos == WTF::kNotFound || name.StartsWith("svg:") || + name.StartsWith("math:")) { + return name; } // All else: invalid. return Invalid(); @@ -138,19 +132,12 @@ String AttributeFromAPI(const String& name) { if (!IsValidName(name)) return Invalid(); - // Normalize attribute name, using the GetMixedCaseAttributeNames table. - String normalized = name.LowerASCII(); - const auto& mixed_case_names = GetMixedCaseAttributeNames(); - const auto iter = mixed_case_names.find(normalized); - if (iter != mixed_case_names.end()) - normalized = iter->value; - // The spec allows only a specific list of prefixed attributes. Use the // GetBaselineAllowAttributes() table to check for those. All other uses // of colon are invalid. - if (normalized.find(':') == WTF::kNotFound || - GetBaselineAllowAttributes().Contains(normalized)) - return normalized; + if (name.find(':') == WTF::kNotFound || + GetBaselineAllowAttributes().Contains(name)) + return name; return Invalid(); } diff --git a/third_party/blink/web_tests/external/wpt/sanitizer-api/sanitizer-names.https.html b/third_party/blink/web_tests/external/wpt/sanitizer-api/sanitizer-names.https.html index 22b913574e83f0..635b7975e65ecd 100644 --- a/third_party/blink/web_tests/external/wpt/sanitizer-api/sanitizer-names.https.html +++ b/third_party/blink/web_tests/external/wpt/sanitizer-api/sanitizer-names.https.html @@ -103,14 +103,11 @@ assert_equals(sanitize(elem, probe), probe); }, `Mixed-case element names #${index}: "${elem}"`); test(t => { - assert_equals(sanitize(elem.toLowerCase(), probe), probe); + assert_not_equals(sanitize(elem.toLowerCase(), probe), probe); }, `Mixed-case element names #${index}: "${elem.toLowerCase()}"`); test(t => { - assert_equals(sanitize(elem.toUpperCase(), probe), probe); + assert_not_equals(sanitize(elem.toUpperCase(), probe), probe); }, `Mixed-case element names #${index}: "${elem.toUpperCase()}"`); - test(t => { - assert_equals(sanitize(elem, probe.toLowerCase()), probe); - }, `Mixed-case element names #${index}: "${elem}" with lowercase probe.`); test(t => { const elems = [elem]; assert_array_equals( diff --git a/third_party/blink/web_tests/external/wpt/sanitizer-api/sanitizer-query-config.https.html b/third_party/blink/web_tests/external/wpt/sanitizer-api/sanitizer-query-config.https.html index bd7c7ea3f65570..8824ba7796382c 100644 --- a/third_party/blink/web_tests/external/wpt/sanitizer-api/sanitizer-query-config.https.html +++ b/third_party/blink/web_tests/external/wpt/sanitizer-api/sanitizer-query-config.https.html @@ -65,7 +65,7 @@ "onclick": ["*"] }, allowCustomElements: true, }; - assert_deep_equals(configs[0], + assert_deep_equals(config_0_mixed, new Sanitizer(config_0_mixed).getConfiguration()); }, "SanitizerAPI getConfiguration() reflects creation config."); diff --git a/third_party/blink/web_tests/external/wpt/sanitizer-api/support/testcases.sub.js b/third_party/blink/web_tests/external/wpt/sanitizer-api/support/testcases.sub.js index e0efa1c67c46de..c54d4461517f36 100644 --- a/third_party/blink/web_tests/external/wpt/sanitizer-api/support/testcases.sub.js +++ b/third_party/blink/web_tests/external/wpt/sanitizer-api/support/testcases.sub.js @@ -26,7 +26,6 @@ const testcases = [ {config_input: {dropElements: ["custom-element"], allowCustomElements: true}, value: "testbla", result: "bla", message: "allow custom elements with drop list contains [\"custom-element\"]"}, {config_input: {dropElements: ["script"]}, value: "", result: "

Click.

", message: "dropAttributes list {\"data-attribute-with-dashes\": [\"*\"]} with dom dataset js access"}, {config_input: {allowAttributes: {"id": ["div"]}}, value: "

P

DIV
", result: "

P

DIV
", message: "allowAttributes list {\"id\": [\"div\"]} with id attribute"}, {config_input: {allowAttributes: {"id": ["*"]}}, value: "

Click.

", result: "

Click.

", message: "allowAttributes list {\"id\": [\"*\"]} with id attribute and onclick scripts"}, @@ -68,4 +66,12 @@ const testcases = [ {config_input: {allowComments: false}, value: "

commentintext

", result: "

commentintext

", message: "HTML with comments deeper in the tree, !allowComments"}, {config_input: {allowElements: ["svg"]}, value: "", result: "", message: "Unknown HTML names (HTMLUnknownElement instances) should not match elements parsed as non-HTML namespaces."}, {config_input: {allowElements: ["div", "svg"]}, value: "
", result: "
", message: "Unknown HTML names (HTMLUnknownElement instances) should not match elements parsed as non-HTML namespaces when nested."}, + + // Case normalization (actually: lack of) + {config_input: {dropElements: ["I", "DL"]}, value: "
balabala
test
", result: "
balabala
test
", message: "dropElements list [\"I\", \"DL\"]}"}, + {config_input: {dropElements: ["i", "dl"]}, value: "
balabala
test
", result: "
balabala
", message: "dropElements list [\"i\", \"dl\"]}"}, + {config_input: {dropElements: ["i", "dl"]}, value: "
balabala
test
", result: "
balabala
", message: "dropElements list [\"i\", \"dl\"]} with uppercase HTML"}, + {config_input: {dropAttributes: {"ID": ["*"]}}, value: "

Click.

", result: "

Click.

", message: "dropAttributes list {\"ID\": [\"*\"]} with id attribute"}, + {config_input: {dropAttributes: {"ID": ["*"]}}, value: "

Click.

", result: "

Click.

", message: "dropAttributes list {\"ID\": [\"*\"]} with ID attribute"}, + {config_input: {dropAttributes: {"id": ["*"]}}, value: "

Click.

", result: "

Click.

", message: "dropAttributes list {\"id\": [\"*\"]} with ID attribute"}, ];