From 4b04d73bfcc26c51b3fff49f373236ceb0456de3 Mon Sep 17 00:00:00 2001 From: Juan Cruz Viotti Date: Thu, 9 Jan 2025 22:12:52 -0400 Subject: [PATCH] Fix canonicalization of relative URI paths with leading slashes (#1430) Signed-off-by: Juan Cruz Viotti --- src/uri/uri.cc | 8 ++--- .../jsonschema_frame_2019_09_test.cc | 31 +++++++++++++++++++ test/uri/uri_canonicalize_test.cc | 6 ++++ test/uri/uri_resolve_from_test.cc | 7 +++++ 4 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/uri/uri.cc b/src/uri/uri.cc index f67d9e625..229f9b5c8 100644 --- a/src/uri/uri.cc +++ b/src/uri/uri.cc @@ -65,7 +65,7 @@ static auto uri_parse(const std::string &data, UriUriA *uri) -> void { uri_normalize(uri); } -static auto canonicalize_path(const std::string &path, const bool is_relative) +static auto canonicalize_path(const std::string &path) -> std::optional { // TODO: This is a hack, as this whole function works badly for // relative paths with ".." @@ -98,7 +98,7 @@ static auto canonicalize_path(const std::string &path, const bool is_relative) // Reconstruct the canonical path std::string canonical_path; - std::string separator = (is_relative && !has_leading_with_word) ? "/" : ""; + std::string separator = ""; for (const auto &seg : segments) { canonical_path += separator + seg; @@ -430,8 +430,7 @@ auto URI::canonicalize() -> URI & { // Clean Path form ".." and "." const auto result_path{this->path()}; if (result_path.has_value()) { - const auto canonical_path{ - canonicalize_path(result_path.value(), this->is_relative())}; + const auto canonical_path{canonicalize_path(result_path.value())}; if (canonical_path.has_value()) { this->path_ = canonical_path.value(); } @@ -559,6 +558,7 @@ auto URI::try_resolve_from(const URI &base) -> URI & { // TODO: This only handles a very specific case. We should generalize this // function to perform proper base resolution on relative bases } else if (this->is_fragment_only() && !base.fragment().has_value()) { + this->data = base.data; this->path_ = base.path_; this->userinfo_ = base.userinfo_; this->host_ = base.host_; diff --git a/test/jsonschema/jsonschema_frame_2019_09_test.cc b/test/jsonschema/jsonschema_frame_2019_09_test.cc index 6c877c17f..75b1b6485 100644 --- a/test/jsonschema/jsonschema_frame_2019_09_test.cc +++ b/test/jsonschema/jsonschema_frame_2019_09_test.cc @@ -2004,3 +2004,34 @@ TEST(JSONSchema_frame_2019_09, relative_base_uri_with_ref) { EXPECT_STATIC_REFERENCE(frame, "/allOf/0/$ref", "common#foo", "common", "foo"); } + +TEST(JSONSchema_frame_2019_09, relative_id_leading_slash) { + const sourcemeta::jsontoolkit::JSON document = + sourcemeta::jsontoolkit::parse(R"JSON({ + "$id": "/base", + "$schema": "https://json-schema.org/draft/2019-09/schema" + })JSON"); + + sourcemeta::jsontoolkit::Frame frame; + frame.analyse(document, sourcemeta::jsontoolkit::default_schema_walker, + sourcemeta::jsontoolkit::official_resolver); + + EXPECT_EQ(frame.locations().size(), 3); + EXPECT_FRAME_STATIC_2019_09_RESOURCE(frame, "/base", "/base", "", "/base", "", + 0); + + // JSON Pointers + + EXPECT_FRAME_STATIC_2019_09_POINTER(frame, "/base#/$id", "/base", "/$id", + "/base", "/$id", 0); + EXPECT_FRAME_STATIC_2019_09_POINTER(frame, "/base#/$schema", "/base", + "/$schema", "/base", "/$schema", 0); + + // References + + EXPECT_EQ(frame.references().size(), 1); + + EXPECT_STATIC_REFERENCE( + frame, "/$schema", "https://json-schema.org/draft/2019-09/schema", + "https://json-schema.org/draft/2019-09/schema", std::nullopt); +} diff --git a/test/uri/uri_canonicalize_test.cc b/test/uri/uri_canonicalize_test.cc index e75219e69..26ccbae00 100644 --- a/test/uri/uri_canonicalize_test.cc +++ b/test/uri/uri_canonicalize_test.cc @@ -92,6 +92,12 @@ TEST(URI_canonicalize, example_relative_4) { EXPECT_EQ(uri.recompose(), "foo/bar"); } +TEST(URI_canonicalize, example_relative_6) { + sourcemeta::jsontoolkit::URI uri{"/foo"}; + uri.canonicalize(); + EXPECT_EQ(uri.recompose(), "/foo"); +} + TEST(URI_canonicalize, example_12) { sourcemeta::jsontoolkit::URI uri{"#foo"}; uri.canonicalize(); diff --git a/test/uri/uri_resolve_from_test.cc b/test/uri/uri_resolve_from_test.cc index 0a2b06b41..2c09511ac 100644 --- a/test/uri/uri_resolve_from_test.cc +++ b/test/uri/uri_resolve_from_test.cc @@ -56,6 +56,13 @@ TEST(URI_try_resolve_from, pointer_fragment_on_relative_path) { EXPECT_EQ(relative.recompose(), "foo#/bar"); } +TEST(URI_try_resolve_from, base_relative_path_leading_slash) { + const sourcemeta::jsontoolkit::URI base{"/foo"}; + sourcemeta::jsontoolkit::URI relative{"#/bar"}; + relative.try_resolve_from(base); + EXPECT_EQ(relative.recompose(), "/foo#/bar"); +} + // RFC 3986, inspired from // https://cr.openjdk.org/~dfuchs/writeups/updating-uri/A Section "Resolutuon"