From 2b57d2a3e24abda2abe05dae19cb98f6856a1414 Mon Sep 17 00:00:00 2001 From: ignace nyamagana butera Date: Sat, 19 Aug 2023 16:59:15 +0200 Subject: [PATCH] Introduce the Encoder class * Introduce the Encoder class to normalize encoding/decoding in all packages * Introduce the KeyValuePairConverter class to normalize key/value parsing and building * Rewrite QueryString parser/builder class * Add new methods to QueryString --- CHANGELOG.md | 18 ++++++++++ Components/Component.php | 59 ++------------------------------- Components/DataPath.php | 20 +++++------ Components/Fragment.php | 5 ++- Components/HierarchicalPath.php | 7 ++-- Components/Host.php | 2 ++ Components/Path.php | 4 +-- Components/QueryTest.php | 22 +++++++----- Components/UserInfo.php | 45 ++++++------------------- Modifier.php | 32 ++++++++++++++++++ 10 files changed, 97 insertions(+), 117 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7523c181..15fda3b1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,24 @@ All Notable changes to `League\Uri\Components` will be documented in this file +## Next - TBD + +### Added + +- `Modifier::encodeQuery` + +### Fixed + +- Using the `Encoder` class to normalize encoding and decoding in all packages + +### Deprecated + +- None + +### Removed + +- None + ## [7.0.0](https://github.com/thephpleague/uri-components/compare/2.4.1...7.0.0) - 2023-08-10 ### Added diff --git a/Components/Component.php b/Components/Component.php index 0170ac7bb..b5953c16d 100644 --- a/Components/Component.php +++ b/Components/Component.php @@ -16,31 +16,17 @@ use League\Uri\Contracts\UriAccess; use League\Uri\Contracts\UriComponentInterface; use League\Uri\Contracts\UriInterface; +use League\Uri\Encoder; use League\Uri\Exceptions\SyntaxError; use League\Uri\Uri; use Psr\Http\Message\UriInterface as Psr7UriInterface; use Stringable; use function preg_match; -use function preg_replace_callback; -use function rawurldecode; -use function rawurlencode; use function sprintf; 
-use function strtoupper; abstract class Component implements UriComponentInterface { - protected const REGEXP_ENCODED_CHARS = ',%[A-Fa-f0-9]{2},'; protected const REGEXP_INVALID_URI_CHARS = '/[\x00-\x1f\x7f]/'; - protected const REGEXP_NO_ENCODING = '/[^A-Za-z0-9_\-.~]/'; - protected const REGEXP_NON_ASCII_PATTERN = '/[^\x20-\x7f]/'; - protected const REGEXP_PREVENTS_DECODING = ',% - 2[A-F|1-2|4-9]| - 3[0-9|B|D]| - 4[1-9|A-F]| - 5[0-9|A|F]| - 6[1-9|A-F]| - 7[0-9|E] - ,ix'; abstract public function value(): ?string; @@ -78,7 +64,7 @@ final protected static function filterUri(Stringable|string $uri): UriInterface| */ protected function validateComponent(Stringable|int|string|null $component): ?string { - return $this->decodeComponent(self::filterComponent($component)); + return Encoder::decodePartial($component); } /** @@ -95,45 +81,4 @@ final protected static function filterComponent(Stringable|int|string|null $comp default => (string) $component, }; } - - /** - * Filter the URI password component. - */ - protected function decodeComponent(?string $str): ?string - { - return match (true) { - null === $str => null, - default => preg_replace_callback(self::REGEXP_ENCODED_CHARS, $this->decodeMatches(...), $str), - }; - } - - /** - * Decodes Matches sequence. - */ - protected function decodeMatches(array $matches): string - { - return match (true) { - 1 === preg_match(static::REGEXP_PREVENTS_DECODING, $matches[0]) => strtoupper($matches[0]), - default => rawurldecode($matches[0]), - }; - } - - /** - * Returns the component as converted for RFC3986. - */ - protected function encodeComponent(?string $str, string $regexp): ?string - { - return match (true) { - null === $str || 1 !== preg_match(self::REGEXP_NO_ENCODING, $str) => $str, - default => preg_replace_callback($regexp, $this->encodeMatches(...), $str) ?? rawurlencode($str), - }; - } - - /** - * Encode Matches sequence. 
- */ - protected function encodeMatches(array $matches): string - { - return rawurlencode($matches[0]); - } } diff --git a/Components/DataPath.php b/Components/DataPath.php index 6ea72f84b..a3854b049 100644 --- a/Components/DataPath.php +++ b/Components/DataPath.php @@ -103,15 +103,11 @@ private function filterPath(string $path): string */ private function filterMimeType(string $mimetype): string { - if ('' == $mimetype) { - return self::DEFAULT_MIMETYPE; - } - - if (1 === preg_match(self::REGEXP_MIMETYPE, $mimetype)) { - return $mimetype; - } - - throw new SyntaxError(sprintf('Invalid mimeType, `%s`.', $mimetype)); + return match (true) { + '' == $mimetype => self::DEFAULT_MIMETYPE, + 1 === preg_match(self::REGEXP_MIMETYPE, $mimetype) => $mimetype, + default => throw new SyntaxError(sprintf('Invalid mimeType, `%s`.', $mimetype)), + }; } /** @@ -302,7 +298,11 @@ private function formatComponent( $path = $mimetype.$parameters.','.$data; - return preg_replace_callback(self::REGEXP_DATAPATH_ENCODING, $this->encodeMatches(...), $path) ?? $path; + return preg_replace_callback( + self::REGEXP_DATAPATH_ENCODING, + static fn (array $matches): string => rawurlencode($matches[0]), + $path + ) ?? 
$path; } public function toAscii(): DataPathInterface diff --git a/Components/Fragment.php b/Components/Fragment.php index 25b60f624..662d351ce 100644 --- a/Components/Fragment.php +++ b/Components/Fragment.php @@ -15,13 +15,12 @@ use League\Uri\Contracts\FragmentInterface; use League\Uri\Contracts\UriInterface; +use League\Uri\Encoder; use Psr\Http\Message\UriInterface as Psr7UriInterface; use Stringable; final class Fragment extends Component implements FragmentInterface { - private const REGEXP_FRAGMENT_ENCODING = '/[^A-Za-z0-9_\-.~!$&\'()*+,;=%:\/@?]+|%(?![A-Fa-f0-9]{2})/'; - private readonly ?string $fragment; /** @@ -53,7 +52,7 @@ public static function fromUri(Stringable|string $uri): self public function value(): ?string { - return $this->encodeComponent($this->fragment, self::REGEXP_FRAGMENT_ENCODING); + return Encoder::encodeQueryOrFragment($this->fragment); } public function getUriComponent(): string diff --git a/Components/HierarchicalPath.php b/Components/HierarchicalPath.php index 987742380..f0d3680ed 100644 --- a/Components/HierarchicalPath.php +++ b/Components/HierarchicalPath.php @@ -17,6 +17,7 @@ use League\Uri\Contracts\PathInterface; use League\Uri\Contracts\SegmentedPathInterface; use League\Uri\Contracts\UriInterface; +use League\Uri\Encoder; use League\Uri\Exceptions\OffsetOutOfBounds; use League\Uri\Exceptions\SyntaxError; use Psr\Http\Message\UriInterface as Psr7UriInterface; @@ -59,7 +60,7 @@ private function __construct(Stringable|string $path) } $this->path = $path; - $segments = $this->decodeComponent($this->path->value()) ?? 
''; + $segments = $this->path->decoded(); if ($this->path->isAbsolute()) { $segments = substr($segments, 1); } @@ -153,7 +154,7 @@ public function decoded(): string public function getDirname(): string { - $path = (string) $this->decodeComponent($this->path->toString()); + $path = $this->path->decoded(); return str_replace( ['\\', "\0"], @@ -297,7 +298,7 @@ public function withSegment(int $key, Stringable|string $segment): SegmentedPath $segment = new self($segment); } - $segment = $this->decodeComponent((string) $segment); + $segment = Encoder::decodeAll($segment); if ($segment === $this->segments[$key]) { return $this; } diff --git a/Components/Host.php b/Components/Host.php index 234a17bf9..d3e04f52d 100644 --- a/Components/Host.php +++ b/Components/Host.php @@ -41,6 +41,8 @@ final class Host extends Component implements IpHostInterface { + protected const REGEXP_NON_ASCII_PATTERN = '/[^\x20-\x7f]/'; + /** * @see https://tools.ietf.org/html/rfc3986#section-3.2.2 * diff --git a/Components/Path.php b/Components/Path.php index 0d3b45c7d..bfd491778 100644 --- a/Components/Path.php +++ b/Components/Path.php @@ -15,6 +15,7 @@ use League\Uri\Contracts\PathInterface; use League\Uri\Contracts\UriInterface; +use League\Uri\Encoder; use Psr\Http\Message\UriInterface as Psr7UriInterface; use Stringable; use function array_pop; @@ -27,7 +28,6 @@ final class Path extends Component implements PathInterface { private const DOT_SEGMENTS = ['.' => 1, '..' 
=> 1]; - private const REGEXP_PATH_ENCODING = '/[^A-Za-z0-9_\-.!$&\'()*+,;=%:\/@]+|%(?![A-Fa-f0-9]{2})/'; private const SEPARATOR = '/'; private readonly string $path; @@ -76,7 +76,7 @@ public static function fromUri(Stringable|string $uri): self public function value(): ?string { - return $this->encodeComponent($this->path, self::REGEXP_PATH_ENCODING); + return Encoder::encodePath($this->path); } public function decoded(): string diff --git a/Components/QueryTest.php b/Components/QueryTest.php index a2de56c85..68bacf1a9 100644 --- a/Components/QueryTest.php +++ b/Components/QueryTest.php @@ -37,10 +37,10 @@ protected function setUp(): void public function testSeparator(): void { $query = Query::new('foo=bar&kingkong=toto'); - $newQuery = $query->withSeparator('|'); + $newQuery = $query->withSeparator(';'); self::assertSame('&', $query->getSeparator()); - self::assertSame('|', $newQuery->getSeparator()); - self::assertSame('foo=bar|kingkong=toto', $newQuery->value()); + self::assertSame(';', $newQuery->getSeparator()); + self::assertSame('foo=bar;kingkong=toto', $newQuery->value()); $this->expectException(SyntaxError::class); $newQuery->withSeparator(''); @@ -682,11 +682,17 @@ public static function getURIProvider(): iterable ]; } - public function testCreateFromRFCSpecification(): void + public function testItFailsToCreateFromRFCSpecificationWithInvalidSeparator(): void { - self::assertEquals( - Query::fromRFC3986('foo=b%20ar|foo=baz', '|'), - Query::fromRFC1738('foo=b+ar|foo=baz', '|') - ); + $this->expectException(SyntaxError::class); + + Query::fromRFC3986('foo=b%20ar;foo=baz', ''); /* @phpstan-ignore-line */ + } + + public function testItFailsToCreateFromRFCSpecificationWithEmptySeparator(): void + { + $this->expectException(SyntaxError::class); + + Query::fromRFC1738('foo=b%20ar;foo=baz', ''); /* @phpstan-ignore-line */ } } diff --git a/Components/UserInfo.php b/Components/UserInfo.php index 2659f3143..465daa03b 100644 --- a/Components/UserInfo.php +++ 
b/Components/UserInfo.php @@ -17,20 +17,15 @@ use League\Uri\Contracts\UriComponentInterface; use League\Uri\Contracts\UriInterface; use League\Uri\Contracts\UserInfoInterface; +use League\Uri\Encoder; use League\Uri\Exceptions\SyntaxError; use Psr\Http\Message\UriInterface as Psr7UriInterface; use SensitiveParameter; use Stringable; use function explode; -use function preg_replace_callback; -use function rawurldecode; final class UserInfo extends Component implements UserInfoInterface { - private const REGEXP_USER_ENCODING = '/[^A-Za-z0-9_\-.~!$&\'()*+,;=%]+|%(?![A-Fa-f0-9]{2})/x'; - private const REGEXP_PASS_ENCODING = '/[^A-Za-z0-9_\-.~!$&\'()*+,;=%:]+|%(?![A-Fa-f0-9]{2})/x'; - private const REGEXP_ENCODED_CHAR = ',%[A-Fa-f0-9]{2},'; - private readonly ?string $username; private readonly ?string $password; @@ -69,11 +64,10 @@ public static function fromUri(Stringable|string $uri): self */ public static function fromAuthority(Stringable|string|null $authority): self { - if (!$authority instanceof AuthorityInterface) { - $authority = Authority::new($authority); - } - - return self::new($authority->getUserInfo()); + return match (true) { + $authority instanceof AuthorityInterface => self::new($authority->getUserInfo()), + default => self::new(Authority::new($authority)->getUserInfo()), + }; } /** @@ -111,33 +105,16 @@ public static function new(Stringable|string|null $value = null): self [$user, $pass] = explode(':', $value, 2) + [1 => null]; - return new self(self::decode($user), self::decode($pass)); - } - - /** - * Decodes an encoded string. - */ - private static function decode(?string $str): ?string - { - return null === $str ? 
null : preg_replace_callback( - self::REGEXP_ENCODED_CHAR, - static fn (array $matches): string => rawurldecode($matches[0]), - $str - ); + return new self(Encoder::decodeAll($user), Encoder::decodeAll($pass)); } public function value(): ?string { - if (null === $this->username) { - return null; - } - - $userInfo = $this->encodeComponent($this->username, self::REGEXP_USER_ENCODING); - if (null === $this->password) { - return $userInfo; - } - - return $userInfo.':'.$this->encodeComponent($this->password, self::REGEXP_PASS_ENCODING); + return match (true) { + null === $this->username => null, + null === $this->password => Encoder::encodeUser($this->username), + default => Encoder::encodeUser($this->username).':'.Encoder::encodePassword($this->password), + }; } public function getUriComponent(): string diff --git a/Modifier.php b/Modifier.php index 4e314940a..184e3c9b0 100644 --- a/Modifier.php +++ b/Modifier.php @@ -26,6 +26,7 @@ use League\Uri\Exceptions\SyntaxError; use League\Uri\Idna\Converter as IdnConverter; use League\Uri\IPv4\Converter as IPv4Converter; +use League\Uri\KeyValuePair\Converter as KeyValuePairConverter; use Psr\Http\Message\UriFactoryInterface; use Psr\Http\Message\UriInterface as Psr7UriInterface; use Stringable; @@ -155,6 +156,37 @@ public function sortQuery(): static )); } + /** + * Change the encoding of the query. 
+ */ + public function encodeQuery(KeyValuePairConverter|int $to, KeyValuePairConverter|int|null $from = null): static + { + $to = match (true) { + !$to instanceof KeyValuePairConverter => KeyValuePairConverter::fromEncodingType($to), + default => $to, + }; + + $from = match (true) { + null === $from => KeyValuePairConverter::fromRFC3986(), + !$from instanceof KeyValuePairConverter => KeyValuePairConverter::fromEncodingType($from), + default => $from, + }; + + if ($to == $from) { + return $this; + } + + $originalQuery = $this->uri->getQuery(); + $query = QueryString::buildFromPairs(QueryString::parseFromValue($originalQuery, $from), $to); + + return match (true) { + null === $query, + '' === $query, + $originalQuery === $query => $this, + default => new static($this->uri->withQuery($query)), + }; + } + /********************************* * Host modifier methods *********************************/