From ea7d2cd3ffa2308f0dc67db6ef0a49382ac83228 Mon Sep 17 00:00:00 2001 From: Jake Hotson Date: Sat, 14 Sep 2024 18:54:24 +0100 Subject: [PATCH] [CLEANUP] Add DeclarationBlockParser class This is essentially a refactoring to move the `parseCssDeclarationsBlock` method that was duplicated in both `CssInliner` and `CssToAttributeConverter` to a separate class. Doing so also allows it to be tested independently, which is duly done. Additionally, it allows the results cache to be shared. This is also likely to be used by the solution for #1276 (as currently proposed). Longer-term, we should be able to use functionality from PHP-CSS-Parser for more accurate parsing (e.g. strings containing semicolons), whereupon this class can become a wrapper for that, but currently that functionality is not available in a standalone class. Any PHPStan errors from the original function have been fixed, including handling the failure case of `preg_split` (and using `trigger_error` in that case). There is a false positive from PHPStan for which there is an open ticket: https://github.com/phpstan/phpstan/issues/7799. --- phpstan-baseline.neon | 16 +- src/CssInliner.php | 68 ++------ src/HtmlProcessor/CssToAttributeConverter.php | 51 +----- src/Utilities/DeclarationBlockParser.php | 68 ++++++++ .../Utilities/DeclarationBlockParserTest.php | 159 ++++++++++++++++++ 5 files changed, 249 insertions(+), 113 deletions(-) create mode 100644 src/Utilities/DeclarationBlockParser.php create mode 100644 tests/Unit/Utilities/DeclarationBlockParserTest.php diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index ec2ba769..5e5dea50 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -7,7 +7,7 @@ parameters: - message: "#^Argument of an invalid type array\\\\|false supplied for foreach, only iterables are supported\\.$#" - count: 2 + count: 1 path: src/CssInliner.php - @@ -17,7 +17,7 @@ parameters: - message: "#^Only booleans are allowed in a negated boolean, int\\|false given\\.$#" - count: 3 + count: 2 path: src/CssInliner.php - @@ -45,11 +45,6 @@ parameters: count: 2 path: src/HtmlProcessor/AbstractHtmlProcessor.php - - - message: "#^Argument of an invalid type array\\\\|false supplied for foreach, only iterables are supported\\.$#" - count: 1 - path: src/HtmlProcessor/CssToAttributeConverter.php - - message: "#^Method Pelago\\\\Emogrifier\\\\HtmlProcessor\\\\CssToAttributeConverter\\:\\:getAllNodesWithStyleAttribute\\(\\) return type with generic class DOMNodeList does not specify its types\\: TNode$#" count: 1 @@ -62,7 +57,7 @@ parameters: - message: "#^Only booleans are allowed in a negated boolean, int\\|false given\\.$#" - count: 2 + count: 1 path: src/HtmlProcessor/CssToAttributeConverter.php - @@ -115,6 +110,11 @@ parameters: count: 1 path: src/Utilities/CssConcatenator.php + - + message: "#^Unreachable statement \\- code above always terminates\\.$#" + count: 1 + path: src/Utilities/DeclarationBlockParser.php + - message: "#^Offset 0 does not exist on array\\{0\\?\\: string\\}\\.$#" count: 1 diff --git a/src/CssInliner.php b/src/CssInliner.php index 0a2a9463..abc9f14a 100644 --- a/src/CssInliner.php +++ b/src/CssInliner.php @@ -7,6 +7,7 @@ use Pelago\Emogrifier\Css\CssDocument; use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor; use Pelago\Emogrifier\Utilities\CssConcatenator; +use Pelago\Emogrifier\Utilities\DeclarationBlockParser; use Pelago\Emogrifier\Utilities\Preg; use Symfony\Component\CssSelector\CssSelectorConverter; use Symfony\Component\CssSelector\Exception\ParseException; @@ -24,12 +25,7 @@ class CssInliner extends AbstractHtmlProcessor /** * @var int */ - private const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 1; - - /** - * @var int - */ - private const CACHE_KEY_COMBINED_STYLES = 2; + private const CACHE_KEY_COMBINED_STYLES = 1; /** * Regular expression component matching a static pseudo class in a selector, without the preceding ":", @@ -75,13 +71,11 @@ class CssInliner extends AbstractHtmlProcessor /** * @var array{ * 0: array, - * 1: array>, - * 2: array + * 1: array * } */ private $caches = [ self::CACHE_KEY_SELECTOR => [], - self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [], self::CACHE_KEY_COMBINED_STYLES => [], ]; @@ -393,7 +387,6 @@ private function clearAllCaches(): void { $this->caches = [ self::CACHE_KEY_SELECTOR => [], - self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [], self::CACHE_KEY_COMBINED_STYLES => [], ]; } @@ -465,55 +458,13 @@ static function (array $propertyNameMatches): string { // In order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles. $nodePath = $node->getNodePath(); if (\is_string($nodePath) && !isset($this->styleAttributesForNodes[$nodePath])) { - $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationsBlock($normalizedOriginalStyle); + $this->styleAttributesForNodes[$nodePath] = (new DeclarationBlockParser())->parse($normalizedOriginalStyle); $this->visitedNodes[$nodePath] = $node; } $node->setAttribute('style', $normalizedOriginalStyle); } - /** - * Parses a CSS declaration block into property name/value pairs. - * - * Example: - * - * The declaration block - * - * "color: #000; font-weight: bold;" - * - * will be parsed into the following array: - * - * "color" => "#000" - * "font-weight" => "bold" - * - * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty - * - * @return array - * the CSS declarations with the property names as array keys and the property values as array values - */ - private function parseCssDeclarationsBlock(string $cssDeclarationsBlock): array - { - if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) { - return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock]; - } - - $properties = []; - foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) { - /** @var list $matches */ - $matches = []; - if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) { - continue; - } - - $propertyName = \strtolower($matches[1]); - $propertyValue = $matches[2]; - $properties[$propertyName] = $propertyValue; - } - $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $properties; - - return $properties; - } - /** * Returns CSS content. * @@ -781,7 +732,8 @@ private function getCssSelectorPrecedence(string $selector): int private function copyInlinableCssToStyleAttribute(\DOMElement $node, array $cssRule): void { $declarationsBlock = $cssRule['declarationsBlock']; - $newStyleDeclarations = $this->parseCssDeclarationsBlock($declarationsBlock); + $declarationBlockParser = new DeclarationBlockParser(); + $newStyleDeclarations = $declarationBlockParser->parse($declarationsBlock); if ($newStyleDeclarations === []) { return; } @@ -789,7 +741,7 @@ private function copyInlinableCssToStyleAttribute(\DOMElement $node, array $cssR // if it has a style attribute, get it, process it, and append (overwrite) new stuff if ($node->hasAttribute('style')) { // break it up into an associative array - $oldStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style')); + $oldStyleDeclarations = $declarationBlockParser->parse($node->getAttribute('style')); } else { $oldStyleDeclarations = []; } @@ -864,9 +816,10 @@ private function attributeValueIsImportant(string $attributeValue): bool */ private function fillStyleAttributesWithMergedStyles(): void { + $declarationBlockParser = new DeclarationBlockParser(); foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) { $node = $this->visitedNodes[$nodePath]; - $currentStyleAttributes = $this->parseCssDeclarationsBlock($node->getAttribute('style')); + $currentStyleAttributes = $declarationBlockParser->parse($node->getAttribute('style')); $node->setAttribute( 'style', $this->generateStyleStringFromDeclarationsArrays( @@ -905,7 +858,8 @@ private function removeImportantAnnotationFromAllInlineStyles(): void */ private function removeImportantAnnotationFromNodeInlineStyle(\DOMElement $node): void { - $inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style')); + $style = $node->getAttribute('style'); + $inlineStyleDeclarations = (new DeclarationBlockParser())->parse((bool) $style ? $style : ''); /** @var array $regularStyleDeclarations */ $regularStyleDeclarations = []; /** @var array $importantStyleDeclarations */ diff --git a/src/HtmlProcessor/CssToAttributeConverter.php b/src/HtmlProcessor/CssToAttributeConverter.php index 63a48d45..fc320e68 100644 --- a/src/HtmlProcessor/CssToAttributeConverter.php +++ b/src/HtmlProcessor/CssToAttributeConverter.php @@ -4,6 +4,7 @@ namespace Pelago\Emogrifier\HtmlProcessor; +use Pelago\Emogrifier\Utilities\DeclarationBlockParser; use Pelago\Emogrifier\Utilities\Preg; /** @@ -44,11 +45,6 @@ class CssToAttributeConverter extends AbstractHtmlProcessor ], ]; - /** - * @var array> - */ - private static $parsedCssCache = []; - /** * Maps the CSS from the style nodes to visual HTML attributes. * @@ -56,9 +52,10 @@ class CssToAttributeConverter extends AbstractHtmlProcessor */ public function convertCssToVisualAttributes(): self { + $declarationBlockParser = new DeclarationBlockParser(); /** @var \DOMElement $node */ foreach ($this->getAllNodesWithStyleAttribute() as $node) { - $inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style')); + $inlineStyleDeclarations = $declarationBlockParser->parse($node->getAttribute('style')); $this->mapCssToHtmlAttributes($inlineStyleDeclarations, $node); } @@ -75,48 +72,6 @@ private function getAllNodesWithStyleAttribute(): \DOMNodeList return $this->getXPath()->query('//*[@style]'); } - /** - * Parses a CSS declaration block into property name/value pairs. - * - * Example: - * - * The declaration block - * - * "color: #000; font-weight: bold;" - * - * will be parsed into the following array: - * - * "color" => "#000" - * "font-weight" => "bold" - * - * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty - * - * @return array - * the CSS declarations with the property names as array keys and the property values as array values - */ - private function parseCssDeclarationsBlock(string $cssDeclarationsBlock): array - { - if (isset(self::$parsedCssCache[$cssDeclarationsBlock])) { - return self::$parsedCssCache[$cssDeclarationsBlock]; - } - - $properties = []; - foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) { - /** @var array $matches */ - $matches = []; - if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) { - continue; - } - - $propertyName = \strtolower($matches[1]); - $propertyValue = $matches[2]; - $properties[$propertyName] = $propertyValue; - } - self::$parsedCssCache[$cssDeclarationsBlock] = $properties; - - return $properties; - } - /** * Applies $styles to $node. * diff --git a/src/Utilities/DeclarationBlockParser.php b/src/Utilities/DeclarationBlockParser.php new file mode 100644 index 00000000..e3bada08 --- /dev/null +++ b/src/Utilities/DeclarationBlockParser.php @@ -0,0 +1,68 @@ +> + */ + private static $cache = []; + + /** + * Parses a CSS declaration block into property name/value pairs. + * + * Example: + * + * The declaration block + * + * "color: #000; font-weight: bold;" + * + * will be parsed into the following array: + * + * "color" => "#000" + * "font-weight" => "bold" + * + * @param string $declarationBlock the CSS declarations block without the curly braces, may be empty + * + * @return array + * the CSS declarations with the property names as array keys and the property values as array values + */ + public function parse(string $declarationBlock): array + { + if (isset(self::$cache[$declarationBlock])) { + return self::$cache[$declarationBlock]; + } + + $declarations = \preg_split('/;(?!base64|charset)/', $declarationBlock); + if ($declarations === false) { + \trigger_error('Regex failure parsing declaration block `' . $declarationBlock . '`', E_USER_ERROR); + return []; + } + + $properties = []; + foreach ($declarations as $declaration) { + $matches = []; + if (!(bool) \preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) { + continue; + } + + $propertyName = \strtolower($matches[1]); + $propertyValue = $matches[2]; + $properties[$propertyName] = $propertyValue; + } + self::$cache[$declarationBlock] = $properties; + + return $properties; + } +} diff --git a/tests/Unit/Utilities/DeclarationBlockParserTest.php b/tests/Unit/Utilities/DeclarationBlockParserTest.php new file mode 100644 index 00000000..de6b6eb4 --- /dev/null +++ b/tests/Unit/Utilities/DeclarationBlockParserTest.php @@ -0,0 +1,159 @@ +}> + */ + public function provideDeclratationBlockAsStringAndArray(): array + { + return [ + 'empty' => [ + 'string' => '', + 'array' => [], + ], + 'whitespace only' => [ + 'string' => " \r\n\t", + 'array' => [], + ], + 'semicolon only' => [ + 'string' => ';', + 'array' => [], + ], + 'whitespace and semicolon only' => [ + 'string' => " \r\n\t; \r\n\t", + 'array' => [], + ], + '1 declaration without trailing semicolon' => [ + 'string' => 'color: green', + 'array' => ['color' => 'green'], + ], + '1 declaration with trailing semicolon' => [ + 'string' => 'color: green;', + 'array' => ['color' => 'green'], + ], + '1 declaration with leading semicolon' => [ + 'string' => '; color: green', + 'array' => ['color' => 'green'], + ], + 'declaration with space before colon' => [ + 'string' => 'color : green;', + 'array' => ['color' => 'green'], + ], + 'declaration without space after colon' => [ + 'string' => 'color:green;', + 'array' => ['color' => 'green'], + ], + 'declaration without value' => [ + 'string' => 'color: ;', + 'array' => [], + ], + 'declaration with only value' => [ + 'string' => 'red;', + 'array' => [], + ], + 'declaration without property name' => [ + 'string' => ' : red;', + 'array' => [], + ], + '2 declarations' => [ + 'string' => 'color: green; background-color: green;', + 'array' => ['color' => 'green', 'background-color' => 'green'], + ], + '2 declarations with extra semicolon between' => [ + 'string' => 'color: green;; background-color: green;', + 'array' => ['color' => 'green', 'background-color' => 'green'], + ], + '2 declarations with newline between' => [ + 'string' => 'color: green;' . "\n" . 'background-color: green;', + 'array' => ['color' => 'green', 'background-color' => 'green'], + ], + 'declaration with !important' => [ + 'string' => 'color: green !important;', + 'array' => ['color' => 'green !important'], + ], + 'vendor property declaration' => [ + 'string' => '-moz-box-sizing: border-box;', + 'array' => ['-moz-box-sizing' => 'border-box'], + ], + 'custom property definition' => [ + 'string' => '--text-color: green;', + 'array' => ['--text-color' => 'green'], + ], + 'declaration using custom property' => [ + 'string' => 'color: var(--text-color);', + 'array' => ['color' => 'var(--text-color)'], + ], + 'declaration using CSS function' => [ + 'string' => 'width: calc(50% + 10vw + 10rem);', + 'array' => ['width' => 'calc(50% + 10vw + 10rem)'], + ], + 'shorthand property declaration' => [ + 'string' => 'border: 2px solid green', + 'array' => ['border' => '2px solid green'], + ], + 'declaration with text value (single quotes)' => [ + 'string' => 'content: \'Hello universe\';', + 'array' => ['content' => '\'Hello universe\''], + ], + 'declaration with text value (double quotes)' => [ + 'string' => 'content: "Hello universe";', + 'array' => ['content' => '"Hello universe"'], + ], + 'font declaration with size, line-height and family' => [ + 'string' => 'font: 1.2em/2 "Fira Sans", sans-serif;', + 'array' => ['font' => '1.2em/2 "Fira Sans", sans-serif'], + ], + 'declaration using data URL with charset' => [ + 'string' => 'text: url("data:text/plain;charset=UTF-8,Hello%20universe");', + 'array' => ['text' => 'url("data:text/plain;charset=UTF-8,Hello%20universe")'], + ], + 'declaration using data URL with base64-encoding' => [ + 'string' => 'text: url("data:;base64,SGVsbG8gdW5pdmVyc2U=");', + 'array' => ['text' => 'url("data:;base64,SGVsbG8gdW5pdmVyc2U=")'], + ], + 'declaration using data URL with charset and base64-encoding' => [ + 'string' => 'text: url("data:text/plain;charset=UTF-8;base64,SGVsbG8gdW5pdmVyc2U=");', + 'array' => ['text' => 'url("data:text/plain;charset=UTF-8;base64,SGVsbG8gdW5pdmVyc2U=")'], + ], + ]; + } + + /** + * @test + * + * @param array $declratationBlockAsArray + * + * @dataProvider provideDeclratationBlockAsStringAndArray + */ + public function parses(string $declratationBlockAsString, array $declratationBlockAsArray): void + { + $subject = new DeclarationBlockParser(); + + $result = $subject->parse($declratationBlockAsString); + + self::assertSame($declratationBlockAsArray, $result); + } + + /** + * @test + */ + public function overridesEarlierDeclarationWithLaterOne(): void + { + $subject = new DeclarationBlockParser(); + + $result = $subject->parse('color: red; color: green;'); + + self::assertSame(['color' => 'green'], $result); + } +}