Skip to content

Commit

Permalink
[CLEANUP] Add DeclarationBlockParser class
Browse files Browse the repository at this point in the history
This is essentially a refactoring to move the `parseCssDeclarationsBlock` method
that was duplicated in both `CssInliner` and `CssToAttributeConverter` to a
separate class.

As well as eliminating duplicate code/functionality, there are other benefits:
- The method can be tested independently, which is duly done;
- The results cache is now shared;
- The functionality is readily available to other classes, e.g. for the solution
  to #1276 (as currently proposed).

PHPStan errors from the original code have been resolved as follows:
- Use `Preg::split` to wrap `preg_split` to handle unexpected errors;
- Check the return value from `preg_match` precisely with `!== 1` instead of
  `!`.

Longer-term, we should be able to use functionality from PHP-CSS-Parser for more
accurate parsing (e.g. strings containing semicolons), whereupon this class can
become a wrapper for that, but currently that functionality is not available in
a standalone class.
  • Loading branch information
JakeQZ authored and oliverklee committed Sep 17, 2024
1 parent c05b168 commit 2b34d83
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 112 deletions.
9 changes: 2 additions & 7 deletions phpstan-baseline.neon
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ parameters:

-
message: "#^Only booleans are allowed in a negated boolean, int\\|false given\\.$#"
count: 3
count: 2
path: src/CssInliner.php

-
Expand All @@ -40,11 +40,6 @@ parameters:
count: 2
path: src/HtmlProcessor/AbstractHtmlProcessor.php

-
message: "#^Argument of an invalid type array\\<int, string\\>\\|false supplied for foreach, only iterables are supported\\.$#"
count: 1
path: src/HtmlProcessor/CssToAttributeConverter.php

-
message: "#^Method Pelago\\\\Emogrifier\\\\HtmlProcessor\\\\CssToAttributeConverter\\:\\:getAllNodesWithStyleAttribute\\(\\) return type with generic class DOMNodeList does not specify its types\\: TNode$#"
count: 1
Expand All @@ -57,7 +52,7 @@ parameters:

-
message: "#^Only booleans are allowed in a negated boolean, int\\|false given\\.$#"
count: 2
count: 1
path: src/HtmlProcessor/CssToAttributeConverter.php

-
Expand Down
68 changes: 11 additions & 57 deletions src/CssInliner.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use Pelago\Emogrifier\Css\CssDocument;
use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor;
use Pelago\Emogrifier\Utilities\CssConcatenator;
use Pelago\Emogrifier\Utilities\DeclarationBlockParser;
use Pelago\Emogrifier\Utilities\Preg;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\CssSelector\Exception\ParseException;
Expand All @@ -24,12 +25,7 @@ class CssInliner extends AbstractHtmlProcessor
/**
* @var int
*/
private const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 1;

/**
* @var int
*/
private const CACHE_KEY_COMBINED_STYLES = 2;
private const CACHE_KEY_COMBINED_STYLES = 1;

/**
* Regular expression component matching a static pseudo class in a selector, without the preceding ":",
Expand Down Expand Up @@ -75,13 +71,11 @@ class CssInliner extends AbstractHtmlProcessor
/**
* @var array{
* 0: array<string, int>,
* 1: array<string, array<string, string>>,
* 2: array<string, string>
* 1: array<string, string>
* }
*/
private $caches = [
self::CACHE_KEY_SELECTOR => [],
self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
self::CACHE_KEY_COMBINED_STYLES => [],
];

Expand Down Expand Up @@ -393,7 +387,6 @@ private function clearAllCaches(): void
{
$this->caches = [
self::CACHE_KEY_SELECTOR => [],
self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => [],
self::CACHE_KEY_COMBINED_STYLES => [],
];
}
Expand Down Expand Up @@ -465,55 +458,13 @@ static function (array $propertyNameMatches): string {
// In order to not overwrite existing style attributes in the HTML, we have to save the original HTML styles.
$nodePath = $node->getNodePath();
if (\is_string($nodePath) && !isset($this->styleAttributesForNodes[$nodePath])) {
$this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationsBlock($normalizedOriginalStyle);
$this->styleAttributesForNodes[$nodePath] = (new DeclarationBlockParser())->parse($normalizedOriginalStyle);
$this->visitedNodes[$nodePath] = $node;
}

$node->setAttribute('style', $normalizedOriginalStyle);
}

/**
* Parses a CSS declaration block into property name/value pairs.
*
* Example:
*
* The declaration block
*
* "color: #000; font-weight: bold;"
*
* will be parsed into the following array:
*
* "color" => "#000"
* "font-weight" => "bold"
*
* @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
*
* @return array<string, string>
* the CSS declarations with the property names as array keys and the property values as array values
*/
private function parseCssDeclarationsBlock(string $cssDeclarationsBlock): array
{
if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) {
return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock];
}

$properties = [];
foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) {
/** @var list<string> $matches */
$matches = [];
if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) {
continue;
}

$propertyName = \strtolower($matches[1]);
$propertyValue = $matches[2];
$properties[$propertyName] = $propertyValue;
}
$this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $properties;

return $properties;
}

/**
* Returns CSS content.
*
Expand Down Expand Up @@ -782,15 +733,16 @@ private function getCssSelectorPrecedence(string $selector): int
private function copyInlinableCssToStyleAttribute(\DOMElement $node, array $cssRule): void
{
$declarationsBlock = $cssRule['declarationsBlock'];
$newStyleDeclarations = $this->parseCssDeclarationsBlock($declarationsBlock);
$declarationBlockParser = new DeclarationBlockParser();
$newStyleDeclarations = $declarationBlockParser->parse($declarationsBlock);
if ($newStyleDeclarations === []) {
return;
}

// if it has a style attribute, get it, process it, and append (overwrite) new stuff
if ($node->hasAttribute('style')) {
// break it up into an associative array
$oldStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$oldStyleDeclarations = $declarationBlockParser->parse($node->getAttribute('style'));
} else {
$oldStyleDeclarations = [];
}
Expand Down Expand Up @@ -865,9 +817,10 @@ private function attributeValueIsImportant(string $attributeValue): bool
*/
private function fillStyleAttributesWithMergedStyles(): void
{
$declarationBlockParser = new DeclarationBlockParser();
foreach ($this->styleAttributesForNodes as $nodePath => $styleAttributesForNode) {
$node = $this->visitedNodes[$nodePath];
$currentStyleAttributes = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$currentStyleAttributes = $declarationBlockParser->parse($node->getAttribute('style'));
$node->setAttribute(
'style',
$this->generateStyleStringFromDeclarationsArrays(
Expand Down Expand Up @@ -906,7 +859,8 @@ private function removeImportantAnnotationFromAllInlineStyles(): void
*/
private function removeImportantAnnotationFromNodeInlineStyle(\DOMElement $node): void
{
$inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$style = $node->getAttribute('style');
$inlineStyleDeclarations = (new DeclarationBlockParser())->parse((bool) $style ? $style : '');
/** @var array<string, string> $regularStyleDeclarations */
$regularStyleDeclarations = [];
/** @var array<string, string> $importantStyleDeclarations */
Expand Down
51 changes: 3 additions & 48 deletions src/HtmlProcessor/CssToAttributeConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace Pelago\Emogrifier\HtmlProcessor;

use Pelago\Emogrifier\Utilities\DeclarationBlockParser;
use Pelago\Emogrifier\Utilities\Preg;

/**
Expand Down Expand Up @@ -44,21 +45,17 @@ class CssToAttributeConverter extends AbstractHtmlProcessor
],
];

/**
* @var array<string, array<string, string>>
*/
private static $parsedCssCache = [];

/**
* Maps the CSS from the style nodes to visual HTML attributes.
*
* @return $this
*/
public function convertCssToVisualAttributes(): self
{
$declarationBlockParser = new DeclarationBlockParser();
/** @var \DOMElement $node */
foreach ($this->getAllNodesWithStyleAttribute() as $node) {
$inlineStyleDeclarations = $this->parseCssDeclarationsBlock($node->getAttribute('style'));
$inlineStyleDeclarations = $declarationBlockParser->parse($node->getAttribute('style'));
$this->mapCssToHtmlAttributes($inlineStyleDeclarations, $node);
}

Expand All @@ -75,48 +72,6 @@ private function getAllNodesWithStyleAttribute(): \DOMNodeList
return $this->getXPath()->query('//*[@style]');
}

/**
* Parses a CSS declaration block into property name/value pairs.
*
* Example:
*
* The declaration block
*
* "color: #000; font-weight: bold;"
*
* will be parsed into the following array:
*
* "color" => "#000"
* "font-weight" => "bold"
*
* @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
*
* @return array<string, string>
* the CSS declarations with the property names as array keys and the property values as array values
*/
private function parseCssDeclarationsBlock(string $cssDeclarationsBlock): array
{
if (isset(self::$parsedCssCache[$cssDeclarationsBlock])) {
return self::$parsedCssCache[$cssDeclarationsBlock];
}

$properties = [];
foreach (\preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $declaration) {
/** @var array<int, string> $matches */
$matches = [];
if (!\preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', \trim($declaration), $matches)) {
continue;
}

$propertyName = \strtolower($matches[1]);
$propertyValue = $matches[2];
$properties[$propertyName] = $propertyValue;
}
self::$parsedCssCache[$cssDeclarationsBlock] = $properties;

return $properties;
}

/**
* Applies $styles to $node.
*
Expand Down
73 changes: 73 additions & 0 deletions src/Utilities/DeclarationBlockParser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
<?php

declare(strict_types=1);

namespace Pelago\Emogrifier\Utilities;

/**
* Provides a common method for parsing CSS declaration blocks.
* These might be from actual CSS, or from the `style` attribute of an HTML DOM element.
*
* Caches results globally.
*
* @internal
*/
class DeclarationBlockParser
{
/**
* @var array<string, array<string, string>>
*/
private static $cache = [];

/**
* Parses a CSS declaration block into property name/value pairs.
*
* Example:
*
* The declaration block
*
* "color: #000; font-weight: bold;"
*
* will be parsed into the following array:
*
* "color" => "#000"
* "font-weight" => "bold"
*
* @param string $declarationBlock the CSS declarations block without the curly braces, may be empty
*
* @return array<string, string>
* the CSS declarations with the property names as array keys and the property values as array values
*/
public function parse(string $declarationBlock): array
{
if (isset(self::$cache[$declarationBlock])) {
return self::$cache[$declarationBlock];
}

$declarations = (new Preg())->split('/;(?!base64|charset)/', $declarationBlock);

$properties = [];
foreach ($declarations as $declaration) {
$matches = [];
if (
\preg_match(
'/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s',
// `$declaration` would only be an array if `PREG_SPLIT_OFFSET_CAPTURE` was used with `preg_split`,
// which isn't the case
\trim($declaration),
$matches
)
!== 1
) {
continue;
}

$propertyName = \strtolower($matches[1]);
$propertyValue = $matches[2];
$properties[$propertyName] = $propertyValue;
}
self::$cache[$declarationBlock] = $properties;

return $properties;
}
}
Loading

0 comments on commit 2b34d83

Please sign in to comment.