diff --git a/composer.json b/composer.json index e987a65..ff29d32 100644 --- a/composer.json +++ b/composer.json @@ -18,7 +18,11 @@ "symfony/css-selector": "^2.7 || ^3.0 || ^4.0 || ^5.0" }, "require-dev": { - "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0 || ^7.5" + "phpunit/phpunit": "^4.8.35 || ^5.7 || ^6.0 || ^7.5", + "masterminds/html5": "^2.7" + }, + "suggest": { + "masterminds/html5": "To use a HTML5 parser instead of native DOM parser." }, "autoload": { "psr-4": { diff --git a/src/CssToInlineStyles.php b/src/CssToInlineStyles.php index 3268c5f..d33b72a 100644 --- a/src/CssToInlineStyles.php +++ b/src/CssToInlineStyles.php @@ -2,6 +2,8 @@ namespace TijsVerkoyen\CssToInlineStyles; +use LogicException; +use Masterminds\HTML5; use Symfony\Component\CssSelector\CssSelector; use Symfony\Component\CssSelector\CssSelectorConverter; use Symfony\Component\CssSelector\Exception\ExceptionInterface; @@ -13,11 +15,28 @@ class CssToInlineStyles { private $cssConverter; - public function __construct() + /** @var HTML5|null HTML5 parser instance; stays null unless the constructor was asked to use the HTML5 parser */ + private $html5Parser; + + /** @var bool Set to true by parseHtml5() and reset to false at the start of createDomDocumentFromHtml() */ + private $isHtml5Document = false; + + /** + * Creates the CSS selector converter when the Symfony CssSelectorConverter class exists and, if requested, the HTML5 parser. + * + * @param bool|null $useHtml5Parser Whether to use a HTML5 parser or the native DOM parser + * + * @throws LogicException When the HTML5 parser is requested but the Masterminds\HTML5 class cannot be found + */ + public function __construct($useHtml5Parser = null) { if (class_exists('Symfony\Component\CssSelector\CssSelectorConverter')) { $this->cssConverter = new CssSelectorConverter(); } + + if ($useHtml5Parser) { + if (! class_exists(HTML5::class)) { + throw new LogicException('Using the HTML5 parser requires the html5-php library. 
Try running "composer require masterminds/html5".'); + } + + $this->html5Parser = new HTML5(['disable_html_ns' => true]); + } } /** @@ -110,16 +129,79 @@ public function getInlineStyles(\DOMElement $element) * @return \DOMDocument */ protected function createDomDocumentFromHtml($html) + { + $this->isHtml5Document = false; + + if ($this->canParseHtml5String($html)) { + return $this->parseHtml5($html); + } + + return $this->parseXhtml($html); + } + + /** + * Parses the markup with the HTML5 parser and flags the current document as HTML5. + * + * The input is passed through convertToHtmlEntities() before parsing. + * Only called by createDomDocumentFromHtml() after canParseHtml5String() accepted the input, + * which implies the HTML5 parser is set. + * + * @param string $html + * @return \DOMDocument + */ + protected function parseHtml5($html) + { + $this->isHtml5Document = true; + + return $this->html5Parser->parse($this->convertToHtmlEntities($html)); + } + + /** + * Parses the markup with the native \DOMDocument::loadHTML(). + * + * libxml errors are collected internally while loading and the previous + * libxml error handling setting is restored afterwards. + * The returned document has formatOutput enabled. + * + * @param string $html + * @return \DOMDocument + */ + protected function parseXhtml($html) { $document = new \DOMDocument('1.0', 'UTF-8'); $internalErrors = libxml_use_internal_errors(true); - $document->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8')); + $document->loadHTML($this->convertToHtmlEntities($html)); libxml_use_internal_errors($internalErrors); $document->formatOutput = true; return $document; } + /** + * Decides whether the HTML5 parser should handle the given markup. + * + * Returns false when no HTML5 parser is configured or when stripos() does not find the marker. + * Otherwise the text in front of the marker must be empty or accepted by isValidHtml5Heading(). + * + * NOTE(review): the needle given to stripos() is an empty string in this text; it was presumably + * a doctype literal that got lost - confirm against the original patch. With an empty needle + * stripos() yields offset 0 on PHP 8 and a warning plus false on earlier versions. + * + * @param string $content + * @return bool + */ + protected function canParseHtml5String($content) + { + if (null === $this->html5Parser) { + return false; + } + + if (false === ($pos = stripos($content, ''))) { + return false; + } + + $header = substr($content, 0, $pos); + + return '' === $header || $this->isValidHtml5Heading($header); + } + + /** + * Checks the text that precedes the marker located by canParseHtml5String(). + * + * As written, the pattern accepts an optional U+FEFF byte order mark followed by whitespace only. + * + * NOTE(review): the repeated group contains nothing but a whitespace matcher; part of the + * pattern may have been lost - confirm against the original patch. + * + * @param string $heading + * @return bool + */ + protected function isValidHtml5Heading($heading) + { + return 1 === preg_match('/^\x{FEFF}?\s*(\s*)*$/u', $heading); + } + + /** + * Converts the input from UTF-8 to the HTML-ENTITIES encoding via mb_convert_encoding(). + * + * Used by both parseHtml5() and parseXhtml() before handing the markup to the parser. + * + * NOTE(review): the HTML-ENTITIES encoding is deprecated as of PHP 8.2 - check the supported + * PHP range before relying on it; mb_encode_numericentity() is a possible replacement. + * + * @param string $html + * @return array|false|string + */ + protected function convertToHtmlEntities($html) + { + return mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'); + } + /** * @param \DOMDocument $document * @@ -127,10 +209,15 @@ protected function createDomDocumentFromHtml($html) */ protected function getHtmlFromDocument(\DOMDocument $document) 
{ + /* pick the serializer: the HTML5 parser for documents it parsed, otherwise the DOMDocument itself */ + $parser = $document; + if (null !== $this->html5Parser && $this->isHtml5Document) { + $parser = $this->html5Parser; + } + // retrieve the document element // we do it this way to preserve the utf-8 encoding $htmlElement = $document->documentElement; - $html = $document->saveHTML($htmlElement); + $html = $parser->saveHTML($htmlElement); $html = trim($html); // retrieve the doctype diff --git a/tests/HTML5ParserTest.php b/tests/HTML5ParserTest.php new file mode 100644 index 0000000..aa29563 --- /dev/null +++ b/tests/HTML5ParserTest.php @@ -0,0 +1,66 @@ +cssToInlineStyles = new CssToInlineStyles(true); + } + + /** + * @after + */ + protected function clear() + { + $this->cssToInlineStyles = null; + } + + public function testBasicHtml() + { + $html = '

foo

'; + $css = 'p { color: red; }'; + $expected = << +

foo

+EOF; + + $this->assertEquals($expected, $this->cssToInlineStyles->convert($html, $css)); + } + + public function testSwitchingParser() + { + // HTML4 + $html = '

foo

'; + $css = 'p { color: red; }'; + $expected = << + + +

foo

+ + +EOF; + + $this->assertEquals($expected, $this->cssToInlineStyles->convert($html, $css)); + + // HTML5 + $html = '' . $html; + $expected = << +

foo

+EOF; + $this->assertEquals($expected, $this->cssToInlineStyles->convert($html, $css)); + } +}