From 67cd354327c576a50d117fe1e76a4ccb2526982e Mon Sep 17 00:00:00 2001 From: Dan Date: Sun, 2 Oct 2022 15:54:59 -0400 Subject: [PATCH 01/35] Apply 8.1 hotfixes from unmerged patch --- src/Twemoji.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Twemoji.php b/src/Twemoji.php index 5233c49..bcc36b7 100644 --- a/src/Twemoji.php +++ b/src/Twemoji.php @@ -25,7 +25,7 @@ public function __construct(array $codepoints) public static function emoji(string $emoji): self { - $chars = preg_split('//u', $emoji, null, PREG_SPLIT_NO_EMPTY); + $chars = preg_split('//u', $emoji, -1, PREG_SPLIT_NO_EMPTY); $codepoints = array_map( fn (string $code): string => dechex(mb_ord($code)), @@ -58,6 +58,7 @@ public function url(): string ); } + #[\ReturnTypeWillChange] public function jsonSerialize() { return $this->url(); From 8c15b01c72ba39158225f4d6d0a07e2e4948e304 Mon Sep 17 00:00:00 2001 From: Dan Date: Sun, 2 Oct 2022 17:12:02 -0400 Subject: [PATCH 02/35] Initial HTML replacer code --- composer.json | 13 +++-- src/HtmlReplacer.php | 50 +++++++++++++++++++ tests/Datasets/HtmlContent.php | 45 +++++++++++++++++ tests/Unit/HtmlTest.php | 12 +++++ ...ith_(DOCTYPE_htmlnhtml_langhtml)_1__1.html | 5 ++ ...ith_(DOCTYPE_htmlnhtml_langhtml)_2__1.html | 11 ++++ ...ith_(DOCTYPE_htmlnhtml_langhtml)_3__1.html | 22 ++++++++ 7 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 src/HtmlReplacer.php create mode 100644 tests/Datasets/HtmlContent.php create mode 100644 tests/Unit/HtmlTest.php create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html diff --git a/composer.json b/composer.json index 16ba2e4..3dacfbd 100644 --- a/composer.json +++ b/composer.json @@ -22,13 
+22,15 @@ "ext-mbstring": "*" }, "require-dev": { - "pestphp/pest": "^0.3.0", + "pestphp/pest": "^1.21", "s9e/regexp-builder": "^1.4", "spatie/emoji": "^2.3.0", - "spatie/pest-plugin-snapshots": "^1.0" + "spatie/pest-plugin-snapshots": "^1.0", + "wa72/htmlpagedom": "^2.0 || ^3.0" }, "suggest": { - "spatie/emoji": "*" + "spatie/emoji": "*", + "wa72/htmlpagedom": "*" }, "minimum-stability": "dev", "prefer-stable": true, @@ -38,7 +40,10 @@ } }, "config": { - "sort-packages": true + "sort-packages": true, + "allow-plugins": { + "pestphp/pest-plugin": true + } }, "scripts": { "generate": "php ./generate.php", diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php new file mode 100644 index 0000000..5664c75 --- /dev/null +++ b/src/HtmlReplacer.php @@ -0,0 +1,50 @@ +filter('body > *'); + + if ($bodyChildren->count() === 0) { + return $html; + } + + $bodyChildren = $bodyChildren->each(function (HtmlPageCrawler $node) { + // TODO: consider some sort of filtering here to only twemoji encode "Text Nodes". + // It's just a bit harder to do in PHP than JS it seems. + $twemojiContent = (new EmojiText($node->innerText())) + ->base($this->base) + ->type($this->type) + ->toHtml(); + $node->makeEmpty()->setInnerHtml($twemojiContent); + return $node; + }); + + return $parsedHtml->saveHTML(); + } +} diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php new file mode 100644 index 0000000..b502e2f --- /dev/null +++ b/tests/Datasets/HtmlContent.php @@ -0,0 +1,45 @@ + + + + + +HTML, + <<<'HTML' + + + + + + + HTML 5๐Ÿš€ Boilerplate + + + + +HTML, + <<<'HTML' + + + + + + + HTML 5๐Ÿš€ Boilerplate + + + +

Do a quick kickflip! 🛹

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

🦀🦀🦀🦀🦀

+

🦀🦀🦀

+

🦀🦀🦀🦀🦀

+

🙏🐘

+ + +HTML, +]); diff --git a/tests/Unit/HtmlTest.php b/tests/Unit/HtmlTest.php new file mode 100644 index 0000000..738e2ab --- /dev/null +++ b/tests/Unit/HtmlTest.php @@ -0,0 +1,12 @@ +png(); + assertMatchesHtmlSnapshot($htmlReplacer->parse($html)); +})->with('html'); diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html new file mode 100644 index 0000000..8703386 --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html new file mode 100644 index 0000000..cc7c3ee --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html @@ -0,0 +1,11 @@ + + + + + + + HTML 5🚀 Boilerplate + + + + diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html new file mode 100644 index 0000000..26d4130 --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html @@ -0,0 +1,22 @@ + + + + + + + HTML 5🚀 Boilerplate + + + +

Do a quick kickflip! 🛹 +

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

🦀🦀🦀🦀🦀

+

🦀🦀🦀

+

🦀🦀🦀🦀🦀

+

+🙏🐘 +

+ + From 6a8cb4df2b8537e3bde9a621d958bb9faa9bb426 Mon Sep 17 00:00:00 2001 From: Dan Date: Sun, 2 Oct 2022 17:18:34 -0400 Subject: [PATCH 03/35] remove unused property --- src/HtmlReplacer.php | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 5664c75..21a8f62 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -10,11 +10,8 @@ class HtmlReplacer { use Configurable; - public static $textNodes = [ - 'h1', 'h2' - ]; - - public function __construct() { + public function __construct() + { if (!class_exists(HtmlPageCrawler::class)) { throw new RuntimeException( sprintf('Cannot use %s method unless `wa72/htmlpagedom` is installed.', __METHOD__) From b8af7e5b5f2b8f3c644e8b6899c632103a54ed0e Mon Sep 17 00:00:00 2001 From: Dan Date: Sun, 2 Oct 2022 17:29:34 -0400 Subject: [PATCH 04/35] generate new emoji bytes --- src/emoji_bytes.regexp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emoji_bytes.regexp b/src/emoji_bytes.regexp index 74314f5..cdc3c75 100644 --- a/src/emoji_bytes.regexp +++ b/src/emoji_bytes.regexp @@ -1 +1 @@ 
-(?:[#*0-9]\xE2\x83\xA3|\xC2[\xA9\xAE]|\xE2(?:\x80\xBC|\x81\x89|\x84[\xA2\xB9]|\x86[\x94-\x99\xA9\xAA]|\x8C[\x9A\x9B\xA8]|\x8F[\x8F\xA9-\xB3\xB8-\xBA]|\x93\x82|\x96[\xAA\xAB\xB6]|\x97[\x80\xBB-\xBE]|\x98(?:[\x80-\x84\x8E\x91\x94\x95\x98\xA0\xA2\xA3\xA6\xAA\xAE\xAF\xB8-\xBA]|\x9D(?:\xF0\x9F\x8F[\xBB-\xBF])?)|\x99[\x80\x82\x88-\x93\x9F\xA0\xA3\xA5\xA6\xA8\xBB\xBE\xBF]|\x9A[\x92-\x97\x99\x9B\x9C\xA0\xA1\xA7\xAA\xAB\xB0\xB1\xBD\xBE]|\x9B(?:[\x84\x85\x88\x8E\x8F\x91\x93\x94\xA9\xAA\xB0-\xB5\xB7\xB8\xBA\xBD]|\xB9(?:\xEF\xB8\x8F\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F|\xF0\x9F\x8F[\xBB-\xBF](?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)?)|\x9C(?:[\x8A-\x8D](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x82\x85\x88\x89\x8F\x92\x94\x96\x9D\xA1\xA8\xB3\xB4])|\x9D(?:[\x84\x87\x8C\x8E\x93-\x95\x97\xA3]|\xA4(?:\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x94\xA5|\xA9\xB9))?)|\x9E[\x95-\x97\xA1\xB0\xBF]|\xA4[\xB4\xB5]|\xAC[\x85-\x87\x9B\x9C]|\xAD[\x90\x95])|\xE3(?:\x80[\xB0\xBD]|\x8A[\x97\x99])|\xF0\x9F(?:\x80\x84|\x83\x8F|\x85[\xB0\xB1\xBE\xBF]|\x86[\x8E\x91-\x9A]|\x87(?:\xA6\xF0\x9F\x87[\xA8-\xAC\xAE\xB1\xB2\xB4\xB6-\xBA\xBC\xBD\xBF]|\xA7\xF0\x9F\x87[\xA6\xA7\xA9-\xAF\xB1-\xB4\xB6-\xB9\xBB\xBC\xBE\xBF]|\xA8\xF0\x9F\x87[\xA6\xA8\xA9\xAB-\xAE\xB0-\xB5\xB7\xBA-\xBF]|\xA9\xF0\x9F\x87[\xAA\xAC\xAF\xB0\xB2\xB4\xBF]|\xAA\xF0\x9F\x87[\xA6\xA8\xAA\xAC\xAD\xB7-\xBA]|\xAB\xF0\x9F\x87[\xAE-\xB0\xB2\xB4\xB7]|\xAC\xF0\x9F\x87[\xA6\xA7\xA9-\xAE\xB1-\xB3\xB5-\xBA\xBC\xBE]|\xAD\xF0\x9F\x87[\xB0\xB2\xB3\xB7\xB9\xBA]|\xAE\xF0\x9F\x87[\xA8-\xAA\xB1-\xB4\xB6-\xB9]|\xAF\xF0\x9F\x87[\xAA\xB2\xB4\xB5]|\xB0\xF0\x9F\x87[\xAA\xAC-\xAE\xB2\xB3\xB5\xB7\xBC\xBE\xBF]|\xB1\xF0\x9F\x87[\xA6-\xA8\xAE\xB0\xB7-\xBB\xBE]|\xB2\xF0\x9F\x87[\xA6\xA8-\xAD\xB0-\xBF]|\xB3\xF0\x9F\x87[\xA6\xA8\xAA-\xAC\xAE\xB1\xB4\xB5\xB7\xBA\xBF]|\xB4\xF0\x9F\x87\xB2|\xB5\xF0\x9F\x87[\xA6\xAA-\xAD\xB0-\xB3\xB7-\xB9\xBC\xBE]|\xB6\xF0\x9F\x87\xA6|\xB7\xF0\x9F\x87[\xAA\xB4\xB8\xBA\xBC]|\xB8\xF0\x9F\x87[\xA6-\xAA\xAC-\xB4\xB7-\xB9\xBB\xBD-\xBF]|\xB9\xF0\x9F\x87
[\xA6\xA8\xA9\xAB-\xAD\xAF-\xB4\xB7\xB9\xBB\xBC\xBF]|\xBA\xF0\x9F\x87[\xA6\xAC\xB2\xB3\xB8\xBE\xBF]|\xBB\xF0\x9F\x87[\xA6\xA8\xAA\xAC\xAE\xB3\xBA]|\xBC\xF0\x9F\x87[\xAB\xB8]|\xBD\xF0\x9F\x87\xB0|\xBE\xF0\x9F\x87[\xAA\xB9]|\xBF\xF0\x9F\x87[\xA6\xB2\xBC])|\x88[\x81\x82\x9A\xAF\xB2-\xBA]|\x89[\x90\x91]|\x8C[\x80-\xA1\xA4-\xBF]|\x8D[\x80-\xBF]|\x8E(?:[\x80-\x84\x86-\x93\x96\x97\x99-\x9B\x9E-\xBF]|\x85(?:\xF0\x9F\x8F[\xBB-\xBF])?)|\x8F(?:[\x82\x87](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x83\x84\x8A](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x8B\x8C](?:\xEF\xB8\x8F\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F|\xF0\x9F\x8F[\xBB-\xBF](?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)?|[\x80\x81\x85\x86\x88\x89\x8D-\xB0\xB5\xB7-\xBF]|\xB3(?:\xEF\xB8\x8F\xE2\x80\x8D(?:\xE2\x9A\xA7\xEF\xB8\x8F|\xF0\x9F\x8C\x88))?|\xB4(?:\xE2\x80\x8D\xE2\x98\xA0\xEF\xB8\x8F|\xF3\xA0\x81\xA7\xF3\xA0\x81\xA2\xF3\xA0\x81(?:\xA5\xF3\xA0\x81\xAE\xF3\xA0\x81\xA7|\xB3\xF3\xA0\x81\xA3\xF3\xA0\x81\xB4|\xB7\xF3\xA0\x81\xAC\xF3\xA0\x81\xB3)\xF3\xA0\x81\xBF)?)|\x90(?:[\x80-\x87\x89-\x94\x96-\xBA\xBC-\xBF]|\x88(?:\xE2\x80\x8D\xE2\xAC\x9B)?|\x95(?:\xE2\x80\x8D\xF0\x9F\xA6\xBA)?|\xBB(?:\xE2\x80\x8D\xE2\x9D\x84\xEF\xB8\x8F)?)|\x91(?:[\x82\x83\x86-\x90\xA6\xA7\xAB-\xAD\xB2\xB4-\xB6\xB8\xBC](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\xAE\xB0\xB1\xB3\xB7](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x80\x84\x85\x91-\xA5\xAA\xB9-\xBB\xBD-\xBF]|\x81(?:\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F\x97\xA8\xEF\xB8\x8F)?|\xA8(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8)|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x91(?:[\xA8\xA9]\xE2\x80\x8D\xF0\x9F\x91(?:\xA6(?:\xE2\x80\x8D\xF0\x9F\x91\xA6)?|\xA7(?:\xE2\x80\x8D\xF0\x9F\x91[\xA6\xA7])?)|\xA6(?:\xE2\x80\x8D\xF0\x9F\x91\xA6)?|\xA7(?:\xE2\x80\x8D\xF0\x9F\x91[\xA6\xA7])?)|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\
x92]|\xA6[\xAF-\xB3\xBC\xBD]))|\xF0\x9F\x8F(?:\xBB(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBC-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBC(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBB\xBD-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBD(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBB\xBC\xBE\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBE(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBB-\xBD\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBF(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBB-\xBE]|\xA6[\xAF-\xB3\xBC\xBD])))?))?|\xA9(?:\xE2
\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x91(?:\xA6(?:\xE2\x80\x8D\xF0\x9F\x91\xA6)?|\xA7(?:\xE2\x80\x8D\xF0\x9F\x91[\xA6\xA7])?|\xA9\xE2\x80\x8D\xF0\x9F\x91(?:\xA6(?:\xE2\x80\x8D\xF0\x9F\x91\xA6)?|\xA7(?:\xE2\x80\x8D\xF0\x9F\x91[\xA6\xA7])?))|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA6[\xAF-\xB3\xBC\xBD]))|\xF0\x9F\x8F(?:\xBB(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBC-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBC(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB\xBD-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBD(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB\xBC\xBE\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBE(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\x
AC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBD\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBF(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBE]|\xA6[\xAF-\xB3\xBC\xBD])))?))?|\xAF(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)|\x92(?:[\x81\x82\x86\x87](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x83\x85\x8F\x91\xAA](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x80\x84\x88-\x8E\x90\x92-\xA9\xAB-\xBF])|\x93[\x80-\xBD\xBF]|\x94[\x80-\xBD]|\x95(?:[\xB4\xBA](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x89-\x8E\x90-\xA7\xAF\xB0\xB3\xB6-\xB9]|\xB5(?:\xEF\xB8\x8F\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F|\xF0\x9F\x8F[\xBB-\xBF](?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)?)|\x96(?:[\x90\x95\x96](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x87\x8A-\x8D\xA4\xA5\xA8\xB1\xB2\xBC])|\x97[\x82-\x84\x91-\x93\x9C-\x9E\xA1\xA3\xA8\xAF\xB3\xBA-\xBF]|\x98(?:[\x80-\xAD\xAF-\xB4\xB7-\xBF]|\xAE(?:\xE2\x80\x8D\xF0\x9F\x92\xA8)?|\xB5(?:\xE2\x80\x8D\xF0\x9F\x92\xAB)?|\xB6(?:\xE2\x80\x8D\xF0\x9F\x8C\xAB\xEF\xB8\x8F)?)|\x99(?:[\x85-\x87\x8B\x8D\x8E](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x8C\x8F](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x80-\x84\x88-\x8A])|\x9A(?:[\xA3\xB4-\xB6](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x80-\xA2\xA4-\xB3\xB7-\xBF])|\x9B(?:[\x80\x8C](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x81-\x85\x8B\x8D-\x92\x95-\x97\xA0-\xA5\xA9\xAB\xAC\xB0\xB3-\xBC])|\x9F[\xA0-\xAB]|\xA4(?:[\x8C\x8F\x98-\x9C\x9E\x9F\xB0-\xB4\xB6](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\xA6\xB5\xB7-\xB9\xBD\xBE](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x8D\x8E\x90-\x97\x9D\xA0-\xA5\xA7-\xAF\xBA\xBF]|\
xBC(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)|\xA5(?:[\x80-\x85\x87-\xB6\xB8\xBA-\xBF]|\xB7(?:\xF0\x9F\x8F[\xBB-\xBF])?)|\xA6(?:[\xB5\xB6\xBB](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\xB8\xB9](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x80-\xB4\xB7\xBA\xBC-\xBF])|\xA7(?:[\x8D-\x8F\x94\x96-\x9D](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x92\x93\x95](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x9E\x9F](?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x80-\x8B\x90\xA0-\xBF]|\x91(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]|\x9C\x88)\xEF\xB8\x8F|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91|\xA6[\xAF-\xB3\xBC\xBD]))|\xF0\x9F\x8F(?:\xBB(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBC-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBC(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBB\xBD-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBD(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBB\xBC\xBE\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBE(?:\xE2\x80\x8D(?:\xE2(?:\x9A[
\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBB-\xBD\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBF(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBB-\xBE])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?))?)|\xA9[\xB0-\xB4\xB8-\xBA]|\xAA[\x80-\x86\x90-\xA8\xB0-\xB6]|\xAB[\x80-\x82\x90-\x96])) \ No newline at end of file +(?:[#*0-9]\xE2\x83\xA3|\xC2[\xA9\xAE]|\xE2(?:\x80\xBC|\x81\x89|\x84[\xA2\xB9]|\x86[\x94-\x99\xA9\xAA]|\x8C[\x9A\x9B\xA8]|\x8F[\x8F\xA9-\xB3\xB8-\xBA]|\x93\x82|\x96[\xAA\xAB\xB6]|\x97[\x80\xBB-\xBE]|\x98(?:[\x80-\x84\x8E\x91\x94\x95\x98\xA0\xA2\xA3\xA6\xAA\xAE\xAF\xB8-\xBA]|\x9D(?:\xF0\x9F\x8F[\xBB-\xBF])?)|\x99[\x80\x82\x88-\x93\x9F\xA0\xA3\xA5\xA6\xA8\xBB\xBE\xBF]|\x9A[\x92-\x97\x99\x9B\x9C\xA0\xA1\xA7\xAA\xAB\xB0\xB1\xBD\xBE]|\x9B(?:[\x84\x85\x88\x8E\x8F\x91\x93\x94\xA9\xAA\xB0-\xB5\xB7\xB8\xBA\xBD]|\xB9(?:\xEF\xB8\x8F\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F|\xF0\x9F\x8F[\xBB-\xBF](?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)?)|\x9C(?:[\x8A-\x8D](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x82\x85\x88\x89\x8F\x92\x94\x96\x9D\xA1\xA8\xB3\xB4])|\x9D(?:[\x84\x87\x8C\x8E\x93-\x95\x97\xA3]|\xA4(?:\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x94\xA5|\xA9\xB9))?)|\x9E[\x95-\x97\xA1\xB0\xBF]|\xA4[\xB4\xB5]|\xAC[\x85-\x87\x9B\x9C]|\xAD[\x90\x95])|\xE3(?:\x80[\xB0\xBD]|\x8A[\x97\x99])|\xF0\x9F(?:\x80\x84|\x83\x8F|\x85[\xB0\xB1\xBE\xBF]|\x86[\x8E\x91-\x9A]|\x87(?:\xA6\xF0\x9F\x87[\xA8-\xAC\xAE\xB1\xB2\xB4\xB6-\xBA\xBC\xBD\xBF]|\xA7\xF0\x9F\x8
7[\xA6\xA7\xA9-\xAF\xB1-\xB4\xB6-\xB9\xBB\xBC\xBE\xBF]|\xA8\xF0\x9F\x87[\xA6\xA8\xA9\xAB-\xAE\xB0-\xB5\xB7\xBA-\xBF]|\xA9\xF0\x9F\x87[\xAA\xAC\xAF\xB0\xB2\xB4\xBF]|\xAA\xF0\x9F\x87[\xA6\xA8\xAA\xAC\xAD\xB7-\xBA]|\xAB\xF0\x9F\x87[\xAE-\xB0\xB2\xB4\xB7]|\xAC\xF0\x9F\x87[\xA6\xA7\xA9-\xAE\xB1-\xB3\xB5-\xBA\xBC\xBE]|\xAD\xF0\x9F\x87[\xB0\xB2\xB3\xB7\xB9\xBA]|\xAE\xF0\x9F\x87[\xA8-\xAA\xB1-\xB4\xB6-\xB9]|\xAF\xF0\x9F\x87[\xAA\xB2\xB4\xB5]|\xB0\xF0\x9F\x87[\xAA\xAC-\xAE\xB2\xB3\xB5\xB7\xBC\xBE\xBF]|\xB1\xF0\x9F\x87[\xA6-\xA8\xAE\xB0\xB7-\xBB\xBE]|\xB2\xF0\x9F\x87[\xA6\xA8-\xAD\xB0-\xBF]|\xB3\xF0\x9F\x87[\xA6\xA8\xAA-\xAC\xAE\xB1\xB4\xB5\xB7\xBA\xBF]|\xB4\xF0\x9F\x87\xB2|\xB5\xF0\x9F\x87[\xA6\xAA-\xAD\xB0-\xB3\xB7-\xB9\xBC\xBE]|\xB6\xF0\x9F\x87\xA6|\xB7\xF0\x9F\x87[\xAA\xB4\xB8\xBA\xBC]|\xB8\xF0\x9F\x87[\xA6-\xAA\xAC-\xB4\xB7-\xB9\xBB\xBD-\xBF]|\xB9\xF0\x9F\x87[\xA6\xA8\xA9\xAB-\xAD\xAF-\xB4\xB7\xB9\xBB\xBC\xBF]|\xBA\xF0\x9F\x87[\xA6\xAC\xB2\xB3\xB8\xBE\xBF]|\xBB\xF0\x9F\x87[\xA6\xA8\xAA\xAC\xAE\xB3\xBA]|\xBC\xF0\x9F\x87[\xAB\xB8]|\xBD\xF0\x9F\x87\xB0|\xBE\xF0\x9F\x87[\xAA\xB9]|\xBF\xF0\x9F\x87[\xA6\xB2\xBC])|\x88[\x81\x82\x9A\xAF\xB2-\xBA]|\x89[\x90\x91]|\x8C[\x80-\xA1\xA4-\xBF]|\x8D[\x80-\xBF]|\x8E(?:[\x80-\x84\x86-\x93\x96\x97\x99-\x9B\x9E-\xBF]|\x85(?:\xF0\x9F\x8F[\xBB-\xBF])?)|\x8F(?:[\x82\x87](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x83\x84\x8A](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x8B\x8C](?:\xEF\xB8\x8F\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F|\xF0\x9F\x8F[\xBB-\xBF](?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)?|[\x80\x81\x85\x86\x88\x89\x8D-\xB0\xB5\xB7-\xBF]|\xB3(?:\xEF\xB8\x8F\xE2\x80\x8D(?:\xE2\x9A\xA7\xEF\xB8\x8F|\xF0\x9F\x8C\x88))?|\xB4(?:\xE2\x80\x8D\xE2\x98\xA0\xEF\xB8\x8F|\xF3\xA0\x81\xA7\xF3\xA0\x81\xA2\xF3\xA0\x81(?:\xA5\xF3\xA0\x81\xAE\xF3\xA0\x81\xA7|\xB3\xF3\xA0\x81\xA3\xF3\xA0\x81\xB4|\xB7\xF3\xA0\x81\xAC\xF3\xA0\x81\xB3)\xF3\xA0\x81\xBF)?)|\x90(?:[\x88\xA6](?:\xE2\x80\x8D\xE2\xAC\x9B)?|[\x80-\x87\x89-\x94\x96-\xA5\x
A7-\xBA\xBC-\xBF]|\x95(?:\xE2\x80\x8D\xF0\x9F\xA6\xBA)?|\xBB(?:\xE2\x80\x8D\xE2\x9D\x84\xEF\xB8\x8F)?)|\x91(?:[\x82\x83\x86-\x90\xA6\xA7\xAB-\xAD\xB2\xB4-\xB6\xB8\xBC](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\xAE\xB0\xB1\xB3\xB7](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x80\x84\x85\x91-\xA5\xAA\xB9-\xBB\xBD-\xBF]|\x81(?:\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F\x97\xA8\xEF\xB8\x8F)?|\xA8(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8)|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x91(?:[\xA8\xA9]\xE2\x80\x8D\xF0\x9F\x91(?:\xA6(?:\xE2\x80\x8D\xF0\x9F\x91\xA6)?|\xA7(?:\xE2\x80\x8D\xF0\x9F\x91[\xA6\xA7])?)|\xA6(?:\xE2\x80\x8D\xF0\x9F\x91\xA6)?|\xA7(?:\xE2\x80\x8D\xF0\x9F\x91[\xA6\xA7])?)|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA6[\xAF-\xB3\xBC\xBD]))|\xF0\x9F\x8F(?:\xBB(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBC-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBC(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBB\xBD-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBD(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\x
A4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBB\xBC\xBE\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBE(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBB-\xBD\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBF(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91\xA8\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91\xA8\xF0\x9F\x8F[\xBB-\xBE]|\xA6[\xAF-\xB3\xBC\xBD])))?))?|\xA9(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x91(?:\xA6(?:\xE2\x80\x8D\xF0\x9F\x91\xA6)?|\xA7(?:\xE2\x80\x8D\xF0\x9F\x91[\xA6\xA7])?|\xA9\xE2\x80\x8D\xF0\x9F\x91(?:\xA6(?:\xE2\x80\x8D\xF0\x9F\x91\xA6)?|\xA7(?:\xE2\x80\x8D\xF0\x9F\x91[\xA6\xA7])?))|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA6[\xAF-\xB3\xBC\xBD]))|\xF0\x9F\x8F(?:\xBB(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBC-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBC(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE
|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB\xBD-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBD(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB\xBC\xBE\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBE(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBD\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBF(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\x91[\xA8\xA9]\xF0\x9F\x8F[\xBB-\xBE]|\xA6[\xAF-\xB3\xBC\xBD])))?))?|\xAF(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)|\x92(?:[\x81\x82\x86\x87](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x83\x85\x8F\x91\xAA](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x80\x84\x88-\x8E\x90\x92-\xA9\xAB-\xBF])|\x93[\x80-\xBD\xBF]|\x94[\x80-\xBD]|\x95(?:[\xB4\xBA](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x89-\x8E\x90-\xA7\xAF\xB0\xB3\xB6-\xB9]|\xB5(?:\xEF\xB8\x8F\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F|\xF0\x9F\x8F[\xBB-\xBF](?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)?)|\x96(?:[\x90\x95\x96](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x87\x8A-\x8D\xA4\xA5\xA8\xB1\xB2\xBC])|\x9
7[\x82-\x84\x91-\x93\x9C-\x9E\xA1\xA3\xA8\xAF\xB3\xBA-\xBF]|\x98(?:[\x80-\xAD\xAF-\xB4\xB7-\xBF]|\xAE(?:\xE2\x80\x8D\xF0\x9F\x92\xA8)?|\xB5(?:\xE2\x80\x8D\xF0\x9F\x92\xAB)?|\xB6(?:\xE2\x80\x8D\xF0\x9F\x8C\xAB\xEF\xB8\x8F)?)|\x99(?:[\x85-\x87\x8B\x8D\x8E](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x8C\x8F](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x80-\x84\x88-\x8A])|\x9A(?:[\xA3\xB4-\xB6](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x80-\xA2\xA4-\xB3\xB7-\xBF])|\x9B(?:[\x80\x8C](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x81-\x85\x8B\x8D-\x92\x95-\x97\x9C-\xA5\xA9\xAB\xAC\xB0\xB3-\xBC])|\x9F[\xA0-\xAB\xB0]|\xA4(?:[\x8C\x8F\x98-\x9F\xB0-\xB4\xB6](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\xA6\xB5\xB7-\xB9\xBD\xBE](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x8D\x8E\x90-\x97\xA0-\xA5\xA7-\xAF\xBA\xBF]|\xBC(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?)|\xA5(?:[\x80-\x85\x87-\xB6\xB8-\xBF]|\xB7(?:\xF0\x9F\x8F[\xBB-\xBF])?)|\xA6(?:[\xB5\xB6\xBB](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\xB8\xB9](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x80-\xB4\xB7\xBA\xBC-\xBF])|\xA7(?:[\x8D-\x8F\x94\x96-\x9D](?:\xF0\x9F\x8F[\xBB-\xBF])?(?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x92\x93\x95](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x9E\x9F](?:\xE2\x80\x8D\xE2\x99[\x80\x82]\xEF\xB8\x8F)?|[\x80-\x8C\x90\xA0-\xBF]|\x91(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]|\x9C\x88)\xEF\xB8\x8F|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91|\xA6[\xAF-\xB3\xBC\xBD]))|\xF0\x9F\x8F(?:\xBB(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBC-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x
9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBC(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBB\xBD-\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBD(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBB\xBC\xBE\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBE(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBB-\xBD\xBF])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?|\xBF(?:\xE2\x80\x8D(?:\xE2(?:\x9A[\x95\x96]\xEF\xB8\x8F|\x9C\x88\xEF\xB8\x8F|\x9D\xA4\xEF\xB8\x8F\xE2\x80\x8D\xF0\x9F(?:\x92\x8B\xE2\x80\x8D\xF0\x9F)?\xA7\x91\xF0\x9F\x8F[\xBB-\xBE])|\xF0\x9F(?:\x8C\xBE|\x8D[\xB3\xBC]|\x8E[\x84\x93\xA4\xA8]|\x8F[\xAB\xAD]|\x92[\xBB\xBC]|\x94[\xA7\xAC]|\x9A[\x80\x92]|\xA4\x9D\xE2\x80\x8D\xF0\x9F\xA7\x91\xF0\x9F\x8F[\xBB-\xBF]|\xA6[\xAF-\xB3\xBC\xBD])))?))?)|\xA9[\xB0-\xBC]|\xAA[\x80-\x88\x90-\xBD\xBF]|\xAB(?:[\x83-\x85\xB0\xB2-\xB8](?:\xF0\x9F\x8F[\xBB-\xBF])?|[\x80-\x82\x8E-\x9B\xA0-\xA8]|\xB1(?:\xF0\x9F\x8F(?:\xBB(?:\xE2\x80\x8D\xF0\x9F\xAB\xB2\xF0\x9F\x8F[\xBC-\xBF])?|\xBC(?:\xE2\x80\x8D\xF0\x9F\xAB\xB2\xF0\x9F\x8F[\xBB\xBD-\xBF])?|\xBD(?:\xE2\x80\x8D\xF0\x9F\xAB\xB2\xF0\x9F\x8F[\xBB\xBC\xBE\xBF])?|\xBE(?:\xE2\x80\x8D\xF0\x9F\
xAB\xB2\xF0\x9F\x8F[\xBB-\xBD\xBF])?|\xBF(?:\xE2\x80\x8D\xF0\x9F\xAB\xB2\xF0\x9F\x8F[\xBB-\xBE])?))?))) \ No newline at end of file From b8bedbfc42ae5f835a9ac0643ef49b6e14080f10 Mon Sep 17 00:00:00 2001 From: Dan Date: Sun, 2 Oct 2022 17:30:42 -0400 Subject: [PATCH 05/35] clean up code --- src/HtmlReplacer.php | 3 ++- tests/Unit/HtmlTest.php | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 21a8f62..0d90cac 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -12,7 +12,7 @@ class HtmlReplacer public function __construct() { - if (!class_exists(HtmlPageCrawler::class)) { + if (! class_exists(HtmlPageCrawler::class)) { throw new RuntimeException( sprintf('Cannot use %s method unless `wa72/htmlpagedom` is installed.', __METHOD__) ); @@ -39,6 +39,7 @@ public function parse(string $html): string ->type($this->type) ->toHtml(); $node->makeEmpty()->setInnerHtml($twemojiContent); + return $node; }); diff --git a/tests/Unit/HtmlTest.php b/tests/Unit/HtmlTest.php index 738e2ab..5cd7f7e 100644 --- a/tests/Unit/HtmlTest.php +++ b/tests/Unit/HtmlTest.php @@ -1,10 +1,7 @@ png(); From bb842849887c7297fe7d9506d8d9cee8fe38be73 Mon Sep 17 00:00:00 2001 From: Dan Date: Sun, 2 Oct 2022 18:01:46 -0400 Subject: [PATCH 06/35] Add test to cover image alt/title attributes --- src/HtmlReplacer.php | 13 +++++++++---- tests/Datasets/HtmlContent.php | 18 ++++++++++++++++++ ...with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html | 17 +++++++++++++++++ 3 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 0d90cac..56f62dc 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -31,10 +31,15 @@ public function parse(string $html): string return $html; } - $bodyChildren = $bodyChildren->each(function (HtmlPageCrawler $node) { - // TODO: 
consider some sort of filtering here to only twemoji encode "Text Nodes". - // It's just a bit harder to do in PHP than JS it seems. - $twemojiContent = (new EmojiText($node->innerText())) + $bodyChildren->each(function (HtmlPageCrawler $node) { + // Bail early if attempt to get inner text fails... + try { + $nodeInnterText = $node->innerText(); + } catch (\Throwable $throwable) { + return $node; + } + + $twemojiContent = (new EmojiText($nodeInnterText)) ->base($this->base) ->type($this->type) ->toHtml(); diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php index b502e2f..93eba0d 100644 --- a/tests/Datasets/HtmlContent.php +++ b/tests/Datasets/HtmlContent.php @@ -41,5 +41,23 @@

🙏🐘

+HTML, + <<<'HTML' + + + + + Test with Emoji in ALT text + + +

Hello Friends 👋

+ A random image of Bill Murray 🍻 +

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

+ + HTML, ]); diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html new file mode 100644 index 0000000..f83089b --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html @@ -0,0 +1,17 @@ + + + + + Test with Emoji in ALT text + + +

Hello Friends 👋 +

+ A random image of Bill Murray 🍻 +

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

+ + From eb121c003d47108e76efb882d4be9b4297b010c2 Mon Sep 17 00:00:00 2001 From: Dan Date: Sun, 2 Oct 2022 19:36:36 -0400 Subject: [PATCH 07/35] refactor to use XPath to solve filtering text nodes problem --- src/HtmlReplacer.php | 18 ++++++++------ tests/Datasets/HtmlContent.php | 24 +++++++++++++++++++ ...ith_(DOCTYPE_htmlnhtml_langhtml)_3__1.html | 2 +- ...ith_(DOCTYPE_htmlnhtml_langhtml)_4__1.html | 2 +- ...ith_(DOCTYPE_htmlnhtml_langhtml)_5__1.html | 23 ++++++++++++++++++ 5 files changed, 60 insertions(+), 9 deletions(-) create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 56f62dc..cc20c96 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -4,12 +4,15 @@ use Astrotomic\Twemoji\Concerns\Configurable; use RuntimeException; +use Wa72\HtmlPageDom\HtmlPage; use Wa72\HtmlPageDom\HtmlPageCrawler; class HtmlReplacer { use Configurable; + public static string $shouldNotBeParsed = "/^(?:iframe|noframes|noscript|script|select|style|textarea)$/"; + public function __construct() { if (! class_exists(HtmlPageCrawler::class)) { @@ -22,16 +25,17 @@ public function __construct() public function parse(string $html): string { // Parse the html - $parsedHtml = HtmlPageCrawler::create($html); - // Fetch the body node children if any - $bodyChildren = $parsedHtml - ->filter('body > *'); + $parsedHtml = new HtmlPage($html); + $body = $parsedHtml->getBody(); - if ($bodyChildren->count() === 0) { + if ($body->children()->count() === 0) { return $html; } - $bodyChildren->each(function (HtmlPageCrawler $node) { + // Use xpath to filter only the "TextNodes" within each "Element" + $textNodes = $body->filterXPath('.//*[normalize-space(text())]'); + + $textNodes->each(function (HtmlPageCrawler $node) { // Bail early if attempt to get inner text fails... 
try { $nodeInnterText = $node->innerText(); @@ -48,6 +52,6 @@ public function parse(string $html): string return $node; }); - return $parsedHtml->saveHTML(); + return $parsedHtml->save(); } } diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php index 93eba0d..d0faa13 100644 --- a/tests/Datasets/HtmlContent.php +++ b/tests/Datasets/HtmlContent.php @@ -59,5 +59,29 @@

🐘🐘

+HTML, + <<<'HTML' + + + + + Test with Emoji in ALT text + + +
+

Hello Friends 👋

+ A random image of Bill Murray 🍻 +
+
+
+

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

+
+
+ + HTML, ]); diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html index 26d4130..89a1f6f 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html @@ -1,6 +1,6 @@ - + diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html index f83089b..a3ea27c 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html @@ -1,6 +1,6 @@ - + Test with Emoji in ALT text diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html new file mode 100644 index 0000000..ac934a9 --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html @@ -0,0 +1,23 @@ + + + + + Test with Emoji in ALT text + + +
+

Hello Friends 👋 +

+ A random image of Bill Murray 🍻 +
+
+
+

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

+
+
+ + From 92d9136c5fa01f5ae8f8517ba212d8d30338908d Mon Sep 17 00:00:00 2001 From: Dan Date: Sun, 2 Oct 2022 19:45:11 -0400 Subject: [PATCH 08/35] Remove try-guy now that it's unused --- src/HtmlReplacer.php | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index cc20c96..e93fdb8 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -36,14 +36,7 @@ public function parse(string $html): string $textNodes = $body->filterXPath('.//*[normalize-space(text())]'); $textNodes->each(function (HtmlPageCrawler $node) { - // Bail early if attempt to get inner text fails... - try { - $nodeInnterText = $node->innerText(); - } catch (\Throwable $throwable) { - return $node; - } - - $twemojiContent = (new EmojiText($nodeInnterText)) + $twemojiContent = (new EmojiText($node->innerText())) ->base($this->base) ->type($this->type) ->toHtml(); From 4538750bc1bd094dbb69f5bc79cc0e1c79889796 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 4 Oct 2022 08:58:30 -0400 Subject: [PATCH 09/35] refactor to ensure we allow HTML fragments too --- .gitignore | 1 + src/HtmlReplacer.php | 27 ++++++++++++------- tests/Datasets/HtmlContent.php | 27 +++++++++++++++++++ ...ith_(DOCTYPE_htmlnhtml_langhtml)_3__1.html | 2 +- ...ith_(DOCTYPE_htmlnhtml_langhtml)_4__1.html | 2 +- ...ith_(DOCTYPE_htmlnhtml_langhtml)_5__1.html | 2 +- ...th_(section_classcomment-boxction)__1.html | 25 +++++++++++++++++ 7 files changed, 74 insertions(+), 12 deletions(-) create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(section_classcomment-boxction)__1.html diff --git a/.gitignore b/.gitignore index c8153b5..8d978e8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /composer.lock /vendor/ +.phpunit.result.cache diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index e93fdb8..4c536ec 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -11,8 +11,6 @@ class HtmlReplacer { use Configurable; - public static 
string $shouldNotBeParsed = "/^(?:iframe|noframes|noscript|script|select|style|textarea)$/"; - public function __construct() { if (! class_exists(HtmlPageCrawler::class)) { @@ -24,16 +22,18 @@ public function __construct() public function parse(string $html): string { - // Parse the html - $parsedHtml = new HtmlPage($html); - $body = $parsedHtml->getBody(); + // Parse the HTML page or fragment... + $parsedHtmlRoot = new HtmlPageCrawler($html); + // Filter parsed HTML "root" into the twemoji relevant parts... + $parsedHtml = $this->checkHtmlIsDocumentAndSelectBody($parsedHtmlRoot); - if ($body->children()->count() === 0) { + // If the filtered DOM fragment doesn't have any children, return the input HTML. + if ($parsedHtml->children()->count() === 0) { return $html; } - // Use xpath to filter only the "TextNodes" within each "Element" - $textNodes = $body->filterXPath('.//*[normalize-space(text())]'); + // Use xpath to filter only the "TextNodes" within every "Element" + $textNodes = $parsedHtml->filterXPath('.//*[normalize-space(text())]'); $textNodes->each(function (HtmlPageCrawler $node) { $twemojiContent = (new EmojiText($node->innerText())) @@ -45,6 +45,15 @@ public function parse(string $html): string return $node; }); - return $parsedHtml->save(); + return $parsedHtmlRoot->saveHTML(); + } + + private function checkHtmlIsDocumentAndSelectBody(HtmlPageCrawler $htmlRoot): HtmlPageCrawler + { + if ($htmlRoot->isHtmlDocument()) { + return $htmlRoot->filter('body'); + } + + return $htmlRoot; } } diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php index d0faa13..4d66645 100644 --- a/tests/Datasets/HtmlContent.php +++ b/tests/Datasets/HtmlContent.php @@ -84,4 +84,31 @@ HTML, +<<<'HTML' +
+
+

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

+
+
+
+
+

Time for a cRUSTation RAVE!

+

🦀🦀🦀🦀

+

🦀🦀

+

🦀🦀🦀🦀

+

🦀

+
+
+
+
+

but what if the crabs and elephants rave together?!

+
+
+
+
+HTML, ]); diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html index 89a1f6f..26d4130 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html @@ -1,6 +1,6 @@ - + diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html index a3ea27c..f83089b 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html @@ -1,6 +1,6 @@ - + Test with Emoji in ALT text diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html index ac934a9..579e2ee 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html @@ -1,6 +1,6 @@ - + Test with Emoji in ALT text diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(section_classcomment-boxction)__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(section_classcomment-boxction)__1.html new file mode 100644 index 0000000..7da92ce --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(section_classcomment-boxction)__1.html @@ -0,0 +1,25 @@ +
+
+

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

+
+
+
+
+

Time for a cRUSTation RAVE!

+

🦀🦀🦀🦀

+

🦀🦀

+

🦀🦀🦀🦀

+

🦀

+
+
+
+
+

but what if the crabs and elephants rave together?!

+
+
+
+
From cd6e1906caf1abb76718d849d83137de17801390 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 4 Oct 2022 09:11:35 -0400 Subject: [PATCH 10/35] refactor tests to split up HTML pages and HTML fragments --- tests/Datasets/HtmlContent.php | 49 ++++++++++++++++--- tests/Unit/HtmlTest.php | 10 +++- ...th_(DOCTYPE_htmlnhtml_langhtml)_1__1.html} | 0 ...th_(DOCTYPE_htmlnhtml_langhtml)_2__1.html} | 0 ...th_(DOCTYPE_htmlnhtml_langhtml)_3__1.html} | 0 ...th_(DOCTYPE_htmlnhtml_langhtml)_4__1.html} | 0 ...th_(DOCTYPE_htmlnhtml_langhtml)_5__1.html} | 0 ...ith_(DOCTYPE_htmlnhtml_langhtml)_6__1.html | 23 +++++++++ ..._with_(articlen____pLorem__ipticle)__1.txt | 6 +++ ...th_(section_classcomment-boxction)__1.txt} | 4 +- 10 files changed, 82 insertions(+), 10 deletions(-) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html => HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html} (100%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html => HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html} (100%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html => HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html} (100%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html => HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html} (100%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html => HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html} (100%) create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(articlen____pLorem__ipticle)__1.txt rename 
tests/__snapshots__/{HtmlTest__it_can_parse_HTML_content_with_(section_classcomment-boxction)__1.html => HtmlTest__it_can_parse_HTML_fragments_content_with_(section_classcomment-boxction)__1.txt} (98%) diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php index 4d66645..378100b 100644 --- a/tests/Datasets/HtmlContent.php +++ b/tests/Datasets/HtmlContent.php @@ -1,14 +1,14 @@ HTML, - <<<'HTML' +<<<'HTML' @@ -21,7 +21,7 @@ HTML, - <<<'HTML' +<<<'HTML' @@ -42,7 +42,7 @@ HTML, - <<<'HTML' +<<<'HTML' @@ -60,7 +60,7 @@ HTML, - <<<'HTML' +<<<'HTML' @@ -85,6 +85,35 @@ HTML, <<<'HTML' + + + + + + + HTML 5๐Ÿš€ Boilerplate + + + +

Do a quick kickflip! 🛹

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

🦀🦀🦀🦀🦀

+

🦀🦀🦀

+

🦀🦀🦀🦀🦀

+

🙏🐘

+ + + +HTML, +]); + + +dataset('html-fragments', [ +<<<'HTML'

Time for a ElePHPant RAVE!

@@ -111,4 +140,12 @@
HTML, +<<<'HTML' +
+

Lorem 😂😂 ipsum 🕵️‍♂️dolor sit✍️ amet, consectetur adipiscing😇😇🤙 elit, sed do eiusmod🥰 tempor 😤😤🏳️‍🌈incididunt ut 👏labore 👏et👏 dolore 👏magna👏 aliqua.

+

Ut enim ad minim 🐵✊🏿veniam,❤️😤😫😩💦💦 quis nostrud 👿🤮exercitation ullamco 🧠👮🏿‍♀️🅱️laboris nisi ut aliquip❗️ ex ea commodo consequat.

+

💯Duis aute💦😂😂😂 irure dolor 👳🏻‍♂️🗿in reprehenderit 🤖👻👎in voluptate velit esse cillum dolore 🙏🙏eu fugiat🤔 nulla pariatur.

+

🙅‍♀️🙅‍♀️Excepteur sint occaecat🤷‍♀️🤦‍♀️ cupidatat💅 non💃 proident,👨‍👧 sunt🤗 in culpa😥😰😨 qui officia🤩🤩 deserunt mollit 🧐anim id est laborum.🤔🤔

+
+HTML, ]); diff --git a/tests/Unit/HtmlTest.php b/tests/Unit/HtmlTest.php index 5cd7f7e..421b495 100644 --- a/tests/Unit/HtmlTest.php +++ b/tests/Unit/HtmlTest.php @@ -2,8 +2,14 @@ use Astrotomic\Twemoji\HtmlReplacer; use function Spatie\Snapshots\assertMatchesHtmlSnapshot; +use function Spatie\Snapshots\assertMatchesTextSnapshot; -it('can parse HTML content', function (string $html) { +it('can parse HTML Pages', function (string $html) { $htmlReplacer = (new HtmlReplacer())->png(); assertMatchesHtmlSnapshot($htmlReplacer->parse($html)); -})->with('html'); +})->with('html-pages'); + +it('can parse HTML fragments content', function (string $html) { + $htmlReplacer = (new HtmlReplacer())->png(); + assertMatchesTextSnapshot($htmlReplacer->parse($html)); +})->with('html-fragments'); diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html rename to tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html rename to tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html similarity index 100% rename from 
tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html rename to tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html rename to tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html rename to tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html new file mode 100644 index 0000000..db74d3f --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html @@ -0,0 +1,23 @@ + + + + + + + HTML 5🚀 Boilerplate + + + +

Do a quick kickflip! 🛹 +

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

🦀🦀🦀🦀🦀

+

🦀🦀🦀

+

🦀🦀🦀🦀🦀

+

+🙏🐘 +

+ + + diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(articlen____pLorem__ipticle)__1.txt b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(articlen____pLorem__ipticle)__1.txt new file mode 100644 index 0000000..f3a466f --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(articlen____pLorem__ipticle)__1.txt @@ -0,0 +1,6 @@ +
+

Lorem 😂😂 ipsum 🕵️‍♂️dolor sit✍️ amet, consectetur adipiscing😇😇🤙 elit, sed do eiusmod🥰 tempor 😤😤🏳️‍🌈incididunt ut 👏labore 👏et👏 dolore 👏magna👏 aliqua.

+

Ut enim ad minim 🐵✊🏿veniam,❤😤😫😩💦💦 quis nostrud 👿🤮exercitation ullamco 🧠👮🏿‍♀️🅱️laboris nisi ut aliquip❗️ ex ea commodo consequat.

+

💯Duis aute💦😂😂😂 irure dolor 👳🏻‍♂️🗿in reprehenderit 🤖👻👎in voluptate velit esse cillum dolore 🙏🙏eu fugiat🤔 nulla pariatur.

+

🙅‍♀️🙅‍♀️Excepteur sint occaecat🤷‍♀️🤦‍♀️ cupidatat💅 non💃 proident,👨‍👧 sunt🤗 in culpa😥😰😨 qui officia🤩🤩 deserunt mollit 🧐anim id est laborum.🤔🤔

+
\ No newline at end of file diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(section_classcomment-boxction)__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(section_classcomment-boxction)__1.txt similarity index 98% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(section_classcomment-boxction)__1.html rename to tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(section_classcomment-boxction)__1.txt index 7da92ce..5bd6205 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_content_with_(section_classcomment-boxction)__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(section_classcomment-boxction)__1.txt @@ -1,4 +1,4 @@ -
+

Time for a ElePHPant RAVE!

🐘🐘🐘🐘

@@ -22,4 +22,4 @@

Time for a cRUSTation RAVE!

- + \ No newline at end of file From 60c987fcdca5c9de1e779fe82a4b5a6e3057d64b Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 4 Oct 2022 09:16:13 -0400 Subject: [PATCH 11/35] Use internal tag as means of warning? --- src/HtmlReplacer.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 4c536ec..e2025d7 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -4,9 +4,11 @@ use Astrotomic\Twemoji\Concerns\Configurable; use RuntimeException; -use Wa72\HtmlPageDom\HtmlPage; use Wa72\HtmlPageDom\HtmlPageCrawler; +/** + * @internal This class is marked as Internal as it is considered Experimental. Code subject to change until warning removed. + */ class HtmlReplacer { use Configurable; From f7616c057f05789384257f459044d3a89bb1cb60 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 4 Oct 2022 09:17:54 -0400 Subject: [PATCH 12/35] Refactor method name to slightly better option --- src/HtmlReplacer.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index e2025d7..698c4b0 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -27,7 +27,7 @@ public function parse(string $html): string // Parse the HTML page or fragment... $parsedHtmlRoot = new HtmlPageCrawler($html); // Filter parsed HTML "root" into the twemoji relevant parts... - $parsedHtml = $this->checkHtmlIsDocumentAndSelectBody($parsedHtmlRoot); + $parsedHtml = $this->whenHtmlDocFilterBody($parsedHtmlRoot); // If the filtered DOM fragment doesn't have any children, return the input HTML. 
if ($parsedHtml->children()->count() === 0) { @@ -50,7 +50,7 @@ public function parse(string $html): string return $parsedHtmlRoot->saveHTML(); } - private function checkHtmlIsDocumentAndSelectBody(HtmlPageCrawler $htmlRoot): HtmlPageCrawler + private function whenHtmlDocFilterBody(HtmlPageCrawler $htmlRoot): HtmlPageCrawler { if ($htmlRoot->isHtmlDocument()) { return $htmlRoot->filter('body'); From e818b5c85eebe47ff86945ba8869e94b0c804734 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 4 Oct 2022 09:18:42 -0400 Subject: [PATCH 13/35] fix code styles --- tests/Datasets/HtmlContent.php | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php index 378100b..d41a2d5 100644 --- a/tests/Datasets/HtmlContent.php +++ b/tests/Datasets/HtmlContent.php @@ -1,14 +1,14 @@ HTML, -<<<'HTML' + <<<'HTML' @@ -21,7 +21,7 @@ HTML, -<<<'HTML' + <<<'HTML' @@ -42,7 +42,7 @@ HTML, -<<<'HTML' + <<<'HTML' @@ -60,7 +60,7 @@ HTML, -<<<'HTML' + <<<'HTML' @@ -84,7 +84,7 @@ HTML, -<<<'HTML' + <<<'HTML' @@ -113,7 +113,7 @@ dataset('html-fragments', [ -<<<'HTML' + <<<'HTML'

Time for a ElePHPant RAVE!

@@ -140,7 +140,7 @@
HTML, -<<<'HTML' + <<<'HTML'

Lorem 😂😂 ipsum 🕵️‍♂️dolor sit✍️ amet, consectetur adipiscing😇😇🤙 elit, sed do eiusmod🥰 tempor 😤😤🏳️‍🌈incididunt ut 👏labore 👏et👏 dolore 👏magna👏 aliqua.

Ut enim ad minim 🐵✊🏿veniam,❤️😤😫😩💦💦 quis nostrud 👿🤮exercitation ullamco 🧠👮🏿‍♀️🅱️laboris nisi ut aliquip❗️ ex ea commodo consequat.

From b1f83c7c84d2f09086ad48a120b4e16662b66e10 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 4 Oct 2022 09:20:01 -0400 Subject: [PATCH 14/35] make styleCI happy --- tests/Datasets/HtmlContent.php | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php index d41a2d5..1d0cb86 100644 --- a/tests/Datasets/HtmlContent.php +++ b/tests/Datasets/HtmlContent.php @@ -111,7 +111,6 @@ HTML, ]); - dataset('html-fragments', [ <<<'HTML'
From 29f7d0a66ead3431739ed9da38f51e01aeb1d0b1 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 4 Oct 2022 11:18:42 -0400 Subject: [PATCH 15/35] Refactor to fix missed fragments and expand tests --- src/HtmlReplacer.php | 9 +++--- tests/Datasets/HtmlContent.php | 19 +++++++++++++ ...ith_(DOCTYPE_htmlnhtml_langhtml)_2__1.html | 11 ++------ ...ith_(DOCTYPE_htmlnhtml_langhtml)_3__1.html | 15 ++-------- ...ith_(DOCTYPE_htmlnhtml_langhtml)_4__1.html | 21 ++++++++------ ...ith_(DOCTYPE_htmlnhtml_langhtml)_5__1.html | 20 +++++-------- ...ith_(DOCTYPE_htmlnhtml_langhtml)_6__1.html | 28 +++++++++---------- ...ith_(DOCTYPE_htmlnhtml_langhtml)_7__1.html | 23 +++++++++++++++ ...s_content_with_(a_href_titleLink_a)__1.txt | 1 + ...ragments_content_with_(img_src_alt)__1.txt | 1 + ...se_HTML_fragments_content_with_(pp)__1.txt | 1 + ...h_(scriptdocument.innerHTML__cript)__1.txt | 1 + 12 files changed, 89 insertions(+), 61 deletions(-) create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_7__1.html create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(a_href_titleLink_a)__1.txt create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(img_src_alt)__1.txt create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(pp)__1.txt create mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(scriptdocument.innerHTML__cript)__1.txt diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 698c4b0..9e9889b 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -28,15 +28,14 @@ public function parse(string $html): string $parsedHtmlRoot = new HtmlPageCrawler($html); // Filter parsed HTML "root" into the twemoji relevant parts... 
$parsedHtml = $this->whenHtmlDocFilterBody($parsedHtmlRoot); + // Use xpath to filter only the "TextNodes" within every "Element" + $textNodes = $parsedHtml->filterXPath('.//*[normalize-space(text())]'); - // If the filtered DOM fragment doesn't have any children, return the input HTML. - if ($parsedHtml->children()->count() === 0) { + // If the filtered DOM fragment doesn't have TextNode children, return the input HTML. + if ($textNodes->count() === 0) { return $html; } - // Use xpath to filter only the "TextNodes" within every "Element" - $textNodes = $parsedHtml->filterXPath('.//*[normalize-space(text())]'); - $textNodes->each(function (HtmlPageCrawler $node) { $twemojiContent = (new EmojiText($node->innerText())) ->base($this->base) diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php index 1d0cb86..a76b5a8 100644 --- a/tests/Datasets/HtmlContent.php +++ b/tests/Datasets/HtmlContent.php @@ -10,6 +10,13 @@ HTML, <<<'HTML' + + + Hey ๐Ÿš€ + +HTML, + <<<'HTML' + @@ -113,6 +120,18 @@ dataset('html-fragments', [ <<<'HTML' +

🚀

+HTML, + <<<'HTML' +๐ŸŽ‰ +HTML, + <<<'HTML' +Link โ›“๏ธ +HTML, + <<<'HTML' + +HTML, + <<<'HTML'

Time for a ElePHPant RAVE!

diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html index cc7c3ee..d44bab7 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html @@ -1,11 +1,6 @@ - - - - - HTML 5🚀 Boilerplate - - - + + Hey 🚀 + diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html index 26d4130..cc7c3ee 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html @@ -1,22 +1,11 @@ - + HTML 5🚀 Boilerplate - -

Do a quick kickflip! 🛹 -

-

This is HTML text that should be replaced, but the emoji in the head should not.

-

Time for a CRAB RAVE!

-

🦀🦀🦀🦀🦀

-

🦀🦀🦀

-

🦀🦀🦀🦀🦀

-

-🙏🐘 -

- + diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html index f83089b..26d4130 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html @@ -2,16 +2,21 @@ - Test with Emoji in ALT text + + + HTML 5🚀 Boilerplate + -

Hello Friends 👋 +

Do a quick kickflip! 🛹

- A random image of Bill Murray 🍻 -

Time for a ElePHPant RAVE!

-

🐘🐘🐘🐘

-

🐘🐘🐘

-

🐘🐘🐘🐘🐘

-

🐘🐘

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

🦀🦀🦀🦀🦀

+

🦀🦀🦀

+

🦀🦀🦀🦀🦀

+

+🙏🐘 +

diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html index 579e2ee..f83089b 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html @@ -5,19 +5,13 @@ Test with Emoji in ALT text -
-

Hello Friends 👋 +

Hello Friends 👋

- A random image of Bill Murray 🍻 -
-
-
-

Time for a ElePHPant RAVE!

-

🐘🐘🐘🐘

-

🐘🐘🐘

-

🐘🐘🐘🐘🐘

-

🐘🐘

-
-
+ A random image of Bill Murray 🍻 +

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html index db74d3f..579e2ee 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html @@ -2,22 +2,22 @@ - - - HTML 5🚀 Boilerplate - + Test with Emoji in ALT text -

Do a quick kickflip! 🛹 +
+

Hello Friends 👋

-

This is HTML text that should be replaced, but the emoji in the head should not.

-

Time for a CRAB RAVE!

-

🦀🦀🦀🦀🦀

-

🦀🦀🦀

-

🦀🦀🦀🦀🦀

-

-🙏🐘 -

- + A random image of Bill Murray 🍻 +
+
+
+

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

+
+
diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_7__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_7__1.html new file mode 100644 index 0000000..db74d3f --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_7__1.html @@ -0,0 +1,23 @@ + + + + + + + HTML 5🚀 Boilerplate + + + +

Do a quick kickflip! 🛹 +

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

🦀🦀🦀🦀🦀

+

🦀🦀🦀

+

🦀🦀🦀🦀🦀

+

+🙏🐘 +

+ + + diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(a_href_titleLink_a)__1.txt b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(a_href_titleLink_a)__1.txt new file mode 100644 index 0000000..ffb7d72 --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(a_href_titleLink_a)__1.txt @@ -0,0 +1 @@ +Link ⛓ \ No newline at end of file diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(img_src_alt)__1.txt b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(img_src_alt)__1.txt new file mode 100644 index 0000000..483ef65 --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(img_src_alt)__1.txt @@ -0,0 +1 @@ +๐ŸŽ‰ \ No newline at end of file diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(pp)__1.txt b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(pp)__1.txt new file mode 100644 index 0000000..08e7e3f --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(pp)__1.txt @@ -0,0 +1 @@ +

🚀

\ No newline at end of file diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(scriptdocument.innerHTML__cript)__1.txt b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(scriptdocument.innerHTML__cript)__1.txt new file mode 100644 index 0000000..c57b86a --- /dev/null +++ b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(scriptdocument.innerHTML__cript)__1.txt @@ -0,0 +1 @@ + \ No newline at end of file From eeb5f0a8720cf0c03ab66e7be2546c75b7f798e6 Mon Sep 17 00:00:00 2001 From: Dan Date: Tue, 4 Oct 2022 11:19:56 -0400 Subject: [PATCH 16/35] reorder code --- tests/Datasets/HtmlContent.php | 100 ++++++++++++++++----------------- tests/Unit/HtmlTest.php | 10 ++-- 2 files changed, 55 insertions(+), 55 deletions(-) diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php index a76b5a8..acc8a69 100644 --- a/tests/Datasets/HtmlContent.php +++ b/tests/Datasets/HtmlContent.php @@ -1,5 +1,55 @@ ๐Ÿš€

+HTML, + <<<'HTML' +๐ŸŽ‰ +HTML, + <<<'HTML' +Link โ›“๏ธ +HTML, + <<<'HTML' + +HTML, + <<<'HTML' +
+
+

Time for a ElePHPant RAVE!

+

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

+

๐Ÿ˜๐Ÿ˜๐Ÿ˜

+

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

+

๐Ÿ˜๐Ÿ˜

+
+
+
+
+

Time for a cRUSTation RAVE!

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€

+
+
+
+
+

but what if the crabs and elephants rave together?!

+
+
+
+
+HTML, + <<<'HTML' +
+

Lorem ๐Ÿ˜‚๐Ÿ˜‚ ipsum ๐Ÿ•ต๏ธโ€โ™‚๏ธdolor sitโœ๏ธ amet, consectetur adipiscing๐Ÿ˜‡๐Ÿ˜‡๐Ÿค™ elit, sed do eiusmod๐Ÿฅฐ tempor ๐Ÿ˜ค๐Ÿ˜ค๐Ÿณ๏ธโ€๐ŸŒˆincididunt ut ๐Ÿ‘labore ๐Ÿ‘et๐Ÿ‘ dolore ๐Ÿ‘magna๐Ÿ‘ aliqua.

+

Ut enim ad minim ๐ŸตโœŠ๐Ÿฟveniam,โค๏ธ๐Ÿ˜ค๐Ÿ˜ซ๐Ÿ˜ฉ๐Ÿ’ฆ๐Ÿ’ฆ quis nostrud ๐Ÿ‘ฟ๐Ÿคฎexercitation ullamco ๐Ÿง ๐Ÿ‘ฎ๐Ÿฟโ€โ™€๏ธ๐Ÿ…ฑ๏ธlaboris nisi ut aliquipโ—๏ธ ex ea commodo consequat.

+

๐Ÿ’ฏDuis aute๐Ÿ’ฆ๐Ÿ˜‚๐Ÿ˜‚๐Ÿ˜‚ irure dolor ๐Ÿ‘ณ๐Ÿปโ€โ™‚๏ธ๐Ÿ—ฟin reprehenderit ๐Ÿค–๐Ÿ‘ป๐Ÿ‘Žin voluptate velit esse cillum dolore ๐Ÿ™๐Ÿ™eu fugiat๐Ÿค” nulla pariatur.

+

๐Ÿ™…โ€โ™€๏ธ๐Ÿ™…โ€โ™€๏ธExcepteur sint occaecat๐Ÿคทโ€โ™€๏ธ๐Ÿคฆโ€โ™€๏ธ cupidatat๐Ÿ’… non๐Ÿ’ƒ proident,๐Ÿ‘จโ€๐Ÿ‘ง sunt๐Ÿค— in culpa๐Ÿ˜ฅ๐Ÿ˜ฐ๐Ÿ˜จ qui officia๐Ÿคฉ๐Ÿคฉ deserunt mollit ๐Ÿงanim id est laborum.๐Ÿค”๐Ÿค”

+
+HTML, +]); + dataset('html-pages', [ <<<'HTML' @@ -117,53 +167,3 @@ HTML, ]); - -dataset('html-fragments', [ - <<<'HTML' -

๐Ÿš€

-HTML, - <<<'HTML' -๐ŸŽ‰ -HTML, - <<<'HTML' -Link โ›“๏ธ -HTML, - <<<'HTML' - -HTML, - <<<'HTML' -
-
-

Time for a ElePHPant RAVE!

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜

-
-
-
-
-

Time for a cRUSTation RAVE!

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€

-
-
-
-
-

but what if the crabs and elephants rave together?!

-
-
-
-
-HTML, - <<<'HTML' -
-

Lorem ๐Ÿ˜‚๐Ÿ˜‚ ipsum ๐Ÿ•ต๏ธโ€โ™‚๏ธdolor sitโœ๏ธ amet, consectetur adipiscing๐Ÿ˜‡๐Ÿ˜‡๐Ÿค™ elit, sed do eiusmod๐Ÿฅฐ tempor ๐Ÿ˜ค๐Ÿ˜ค๐Ÿณ๏ธโ€๐ŸŒˆincididunt ut ๐Ÿ‘labore ๐Ÿ‘et๐Ÿ‘ dolore ๐Ÿ‘magna๐Ÿ‘ aliqua.

-

Ut enim ad minim ๐ŸตโœŠ๐Ÿฟveniam,โค๏ธ๐Ÿ˜ค๐Ÿ˜ซ๐Ÿ˜ฉ๐Ÿ’ฆ๐Ÿ’ฆ quis nostrud ๐Ÿ‘ฟ๐Ÿคฎexercitation ullamco ๐Ÿง ๐Ÿ‘ฎ๐Ÿฟโ€โ™€๏ธ๐Ÿ…ฑ๏ธlaboris nisi ut aliquipโ—๏ธ ex ea commodo consequat.

-

๐Ÿ’ฏDuis aute๐Ÿ’ฆ๐Ÿ˜‚๐Ÿ˜‚๐Ÿ˜‚ irure dolor ๐Ÿ‘ณ๐Ÿปโ€โ™‚๏ธ๐Ÿ—ฟin reprehenderit ๐Ÿค–๐Ÿ‘ป๐Ÿ‘Žin voluptate velit esse cillum dolore ๐Ÿ™๐Ÿ™eu fugiat๐Ÿค” nulla pariatur.

-

๐Ÿ™…โ€โ™€๏ธ๐Ÿ™…โ€โ™€๏ธExcepteur sint occaecat๐Ÿคทโ€โ™€๏ธ๐Ÿคฆโ€โ™€๏ธ cupidatat๐Ÿ’… non๐Ÿ’ƒ proident,๐Ÿ‘จโ€๐Ÿ‘ง sunt๐Ÿค— in culpa๐Ÿ˜ฅ๐Ÿ˜ฐ๐Ÿ˜จ qui officia๐Ÿคฉ๐Ÿคฉ deserunt mollit ๐Ÿงanim id est laborum.๐Ÿค”๐Ÿค”

-
-HTML, -]); diff --git a/tests/Unit/HtmlTest.php b/tests/Unit/HtmlTest.php index 421b495..36224d4 100644 --- a/tests/Unit/HtmlTest.php +++ b/tests/Unit/HtmlTest.php @@ -4,12 +4,12 @@ use function Spatie\Snapshots\assertMatchesHtmlSnapshot; use function Spatie\Snapshots\assertMatchesTextSnapshot; -it('can parse HTML Pages', function (string $html) { - $htmlReplacer = (new HtmlReplacer())->png(); - assertMatchesHtmlSnapshot($htmlReplacer->parse($html)); -})->with('html-pages'); - it('can parse HTML fragments content', function (string $html) { $htmlReplacer = (new HtmlReplacer())->png(); assertMatchesTextSnapshot($htmlReplacer->parse($html)); })->with('html-fragments'); + +it('can parse HTML Pages', function (string $html) { + $htmlReplacer = (new HtmlReplacer())->png(); + assertMatchesHtmlSnapshot($htmlReplacer->parse($html)); +})->with('html-pages'); From fcdd93d566d5196a5773b8eb604ead8114769687 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 12:13:36 -0400 Subject: [PATCH 17/35] Refactor new tests and add failing tests for current issues. 
--- tests/Datasets/HtmlContent.php | 169 ------------------ tests/Unit/HtmlReplacerFragmentTest.php | 74 ++++++++ tests/Unit/HtmlReplacerPageTest.php | 72 ++++++++ tests/Unit/HtmlTest.php | 15 -- ...n_convert_a_single_emoji_paragraph__1.txt} | 0 ...vert_many_Emoji_in_an_HTML_article__1.txt} | 0 ...y_Emoji_in_an_HTML_comment_section__1.txt} | 0 ...can_handle_text_with_an_outer_P_tag__1.txt | 1 + ...can_handle_text_without_outer_P_tag__1.txt | 1 + ..._without_outer_P_tag_but_inner_HTML__1.txt | 1 + ...rt_an_emoji_within_HTML_attributes__1.txt} | 0 ...onvert_an_emoji_within_SCRIPT_tags__1.txt} | 0 ...will_not_mangle_an_Empty_HTML_page__1.txt} | 2 +- ...eplace_a_single_Emoji_in_the_Title__1.txt} | 4 +- ..._replace_a_single_Emoji_on_an_page__1.txt} | 6 +- ...the_Emoji_on_page,_but_not_in_head__1.txt} | 10 +- ...ith_(DOCTYPE_htmlnhtml_langhtml)_5__1.html | 17 -- ...ith_(DOCTYPE_htmlnhtml_langhtml)_6__1.html | 23 --- ...ith_(DOCTYPE_htmlnhtml_langhtml)_7__1.html | 23 --- ...s_content_with_(a_href_titleLink_a)__1.txt | 1 - 20 files changed, 157 insertions(+), 262 deletions(-) delete mode 100644 tests/Datasets/HtmlContent.php create mode 100644 tests/Unit/HtmlReplacerFragmentTest.php create mode 100644 tests/Unit/HtmlReplacerPageTest.php delete mode 100644 tests/Unit/HtmlTest.php rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_fragments_content_with_(pp)__1.txt => HtmlReplacerFragmentTest__it_can_convert_a_single_emoji_paragraph__1.txt} (100%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_fragments_content_with_(articlen____pLorem__ipticle)__1.txt => HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_article__1.txt} (100%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_fragments_content_with_(section_classcomment-boxction)__1.txt => HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_comment_section__1.txt} (100%) create mode 100644 tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt 
create mode 100644 tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag__1.txt create mode 100644 tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_fragments_content_with_(img_src_alt)__1.txt => HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_HTML_attributes__1.txt} (100%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_fragments_content_with_(scriptdocument.innerHTML__cript)__1.txt => HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_SCRIPT_tags__1.txt} (100%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html => HtmlReplacerPageTest__it_will_not_mangle_an_Empty_HTML_page__1.txt} (89%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html => HtmlReplacerPageTest__it_will_not_replace_a_single_Emoji_in_the_Title__1.txt} (83%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html => HtmlReplacerPageTest__it_will_replace_a_single_Emoji_on_an_page__1.txt} (72%) rename tests/__snapshots__/{HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html => HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head__1.txt} (86%) delete mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html delete mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html delete mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_7__1.html delete mode 100644 tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(a_href_titleLink_a)__1.txt diff --git a/tests/Datasets/HtmlContent.php b/tests/Datasets/HtmlContent.php deleted file mode 100644 index acc8a69..0000000 
--- a/tests/Datasets/HtmlContent.php +++ /dev/null @@ -1,169 +0,0 @@ -๐Ÿš€

-HTML, - <<<'HTML' -๐ŸŽ‰ -HTML, - <<<'HTML' -Link โ›“๏ธ -HTML, - <<<'HTML' - -HTML, - <<<'HTML' -
-
-

Time for a ElePHPant RAVE!

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜

-
-
-
-
-

Time for a cRUSTation RAVE!

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€

-
-
-
-
-

but what if the crabs and elephants rave together?!

-
-
-
-
-HTML, - <<<'HTML' -
-

Lorem ๐Ÿ˜‚๐Ÿ˜‚ ipsum ๐Ÿ•ต๏ธโ€โ™‚๏ธdolor sitโœ๏ธ amet, consectetur adipiscing๐Ÿ˜‡๐Ÿ˜‡๐Ÿค™ elit, sed do eiusmod๐Ÿฅฐ tempor ๐Ÿ˜ค๐Ÿ˜ค๐Ÿณ๏ธโ€๐ŸŒˆincididunt ut ๐Ÿ‘labore ๐Ÿ‘et๐Ÿ‘ dolore ๐Ÿ‘magna๐Ÿ‘ aliqua.

-

Ut enim ad minim ๐ŸตโœŠ๐Ÿฟveniam,โค๏ธ๐Ÿ˜ค๐Ÿ˜ซ๐Ÿ˜ฉ๐Ÿ’ฆ๐Ÿ’ฆ quis nostrud ๐Ÿ‘ฟ๐Ÿคฎexercitation ullamco ๐Ÿง ๐Ÿ‘ฎ๐Ÿฟโ€โ™€๏ธ๐Ÿ…ฑ๏ธlaboris nisi ut aliquipโ—๏ธ ex ea commodo consequat.

-

๐Ÿ’ฏDuis aute๐Ÿ’ฆ๐Ÿ˜‚๐Ÿ˜‚๐Ÿ˜‚ irure dolor ๐Ÿ‘ณ๐Ÿปโ€โ™‚๏ธ๐Ÿ—ฟin reprehenderit ๐Ÿค–๐Ÿ‘ป๐Ÿ‘Žin voluptate velit esse cillum dolore ๐Ÿ™๐Ÿ™eu fugiat๐Ÿค” nulla pariatur.

-

๐Ÿ™…โ€โ™€๏ธ๐Ÿ™…โ€โ™€๏ธExcepteur sint occaecat๐Ÿคทโ€โ™€๏ธ๐Ÿคฆโ€โ™€๏ธ cupidatat๐Ÿ’… non๐Ÿ’ƒ proident,๐Ÿ‘จโ€๐Ÿ‘ง sunt๐Ÿค— in culpa๐Ÿ˜ฅ๐Ÿ˜ฐ๐Ÿ˜จ qui officia๐Ÿคฉ๐Ÿคฉ deserunt mollit ๐Ÿงanim id est laborum.๐Ÿค”๐Ÿค”

-
-HTML, -]); - -dataset('html-pages', [ - <<<'HTML' - - - - - -HTML, - <<<'HTML' - - - - Hey ๐Ÿš€ - -HTML, - <<<'HTML' - - - - - - - HTML 5๐Ÿš€ Boilerplate - - - - -HTML, - <<<'HTML' - - - - - - - HTML 5๐Ÿš€ Boilerplate - - - -

Do a quick kickflip! ๐Ÿ›น

-

This is HTML text that should be replaced, but the emoji in the head should not.

-

Time for a CRAB RAVE!

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿ™๐Ÿ˜

- - -HTML, - <<<'HTML' - - - - - Test with Emoji in ALT text - - -

Hello Friends ๐Ÿ‘‹

- A random image of Bill Murray ๐Ÿป -

Time for a ElePHPant RAVE!

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜

- - -HTML, - <<<'HTML' - - - - - Test with Emoji in ALT text - - -
-

Hello Friends ๐Ÿ‘‹

- A random image of Bill Murray ๐Ÿป -
-
-
-

Time for a ElePHPant RAVE!

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

-

๐Ÿ˜๐Ÿ˜

-
-
- - -HTML, - <<<'HTML' - - - - - - - HTML 5๐Ÿš€ Boilerplate - - - -

Do a quick kickflip! ๐Ÿ›น

-

This is HTML text that should be replaced, but the emoji in the head should not.

-

Time for a CRAB RAVE!

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

-

๐Ÿ™๐Ÿ˜

- - - -HTML, -]); diff --git a/tests/Unit/HtmlReplacerFragmentTest.php b/tests/Unit/HtmlReplacerFragmentTest.php new file mode 100644 index 0000000..df04406 --- /dev/null +++ b/tests/Unit/HtmlReplacerFragmentTest.php @@ -0,0 +1,74 @@ +๐Ÿš€

")); +}); + +it('will not convert an emoji within HTML attributes', function () { + assertMatchesTextSnapshot(htmlReplacerPngParser('๐ŸŽ‰')); +}); + +it('will not convert an emoji within SCRIPT tags', function () { + assertMatchesTextSnapshot(htmlReplacerPngParser("")); +}); + +it('can convert many Emoji in an HTML comment section', function () { + $commentsHtml = <<<'HTML' +
+
+

Time for a ElePHPant RAVE!

+

🐘🐘🐘🐘

+

🐘🐘🐘

+

🐘🐘🐘🐘🐘

+

🐘🐘

+
+
+
+
+

Time for a cRUSTation RAVE!

+

🦀🦀🦀🦀

+

🦀🦀

+

🦀🦀🦀🦀

+

🦀

+
+
+
+
+

but what if the crabs and elephants rave together?!

+
+
+
+
+HTML; + assertMatchesTextSnapshot(htmlReplacerPngParser($commentsHtml)); +}); + +it('can convert many Emoji in an HTML article', function () { + $commentsHtml = <<<'HTML' +
+

Lorem ๐Ÿ˜‚๐Ÿ˜‚ ipsum ๐Ÿ•ต๏ธโ€โ™‚๏ธdolor sitโœ๏ธ amet, consectetur adipiscing๐Ÿ˜‡๐Ÿ˜‡๐Ÿค™ elit, sed do eiusmod๐Ÿฅฐ tempor ๐Ÿ˜ค๐Ÿ˜ค๐Ÿณ๏ธโ€๐ŸŒˆincididunt ut ๐Ÿ‘labore ๐Ÿ‘et๐Ÿ‘ dolore ๐Ÿ‘magna๐Ÿ‘ aliqua.

+

Ut enim ad minim ๐ŸตโœŠ๐Ÿฟveniam,โค๏ธ๐Ÿ˜ค๐Ÿ˜ซ๐Ÿ˜ฉ๐Ÿ’ฆ๐Ÿ’ฆ quis nostrud ๐Ÿ‘ฟ๐Ÿคฎexercitation ullamco ๐Ÿง ๐Ÿ‘ฎ๐Ÿฟโ€โ™€๏ธ๐Ÿ…ฑ๏ธlaboris nisi ut aliquipโ—๏ธ ex ea commodo consequat.

+

๐Ÿ’ฏDuis aute๐Ÿ’ฆ๐Ÿ˜‚๐Ÿ˜‚๐Ÿ˜‚ irure dolor ๐Ÿ‘ณ๐Ÿปโ€โ™‚๏ธ๐Ÿ—ฟin reprehenderit ๐Ÿค–๐Ÿ‘ป๐Ÿ‘Žin voluptate velit esse cillum dolore ๐Ÿ™๐Ÿ™eu fugiat๐Ÿค” nulla pariatur.

+

๐Ÿ™…โ€โ™€๏ธ๐Ÿ™…โ€โ™€๏ธExcepteur sint occaecat๐Ÿคทโ€โ™€๏ธ๐Ÿคฆโ€โ™€๏ธ cupidatat๐Ÿ’… non๐Ÿ’ƒ proident,๐Ÿ‘จโ€๐Ÿ‘ง sunt๐Ÿค— in culpa๐Ÿ˜ฅ๐Ÿ˜ฐ๐Ÿ˜จ qui officia๐Ÿคฉ๐Ÿคฉ deserunt mollit ๐Ÿงanim id est laborum.๐Ÿค”๐Ÿค”

+
+HTML; + assertMatchesTextSnapshot(htmlReplacerPngParser($commentsHtml)); +}); + +it('can handle text with an outer P tag', function () { + $textContent = "

This is some fancy-💃 Markdown/WYSIWYG text with surrounding <p> tags enabled. 🎉

"; + assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); +}); + +it('can handle text without outer P tag', function () { + $textContent = "This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags disabled. ๐ŸŽ‰"; + assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); +}); + +it('can handle text without outer P tag but inner HTML', function () { + $textContent = "This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags disabled. ๐ŸŽ‰"; + assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); +}); diff --git a/tests/Unit/HtmlReplacerPageTest.php b/tests/Unit/HtmlReplacerPageTest.php new file mode 100644 index 0000000..8e8d477 --- /dev/null +++ b/tests/Unit/HtmlReplacerPageTest.php @@ -0,0 +1,72 @@ + + + + + +HTML; + assertMatchesTextSnapshot(htmlReplacerPngParser($pageHtml)); +}); + +it('will replace a single Emoji on an page', function () { + $pageHtml = <<<'HTML' + + + + Hey ๐Ÿš€ + +HTML; + assertMatchesTextSnapshot(htmlReplacerPngParser($pageHtml)); +}); + +it('will not replace a single Emoji in the Title', function () { + $pageHtml = <<<'HTML' + + + + + + + HTML 5๐Ÿš€ Boilerplate + + + + +HTML; + $results = htmlReplacerPngParser($pageHtml); + expect($results)->toContain("5๐Ÿš€")->not()->toContain('&#'); + assertMatchesTextSnapshot($results); +}); + +it('will replace the Emoji on page, but not in head', function () { + $pageHtml = <<<'HTML' + + + + + + + HTML 5๐Ÿš€ Boilerplate + + + +

Do a quick kickflip! 🛹

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

🦀🦀🦀🦀🦀

+

🦀🦀🦀

+

🦀🦀🦀🦀🦀

+

🙏🐘

+ + +HTML; + $results = htmlReplacerPngParser($pageHtml); + expect($results)->toContain("5๐Ÿš€")->not()->toContain('&#'); + assertMatchesTextSnapshot($results); +}); + diff --git a/tests/Unit/HtmlTest.php b/tests/Unit/HtmlTest.php deleted file mode 100644 index 36224d4..0000000 --- a/tests/Unit/HtmlTest.php +++ /dev/null @@ -1,15 +0,0 @@ -png(); - assertMatchesTextSnapshot($htmlReplacer->parse($html)); -})->with('html-fragments'); - -it('can parse HTML Pages', function (string $html) { - $htmlReplacer = (new HtmlReplacer())->png(); - assertMatchesHtmlSnapshot($htmlReplacer->parse($html)); -})->with('html-pages'); diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(pp)__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_a_single_emoji_paragraph__1.txt similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(pp)__1.txt rename to tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_a_single_emoji_paragraph__1.txt diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(articlen____pLorem__ipticle)__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_article__1.txt similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(articlen____pLorem__ipticle)__1.txt rename to tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_article__1.txt diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(section_classcomment-boxction)__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_comment_section__1.txt similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(section_classcomment-boxction)__1.txt rename to tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_comment_section__1.txt diff 
--git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt new file mode 100644 index 0000000..96484d7 --- /dev/null +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt @@ -0,0 +1 @@ +

This is some fancy-💃 Markdown/WYSIWYG text with surrounding

tags enabled. 🎉

\ No newline at end of file diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag__1.txt new file mode 100644 index 0000000..0d9a50c --- /dev/null +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag__1.txt @@ -0,0 +1 @@ +This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags disabled. ๐ŸŽ‰ \ No newline at end of file diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt new file mode 100644 index 0000000..4cb8555 --- /dev/null +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt @@ -0,0 +1 @@ +This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags disabled. ๐ŸŽ‰ \ No newline at end of file diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(img_src_alt)__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_HTML_attributes__1.txt similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(img_src_alt)__1.txt rename to tests/__snapshots__/HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_HTML_attributes__1.txt diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(scriptdocument.innerHTML__cript)__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_SCRIPT_tags__1.txt similarity index 100% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(scriptdocument.innerHTML__cript)__1.txt rename to tests/__snapshots__/HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_SCRIPT_tags__1.txt diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html b/tests/__snapshots__/HtmlReplacerPageTest__it_will_not_mangle_an_Empty_HTML_page__1.txt similarity index 89% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html rename to tests/__snapshots__/HtmlReplacerPageTest__it_will_not_mangle_an_Empty_HTML_page__1.txt index 8703386..55a5d94 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_1__1.html +++ b/tests/__snapshots__/HtmlReplacerPageTest__it_will_not_mangle_an_Empty_HTML_page__1.txt @@ -2,4 +2,4 @@ - + \ No newline at end of file diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html b/tests/__snapshots__/HtmlReplacerPageTest__it_will_not_replace_a_single_Emoji_in_the_Title__1.txt similarity index 83% rename from 
tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html rename to tests/__snapshots__/HtmlReplacerPageTest__it_will_not_replace_a_single_Emoji_in_the_Title__1.txt index cc7c3ee..e33d55e 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_3__1.html +++ b/tests/__snapshots__/HtmlReplacerPageTest__it_will_not_replace_a_single_Emoji_in_the_Title__1.txt @@ -4,8 +4,8 @@ - HTML 5🚀 Boilerplate + HTML 5๐Ÿš€ Boilerplate - + \ No newline at end of file diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_a_single_Emoji_on_an_page__1.txt similarity index 72% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html rename to tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_a_single_Emoji_on_an_page__1.txt index d44bab7..9987f35 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_2__1.html +++ b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_a_single_Emoji_on_an_page__1.txt @@ -1,6 +1,4 @@ - - - Hey 🚀 - + + Hey 🚀 diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head__1.txt similarity index 86% rename from tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html rename to tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head__1.txt index 26d4130..274b40a 100644 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_4__1.html +++ b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head__1.txt @@ -1,6 +1,5 @@ - - + @@ -8,15 +7,12 @@ -

Do a quick kickflip! 🛹 -

+

Do a quick kickflip! 🛹

This is HTML text that should be replaced, but the emoji in the head should not.

Time for a CRAB RAVE!

🦀🦀🦀🦀🦀

🦀🦀🦀

🦀🦀🦀🦀🦀

-

-🙏🐘 -

+

🙏🐘

diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html deleted file mode 100644 index f83089b..0000000 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_5__1.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - - Test with Emoji in ALT text - - -

Hello Friends 👋 -

- A random image of Bill Murray 🍻 -

Time for a ElePHPant RAVE!

-

🐘🐘🐘🐘

-

🐘🐘🐘

-

🐘🐘🐘🐘🐘

-

🐘🐘

- - diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html deleted file mode 100644 index 579e2ee..0000000 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_6__1.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - Test with Emoji in ALT text - - -
-

Hello Friends 👋 -

- A random image of Bill Murray 🍻 -
-
-
-

Time for a ElePHPant RAVE!

-

🐘🐘🐘🐘

-

🐘🐘🐘

-

🐘🐘🐘🐘🐘

-

🐘🐘

-
-
- - diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_7__1.html b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_7__1.html deleted file mode 100644 index db74d3f..0000000 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_Pages_with_(DOCTYPE_htmlnhtml_langhtml)_7__1.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - HTML 5🚀 Boilerplate - - - -

Do a quick kickflip! 🛹 -

-

This is HTML text that should be replaced, but the emoji in the head should not.

-

Time for a CRAB RAVE!

-

🦀🦀🦀🦀🦀

-

🦀🦀🦀

-

🦀🦀🦀🦀🦀

-

-🙏🐘 -

- - - diff --git a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(a_href_titleLink_a)__1.txt b/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(a_href_titleLink_a)__1.txt deleted file mode 100644 index ffb7d72..0000000 --- a/tests/__snapshots__/HtmlTest__it_can_parse_HTML_fragments_content_with_(a_href_titleLink_a)__1.txt +++ /dev/null @@ -1 +0,0 @@ -Link ⛓ \ No newline at end of file From 2d77cdccafcf681beff37ca090f5f585f08d2d23 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 12:17:09 -0400 Subject: [PATCH 18/35] fix styles --- composer.json | 1 + tests/Unit/HtmlReplacerFragmentTest.php | 9 ++++----- tests/Unit/HtmlReplacerPageTest.php | 5 ++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/composer.json b/composer.json index 3dacfbd..13a095c 100644 --- a/composer.json +++ b/composer.json @@ -26,6 +26,7 @@ "s9e/regexp-builder": "^1.4", "spatie/emoji": "^2.3.0", "spatie/pest-plugin-snapshots": "^1.0", + "symfony/var-dumper": "^6.1", "wa72/htmlpagedom": "^2.0 || ^3.0" }, "suggest": { diff --git a/tests/Unit/HtmlReplacerFragmentTest.php b/tests/Unit/HtmlReplacerFragmentTest.php index df04406..04b20f3 100644 --- a/tests/Unit/HtmlReplacerFragmentTest.php +++ b/tests/Unit/HtmlReplacerFragmentTest.php @@ -1,10 +1,9 @@ ๐Ÿš€

")); + assertMatchesTextSnapshot(htmlReplacerPngParser('

🚀

')); }); it('will not convert an emoji within HTML attributes', function () { @@ -59,16 +58,16 @@ }); it('can handle text with an outer P tag', function () { - $textContent = "

This is some fancy-💃 Markdown/WYSIWYG text with surrounding <p> tags enabled. 🎉

"; + $textContent = '

This is some fancy-💃 Markdown/WYSIWYG text with surrounding <p> tags enabled. 🎉

'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); }); it('can handle text without outer P tag', function () { - $textContent = "This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags disabled. ๐ŸŽ‰"; + $textContent = 'This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags disabled. ๐ŸŽ‰'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); }); it('can handle text without outer P tag but inner HTML', function () { - $textContent = "This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags disabled. ๐ŸŽ‰"; + $textContent = 'This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags disabled. ๐ŸŽ‰'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); }); diff --git a/tests/Unit/HtmlReplacerPageTest.php b/tests/Unit/HtmlReplacerPageTest.php index 8e8d477..bddfd7e 100644 --- a/tests/Unit/HtmlReplacerPageTest.php +++ b/tests/Unit/HtmlReplacerPageTest.php @@ -39,7 +39,7 @@ HTML; $results = htmlReplacerPngParser($pageHtml); - expect($results)->toContain("5๐Ÿš€")->not()->toContain('&#'); + expect($results)->toContain('5๐Ÿš€')->not()->toContain('&#'); assertMatchesTextSnapshot($results); }); @@ -66,7 +66,6 @@ HTML; $results = htmlReplacerPngParser($pageHtml); - expect($results)->toContain("5๐Ÿš€")->not()->toContain('&#'); + expect($results)->toContain('5๐Ÿš€')->not()->toContain('&#'); assertMatchesTextSnapshot($results); }); - From e0f2540d67df9d0767b1672144e6d65f94abe560 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 12:18:09 -0400 Subject: [PATCH 19/35] track the Pest helper file --- tests/Pest.php | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/Pest.php diff --git a/tests/Pest.php b/tests/Pest.php new file mode 100644 index 0000000..1cc6d2e --- /dev/null +++ b/tests/Pest.php @@ -0,0 +1,9 @@ +png(); + return $htmlReplacer->parse($html); +} From bc61a6aad36c7842fec97530952c5aab276a6cf7 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 12:18:54 -0400 Subject: [PATCH 20/35] fix pest file styles --- tests/Pest.php | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/Pest.php b/tests/Pest.php index 1cc6d2e..04e78b8 100644 --- a/tests/Pest.php +++ b/tests/Pest.php @@ -5,5 +5,6 @@ function htmlReplacerPngParser(string $html): string { $htmlReplacer = (new HtmlReplacer())->png(); + return $htmlReplacer->parse($html); } From b3a57bee3398474cd63a056cd3b3391b9b3c205c Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 13:35:17 -0400 Subject: [PATCH 21/35] Add tests that cover the edge case I've been chasing --- tests/Unit/HtmlReplacerPageTest.php | 29 ++++++++++++++++++- 
...ut_not_in_head_-_NEW_Charset_Method__1.txt | 18 ++++++++++++ ..._the_Emoji_on_page,_but_not_in_head__1.txt | 18 ------------ 3 files changed, 46 insertions(+), 19 deletions(-) create mode 100644 tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head_-_NEW_Charset_Method__1.txt delete mode 100644 tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head__1.txt diff --git a/tests/Unit/HtmlReplacerPageTest.php b/tests/Unit/HtmlReplacerPageTest.php index bddfd7e..a61c5b0 100644 --- a/tests/Unit/HtmlReplacerPageTest.php +++ b/tests/Unit/HtmlReplacerPageTest.php @@ -43,7 +43,7 @@ assertMatchesTextSnapshot($results); }); -it('will replace the Emoji on page, but not in head', function () { +it('will replace the Emoji on page, but not in head - OLD Charset Method', function () { $pageHtml = <<<'HTML' @@ -64,6 +64,33 @@

๐Ÿ™๐Ÿ˜

+HTML; + $results = htmlReplacerPngParser($pageHtml); + expect($results)->toContain('5๐Ÿš€')->not()->toContain('&#'); + assertMatchesTextSnapshot($results); +})->skip('This will fail due to "incorrect" meta charset method, we need to consider how to address that.'); + +it('will replace the Emoji on page, but not in head - NEW Charset Method', function () { + $pageHtml = <<<'HTML' + + + + + + + HTML 5๐Ÿš€ Boilerplate + + + +

Do a quick kickflip! ๐Ÿ›น

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿ™๐Ÿ˜

+ + HTML; $results = htmlReplacerPngParser($pageHtml); expect($results)->toContain('5๐Ÿš€')->not()->toContain('&#'); diff --git a/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head_-_NEW_Charset_Method__1.txt b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head_-_NEW_Charset_Method__1.txt new file mode 100644 index 0000000..f85e8f5 --- /dev/null +++ b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head_-_NEW_Charset_Method__1.txt @@ -0,0 +1,18 @@ + + + + + + HTML 5๐Ÿš€ Boilerplate + + + +

Do a quick kickflip! ๐Ÿ›น

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿ™๐Ÿ˜

+ + diff --git a/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head__1.txt b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head__1.txt deleted file mode 100644 index 274b40a..0000000 --- a/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head__1.txt +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - HTML 5🚀 Boilerplate - - - -

Do a quick kickflip! 🛹

-

This is HTML text that should be replaced, but the emoji in the head should not.

-

Time for a CRAB RAVE!

-

🦀🦀🦀🦀🦀

-

🦀🦀🦀

-

🦀🦀🦀🦀🦀

-

🙏🐘

- - From 62fdc48516913d45156c65dec569e5d81c931315 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 14:12:18 -0400 Subject: [PATCH 22/35] Refactor how HTML fragments are handled --- src/HtmlReplacer.php | 67 ++++++++++++++++++++++++++------- src/NoTextChildrenException.php | 10 +++++ 2 files changed, 63 insertions(+), 14 deletions(-) create mode 100644 src/NoTextChildrenException.php diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 9e9889b..a07f739 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -13,6 +13,18 @@ class HtmlReplacer { use Configurable; + private const FRAGMENT_TEMPLATE = <<<'HTML' + + + + + + +%s + + +HTML; + public function __construct() { if (! class_exists(HtmlPageCrawler::class)) { @@ -26,14 +38,50 @@ public function parse(string $html): string { // Parse the HTML page or fragment... $parsedHtmlRoot = new HtmlPageCrawler($html); - // Filter parsed HTML "root" into the twemoji relevant parts... - $parsedHtml = $this->whenHtmlDocFilterBody($parsedHtmlRoot); + + if ($parsedHtmlRoot->isHtmlDocument()) { + // We will only transform the body... 
+ $parsedHtml = $parsedHtmlRoot->filter('body'); + } else { + return $this->parseFragment($html); + } + + try { + $this->findAndTwmojifyTextNodes($parsedHtml); + } catch (NoTextChildrenException $e) { + return $html; + } + + return $parsedHtmlRoot->saveHTML(); + } + + public function parseFragment(string $html): string + { + $wrappedFragment = sprintf(static::FRAGMENT_TEMPLATE, $html); + + $parsedHtmlRoot = new HtmlPageCrawler($wrappedFragment); + $parsedHtml = $parsedHtmlRoot->filter('body'); + + try { + $this->findAndTwmojifyTextNodes($parsedHtml); + } catch (NoTextChildrenException $e) { + return $html; + } + + return $parsedHtmlRoot->filter('body')->getInnerHtml(); + } + + /** + * @throws NoTextChildrenException + */ + private function findAndTwmojifyTextNodes(HtmlPageCrawler $htmlContent): HtmlPageCrawler + { // Use xpath to filter only the "TextNodes" within every "Element" - $textNodes = $parsedHtml->filterXPath('.//*[normalize-space(text())]'); + $textNodes = $htmlContent->filterXPath('.//*[normalize-space(text())]'); // If the filtered DOM fragment doesn't have TextNode children, return the input HTML. 
if ($textNodes->count() === 0) { - return $html; + throw new NoTextChildrenException(); } $textNodes->each(function (HtmlPageCrawler $node) { @@ -46,15 +94,6 @@ public function parse(string $html): string return $node; }); - return $parsedHtmlRoot->saveHTML(); - } - - private function whenHtmlDocFilterBody(HtmlPageCrawler $htmlRoot): HtmlPageCrawler - { - if ($htmlRoot->isHtmlDocument()) { - return $htmlRoot->filter('body'); - } - - return $htmlRoot; + return $textNodes; } } diff --git a/src/NoTextChildrenException.php b/src/NoTextChildrenException.php new file mode 100644 index 0000000..797c72c --- /dev/null +++ b/src/NoTextChildrenException.php @@ -0,0 +1,10 @@ + Date: Mon, 17 Oct 2022 14:13:12 -0400 Subject: [PATCH 23/35] Ensure extra spaces are not added --- src/HtmlReplacer.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index a07f739..2486297 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -68,7 +68,7 @@ public function parseFragment(string $html): string return $html; } - return $parsedHtmlRoot->filter('body')->getInnerHtml(); + return trim($parsedHtmlRoot->filter('body')->getInnerHtml()); } /** From b2bc8bb0ea3df32598f1fda898de5c043fc201c6 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 14:14:49 -0400 Subject: [PATCH 24/35] Update tests with fixed results --- ...t_can_convert_a_single_emoji_paragraph__1.txt | 2 +- ..._convert_many_Emoji_in_an_HTML_article__1.txt | 8 ++++---- ..._many_Emoji_in_an_HTML_comment_section__1.txt | 16 ++++++++-------- ...it_can_handle_text_with_an_outer_P_tag__1.txt | 2 +- ...ot_convert_an_emoji_within_SCRIPT_tags__1.txt | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_a_single_emoji_paragraph__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_a_single_emoji_paragraph__1.txt index 08e7e3f..2835d2b 100644 --- 
a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_a_single_emoji_paragraph__1.txt +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_a_single_emoji_paragraph__1.txt @@ -1 +1 @@ -

🚀

\ No newline at end of file +

๐Ÿš€

\ No newline at end of file diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_article__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_article__1.txt index f3a466f..d97fef8 100644 --- a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_article__1.txt +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_article__1.txt @@ -1,6 +1,6 @@
-

Lorem 😂😂 ipsum 🕵️‍♂️dolor sit✍️ amet, consectetur adipiscing😇😇🤙 elit, sed do eiusmod🥰 tempor 😤😤🏳️‍🌈incididunt ut 👏labore 👏et👏 dolore 👏magna👏 aliqua.

-

Ut enim ad minim 🐵✊🏿veniam,❤😤😫😩💦💦 quis nostrud 👿🤮exercitation ullamco 🧠👮🏿‍♀️🅱️laboris nisi ut aliquip❗️ ex ea commodo consequat.

-

💯Duis aute💦😂😂😂 irure dolor 👳🏻‍♂️🗿in reprehenderit 🤖👻👎in voluptate velit esse cillum dolore 🙏🙏eu fugiat🤔 nulla pariatur.

-

🙅‍♀️🙅‍♀️Excepteur sint occaecat🤷‍♀️🤦‍♀️ cupidatat💅 non💃 proident,👨‍👧 sunt🤗 in culpa😥😰😨 qui officia🤩🤩 deserunt mollit 🧐anim id est laborum.🤔🤔

+

Lorem ๐Ÿ˜‚๐Ÿ˜‚ ipsum ๐Ÿ•ต๏ธโ€โ™‚๏ธdolor sitโœ๏ธ amet, consectetur adipiscing๐Ÿ˜‡๐Ÿ˜‡๐Ÿค™ elit, sed do eiusmod๐Ÿฅฐ tempor ๐Ÿ˜ค๐Ÿ˜ค๐Ÿณ๏ธโ€๐ŸŒˆincididunt ut ๐Ÿ‘labore ๐Ÿ‘et๐Ÿ‘ dolore ๐Ÿ‘magna๐Ÿ‘ aliqua.

+

Ut enim ad minim ๐ŸตโœŠ๐Ÿฟveniam,โค๏ธ๐Ÿ˜ค๐Ÿ˜ซ๐Ÿ˜ฉ๐Ÿ’ฆ๐Ÿ’ฆ quis nostrud ๐Ÿ‘ฟ๐Ÿคฎexercitation ullamco ๐Ÿง ๐Ÿ‘ฎ๐Ÿฟโ€โ™€๏ธ๐Ÿ…ฑ๏ธlaboris nisi ut aliquipโ—๏ธ ex ea commodo consequat.

+

๐Ÿ’ฏDuis aute๐Ÿ’ฆ๐Ÿ˜‚๐Ÿ˜‚๐Ÿ˜‚ irure dolor ๐Ÿ‘ณ๐Ÿปโ€โ™‚๏ธ๐Ÿ—ฟin reprehenderit ๐Ÿค–๐Ÿ‘ป๐Ÿ‘Žin voluptate velit esse cillum dolore ๐Ÿ™๐Ÿ™eu fugiat๐Ÿค” nulla pariatur.

+

๐Ÿ™…โ€โ™€๏ธ๐Ÿ™…โ€โ™€๏ธExcepteur sint occaecat๐Ÿคทโ€โ™€๏ธ๐Ÿคฆโ€โ™€๏ธ cupidatat๐Ÿ’… non๐Ÿ’ƒ proident,๐Ÿ‘จโ€๐Ÿ‘ง sunt๐Ÿค— in culpa๐Ÿ˜ฅ๐Ÿ˜ฐ๐Ÿ˜จ qui officia๐Ÿคฉ๐Ÿคฉ deserunt mollit ๐Ÿงanim id est laborum.๐Ÿค”๐Ÿค”

\ No newline at end of file diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_comment_section__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_comment_section__1.txt index 5bd6205..1082cc5 100644 --- a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_comment_section__1.txt +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_convert_many_Emoji_in_an_HTML_comment_section__1.txt @@ -1,19 +1,19 @@

Time for a ElePHPant RAVE!

-

🐘🐘🐘🐘

-

🐘🐘🐘

-

🐘🐘🐘🐘🐘

-

🐘🐘

+

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

+

๐Ÿ˜๐Ÿ˜๐Ÿ˜

+

๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜๐Ÿ˜

+

๐Ÿ˜๐Ÿ˜

Time for a cRUSTation RAVE!

-

🦀🦀🦀🦀

-

🦀🦀

-

🦀🦀🦀🦀

-

🦀

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€

diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt index 96484d7..cebc52b 100644 --- a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt @@ -1 +1 @@ -

This is some fancy-💃 Markdown/WYSIWYG text with surrounding

tags enabled. 🎉

\ No newline at end of file +

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags enabled. ๐ŸŽ‰

\ No newline at end of file diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_SCRIPT_tags__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_SCRIPT_tags__1.txt index c57b86a..a9a2d93 100644 --- a/tests/__snapshots__/HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_SCRIPT_tags__1.txt +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_will_not_convert_an_emoji_within_SCRIPT_tags__1.txt @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file From 2e3278d04b044ff62fa902240702626a2e7e6379 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 14:30:59 -0400 Subject: [PATCH 25/35] Manually correct snapshots to desired state --- ...rFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt | 2 +- ...it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt index cebc52b..2442e17 100644 --- a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt @@ -1 +1 @@ -

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags enabled. ๐ŸŽ‰

\ No newline at end of file +

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags enabled. ๐ŸŽ‰

diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt index 4cb8555..ed20e5d 100644 --- a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_but_inner_HTML__1.txt @@ -1 +1 @@ -This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags disabled. ๐ŸŽ‰ \ No newline at end of file +This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags disabled. ๐ŸŽ‰ From 55badec63edef54f066666e58375f3c2fe12d1c2 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 14:31:17 -0400 Subject: [PATCH 26/35] Skip HTML fragment tests that cause errors --- tests/Unit/HtmlReplacerFragmentTest.php | 6 +++--- tests/Unit/HtmlReplacerPageTest.php | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/Unit/HtmlReplacerFragmentTest.php b/tests/Unit/HtmlReplacerFragmentTest.php index 04b20f3..1c8a686 100644 --- a/tests/Unit/HtmlReplacerFragmentTest.php +++ b/tests/Unit/HtmlReplacerFragmentTest.php @@ -60,14 +60,14 @@ it('can handle text with an outer P tag', function () { $textContent = '

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags enabled. ๐ŸŽ‰

'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); -}); +})->skip('Cannot pass until more work is done...'); it('can handle text without outer P tag', function () { $textContent = 'This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags disabled. ๐ŸŽ‰'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); -}); +})->skip('Cannot pass until more work is done...'); it('can handle text without outer P tag but inner HTML', function () { $textContent = 'This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags disabled. ๐ŸŽ‰'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); -}); +})->skip('Cannot pass until more work is done...'); diff --git a/tests/Unit/HtmlReplacerPageTest.php b/tests/Unit/HtmlReplacerPageTest.php index a61c5b0..d67d224 100644 --- a/tests/Unit/HtmlReplacerPageTest.php +++ b/tests/Unit/HtmlReplacerPageTest.php @@ -66,7 +66,7 @@ HTML; $results = htmlReplacerPngParser($pageHtml); - expect($results)->toContain('5๐Ÿš€')->not()->toContain('&#'); + expect($results)->toContain('5๐Ÿš€')->not()->toContain('5&#'); assertMatchesTextSnapshot($results); })->skip('This will fail due to "incorrect" meta charset method, we need to consider how to address that.'); @@ -93,6 +93,6 @@ HTML; $results = htmlReplacerPngParser($pageHtml); - expect($results)->toContain('5๐Ÿš€')->not()->toContain('&#'); + expect($results)->toContain('5๐Ÿš€')->not()->toContain('5&#'); assertMatchesTextSnapshot($results); }); From c446d6faf7efe37cd398c7387d47495594f02731 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 14:34:36 -0400 Subject: [PATCH 27/35] Refactor exception --- src/{ => Exceptions}/NoTextChildrenException.php | 2 +- src/HtmlReplacer.php | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) rename src/{ => Exceptions}/NoTextChildrenException.php (65%) diff --git a/src/NoTextChildrenException.php b/src/Exceptions/NoTextChildrenException.php similarity index 65% rename from src/NoTextChildrenException.php rename to src/Exceptions/NoTextChildrenException.php index 797c72c..f0618b4 100644 --- a/src/NoTextChildrenException.php +++ b/src/Exceptions/NoTextChildrenException.php @@ -1,6 +1,6 @@ Date: Mon, 17 Oct 2022 14:35:35 -0400 Subject: [PATCH 28/35] remove dumper from composer file --- composer.json | 1 - 1 file changed, 1 deletion(-) diff --git a/composer.json b/composer.json index 13a095c..3dacfbd 100644 --- a/composer.json +++ b/composer.json @@ -26,7 +26,6 @@ "s9e/regexp-builder": "^1.4", "spatie/emoji": "^2.3.0", "spatie/pest-plugin-snapshots": 
"^1.0", - "symfony/var-dumper": "^6.1", "wa72/htmlpagedom": "^2.0 || ^3.0" }, "suggest": { From 2ee59bf67e7e2df993db68b3720e09e0f8197a3d Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 18:44:51 -0400 Subject: [PATCH 29/35] Always use static builder method instead of new --- src/HtmlReplacer.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index d532351..20e9b6d 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -38,7 +38,7 @@ public function __construct() public function parse(string $html): string { // Parse the HTML page or fragment... - $parsedHtmlRoot = new HtmlPageCrawler($html); + $parsedHtmlRoot = HtmlPageCrawler::create($html); if ($parsedHtmlRoot->isHtmlDocument()) { // We will only transform the body... @@ -60,7 +60,7 @@ public function parseFragment(string $html): string { $wrappedFragment = sprintf(static::FRAGMENT_TEMPLATE, $html); - $parsedHtmlRoot = new HtmlPageCrawler($wrappedFragment); + $parsedHtmlRoot = HtmlPageCrawler::create($wrappedFragment); $parsedHtml = $parsedHtmlRoot->filter('body'); try { From 891c25f5310dea0f7b4ce9539e74d54118ee7a85 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 18:45:38 -0400 Subject: [PATCH 30/35] Improve fragment parsing and enable more tests --- src/HtmlReplacer.php | 2 +- tests/Unit/HtmlReplacerFragmentTest.php | 13 +++++++++---- ...t__it_can_handle_text_with_an_outer_P_tag__1.txt | 2 +- ...dle_text_with_an_outer_P_tag_and_CODE_tag__1.txt | 1 + ...t__it_can_handle_text_without_outer_P_tag__1.txt | 1 - ...text_without_outer_P_tag_and_escaped_HTML__1.txt | 1 + 6 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag_and_CODE_tag__1.txt delete mode 100644 tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag__1.txt create mode 100644 
tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_and_escaped_HTML__1.txt diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 20e9b6d..e499d16 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -86,7 +86,7 @@ private function findAndTwmojifyTextNodes(HtmlPageCrawler $htmlContent): HtmlPag } $textNodes->each(function (HtmlPageCrawler $node) { - $twemojiContent = (new EmojiText($node->innerText())) + $twemojiContent = (new EmojiText($node->getInnerHtml())) ->base($this->base) ->type($this->type) ->toHtml(); diff --git a/tests/Unit/HtmlReplacerFragmentTest.php b/tests/Unit/HtmlReplacerFragmentTest.php index 1c8a686..68cc430 100644 --- a/tests/Unit/HtmlReplacerFragmentTest.php +++ b/tests/Unit/HtmlReplacerFragmentTest.php @@ -60,14 +60,19 @@ it('can handle text with an outer P tag', function () { $textContent = '

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags enabled. ๐ŸŽ‰

'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); -})->skip('Cannot pass until more work is done...'); +}); -it('can handle text without outer P tag', function () { +it('can handle text with an outer P tag and CODE tag', function () { + $textContent = '

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags enabled. ๐ŸŽ‰

'; + assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); +}); + +it('can handle text without outer P tag and escaped HTML', function () { $textContent = 'This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags disabled. ๐ŸŽ‰'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); -})->skip('Cannot pass until more work is done...'); +}); it('can handle text without outer P tag but inner HTML', function () { $textContent = 'This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags disabled. ๐ŸŽ‰'; assertMatchesTextSnapshot(htmlReplacerPngParser($textContent)); -})->skip('Cannot pass until more work is done...'); +})->skip('Fails: Mutates the code content to close the p tag'); diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt index 2442e17..adc8ef7 100644 --- a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag__1.txt @@ -1 +1 @@ -

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding

tags enabled. ๐ŸŽ‰

+

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags enabled. ๐ŸŽ‰

\ No newline at end of file diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag_and_CODE_tag__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag_and_CODE_tag__1.txt new file mode 100644 index 0000000..842f3db --- /dev/null +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_with_an_outer_P_tag_and_CODE_tag__1.txt @@ -0,0 +1 @@ +

This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags enabled. ๐ŸŽ‰

\ No newline at end of file diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag__1.txt deleted file mode 100644 index 0d9a50c..0000000 --- a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag__1.txt +++ /dev/null @@ -1 +0,0 @@ -This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags disabled. ๐ŸŽ‰ \ No newline at end of file diff --git a/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_and_escaped_HTML__1.txt b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_and_escaped_HTML__1.txt new file mode 100644 index 0000000..4aa185e --- /dev/null +++ b/tests/__snapshots__/HtmlReplacerFragmentTest__it_can_handle_text_without_outer_P_tag_and_escaped_HTML__1.txt @@ -0,0 +1 @@ +This is some fancy-๐Ÿ’ƒ Markdown/WYSIWYG text with surrounding <p> tags disabled. 
๐ŸŽ‰ \ No newline at end of file From 2b779479dfaaa3b4f6d4aec4b8f8a96d4724016c Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 18:59:10 -0400 Subject: [PATCH 31/35] Correct HTML pages without meta charset tag --- src/HtmlReplacer.php | 26 ++++++++++++++++++- ...l_replace_a_single_Emoji_on_an_page__1.txt | 4 +-- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index e499d16..887c814 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -14,7 +14,9 @@ class HtmlReplacer { use Configurable; - private const FRAGMENT_TEMPLATE = <<<'HTML' + private const UTF8_META = ''; + + private const FRAGMENT_TEMPLATE = << @@ -53,6 +55,28 @@ public function parse(string $html): string return $html; } + // Find the page head and check if meta header should be added + $htmlHead = $parsedHtmlRoot->filter('head'); + $addHeader = false; + if ($htmlHead->getNode(0)->hasChildNodes()) { + $contentTypeMeta = $htmlHead->children('meta[http-equiv="content-type"][content]')->getNode(0); + if ( + $contentTypeMeta === null || + iterator_to_array($contentTypeMeta->attributes)['content']->textContent !== "text/html; charset=utf-8" + ) { + $addHeader = true; + } + } else { + $addHeader = true; + } + + // Adds the necessary meta tag to make PHP's DOM not mangle Emojis + if ($addHeader) { + $setUtf8Meta = $parsedHtmlRoot->getDOMDocument()->createDocumentFragment(); + $setUtf8Meta->appendXML(static::UTF8_META); + $htmlHead->append($setUtf8Meta); + } + return $parsedHtmlRoot->saveHTML(); } diff --git a/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_a_single_Emoji_on_an_page__1.txt b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_a_single_Emoji_on_an_page__1.txt index 9987f35..4d54a55 100644 --- a/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_a_single_Emoji_on_an_page__1.txt +++ b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_a_single_Emoji_on_an_page__1.txt @@ -1,4 +1,4 @@ - - Hey 🚀 
+ + Hey ๐Ÿš€ From d5b686908eac46eee1803f792a21cc809c6baea5 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 19:00:36 -0400 Subject: [PATCH 32/35] refactor UTF8 tag adding and enable test --- src/HtmlReplacer.php | 27 ++++++++++--------- tests/Unit/HtmlReplacerPageTest.php | 2 +- ...ut_not_in_head_-_OLD_Charset_Method__1.txt | 18 +++++++++++++ 3 files changed, 34 insertions(+), 13 deletions(-) create mode 100644 tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head_-_OLD_Charset_Method__1.txt diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 887c814..80a31fd 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -4,6 +4,7 @@ use Astrotomic\Twemoji\Concerns\Configurable; use Astrotomic\Twemoji\Exceptions\NoTextChildrenException; +use DOMDocument; use RuntimeException; use Wa72\HtmlPageDom\HtmlPageCrawler; @@ -59,22 +60,16 @@ public function parse(string $html): string $htmlHead = $parsedHtmlRoot->filter('head'); $addHeader = false; if ($htmlHead->getNode(0)->hasChildNodes()) { - $contentTypeMeta = $htmlHead->children('meta[http-equiv="content-type"][content]')->getNode(0); + $contentTypeMeta = $htmlHead->children('meta[http-equiv="content-type"][content]'); if ( - $contentTypeMeta === null || - iterator_to_array($contentTypeMeta->attributes)['content']->textContent !== "text/html; charset=utf-8" + $contentTypeMeta->getNode(0) === null || + iterator_to_array($contentTypeMeta->getNode(0)->attributes)['content']->textContent !== "text/html; charset=utf-8" ) { - $addHeader = true; + $this->addUtf8MetaTag($htmlHead); + $contentTypeMeta->remove(); } } else { - $addHeader = true; - } - - // Adds the necessary meta tag to make PHP's DOM not mangle Emojis - if ($addHeader) { - $setUtf8Meta = $parsedHtmlRoot->getDOMDocument()->createDocumentFragment(); - $setUtf8Meta->appendXML(static::UTF8_META); - $htmlHead->append($setUtf8Meta); + $this->addUtf8MetaTag($htmlHead); } return $parsedHtmlRoot->saveHTML(); @@ 
-121,4 +116,12 @@ private function findAndTwmojifyTextNodes(HtmlPageCrawler $htmlContent): HtmlPag return $textNodes; } + + private function addUtf8MetaTag($htmlHead): void + { + $doc = new DOMDocument(); + $setUtf8Meta = $doc->createDocumentFragment(); + $setUtf8Meta->appendXML(static::UTF8_META); + $htmlHead->append($setUtf8Meta); + } } diff --git a/tests/Unit/HtmlReplacerPageTest.php b/tests/Unit/HtmlReplacerPageTest.php index d67d224..0415d46 100644 --- a/tests/Unit/HtmlReplacerPageTest.php +++ b/tests/Unit/HtmlReplacerPageTest.php @@ -68,7 +68,7 @@ $results = htmlReplacerPngParser($pageHtml); expect($results)->toContain('5๐Ÿš€')->not()->toContain('5&#'); assertMatchesTextSnapshot($results); -})->skip('This will fail due to "incorrect" meta charset method, we need to consider how to address that.'); +}); it('will replace the Emoji on page, but not in head - NEW Charset Method', function () { $pageHtml = <<<'HTML' diff --git a/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head_-_OLD_Charset_Method__1.txt b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head_-_OLD_Charset_Method__1.txt new file mode 100644 index 0000000..72c74f6 --- /dev/null +++ b/tests/__snapshots__/HtmlReplacerPageTest__it_will_replace_the_Emoji_on_page,_but_not_in_head_-_OLD_Charset_Method__1.txt @@ -0,0 +1,18 @@ + + + + + + HTML 5๐Ÿš€ Boilerplate + + + +

Do a quick kickflip! ๐Ÿ›น

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿ™๐Ÿ˜

+ + From 894b79f5d2b45377477b7bbcdb97bc34599a1699 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 19:01:15 -0400 Subject: [PATCH 33/35] Add test to cover when incorrect content type is corrected --- tests/Unit/HtmlReplacerPageTest.php | 27 +++++++++++++++++++ ...rrect_invalid_content-type_meta_tag__1.txt | 18 +++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 tests/__snapshots__/HtmlReplacerPageTest__it_will_correct_invalid_content-type_meta_tag__1.txt diff --git a/tests/Unit/HtmlReplacerPageTest.php b/tests/Unit/HtmlReplacerPageTest.php index 0415d46..6d7b909 100644 --- a/tests/Unit/HtmlReplacerPageTest.php +++ b/tests/Unit/HtmlReplacerPageTest.php @@ -96,3 +96,30 @@ expect($results)->toContain('5๐Ÿš€')->not()->toContain('5&#'); assertMatchesTextSnapshot($results); }); + +it('will correct invalid content-type meta tag', function () { + $pageHtml = <<<'HTML' + + + + + + + HTML 5๐Ÿš€ Boilerplate + + + +

Do a quick kickflip! ๐Ÿ›น

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿ™๐Ÿ˜

+ + +HTML; + $results = htmlReplacerPngParser($pageHtml); + expect($results)->toContain('5๐Ÿš€')->not()->toContain('5&#'); + assertMatchesTextSnapshot($results); +}); diff --git a/tests/__snapshots__/HtmlReplacerPageTest__it_will_correct_invalid_content-type_meta_tag__1.txt b/tests/__snapshots__/HtmlReplacerPageTest__it_will_correct_invalid_content-type_meta_tag__1.txt new file mode 100644 index 0000000..a7b7416 --- /dev/null +++ b/tests/__snapshots__/HtmlReplacerPageTest__it_will_correct_invalid_content-type_meta_tag__1.txt @@ -0,0 +1,18 @@ + + + + + + HTML 5๐Ÿš€ Boilerplate + + + +

Do a quick kickflip! ๐Ÿ›น

+

This is HTML text that should be replaced, but the emoji in the head should not.

+

Time for a CRAB RAVE!

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€๐Ÿฆ€

+

๐Ÿ™๐Ÿ˜

+ + From 590ac9751dc5e31ecdd939984ffb829fde2d2fc8 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 19:01:26 -0400 Subject: [PATCH 34/35] Add ext-dom to suggested --- composer.json | 1 + 1 file changed, 1 insertion(+) diff --git a/composer.json b/composer.json index 3dacfbd..25cb07a 100644 --- a/composer.json +++ b/composer.json @@ -29,6 +29,7 @@ "wa72/htmlpagedom": "^2.0 || ^3.0" }, "suggest": { + "ext-dom": "*", "spatie/emoji": "*", "wa72/htmlpagedom": "*" }, From 4468c8eb74caaf1dccd60c21849384b7e51213e9 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 17 Oct 2022 19:04:52 -0400 Subject: [PATCH 35/35] adjust styles --- src/Exceptions/NoTextChildrenException.php | 1 - src/HtmlReplacer.php | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Exceptions/NoTextChildrenException.php b/src/Exceptions/NoTextChildrenException.php index f0618b4..f1d2bef 100644 --- a/src/Exceptions/NoTextChildrenException.php +++ b/src/Exceptions/NoTextChildrenException.php @@ -6,5 +6,4 @@ class NoTextChildrenException extends Exception { - } diff --git a/src/HtmlReplacer.php b/src/HtmlReplacer.php index 80a31fd..8463df7 100644 --- a/src/HtmlReplacer.php +++ b/src/HtmlReplacer.php @@ -17,7 +17,7 @@ class HtmlReplacer private const UTF8_META = ''; - private const FRAGMENT_TEMPLATE = << @@ -61,9 +61,10 @@ public function parse(string $html): string $addHeader = false; if ($htmlHead->getNode(0)->hasChildNodes()) { $contentTypeMeta = $htmlHead->children('meta[http-equiv="content-type"][content]'); + $metaNode = $contentTypeMeta->getNode(0); if ( - $contentTypeMeta->getNode(0) === null || - iterator_to_array($contentTypeMeta->getNode(0)->attributes)['content']->textContent !== "text/html; charset=utf-8" + $metaNode === null || + iterator_to_array($metaNode->attributes)['content']->textContent !== 'text/html; charset=utf-8' ) { $this->addUtf8MetaTag($htmlHead); $contentTypeMeta->remove();