From f6cfcf783b55cc0a206d9436799acf81116fbb46 Mon Sep 17 00:00:00 2001 From: Diego Pino Navarro Date: Sun, 28 Nov 2021 17:03:55 -0500 Subject: [PATCH] Fixes OCR Highlight 0.7 dislike for empty tags --- .../OcrPostProcessor.php | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php b/src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php index 4bf8a30..84ed48f 100644 --- a/src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php +++ b/src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php @@ -573,13 +573,16 @@ protected function hOCRtoMiniOCR($output, $pageid) { $miniocr->text(' '); } $notFirstWord = TRUE; - $miniocr->startElement("w"); - $miniocr->writeAttribute("x", $l . ' ' . $t . ' ' . $w . ' ' . $h); - $miniocr->text($text); - // Only assume we have at least one word for tags - // Since lines? could end empty? - $atleastone_word = TRUE; - $miniocr->endElement(); + // New OCR Highlight does not like empty tags at all + if (strlen(trim($text)) > 0) { + $miniocr->startElement("w"); + $miniocr->writeAttribute("x", $l . ' ' . $t . ' ' . $w . ' ' . $h); + $miniocr->text($text); + // Only assume we have at least one word for tags + // Since lines? could end empty? + $atleastone_word = TRUE; + $miniocr->endElement(); + } } } $miniocr->endElement();