diff --git a/src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php b/src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php index 4bf8a30..84ed48f 100644 --- a/src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php +++ b/src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php @@ -573,13 +573,16 @@ protected function hOCRtoMiniOCR($output, $pageid) { $miniocr->text(' '); } $notFirstWord = TRUE; - $miniocr->startElement("w"); - $miniocr->writeAttribute("x", $l . ' ' . $t . ' ' . $w . ' ' . $h); - $miniocr->text($text); - // Only assume we have at least one word for tags - // Since lines? could end empty? - $atleastone_word = TRUE; - $miniocr->endElement(); + // New OCR Highlight does not like empty tags at all + if (strlen(trim($text)) > 0) { + $miniocr->startElement("w"); + $miniocr->writeAttribute("x", $l . ' ' . $t . ' ' . $w . ' ' . $h); + $miniocr->text($text); + // Only assume we have at least one word for tags + // Since lines? could end empty? + $atleastone_word = TRUE; + $miniocr->endElement(); + } } } $miniocr->endElement();