diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4423e3bb..44314686 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.7.24
+
+* fix: assign value to `text_as_html` element attribute only if `text` attribute contains HTML tags.
+
## 0.7.23
* fix: added handling in `UnstructuredTableTransformerModel` for if `recognize` returns an empty
diff --git a/test_unstructured_inference/models/test_chippermodel.py b/test_unstructured_inference/models/test_chippermodel.py
index 065e24bc..c68aa6bc 100644
--- a/test_unstructured_inference/models/test_chippermodel.py
+++ b/test_unstructured_inference/models/test_chippermodel.py
@@ -3,6 +3,7 @@
import pytest
import torch
from PIL import Image
+from unstructured_inference.inference.layoutelement import LayoutElement
from unstructured_inference.models import chipper
from unstructured_inference.models.base import get_model
@@ -422,3 +423,26 @@ def test_check_overlap(bbox1, bbox2, output):
model = get_model("chipper")
assert model.check_overlap(bbox1, bbox2) == output
+
+
+def test_format_table_elements():
+    """Only HTML text is copied to `text_as_html`; tags are stripped from `text`."""
+    table_html = "<table><tr><td>Cell 1</td><td>Cell 2</td><td>Cell 3</td></tr></table>"
+    texts = [
+        "Text",
+        " - List element",
+        table_html,
+        None,
+    ]
+    elements = [LayoutElement(bbox=mock.MagicMock(), text=text) for text in texts]
+    formatted_elements = chipper.UnstructuredChipperModel.format_table_elements(elements)
+    text_attributes = [fe.text for fe in formatted_elements]
+    # Elements that never had `text_as_html` assigned are reported as None.
+    text_as_html_attributes = [getattr(fe, "text_as_html", None) for fe in formatted_elements]
+    assert text_attributes == [
+        "Text",
+        " - List element",
+        "Cell 1Cell 2Cell 3",
+        None,
+    ]
+    assert text_as_html_attributes == [None, None, table_html, None]
diff --git a/unstructured_inference/__version__.py b/unstructured_inference/__version__.py
index b16a8c5b..688c38bb 100644
--- a/unstructured_inference/__version__.py
+++ b/unstructured_inference/__version__.py
@@ -1 +1 @@
-__version__ = "0.7.23" # pragma: no cover
+__version__ = "0.7.24" # pragma: no cover
diff --git a/unstructured_inference/models/chipper.py b/unstructured_inference/models/chipper.py
index 2df09c21..d147f8cb 100644
--- a/unstructured_inference/models/chipper.py
+++ b/unstructured_inference/models/chipper.py
@@ -171,16 +171,18 @@ def predict(self, image) -> List[LayoutElement]:
return elements
@staticmethod
- def format_table_elements(elements):
- """makes chipper table element return the same as other layout models
+ def format_table_elements(elements: List[LayoutElement]) -> List[LayoutElement]:
+ """Makes chipper table element return the same as other layout models.
- - copies the html representation to attribute text_as_html
- - strip html tags from the attribute text
+ 1. If the `text` attribute contains HTML tags, copies the `text`
+ attribute to the `text_as_html` attribute.
+ 2. Strips html tags from the `text` attribute.
"""
for element in elements:
- element.text_as_html = element.text
- element.text = strip_tags(element.text)
-
+ text = strip_tags(element.text) if element.text is not None else element.text
+ if text != element.text:
+ element.text_as_html = element.text # type: ignore[attr-defined]
+ element.text = text
return elements
def predict_tokens(