Skip to content

Commit

Permalink
feat: add new element types (#332)
Browse files Browse the repository at this point in the history
This PR adds some new element types that are especially useful for
PDF/image partitioning.
The new element types correspond to the ones added in unstructured in
this PR:
Unstructured-IO/unstructured#2700
  • Loading branch information
pawel-kmiecik authored Apr 3, 2024
1 parent 4a2fd95 commit 0a08377
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 1 deletion.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## 0.7.26-dev0
* feat: add a set of new `ElementType`s to support recognition of additional element types in the future

## 0.7.25

* fix: replace `Rectangle.is_in()` with `Rectangle.is_almost_subregion_of()` when filling in an inferred element with embedded text
Expand Down
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.25" # pragma: no cover
__version__ = "0.7.26-dev0" # pragma: no cover
12 changes: 12 additions & 0 deletions unstructured_inference/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,20 @@ class Source(Enum):


class ElementType:
PARAGRAPH = "Paragraph"
IMAGE = "Image"
PARAGRAPH_IN_IMAGE = "ParagraphInImage"
FIGURE = "Figure"
PICTURE = "Picture"
TABLE = "Table"
PARAGRAPH_IN_TABLE = "ParagraphInTable"
LIST = "List"
FORM = "Form"
PARAGRAPH_IN_FORM = "ParagraphInForm"
CHECK_BOX_CHECKED = "CheckBoxChecked"
CHECK_BOX_UNCHECKED = "CheckBoxUnchecked"
RADIO_BUTTON_CHECKED = "RadioButtonChecked"
RADIO_BUTTON_UNCHECKED = "RadioButtonUnchecked"
LIST_ITEM = "List-item"
FORMULA = "Formula"
CAPTION = "Caption"
Expand All @@ -42,6 +51,9 @@ class ElementType:
TEXT = "Text"
UNCATEGORIZED_TEXT = "UncategorizedText"
PAGE_BREAK = "PageBreak"
CODE_SNIPPET = "CodeSnippet"
PAGE_NUMBER = "PageNumber"
OTHER = "Other"


FULL_PAGE_REGION_THRESHOLD = 0.99
Expand Down

0 comments on commit 0a08377

Please sign in to comment.