diff --git a/CHANGELOG.md b/CHANGELOG.md index 6044d71244..8c10eecb9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -## 0.13.0-dev10 +## 0.13.0-dev11 ### Enhancements @@ -12,6 +12,7 @@ ### Fixes +* **Fix `clean_pdfminer_inner_elements()` to remove only pdfminer (embedded) elements merged with inferred elements** Previously, some embedded elements were removed even if they were not merged with inferred elements. Now, only embedded elements that are already merged with inferred elements are removed. * **Clarify IAM Role Requirement for GCS Platform Connectors**. The GCS Source Connector requires Storage Object Viewer and GCS Destination Connector requires Storage Object Creator IAM roles. * **Change table extraction defaults** Change table extraction defaults in favor of using `skip_infer_table_types` parameter and reflect these changes in documentation. * **Fix OneDrive dates with inconsistent formatting** Adds logic to conditionally support dates returned by office365 that may vary in date formatting or may be a datetime rather than a string. See previous fix for SharePoint diff --git a/requirements/extra-pdf-image.in b/requirements/extra-pdf-image.in index 083d4b09d7..0a22ac9210 100644 --- a/requirements/extra-pdf-image.in +++ b/requirements/extra-pdf-image.in @@ -9,7 +9,7 @@ pillow_heif pypdf # Do not move to constraints.in, otherwise unstructured-inference will not be upgraded # when unstructured library is. -unstructured-inference==0.7.23 +unstructured-inference==0.7.25 # unstructured fork of pytesseract that provides an interface to allow for multiple output formats # from one tesseract call unstructured.pytesseract>=0.3.12 diff --git a/requirements/extra-pdf-image.txt b/requirements/extra-pdf-image.txt index 173449c0db..941d30d241 100644 --- a/requirements/extra-pdf-image.txt +++ b/requirements/extra-pdf-image.txt @@ -248,7 +248,7 @@ typing-extensions==4.9.0 # torch tzdata==2024.1 # via pandas -unstructured-inference==0.7.23 +unstructured-inference==0.7.25 # via -r extra-pdf-image.in unstructured-pytesseract==0.3.12 # via @@ -257,7 +257,6 @@ unstructured-pytesseract==0.3.12 urllib3==1.26.18 # via # -c base.txt - # -c constraints.in # requests wrapt==1.16.0 # via diff --git a/test_unstructured/partition/utils/test_processing_elements.py b/test_unstructured/partition/utils/test_processing_elements.py index 78fb1b4996..a4a10aae3a 100644 --- a/test_unstructured/partition/utils/test_processing_elements.py +++ b/test_unstructured/partition/utils/test_processing_elements.py @@ -58,8 +58,8 @@ type="Table", source=InferenceSource.YOLOX, ), - LayoutElement(bbox=Rectangle(0, 510, 50, 300), text="Inside table2", source=Source.PDFMINER), - LayoutElement(bbox=Rectangle(0, 550, 70, 400), text="Inside table2", source=Source.PDFMINER), + LayoutElement(bbox=Rectangle(0, 510, 50, 600), text="Inside table2", source=Source.PDFMINER), + LayoutElement(bbox=Rectangle(0, 550, 70, 650), text="Inside table2", source=Source.PDFMINER), ] diff --git a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json index 2bca039e2c..9fbb537d8f 100644 --- a/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/biomed-api/75/29/main.PMC6312793.pdf.json @@ -623,6 +623,19 @@ "text": "Instance size (m, n) Average number of Locations Times Vehicles (8, 1500) (8, 2000) (8, 2500) (8, 3000) (12, 1500) (12, 2000) (12, 2500) (12, 3000) (16, 1500) (16, 2000) (16, 2500) (16, 3000) 568.40 672.80 923.40 977.00 566.00 732.60 875.00 1119.60 581.80 778.00 879.00 1087.20 975.20 1048.00 1078.00 1113.20 994.00 1040.60 1081.00 1107.40 985.40 1040.60 1083.20 1101.60 652.20 857.20 1082.40 1272.80 642.00 861.20 1096.00 1286.20 667.80 872.40 1076.40 1284.60 668,279.40 1,195,844.80 1,866,175.20 2,705,617.00 674,191.00 1,199,659.80 1,878,745.20 2,711,180.40 673,585.80 1,200,560.80 1,879,387.00 2,684,983.60", "type": "Table" }, + { + "element_id": "68ec9a56bde1cd8ea67340bf9cb829cb", + "metadata": { + "data_source": {}, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 3 + }, + "text": "Possible empty travels", + "type": "Title" + }, { "element_id": "f64bebb0be23116b44b4ad54968178a0", "metadata": { diff --git a/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json index 897339302d..ffcae5ef48 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/2023-Jan-economic-outlook.pdf.json @@ -1133,6 +1133,258 @@ "text": "Year over Year Difference from October 2022 Q4 over Q4 2/ 2021 Estimate 2022 Projections 2023 2024 WEO Projections 1/ 2023 2024 Estimate 2022 Projections 2023 2024 6.2 3.4 2.9 3.1 0.2 –0.1 1.9 3.2 3.0 Advanced Economies United States Euro Area Germany France Italy Spain Japan United Kingdom Canada Other Advanced Economies 3/ 5.4 5.9 5.3 2.6 6.8 6.7 5.5 2.1 7.6 5.0 5.3 2.7 2.0 3.5 1.9 2.6 3.9 5.2 1.4 4.1 3.5 2.8 1.2 1.4 0.7 0.1 0.7 0.6 1.1 1.8 –0.6 1.5 2.0 1.4 1.0 1.6 1.4 1.6 0.9 2.4 0.9 0.9 1.5 2.4 0.1 0.4 0.2 0.4 0.0 0.8 –0.1 0.2 –0.9 0.0 –0.3 –0.2 –0.2 –0.2 –0.1 0.0 –0.4 –0.2 –0.4 0.3 –0.1 –0.2 1.3 0.7 1.9 1.4 0.5 2.1 2.1 1.7 0.4 2.3 1.4 1.1 1.0 0.5 0.0 0.9 0.1 1.3 1.0 –0.5 1.2 2.1 1.6 1.3 2.1 2.3 1.8 1.0 2.8 1.0 1.8 1.9 2.2 Emerging Market and Developing Economies Emerging and Developing Asia China India 4/ Emerging and Developing Europe Russia Latin America and the Caribbean Brazil Mexico Middle East and Central Asia Saudi Arabia Sub-Saharan Africa Nigeria South Africa 6.7 7.4 8.4 8.7 6.9 4.7 7.0 5.0 4.7 4.5 3.2 4.7 3.6 4.9 3.9 4.3 3.0 6.8 0.7 –2.2 3.9 3.1 3.1 5.3 8.7 3.8 3.0 2.6 4.0 5.3 5.2 6.1 1.5 0.3 1.8 1.2 1.7 3.2 2.6 3.8 3.2 1.2 4.2 5.2 4.5 6.8 2.6 2.1 2.1 1.5 1.6 3.7 3.4 4.1 2.9 1.3 0.3 0.4 0.8 0.0 0.9 2.6 0.1 0.2 0.5 –0.4 –1.1 0.1 0.2 0.1 –0.1 0.0 0.0 0.0 0.1 0.6 –0.3 –0.4 –0.2 0.2 0.5 0.0 0.0 0.0 2.5 3.4 2.9 4.3 –2.0 –4.1 2.6 2.8 3.7 . . . 4.6 . . . 2.6 3.0 5.0 6.2 5.9 7.0 3.5 1.0 1.9 0.8 1.1 . . . 2.7 . . . 3.1 0.5 4.1 4.9 4.1 7.1 2.8 2.0 1.9 2.2 1.9 . . . 3.5 . . . 2.9 1.8 Memorandum World Growth Based on Market Exchange Rates European Union ASEAN-5 5/ Middle East and North Africa Emerging Market and Middle-Income Economies Low-Income Developing Countries 6.0 5.5 3.8 4.1 7.0 4.1 3.1 3.7 5.2 5.4 3.8 4.9 2.4 0.7 4.3 3.2 4.0 4.9 2.5 1.8 4.7 3.5 4.1 5.6 0.3 0.0 –0.2 –0.4 0.4 0.0 –0.1 –0.3 –0.2 0.2 0.0 0.1 1.7 1.8 3.7 . . . 2.5 . . . 2.5 1.2 5.7 . . . 5.0 . . . 2.5 2.0 4.0 . . . 4.1 . . . 10.4 9.4 12.1 5.4 6.6 3.4 2.4 2.3 2.6 3.4 2.7 4.6 –0.1 0.0 –0.3 –0.3 –0.4 0.0 . . . . . . . . . . . . . . . . . . . . . . . . . . . 65.8 26.4 39.8 7.0 –16.2 –6.3 –7.1 –0.4 –3.3 –0.1 –0.9 0.3 11.2 –2.0 –9.8 1.4 –5.9 –0.2", "type": "Table" }, + { + "element_id": "fcadc00fe663ee0e7818b0ffc5c46948", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "World Output", + "type": "Title" + }, + { + "element_id": "0c76bc4e35219e2a31b09428cd47d009", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "World Trade Volume (goods and services) 6/ Advanced Economies Emerging Market and Developing Economies", + "type": "UncategorizedText" + }, + { + "element_id": "3c0578f4d944258ffa4ffac7615f1ff9", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "Commodity Prices Oil 7/ Nonfuel (average based on world commodity import weights)", + "type": "NarrativeText" + }, + { + "element_id": "6bb1e757e09d7fa3aba323a375abd047", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "World Consumer Prices 8/ Advanced Economies 9/ Emerging Market and Developing Economies 8/", + "type": "UncategorizedText" + }, + { + "element_id": "9db439c530ed3425c0a68724de199942", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "4.7 3.1 5.9", + "type": "UncategorizedText" + }, + { + "element_id": "b7948d6976e997e76e343161b4b5d864", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "8.8 7.3 9.9", + "type": "UncategorizedText" + }, + { + "element_id": "e7ac421147471fe341ae242e7544a44c", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "6.6 4.6 8.1", + "type": "UncategorizedText" + }, + { + "element_id": "4b48b0469ba9682a3e385ee7fbb6bbed", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "4.3 2.6 5.5", + "type": "UncategorizedText" + }, + { + "element_id": "5277334fd8abe869f6a8de2e43942c9d", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "0.1 0.2 0.0", + "type": "UncategorizedText" + }, + { + "element_id": "44f0ab7953bb0b3696b9fa3cf0682f35", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "0.2 0.2 0.2", + "type": "UncategorizedText" + }, + { + "element_id": "08e781dd2b6499b1ac8105a47f3520cc", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "9.2 7.8 10.4", + "type": "UncategorizedText" + }, + { + "element_id": "e586cf66e92b356a4611ee2ffdf85a16", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "5.0 3.1 6.6", + "type": "UncategorizedText" + }, { "element_id": "46c8e0c55b163d73d3d2766be8d1bf8d", "metadata": { @@ -1217,6 +1469,27 @@ "text": "6 International Monetary Fund | January 2023", "type": "ListItem" }, + { + "element_id": "41d85a7cc007a9c34136a786d6e61c15", + "metadata": { + "data_source": { + "date_modified": "2023-02-14T07:31:28+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/2023-Jan-economic-outlook.pdf", + "version": "265756457651539296174748931590365722430" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 7 + }, + "text": "3.5 2.3 4.5", + "type": "UncategorizedText" + }, { "element_id": "95af4f3feb2d03b2310ce31abc0c435d", "metadata": { diff --git a/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json b/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json index b20aed94b9..ad5a0cb72c 100644 --- a/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json +++ b/test_unstructured_ingest/expected-structured-output/s3/recalibrating-risk-report.pdf.json @@ -377,6 +377,27 @@ "text": "Nuclear energy and the risk of radiation is one of the most extreme cases in which perceived and actual risks have diverged. The fear of radiation, whilst pre- dating the Second World War, was firmly established by the debate on the potential impacts of low-dose radiation from the fallout from nuclear weapons testing in the early years of the Cold War. Radiation in many ways became linked with the mental imagery of nuclear war, playing an important role in increasing public concern about radiation and its health effects. There is a well-established discrepancy between fact-based risk assessments and public perception of different risks. This is very much the case with nuclear power, and this is clearly highlighted in Figure 1, with laypersons ranking nuclear power as the highest risk out of 30 activities and technologies, with experts ranking nuclear as 20th. In many ways, popular culture’s depiction of radiation has played a role in ensuring that this discrepancy has remained, be it Godzilla, The Incredible Hulk, or The Simpsons, which regularly plays on the notion of radiation from nuclear power plants causing three-eyed fish, something that has been firmly rejected as unscientific.", "type": "NarrativeText" }, + { + "element_id": "d977fff4c69c437aa4a44a5c5f4bf02e", + "metadata": { + "data_source": { + "date_modified": "2023-02-12T10:09:32+00:00", + "record_locator": { + "protocol": "s3", + "remote_file_path": "utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf" + }, + "url": "s3://utic-dev-tech-fixtures/small-pdf-set/recalibrating-risk-report.pdf", + "version": "306475068461766865312866697521104206816" + }, + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 4 + }, + "text": "Rank Order Laypersons", + "type": "Title" + }, { "element_id": "92a15f52537ead259f4d9c2da1b22454", "metadata": { diff --git a/test_unstructured_ingest/metrics/text-extraction/aggregate-scores-cct.tsv b/test_unstructured_ingest/metrics/text-extraction/aggregate-scores-cct.tsv index 70c3016c66..8041138ab0 100644 --- a/test_unstructured_ingest/metrics/text-extraction/aggregate-scores-cct.tsv +++ b/test_unstructured_ingest/metrics/text-extraction/aggregate-scores-cct.tsv @@ -1,3 +1,3 @@ metric average sample_sd population_sd count cct-accuracy 0.809 0.24 0.233 17 -cct-%missing 0.026 0.033 0.032 17 +cct-%missing 0.025 0.032 0.031 17 diff --git a/test_unstructured_ingest/metrics/text-extraction/all-docs-cct.tsv b/test_unstructured_ingest/metrics/text-extraction/all-docs-cct.tsv index 79188f1ec7..95773b80e8 100644 --- a/test_unstructured_ingest/metrics/text-extraction/all-docs-cct.tsv +++ b/test_unstructured_ingest/metrics/text-extraction/all-docs-cct.tsv @@ -13,6 +13,6 @@ handbook-1p.docx docx local-single-file-basic-chunking 0.858 0.029 fake-html-cp1252.html html local-single-file-with-encoding 0.659 0.0 layout-parser-paper-with-table.jpg jpg local-single-file-with-pdf-infer-table-structure 0.716 0.032 layout-parser-paper.pdf pdf local-single-file-with-pdf-infer-table-structure 0.95 0.029 -2023-Jan-economic-outlook.pdf pdf s3 0.834 0.054 +2023-Jan-economic-outlook.pdf pdf s3 0.84 0.044 page-with-formula.pdf pdf s3 0.971 0.021 -recalibrating-risk-report.pdf pdf s3 0.966 0.009 +recalibrating-risk-report.pdf pdf s3 0.968 0.008 diff --git a/unstructured/__version__.py b/unstructured/__version__.py index c29cef9d38..5f7860090b 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.13.0-dev10" # pragma: no cover +__version__ = "0.13.0-dev11" # pragma: no cover diff --git a/unstructured/partition/utils/processing_elements.py b/unstructured/partition/utils/processing_elements.py index fee9236f2f..1d60103a44 100644 --- a/unstructured/partition/utils/processing_elements.py +++ b/unstructured/partition/utils/processing_elements.py @@ -1,22 +1,31 @@ from collections import defaultdict from typing import TYPE_CHECKING +from unstructured.documents.elements import ElementType from unstructured.partition.utils.constants import Source +from unstructured.utils import requires_dependencies if TYPE_CHECKING: from unstructured_inference.inference.layout import DocumentLayout +@requires_dependencies("unstructured_inference") def clean_pdfminer_inner_elements(document: "DocumentLayout") -> "DocumentLayout": """Clean pdfminer elements from inside tables and stores them in extra_info dictionary with the table id as key""" + + from unstructured_inference.config import inference_config + defaultdict(list) for page in document.pages: - tables = [e for e in page.elements if e.type == "Table"] + tables = [e for e in page.elements if e.type == ElementType.TABLE] for i, element in enumerate(page.elements): if element.source != Source.PDFMINER: continue - element_inside_table = [element.bbox.is_in(t.bbox, error_margin=15) for t in tables] + subregion_threshold = inference_config.EMBEDDED_TEXT_AGGREGATION_SUBREGION_THRESHOLD + element_inside_table = [ + element.bbox.is_almost_subregion_of(t.bbox, subregion_threshold) for t in tables + ] if sum(element_inside_table) == 1: page.elements[i] = None page.elements = [e for e in page.elements if e]