From e30c0f17b565b0488f8ad67c13a3974f00aa43d9 Mon Sep 17 00:00:00 2001 From: "Jorj X. McKie" Date: Wed, 6 Mar 2024 08:40:59 -0400 Subject: [PATCH] Add support for redacting vector graphics Add support for redacting vector graphics Also add a quick fix for property Annot.irt_xref Update test_2548.py --- src/__init__.py | 6 ++++-- src/utils.py | 19 +++++++++++++------ tests/test_2548.py | 2 +- tests/test_annots.py | 17 ++++++++++++++++- 4 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/__init__.py b/src/__init__.py index 434f3d7e7..ee819f628 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -778,7 +778,8 @@ def info(self): return res - def irt_xref(): + @property + def irt_xref(self): ''' annotation IRT xref ''' @@ -7605,11 +7606,12 @@ def _addWidget(self, field_type, field_name): JM_add_annot_id(annot, "W") return Annot(annot) - def _apply_redactions(self, images): + def _apply_redactions(self, images, graphics): page = self._pdf_page() opts = mupdf.PdfRedactOptions() opts.black_boxes = 0 # no black boxes opts.image_method = images # how to treat images + opts.line_art = graphics # how to treat vector graphics ASSERT_PDF(page) success = mupdf.pdf_redact_page(page.doc(), page, opts) return success diff --git a/src/utils.py b/src/utils.py index 696afc780..0a00d8677 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1213,7 +1213,7 @@ def set_metadata(doc: fitz.Document, m: dict) -> None: info_xref = 0 else: info_xref = int(temp.replace("0 R", "")) - + if m == {} and info_xref == 0: # nothing to do return @@ -4187,13 +4187,20 @@ def commit(self, overlay: bool = True) -> None: return -def apply_redactions(page: fitz.Page, images: int = 2) -> bool: +def apply_redactions(page: fitz.Page, images: int = 2, graphics: int = 1) -> bool: """Apply the redaction annotations of the page. Args: page: the PDF page. - images: 0 - ignore images, 1 - remove complete overlapping image, - 2 - blank out overlapping image parts. + images: + 0 - ignore images + 1 - remove all overlapping images + 2 - blank out overlapping image parts + 3 - remove image unless invisible + graphics: + 0 - ignore graphics + 1 - remove graphics if contained in rectangle + 2 - remove all overlapping graphics """ def center_rect(annot_rect, text, font, fsize): @@ -4246,7 +4253,7 @@ def center_rect(annot_rect, text, font, fsize): if redact_annots == []: # any redactions on this page? return False # no redactions - rc = page._apply_redactions(images) # call MuPDF redaction process step + rc = page._apply_redactions(images, graphics) # call MuPDF if not rc: # should not happen really raise ValueError("Error applying redactions.") @@ -4504,7 +4511,7 @@ def append_this(pos, text): pos, text, font=font, fontsize=fontsize, small_caps=small_caps ) return ret - + tolerance = fontsize * 0.2 # extra distance to left border space_len = textlen(" ") std_width = rect.width - tolerance diff --git a/tests/test_2548.py b/tests/test_2548.py index 5af77e054..9aa4212a1 100644 --- a/tests/test_2548.py +++ b/tests/test_2548.py @@ -36,7 +36,7 @@ def test_2548(): # versions with updated MuPDF also fix the bug. if fitz.mupdf_version_tuple >= (1, 24): - expected = 'format error: cycle in structure tree\nstructure tree broken, assume tree is missing' + expected = 'Loop found in structure tree. Ignoring structure.' assert wt == expected, f'expected:\n {expected!r}\nwt:\n {wt!r}\n' assert not e elif (0 diff --git a/tests/test_annots.py b/tests/test_annots.py index bcc923cc3..fbf78f79b 100644 --- a/tests/test_annots.py +++ b/tests/test_annots.py @@ -149,7 +149,7 @@ def test_stamp(): page = doc.reload_page(page) -def test_redact(): +def test_redact1(): doc = fitz.open() page = doc.new_page() annot = page.add_redact_annot(r, text="Hello") @@ -168,6 +168,21 @@ def test_redact(): assert s == r page.apply_redactions() + +def test_redact2(): + """Test removal of graphics (line art).""" + if not hasattr(fitz, "mupdf"): + print("Not executing 'test_redact2' in classic") + return + doc = fitz.open() + page = doc.new_page() + rect = fitz.Rect(100, 100, 200, 200) + page.draw_rect(rect) + page.add_redact_annot(rect) + page.apply_redactions(graphics=2) + assert page.get_drawings() == [] + + def test_1645(): ''' Test fix for #1645.