-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdf_highlighter.py
73 lines (51 loc) · 2.13 KB
/
pdf_highlighter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import (
DictionaryObject,
FloatObject,
NameObject,
ArrayObject,
TextStringObject,
)
# Coordinates (x1, y1) are measured from the bottom-left corner of the page.
def _create_annotation(x1, y1, x2, y2, color, subtype):
    """Build a bare PDF annotation dictionary.

    Args:
        x1, y1, x2, y2: Numbers stored, in that order, as the ``/Rect`` entry.
        color: Iterable of numeric color components stored as the ``/C`` entry.
        subtype: PDF name of the annotation kind (e.g. ``'/Highlight'``),
            stored as the ``/Subtype`` entry.

    Returns:
        A ``DictionaryObject`` holding exactly ``/Subtype``, ``/C`` and ``/Rect``.
    """
    # NOTE(review): only /Rect is emitted; text-markup subtypes usually also
    # carry /QuadPoints in the PDF spec -- confirm target viewers render these.
    subtype_name = NameObject(subtype)
    color_array = ArrayObject([FloatObject(component) for component in color])
    rect_array = ArrayObject([FloatObject(value) for value in (x1, y1, x2, y2)])

    annotation = DictionaryObject()
    annotation[NameObject('/Subtype')] = subtype_name
    annotation[NameObject('/C')] = color_array
    annotation[NameObject('/Rect')] = rect_array
    return annotation
def _add_annotation(annotation, page):
    """Attach *annotation* to *page*.

    Appends to the page's existing ``/Annots`` array, or creates a new
    one-element array when the page has no ``/Annots`` entry yet.
    The page object is modified in place; nothing is returned.
    """
    annots_key = NameObject('/Annots')
    if '/Annots' not in page:
        # First annotation on this page: start a fresh array.
        page[annots_key] = ArrayObject([annotation])
        return
    page[annots_key].append(annotation)
def create_highlight(x1, y1, x2, y2, color=(1, 0, 0)):
    """Return a ``/Highlight`` annotation for the rectangle (x1, y1, x2, y2).

    ``color`` is the sequence of color components; it defaults to (1, 0, 0).
    """
    return _create_annotation(
        x1, y1, x2, y2,
        color=color,
        subtype='/Highlight',
    )
def create_circle(x1, y1, x2, y2, color=(1, 0, 0)):
    """Return a ``/Circle`` annotation for the rectangle (x1, y1, x2, y2).

    ``color`` is the sequence of color components; it defaults to (1, 0, 0).
    """
    return _create_annotation(
        x1, y1, x2, y2,
        color=color,
        subtype='/Circle',
    )
def create_square(x1, y1, x2, y2, color=(1, 0, 0)):
    """Return a ``/Square`` annotation for the rectangle (x1, y1, x2, y2).

    ``color`` is the sequence of color components; it defaults to (1, 0, 0).
    """
    return _create_annotation(
        x1, y1, x2, y2,
        color=color,
        subtype='/Square',
    )
def create_underline(x1, y1, x2, y2, color=(1, 0, 0)):
    """Return an ``/Underline`` annotation for the rectangle (x1, y1, x2, y2).

    ``color`` is the sequence of color components; it defaults to (1, 0, 0).
    """
    return _create_annotation(
        x1, y1, x2, y2,
        color=color,
        subtype='/Underline',
    )
def create_strike_out(x1, y1, x2, y2, color=(1, 0, 0)):
    """Return a ``/StrikeOut`` annotation for the rectangle (x1, y1, x2, y2).

    ``color`` is the sequence of color components; it defaults to (1, 0, 0).
    """
    return _create_annotation(
        x1, y1, x2, y2,
        color=color,
        subtype='/StrikeOut',
    )
def create_free_text(x1, y1, x2, y2, text, color=(1, 0, 0)):
    """Return a ``/FreeText`` annotation carrying *text*.

    The annotation is built for the rectangle (x1, y1, x2, y2) with the given
    ``color`` (default (1, 0, 0)); *text* is stored as its ``/Contents`` entry.
    """
    free_text = _create_annotation(
        x1, y1, x2, y2,
        color=color,
        subtype='/FreeText',
    )
    contents = TextStringObject(text)
    free_text[NameObject('/Contents')] = contents
    return free_text
def highlight_file(file_in, file_out, annotations_dict):
    """Copy a PDF to a new file, adding annotations to selected pages.

    Args:
        file_in: Path of the PDF to read.
        file_out: Path the annotated PDF is written to.
        annotations_dict: Mapping from zero-based page index to an iterable of
            annotation objects (as built by the ``create_*`` helpers) to add
            to that page. Pages without an entry are copied unchanged.

    Both files are opened with context managers so their handles are closed
    (and the output flushed) even if reading or writing raises.
    """
    # NOTE(review): file_in and file_out presumably need to be different
    # paths, since the output is opened for writing while the input handle is
    # still in use -- confirm against callers.
    with open(file_in, 'rb') as input_stream:
        pdf_input = PdfFileReader(input_stream)
        pdf_output = PdfFileWriter()

        for page_number in range(pdf_input.getNumPages()):
            page = pdf_input.getPage(page_number)
            for annotation in annotations_dict.get(page_number, ()):
                _add_annotation(annotation, page)
            pdf_output.addPage(page)

        # Write while the input stream is still open: the pages added above
        # came from the reader, which is backed by that stream.
        with open(file_out, 'wb') as output_stream:
            pdf_output.write(output_stream)