-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
225 lines (158 loc) · 7 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
"""
作者:[赵泽霖]
联系方式:[[email protected]]
版权所有 (c) [2024] [赵泽霖]
[您的简要版权声明或版权声明链接]
本项目地址:https://github.com/ZhaoZelin2000/PDF-Separator-color-and-black-white-pages
本代码修改自:https://github.com/RicePasteM/Color-BW-Separator-for-PDF/tree/master
-------------------------------------------------------------
Author: [Zelin Zhao]
Contact: [[email protected]]
Copyright (c) [2024] [Zelin Zhao]
[Your brief copyright statement or copyright statement link]
Other information or notes...
本项目地址:https://github.com/ZhaoZelin2000/PDF-Separator-color-and-black-white-pages
本代码修改自:https://github.com/RicePasteM/Color-BW-Separator-for-PDF/tree/master
"""
import pymupdf as fitz
import numpy as np
import os
from tqdm import tqdm
import tkinter as tk
from tkinter import filedialog, messagebox
def is_color_image(image, saturation_threshold=0.35, color_fraction_threshold=0.001):
    """Decide whether an image contains a meaningful share of saturated pixels.

    For every pixel the saturation is computed as ``(max - min) / max`` over
    its RGB channels (after dividing the channel values by 255).  The image
    is reported as color when the fraction of pixels whose saturation
    exceeds ``saturation_threshold`` is greater than
    ``color_fraction_threshold``.

    Args:
        image: Object with a ``convert('RGB')`` method whose result NumPy can
            turn into a 3-D array with the channels on the last axis
            (presumably a PIL image -- confirm against callers).
        saturation_threshold: Saturation above which a pixel counts as
            colored.
        color_fraction_threshold: Fraction of colored pixels above which the
            whole image counts as color.

    Returns:
        The truth value of ``colored fraction > color_fraction_threshold``
        (a NumPy boolean).
    """
    rgb = np.array(image.convert('RGB')) / 255.0

    # Per-pixel channel extremes; their spread relative to the maximum is
    # the saturation.
    brightest = rgb.max(axis=2)
    darkest = rgb.min(axis=2)

    # The small epsilon keeps pure-black pixels from dividing by zero.
    saturation = (brightest - darkest) / (brightest + 1e-7)

    colored_share = np.mean(saturation > saturation_threshold)
    return colored_share > color_fraction_threshold
def is_color_page(page):
    """Report whether a PDF page counts as a color page.

    The page is rasterized with ``page.get_pixmap()``, serialized to PNG
    bytes, reopened as a PIL image and classified by ``is_color_image``
    using that function's default thresholds.

    Args:
        page: Page object providing ``get_pixmap()`` (a PyMuPDF page in the
            callers visible in this file).

    Returns:
        Whatever ``is_color_image`` returns for the rendered page.
    """
    # Function-local imports: resolved when a page is inspected rather than
    # when the module is loaded.
    from io import BytesIO
    from PIL import Image

    png_bytes = page.get_pixmap().tobytes("png")
    return is_color_image(Image.open(BytesIO(png_bytes)))
def _pair_double_sided_pages(color_pages, page_count):
    """Return ``color_pages`` plus the other side of each page's sheet.

    Page numbers are 0-based, so one sheet holds page ``2k`` and page
    ``2k + 1``.  A partner is only added when it exists in the document.
    """
    paired = set(color_pages)
    for page_num in color_pages:
        partner = page_num + 1 if page_num % 2 == 0 else page_num - 1
        # ``0 <=`` (not ``0 <``): page 0 is the valid partner of page 1.
        if 0 <= partner < page_count:
            paired.add(partner)
    return paired


def split_pdf(input_pdf_path, output_color_pdf_path, output_bw_pdf_path, is_double_sized_printing):
    """Split a PDF into a color-pages PDF and a black-and-white-pages PDF.

    Every page is classified with ``is_color_page``.  Color pages are copied,
    in their original order, into one new document and all remaining pages
    into another.  Each output file is only written when it would contain at
    least one page.

    Args:
        input_pdf_path: Path of the PDF to split.
        output_color_pdf_path: Destination for the color pages.
        output_bw_pdf_path: Destination for the black-and-white pages.
        is_double_sized_printing: When truthy, the page sharing a sheet with
            a color page (0-based pages ``2k`` and ``2k + 1``) is treated as
            color as well, so both sides of a sheet stay together.

    Raises:
        Any exception raised while opening, rendering, copying or saving is
        propagated; the documents are closed before it leaves this function.
    """
    # The context managers close all three documents even when an error
    # interrupts detection, copying or saving.
    with fitz.open(input_pdf_path) as doc, fitz.open() as color_doc, fitz.open() as bw_doc:
        page_count = len(doc)

        # A set gives O(1) membership tests and cannot hold duplicates.
        color_pages = {
            page_num
            for page_num in tqdm(range(page_count))
            if is_color_page(doc.load_page(page_num))
        }

        if is_double_sized_printing:
            color_pages = _pair_double_sided_pages(color_pages, page_count)

        bw_pages = [page_num for page_num in range(page_count) if page_num not in color_pages]

        # Copy the pages in ascending order into their target documents.
        for page_num in sorted(color_pages):
            color_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)
        for page_num in bw_pages:
            bw_doc.insert_pdf(doc, from_page=page_num, to_page=page_num)

        print(f"Output color PDF path: {output_color_pdf_path}")  # debug output
        print(f"Output BW PDF path: {output_bw_pdf_path}")  # debug output

        # Skip saving a document that received no pages.
        if color_pages:
            color_doc.save(output_color_pdf_path)
        if bw_pages:
            bw_doc.save(output_bw_pdf_path)
# Example of scripted (non-GUI) usage:
# if __name__ == '__main__':
#     INPUT_PDF_PATH = 'example.pdf'  # path of the PDF to split
#     OUTPUT_DIR = './output'  # output folder
#     OUTPUT_COLOR_PDF_PATH = os.path.join(OUTPUT_DIR, 'color_pages.pdf')  # output path for the color pages
#     OUTPUT_BW_PDF_PATH = os.path.join(OUTPUT_DIR, 'bw_pages.pdf')  # output path for the black-and-white pages
#     IS_DOUBLE_SIZED_PRINTING = True  # whether printing is double-sided
#     # Create the output directory if it doesn't exist
#     os.makedirs(OUTPUT_DIR, exist_ok=True)
#     split_pdf(INPUT_PDF_PATH, OUTPUT_COLOR_PDF_PATH, OUTPUT_BW_PDF_PATH, IS_DOUBLE_SIZED_PRINTING)
def center_window(window):
    """Reposition ``window`` so it sits in the middle of the screen.

    The window keeps the width and height it reports; only its offset is
    changed, via a ``'{width}x{height}+{x}+{y}'`` geometry string.

    Args:
        window: Object offering ``update_idletasks``, the ``winfo_*`` size
            queries used below, and ``geometry`` (a Tk window in this file).
    """
    # Process pending idle tasks first -- presumably so the size queries
    # below report the laid-out dimensions.
    window.update_idletasks()

    width, height = window.winfo_width(), window.winfo_height()
    left = window.winfo_screenwidth() // 2 - width // 2
    top = window.winfo_screenheight() // 2 - height // 2

    window.geometry(f'{width}x{height}+{left}+{top}')
def select_print_option():
    """Ask the user whether the document will be printed double-sided.

    Opens a ``Toplevel`` dialog with "Single-sided" / "Double-sided" radio
    buttons (single-sided preselected) and an OK button, grabs input for it
    and blocks until the dialog is destroyed.

    Returns:
        True if "Double-sided" was selected when OK was pressed; False
        otherwise, including when the dialog is closed without pressing OK.
    """
    dialog = tk.Toplevel()
    dialog.title("Select Print Option")

    choice = tk.StringVar(value='single')
    # Result reported if the dialog goes away without OK being pressed.
    double_sided = False

    def confirm():
        # Record the current selection, then dismiss the dialog.
        nonlocal double_sided
        double_sided = choice.get() == 'double'
        dialog.destroy()

    tk.Label(dialog, text="Select Print Option:").pack(anchor=tk.W)
    for caption, key in (("Single-sided", 'single'), ("Double-sided", 'double')):
        tk.Radiobutton(dialog, text=caption, variable=choice, value=key).pack(anchor=tk.W)
    tk.Button(dialog, text="OK", command=confirm).pack()

    # Center the dialog on screen once its widgets are in place.
    center_window(dialog)

    # Route input to the dialog and wait here until it is destroyed.
    dialog.grab_set()
    dialog.wait_window()
    return double_sided
def select_files():
    """Run the interactive workflow: pick PDFs, pick a folder, split each file.

    The user is asked for one or more input PDFs, an output directory and
    the print mode (single- or double-sided).  Every selected file is passed
    to ``split_pdf``; its outputs are named ``<name>_color_pages.pdf`` and
    ``<name>_bw_pages.pdf`` inside the chosen directory.

    A failure in one file is reported in an error dialog and does not stop
    the remaining files.  The final dialog reports success only when every
    file was processed; otherwise it states how many files failed.

    Returns:
        None.  Returns early (after an error dialog) when no file or no
        output directory is selected.
    """
    root = tk.Tk()
    root.withdraw()  # Hide the root window; only the dialogs are shown.
    try:
        input_pdf_paths = filedialog.askopenfilenames(title="Select PDF files", filetypes=[("PDF files", "*.pdf")])
        if not input_pdf_paths:
            messagebox.showerror("Error", "No files selected")
            return

        output_dir = filedialog.askdirectory(title="Select output directory")
        if not output_dir:
            messagebox.showerror("Error", "No output directory selected")
            return

        # Show the print option selection dialog
        is_double_sized_printing = select_print_option()

        failed_count = 0
        for input_pdf_path in input_pdf_paths:
            file_name = os.path.splitext(os.path.basename(input_pdf_path))[0]
            output_color_pdf_path = os.path.join(output_dir, f'{file_name}_color_pages.pdf')
            output_bw_pdf_path = os.path.join(output_dir, f'{file_name}_bw_pages.pdf')
            try:
                split_pdf(input_pdf_path, output_color_pdf_path, output_bw_pdf_path, is_double_sized_printing)
            except Exception as e:
                # Top-level GUI boundary: report the problem and keep going
                # with the remaining files.
                failed_count += 1
                messagebox.showerror("Error", f"Error processing {input_pdf_path}: {str(e)}")

        # Only claim success when nothing failed.
        if failed_count == 0:
            messagebox.showinfo("Success", "PDF files processed successfully")
        else:
            messagebox.showwarning(
                "Finished with errors",
                f"{failed_count} of {len(input_pdf_paths)} PDF files could not be processed",
            )
    finally:
        # Always tear down the hidden root so no Tk instance is left behind.
        root.destroy()
# Script entry point: print the author / copyright banner to stdout, then
# start the interactive file-selection workflow.
if __name__ == '__main__':
    print("""
作者:赵泽霖
联系方式:[email protected]
版权所有 (c) 2024 赵泽霖
[您的简要版权声明或版权声明链接]
本项目地址: https://github.com/ZhaoZelin2000/PDF-Separator-color-and-black-white-pages
本代码修改自: https://github.com/RicePasteM/Color-BW-Separator-for-PDF/tree/master
""")
    select_files()