Skip to content

Commit

Permalink
Merge pull request #114 from funstory-ai/feat/dual-pdf-options
Browse files Browse the repository at this point in the history
feat: add dual PDF generation options (side-by-side and alternating pages)
  • Loading branch information
awwaawwa authored Feb 25, 2025
2 parents 3c6dcc1 + b06c825 commit c347c34
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 43 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ uv run babeldoc --bing --files example.pdf --files example2.pdf
- `--dual-translate-first`: Put translated pages first in dual PDF mode (default: original pages first)
- `--disable-rich-text-translate`: Disable rich text translation (may help improve compatibility with some PDFs)
- `--enhance-compatibility`: Enable all compatibility enhancement options (equivalent to --skip-clean --dual-translate-first --disable-rich-text-translate)
- `--use-side-by-side-dual`: Use side-by-side mode for dual PDF (default). Original and translated pages are shown side by side on the same page.
- `--use-alternating-pages-dual`: Use alternating pages mode for dual PDF. Original and translated pages are interleaved: each original page is followed by its translation (or preceded by it when `--dual-translate-first` is set).

> [!TIP]
> - Both `--skip-clean` and `--dual-translate-first` may help improve compatibility with some PDF readers
Expand Down
2 changes: 1 addition & 1 deletion babeldoc/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.11"
__version__ = "0.1.12"
2 changes: 1 addition & 1 deletion babeldoc/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import subprocess
from pathlib import Path

__version__ = "0.1.11"
__version__ = "0.1.12"

CACHE_FOLDER = Path.home() / ".cache" / "babeldoc"

Expand Down
150 changes: 112 additions & 38 deletions babeldoc/document_il/backend/pdf_creater.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,100 @@ def _debug_render_rectangle(
# Restore graphics state
draw_op.append(b"Q\n")

def create_side_by_side_dual_pdf(
    self,
    original_pdf: pymupdf.Document,
    translated_pdf: pymupdf.Document,
    dual_out_path: str,
    translation_config: TranslationConfig,
) -> pymupdf.Document:
    """Build a dual PDF whose pages show original and translation side by side.

    Each output page is as wide as the two source pages combined and as
    tall as the taller of the two. Only the pages present in both
    documents are emitted (the shorter document determines the count).

    Args:
        original_pdf: Original PDF document.
        translated_pdf: Translated PDF document.
        dual_out_path: Output path for the dual PDF. Not used by this
            method; the document is returned without being saved here.
        translation_config: Translation configuration. Its
            ``dual_translate_first`` flag selects which document goes on
            the left-hand side.

    Returns:
        The newly created dual PDF document.
    """
    combined = pymupdf.open()
    shared_pages = min(original_pdf.page_count, translated_pdf.page_count)

    for index in range(shared_pages):
        source_rect = original_pdf[index].rect
        target_rect = translated_pdf[index].rect

        sheet_width = source_rect.width + target_rect.width
        sheet_height = max(source_rect.height, target_rect.height)
        sheet = combined.new_page(width=sheet_width, height=sheet_height)

        if translation_config.dual_translate_first:
            # Translation on the left, original on the right.
            split_x = target_rect.width
            translated_box = pymupdf.Rect(0, 0, split_x, sheet_height)
            original_box = pymupdf.Rect(split_x, 0, sheet_width, sheet_height)
        else:
            # Original on the left, translation on the right (default).
            split_x = source_rect.width
            original_box = pymupdf.Rect(0, 0, split_x, sheet_height)
            translated_box = pymupdf.Rect(split_x, 0, sheet_width, sheet_height)

        # The original is always drawn first, then the translation.
        sheet.show_pdf_page(
            original_box,
            original_pdf,
            index,
            keep_proportion=True,
        )
        sheet.show_pdf_page(
            translated_box,
            translated_pdf,
            index,
            keep_proportion=True,
        )

    return combined

def create_alternating_pages_dual_pdf(
    self,
    original_pdf_path: str,
    translated_pdf: pymupdf.Document,
    translation_config: TranslationConfig,
) -> pymupdf.Document:
    """Create a dual PDF with alternating pages (original and translation).

    The original PDF is opened, the translated document is appended to it,
    and the appended pages are then moved so that each translated page
    sits next to the original page with the same index.

    Args:
        original_pdf_path: Path to the original PDF.
        translated_pdf: Translated PDF document.
        translation_config: Translation configuration. When its
            ``dual_translate_first`` flag is set, each translated page is
            placed before its original page; otherwise after it.

    Returns:
        The created dual PDF document. If the two documents have different
        page counts, only the pages present in both are interleaved and
        the surplus pages are left at the end in their existing order.
    """
    dual = pymupdf.open(original_pdf_path)
    # Must be read before appending: the translated pages start at this
    # index in the merged document, regardless of how many there are.
    original_page_count = dual.page_count
    dual.insert_file(translated_pdf)

    # Only pages that exist in both documents can be paired up.
    paired_page_count = min(original_page_count, translated_pdf.page_count)
    # Slot within each (original, translated) pair that the translated
    # page should occupy: 0 = before the original, 1 = after it.
    slot_in_pair = 0 if translation_config.dual_translate_first else 1

    for page_id in range(paired_page_count):
        # After `page_id` pairs are interleaved, the next translated page
        # is still at `original_page_count + page_id`, because each earlier
        # move shifted the remaining original pages right by one.
        dual.move_page(
            original_page_count + page_id,
            page_id * 2 + slot_in_pair,
        )

    return dual

def write_debug_info(
self,
pdf: pymupdf.Document,
Expand Down Expand Up @@ -388,48 +482,28 @@ def write(self, translation_config: TranslationConfig) -> TranslateResult:
original_pdf = pymupdf.open(self.original_pdf_path)
translated_pdf = pdf

# Create a new PDF for side-by-side pages
dual = pymupdf.open()
page_count = min(original_pdf.page_count, translated_pdf.page_count)

for page_id in range(page_count):
# Get pages from both PDFs
orig_page = original_pdf[page_id]
trans_page = translated_pdf[page_id]

# Calculate total width and use max height
total_width = orig_page.rect.width + trans_page.rect.width
max_height = max(orig_page.rect.height, trans_page.rect.height)

# Create new page with combined width
dual_page = dual.new_page(width=total_width, height=max_height)

# Define rectangles for left and right sides
left_width = (
orig_page.rect.width
if not translation_config.dual_translate_first
else trans_page.rect.width
)
rect_left = pymupdf.Rect(0, 0, left_width, max_height)
rect_right = pymupdf.Rect(left_width, 0, total_width, max_height)

# Show pages according to dual_translate_first setting
if translation_config.dual_translate_first:
# Show translated page on left and original on right
rect_left, rect_right = rect_right, rect_left
# Choose between side-by-side and alternating pages format
# Default to side-by-side if not specified
use_side_by_side = getattr(
translation_config,
"use_side_by_side_dual",
True,
)

# Show original page on left and translated on right (default)
dual_page.show_pdf_page(
rect_left,
if use_side_by_side:
# Create a dual PDF with side-by-side pages (original and translation)
dual = self.create_side_by_side_dual_pdf(
original_pdf,
page_id,
keep_proportion=True,
translated_pdf,
dual_out_path,
translation_config,
)
dual_page.show_pdf_page(
rect_right,
else:
# Create a dual PDF with alternating pages (original and translation)
dual = self.create_alternating_pages_dual_pdf(
self.original_pdf_path,
translated_pdf,
page_id,
keep_proportion=True,
translation_config,
)

if translation_config.debug:
Expand Down
15 changes: 14 additions & 1 deletion babeldoc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from babeldoc.translation_config import TranslationConfig

logger = logging.getLogger(__name__)
__version__ = "0.1.11"
__version__ = "0.1.12"


def create_parser():
Expand Down Expand Up @@ -176,6 +176,18 @@ def create_parser():
action="store_true",
help="Enable all compatibility enhancement options (equivalent to --skip-clean --dual-translate-first --disable-rich-text-translate)",
)
translation_params.add_argument(
"--use-side-by-side-dual",
default=True,
action="store_true",
help="Use side-by-side mode for dual PDF (default). When enabled, original and translated pages are shown side by side.",
)
translation_params.add_argument(
"--use-alternating-pages-dual",
dest="use_side_by_side_dual",
action="store_false",
help="Use alternating pages mode for dual PDF. When enabled, original and translated pages are arranged in alternate order.",
)
translation_params.add_argument(
"--report-interval",
type=float,
Expand Down Expand Up @@ -426,6 +438,7 @@ async def main():
dual_translate_first=args.dual_translate_first,
disable_rich_text_translate=args.disable_rich_text_translate,
enhance_compatibility=args.enhance_compatibility,
use_side_by_side_dual=args.use_side_by_side_dual,
report_interval=args.report_interval,
min_text_length=args.min_text_length,
)
Expand Down
2 changes: 2 additions & 0 deletions babeldoc/translation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def __init__(
enhance_compatibility: bool = False, # 增强兼容性模式
report_interval: float = 0.1, # Progress report interval in seconds
min_text_length: int = 5, # Minimum text length to translate
use_side_by_side_dual: bool = True, # 是否使用拼版式双语PDF(并排显示原文和译文)
):
self.input_file = input_file
self.translator = translator
Expand All @@ -62,6 +63,7 @@ def __init__(
)
self.report_interval = report_interval
self.min_text_length = min_text_length
self.use_side_by_side_dual = use_side_by_side_dual
if progress_monitor:
if progress_monitor.cancel_event is None:
progress_monitor.cancel_event = threading.Event()
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "BabelDOC"
version = "0.1.11"
version = "0.1.12"
description = "Yet Another Document Translator"
license = "AGPL-3.0"
readme = "README.md"
Expand Down Expand Up @@ -124,7 +124,7 @@ dev = [
]

[bumpver]
current_version = "0.1.11"
current_version = "0.1.12"
version_pattern = "MAJOR.MINOR.PATCH[.PYTAGNUM]"

[bumpver.file_patterns]
Expand Down

0 comments on commit c347c34

Please sign in to comment.