Skip to content

Commit

Permalink
feat(translation_config): add doc_layout_model to TranslationConfig
Browse files: browse the repository at this point in the history
- introduce doc_layout_model parameter in TranslationConfig
- load DocLayoutModel if not provided
- update il_creater to use translation_config.doc_layout_model

refactor(il_creater): improve code style and consistency

- fix PSLiteral check indentation
- add blank lines between methods for better readability
  • Loading branch information
awwaawwa committed Jan 21, 2025
1 parent fd796f0 commit 0b04424
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
8 changes: 5 additions & 3 deletions yadt/document_il/frontend/il_creater.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def __init__(self, translation_config: TranslationConfig):
self.progress = None
self.current_page: il_version_1.Page = None
self.mupdf: pymupdf.Document = None
self.model = DocLayoutModel.load_available()
self.model = translation_config.doc_layout_model
self.docs = il_version_1.Document(page=[])
self.stroking_color_space_name = None
self.non_stroking_color_space_name = None
Expand All @@ -42,8 +42,8 @@ def on_passthrough_per_char(self, operator: str, args: list[str]):
pass

def parse_arg(self, arg: str) -> str:
    """Return the string form of a single operator argument.

    Despite the ``str`` annotation, non-string values are accepted:

    * a ``PSLiteral`` becomes ``"/"`` followed by its ``name``;
    * any other non-``str`` value is passed through ``str()``;
    * a ``str`` is returned unchanged.
    """
    # The literal check must come first: a PSLiteral is not a str, so the
    # generic fallback below would otherwise stringify it with str().
    if isinstance(arg, PSLiteral):
        return f"/{arg.name}"
    if isinstance(arg, str):
        return arg
    return str(arg)
Expand Down Expand Up @@ -83,8 +83,10 @@ def on_page_start(self):
)
self.current_page_font_name_id_map = {}
self.docs.page.append(self.current_page)

def on_page_end(self):
    """Advance the progress tracker by one step."""
    tracker = self.progress
    tracker.advance(1)

def on_page_crop_box(
self,
x0: float | int,
Expand Down
6 changes: 6 additions & 0 deletions yadt/translation_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
)
import os

from yadt.doclayout import DocLayoutModel
from yadt.progress_monitor import ProgressMonitor


Expand All @@ -29,6 +30,7 @@ def __init__(
short_line_split_factor: float = 0.8, # 切分阈值系数。实际阈值为当前页所有行长度中位数*此系数
use_rich_pbar: bool = True, # 是否使用 rich 进度条
progress_monitor: Optional[ProgressMonitor] = None, # progress_monitor
doc_layout_model=None,
):
self.input_file = input_file
self.translator = translator
Expand Down Expand Up @@ -62,6 +64,10 @@ def __init__(

os.makedirs(output_dir, exist_ok=True)

if doc_layout_model is None:
doc_layout_model = DocLayoutModel.load_available()
self.doc_layout_model = doc_layout_model

def get_output_file_path(self, filename):
    """Return ``filename`` joined onto this config's ``output_dir``."""
    base_dir = self.output_dir
    return os.path.join(base_dir, filename)

Expand Down

0 comments on commit 0b04424

Please sign in to comment.