Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(fontmap): Enhance character glyph detection + Refactor(layout): Move layout parser to a separate class #32

Merged
merged 3 commits into from
Jan 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 2 additions & 35 deletions yadt/document_il/frontend/il_creater.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,6 @@ def on_page_number(self, page_number: int):
assert page_number >= 0
self.current_page.page_number = page_number

self.on_page_layout(page_number)

def on_page_base_operation(self, operation: str):
self.current_page.base_operations = il_version_1.BaseOperations(
value=operation
Expand Down Expand Up @@ -262,6 +260,8 @@ def on_lt_char(self, char: LTChar):

char_id = char.cid
char_unicode = char.get_text()
if '(cid:' not in char_unicode and len(char_unicode) > 1:
return
advance = char.adv
if char.matrix[0] == 0 and char.matrix[3] == 0:
vertical = True
Expand All @@ -283,39 +283,6 @@ def on_lt_char(self, char: LTChar):
)
self.current_page.pdf_character.append(pdf_char)

def on_page_layout(self, page_number):
    """Detect layout regions on one page and append them to the current page.

    Renders page ``page_number`` of ``self.mupdf`` to a pixmap, runs
    ``self.model.predict`` on it, and appends one ``PageLayout`` per
    detected box to ``self.current_page.page_layout``. Layout ids are
    numbered from 1 in detection order.

    Args:
        page_number: Zero-based page index; ``should_translate_page`` is
            queried with ``page_number + 1``.
    """
    # Deliberately an identity check: only an explicit ``False`` skips the page.
    if (
        self.translation_config.should_translate_page(page_number + 1)
        is False
    ):
        return

    pix = self.mupdf[page_number].get_pixmap()
    h, w = pix.height, pix.width
    # ``np.fromstring`` in binary mode is deprecated/removed in NumPy;
    # ``np.frombuffer`` reads the same bytes (as a read-only view).
    # The trailing ``[:, :, ::-1]`` reverses the channel axis
    # (presumably RGB -> BGR for the model -- TODO confirm).
    image = np.frombuffer(pix.samples, np.uint8).reshape(h, w, 3)[:, :, ::-1]
    layouts = self.model.predict(image, imgsz=int(h / 32) * 32)[0]

    for layout_id, layout in enumerate(layouts.boxes, start=1):
        # Convert from the picture coordinate system to the IL coordinate
        # system: the y-axis is flipped (h - y), the box is grown by one
        # pixel on every side, and the result is clamped to the pixmap.
        x0, y0, x1, y1 = layout.xyxy
        x0, y0, x1, y1 = (
            np.clip(int(x0 - 1), 0, w - 1),
            np.clip(int(h - y1 - 1), 0, h - 1),
            np.clip(int(x1 + 1), 0, w - 1),
            np.clip(int(h - y0 + 1), 0, h - 1),
        )
        page_layout = il_version_1.PageLayout(
            id=layout_id,
            box=il_version_1.Box(
                x0.item(), y0.item(), x1.item(), y1.item()
            ),
            conf=layout.conf.item(),
            class_name=layouts.names[layout.cls],
        )
        self.current_page.page_layout.append(page_layout)

def create_il(self):
pages = [
page
Expand Down
62 changes: 62 additions & 0 deletions yadt/document_il/midend/layout_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import numpy as np
from pymupdf import Document
from yadt.document_il import il_version_1
from yadt.translation_config import TranslationConfig
from yadt.doclayout import DocLayoutModel
import logging

logger = logging.getLogger(__name__)


class LayoutParser:
    """Pipeline stage that attaches detected layout regions to IL pages.

    For every page that ``translation_config.should_translate_page`` accepts,
    the page is rendered through PyMuPDF, passed to the configured layout
    model, and the detected boxes are stored on the page as ``PageLayout``
    entries.
    """

    stage_name = "解析页面布局"

    def __init__(self, translation_config: TranslationConfig):
        """Keep the configuration and the layout model it provides."""
        self.translation_config = translation_config
        self.model = translation_config.doc_layout_model
        # Progress handle for the current run; set by ``process``.
        self.progress = None

    def process(self, docs: il_version_1.Document, mupdf_doc: Document):
        """Generate layouts for all pages that need to be translated.

        Args:
            docs: IL document whose pages are updated in place.
            mupdf_doc: Document indexed by ``page.page_number`` to render
                each page.

        Returns:
            The same ``docs`` object, with ``page_layout`` replaced on every
            processed page. Skipped pages are left untouched.
        """
        should_translate = self.translation_config.should_translate_page
        # Filter once so the progress total and the loop agree, and the
        # predicate is not evaluated twice per page.
        pages = [
            page for page in docs.page if should_translate(page.page_number + 1)
        ]
        self.progress = self.translation_config.progress_monitor.stage_start(
            self.stage_name, len(pages)
        )

        for page in pages:
            page.page_layout = self._detect_page_layouts(
                mupdf_doc[page.page_number]
            )
            self.progress.advance(1)

        return docs

    def _detect_page_layouts(self, mupdf_page):
        """Run the layout model on one rendered page and return its layouts."""
        pix = mupdf_page.get_pixmap()
        h, w = pix.height, pix.width
        # ``np.fromstring`` in binary mode is deprecated/removed in NumPy;
        # ``np.frombuffer`` reads the same bytes (as a read-only view).
        # The trailing ``[:, :, ::-1]`` reverses the channel axis
        # (presumably RGB -> BGR for the model -- TODO confirm).
        image = np.frombuffer(pix.samples, np.uint8).reshape(h, w, 3)[
            :, :, ::-1
        ]
        layouts = self.model.predict(image, imgsz=int(h / 32) * 32)[0]

        page_layouts = []
        for layout_id, layout in enumerate(layouts.boxes, start=1):
            # Convert from the picture coordinate system to the IL coordinate
            # system: the y-axis is flipped (h - y), the box is grown by one
            # pixel on every side, and the result is clamped to the pixmap.
            x0, y0, x1, y1 = layout.xyxy
            x0, y0, x1, y1 = (
                np.clip(int(x0 - 1), 0, w - 1),
                np.clip(int(h - y1 - 1), 0, h - 1),
                np.clip(int(x1 + 1), 0, w - 1),
                np.clip(int(h - y0 + 1), 0, h - 1),
            )
            page_layouts.append(
                il_version_1.PageLayout(
                    id=layout_id,
                    box=il_version_1.Box(
                        x0.item(), y0.item(), x1.item(), y1.item()
                    ),
                    conf=layout.conf.item(),
                    class_name=layouts.names[layout.cls],
                )
            )
        return page_layouts
2 changes: 2 additions & 0 deletions yadt/document_il/utils/fontmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def __init__(self, translation_config: TranslationConfig):
self.fontid2font["fallback"] = self.fallback_font
self.fontid2font["kai"] = self.kai_font
def has_char(self, char_unicode: str):
if len(char_unicode) != 1:
return False
current_char = ord(char_unicode)
for font in self.fonts.values():
if font.has_glyph(current_char):
Expand Down
12 changes: 12 additions & 0 deletions yadt/high_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from yadt.translation_config import TranslationConfig
from yadt.progress_monitor import ProgressMonitor
from yadt.document_il.utils.fontmap import FontMapper
from yadt.document_il.midend.layout_parser import LayoutParser
import logging

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -155,6 +156,7 @@ def translate(translation_config: TranslationConfig):
translation_config,
[
ILCreater.stage_name,
LayoutParser.stage_name,
ParagraphFinder.stage_name,
StylesAndFormulas.stage_name,
ILTranslator.stage_name,
Expand Down Expand Up @@ -209,6 +211,16 @@ def translate(translation_config: TranslationConfig):
docs, translation_config.get_working_file_path("create_il.debug.json")
)

# Generate layouts for all pages that need to be translated
logger.debug("start generating layouts")
docs = LayoutParser(translation_config).process(docs, doc_input)
logger.debug("finish generating layouts")

if translation_config.debug:
xml_converter.write_json(
docs, translation_config.get_working_file_path("layout_generator.json")
)

ParagraphFinder(translation_config).process(docs)
logger.debug(f"finish paragraph finder from {temp_pdf_path}")
if translation_config.debug:
Expand Down
Loading