Skip to content

Commit

Permalink
Have the pageXML that is parsed available to the save methods, and better iteration methods
Browse files Browse the repository at this point in the history
  • Loading branch information
stefanklut committed Jan 10, 2025
1 parent 490f5f3 commit ce11514
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
17 changes: 11 additions & 6 deletions page_xml/xmlPAGE.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def parse(self):
# --- save "namespace" base
self.base = "".join([self.root.tag.rsplit("}", 1)[0], "}"])

ET.register_namespace("", self.XMLNS["xmlns"])

def get_region(self, region_name):
"""
get all regions in PAGE which match region_name
Expand Down Expand Up @@ -157,6 +159,9 @@ def get_polygons(self, element_name):
def _iter_element(self, element):
return self.root.iterfind("".join([".//", self.base, element]))

def _iter_subelement(self, element, subelement):
return element.iterfind("".join([".//", self.base, subelement]))

def iter_class_coords(self, element, class_dict):
for node in self._iter_element(element):
element_type = self.get_region_type(node)
Expand Down Expand Up @@ -249,13 +254,13 @@ def write_transcriptions(self, out_dir):

def new_page(self, name, rows, cols):
"""create a new PAGE xml"""
self.xml = ET.Element("PcGts")
self.xml.attrib = self.XMLNS
self.metadata = ET.SubElement(self.xml, "Metadata")
self.root = ET.Element("PcGts")
self.root.attrib = self.XMLNS
self.metadata = ET.SubElement(self.root, "Metadata")
ET.SubElement(self.metadata, "Creator").text = self.creator
ET.SubElement(self.metadata, "Created").text = datetime.datetime.today().strftime("%Y-%m-%dT%X")
ET.SubElement(self.metadata, "LastChange").text = datetime.datetime.today().strftime("%Y-%m-%dT%X")
self.page = ET.SubElement(self.xml, "Page")
self.page = ET.SubElement(self.root, "Page")
self.page.attrib = {
"imageFilename": name,
"imageWidth": cols,
Expand Down Expand Up @@ -329,8 +334,8 @@ def add_baseline(self, b_coords, parent):

def save_xml(self):
"""write out XML file of current PAGE data"""
self._indent(self.xml)
tree = ET.ElementTree(self.xml)
self._indent(self.root)
tree = ET.ElementTree(self.root)
with AtomicFileName(self.filepath) as path:
tree.write(path, encoding="UTF-8", xml_declaration=True)

Expand Down
2 changes: 0 additions & 2 deletions page_xml/xml_converters/xml_to_sem_seg.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,6 @@ def build_text_line(self, page: PageData, out_size: tuple[int, int]):
self.logger.warning(f"File {page.filepath} does not contains text line sem_seg")
return sem_seg

# TOP BOTTOM

def build_top_bottom(self, page: PageData, out_size: tuple[int, int]):
"""
Create the sem_seg version of the top bottom
Expand Down

0 comments on commit ce11514

Please sign in to comment.