From 9766f931ca56408f614cc437eec28a3028ef9151 Mon Sep 17 00:00:00 2001
From: Konstantin Baierer
Date: Mon, 24 Aug 2020 16:25:21 +0200
Subject: [PATCH] GdsCollector: proof-of-concept to catch validation errors including filename

---
Reviewer note: parse() has no `filename` argument, so its hunks hand
`inFileName` to the collector directly. The post-image blob ids on the
`index` lines were not recomputed for ocrd_page_generateds.py,
ocrd_page_parse.py and tests/model/test_ocrd_page.py.

 ocrd_models/ocrd_models/generatedscollector.py  |  4 ++--
 ocrd_models/ocrd_models/ocrd_page_generateds.py | 10 ++++++----
 ocrd_models/ocrd_page_parse.py                  |  8 +++++---
 requirements_test.txt                           |  2 +-
 tests/model/test_ocrd_page.py                   | 13 +++++++++++++
 5 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/ocrd_models/ocrd_models/generatedscollector.py b/ocrd_models/ocrd_models/generatedscollector.py
index 25a94a304..caa6b8162 100644
--- a/ocrd_models/ocrd_models/generatedscollector.py
+++ b/ocrd_models/ocrd_models/generatedscollector.py
@@ -1,7 +1,7 @@
 class OcrdGdsCollector(object):
 
-    def __init__(self, messages=None):
-        print('GdsCollector.__init__', self)
+    def __init__(self, filename=None, messages=None):
+        self.filename = filename
         if messages is None:
             self.messages = []
         else:
diff --git a/ocrd_models/ocrd_models/ocrd_page_generateds.py b/ocrd_models/ocrd_models/ocrd_page_generateds.py
index c29374b8d..c842f5999 100644
--- a/ocrd_models/ocrd_models/ocrd_page_generateds.py
+++ b/ocrd_models/ocrd_models/ocrd_page_generateds.py
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 #
-# Generated Thu Jul 30 13:44:35 2020 by generateDS.py version 2.35.24.
+# Generated Mon Aug 24 15:23:10 2020 by generateDS.py version 2.35.26.
 # Python 3.6.9 (default, Jul 17 2020, 12:50:27) [GCC 8.4.0]
 #
 # Command line options:
@@ -14950,7 +14950,9 @@ def main():
 ]
 def parse(inFileName, silence=False, print_warnings=True):
     global CapturedNsmap_
-    gds_collector = GdsCollector_()
+    # Hand the input path to the collector so that validation messages can
+    # be traced back to the file they were raised for.
+    gds_collector = GdsCollector_(filename=inFileName)
     parser = None
     doc = parsexml_(inFileName, parser)
     rootNode = doc.getroot()
@@ -14979,7 +14981,7 @@ def parse(inFileName, silence=False, print_warnings=True):
         sys.stderr.write(separator)
     return rootObj
 
-def parseString(inString, silence=False, print_warnings=True):
+def parseString(inString, filename=None, silence=False, print_warnings=True):
     '''Parse a string, create the object tree, and export it.
 
     Arguments:
@@ -14990,7 +14992,7 @@ def parseString(inString, silence=False, print_warnings=True):
     '''
     parser = None
     rootNode= parsexmlstring_(inString, parser)
-    gds_collector = GdsCollector_()
+    gds_collector = GdsCollector_(filename=filename)
     rootTag, rootClass = get_root_tag(rootNode)
     if rootClass is None:
         rootTag = 'PcGts'
diff --git a/ocrd_models/ocrd_page_parse.py b/ocrd_models/ocrd_page_parse.py
index 056d55e4e..23deeb871 100644
--- a/ocrd_models/ocrd_page_parse.py
+++ b/ocrd_models/ocrd_page_parse.py
@@ -1,6 +1,8 @@
 def parse(inFileName, silence=False, print_warnings=True):
     global CapturedNsmap_
-    gds_collector = GdsCollector_()
+    # Hand the input path to the collector so that validation messages can
+    # be traced back to the file they were raised for.
+    gds_collector = GdsCollector_(filename=inFileName)
     parser = None
     doc = parsexml_(inFileName, parser)
     rootNode = doc.getroot()
@@ -29,7 +31,7 @@ def parse(inFileName, silence=False, print_warnings=True):
         sys.stderr.write(separator)
     return rootObj
 
-def parseString(inString, silence=False, print_warnings=True):
+def parseString(inString, filename=None, silence=False, print_warnings=True):
     '''Parse a string, create the object tree, and export it.
 
     Arguments:
@@ -40,7 +42,7 @@ def parseString(inString, silence=False, print_warnings=True):
     '''
     parser = None
     rootNode= parsexmlstring_(inString, parser)
-    gds_collector = GdsCollector_()
+    gds_collector = GdsCollector_(filename=filename)
     rootTag, rootClass = get_root_tag(rootNode)
     if rootClass is None:
         rootTag = 'PcGts'
diff --git a/requirements_test.txt b/requirements_test.txt
index bbd7f5c6b..ee7e65847 100644
--- a/requirements_test.txt
+++ b/requirements_test.txt
@@ -1,5 +1,5 @@
 pytest >= 4.0.0
-generateDS == 2.35.24
+generateDS == 2.35.26
 coverage >= 4.5.2
 sphinx
 codecov >= 2.0.15
diff --git a/tests/model/test_ocrd_page.py b/tests/model/test_ocrd_page.py
index fd9e85553..2f085e9d3 100644
--- a/tests/model/test_ocrd_page.py
+++ b/tests/model/test_ocrd_page.py
@@ -297,5 +297,18 @@ def test_gdscollector_override(self):
         gdc = pcgts.gds_collector_
         self.assertTrue(isinstance(gdc, OcrdGdsCollector))
 
+    def test_gdscollector_info(self):
+        filename = assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml')
+        with open(filename, 'r', encoding='utf-8') as f:
+            s = f.read()
+        s = s.replace('pc:Page', 'pc:Foo')
+        s = s.encode('utf-8')
+        pcgts = parseString(s, silence=True, filename=filename)
+        gdsc = pcgts.gds_collector_
+        self.assertEqual(gdsc.messages, [])
+        self.assertEqual(gdsc.filename, filename)
+        pcgts.validate_(gdsc, True)
+        self.assertEqual(gdsc.messages, ['Number of values for Page near line 2 is below the minimum allowed, expected at least 1, found 0'])
+
 if __name__ == '__main__':
     main(__file__)