Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate ocrd tool runtime #13

Merged
merged 5 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion repo/spec
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ gdown
httpx>=0.22.0
importlib_metadata ; python_version < '3.8'
importlib_resources ; python_version < '3.10'
jsonschema
jsonschema>=4
lxml
memory-profiler >= 0.58.0
# XXX explicitly do not restrict the numpy version because different
Expand Down
15 changes: 11 additions & 4 deletions src/ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import inspect
import tarfile
import io
from warnings import warn
from deprecated import deprecated

from ocrd.workspace import Workspace
Expand All @@ -43,6 +44,7 @@
from ocrd_validators import ParameterValidator
from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType, OcrdPage, to_xml
from ocrd_modelfactory import page_from_file
from ocrd_validators.ocrd_tool_validator import OcrdToolValidator

# XXX imports must remain for backwards-compatibility
from .helpers import run_cli, run_processor, generate_processor_help # pylint: disable=unused-import
Expand Down Expand Up @@ -71,31 +73,36 @@ class Processor():
"""

@property
def metadata(self):
def metadata(self) -> dict:
"""the ocrd-tool.json dict of the package"""
if hasattr(self, '_metadata'):
return self._metadata
self._metadata = json.loads(resource_string(self.__module__.split('.')[0], 'ocrd-tool.json'))
report = OcrdToolValidator.validate(self._metadata)
kba marked this conversation as resolved.
Show resolved Hide resolved
if not report.is_valid:
# FIXME: remove when bertsky/core#10 is merged
self.logger = getLogger(f'ocrd.processor.{self.__class__.__name__}')
self.logger.error(f"The ocrd-tool.json of this processor is {'problematic' if not report.errors else 'invalid'}:\n{report.to_xml()}.\nPlease open an issue at {self._metadata['git_url']}.")
kba marked this conversation as resolved.
Show resolved Hide resolved
return self._metadata

@property
def version(self):
def version(self) -> str:
"""the version of the package"""
if hasattr(self, '_version'):
return self._version
self._version = self.metadata['version']
return self._version

@property
def executable(self):
def executable(self) -> str:
"""the executable name of this processor tool"""
if hasattr(self, '_executable'):
return self._executable
self._executable = os.path.basename(inspect.stack()[-1].filename)
return self._executable

@property
def ocrd_tool(self):
def ocrd_tool(self) -> dict:
"""the ocrd-tool.json dict of this processor tool"""
if hasattr(self, '_ocrd_tool'):
return self._ocrd_tool
Expand Down
21 changes: 14 additions & 7 deletions src/ocrd_validators/json_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,39 @@
Validating JSON-Schema
"""
import json
from warnings import warn

from jsonschema import Draft6Validator, validators # pylint: disable=import-error
from jsonschema import Draft201909Validator, ValidationError, validators # pylint: disable=import-error

from ocrd_models import ValidationReport

class JsonSchemaDeprecationWarning(ValidationError):
pass

# http://python-jsonschema.readthedocs.io/en/latest/faq/
def extend_with_default(validator_class):
"""
Add a default-setting and deprecation-reporting mechanism to a ``jsonschema`` validation class.
"""
validate_properties = validator_class.VALIDATORS["properties"]

def set_defaults(validator, properties, instance, schema):
def set_defaults_and_handle_deprecate(validator, properties, instance, schema):
"""
Set defaults in subschemas and report properties marked as deprecated
"""
for prop, subschema in properties.items():
if "default" in subschema:
instance.setdefault(prop, subschema["default"])
if subschema.get('deprecated', False):
yield JsonSchemaDeprecationWarning(f"Property {prop} has been deprecated, ocrd-tool.json should be updated.")

for error in validate_properties(validator, properties, instance, schema):
yield error

return validators.extend(validator_class, {"properties": set_defaults})
return validators.extend(validator_class, {"properties": set_defaults_and_handle_deprecate})


DefaultValidatingDraft6Validator = extend_with_default(Draft6Validator)
DefaultValidatingDraft20199Validator = extend_with_default(Draft201909Validator)

#
# -------------------------------------------------
Expand All @@ -52,13 +58,13 @@ def validate(obj, schema):
obj = json.loads(obj)
return JsonValidator(schema)._validate(obj) # pylint: disable=protected-access

def __init__(self, schema, validator_class=Draft6Validator):
def __init__(self, schema, validator_class=Draft201909Validator):
"""
Construct a JsonValidator.

Args:
schema (dict):
validator_class (Draft6Validator|DefaultValidatingDraft6Validator):
validator_class (Draft201909Validator|DefaultValidatingDraft20199Validator):
"""
self.validator = validator_class(schema)

Expand All @@ -74,6 +80,7 @@ def _validate(self, obj):
report = ValidationReport()
if not self.validator.is_valid(obj):
for v in self.validator.iter_errors(obj):
meth = f'add_{"warning" if isinstance(v, JsonSchemaDeprecationWarning) else "error"}'
# print(">>>>>>>>> v='%s', obj='%s'" % (v, obj))
report.add_error("[%s] %s" % ('.'.join(str(vv) for vv in v.path), v.message))
getattr(report, meth)("[%s] %s" % ('.'.join(str(vv) for vv in v.path), v.message))
kba marked this conversation as resolved.
Show resolved Hide resolved
return report
13 changes: 11 additions & 2 deletions src/ocrd_validators/ocrd_tool.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,12 @@ properties:
maximum:
type: number
description: Maximum value for number parameters, including the maximum
minProperties:
type: number
description: Minimum number of properties of an object
maxProperties:
type: number
description: Maximum number of properties of an object
exclusiveMinimum:
type: number
description: Minimum value for number parameters, excluding the minimum
Expand All @@ -121,8 +127,11 @@ properties:
type: object
description: Describe the properties of an object value
additionalProperties:
type: boolean
description: Whether an object value may contain properties not explicitly defined
oneOf:
- type: boolean
description: Whether an object value may contain properties not explicitly defined
- type: object
description: Schema any additional properties need to adhere to
required:
type: boolean
description: Whether this parameter is required
Expand Down
7 changes: 5 additions & 2 deletions src/ocrd_validators/ocrd_tool_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
See `specs <https://ocr-d.de/en/spec/ocrd_tool>`_.
"""
from .constants import OCRD_TOOL_SCHEMA
from .json_validator import JsonValidator
from .json_validator import DefaultValidatingDraft20199Validator, JsonValidator

#
# -------------------------------------------------
Expand All @@ -20,4 +20,7 @@ def validate(obj, schema=OCRD_TOOL_SCHEMA):
"""
Validate against ``ocrd-tool.json`` schema.
"""
return JsonValidator.validate(obj, schema)
return OcrdToolValidator(schema)._validate(obj) # pylint: disable=protected-access

def __init__(self, schema, validator_class=...):
super().__init__(schema, DefaultValidatingDraft20199Validator)
4 changes: 2 additions & 2 deletions src/ocrd_validators/parameter_validator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Validate parameters against ocrd-tool.json.
"""
from .json_validator import JsonValidator, DefaultValidatingDraft6Validator
from .json_validator import DefaultValidatingDraft20199Validator, JsonValidator

#
# -------------------------------------------------
Expand Down Expand Up @@ -45,4 +45,4 @@ def __init__(self, ocrd_tool):
"required": required,
"additionalProperties": False,
"properties": p
}, DefaultValidatingDraft6Validator)
}, DefaultValidatingDraft20199Validator)
4 changes: 2 additions & 2 deletions src/ocrd_validators/resource_list_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
See `specs <https://ocr-d.de/en/spec/cli#processor-resources>`_.
"""
from .constants import RESOURCE_LIST_SCHEMA
from .json_validator import JsonValidator, DefaultValidatingDraft6Validator
from .json_validator import DefaultValidatingDraft20199Validator, JsonValidator

#
# -------------------------------------------------
Expand All @@ -20,5 +20,5 @@ def validate(obj, schema=RESOURCE_LIST_SCHEMA):
"""
Validate against ``resource_list.schema.yml`` schema.
"""
return JsonValidator(schema, validator_class=DefaultValidatingDraft6Validator)._validate(obj)
return JsonValidator(schema, validator_class=DefaultValidatingDraft20199Validator)._validate(obj)

4 changes: 2 additions & 2 deletions tests/validator/test_json_validator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from tests.base import TestCase, main
from ocrd_validators.json_validator import JsonValidator, DefaultValidatingDraft6Validator
from ocrd_validators.json_validator import JsonValidator, DefaultValidatingDraft20199Validator

class TestParameterValidator(TestCase):

Expand All @@ -15,7 +15,7 @@ def setUp(self):
}
}
}
self.defaults_validator = JsonValidator(self.schema, DefaultValidatingDraft6Validator)
self.defaults_validator = JsonValidator(self.schema, DefaultValidatingDraft20199Validator)
super().setUp()

def test_validate_string(self):
Expand Down