diff --git a/Makefile b/Makefile index 8b934cea..19a667f0 100644 --- a/Makefile +++ b/Makefile @@ -11,3 +11,7 @@ update_model: test: poetry run python -m unittest discover + +# temporary measure until linkml-model is synced +linkml_runtime/processing/validation_datamodel.py: linkml_runtime/processing/validation_datamodel.yaml + gen-python $< > $@.tmp && mv $@.tmp $@ diff --git a/linkml_runtime/processing/validation_datamodel.py b/linkml_runtime/processing/validation_datamodel.py new file mode 100644 index 00000000..9fa99717 --- /dev/null +++ b/linkml_runtime/processing/validation_datamodel.py @@ -0,0 +1,544 @@ +# Auto generated from validation_datamodel.yaml by pythongen.py version: 0.9.0 +# Generation date: 2023-01-27T10:37:33 +# Schema: validaton-results +# +# id: https://w3id.org/linkml/validation_results +# description: A datamodel for data validation results. +# license: https://creativecommons.org/publicdomain/zero/1.0/ + +import dataclasses +import sys +import re +from jsonasobj2 import JsonObj, as_dict +from typing import Optional, List, Union, Dict, ClassVar, Any +from dataclasses import dataclass +from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue, PvFormulaOptions + +from linkml_runtime.utils.slot import Slot +from linkml_runtime.utils.metamodelcore import empty_list, empty_dict, bnode +from linkml_runtime.utils.yamlutils import YAMLRoot, extended_str, extended_float, extended_int +from linkml_runtime.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs +from linkml_runtime.utils.formatutils import camelcase, underscore, sfx +from linkml_runtime.utils.enumerations import EnumDefinitionImpl +from rdflib import Namespace, URIRef +from linkml_runtime.utils.curienamespace import CurieNamespace +from linkml_runtime.linkml_model.types import Boolean, Integer, String, Uriorcurie +from linkml_runtime.utils.metamodelcore import Bool, URIorCURIE + +metamodel_version = "1.7.0" +version = None + +# Overwrite dataclasses 
_init_fn to add **kwargs in __init__ +dataclasses._init_fn = dataclasses_init_fn_with_kwargs + +# Namespaces +LINKML = CurieNamespace('linkml', 'https://w3id.org/linkml/') +OWL = CurieNamespace('owl', 'http://www.w3.org/2002/07/owl#') +PAV = CurieNamespace('pav', 'http://purl.org/pav/') +RDF = CurieNamespace('rdf', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#') +RDFS = CurieNamespace('rdfs', 'http://www.w3.org/2000/01/rdf-schema#') +SCHEMA = CurieNamespace('schema', 'http://schema.org/') +SH = CurieNamespace('sh', 'http://www.w3.org/ns/shacl#') +SKOS = CurieNamespace('skos', 'http://www.w3.org/2004/02/skos/core#') +VM = CurieNamespace('vm', 'https://w3id.org/linkml/validation-model/') +XSD = CurieNamespace('xsd', 'http://www.w3.org/2001/XMLSchema#') +DEFAULT_ = VM + + +# Types + +# Class references +class ConstraintCheckId(URIorCURIE): + pass + + +class NodeId(URIorCURIE): + pass + + +class TypeSeverityKeyValueType(URIorCURIE): + pass + + +@dataclass +class ConstraintCheck(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.ConstraintCheck + class_class_curie: ClassVar[str] = "vm:ConstraintCheck" + class_name: ClassVar[str] = "ConstraintCheck" + class_model_uri: ClassVar[URIRef] = VM.ConstraintCheck + + id: Union[str, ConstraintCheckId] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.id): + self.MissingRequiredField("id") + if not isinstance(self.id, ConstraintCheckId): + self.id = ConstraintCheckId(self.id) + + super().__post_init__(**kwargs) + + +@dataclass +class Node(YAMLRoot): + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.Node + class_class_curie: ClassVar[str] = "vm:Node" + class_name: ClassVar[str] = "Node" + class_model_uri: ClassVar[URIRef] = VM.Node + + id: Union[str, NodeId] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.id): + self.MissingRequiredField("id") 
+ if not isinstance(self.id, NodeId): + self.id = NodeId(self.id) + + super().__post_init__(**kwargs) + + +@dataclass +class ValidationConfiguration(YAMLRoot): + """ + Configuration parameters for execution of a validation report + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.ValidationConfiguration + class_class_curie: ClassVar[str] = "vm:ValidationConfiguration" + class_name: ClassVar[str] = "ValidationConfiguration" + class_model_uri: ClassVar[URIRef] = VM.ValidationConfiguration + + max_number_results_per_type: Optional[int] = None + type_severity_map: Optional[Union[Dict[Union[str, TypeSeverityKeyValueType], Union[dict, "TypeSeverityKeyValue"]], List[Union[dict, "TypeSeverityKeyValue"]]]] = empty_dict() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.max_number_results_per_type is not None and not isinstance(self.max_number_results_per_type, int): + self.max_number_results_per_type = int(self.max_number_results_per_type) + + self._normalize_inlined_as_dict(slot_name="type_severity_map", slot_type=TypeSeverityKeyValue, key_name="type", keyed=True) + + super().__post_init__(**kwargs) + + +@dataclass +class RepairConfiguration(YAMLRoot): + """ + Configuration parameters for execution of validation repairs + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.RepairConfiguration + class_class_curie: ClassVar[str] = "vm:RepairConfiguration" + class_name: ClassVar[str] = "RepairConfiguration" + class_model_uri: ClassVar[URIRef] = VM.RepairConfiguration + + validation_configuration: Optional[Union[dict, ValidationConfiguration]] = None + dry_run: Optional[Union[bool, Bool]] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.validation_configuration is not None and not isinstance(self.validation_configuration, ValidationConfiguration): + self.validation_configuration = 
ValidationConfiguration(**as_dict(self.validation_configuration)) + + if self.dry_run is not None and not isinstance(self.dry_run, Bool): + self.dry_run = Bool(self.dry_run) + + super().__post_init__(**kwargs) + + +@dataclass +class TypeSeverityKeyValue(YAMLRoot): + """ + key-value pair that maps a validation result type to a severity setting, for overriding default severity + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.TypeSeverityKeyValue + class_class_curie: ClassVar[str] = "vm:TypeSeverityKeyValue" + class_name: ClassVar[str] = "TypeSeverityKeyValue" + class_model_uri: ClassVar[URIRef] = VM.TypeSeverityKeyValue + + type: Union[str, TypeSeverityKeyValueType] = None + severity: Optional[Union[str, "SeverityType"]] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.type): + self.MissingRequiredField("type") + if not isinstance(self.type, TypeSeverityKeyValueType): + self.type = TypeSeverityKeyValueType(self.type) + + if self.severity is not None and not isinstance(self.severity, SeverityType): + self.severity = SeverityType(self.severity) + + super().__post_init__(**kwargs) + + +@dataclass +class Report(YAMLRoot): + """ + A report object that is a holder to multiple report results + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.Report + class_class_curie: ClassVar[str] = "vm:Report" + class_name: ClassVar[str] = "Report" + class_model_uri: ClassVar[URIRef] = VM.Report + + results: Optional[Union[Union[dict, "Result"], List[Union[dict, "Result"]]]] = empty_list() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if not isinstance(self.results, list): + self.results = [self.results] if self.results is not None else [] + self.results = [v if isinstance(v, Result) else Result(**as_dict(v)) for v in self.results] + + super().__post_init__(**kwargs) + + +@dataclass +class ValidationReport(Report): + """ + 
A report that consists of validation results + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = SH.ValidationReport + class_class_curie: ClassVar[str] = "sh:ValidationReport" + class_name: ClassVar[str] = "ValidationReport" + class_model_uri: ClassVar[URIRef] = VM.ValidationReport + + results: Optional[Union[Union[dict, "ValidationResult"], List[Union[dict, "ValidationResult"]]]] = empty_list() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if not isinstance(self.results, list): + self.results = [self.results] if self.results is not None else [] + self.results = [v if isinstance(v, ValidationResult) else ValidationResult(**as_dict(v)) for v in self.results] + + super().__post_init__(**kwargs) + + +@dataclass +class RepairReport(Report): + """ + A report that consists of repair operation results + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.RepairReport + class_class_curie: ClassVar[str] = "vm:RepairReport" + class_name: ClassVar[str] = "RepairReport" + class_model_uri: ClassVar[URIRef] = VM.RepairReport + + results: Optional[Union[Union[dict, "RepairOperation"], List[Union[dict, "RepairOperation"]]]] = empty_list() + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if not isinstance(self.results, list): + self.results = [self.results] if self.results is not None else [] + self.results = [v if isinstance(v, RepairOperation) else RepairOperation(**as_dict(v)) for v in self.results] + + super().__post_init__(**kwargs) + + +class Result(YAMLRoot): + """ + Abstract base class for any individual report result + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.Result + class_class_curie: ClassVar[str] = "vm:Result" + class_name: ClassVar[str] = "Result" + class_model_uri: ClassVar[URIRef] = VM.Result + + +@dataclass +class ValidationResult(Result): + """ + An individual result arising from 
validation of a data instance using a particular rule + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = SH.ValidationResult + class_class_curie: ClassVar[str] = "sh:ValidationResult" + class_name: ClassVar[str] = "ValidationResult" + class_model_uri: ClassVar[URIRef] = VM.ValidationResult + + type: Union[str, "ConstraintType"] = None + severity: Optional[Union[str, "SeverityType"]] = None + subject: Optional[str] = None + instantiates: Optional[Union[str, NodeId]] = None + predicate: Optional[Union[str, NodeId]] = None + object: Optional[Union[str, NodeId]] = None + object_str: Optional[str] = None + source: Optional[str] = None + info: Optional[str] = None + normalized: Optional[Union[bool, Bool]] = None + repaired: Optional[Union[bool, Bool]] = None + source_line_number: Optional[int] = None + source_column_number: Optional[int] = None + source_location: Optional[str] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self._is_empty(self.type): + self.MissingRequiredField("type") + if not isinstance(self.type, ConstraintType): + self.type = ConstraintType(self.type) + + if self.severity is not None and not isinstance(self.severity, SeverityType): + self.severity = SeverityType(self.severity) + + if self.subject is not None and not isinstance(self.subject, str): + self.subject = str(self.subject) + + if self.instantiates is not None and not isinstance(self.instantiates, NodeId): + self.instantiates = NodeId(self.instantiates) + + if self.predicate is not None and not isinstance(self.predicate, NodeId): + self.predicate = NodeId(self.predicate) + + if self.object is not None and not isinstance(self.object, NodeId): + self.object = NodeId(self.object) + + if self.object_str is not None and not isinstance(self.object_str, str): + self.object_str = str(self.object_str) + + if self.source is not None and not isinstance(self.source, str): + self.source = str(self.source) + + if self.info is not None 
and not isinstance(self.info, str): + self.info = str(self.info) + + if self.normalized is not None and not isinstance(self.normalized, Bool): + self.normalized = Bool(self.normalized) + + if self.repaired is not None and not isinstance(self.repaired, Bool): + self.repaired = Bool(self.repaired) + + if self.source_line_number is not None and not isinstance(self.source_line_number, int): + self.source_line_number = int(self.source_line_number) + + if self.source_column_number is not None and not isinstance(self.source_column_number, int): + self.source_column_number = int(self.source_column_number) + + if self.source_location is not None and not isinstance(self.source_location, str): + self.source_location = str(self.source_location) + + super().__post_init__(**kwargs) + + +@dataclass +class RepairOperation(Result): + """ + The result of performing an individual repair + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = VM.RepairOperation + class_class_curie: ClassVar[str] = "vm:RepairOperation" + class_name: ClassVar[str] = "RepairOperation" + class_model_uri: ClassVar[URIRef] = VM.RepairOperation + + repairs: Optional[Union[dict, ValidationResult]] = None + modified: Optional[Union[bool, Bool]] = None + successful: Optional[Union[bool, Bool]] = None + info: Optional[str] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + if self.repairs is not None and not isinstance(self.repairs, ValidationResult): + self.repairs = ValidationResult(**as_dict(self.repairs)) + + if self.modified is not None and not isinstance(self.modified, Bool): + self.modified = Bool(self.modified) + + if self.successful is not None and not isinstance(self.successful, Bool): + self.successful = Bool(self.successful) + + if self.info is not None and not isinstance(self.info, str): + self.info = str(self.info) + + super().__post_init__(**kwargs) + + +# Enumerations +class SeverityType(EnumDefinitionImpl): + + FATAL = 
PermissibleValue(text="FATAL") + ERROR = PermissibleValue(text="ERROR", + meaning=SH.Violation) + WARNING = PermissibleValue(text="WARNING", + meaning=SH.Warning) + INFO = PermissibleValue(text="INFO", + meaning=SH.Info) + + _defn = EnumDefinition( + name="SeverityType", + ) + +class ConstraintType(EnumDefinitionImpl): + + TypeConstraint = PermissibleValue(text="TypeConstraint", + description="constraint in which the range is a type, and the slot value must conform to the type", + meaning=SH.DatatypeConstraintComponent) + MinCountConstraint = PermissibleValue(text="MinCountConstraint", + description="cardinality constraint where the number of values of the slot must be greater or equal to a specified minimum", + meaning=SH.MinCountConstraintComponent) + RequiredConstraint = PermissibleValue(text="RequiredConstraint", + description="cardinality constraint where there MUST be at least one value of the slot", + meaning=SH.MinCountConstraintComponent) + RecommendedConstraint = PermissibleValue(text="RecommendedConstraint", + description="cardinality constraint where there SHOULD be at least one value of the slot", + meaning=SH.MinCountConstraintComponent) + MaxCountConstraint = PermissibleValue(text="MaxCountConstraint", + description="cardinality constraint where the number of values of the slot must be less than or equal to a specified maximum", + meaning=SH.MaxCountConstraintComponent) + SingleValuedConstraint = PermissibleValue(text="SingleValuedConstraint", + description="the value of the slot must be atomic and not a collection") + MultiValuedConstraint = PermissibleValue(text="MultiValuedConstraint", + description="the value of the slot must be a collection and not atomic") + DeprecatedProperty = PermissibleValue(text="DeprecatedProperty", + description="constraint where the instance slot should not be deprecated", + meaning=VM.DeprecatedProperty) + MaxLengthConstraint = PermissibleValue(text="MaxLengthConstraint", + description="constraint where the slot value 
must have a length equal to or less than a specified maximum", + meaning=SH.MaxLengthConstraintComponent) + MinLengthConstraint = PermissibleValue(text="MinLengthConstraint", + description="constraint where the slot value must have a length equal to or less than a specified maximum", + meaning=SH.MinLengthConstraintComponent) + PatternConstraint = PermissibleValue(text="PatternConstraint", + description="constraint where the slot value must match a given regular expression pattern", + meaning=SH.PatternConstraintComponent) + ClosedClassConstraint = PermissibleValue(text="ClosedClassConstraint", + description="constraint where the slot value must be allowable for the instantiated class", + meaning=SH.ClosedConstraintComponent) + DesignatesTypeConstraint = PermissibleValue(text="DesignatesTypeConstraint") + InstanceConstraint = PermissibleValue(text="InstanceConstraint", + meaning=SH.NodeConstraintComponent) + SlotConstraint = PermissibleValue(text="SlotConstraint", + meaning=SH.PropertyConstraintComponent) + PermissibleValueConstraint = PermissibleValue(text="PermissibleValueConstraint", + description="constraint where the slot value must be one of a set of permissible values", + meaning=SH.InConstraintComponent) + UndeclaredSlotConstraint = PermissibleValue(text="UndeclaredSlotConstraint") + RuleConstraint = PermissibleValue(text="RuleConstraint", + description="constraint where the structure of an object must conform to a specified rule") + ExpressionConstraint = PermissibleValue(text="ExpressionConstraint") + EqualsExpressionConstraint = PermissibleValue(text="EqualsExpressionConstraint", + meaning=SH.EqualsConstraintComponent) + LessThanExpressionConstraint = PermissibleValue(text="LessThanExpressionConstraint", + meaning=SH.LessThanConstraintComponent) + LessThanOrEqualsExpressionConstraint = PermissibleValue(text="LessThanOrEqualsExpressionConstraint", + meaning=SH.LessThanOrEqualsComponent) + DisjointConstraint = PermissibleValue(text="DisjointConstraint", + 
meaning=SH.DisjointConstraintComponent) + MinimumValueConstraint = PermissibleValue(text="MinimumValueConstraint", + meaning=SH.MinInclusiveConstraintComponent) + MaximumValueConstraint = PermissibleValue(text="MaximumValueConstraint", + meaning=SH.MaxInclusiveConstraintComponent) + MinimumExclusiveValueConstraint = PermissibleValue(text="MinimumExclusiveValueConstraint", + meaning=SH.MinExclusiveInclusiveConstraintComponent) + MaximumExclusiveValueConstraint = PermissibleValue(text="MaximumExclusiveValueConstraint", + meaning=SH.MaxExclusiveInclusiveConstraintComponent) + CollectionFormConstraint = PermissibleValue(text="CollectionFormConstraint") + ListCollectionFormConstraint = PermissibleValue(text="ListCollectionFormConstraint") + DictCollectionFormConstraint = PermissibleValue(text="DictCollectionFormConstraint") + SimpleDictCollectionFormConstraint = PermissibleValue(text="SimpleDictCollectionFormConstraint") + CompactDictCollectionFormConstraint = PermissibleValue(text="CompactDictCollectionFormConstraint") + ExpandedDictCollectionFormConstraint = PermissibleValue(text="ExpandedDictCollectionFormConstraint") + + _defn = EnumDefinition( + name="ConstraintType", + ) + +# Slots +class slots: + pass + +slots.type = Slot(uri=SH.sourceConstraintComponent, name="type", curie=SH.curie('sourceConstraintComponent'), + model_uri=VM.type, domain=None, range=Union[str, "ConstraintType"]) + +slots.subject = Slot(uri=SH.focusNode, name="subject", curie=SH.curie('focusNode'), + model_uri=VM.subject, domain=None, range=Optional[str]) + +slots.instantiates = Slot(uri=VM.instantiates, name="instantiates", curie=VM.curie('instantiates'), + model_uri=VM.instantiates, domain=None, range=Optional[Union[str, NodeId]]) + +slots.predicate = Slot(uri=VM.predicate, name="predicate", curie=VM.curie('predicate'), + model_uri=VM.predicate, domain=None, range=Optional[Union[str, NodeId]]) + +slots.object = Slot(uri=SH.value, name="object", curie=SH.curie('value'), + model_uri=VM.object, 
domain=None, range=Optional[Union[str, NodeId]]) + +slots.object_str = Slot(uri=VM.object_str, name="object_str", curie=VM.curie('object_str'), + model_uri=VM.object_str, domain=None, range=Optional[str]) + +slots.source = Slot(uri=VM.source, name="source", curie=VM.curie('source'), + model_uri=VM.source, domain=None, range=Optional[str]) + +slots.severity = Slot(uri=SH.resultSeverity, name="severity", curie=SH.curie('resultSeverity'), + model_uri=VM.severity, domain=None, range=Optional[Union[str, "SeverityType"]]) + +slots.info = Slot(uri=SH.resultMessage, name="info", curie=SH.curie('resultMessage'), + model_uri=VM.info, domain=None, range=Optional[str]) + +slots.results = Slot(uri=SH.result, name="results", curie=SH.curie('result'), + model_uri=VM.results, domain=None, range=Optional[Union[Union[dict, Result], List[Union[dict, Result]]]]) + +slots.normalized = Slot(uri=VM.normalized, name="normalized", curie=VM.curie('normalized'), + model_uri=VM.normalized, domain=None, range=Optional[Union[bool, Bool]]) + +slots.repaired = Slot(uri=VM.repaired, name="repaired", curie=VM.curie('repaired'), + model_uri=VM.repaired, domain=None, range=Optional[Union[bool, Bool]]) + +slots.source_line_number = Slot(uri=VM.source_line_number, name="source_line_number", curie=VM.curie('source_line_number'), + model_uri=VM.source_line_number, domain=None, range=Optional[int]) + +slots.source_column_number = Slot(uri=VM.source_column_number, name="source_column_number", curie=VM.curie('source_column_number'), + model_uri=VM.source_column_number, domain=None, range=Optional[int]) + +slots.source_location = Slot(uri=VM.source_location, name="source_location", curie=VM.curie('source_location'), + model_uri=VM.source_location, domain=None, range=Optional[str]) + +slots.constraintCheck__id = Slot(uri=VM.id, name="constraintCheck__id", curie=VM.curie('id'), + model_uri=VM.constraintCheck__id, domain=None, range=URIRef) + +slots.node__id = Slot(uri=VM.id, name="node__id", 
curie=VM.curie('id'), + model_uri=VM.node__id, domain=None, range=URIRef) + +slots.validationConfiguration__max_number_results_per_type = Slot(uri=VM.max_number_results_per_type, name="validationConfiguration__max_number_results_per_type", curie=VM.curie('max_number_results_per_type'), + model_uri=VM.validationConfiguration__max_number_results_per_type, domain=None, range=Optional[int]) + +slots.validationConfiguration__type_severity_map = Slot(uri=VM.type_severity_map, name="validationConfiguration__type_severity_map", curie=VM.curie('type_severity_map'), + model_uri=VM.validationConfiguration__type_severity_map, domain=None, range=Optional[Union[Dict[Union[str, TypeSeverityKeyValueType], Union[dict, TypeSeverityKeyValue]], List[Union[dict, TypeSeverityKeyValue]]]]) + +slots.repairConfiguration__validation_configuration = Slot(uri=VM.validation_configuration, name="repairConfiguration__validation_configuration", curie=VM.curie('validation_configuration'), + model_uri=VM.repairConfiguration__validation_configuration, domain=None, range=Optional[Union[dict, ValidationConfiguration]]) + +slots.repairConfiguration__dry_run = Slot(uri=VM.dry_run, name="repairConfiguration__dry_run", curie=VM.curie('dry_run'), + model_uri=VM.repairConfiguration__dry_run, domain=None, range=Optional[Union[bool, Bool]]) + +slots.typeSeverityKeyValue__type = Slot(uri=VM.type, name="typeSeverityKeyValue__type", curie=VM.curie('type'), + model_uri=VM.typeSeverityKeyValue__type, domain=None, range=URIRef) + +slots.typeSeverityKeyValue__severity = Slot(uri=VM.severity, name="typeSeverityKeyValue__severity", curie=VM.curie('severity'), + model_uri=VM.typeSeverityKeyValue__severity, domain=None, range=Optional[Union[str, "SeverityType"]]) + +slots.repairOperation__repairs = Slot(uri=VM.repairs, name="repairOperation__repairs", curie=VM.curie('repairs'), + model_uri=VM.repairOperation__repairs, domain=None, range=Optional[Union[dict, ValidationResult]]) + +slots.repairOperation__modified = 
Slot(uri=VM.modified, name="repairOperation__modified", curie=VM.curie('modified'), + model_uri=VM.repairOperation__modified, domain=None, range=Optional[Union[bool, Bool]]) + +slots.repairOperation__successful = Slot(uri=VM.successful, name="repairOperation__successful", curie=VM.curie('successful'), + model_uri=VM.repairOperation__successful, domain=None, range=Optional[Union[bool, Bool]]) + +slots.repairOperation__info = Slot(uri=VM.info, name="repairOperation__info", curie=VM.curie('info'), + model_uri=VM.repairOperation__info, domain=None, range=Optional[str]) + +slots.ValidationReport_results = Slot(uri=SH.result, name="ValidationReport_results", curie=SH.curie('result'), + model_uri=VM.ValidationReport_results, domain=ValidationReport, range=Optional[Union[Union[dict, "ValidationResult"], List[Union[dict, "ValidationResult"]]]]) + +slots.RepairReport_results = Slot(uri=SH.result, name="RepairReport_results", curie=SH.curie('result'), + model_uri=VM.RepairReport_results, domain=RepairReport, range=Optional[Union[Union[dict, "RepairOperation"], List[Union[dict, "RepairOperation"]]]]) diff --git a/linkml_runtime/processing/validation_datamodel.yaml b/linkml_runtime/processing/validation_datamodel.yaml new file mode 100644 index 00000000..9abd8db4 --- /dev/null +++ b/linkml_runtime/processing/validation_datamodel.yaml @@ -0,0 +1,335 @@ +# TODO: fold this back into linkml-model +id: https://w3id.org/linkml/validation_results +title: Validation Results Datamodel +name: validaton-results +description: |- + A datamodel for data validation results. 
+license: https://creativecommons.org/publicdomain/zero/1.0/ + +prefixes: + linkml: https://w3id.org/linkml/ + vm: https://w3id.org/linkml/validation-model/ + skos: http://www.w3.org/2004/02/skos/core# + pav: http://purl.org/pav/ + schema: http://schema.org/ + sh: http://www.w3.org/ns/shacl# + +default_prefix: vm +default_range: string + +default_curi_maps: + - semweb_context + +emit_prefixes: + - linkml + - rdf + - rdfs + - xsd + - owl + +imports: + - linkml:types + +#================================== +# Classes # +#================================== +classes: + + + ConstraintCheck: + attributes: + id: + range: uriorcurie + identifier: true + + Node: + attributes: + id: + range: uriorcurie + identifier: true + + ValidationConfiguration: + description: Configuration parameters for execution of a validation report + attributes: + max_number_results_per_type: + range: integer + description: if set then truncate results such that no more than this number of results are reported per type + type_severity_map: + description: Allows overriding of severity of a particular type + range: TypeSeverityKeyValue + inlined: true + multivalued: true + + RepairConfiguration: + description: Configuration parameters for execution of validation repairs + attributes: + validation_configuration: + description: repair configurations include validation configurations + range: ValidationConfiguration + dry_run: + range: boolean + + + TypeSeverityKeyValue: + description: key-value pair that maps a validation result type to a severity setting, for overriding default severity + conforms_to: wikidata:Q4818718 + attributes: + type: + key: true + range: uriorcurie + severity: + range: SeverityType + + Report: + abstract: true + description: A report object that is a holder to multiple report results + slots: + - results + + ValidationReport: + is_a: Report + class_uri: sh:ValidationReport + description: A report that consists of validation results + slot_usage: + results: + range: 
ValidationResult + todos: + - add prov object + + RepairReport: + is_a: Report + description: A report that consists of repair operation results + slot_usage: + results: + range: RepairOperation + + Result: + abstract: true + description: Abstract base class for any individual report result + + ValidationResult: + is_a: Result + class_uri: sh:ValidationResult + description: An individual result arising from validation of a data instance using a particular rule + slots: + - type + - severity + - subject + - instantiates + - predicate + - object + - object_str + - source + - info + - normalized + - repaired + - source_line_number + - source_column_number + - source_location + + RepairOperation: + is_a: Result + description: The result of performing an individual repair + todos: + - integrate with kgcl data model, to be able to describe changes + attributes: + repairs: + range: ValidationResult + modified: + range: boolean + successful: + range: boolean + info: + range: string + + +#================================== +# Slots # +#================================== +slots: + type: + range: ConstraintType + slot_uri: sh:sourceConstraintComponent + description: >- + The type of validation result. SHACL validation vocabulary is recommended for checks against a datamodel. + For principle checks use the corresponding rule or principle, e.g. 
GO RULE ID, OBO Principle ID + required: true + subject: + description: The instance which the result is about + #range: Node + slot_uri: sh:focusNode + #required: true + instantiates: + description: The type of the subject + range: Node + exact_mappings: + - sh:sourceShape + predicate: + description: The predicate or property of the subject which the result is about + range: Node + related_mappings: + - sh:resultPath + object: + range: Node + slot_uri: sh:value + object_str: + range: string + source: + range: string + severity: + description: the severity of the issue + range: SeverityType + slot_uri: sh:resultSeverity + info: + description: additional information about the issue + range: string + slot_uri: sh:resultMessage + results: + description: collection of results + slot_uri: sh:result + range: Result + multivalued: true + inlined: true + inlined_as_list: true + normalized: + range: boolean + repaired: + range: boolean + source_line_number: + range: integer + source_column_number: + range: integer + source_location: + +#================================== +# Enumerations # +#================================== +enums: + SeverityType: + exact_mappings: + - sh:Severity + permissible_values: + FATAL: + ERROR: + meaning: sh:Violation + WARNING: + meaning: sh:Warning + INFO: + meaning: sh:Info + + ConstraintType: + # sh:sourceConstraintComponent + permissible_values: + TypeConstraint: + meaning: sh:DatatypeConstraintComponent + description: constraint in which the range is a type, and the slot value must conform to the type + annotations: + element: linkml:range + MinCountConstraint: + meaning: sh:MinCountConstraintComponent + description: cardinality constraint where the number of values of the slot must be greater or equal to a specified minimum + annotations: + element: linkml:minimum_value + RequiredConstraint: + is_a: MinCountConstraint + meaning: sh:MinCountConstraintComponent + description: cardinality constraint where there MUST be at least one value of 
the slot + annotations: + element: linkml:required + RecommendedConstraint: + is_a: MinCountConstraint + meaning: sh:MinCountConstraintComponent + description: cardinality constraint where there SHOULD be at least one value of the slot + annotations: + element: linkml:recommended + severity: WARNING + MaxCountConstraint: + meaning: sh:MaxCountConstraintComponent + description: cardinality constraint where the number of values of the slot must be less than or equal to a specified maximum + annotations: + element: linkml:maximum_value + SingleValuedConstraint: + is_a: MaxCountConstraint + description: the value of the slot must be atomic and not a collection + MultiValuedConstraint: + description: the value of the slot must be a collection and not atomic + DeprecatedProperty: + meaning: vm:DeprecatedProperty + description: constraint where the instance slot should not be deprecated + annotations: + element: linkml:deprecated + MaxLengthConstraint: + meaning: sh:MaxLengthConstraintComponent + description: constraint where the slot value must have a length equal to or less than a specified maximum + MinLengthConstraint: + meaning: sh:MinLengthConstraintComponent + description: constraint where the slot value must have a length equal to or greater than a specified minimum + PatternConstraint: + meaning: sh:PatternConstraintComponent + description: constraint where the slot value must match a given regular expression pattern + annotations: + element: linkml:pattern + ClosedClassConstraint: + meaning: sh:ClosedConstraintComponent + description: constraint where the slot value must be allowable for the instantiated class + annotations: + element: linkml:attributes + DesignatesTypeConstraint: + InstanceConstraint: + meaning: sh:NodeConstraintComponent + SlotConstraint: + meaning: sh:PropertyConstraintComponent + PermissibleValueConstraint: + meaning: sh:InConstraintComponent + description: constraint where the slot value must be one of a set of permissible values +
annotations: + element: linkml:permissible_values + UndeclaredSlotConstraint: + is_a: ClosedClassConstraint + RuleConstraint: + description: constraint where the structure of an object must conform to a specified rule + ExpressionConstraint: + EqualsExpressionConstraint: + is_a: ExpressionConstraint + meaning: sh:EqualsConstraintComponent + annotations: + element: linkml:equals_expression + LessThanExpressionConstraint: + is_a: ExpressionConstraint + meaning: sh:LessThanConstraintComponent + LessThanOrEqualsExpressionConstraint: + is_a: ExpressionConstraint + meaning: sh:LessThanOrEqualsConstraintComponent + DisjointConstraint: + is_a: ExpressionConstraint + meaning: sh:DisjointConstraintComponent + MinimumValueConstraint: + meaning: sh:MinInclusiveConstraintComponent + annotations: + element: linkml:minimum_value + MaximumValueConstraint: + meaning: sh:MaxInclusiveConstraintComponent + annotations: + element: linkml:maximum_exclusive_value + MinimumExclusiveValueConstraint: + meaning: sh:MinExclusiveConstraintComponent + annotations: + element: linkml:minimum_value + MaximumExclusiveValueConstraint: + meaning: sh:MaxExclusiveConstraintComponent + annotations: + element: linkml:maximum_exclusive_value + CollectionFormConstraint: + ListCollectionFormConstraint: + is_a: CollectionFormConstraint + DictCollectionFormConstraint: + is_a: CollectionFormConstraint + SimpleDictCollectionFormConstraint: + is_a: DictCollectionFormConstraint + CompactDictCollectionFormConstraint: + is_a: DictCollectionFormConstraint + ExpandedDictCollectionFormConstraint: + is_a: DictCollectionFormConstraint + + + diff --git a/linkml_runtime/utils/schemaview.py b/linkml_runtime/utils/schemaview.py index e61b235c..15da6252 100644 --- a/linkml_runtime/utils/schemaview.py +++ b/linkml_runtime/utils/schemaview.py @@ -1633,3 +1633,49 @@ def materialize_pattern_into_slot_definition(slot_definition: SlotDefinition) -> if class_definition.attributes: for slot_definition in
def materialize_derived_schema(self) -> SchemaDefinition:
    """Materialize this schema view into a new, derived schema definition.

    A fresh ``SchemaDefinition`` is created with the id, name, imports and
    prefixes of ``self.schema``; ``merge_imports()`` is then run on a
    ``SchemaView`` wrapping it. After that, deep copies of the elements
    visible through this view are added to it:

    * types: ``uri``, ``base`` and ``pattern`` that are unset on a type are
      filled in from its ancestors, in the order ``type_ancestors`` yields
      them;
    * classes: every induced slot is stored as an attribute of the class.
      ``pattern``, ``maximum_value`` and ``minimum_value`` are copied from a
      type range when the type sets them; a slot whose range is a class
      without an identifier/key slot is marked ``inlined`` and
      ``inlined_as_list``; ``inlined_as_list`` implies ``inlined``; and
      ``identifier``/``key`` slots are marked ``required``;
    * subsets and enums: copied as they are.

    :return: the derived schema definition
    :raises KeyError: if ``type_ancestors`` names a type that this view
        does not know about
    """
    source_schema = self.schema
    # Defensive copies: the derived schema must not share its imports/prefixes
    # containers with the schema backing this view.
    derived_schema = SchemaDefinition(
        id=source_schema.id,
        name=source_schema.name,
        imports=deepcopy(source_schema.imports),
        prefixes=deepcopy(source_schema.prefixes),
    )
    derived_schemaview = SchemaView(derived_schema)
    derived_schemaview.merge_imports()

    source_types = self.all_types()
    for typ in [deepcopy(t) for t in source_types.values()]:
        for typ_anc_name in self.type_ancestors(typ.name, reflexive=False):
            # Prefer the ancestor as already materialized in the derived
            # schema. The derived schema starts without this schema's own
            # types, so an ancestor that has not been copied over yet is
            # looked up in this view instead of raising KeyError.
            ancestor = derived_schema.types.get(typ_anc_name)
            if ancestor is None:
                ancestor = source_types[typ_anc_name]
            if not typ.uri:
                typ.uri = ancestor.uri
            if not typ.base:
                typ.base = ancestor.base
            if not typ.pattern:
                typ.pattern = ancestor.pattern
        derived_schema.types[typ.name] = typ

    for cls in [deepcopy(c) for c in self.all_classes().values()]:
        for slot in self.class_induced_slots(cls.name):
            slot_range_element = self.get_element(slot.range)
            if isinstance(slot_range_element, TypeDefinition):
                # Constraints declared on the range type are pushed down
                # onto the slot itself.
                for metaslot in ("pattern", "maximum_value", "minimum_value"):
                    metaslot_val = getattr(slot_range_element, metaslot, None)
                    if metaslot_val is not None:
                        setattr(slot, metaslot, metaslot_val)
            if isinstance(slot_range_element, ClassDefinition):
                range_identifier_slot = self.get_identifier_slot(
                    slot_range_element.name, use_key=True
                )
                # No identifier or key on the range class: mark the slot as
                # inlined (as a list).
                if not range_identifier_slot:
                    slot.inlined = True
                    slot.inlined_as_list = True
            if slot.inlined_as_list:
                slot.inlined = True
            if slot.identifier or slot.key:
                slot.required = True
            cls.attributes[slot.name] = slot
        derived_schema.classes[cls.name] = cls

    for subset in [deepcopy(s) for s in self.all_subsets().values()]:
        derived_schema.subsets[subset.name] = subset
    for enum in [deepcopy(e) for e in self.all_enums().values()]:
        derived_schema.enums[enum.name] = enum
    return derived_schema
def test_line_numbers(self):
    """Check the line recorded on each top-level key by DupCheckYamlLoader.

    The document literal opens with a newline, and the expected values
    treat that blank first line as line 0 (so ``name`` is expected on
    line 1). Only top-level keys are compared; the nested keys under ``x``
    are not checked.
    """
    yaml_text = """
        name: schema1
        info: foo
        x:
          a: 1
          b: 2
        l: [1, 2, 3]
        """
    expected = [
        ('name', 1),
        ('info', 2),
        ('x', 3),
        ('l', 6),
    ]
    loaded = yaml.load(yaml_text, DupCheckYamlLoader)
    observed = []
    for key in loaded:
        # each key exposes its source position through `_s`
        observed.append((key, key._s.line))
    self.assertCountEqual(expected, observed)