diff --git a/.gitignore b/.gitignore index 2c2b1946..b84a58fa 100644 --- a/.gitignore +++ b/.gitignore @@ -140,3 +140,4 @@ Pipfile.lock # No Pycharm .idea/ +.vscode diff --git a/linkml_runtime/utils/pattern.py b/linkml_runtime/utils/pattern.py index 9da2225f..b36339db 100644 --- a/linkml_runtime/utils/pattern.py +++ b/linkml_runtime/utils/pattern.py @@ -1,83 +1,64 @@ +from functools import lru_cache import re from typing import Dict -def generate_patterns(schema_view) -> Dict[str, str]: - """Generates a dictionary of slot patterns corresponding to - the structured patterns in the settings. - - :param schema_view: SchemaView object with LinkML YAML - already loaded - :return generated_patterns: dictionary with the - expanded structured patterns - """ - - # fetch settings from schema_view - settings_dict = schema_view.schema.settings - - # dictionary of key and string value of settings dict - format_spec = {} - - for k, setting in settings_dict.items(): - - # create spec dictionary with keys that will replace - # substrings in the structured pattern syntax - format_spec[k] = setting.setting_value - - # dictionary with structured patterns in the key and - # expanded, or materialized patterns as values - generated_patterns = {} +class PatternResolver(): # regular expression capturing the various use cases # for the optionally dot separated, curly braces bound, pattern syntax var_name = re.compile("{([a-z0-9_-]+([\.-_ ][a-z0-9]+)*)}", re.IGNORECASE) - for _, slot_defn in schema_view.all_slots().items(): - if slot_defn.structured_pattern: - struct_pat = slot_defn.structured_pattern - - pattern = struct_pat.syntax - - # compute pattern from structured patterns - # and format_spec dictionary - - - # apply the regex to the pattern and look for matches - matches = var_name.finditer(pattern) - - reversed = [] - for item in matches: - # Detect double set brackets - match_string = None - if ( - item.start() > 0 - and item.end() < len(pattern) - and pattern[item.start() - 1] == "{" - and pattern[item.end()] == "}" - ): - match_string = item.group(1) - - elif item.group(1) in format_spec: - match_string = str(format_spec[item.group(1)]) - - if match_string: - reversed.insert( - 0, - { - "string": match_string, - "start": item.start(), - "end": item.end(), - }, - ) - - converted = pattern - for item in reversed: - converted = ( - converted[: item["start"]] - + item["string"] - + converted[item["end"] :] + def __init__(self, schema_view): + # fetch settings from schema_view + settings_dict = schema_view.schema.settings + + # dictionary of key and string value of settings dict + self.format_spec = {} + + for k, setting in settings_dict.items(): + + # create spec dictionary with keys that will replace + # substrings in the structured pattern syntax + self.format_spec[k] = setting.setting_value + + @lru_cache + def resolve(self, pattern: str) -> str: + # apply the regex to the pattern and look for matches + matches = self.var_name.finditer(pattern) + + reversed = [] + for item in matches: + # Detect double set brackets + match_string = None + if ( + item.start() > 0 + and item.end() < len(pattern) + and pattern[item.start() - 1] == "{" + and pattern[item.end()] == "}" + ): + match_string = item.group(1) + + elif item.group(1) in self.format_spec: + match_string = str(self.format_spec[item.group(1)]) + + if match_string: + reversed.insert( + 0, + { + "string": match_string, + "start": item.start(), + "end": item.end(), + }, ) - generated_patterns[pattern] = converted + converted = pattern + for item in reversed: + converted = ( + converted[: item["start"]] + + item["string"] + + converted[item["end"] :] + ) + + return converted - return generated_patterns diff --git a/linkml_runtime/utils/schemaview.py b/linkml_runtime/utils/schemaview.py index a86ff54b..0d7ea9c6 100644 --- a/linkml_runtime/utils/schemaview.py +++ b/linkml_runtime/utils/schemaview.py @@ -9,7 +9,7 @@ from linkml_runtime.utils.namespaces import Namespaces from deprecated.classic import deprecated from linkml_runtime.utils.context_utils import parse_import_map, map_import -from linkml_runtime.utils.pattern import generate_patterns +from linkml_runtime.utils.pattern import PatternResolver from linkml_runtime.linkml_model.meta import * from enum import Enum logger = logging.getLogger(__name__) @@ -1462,12 +1462,22 @@ def materialize_patterns(self) -> None: into regular expressions based on composite patterns provided in the settings dictionary. """ - patterns_dict = generate_patterns(self) - - for _, slot_defn in self.all_slots().items(): - if slot_defn.structured_pattern: - - pattern = slot_defn.structured_pattern.syntax - - if pattern in patterns_dict: - slot_defn.pattern = patterns_dict[pattern] + resolver = PatternResolver(self) + + def materialize_pattern_into_slot_definition(slot_definition: SlotDefinition) -> None: + if not slot_definition.structured_pattern: + return + pattern = slot_definition.structured_pattern.syntax + slot_definition.pattern = resolver.resolve(pattern) + + for slot_definition in self.all_slots().values(): + materialize_pattern_into_slot_definition(slot_definition) + + for class_definition in self.all_classes().values(): + if class_definition.slot_usage: + for slot_definition in class_definition.slot_usage.values(): + materialize_pattern_into_slot_definition(slot_definition) + + if class_definition.attributes: + for slot_definition in class_definition.attributes.values(): + materialize_pattern_into_slot_definition(slot_definition) diff --git a/tests/test_utils/input/pattern-example.yaml b/tests/test_utils/input/pattern-example.yaml index 544f1edc..f40b1d83 100644 --- a/tests/test_utils/input/pattern-example.yaml +++ b/tests/test_utils/input/pattern-example.yaml @@ -32,6 +32,7 @@ settings: unit.length: "(centimeter|meter|inch)" unit.weight: "(kg|g|lbs|stone)" email: "\\S+@\\S+{\\.\\w}+" + hyphenated_name: "\\S+-\\S+" #================================== # Classes # @@ -45,6 +46,23 @@ classes: - height - email + FancyPersonInfo: + is_a: PersonInfo + slot_usage: + name: + structured_pattern: + syntax: "\\S+ {hyphenated_name}" + interpolated: true + partial_match: false + + ClassWithAttributes: + attributes: + weight: + structured_pattern: + syntax: "{float} {unit.weight}" + interpolated: true + partial_match: false + #================================== # Slots # #================================== diff --git a/tests/test_utils/test_pattern.py b/tests/test_utils/test_pattern.py index 8c48e027..cc5fcdba 100644 --- a/tests/test_utils/test_pattern.py +++ b/tests/test_utils/test_pattern.py @@ -4,7 +4,7 @@ from linkml_runtime.utils.schemaview import SchemaView -from linkml_runtime.utils.pattern import generate_patterns +from linkml_runtime.utils.pattern import PatternResolver class PatternTestCase(unittest.TestCase): @@ -13,15 +13,10 @@ def test_generate_patterns(self): sv = SchemaView(env.input_path("pattern-example.yaml")) - # actual result returned from call to generate_patterns() - actual_dict = generate_patterns(sv) + resolver = PatternResolver(sv) - expected_dict = { - "{float} {unit.length}": "\\d+[\\.\\d+] (centimeter|meter|inch)", - "{float} {unit.weight}": "\\d+[\\.\\d+] (kg|g|lbs|stone)", - } - - self.assertDictEqual(actual_dict, expected_dict) + self.assertEqual(resolver.resolve("{float} {unit.length}"), "\\d+[\\.\\d+] (centimeter|meter|inch)") + self.assertEqual(resolver.resolve("{float} {unit.weight}"), "\\d+[\\.\\d+] (kg|g|lbs|stone)") if __name__ == "__main__": diff --git a/tests/test_utils/test_schemaview.py b/tests/test_utils/test_schemaview.py index 8b78d556..8624b86c 100644 --- a/tests/test_utils/test_schemaview.py +++ b/tests/test_utils/test_schemaview.py @@ -15,6 +15,7 @@ SCHEMA_NO_IMPORTS = os.path.join(INPUT_DIR, 'kitchen_sink_noimports.yaml') SCHEMA_WITH_IMPORTS = os.path.join(INPUT_DIR, 'kitchen_sink.yaml') +SCHEMA_WITH_STRUCTURED_PATTERNS = os.path.join(INPUT_DIR, "pattern-example.yaml") yaml_loader = YAMLLoader() @@ -526,7 +527,7 @@ def test_metamodel_in_schemaview(self): self.assertIsNotNone(exp_slot_uri) def test_materialize_patterns(self): - sv = SchemaView(os.path.join(INPUT_DIR, "pattern-example.yaml")) + sv = SchemaView(SCHEMA_WITH_STRUCTURED_PATTERNS) sv.materialize_patterns() @@ -536,6 +537,24 @@ def test_materialize_patterns(self): self.assertEqual(height_slot.pattern, "\d+[\.\d+] (centimeter|meter|inch)") self.assertEqual(weight_slot.pattern, "\d+[\.\d+] (kg|g|lbs|stone)") + def test_materialize_patterns_slot_usage(self): + sv = SchemaView(SCHEMA_WITH_STRUCTURED_PATTERNS) + + sv.materialize_patterns() + + name_slot_usage = sv.get_class("FancyPersonInfo").slot_usage['name'] + + self.assertEqual(name_slot_usage.pattern, "\\S+ \\S+-\\S+") + + def test_materialize_patterns_attribute(self): + sv = SchemaView(SCHEMA_WITH_STRUCTURED_PATTERNS) + + sv.materialize_patterns() + + weight_attribute = sv.get_class('ClassWithAttributes').attributes['weight'] + + self.assertEqual(weight_attribute.pattern, "\d+[\.\d+] (kg|g|lbs|stone)") + if __name__ == '__main__': unittest.main()