Skip to content

Commit

Permalink
Materialize structured patterns on class slot usages and attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
pkalita-lbl committed Oct 4, 2022
1 parent 3617c37 commit e1df199
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 91 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,4 @@ Pipfile.lock

# No IDE settings (PyCharm, VS Code)
.idea/
.vscode
123 changes: 52 additions & 71 deletions linkml_runtime/utils/pattern.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,64 @@
from functools import lru_cache
import re
from typing import Dict


class PatternResolver:
    """Expand ``{name}`` placeholders in structured-pattern syntax strings.

    The replacement text for each placeholder is taken from the schema
    settings: every entry of ``schema_view.schema.settings`` contributes its
    key as a placeholder name and its ``setting_value`` as the replacement.

    :param schema_view: object exposing ``schema.settings`` as a mapping of
        setting name to an object with a ``setting_value`` attribute
    """

    # Placeholder syntax: a name wrapped in curly braces, optionally made of
    # several segments separated by a dot, hyphen, underscore or space,
    # e.g. ``{float}`` or ``{unit.length}``.
    # The separator class is written ``[.\-_ ]`` with an escaped hyphen on
    # purpose: the earlier spelling ``[\.-_ ]`` was parsed as the character
    # *range* ``.``..``_`` (which admits ``:;<=>?@/`` and friends, and rejects
    # a hyphen after a dotted segment such as ``{a.b-c}``).
    var_name = re.compile(r"{([a-z0-9_-]+([.\-_ ][a-z0-9]+)*)}", re.IGNORECASE)

    def __init__(self, schema_view):
        # A missing settings section is treated the same as an empty one.
        settings_dict = schema_view.schema.settings or {}

        # placeholder name -> replacement value
        self.format_spec = {
            name: setting.setting_value for name, setting in settings_dict.items()
        }

        # Per-instance memo of already resolved syntax strings. A plain dict
        # is used instead of ``functools.lru_cache`` on the method, because a
        # cache attached to the method lives on the class and holds a
        # reference to every instance it has ever seen.
        self._resolved = {}

    def resolve(self, pattern: str) -> str:
        """Return ``pattern`` with every known placeholder substituted.

        * ``{name}`` is replaced by ``str(format_spec[name])`` when ``name``
          is a known setting and that string is non-empty.
        * ``{{name}}`` (a placeholder wrapped in a second pair of braces) is
          an escape: one pair of braces is dropped and no lookup is done, so
          ``{{2}}`` becomes the literal ``{2}``.
        * Anything else is copied through unchanged.

        :param pattern: structured-pattern syntax string
        :return: the expanded pattern
        """
        try:
            return self._resolved[pattern]
        except KeyError:
            pass

        pieces = []
        cursor = 0  # index in ``pattern`` of the first character not yet copied
        for match in self.var_name.finditer(pattern):
            start, end = match.span()
            name = match.group(1)

            # Detect double set brackets
            is_escaped = (
                start > 0
                and end < len(pattern)
                and pattern[start - 1] == "{"
                and pattern[end] == "}"
            )
            if is_escaped:
                replacement = name
            elif name in self.format_spec:
                replacement = str(self.format_spec[name])
            else:
                continue

            # An empty replacement leaves the placeholder text in place.
            if not replacement:
                continue

            pieces.append(pattern[cursor:start])
            pieces.append(replacement)
            cursor = end

        pieces.append(pattern[cursor:])
        converted = "".join(pieces)

        self._resolved[pattern] = converted
        return converted


def generate_patterns(schema_view) -> Dict[str, str]:
    """Generates a dictionary of slot patterns corresponding to
    the structured patterns in the settings.

    Kept as a thin wrapper around :class:`PatternResolver` so that existing
    callers keep working.

    :param schema_view: SchemaView object with LinkML YAML
        already loaded
    :return generated_patterns: dictionary with the structured pattern
        syntax strings as keys and the expanded patterns as values
    """
    resolver = PatternResolver(schema_view)

    generated_patterns = {}
    for slot_defn in schema_view.all_slots().values():
        if slot_defn.structured_pattern:
            syntax = slot_defn.structured_pattern.syntax
            generated_patterns[syntax] = resolver.resolve(syntax)

    return generated_patterns
30 changes: 20 additions & 10 deletions linkml_runtime/utils/schemaview.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from linkml_runtime.utils.namespaces import Namespaces
from deprecated.classic import deprecated
from linkml_runtime.utils.context_utils import parse_import_map, map_import
from linkml_runtime.utils.pattern import generate_patterns
from linkml_runtime.utils.pattern import PatternResolver
from linkml_runtime.linkml_model.meta import *
from enum import Enum
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -1462,12 +1462,22 @@ def materialize_patterns(self) -> None:
into regular expressions based on composite patterns
provided in the settings dictionary.
"""
patterns_dict = generate_patterns(self)

for _, slot_defn in self.all_slots().items():
if slot_defn.structured_pattern:

pattern = slot_defn.structured_pattern.syntax

if pattern in patterns_dict:
slot_defn.pattern = patterns_dict[pattern]
resolver = PatternResolver(self)

def materialize_pattern_into_slot_definition(slot_definition: SlotDefinition) -> None:
    """Set ``slot_definition.pattern`` from its structured pattern, if it has one.

    Definitions without a structured pattern are left untouched.
    """
    structured_pattern = slot_definition.structured_pattern
    if structured_pattern:
        # Expand the structured syntax through the enclosing scope's resolver.
        slot_definition.pattern = resolver.resolve(structured_pattern.syntax)

for slot_definition in self.all_slots().values():
materialize_pattern_into_slot_definition(slot_definition)

for class_definition in self.all_classes().values():
if class_definition.slot_usage:
for slot_definition in class_definition.slot_usage.values():
materialize_pattern_into_slot_definition(slot_definition)

if class_definition.attributes:
for slot_definition in class_definition.attributes.values():
materialize_pattern_into_slot_definition(slot_definition)
18 changes: 18 additions & 0 deletions tests/test_utils/input/pattern-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ settings:
unit.length: "(centimeter|meter|inch)"
unit.weight: "(kg|g|lbs|stone)"
email: "\\S+@\\S+{\\.\\w}+"
hyphenated_name: "\\S+-\\S+"

#==================================
# Classes #
Expand All @@ -45,6 +46,23 @@ classes:
- height
- email

FancyPersonInfo:
is_a: PersonInfo
slot_usage:
name:
structured_pattern:
syntax: "\\S+ {hyphenated_name}"
interpolated: true
partial_match: false

ClassWithAttributes:
attributes:
weight:
structured_pattern:
syntax: "{float} {unit.weight}"
interpolated: true
partial_match: false

#==================================
# Slots #
#==================================
Expand Down
13 changes: 4 additions & 9 deletions tests/test_utils/test_pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from linkml_runtime.utils.schemaview import SchemaView

from linkml_runtime.utils.pattern import generate_patterns
from linkml_runtime.utils.pattern import PatternResolver


class PatternTestCase(unittest.TestCase):
Expand All @@ -13,15 +13,10 @@ def test_generate_patterns(self):

sv = SchemaView(env.input_path("pattern-example.yaml"))

# actual result returned from call to generate_patterns()
actual_dict = generate_patterns(sv)
resolver = PatternResolver(sv)

expected_dict = {
"{float} {unit.length}": "\\d+[\\.\\d+] (centimeter|meter|inch)",
"{float} {unit.weight}": "\\d+[\\.\\d+] (kg|g|lbs|stone)",
}

self.assertDictEqual(actual_dict, expected_dict)
self.assertEqual(resolver.resolve("{float} {unit.length}"), "\\d+[\\.\\d+] (centimeter|meter|inch)")
self.assertEqual(resolver.resolve("{float} {unit.weight}"), "\\d+[\\.\\d+] (kg|g|lbs|stone)")


if __name__ == "__main__":
Expand Down
21 changes: 20 additions & 1 deletion tests/test_utils/test_schemaview.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

SCHEMA_NO_IMPORTS = os.path.join(INPUT_DIR, 'kitchen_sink_noimports.yaml')
SCHEMA_WITH_IMPORTS = os.path.join(INPUT_DIR, 'kitchen_sink.yaml')
SCHEMA_WITH_STRUCTURED_PATTERNS = os.path.join(INPUT_DIR, "pattern-example.yaml")

yaml_loader = YAMLLoader()

Expand Down Expand Up @@ -526,7 +527,7 @@ def test_metamodel_in_schemaview(self):
self.assertIsNotNone(exp_slot_uri)

def test_materialize_patterns(self):
sv = SchemaView(os.path.join(INPUT_DIR, "pattern-example.yaml"))
sv = SchemaView(SCHEMA_WITH_STRUCTURED_PATTERNS)

sv.materialize_patterns()

Expand All @@ -536,6 +537,24 @@ def test_materialize_patterns(self):
self.assertEqual(height_slot.pattern, "\d+[\.\d+] (centimeter|meter|inch)")
self.assertEqual(weight_slot.pattern, "\d+[\.\d+] (kg|g|lbs|stone)")

def test_materialize_patterns_slot_usage(self):
    """A structured pattern declared in a class's slot_usage gets a pattern."""
    schema_view = SchemaView(SCHEMA_WITH_STRUCTURED_PATTERNS)
    schema_view.materialize_patterns()

    fancy_person = schema_view.get_class("FancyPersonInfo")
    materialized = fancy_person.slot_usage['name'].pattern

    self.assertEqual(materialized, "\\S+ \\S+-\\S+")

def test_materialize_patterns_attribute(self):
    """A structured pattern declared on a class attribute gets a pattern."""
    sv = SchemaView(SCHEMA_WITH_STRUCTURED_PATTERNS)

    sv.materialize_patterns()

    weight_attribute = sv.get_class('ClassWithAttributes').attributes['weight']

    # Raw string: "\d" and "\." are not valid str escape sequences, so the
    # non-raw spelling triggers an invalid-escape warning. The value is the
    # same either way.
    self.assertEqual(weight_attribute.pattern, r"\d+[\.\d+] (kg|g|lbs|stone)")


if __name__ == '__main__':
unittest.main()

0 comments on commit e1df199

Please sign in to comment.