From 8c722b6c7498763f5ab476172ea5d2fc0e7af77e Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Wed, 8 Nov 2023 00:19:06 +0100 Subject: [PATCH 1/4] test: improve coverage of curie validation Issue linkml/linkml#1701 has discovered an erroneous validation of CURIE prefixes, since apparently it doesn't accept characters '_' and '.'. This patch extends the tests to provide test coverage for the expected values to work, avoiding future regressions. Signed-off-by: Silvano Cirujano Cuesta --- tests/test_utils/test_metamodelcore.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_utils/test_metamodelcore.py b/tests/test_utils/test_metamodelcore.py index 2d74e6b1..ff2b5700 100644 --- a/tests/test_utils/test_metamodelcore.py +++ b/tests/test_utils/test_metamodelcore.py @@ -64,6 +64,8 @@ def test_curie(self): self.assertFalse(Curie.is_valid("type")) self.assertEqual(":type", Curie(":type")) self.assertTrue(Curie.is_valid(':type')) + self.assertTrue(Curie.is_valid('WIKIDATA_PROPERTY:P854')) + self.assertTrue(Curie.is_valid('WIKIDATA.PROPERTY:P854')) with self.assertRaises(ValueError): Curie("1df:type") self.assertFalse(Curie.is_valid('1df:type')) From e01eabe572218f6d22a4f7de2fa8851482077139 Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Wed, 8 Nov 2023 00:39:27 +0100 Subject: [PATCH 2/4] style: cleanup commented-out code Signed-off-by: Silvano Cirujano Cuesta --- linkml_runtime/utils/uri_validator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/linkml_runtime/utils/uri_validator.py b/linkml_runtime/utils/uri_validator.py index 70203d27..cacf642a 100644 --- a/linkml_runtime/utils/uri_validator.py +++ b/linkml_runtime/utils/uri_validator.py @@ -357,6 +357,5 @@ def validate_uri_reference(input): def validate_curie(input): - # print(CURIE) return curie_validator.match(input) From 38c4be2c89de3176da1ecda8e8155921693336d8 Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Wed, 8 Nov 2023 01:16:05 +0100 Subject: [PATCH 3/4] fix: 
replacing utf-8 non-breaking spaces For whatever strange reasons, the regular expressions being used for CURIE validation were using UTF-8 non-breaking spaces (hex C2A0) instead of normal whitespaces. This patch fixes it. Signed-off-by: Silvano Cirujano Cuesta --- linkml_runtime/utils/uri_validator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linkml_runtime/utils/uri_validator.py b/linkml_runtime/utils/uri_validator.py index cacf642a..2e66851c 100644 --- a/linkml_runtime/utils/uri_validator.py +++ b/linkml_runtime/utils/uri_validator.py @@ -42,7 +42,7 @@ gen_delims = r"(?: : | / | \? | \# | \[ | \] | @ )" # sub-delims = "!" / "$" / "&" / "'" / "(" -sub_delims = r"(?: ! | \$ | & | ' | \( | \) | \* | \+ | , | ; | = )" +sub_delims = r"(?: ! | \$ | & | ' | \( | \) | \* | \+ | , | ; | = )" # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" pchar = rf"(?: {unreserved} | {pct_encoded} | {sub_delims} | : | @ )" @@ -295,7 +295,7 @@ # As of now this module doesn't support NCNameChar IRI, but # relative-refs as defined in URI, # NCNameChar ::= Letter | Digit | '.' | '-' | '_' -NCNameChar = rf"(?: {ALPHA} | {DIGIT} | \. | \- | _ )" +NCNameChar = rf"(?: {ALPHA} | {DIGIT} | \. 
| \- | _ )" # prefix := NCName # NCName := (Letter | '_') (NCNameChar)* From d45970a5e410401568d86652034faf8cbe76cf0f Mon Sep 17 00:00:00 2001 From: Silvano Cirujano Cuesta Date: Wed, 8 Nov 2023 01:39:44 +0100 Subject: [PATCH 4/4] style: consistent use of f-strings Signed-off-by: Silvano Cirujano Cuesta --- linkml_runtime/utils/uri_validator.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/linkml_runtime/utils/uri_validator.py b/linkml_runtime/utils/uri_validator.py index 2e66851c..f761415d 100644 --- a/linkml_runtime/utils/uri_validator.py +++ b/linkml_runtime/utils/uri_validator.py @@ -324,17 +324,17 @@ # ### Compile the regular expressions for better performance -uri_validator = re.compile("^{}$".format(URI), re.VERBOSE) +uri_validator = re.compile(f"^{URI}$", re.VERBOSE) -#uri_ref_validator = re.compile("^{}$".format(URI_reference), re.VERBOSE) +#uri_ref_validator = re.compile(f"^{URI_reference}$", re.VERBOSE) -uri_relative_ref_validator = re.compile("^{}$".format(relative_ref), re.VERBOSE) +uri_relative_ref_validator = re.compile(f"^{relative_ref}$", re.VERBOSE) -abs_uri_validator = re.compile("^{}$".format(absolute_URI), re.VERBOSE) +abs_uri_validator = re.compile(f"^{absolute_URI}$", re.VERBOSE) -curie_validator = re.compile("^{}$".format(CURIE), re.VERBOSE) +curie_validator = re.compile(f"^{CURIE}$", re.VERBOSE) -safe_curie_validator = re.compile("^{}$".format(safe_CURIE), re.VERBOSE) +safe_curie_validator = re.compile(f"^{safe_CURIE}$", re.VERBOSE) # ----------------------------------------------------------------------------- #