Enhancing tests, moving towards compliance framework (#292)

* Add option to filter owl tautologies
* Raise exception if path to sqlite db does not exist, fixes #90
* moving towards general compliance suite, #291
* Renaming check-definitions to validate-definitions. #30
* ability to filter owl tautologies. Adding missing predicate filter for abstract_sparql_implementation.py
* lint
INCATools · Sep 24, 2022 · 56859dd · 56859dd
1 parent d181130
commit 56859dd
Show file tree

Hide file tree

Showing 9 changed files with 67 additions and 7 deletions.
diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py
@@ -10,6 +10,7 @@
 import json
 import logging
 import re
+import secrets
 import subprocess
 import sys
 from collections import defaultdict
@@ -523,6 +524,10 @@ def chain_it(v):
             pass
         elif term.startswith(".all"):
             chain_it(impl.entities())
+        elif term.startswith(".rand"):
+            for x in impl.entities():
+                if secrets.randbelow(100) == 0:
+                    yield x
         elif term.startswith(".in"):
             subset = terms[0]
             terms = terms[1:]
@@ -2769,7 +2774,7 @@ def validate_multiple(dbs, output, schema, cutoff: int):
 
 @main.command()
 @output_option
-def check_definitions(output: str):
+def validate_definitions(output: str):
     """
     Check definitions
 

diff --git a/src/oaklib/implementations/sparql/abstract_sparql_implementation.py b/src/oaklib/implementations/sparql/abstract_sparql_implementation.py
@@ -306,6 +306,7 @@ def outgoing_relationships(
     ) -> Iterator[Tuple[PRED_CURIE, CURIE]]:
         if predicates:
             predicates = list(set(predicates))
+        pred_quris = [self.curie_to_sparql(p) for p in predicates] if predicates else None
         uri = self.curie_to_uri(curie)
         if not predicates or IS_A in predicates:
             for p in self.hierararchical_parents(curie):
@@ -318,6 +319,7 @@ def outgoing_relationships(
                     "FILTER (isIRI(?o))",
                 ],
             )
+            query.add_values("p", pred_quris)
             bindings = self._query(query)
             for row in bindings:
                 pred = self.uri_to_curie(row["p"]["value"])
@@ -330,6 +332,7 @@ def outgoing_relationships(
                     "?p rdf:type owl:ObjectProperty",
                 ],
             )
+            query.add_values("p", pred_quris)
             bindings = self._query(query)
             for row in bindings:
                 pred = self.uri_to_curie(row["p"]["value"])

diff --git a/src/oaklib/implementations/sparql/sparql_query.py b/src/oaklib/implementations/sparql/sparql_query.py
@@ -28,8 +28,9 @@ def add_not_in(self, subquery: "SparqlQuery"):
     def add_filter(self, cond: str):
         self.where.append(f"FILTER ( {cond} )")
 
-    def add_values(self, var: str, vals: List[str]):
-        self.where.append(f'VALUES ?{var} {{ {" ".join(vals)} }}')
+    def add_values(self, var: str, vals: Optional[List[str]]):
+        if vals is not None:
+            self.where.append(f'VALUES ?{var} {{ {" ".join(vals)} }}')
 
     def select_str(self):
         distinct = "DISTINCT " if self.distinct else ""

diff --git a/src/oaklib/implementations/sqldb/sql_implementation.py b/src/oaklib/implementations/sqldb/sql_implementation.py
@@ -75,6 +75,8 @@
     IN_SUBSET,
     IS_A,
     LABEL_PREDICATE,
+    OWL_NOTHING,
+    OWL_THING,
     RDF_TYPE,
     SEMAPV,
     SYNONYM_PREDICATES,
@@ -229,6 +231,8 @@ def __post_init__(self):
                 locator = f"sqlite:///{locator}"
             else:
                 path = Path(locator.replace("sqlite:", "")).absolute()
+                if not path.exists():
+                    raise FileNotFoundError(f"File does not exist: {path}")
                 locator = f"sqlite:///{path}"
             logging.info(f"Locator, post-processed: {locator}")
             self.engine = create_engine(locator)
@@ -469,6 +473,8 @@ def outgoing_relationships(
             q = q.filter(tbl.predicate.in_(predicates))
         logging.debug(f"Querying outgoing, curie={curie}, predicates={predicates}, q={q}")
         for row in q:
+            if self.exclude_owl_top_and_bottom and row.object == OWL_THING:
+                continue
             yield row.predicate, row.object
         if not predicates or RDF_TYPE in predicates:
             q = self.session.query(RdfTypeStatement.object).filter(
@@ -477,6 +483,8 @@ def outgoing_relationships(
             cls_subq = self.session.query(ClassNode.id)
             q = q.filter(RdfTypeStatement.object.in_(cls_subq))
             for row in q:
+                if self.exclude_owl_top_and_bottom and row.object == OWL_THING:
+                    continue
                 yield RDF_TYPE, row.object
         if tbl == Edge and (not predicates or EQUIVALENT_CLASS in predicates):
             q = self.session.query(OwlEquivalentClassStatement.object).filter(
@@ -514,6 +522,10 @@ def relationships(
             for r in self._tbox_relationships(
                 subjects, predicates, objects, include_entailed=include_entailed
             ):
+                if self.exclude_owl_top_and_bottom and r[2] == OWL_THING:
+                    continue
+                if self.exclude_owl_top_and_bottom and r[0] == OWL_NOTHING:
+                    continue
                 yield r
             for r in self._equivalent_class_relationships(subjects, predicates, objects):
                 yield r

diff --git a/src/oaklib/interfaces/basic_ontology_interface.py b/src/oaklib/interfaces/basic_ontology_interface.py
@@ -105,6 +105,9 @@ class BasicOntologyInterface(OntologyInterface, ABC):
     """For adapters that wrap a transactional source (e.g sqlite), this controls
     whether results should be autocommitted after each operation"""
 
+    exclude_owl_top_and_bottom: bool = field(default_factory=lambda: True)
+    """Do not include owl:Thing or owl:Nothing"""
+
     _converter: Optional[curies.Converter] = None
 
     def prefix_map(self) -> PREFIX_MAP:

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -419,7 +419,7 @@ def test_validate_bad_ontology(self):
     def test_check_definitions(self):
         for input_arg in [TEST_ONT, f"sqlite:{TEST_DB}"]:
             logging.info(f"INPUT={input_arg}")
-            result = self.runner.invoke(main, ["-i", input_arg, "check-definitions"])
+            result = self.runner.invoke(main, ["-i", input_arg, "validate-definitions"])
             out = result.stdout
             err = result.stderr
             logging.info(f"ERR={err}")

diff --git a/tests/test_implementations/__init__.py b/tests/test_implementations/__init__.py
@@ -10,7 +10,13 @@
 from linkml_runtime.dumpers import json_dumper
 
 from oaklib import BasicOntologyInterface
-from oaklib.datamodels.vocabulary import EQUIVALENT_CLASS, IS_A, ONLY_IN_TAXON, PART_OF
+from oaklib.datamodels.vocabulary import (
+    EQUIVALENT_CLASS,
+    IS_A,
+    NEVER_IN_TAXON,
+    ONLY_IN_TAXON,
+    PART_OF,
+)
 from oaklib.interfaces.differ_interface import DifferInterface
 from oaklib.interfaces.patcher_interface import PatcherInterface
 from oaklib.utilities.kgcl_utilities import generate_change_id
@@ -44,7 +50,7 @@ class ComplianceTester:
     test: unittest.TestCase
     """Link back to the calling test"""
 
-    def test_relationships(self, oi: BasicOntologyInterface):
+    def test_relationships(self, oi: BasicOntologyInterface, ignore_annotation_edges=False):
         """
         Tests relationship methods for compliance
 
@@ -61,6 +67,9 @@ def test_relationships(self, oi: BasicOntologyInterface):
             (CELLULAR_COMPONENT, True, [(CELLULAR_COMPONENT, IS_A, "BFO:0000040")]),
         ]
         for curie, complete, expected_rels in cases:
+            print(f"TESTS FOR {curie}")
+            if ignore_annotation_edges:
+                expected_rels = [r for r in expected_rels if r[1] != NEVER_IN_TAXON]
             rels = list(oi.relationships([curie]))
             preds = set()
             for rv in expected_rels:

diff --git a/tests/test_implementations/test_sparql.py b/tests/test_implementations/test_sparql.py
@@ -30,6 +30,7 @@
     SHAPE,
     VACUOLE,
 )
+from tests.test_implementations import ComplianceTester
 
 TEST_RDF = INPUT_DIR / "go-nucleus.owl.ttl"
 TEST_INST_RDF = INPUT_DIR / "inst.owl.ttl"
@@ -41,8 +42,12 @@ class TestSparqlImplementation(unittest.TestCase):
     def setUp(self) -> None:
         oi = SparqlImplementation(OntologyResource(slug=str(TEST_RDF)))
         self.oi = oi
+        self.compliance_tester = ComplianceTester(self)
 
     def test_relationships(self):
+        self.compliance_tester.test_relationships(self.oi, ignore_annotation_edges=True)
+
+    def test_relationships_extra(self):
         oi = self.oi
         self.assertIsNotNone(oi.graph)
         rels = list(oi.outgoing_relationships(VACUOLE))
@@ -52,6 +57,8 @@ def test_relationships(self):
             logging.info(f"{k} = {v}")
         self.assertIn("GO:0043231", rels[IS_A])
         self.assertIn("GO:0005737", rels[PART_OF])
+        rels = oi.outgoing_relationship_map(NUCLEUS)
+        print(rels)
 
     def test_instance_graph(self):
         oi = SparqlImplementation(OntologyResource(slug=str(TEST_INST_RDF)))

diff --git a/tests/test_implementations/test_sqldb.py b/tests/test_implementations/test_sqldb.py
@@ -41,6 +41,7 @@
     PHOTORECEPTOR_OUTER_SEGMENT,
     VACUOLE,
 )
+from tests.test_implementations import ComplianceTester
 
 DB = INPUT_DIR / "go-nucleus.db"
 SSN_DB = INPUT_DIR / "ssn.db"
@@ -59,13 +60,29 @@ def setUp(self) -> None:
         self.bad_oi = SqlImplementation(OntologyResource(slug=f"sqlite:///{bad_ont}"))
         self.ssn_oi = SqlImplementation(OntologyResource(slug=f"sqlite:///{SSN_DB}"))
         self.inst_oi = SqlImplementation(OntologyResource(INST_DB))
+        self.compliance_tester = ComplianceTester(self)
 
+    def test_empty_db(self) -> None:
+        """Should raise error when connecting to an empty db."""
+        res = OntologyResource(slug=f"sqlite:///{str(INPUT_DIR / 'NO_SUCH_FILE')}")
+        with self.assertRaises(FileNotFoundError):
+            _ = SqlImplementation(res)
+
+    @unittest.skip("Contents of go-nucleus file need to be aligned")
     def test_relationships(self):
+        oi = SqlImplementation(OntologyResource(slug=f"sqlite:///{str(DB)}"))
+        self.compliance_tester.test_relationships(oi, ignore_annotation_edges=False)
+
+    def test_relationships_extra(self):
         oi = self.oi
         rels = oi.outgoing_relationship_map(VACUOLE)
-        self.assertCountEqual(rels[IS_A], ["GO:0043231"])
+        self.assertCountEqual(rels[IS_A], [IMBO])
         self.assertCountEqual(rels[PART_OF], ["GO:0005737"])
         self.assertCountEqual([IS_A, PART_OF], rels)
+        rels = list(oi.outgoing_relationships(VACUOLE))
+        self.assertCountEqual([(IS_A, IMBO), (PART_OF, CYTOPLASM)], rels)
+        hier_parents = list(oi.hierararchical_parents(VACUOLE))
+        self.assertEqual([IMBO], hier_parents)
 
     def test_instance_graph(self):
         oi = self.inst_oi
@@ -431,6 +448,8 @@ def test_search_regex(self):
 
     def test_multiset_mrcas(self):
         oi = self.oi
+        orig_exclude_owl_top_and_bottom = oi.exclude_owl_top_and_bottom
+        oi.exclude_owl_top_and_bottom = False
         results = oi.multiset_most_recent_common_ancestors(
             [NUCLEUS, VACUOLE, NUCLEAR_ENVELOPE, FUNGI], predicates=[IS_A, PART_OF], asymmetric=True
         )
@@ -443,6 +462,7 @@ def test_multiset_mrcas(self):
             ("GO:0005773", "NCBITaxon:4751", "owl:Thing"),
             ("GO:0005634", "GO:0005773", "GO:0043231"),
         ]
+        oi.exclude_owl_top_and_bottom = orig_exclude_owl_top_and_bottom
         self.assertCountEqual(expected, list(results))
         for s, o, lca in expected:
             results = list(oi.most_recent_common_ancestors(s, o, predicates=[IS_A, PART_OF]))