Skip to content

Commit

Permalink
Enhancing tests, moving towards compliance framework (#292)
Browse files Browse the repository at this point in the history
* Add option to filter owl tautologies

* Raise exception if path to sqlite db does not exist, fixes #90

* moving towards general compliance suite, #291

* Renaming check-definitions to validate-definitions. #30

* ability to filter owl tautologies.

Adding missing predicate filter for
abstract_sparql_implementation.py

* lint
  • Loading branch information
cmungall authored Sep 24, 2022
1 parent d181130 commit 56859dd
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 7 deletions.
7 changes: 6 additions & 1 deletion src/oaklib/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
import logging
import re
import secrets
import subprocess
import sys
from collections import defaultdict
Expand Down Expand Up @@ -523,6 +524,10 @@ def chain_it(v):
pass
elif term.startswith(".all"):
chain_it(impl.entities())
elif term.startswith(".rand"):
for x in impl.entities():
if secrets.randbelow(100) == 0:
yield x
elif term.startswith(".in"):
subset = terms[0]
terms = terms[1:]
Expand Down Expand Up @@ -2769,7 +2774,7 @@ def validate_multiple(dbs, output, schema, cutoff: int):

@main.command()
@output_option
def check_definitions(output: str):
def validate_definitions(output: str):
"""
Check definitions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ def outgoing_relationships(
) -> Iterator[Tuple[PRED_CURIE, CURIE]]:
if predicates:
predicates = list(set(predicates))
pred_quris = [self.curie_to_sparql(p) for p in predicates] if predicates else None
uri = self.curie_to_uri(curie)
if not predicates or IS_A in predicates:
for p in self.hierararchical_parents(curie):
Expand All @@ -318,6 +319,7 @@ def outgoing_relationships(
"FILTER (isIRI(?o))",
],
)
query.add_values("p", pred_quris)
bindings = self._query(query)
for row in bindings:
pred = self.uri_to_curie(row["p"]["value"])
Expand All @@ -330,6 +332,7 @@ def outgoing_relationships(
"?p rdf:type owl:ObjectProperty",
],
)
query.add_values("p", pred_quris)
bindings = self._query(query)
for row in bindings:
pred = self.uri_to_curie(row["p"]["value"])
Expand Down
5 changes: 3 additions & 2 deletions src/oaklib/implementations/sparql/sparql_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ def add_not_in(self, subquery: "SparqlQuery"):
def add_filter(self, cond: str):
    """
    Append a FILTER clause to the WHERE patterns of this query.

    :param cond: the filter condition, inserted verbatim between the parentheses
    """
    clause = f"FILTER ( {cond} )"
    self.where.append(clause)

def add_values(self, var: str, vals: List[str]):
self.where.append(f'VALUES ?{var} {{ {" ".join(vals)} }}')
def add_values(self, var: str, vals: Optional[List[str]]):
    """
    Append a VALUES clause constraining a variable to the given terms.

    :param var: variable name, without the leading ``?``
    :param vals: terms to join with single spaces inside the braces;
        when None, no clause is added and the query is left unchanged
    """
    if vals is None:
        return
    joined = " ".join(vals)
    self.where.append(f"VALUES ?{var} {{ {joined} }}")

def select_str(self):
distinct = "DISTINCT " if self.distinct else ""
Expand Down
12 changes: 12 additions & 0 deletions src/oaklib/implementations/sqldb/sql_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@
IN_SUBSET,
IS_A,
LABEL_PREDICATE,
OWL_NOTHING,
OWL_THING,
RDF_TYPE,
SEMAPV,
SYNONYM_PREDICATES,
Expand Down Expand Up @@ -229,6 +231,8 @@ def __post_init__(self):
locator = f"sqlite:///{locator}"
else:
path = Path(locator.replace("sqlite:", "")).absolute()
if not path.exists():
raise FileNotFoundError(f"File does not exist: {path}")
locator = f"sqlite:///{path}"
logging.info(f"Locator, post-processed: {locator}")
self.engine = create_engine(locator)
Expand Down Expand Up @@ -469,6 +473,8 @@ def outgoing_relationships(
q = q.filter(tbl.predicate.in_(predicates))
logging.debug(f"Querying outgoing, curie={curie}, predicates={predicates}, q={q}")
for row in q:
if self.exclude_owl_top_and_bottom and row.object == OWL_THING:
continue
yield row.predicate, row.object
if not predicates or RDF_TYPE in predicates:
q = self.session.query(RdfTypeStatement.object).filter(
Expand All @@ -477,6 +483,8 @@ def outgoing_relationships(
cls_subq = self.session.query(ClassNode.id)
q = q.filter(RdfTypeStatement.object.in_(cls_subq))
for row in q:
if self.exclude_owl_top_and_bottom and row.object == OWL_THING:
continue
yield RDF_TYPE, row.object
if tbl == Edge and (not predicates or EQUIVALENT_CLASS in predicates):
q = self.session.query(OwlEquivalentClassStatement.object).filter(
Expand Down Expand Up @@ -514,6 +522,10 @@ def relationships(
for r in self._tbox_relationships(
subjects, predicates, objects, include_entailed=include_entailed
):
if self.exclude_owl_top_and_bottom and r[2] == OWL_THING:
continue
if self.exclude_owl_top_and_bottom and r[0] == OWL_NOTHING:
continue
yield r
for r in self._equivalent_class_relationships(subjects, predicates, objects):
yield r
Expand Down
3 changes: 3 additions & 0 deletions src/oaklib/interfaces/basic_ontology_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ class BasicOntologyInterface(OntologyInterface, ABC):
"""For adapters that wrap a transactional source (e.g sqlite), this controls
whether results should be autocommitted after each operation"""

exclude_owl_top_and_bottom: bool = field(default_factory=lambda: True)
"""Do not include owl:Thing or owl:Nothing"""

_converter: Optional[curies.Converter] = None

def prefix_map(self) -> PREFIX_MAP:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ def test_validate_bad_ontology(self):
def test_check_definitions(self):
for input_arg in [TEST_ONT, f"sqlite:{TEST_DB}"]:
logging.info(f"INPUT={input_arg}")
result = self.runner.invoke(main, ["-i", input_arg, "check-definitions"])
result = self.runner.invoke(main, ["-i", input_arg, "validate-definitions"])
out = result.stdout
err = result.stderr
logging.info(f"ERR={err}")
Expand Down
13 changes: 11 additions & 2 deletions tests/test_implementations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@
from linkml_runtime.dumpers import json_dumper

from oaklib import BasicOntologyInterface
from oaklib.datamodels.vocabulary import EQUIVALENT_CLASS, IS_A, ONLY_IN_TAXON, PART_OF
from oaklib.datamodels.vocabulary import (
EQUIVALENT_CLASS,
IS_A,
NEVER_IN_TAXON,
ONLY_IN_TAXON,
PART_OF,
)
from oaklib.interfaces.differ_interface import DifferInterface
from oaklib.interfaces.patcher_interface import PatcherInterface
from oaklib.utilities.kgcl_utilities import generate_change_id
Expand Down Expand Up @@ -44,7 +50,7 @@ class ComplianceTester:
test: unittest.TestCase
"""Link back to the calling test"""

def test_relationships(self, oi: BasicOntologyInterface):
def test_relationships(self, oi: BasicOntologyInterface, ignore_annotation_edges=False):
"""
Tests relationship methods for compliance
Expand All @@ -61,6 +67,9 @@ def test_relationships(self, oi: BasicOntologyInterface):
(CELLULAR_COMPONENT, True, [(CELLULAR_COMPONENT, IS_A, "BFO:0000040")]),
]
for curie, complete, expected_rels in cases:
print(f"TESTS FOR {curie}")
if ignore_annotation_edges:
expected_rels = [r for r in expected_rels if r[1] != NEVER_IN_TAXON]
rels = list(oi.relationships([curie]))
preds = set()
for rv in expected_rels:
Expand Down
7 changes: 7 additions & 0 deletions tests/test_implementations/test_sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
SHAPE,
VACUOLE,
)
from tests.test_implementations import ComplianceTester

TEST_RDF = INPUT_DIR / "go-nucleus.owl.ttl"
TEST_INST_RDF = INPUT_DIR / "inst.owl.ttl"
Expand All @@ -41,8 +42,12 @@ class TestSparqlImplementation(unittest.TestCase):
def setUp(self) -> None:
    """Build the SPARQL adapter over TEST_RDF and a compliance tester bound to this test case."""
    self.oi = SparqlImplementation(OntologyResource(slug=str(TEST_RDF)))
    self.compliance_tester = ComplianceTester(self)

def test_relationships(self):
    """Run the shared relationship compliance checks, ignoring annotation edges."""
    tester = self.compliance_tester
    tester.test_relationships(self.oi, ignore_annotation_edges=True)

def test_relationships_extra(self):
oi = self.oi
self.assertIsNotNone(oi.graph)
rels = list(oi.outgoing_relationships(VACUOLE))
Expand All @@ -52,6 +57,8 @@ def test_relationships(self):
logging.info(f"{k} = {v}")
self.assertIn("GO:0043231", rels[IS_A])
self.assertIn("GO:0005737", rels[PART_OF])
rels = oi.outgoing_relationship_map(NUCLEUS)
print(rels)

def test_instance_graph(self):
oi = SparqlImplementation(OntologyResource(slug=str(TEST_INST_RDF)))
Expand Down
22 changes: 21 additions & 1 deletion tests/test_implementations/test_sqldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
PHOTORECEPTOR_OUTER_SEGMENT,
VACUOLE,
)
from tests.test_implementations import ComplianceTester

DB = INPUT_DIR / "go-nucleus.db"
SSN_DB = INPUT_DIR / "ssn.db"
Expand All @@ -59,13 +60,29 @@ def setUp(self) -> None:
self.bad_oi = SqlImplementation(OntologyResource(slug=f"sqlite:///{bad_ont}"))
self.ssn_oi = SqlImplementation(OntologyResource(slug=f"sqlite:///{SSN_DB}"))
self.inst_oi = SqlImplementation(OntologyResource(INST_DB))
self.compliance_tester = ComplianceTester(self)

def test_empty_db(self) -> None:
    """Should raise FileNotFoundError when the sqlite db file does not exist."""
    # The path points at a file that is absent from the input directory;
    # this is a missing database, not an empty one.
    missing_path = INPUT_DIR / "NO_SUCH_FILE"
    res = OntologyResource(slug=f"sqlite:///{missing_path}")
    with self.assertRaises(FileNotFoundError):
        SqlImplementation(res)

@unittest.skip("Contents of go-nucleus file need to be aligned")
def test_relationships(self):
    """
    Run the shared relationship compliance checks against a freshly opened
    adapter over DB, keeping annotation edges in the expectations.
    """
    resource = OntologyResource(slug=f"sqlite:///{str(DB)}")
    adapter = SqlImplementation(resource)
    self.compliance_tester.test_relationships(adapter, ignore_annotation_edges=False)

def test_relationships_extra(self):
oi = self.oi
rels = oi.outgoing_relationship_map(VACUOLE)
self.assertCountEqual(rels[IS_A], ["GO:0043231"])
self.assertCountEqual(rels[IS_A], [IMBO])
self.assertCountEqual(rels[PART_OF], ["GO:0005737"])
self.assertCountEqual([IS_A, PART_OF], rels)
rels = list(oi.outgoing_relationships(VACUOLE))
self.assertCountEqual([(IS_A, IMBO), (PART_OF, CYTOPLASM)], rels)
hier_parents = list(oi.hierararchical_parents(VACUOLE))
self.assertEqual([IMBO], hier_parents)

def test_instance_graph(self):
oi = self.inst_oi
Expand Down Expand Up @@ -431,6 +448,8 @@ def test_search_regex(self):

def test_multiset_mrcas(self):
oi = self.oi
orig_exclude_owl_top_and_bottom = oi.exclude_owl_top_and_bottom
oi.exclude_owl_top_and_bottom = False
results = oi.multiset_most_recent_common_ancestors(
[NUCLEUS, VACUOLE, NUCLEAR_ENVELOPE, FUNGI], predicates=[IS_A, PART_OF], asymmetric=True
)
Expand All @@ -443,6 +462,7 @@ def test_multiset_mrcas(self):
("GO:0005773", "NCBITaxon:4751", "owl:Thing"),
("GO:0005634", "GO:0005773", "GO:0043231"),
]
oi.exclude_owl_top_and_bottom = orig_exclude_owl_top_and_bottom
self.assertCountEqual(expected, list(results))
for s, o, lca in expected:
results = list(oi.most_recent_common_ancestors(s, o, predicates=[IS_A, PART_OF]))
Expand Down

0 comments on commit 56859dd

Please sign in to comment.