From b22d1eff00b20ae6f6f4995dba299bb982889fe8 Mon Sep 17 00:00:00 2001 From: cmungall Date: Sun, 9 May 2021 15:39:15 -0700 Subject: [PATCH] various --- .gitignore | 12 + Makefile | 112 +++++- README.md | 19 + environment.sh | 5 + mkdocs.yml | 7 + prefixes/prefix.sql | 5 + requirements.txt | 4 + semsql/__init__.py | 0 semsql/sqla/__init__.py | 0 semsql/sqla/owl.py | 101 +++++ semsql/sqla/rdf.py | 0 semsql/sqlutils.py | 44 +++ sql/go-annotation.sql | 17 + sql/go-cam.sql | 37 ++ sql/idmapping.sql | 5 + sql/indexes.sql | 3 + sql/obo-checks.sql | 6 + sql/oio.sql | 41 ++ sql/owl.sql | 47 ++- sql/rdf.sql | 10 +- sql/rdftab.sql | 11 +- sql/relation-graph.sql | 18 + src/schema/basics.yaml | 23 ++ src/schema/owl.yaml | 89 +++++ src/schema/rdf.yaml | 122 ++++++ src/schema/semsql.yaml | 20 + tests/inputs/go-nucleus.db | Bin 0 -> 163840 bytes tests/inputs/go-nucleus.owl | 743 ++++++++++++++++++++++++++++++++++++ tests/test_basic_sqla.py | 30 ++ utils/gaf.header.tsv | 1 + utils/gaf2tsv | 4 + 31 files changed, 1517 insertions(+), 19 deletions(-) create mode 100644 .gitignore create mode 100644 environment.sh create mode 100644 mkdocs.yml create mode 100644 requirements.txt create mode 100644 semsql/__init__.py create mode 100644 semsql/sqla/__init__.py create mode 100644 semsql/sqla/owl.py create mode 100644 semsql/sqla/rdf.py create mode 100644 semsql/sqlutils.py create mode 100644 sql/go-annotation.sql create mode 100644 sql/go-cam.sql create mode 100644 sql/idmapping.sql create mode 100644 sql/indexes.sql create mode 100644 sql/oio.sql create mode 100644 src/schema/basics.yaml create mode 100644 src/schema/owl.yaml create mode 100644 src/schema/rdf.yaml create mode 100644 src/schema/semsql.yaml create mode 100644 tests/inputs/go-nucleus.db create mode 100644 tests/inputs/go-nucleus.owl create mode 100644 tests/test_basic_sqla.py create mode 100644 utils/gaf.header.tsv create mode 100755 utils/gaf2tsv diff --git a/.gitignore b/.gitignore new file mode 100644 index 
0000000..ed5b2c3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +.idea +owl/ +download/ +db/ +docs/ +load*-* +schemaload*-* +venv +bin/ +target/ +test.db +inferences/ diff --git a/Makefile b/Makefile index 79933b0..9ccac72 100644 --- a/Makefile +++ b/Makefile @@ -1,23 +1,117 @@ +ONTS = obi mondo go envo ro hp mp zfa wbphenotype ecto upheno uberon_cm doid chebi pr OWL_SQL = rdf owl OBO_SQL = $(OWL_SQL) obo-checks RG_SQL = $(OWL_SQL) relation-graph ALL_SQL = $(OWL_SQL) relation-graph obo-checks + +all: $(patsubst %,all-%,$(ONTS)) + +all-%: target/%.load target/%.views inferences/%.load reports/%.problems.tsv + echo $* + +realclean-%: + rm target/$*.* ; + rm db/$*.db + ALL_SQL_FILES = $(patsubst %,sql/%.sql,$(ALL_SQL)) sql/all.sql: $(ALL_SQL_FILES) cat $^ > $@ -schemaload-%: db/%.db sql/all.sql - sqlite3 -cmd ".echo on" $< < sql/all.sql && touch $@ +# --- +# sqlite db creation and loading +# --- +target/%.created: + touch db/$*.db +db/%.db: prefixes/prefix.sql sql/rdftab.sql + cat $^ | sqlite3 $@ && echo OK || echo ALREADY LOADED +.PRECIOUS: db/%.db -problems-%: db/%.db schemaload-% - sqlite3 $< -cmd "SELECT * FROM problems" +target/%.load: target/%.created owl/%.owl + ./bin/rdftab db/$*.db < owl/$*.owl && touch $@ +.PRECIOUS: target/%.load -db/%.db: prefixes/prefix.sql - sqlite3 $@ < $< -.PRECIOUS: db/%.db +target/%.views: db/%.db sql/all.sql + sqlite3 -cmd ".echo on" $< < sql/all.sql ; touch $@ +.PRECIOUS: target/%.load + + +# --- +# Inferences +# --- +# We use relation-graph +inferences/%-inf.ttl: owl/%.owl + relation-graph --ontology-file $< --redundant-output-file $@ --non-redundant-output-file inferences/$*-nr.ttl --property http://purl.obolibrary.org/obo/BFO_0000050 +.PRECIOUS: inferences/%-inf.ttl + +inferences/%-inf.owl: inferences/%-inf.ttl + robot convert -i $< -o $@ +.PRECIOUS: inferences/%-inf.owl +inferences/%-inf.tsv: inferences/%-inf.owl + sqlite3 $@.db < prefixes/prefix.sql && ./bin/rdftab $@.db < $< && sqlite3 $@.db -cmd '.separator "\t"' -cmd 
'.header on' "SELECT subject,predicate,object FROM statements " > $@.tmp && mv $@.db $@.db.old && mv $@.tmp $@ +.PRECIOUS: inferences/%-inf.tsv + +inferences/%.load: db/%.db inferences/%-inf.tsv + sqlite3 $< -cmd '.separator "\t"' '.import inferences/$*-inf.tsv entailed_edge' && touch $@ +.PRECIOUS: inferences/%.load + + +# --- +# Reports +# --- + +reports/%.problems.tsv: db/%.db target/%.views + sqlite3 $< "SELECT * FROM problems" > $@ + + +# --- +# Downloads +# --- + +owl/%.owl: + curl -L -s http://purl.obolibrary.org/obo/$*.owl > $@.tmp && mv $@.tmp $@ +.PRECIOUS: owl/%.owl + +owl/go.owl: + curl -L -s http://purl.obolibrary.org/obo/go/extensions/go-plus.owl > $@ + +# --- +# GO Demo +# --- +demo/gaf/%.gaf.tsv: + curl -L -s http://current.geneontology.org/annotations/$*.gaf.gz | gzip -dc | ./utils/gaf2tsv > $@ +loadgaf-%: demo/gaf/%.gaf.tsv + sqlite3 db/go.db -cmd '.separator "\t"' '.import $< gaf' && touch $@ + +download/idmapping.dat.gz: + wget https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz -O $@ + +CAMDIR = ../noctua-models/models/ +loadcams: + find $(CAMDIR) -name "*.ttl" -exec sh -c "riot --out rdfxml {} | ./bin/rdftab db/go.db" \; + +# --- +# Experimental: sqlalchemy bindings +# --- +semsql/sqlaviews.py: db/foo.db + sqlacodegen sqlite:///$< > $@ + +# --- +# Schema +# --- + +# TODO: markdown gen should make modular output +markdown-%: src/schema/%.yaml + gen-markdown --no-mergeimports -d docs $< && mv docs/index.md docs/$*_index.md +markdown: markdown-rdf markdown-owl + gen-markdown --no-mergeimports -d docs src/schema/semsql.yaml -load-%: db/%.db owl/%.owl - ./bin/rdftab $< < owl/$*.owl && touch $@ +gen-ddl: ddl/rdf.sql +ddl/%.sql: src/schema/%.yaml + gen-sqlddl --no-use-foreign-keys $< > $@.tmp && \ + python semsql/sqlutils.py $< >> $@.tmp && \ + mv $@.tmp $@ +semsql/sqla/%.py: src/schema/%.yaml + gen-sqlddl --no-use-foreign-keys --sqla-file $@ $< diff --git a/README.md b/README.md index 
d8c8c58..9f1b466 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,21 @@ views for querying ontologies such as GO, that incorporate critical information in existential axioms, the view `edge` provides a union of subclass between named classes and subclasses of existentials. +## Validation + +The general philosophy is not to use foreign key constraints or +triggers to enforce integrity. Instead we allow problematic data into +the database and provide transparent reporting on it and ways +to validate. Individual use cases may call for more aggressive filtering. + +The convention is to write rules/constraints as SQL views with a name `problem_`. + +See also: + + * ROBOT report + * GO Rules + * KGX validation + ## Relationship to rdftab.rs We use the same schema as rdftab.rs, and rdftab can be used as a performant robust loader. @@ -78,6 +93,10 @@ rdftab provides a useful base standard that could be used e.g. for distributing ontologies and semantic databases as sqlite, for which a variety of performant tools can be written. +## Schema + +See [LinkML Docs](https://cmungall.github.io/semantic-sql/) + ## Design Philosophy SPARQL has many appealing qualities and it was designed first and foremost for the semantic web. 
But there are problems: diff --git a/environment.sh b/environment.sh new file mode 100644 index 0000000..dc950b8 --- /dev/null +++ b/environment.sh @@ -0,0 +1,5 @@ +#!/bin/sh +python -m venv venv +source venv/bin/activate +export PYTHONPATH=.:$PYTHONPATH + diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..c296da7 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,7 @@ +site_name: "Semantic SQL" +theme: readthedocs +nav: + - Home: index.md + - RDF: rdf_index.md + - OWL: owl_index.md + diff --git a/prefixes/prefix.sql b/prefixes/prefix.sql index 198dda4..bc9b463 100644 --- a/prefixes/prefix.sql +++ b/prefixes/prefix.sql @@ -18,6 +18,10 @@ INSERT OR IGNORE INTO prefix VALUES ("BFO", "http://purl.obolibrary.org/obo/BFO_"), ("CHEBI", "http://purl.obolibrary.org/obo/CHEBI_"), ("CL", "http://purl.obolibrary.org/obo/CL_"), +("RO", "http://purl.obolibrary.org/obo/RO_"), +("GO", "http://purl.obolibrary.org/obo/GO_"), +("UBERON", "http://purl.obolibrary.org/obo/UBERON_"), +("PATO", "http://purl.obolibrary.org/obo/PATO_"), ("IAO", "http://purl.obolibrary.org/obo/IAO_"), ("NCBITaxon", "http://purl.obolibrary.org/obo/NCBITaxon_"), ("OBI", "http://purl.obolibrary.org/obo/OBI_"), @@ -25,6 +29,7 @@ INSERT OR IGNORE INTO prefix VALUES ("obo", "http://purl.obolibrary.org/obo/"), +("gocam", "http://model.geneontology.org/"), ("UP", "http://purl.uniprot.org/uniprot/"), ("UC", "http://purl.uniprot.org/core/"), ("PRO", "http://www.uniprot.org/annotation/PRO_"), diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..65e7d97 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +linkml +mkdocs +sqlalchemy +sqlacodegen diff --git a/semsql/__init__.py b/semsql/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/semsql/sqla/__init__.py b/semsql/sqla/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/semsql/sqla/owl.py b/semsql/sqla/owl.py new file mode 100644 index 0000000..af57918 --- /dev/null +++ 
b/semsql/sqla/owl.py @@ -0,0 +1,101 @@ +from sqlalchemy import Column, Index, Table, Text +from sqlalchemy.sql.sqltypes import NullType +from sqlalchemy.ext.declarative import declarative_base + +Base = declarative_base() +metadata = Base.metadata + +class BlankNode(Base): + __tablename__ = 'blank_node' + id = Column(Text, primary_key=True) + +class ClassNode(Base): + __tablename__ = 'class_node' + id = Column(Text, primary_key=True) + +class IriNode(Base): + __tablename__ = 'iri_node' + id = Column(Text, primary_key=True) + +class NamedIndividualNode(Base): + __tablename__ = 'named_individual_node' + id = Column(Text, primary_key=True) + +class Node(Base): + __tablename__ = 'node' + id = Column(Text, primary_key=True) + +class OntologyNode(Base): + __tablename__ = 'ontology_node' + id = Column(Text, primary_key=True) + +class OwlEquivalentClassStatement(Base): + __tablename__ = 'owl_equivalent_class_statement' + stanza = Column(Text, primary_key=True) + predicate = Column(Text, primary_key=True) + value = Column(Text, primary_key=True) + language = Column(Text, primary_key=True) + subject = Column(Text, primary_key=True) + object = Column(Text, primary_key=True) + +class OwlRestriction(Base): + __tablename__ = 'owl_restriction' + restriction = Column(Text, primary_key=True) + on_property = Column(Text, primary_key=True) + filler = Column(Text, primary_key=True) + +class OwlSomeValuesFrom(Base): + __tablename__ = 'owl_some_values_from' + restriction = Column(Text, primary_key=True) + on_property = Column(Text, primary_key=True) + filler = Column(Text, primary_key=True) + +class Prefix(Base): + """ + Maps CURIEs to URIs + """ + __tablename__ = 'prefix' + prefix = Column(Text, primary_key=True) + base = Column(Text, primary_key=True) + +class RdfTypeStatement(Base): + """ + A statement that indicates the asserted type of the subject entity + """ + __tablename__ = 'rdf_type_statement' + stanza = Column(Text, primary_key=True) + subject = Column(Text, primary_key=True) 
+ predicate = Column(Text, primary_key=True) + value = Column(Text, primary_key=True) + language = Column(Text, primary_key=True) + object = Column(Text, primary_key=True) + +class RdfsLabelStatement(Base): + __tablename__ = 'rdfs_label_statement' + stanza = Column(Text, primary_key=True) + subject = Column(Text, primary_key=True) + predicate = Column(Text, primary_key=True) + object = Column(Text, primary_key=True) + value = Column(Text, primary_key=True) + language = Column(Text, primary_key=True) + +class RdfsSubClassOfStatement(Base): + __tablename__ = 'rdfs_subClassOf_statement' + stanza = Column(Text, primary_key=True) + predicate = Column(Text, primary_key=True) + value = Column(Text, primary_key=True) + language = Column(Text, primary_key=True) + subject = Column(Text, primary_key=True) + object = Column(Text, primary_key=True) + +class Statements(Base): + """ + Represents an RDF triple + """ + __tablename__ = 'statements' + stanza = Column(Text, primary_key=True) + subject = Column(Text, primary_key=True) + predicate = Column(Text, primary_key=True) + object = Column(Text, primary_key=True) + value = Column(Text, primary_key=True) + language = Column(Text, primary_key=True) \ No newline at end of file diff --git a/semsql/sqla/rdf.py b/semsql/sqla/rdf.py new file mode 100644 index 0000000..e69de29 diff --git a/semsql/sqlutils.py b/semsql/sqlutils.py new file mode 100644 index 0000000..629d299 --- /dev/null +++ b/semsql/sqlutils.py @@ -0,0 +1,44 @@ +import click +from linkml_model import SchemaDefinition +from linkml.utils.formatutils import underscore +from linkml.utils.schemaloader import load_raw_schema, SchemaLoader + +VIEW_CODE = 'sqlview>>' + +def generate_views_from_linkml(schema: SchemaDefinition, drop_tables=True) -> None: + """ + Generates SQL VIEW statements from hints in LinkML schema + + View hints are encoded in comments section in classes/tables section + :param schema: LinkML schema containing hints + """ + for cn, c in 
schema.classes.items(): + sql_table = underscore(cn) + views = [] + for cmt in c.comments: + cmt = cmt.strip().rstrip(';') + if cmt.startswith(VIEW_CODE): + views.append(cmt.replace(VIEW_CODE,'').strip()) + if len(views) > 0: + print() + if drop_tables: + print(f'DROP TABLE {sql_table};') + print(f'CREATE VIEW {sql_table} AS {" UNION ".join(views)};') + +@click.command() +@click.argument('inputs', nargs=-1) +def cli(inputs): + """ + Generates SQL VIEW commands from hints embedded in linkml schema + """ + for input in inputs: + with open(input, 'r') as stream: + schema = load_raw_schema(input) + print('-- ** REWRITE TABLES AS VIEWS **') + print(f'-- SCHEMA: {schema.id}') + loader = SchemaLoader(schema, mergeimports=True) + loader.resolve() + generate_views_from_linkml(schema) + +if __name__ == '__main__': + cli() diff --git a/sql/go-annotation.sql b/sql/go-annotation.sql new file mode 100644 index 0000000..e83318b --- /dev/null +++ b/sql/go-annotation.sql @@ -0,0 +1,17 @@ +/** + +These operate over NON-NORMALIZED gaf tables, for demo purposes + +No attempt is yet made to parse pipe-separated fields like taxon, qualifier + +*/ + + +CREATE VIEW annotation_to_deprecated AS SELECT * FROM gaf WHERE ontology_class_ref IN (SELECT id FROM deprecated ); + +-- todo: include redundant +CREATE VIEW entailed_gaf AS SELECT gaf.*, e.predicate AS inferred_predicate, e.object AS ancestor_term FROM gaf JOIN entailed_edge AS e ON (gaf.ontology_class_ref = e.subject); + +-- stats +CREATE VIEW num_annotation_by_taxon AS SELECT db_object_taxon, count(*) AS num_annotations FROM gaf GROUP BY db_object_taxon; +CREATE VIEW num_term_by_taxon AS SELECT db_object_taxon, count(DISTINCT ontology_class_ref) AS num_terms_annotated FROM gaf GROUP BY db_object_taxon; diff --git a/sql/go-cam.sql b/sql/go-cam.sql new file mode 100644 index 0000000..f5eb6a2 --- /dev/null +++ b/sql/go-cam.sql @@ -0,0 +1,37 @@ +CREATE VIEW inst_term AS SELECT * FROM rdftype WHERE object != 'owl:NamedIndividual'; + +CREATE 
VIEW modelstate AS SELECT subject AS id, value AS state FROM statements WHERE predicate=''; +CREATE VIEW modeltitle AS SELECT subject AS id, value AS state FROM statements WHERE predicate='dce:title'; + +CREATE VIEW stats_modelstate AS SELECT state, count(distinct id) AS num_models FROM modelstate GROUP BY state; + +CREATE VIEW has_evidence AS SELECT * FROM statements WHERE predicate = ''; + +CREATE VIEW inferred_type AS SELECT t.subject AS id, t.object AS asserted_type, s.object AS inferred_type FROM rdftype AS t JOIN entailed_subclass_of AS s ON (t.object=s.subject); + +CREATE VIEW molecular_activity_node AS SELECT * FROM inferred_type WHERE inferred_type = 'GO:0003674'; +CREATE VIEW biological_process_node AS SELECT * FROM inferred_type WHERE inferred_type = 'GO:0008150'; +CREATE VIEW location_node AS SELECT * FROM inferred_type WHERE inferred_type = 'GO:0110165'; + +CREATE VIEW stats_molecular_activity_by_term AS select asserted_type, count(*) AS num_nodes FROM molecular_activity_node GROUP BY asserted_type ORDER BY num_nodes desc; + +CREATE VIEW edge_with_terms AS SELECT edge.*,st.object AS subject_term,ot.object AS object_term from inst_term AS st JOIN statements AS edge ON (st.subject=edge.subject) INNER JOIN inst_term AS ot ON (edge.object=ot.subject); + +CREATE VIEW enabled_by AS SELECT * from statements WHERE predicate='RO:0002333'; +CREATE VIEW enabled_by_ann AS SELECT * from axiom_annotation WHERE predicate='RO:0002333'; +--CREATE VIEW enabled_by_flat AS SELECT * from rdfslabel AS sl JOIN enabled_by AS edge ON (sl.subject=edge.subject) INNER JOIN rdfslabel AS ol ON (edge.object=ol.subject); +CREATE VIEW enabled_by_with_terms AS SELECT * from edge_with_terms WHERE predicate='RO:0002333'; + + +CREATE VIEW occurs_in AS SELECT * from statements WHERE predicate='BFO:0000066'; +CREATE VIEW regulates AS SELECT * from statements WHERE predicate='RO:0002211'; +CREATE VIEW part_of AS SELECT * from statements WHERE predicate='BFO:0000050'; + +-- QC + +CREATE VIEW 
problem_rdftype_to_deprecated AS SELECT * FROM rdftype WHERE object IN (SELECT id FROM deprecated); + +-- suggest a repair for use of an obsolete term +-- sqlite does not support stored procedures but it would be easy to do an UPDATE for this +CREATE VIEW repair_rdftype_to_deprecated AS SELECT t.*, r.object AS replaced_by FROM rdftype AS t JOIN term_replaced_by AS r ON (t.object=r.subject); + diff --git a/sql/idmapping.sql b/sql/idmapping.sql new file mode 100644 index 0000000..828b17d --- /dev/null +++ b/sql/idmapping.sql @@ -0,0 +1,5 @@ +CREATE TABLE idmapping ( + uniprot TEXT, + db TEXT, + xref TEXT +); diff --git a/sql/indexes.sql b/sql/indexes.sql new file mode 100644 index 0000000..8dc0120 --- /dev/null +++ b/sql/indexes.sql @@ -0,0 +1,3 @@ +CREATE INDEX statements_spo ON statements(subject,predicate,object); +CREATE INDEX statements_spv ON statements(subject,predicate,value); +CREATE INDEX statements_p ON statements(predicate); diff --git a/sql/obo-checks.sql b/sql/obo-checks.sql index 3a02ac7..837ed7d 100644 --- a/sql/obo-checks.sql +++ b/sql/obo-checks.sql @@ -17,3 +17,9 @@ CREATE VIEW problems AS UNION SELECT 'shared_label_problems' AS problem, node1 AS id, value FROM shared_label_problems; +CREATE VIEW edge_pointing_to_obsolete_problem AS SELECT subject AS id, predicate, object FROM edge WHERE object in (SELECT id FROM deprecated); + +-- https://github.com/ontodev/robot/blob/master/robot-core/src/main/resources/report_queries/deprecated_boolean_datatype.rq +CREATE VIEW deprecated_boolean_datatype_problem AS SELECT * FROM statements WHERE predicate='owl:deprecated' AND datatype != 'xsd:boolean'; + +CREATE VIEW bad_boolean_datatype_problem AS SELECT * FROM statements WHERE datatype = 'xsd:boolean' AND value NOT IN ('true', 'false'); diff --git a/sql/oio.sql b/sql/oio.sql new file mode 100644 index 0000000..0b01b83 --- /dev/null +++ b/sql/oio.sql @@ -0,0 +1,41 @@ +CREATE VIEW hasExactSynonym AS SELECT * FROM statements WHERE predicate='oio:hasExactSynonym'; 
+CREATE VIEW hasBroadSynonym AS SELECT * FROM statements WHERE predicate='oio:hasBroadSynonym'; +CREATE VIEW hasNarrowSynonym AS SELECT * FROM statements WHERE predicate='oio:hasNarrowSynonym'; +CREATE VIEW hasRelatedSynonym AS SELECT * FROM statements WHERE predicate='oio:hasRelatedSynonym'; +CREATE VIEW hasSynonym AS + SELECT * FROM hasExactSynonym UNION + SELECT * FROM hasBroadSynonym UNION + SELECT * FROM hasNarrowSynonym UNION + SELECT * FROM hasRelatedSynonym; + +CREATE VIEW axiom_dbxref_annotation AS SELECT * FROM axiom_annotation WHERE annotation_predicate = 'oio:hasDbXref'; + +-- we assume domain of hasSynonymType is a synonym axiom +CREATE VIEW hasSynonymWithType AS SELECT * FROM axiom_annotation WHERE annotation_predicate = 'oio:hasSynonymType'; + +CREATE VIEW hasSynonymWithTypeXrefs AS + SELECT s.*, GROUP_CONCAT(a.value) AS xref FROM hasSynonymWithType AS s LEFT JOIN dbxref_axiom AS a ON(a.subject=s.subject AND a.predicate=s.predicate AND a.value=s.value) GROUP BY s.subject,s.predicate,s.object; + + +CREATE VIEW hasSynonymWithXrefs AS + SELECT s.*, GROUP_CONCAT(a.axiom_object_value) AS xref FROM hasSynonym AS s LEFT JOIN dbxref_axiom AS a ON(a.subject=s.subject AND a.predicate=s.predicate AND a.value=s.value) GROUP BY s.subject,s.predicate,s.object; + +CREATE VIEW has_dbxref AS SELECT * FROM statements WHERE predicate = 'oio:hasDbXref'; +CREATE VIEW skos_exact_match AS SELECT * FROM statements WHERE predicate = 'skos:exactMatch'; +CREATE VIEW skos_narrow_match AS SELECT * FROM statements WHERE predicate = 'skos:narrowMatch'; +CREATE VIEW skos_broad_match AS SELECT * FROM statements WHERE predicate = 'skos:broadMatch'; +CREATE VIEW skos_close_match AS SELECT * FROM statements WHERE predicate = 'skos:closeMatch'; + +CREATE VIEW mapping AS + SELECT * FROM has_dbxref UNION + SELECT * FROM skos_exact_match UNION + SELECT * FROM skos_broad_match UNION + SELECT * FROM skos_narrow_match UNION + SELECT * FROM skos_related_match; + +-- TODO: autogenerate these 
for all prefixes +CREATE VIEW rhea_xref AS SELECT * FROM has_dbxref WHERE VALUE LIKE 'RHEA:%'; + +CREATE VIEW axiom_dbxref_annotation_inconsistency + AS SELECT a.*, x.subject AS subject2 FROM axiom_dbxref_annotation AS a JOIN has_dbxref AS x ON (a.annotation_value = x.value) + WHERE a.subject != x.subject AND x.subject NOT LIKE '_:%'; diff --git a/sql/owl.sql b/sql/owl.sql index 0f12391..7d960e7 100644 --- a/sql/owl.sql +++ b/sql/owl.sql @@ -1,5 +1,48 @@ -- #include "rdf.sql" + +CREATE VIEW owl_ontology AS SELECT subject AS id FROM rdftype WHERE object='owl:Ontology'; + +CREATE VIEW owlClass AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:Class'; +CREATE VIEW owlNamedIndividual AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:NamedIndividual'; +CREATE VIEW object_property AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:ObjectProperty'; +CREATE VIEW annotation_property AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:AnnotationProperty'; +CREATE VIEW transitive AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:TransitiveProperty'; + +CREATE VIEW punned_class_as_individual AS SELECT id FROM owlClass WHERE id IN (SELECT id FROM owlNamedIndividual); + +CREATE VIEW illegally_punned_object_annotation_property AS SELECT id FROM object_property WHERE id IN (SELECT id FROM annotation_property); + +CREATE VIEW axiom_annotation AS + SELECT + axpv.stanza AS stanza, + axs.object AS subject, + axp.object AS predicate, + axo.object AS object, + axo.value AS value, + axo.datatype AS datatype, + axo.language AS language, + axpv.subject AS id, + axpv.predicate AS annotation_predicate, + axpv.object AS annotation_iri, + axpv.value AS annotation_value, + axpv.language AS annotation_language, + axpv.datatype AS annotation_datatype + FROM + statements AS 
axs, + statements AS axp, + statements AS axo, + statements AS axpv + WHERE + axs.predicate = 'owl:annotatedSource' AND + axp.predicate = 'owl:annotatedProperty' AND + axo.predicate = 'owl:annotatedTarget' AND + axs.subject = axpv.subject AND + axp.subject = axpv.subject AND + axo.subject = axpv.subject AND + axpv.predicate NOT IN ('owl:annotatedSource', 'owl:annotatedProperty', 'owl:annotatedTarget', 'rdf:type'); + + CREATE VIEW subclass_of AS SELECT stanza, subject, predicate, object FROM statements @@ -34,10 +77,6 @@ CREATE VIEW subclass_of_some AS svf.restriction=subClassOf.object; -CREATE VIEW owlClass AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:Class'; -CREATE VIEW owlNamedIndividual AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:NamedIndividual'; -CREATE VIEW object_property AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:ObjectProperty'; -CREATE VIEW transitive AS SELECT distinct subject AS id FROM statements WHERE predicate = 'rdf:type' AND object = 'owl:TransitiveProperty'; CREATE VIEW predicate AS SELECT distinct predicate AS id FROM edge WHERE id IN (SELECT id FROM object_property) OR id='rdfs:subClassOf'; diff --git a/sql/rdf.sql b/sql/rdf.sql index da0f246..12be413 100644 --- a/sql/rdf.sql +++ b/sql/rdf.sql @@ -2,6 +2,14 @@ CREATE VIEW node AS SELECT distinct(subject) AS id FROM statements UNION SELECT CREATE VIEW bnode AS SELECT * FROM node WHERE id LIKE '_:%'; CREATE VIEW iri AS SELECT * FROM node WHERE id NOT LIKE '_:%'; -CREATE VIEW deprecated AS SELECT distinct(subject) AS id FROM statements WHERE predicate='owl:deprecated' AND value='true'; +CREATE VIEW rdftype AS SELECT * FROM statements WHERE predicate = 'rdf:type'; CREATE VIEW rdfslabel AS SELECT * FROM statements WHERE predicate = 'rdfs:label'; + +-- move to owl? 
+CREATE VIEW deprecated AS SELECT distinct(subject) AS id FROM statements WHERE predicate='owl:deprecated' AND value='true'; + +-- move to OBO/IAO/OMO? +-- NOTE: most ontologies use OBJECT for replaced_by, and VALUE for consider?? +CREATE VIEW term_replaced_by AS SELECT * FROM statements WHERE predicate='IAO:0100001'; +CREATE VIEW term_consider_replacement AS SELECT * FROM statements WHERE predicate='oio:consider'; diff --git a/sql/rdftab.sql b/sql/rdftab.sql index bc108f7..cb8f87f 100644 --- a/sql/rdftab.sql +++ b/sql/rdftab.sql @@ -1,9 +1,9 @@ -- NOTE: these are created automatically if you use rdftab.rs -CREATE TABLE prefix ( - prefix TEXT PRIMARY KEY, - base TEXT NOT NULL -); +--CREATE TABLE prefix ( +-- prefix TEXT PRIMARY KEY, +-- base TEXT NOT NULL +--); CREATE TABLE statements ( stanza TEXT, @@ -12,5 +12,6 @@ CREATE TABLE statements ( object TEXT, value TEXT, datatype TEXT, - language TEXT + language TEXT, + PRIMARY KEY(subject, predicate, object, value) ); diff --git a/sql/relation-graph.sql b/sql/relation-graph.sql index 0511fca..e2df421 100644 --- a/sql/relation-graph.sql +++ b/sql/relation-graph.sql @@ -1,6 +1,24 @@ -- #include "owl.sql" CREATE VIEW edge AS SELECT * FROM subclass_of_named UNION SELECT * FROM subclass_of_some; + +-- todo: add graph param +CREATE TABLE entailed_edge ( + subject TEXT, + predicate TEXT, + object TEXT + ); + +CREATE VIEW entailed_subclass_of AS SELECT * FROM entailed_edge WHERE predicate='rdfs:subClassOf'; + +/** + +Note: the queries below are slow + +instead it is recommended you use relation-graph to materialize inferred edges + +*/ + CREATE VIEW edge_composition AS SELECT e1.subject, e1.predicate AS predicate1, e2.predicate AS predicate2, e2.object, e1.object AS intermediate FROM edge AS e1, edge AS e2 diff --git a/src/schema/basics.yaml b/src/schema/basics.yaml new file mode 100644 index 0000000..31214da --- /dev/null +++ b/src/schema/basics.yaml @@ -0,0 +1,23 @@ +id: https://w3id.org/semsql/basics +name: basics 
+license: https://creativecommons.org/publicdomain/zero/1.0/ + +prefixes: + basics: https://w3id.org/semsql/basics + linkml: https://w3id.org/linkml/ + +default_prefix: basics +default_range: string + +default_curi_maps: + - semweb_context + +imports: + - linkml:types + +slots: + id: + identifier: true + + + diff --git a/src/schema/owl.yaml b/src/schema/owl.yaml new file mode 100644 index 0000000..2115609 --- /dev/null +++ b/src/schema/owl.yaml @@ -0,0 +1,89 @@ +id: https://w3id.org/semsql/owl +name: semsql_owl +title: Semantic SQL OWL module +description: >- + OWL Module +license: https://creativecommons.org/publicdomain/zero/1.0/ + +prefixes: + semsql_owl: https://w3id.org/semsql/owl + linkml: https://w3id.org/linkml/ + +default_prefix: semsql_owl +default_range: string + +default_curi_maps: + - semweb_context + +imports: + - rdf + +classes: + ontology node: + is_a: node + comments: + - sqlview>> SELECT DISTINCT subject AS id FROM rdftype WHERE object='owl:Ontology' + + property node: + abstract: true + object property node: + is_a: property node + comments: + - sqlview>> SELECT DISTINCT subject AS id FROM rdftype WHERE object='owl:ObjectProperty' + transitive property node: + is_a: object property node + comments: + - sqlview>> SELECT DISTINCT subject AS id FROM rdftype WHERE object='owl:TransitiveProperty' + + annotation property node: + is_a: property node + comments: + - sqlview>> SELECT DISTINCT subject AS id FROM rdftype WHERE object='owl:AnnotationProperty' + + owl equivalent class statement: + is_a: statements + comments: + - sqlview>> SELECT * FROM statements WHERE predicate='owl:equivalentClass' + slot_usage: + subject: + description: >- + One of the two classes that are equivalent. No significance to subject vs object + range: class node + object: + description: >- + One of the two classes that are equivalent. 
No significance to subject vs object + range: class node + + owl restriction: + abstract: true + slots: + - restriction + - on property + - filler + + owl some values from: + is_a: owl restriction + comments: + - >- + sqlview>> + SELECT onProperty.subject AS restriction, + onProperty.object AS on_property, + someValuesFrom.object AS filler + FROM + statements AS onProperty, + statements AS someValuesFrom + WHERE + onProperty.predicate = 'owl:onProperty' AND + onProperty.subject=someValuesFrom.subject AND + someValuesFrom.predicate='owl:someValuesFrom' + + + + +slots: + restriction: + range: blank node + on property: + range: node + filler: + range: class node diff --git a/src/schema/rdf.yaml b/src/schema/rdf.yaml new file mode 100644 index 0000000..370171f --- /dev/null +++ b/src/schema/rdf.yaml @@ -0,0 +1,122 @@ +id: https://w3id.org/semsql/rdf +name: semsql_rdf +title: Semantic SQL RDF module +description: >- + RDF Module +license: https://creativecommons.org/publicdomain/zero/1.0/ + +prefixes: + semsql_rdf: https://w3id.org/semsql/rdf + linkml: https://w3id.org/linkml/ + +default_prefix: semsql_rdf +default_range: string + +default_curi_maps: + - semweb_context + +imports: + - linkml:types + - rdf + +classes: + prefix: + description: >- + Maps CURIEs to URIs + slots: + - prefix + - base + + # -- triples -- + statements: + description: >- + Represents an RDF triple + slots: + - stanza + - subject + - predicate + - object + - value + - language + rdf type statement: + is_a: statements + description: >- + A statement that indicates the asserted type of the subject entity + comments: + - sqlview>> SELECT * FROM statements WHERE predicate='rdf:type' + slot_usage: + object: + description: >- + The entity type + range: class node + rdfs subClassOf statement: + is_a: statements + comments: + - sqlview>> SELECT * FROM statements WHERE predicate='rdfs:subClassOf' + slot_usage: + subject: + description: >- + The subclass element of the triple + range: class node + object: 
+ description: >- + The superclass element of the triple + range: class node + rdfs label statement: + is_a: statements + comments: + - sqlview>> SELECT * FROM statements WHERE predicate='rdfs:label' + + # -- nodes -- + node: + slots: + - id + comments: + - sqlview>> SELECT distinct(subject) AS id FROM statements UNION SELECT distinct(object) AS id FROM statements WHERE datatype IS NOT NULL + blank node: + is_a: node + comments: + - sqlview>> SELECT * FROM node WHERE id LIKE '_:%' + iri node: + is_a: node + comments: + - sqlview>> SELECT * FROM node WHERE id NOT LIKE '_:%' + class node: + is_a: node + class_uri: owl:Class + named individual node: + is_a: node + class_uri: owl:NamedIndividual + +slots: + id: + identifier: true + range: node id type + subject: + range: node + predicate: + object: + description: >- + Note the range of this slot is always a node. If the triple represents a literal, instead value will be populated + range: node + stanza: + range: node + value: + description: >- + Note the range of this slot is always a string. 
Only used the triple represents a literal assertion + range: literal as string type + language: + range: string + todos: + - use an enum + prefix: + range: ncname + base: + range: uri + +types: + node id type: + typeof: uriorcurie + literal as string type: + typeof: string + diff --git a/src/schema/semsql.yaml b/src/schema/semsql.yaml new file mode 100644 index 0000000..dcbfa0e --- /dev/null +++ b/src/schema/semsql.yaml @@ -0,0 +1,20 @@ +id: https://w3id.org/kgcl/semsql +name: semsql +title: Semantic SQL +description: >- + A datamodel for RDF, OWL, and OBO Ontologies designed to work harmoniously with SQL databases +license: https://creativecommons.org/publicdomain/zero/1.0/ + +prefixes: + semsql: https://w3id.org/semsql/ + linkml: https://w3id.org/linkml/ + +default_prefix: semsql +default_range: string + +default_curi_maps: + - semweb_context + +imports: + - rdf + - owl diff --git a/tests/inputs/go-nucleus.db b/tests/inputs/go-nucleus.db new file mode 100644 index 0000000000000000000000000000000000000000..030cb345eb005a29b7d0f92cedaff4f7035ec778 GIT binary patch literal 163840 zcmeHw34Gi~b^q_Dm418g#&I0SaU8Fr$g*PVSgoX$Y|EA`%Z_}>wj3Y1HmlvWyzy#R zXjitCmW1CX0saMAjzT$F3Z?vkLfUe)lu}x*0Hu^tptO`0NZYg=y(s@45E}mP&F`3* z-?fsR*o2HfAKTLIyf<&YGxKKV&3kXAj~*=63W+n7g}H1kk&>>FWLX+bBqT{{hrgTQ z?rHZgX5B5`7J;$*_<`^4CEVtV4>#Q5<+eGGADzEp=BH zXYMcLYWU?sp;}ug=4!=Cd21-a{9%e$%+i%|W?pB)mquAn5K;Gnq}`O_OtDldEO4Cn z9-BI3d{C`rYlXQ&xmHDCXq_f^z5rj1#^mJf6UQbDew0J^{6ZmL%mMqvNTLI<*+#$X zNQ@oc<3OV0GQw9w;i5#A5@uZ^q$d8`4z(}gUo$ciP=5xyHd`)Npw$4liDjl-$rs!{ zXS>*~4AaefCZ~^29)|vAI=PrnAn1vO{F&O)d?7&~OK6>>Nj#m?JJhzD$t+UmP-?A(yu?nQvC=wyf7y~wkr_m# zy!A%^nl)hknN2TcX9}Q{4TVJ-Wcke2 zV9WQH&{o2sLv3E`Uz1D*)U+;62K$N=#`1|p6w9|O7GxXN*AaZ1^y&t{UE!})>01BV zEn8$YUCYjt3b+I^E1KokTXj&5kL^AGZSGYBkmBUo$xwPz%`U*p|Wz7K^YUfaNli!&yiNfcLlTVDK)vvVj4-wo#dg zV(3r<8~kfJI&`H`Dc35c%Ip$Z;kaKonb=Z{2$!~&P3hb^GVA?oT3Z8Z6!DuWWy|M? 
z81yGr2Kp%@fUc2~_3{<)YCl5>{Ix^eZJ5*oTzc!tg)pj!a%GtR>K2t2`i|32^ z#cYYx=J??TZX`Mu!SM){;gB;GGLFY1$0iSr9Xp*kFmXDuxtQ+^bxuZeC-7=R9{Ym+w2lLke!hJ4QUPV{zE(H%D922a{(NKnf22381g9%j zwl4b!WDg%AkeRDhmHyPsK}J6Z$h7T0d{llQt0_C)S5jnze zpO~8smLJT|*~RQ^fh)<3c4P0eHDT7gRo}yIO^`Y|iDaFrX0INl4bCL#&n@@K{FB1&Q;|7^t<~nM+j@F(=kqh&cs9=$&Ufc3b3OQF&(>Do>eH2lS=UnVUAA0&fLOA7 zHYI$krt($S!notF7HU1WZS<`@I$k+3Iqq6CTdoynD*2_JuA6=Blf^=QrVH*IYK3AM zNkCrws2ghUsbw!#%Ep&HYd84TbWKj|*=;SByvesVGj;g*JD07Ut&~aKU@EFCF60V5q*UA18v(5&$ENh+7cN}rUMv?; zZc>`s?R%h5=9YE7Rr~HB5I}q87Z*xg>0D)@(9?c{Z`GkYj_3?+WpwU6o7Vc)PR>?x z**U%B9CTx;d$v$67;PdFv|sOAeV{U9w1@D6UrWWAh3vu-DFVOtbgc2MI(p<7TdPB< zBgZnG*ZEdYPwk#;P!dXH23GqvW)6?NW8$!NSslr8Fgu=z_+JMAz*Tg(vB!N+?#kYF4 z0`pSS3OMC#qh7l=2N`E7ljW%krM6p}eXA!yVut8KiqzlN+uPUEo5tp}b@+U%r!H3W zj-`@4rw$#Q&YdmHW!ttk`PQ7on*hg>y*<6j9{9Gas+CU4zSTMS-Cj63JUrY3_4oC4 zk@8Dr&~07iYFh|as9R!Rmg2u2e`EYLv44&~3b*wB_+9bo_%-nt#1F=AjlVqpKzwUF z75_r~GqES*?}^_S|D*V}*nh?M#Q!SxxAAYr-yi=${NH0=j(_XsEw@S`e6G7ivnV1GQUgK+*S;ODW zNP?!l4OoVl=x)NFi0y4XvH`2wjmYMX@pa_CBRAmp#4fg$R5Me#9=|wpYz_VjW~u9< zzKybJZCZ_=f+^`*tO?CTt3tkYvSt9f1_}2~#qqymQ!)H+{9u$wir1h5PkJ^b4L1qJ^H+eHic|3ax5{|AGE4++qo&;&-1ApFn*-4uv_ z2|Mfh_*3!!ihnQu-T0I7Z^XYE|5D^-k+tEMgx7^$9sG8%82EAEm4U7P&-!QmVddS* z_Le7F9%{L%`J>Henp=GD^-VN=scEihwfqP2Sve#<_N?^&U7G_VBkfX0s|5CzZL^iG z8aS7?%~qDoF@ZY72xmv^{#K~b4Y8Hh*Z{+036Fs^Tgf^!3O@BlEVGdeq_iVq zUOY{F_Dzi?vgJH@PJtm8LNzRN>(tF2L(02?25{WDAuu-PHl)y83$Z6okZ9j{!;-xT zc(+SioMBl8ZwilA*gkUZTMt#Zjhy9(Pn^{ah2W%kPTd&TwaX14G#k&99PrV;@P?Pb8~QWL>2VCH$Z)ESS_c%Mnh?> z$56__=q~1+W|#hrH&)r}J%-Y9cvG}AK^%tC8jqp0yeb+1G;>{Ga?))mWvdYSH4DL5 zi%!~``PLh#xz$idyOeSqR16n~G0S`lb(nySQNiq>oxK*Sa~o3(!(~+00K5lR1quZR z@L=HP&Smly@NBMS=CWmQ>n<&2^!wZbU$1vrZ(!9;*MI=+(wr4pZ>B30z(oBxT(u>j zNr_q|F;hsuc07+kripxU0RscIhQg8oMaTs|o#twD90YaflV@1a2K`lv1*WGP_17}G z$tcpGccM|Ku3gG__Kv~Jqf?9`>iiP*=B+;I}-u;+G-xlpX)ue1R=n zHeduiGYY!l7-spEm0`*VGHL?a&Rj!UJk*^pEL5T5$zziYBuw7csjbJJqRJ^)=68^omjt#g*eGvYQs&~M@5w#Qk4Lt<^_>aIp z%I2o{Yb1YkR}=gai$`Ps7_CQl#lIYXQ+zV=rD!TVAO3ixFZ3&+C&HV8e;v9pbTSwY 
z?hn2;aC_jTfyezX^M5*^C?8OM;2%|FWkgwM+1v6+%SW0YZvI$HlkcO=QuBzf#W(Kz z*{1zXFKzmm{D}OCrWWZf(i8FscoV!Rjv_VqGpdZk;FTG4+UCw=%7sF{3W0bS6~uPI zn7N)9TjK3^DdX+ZYy$@7L>>n>veHtmm`h|KWc_@xwv-@FBMI1;Vx@`7nMAf$11kdb z+P1yQIOxcRyc^8W;hhR@&^b0>(;(&p3@twA-NKKmT1Z}VsZ8XLX)pwoQ-5t}AZn@!+smzzCA*Q;$PddGGJ&5}-iIN!+rxT#-3 z^P@X06`R@_+s70%E_x9)7%;c)QP7C!#DTT7d4t}zS3w)$GMJc+aIyj3vR^^N-?G>= zXjZFxPc$^(C3a#@IL-o%)@+2Mp>CslpBUr~gumHc59wAc@9x;(u7@Z+Z3J&lyUAS- zvBV9Uu-3ZaTcxgd?-7=`L658%SJ2$#1Sw;h@$Q!E_9$phYRtoIii5pq^`wGEqUA7Y(4=dR zDrf|R#Y8u{k>4gSuQ^l-5$vu&YRnT0ba{OKRYbC~i63F?SZ)gZdCH?fFG8!WFBD5(IYC3i;03W!K?@&57 z!2j1>(hdsK|JToM4QJM!;{M;;hfUo7 zv;Kzs5wzjx68Hb&{vUH(;+f8VO{I$a|0Qw%FYf>G;BHt;c8L4`OS=CjUh%{oKr=T@ z?Vj38()*v#oDGuQebg02JO95;ioY|yKlWto6|pVR&qPbnSmeEt)8VJWZw~Jb{bT5% zP0ww=<{SW)EQ{JhJw|t{zvH5qJkNUpndzmlU^v$MUY)Z+0D?cg^NKZ;{ z1GB<2^=I@*prZp!gW!vyHApkXaKCVt4Vx3d;VIzIDvf}{gck?H9|U{=Fu!633{;Q; z7py*og+e(8sZWhify0Lb0|Tr?V)<}~mhZ$N;MXc8P5C%l7k;|5 zU{B0sY4w>R<7agIU|{=pQ%>FDcpMfXV~QbbhmXCb%^U!(trB{RJCnP73|w`Y;T{=n zpIXjnutt(c11P-v_Xm?njB9fA3W_K#wsoS0W zsmX;s-V^1{74odJ3ipM#sQG=wpV|_)z%lURY+CeB5GR8QxE7twE|xU^6)OL|d!4+f zS5QlX^XANilasLB49Yge{;fAs>d2lTdI_sXITxrlJD-NNn5Z4c+%SpULILgJiJ7GY zey|TMx0*>hUm@(OC%JzVD$!KrWmEzU(dYNH2|+@KfOgN^F6^G|m)Rgvm%V#5)yFE~ zGdlx^4r%I+YZ^GZW-4dk?xYBrzKN$IkD&c~?|yn{1nOy(rujJot-q9dbQpYJ3s&jTrW|83f_21XAnEmhq%rdb zklfkh5nu*-7~M4;K+(VPh9%qW5n#3)-X=79VZ%R?f^%P&M}XP#s%QYviLD-EhInH- zz+-r6dIU=6wvaJn*D^Q_8}T)PadaQ0@Z5cy$Jl9HNh4@Fy`X6j`tAWuNWMo(O0SXQ ze-VF2{3Y@G;#*@sj(tA%8_^Sx@&DVA$0FrOIwFNX9e#Vb7*2+M82WhV(a_P*`rwnn z-wgho|9AZ_Z8_JxsrjDf*ED}Zey#jD`RS(CrqfNYZu$av{_pU8#aHwFPo9zqL)OXBi(1CT;^=b>MUk9wp#Y4qhya=uwS(aKrBd zeudnnnuAQK1U@U6MYcLu?M_UVNgxhJ=YS6jyl*p?P0YdV5WMoGZR4|L*sNcKeRvs| zz>yy&B>1Y3ZM;z#c$rie(UnHyg8nv%3lOqLL?FL`=S`smo7isY51^Stm*Pt@sI;b~ zEwuf8`iA&e0Z$zHX>AW#Ud+LRM7gC}g|tO_QxeH^4;OS@aVCRpnMP5Q-pKmIm=wBC z4EbGp1Ng(V?~*LC6wA|O{d1U(nzN(y`XQhjTLGQ!sVV)^2oN4v0U>*VF1@ZFsE1cT z4NlI|YXR@}k(ChPe3O20CD73QjP#mL1)`EV7)U|2>-*^}kzG8-@nt9J*S09F5ZTU`Av;0x 
z{nQ!FF{;0RuwQyB0MsFEXUggW(&`j7$%+4~14?U3>avru4sg(7ZN5REC^kiUa}QEE z3}cqc9K>_duRu4XSdffi-vFdv_A5(LFVpST$w0tnCPQkc0(xnRHYa7MC5VY5Qyvb%1}Xu0+7$Q+rtrT{6K_6s=R}F$&?^B#jOPw2}*5i0mf+BNyGP7M(AF zFJfZ&npVLBwE`s_>H(gM<#XlAg>rYo6da1<2xTiaL&#|@&^#P%+AFL$+#u+-T!Bxt zw9F{S1@t>TpDlu+q67lYRA8#OHCTg!729YLv=#X1SV5|JM3-wWRx_8)7c^gCh)aYQ zH*?Tjd}Z_So~PMXKE`b`W!N1!6MpB^O$W>#`Le&2^b5i;JMI(wzEm+%du#!<>MyPk`vwB|rfd zPYY|Xy?QU654e%BYqVLFsZ)LiR?w6*X0OdkhAWL-*q;RqLBra~txBJ2*iqC+6e8?eV#If9!u_kH_8`dr|Co zQ*9r{q{k&9Ab}t(q8g`ZI5Y2!OfKM zemHndU|iAsSrSV_P9KIb2Oi+eoU=;w^Bi8;7z&A}@=c{rUw z><}DwYsEao6B(x)_^ANqBRJ=5O)S7eKvhUrryWlSXE3K!&q4l;8oa}kn9x$UXeSwB zWuX?MZrN`gL21X)h9gLN3RcWkX^i5dl_ev{ut9af?i8Nbg(MS*7Y-un|5*nSlJSE` z`jK@Iktx$~5J~@O9S<5E9S>>gzpUfIn2oI3G1hs;0P)|WxX_O?t<`5SyU?4>hOKf5 z4hL+z|L1blfLbMkG0jxZtiG2s7>y2sOMM7WgFy*r3QTHMS;W&tsW=Z0D$l@KV+kS! zjpkkYIK^YQF;*LlMd^puu{h{77RhE&`j>VbiyJ9CT3Ip{>CpRo>(IkemZA5r)}c4J zEd3?@gH?Zd(dqh2dXhbqqjG9Qr0?{le_V-_bgM7@lXZe<719YZD1FO1K}eB?3RC)a zA8ut+Oc!v=91Sxhqm`(F@38g-v-wb!>&loMNDU7U4kS}a>AUrt$&NJ2)cB0diAx{d z#>j4@g&`6Y<^dS|%nM#~5uvvDEIcS{+P`@{tKL5-P_Fmk>+XwsK z-Kw;9N^3L(Ng3S!xhE%jSesp8aohDh`_9+-J+)f>6@FOT$^Ej#R`q? 
zU{hrErKKm>i4C^gRKL^2hVcNs{3U1w?2&Z|&9Y=->+7TRNv@@O8OyZ(GMupCgWAw$|7puLK|>K)n}PHhR+TKLN<)?WF{?_JL#SVe ze0HT3>ShD!PtZ0pX^a!>t_I*Rb_3~itOBFb8ioU4CjBX^z|10z71-zDvMD9CGeb>R zVC)KyI|IW2Ed3eSlR6}Qhsf5X=`X7C#(^qh4A(EPTF*{J4ZZZtpYH7&#>durr9ZEe zZRw+IrN%UaZRwb~M%#B@xIZd*PfQ={KQBosp2lyZ|%prGRVP3OX8Y?pd_cJ9l?Fd#y zWL_Kqo{1D_bpL>sFS?GO3R%e8Llix!R5hjHh&nb&!wdI#u?r2%l{JG|rHcvFmz7P3S zUtiOcO|Nenm%k^!8Q4GX{_J@u(26$0R*5e9^e0_cx|k2CPsz~-flCLb*rv4fC!LGy z{*SR>QkF+vNLcQ>G?vsRN;yvdEN}#~t|v#^vfyaprA!a}3}Fhc`?S6ECzrK`Bo;BP z%?z^#mch(>rch>wmVg-;+dU=YJvHp6Y|3u%BCvzRB<}3YD`>bAqI7#N03GZMBv*i3 z6d)=VW|Oe{Jn|X4G(P$+Wo!|7?7BoAB)Qh^+GU8L@6PsBk;j2c6NB9!?p#1d=rv4- z9Q_Fgp9i+EI~{!i(vMssJuWh%^OwPa?8e6`$l((Dgx(uIJX;O|54dM`9-L&U)^3Aq z=FP3Ra(>QSIa6h_dkzK*iLRi-oc_db$xz8sQy=BAzl1??R!4L=Nsx;Wr^^xGYwqOu ztCpd2nEDxcF?SWrkM5Ma?gvV^rE#Ndcw@7pXk?vc>SKOntF~V#jh@Bo@kt(rg!z$M zvYF2*jN`Mw0q%Mla9~#};6llZiFvt1$#xWo^qyqat1C)9oCj(!Ke$y& zhmn3=M@hHkVEhsPd0L43$s8y4(D|J+f%bMvtLq~|O@H>FAI-AirDpT6r_5h~n+Jzo z_mS|@jaopYNo3`qyYrae)8_BY7){{T7G|CC7GUIFpa+YtCV;s%@${yLVFtD%@G=#x zzVDvEx^-Ig0+Nsb)?$M^W&7`DV>Drw>rSZ{*rX3~<`Zx6kK9FKG`lqMm%&SK3hEX( z=9;0Tv_(7bWC2BvOjsHD*;}HK(=4Da^sE%fo>qV4R3M!;0R?h?JE-Hsf9NgH>65^* zl{||AU9@bDjEGc96M=#uwo(y~A7|s3q4mI&3u&i}xsSa?y>FVtF%Pq%a^&t@Eh1~~W*I%1Kb8C7F$=Wp zDM;tee&j9h^id16EU7a$M$Q5zowWBpx&N2vq~_Ox5C1T{OfEQ>ny>UNF0rCM3%&>| zxZ{n zm`BG@#Y|eOVpWX8y#!;8lj=@NYCzZFnv&W#H4ZPFX2AI+i^&6|hjrgJG6%HlL{@v) z_r(F00R82LaJYDEWBnF_h7a^8;M@i;?BKJu#3fIAlSf~3=&L%-7QR|B4?$Wu{{&Py zWSpCq0z}QoJL=u&1pkO0Cq?z1)gnRst4;rPxalBQC1ejwZeB>@2jV*pTl2+Io`h03 zyjg};Cc~c{)PeDG8D6MBL_{{f6(S`doiQXSh6|b^@F6bql?AYfoIwY05+y$mUP*9C z!xSi7gxtn(&jRU>A?XZBB>@o@z@%KbKv;Aq_Gpi$(K_L}r@}PLxPXCd&*(msUnKrg zw0Pp^c(**l-acq63$xg==W{MDTHfioNL%R0EeZIV?HYq?%V};IbFGYIO)*};(}ga` zqwEE5195G6v*a=M%7jwut{XU{=gU@ zaVP4rQyfgoTP->OXk4eIW+fmGP|2;9>Fh=4Je_Z`OlL0+_UYVana(=3Ih|p{bE{=? 
zYDKiOqDQQx++M$xct)+$_nA1BnFY;lhl^{-^Cw(S-o)t!bT)F-4Y((j*DkwkTE1Zq z>XCbxJw9FPc|BsdQ`%iMg-IUjsU0^}CiEt|pN_T=92# zH8)(1B8`XZb%${+$uNNFyGC{`aY4@RXDhG4`~TY@U0>{Vu`{t)^zq14Boh8y`1Rq5 z(Dy^{44n=22mdPg;^1)L2Z6@}j|Rs4Py0XOull={?}dWs-=Fwi z;NrxoLH<%lu?&@3*K(< z{zGz?US2qH- zkjuasA+AHIN8%)#)_^jbRBr}mkm10SnME;k$qp#5adiXXMg2fv*3+(YQ1a%%WR7w< zpx#8dQ0EjUF5br#%5A^89=Jg!63_PW*BGv9cS609F!OF8&&+Hhb6GX0t^+=-!;Gzo zro#+;Lt8VZ-T;KGM}(cwJuiUf$Gclyi#!-_nU(M`+*H)-kqP6cvJxhopNhH$IWdkZ zE8&EWD(ZE}fbmpW5d-v88By>?iVQ{ft8rYB(^N})(;F+YK9l)@KBdM`7~ZLjZvZEZ_7<4^ znt?)cT8#q74hb@(>RQAu9=m#34VLB7MZ70Qt3(E@jj1H#dX`ipz?)srth~8UA#!;# zqK2`B%%Wza;aX^g`*t-1^zhD_xhQ(mTi|Au@RS-vQ9QurUvJaEHtjQDve44(P_U1v z0pOVAo$W>1!yRQLA;2FnViVA zh7n8+mj?Qah_q+${J&NHS1JDfcs0H&_H^t6vDsKh^!w32h+d2iME*VUo=7#)9{$Vl zBjKLVRA_DRpMq}=yf07*^!T6hf6V`&f4lNS<+qh{N=M7zwY;I_zLsm7Kim9Z^OmMx zZJKRL17q>$CkTPP>KztE0Wo0g{2o}QqUchOTP8FxGL9=GMA3StordzJ0u!9!u2_KcD8OOG259Lmq%Gn zsz(S*m(Jx_(&P9zZc6n4LCR}FodRAEljuB998S=r&vP4!bT_NoVgkPYy#p=~%yu{%jU=QJDOp&md+?3vXjfycQQ98`+= z?3lVAnXsq4l`zplJk-g{5X3w@-;PY!!}AIR;SbOIkP~}&UI8a^c;1T)*u(Qm7?3az zb>cEukT{RDx(8Sgw`-~u=}+|RY@0AibsQE0c2fr7m)v4te=vAN-3>JC5yM8~evE^n zqv{yYvqubXdWR#1dR84JM~r%w&UCBboDlSgacRD2S2}Sps|0pg*K1o#+5Nc1X^1lF zQg^~AvGlbuN*rQp2hfP!NvR?C@Ln)c{8aM9GwGKI^ZUx8*O1Y2d~!iB@iON$g8eF~-zEAmkPy8==QarY}MR$b(%CSHMGGghthV zWWs9x6)+)LFUHi=CANi{P5ae8-~r+D&YetEKW=m=j7c?V)BF5=DqilJ`*+=Z*K5=J z78*|Fa;7CP2=7;WQ13I=QuvTtYmywjq4)KH%3DkKsof|H6Eo}0Ow<4KX5MoO#ZI*g zIC7sz<}4Qe7i79p%u$5C_ZDRvx?8(XWOy74d}hFiGyWj=DuSI_wPX80p0QmgBGI9#e|Nk2Po$y29-CH z%DGt91EBBKWkv~@Vjhe$TNI3w))+SkkiI9SfCaIOl!g5xbMZ?>fzOSIO!5})U1+JXkn3)y!B`hYBU@_0k4`fUW{P<*zH?Z?sAEGQejCo$u)xO`pWLQk1hgxqN2R9* z6xba17^Lj6lCKi%o-|tcjv)o3m|dtmsyne$0dKHjhL8&awpYnV`%dpr@QGy)a*w)? 
z^G#*F6!>}zZ1nC?zNr=}kLr#M;+z{XWYx|zv?6?=0t_;?hp(rYDG1}nB=e~0NLqn+ zjFU!9T%s#lA&!6k?dn6r3O=muM(0uE;Sub{G@)cBo1vg+rEJCk^L^(7OfNc9dsKa} z52yMTt!i>Scc|4owv)%MCz+W(Z6)!jYX5D@mJRKI56v}?FTF$SOrk0M-4@@JvZXU; zD~iW#Jy3n}*d)WJ2WBuvpjCUEU!8C2QMPQCRtFgp??>kMD7)ZYQdZP9@`R%COg8Bf z@G&yLf(-hgiFq{kjw-v_rGW-a^aV$I)RBMqdf!fE7a33#Fq;%6bYr%qjOhjv5;AW)c-X|s<51vjH}oA$&=MwTU>bQ4LVLj6Q9jDsrQ6@pOV_8mq_8q!e_!A z;Pd~%&@Y50LxJFzgRc+H2SzbeReMo@p`XFB(da7XW!eti@|l+!$7mUsta{^0BZjmj_5(r?^`!OH77V1a>F zma}R}?{vz8V2@>x-r-Pgz-}{?-ismG@RA$b+XK9v&uW^Je$ye0+d^Zthe^Nfgb`nl zH%7nZgps4=PkPL$?RFcV^qwJXJD-Na(r;SZEe_MtyPN>v4SF^J-YrQj5eV$4a!?OC zIodrR^Cql!fYbsG11ZTgA#ZR<4Pc=$#uRgeP??8-td+ zRypj{k9Msn%j!d_5?|hOC)T_%uZv0^sB=z0zR`?dNF>1e~`Szc7K@jHBF`1_rF+ zvtC`>Lky%>ILx7bdoY3Ya;Hp8%k$co{*TiD;c0E~ylf}z!czP$te>Z`z{4KOrJviY zU~~Wv&Wb|&uRA|Tr{5z7ftCa z4nrC|dD+Bd+>i!OLg}v@be@IA7}BiH`%8y~(c5WQ`U|IVWZ&ROpHPqE!Uzw)bK{6# zthI&lx#Kz{eblLM_%`u<1KIpP?$kFtjZ@!z%&BiYH!ad1q5prU^ea;AY^*){Wc0n! zd!yGzJ{@^UWID1o{P*GC3_lp&9eO(Ska5xNp9x(9|XWulxaEdHH|ptJMd!kae&^6MrLo_bAN{k3oJD@Kd~R zqFyU|RmkDbg!?#w!Y;g)ohoJ2rDE!S>b*i3QJh(;BFQ;3pkh{j>J?}&Z}SSoB7=3m z`hXUh&h;a$iP12Zb9VWO-Sbk&r&N4CphKEq2Q&r3bHfBq^yXuil;d#~Ul4%T_B=VV z`w=shiaj9+WqUxy2L`CmpQW#LLD|b0BSO+CIryN8&kRtXKL>8Q&mZ}QiZQ10ZwojP zI{W;Y2KRIOReW3kUSD)Zh*)n)<|-2yV<1(GhZw0qNfllhU_8E1pg~q64cIMaBPu*7 zz<7K)(6}wur0A#$4+=0|(~amI7Ki#ybzTe1j4~)#1m}dfTvhth@iH9k_`NE;s9@2$ zR{WN!b;~dud96EWssQe^qibEv&d+OIzPwHA5?}0kpIXv_D7h}DH9aF*_ouC!kErJW zZR(6o<{HJUk%FdgF0v!%e!>!NwJ9U|6UUM|W0Qpt9Cw+#;s?|sFyeJGGZI#;lg|P- zUME|)F*c%)n!=4#|yyU#JP9r>f*4Nbg45yOH~1RFp`W6=Qk+()GU;TcO#7~2>5U$ zEpJR{2%I#3>`N!r`;dtG8#@qL-=`c_Ge85cUAF1*13V2b6#T=NI67u36}UkzC)Ino z6_UK#$E}c@j??|i@8MQR2O4g1;*U(J{@c~NafM{kL%*VMSbd5m>8Gj$XPCY^PAABG$FF40U!&O*}o(+o;)#d2_-_g8A zihmC7^2_n<@#feUqQ4&f+2|zL0lpsoTk!j@hYy6;hSq?`|L+GM4o(Gn0|Eb^``_hX z^6!Pa{_iNCRNkn}L6-ke%ipxTujOZ2_5nNbM<5^&c&;K)??prGBkMRrY??Pf>gw6$ z0f^f3D!-sSbG8<>5}eXtQ;V_2hYfNlteuog23Bw@&=K z=Eoq>JUmQWgGYSj+rW4XUW|N~_u-0+j|HE|mKF=uz3}R>w%f#OQ+acb(%ETwJ|Rs= 
zDrG}-=8ZGCb1Mi0Z;!Bxewy5nFMuH2r@eAFv=g?}4u#JY7pgT~47j_Ld)k5RdP_;- zl7no~h*>U{b`v<}2devhXy5$I#*ZpFBKiQ7pqyJ70ho&==;mohNo<_{^R{F8S-7DNpT9;+U@lX0+2c7G1 zc_4kiejC}2MY*+bfUGaiST{cVb8ah{{__i!bJ>NZO0Aenz~a1^t1ZGR4UeAW&L*-| zD3YCLh*5sqH)!({^HEll~U(|2yOhQv4(F7se-J zKaPDk_CRbb`u)g1MSd%CA+j(0bm$*LZwuu@JrEJ_%HUM+`oL!bIsZ0)lk!2*!F%TL3re;1ys2cIh@_klX(^Jgvj`0NPT(~`r%N-g>ITY(#6 z$+vSeEcy5iwf^j@LymhUn--~@7sJ_-Z+oKUKpm6Z!;??CWqR419G{CBt78g$=7zzc zL5*&G>$~d-xcW3z5V@1WxA z$@+TD{ite;AiLn45SmuL(wix=PCg5}=@q<9N&9+l9rFD1*W}i|_F5w;qpmt8`llBi zqyshGguQ_0`Wikq4HgRRn!Jwr{kd!MRFW%iyCw&ged&QZ=Ja1N-Eh6Ve!b?@#ElMG zBePj}UXO^GI=)f>j~20$h{B|g2KEY$*CnP`j;Fg)9bYowuEyEkU-5#9j~XxwDvmf_ zWMe_aZ|I=iS;v=*?eQZSUq{*+VM>I`t zWSVWn<;&-yOsY)B6v!*juCA@mTtIB0EpsI3T9UeRYRcX8jMJ50p6>h51!IdXj zhbIm!n$hB^&Td&Wua~+6G}JExYq!vNX~y)Z`jAaCdeJ**Mm+!bNxv?|J{fyyY%&&& zekuB<=v~npBHxI-3(o%|;itnN55F{gJbY{DAA-LeJRMvYcrx%Vf6l*A`MUB-Wv>!# zdA#L9%ckZpH}7c<`2N`U3g5JEebd*PUf+~y>XiRm{)qgbyju=PUxrF9@lRJB+)r5L zg74sJorFKJu#%l%vVMZ^3M|DgQfcnEL1@x-%#07$W%T@Np}{l-rV?Xre}?Oj3Lo#m z+$pgW9j|qE)FCZCUb}hIy4=8}()D9N4!2saE}&_^>7_DG+Ku&?0A#qN=V|HJTA3{N9ve*J;k%2!v;IW z zHk5|zm{)y^8(1)m*v7JbHPov{>ifvzs*V{uP97>|admr77v4+#P zonAtxLu|Od1A2mSv*x>!U`T>^h$pVq{s>vy*}WMa30NJLb~?2}Qg(K4W~Xu8n_ zI%HsHPpu8;9rkARJ@p|iW@jDMJ66FtA%s&_ZmCuwZW-Nl=#13U&=ue`<${gk?3>Ue?pFVui@t4xRb6!N_l?M!1uJP_t9`2GRgCs1}mk<{u46D z^P2`IT9-{jU%HN&I67k?*vNjC}XAc7)qNFF!p@lMC&$`hU@UCI=}z)Xe_6@`|3mWO=SQ12sgtG zMEd%dvHyI2IjmCM7z(#bY3UIubWh+1aJ&Cdur>I;;5S1`s5kWS(0_z}G4N;7H>LLk zCZ$Kh7vmp`zal)`yaRIn{Y&ga@$T5GB6mf6;op~1~@y_=K=VtUCNl9V--z?IqfMk?+#n*knq#~ zrkbI9jj`jDy7byT3gr8Q+{sMlQ5Q~nua8wBH|%T?yq&6veNzb(diA(6>CznLi8t>i zU3XF`G#DdJAd~aHR|Dft^R|zZh0NRRc4gS54dC3xl{d$=QSYYx$^pCVq&wNS+?LZH zt$URN)>N{3f7n>*9df8p%R;DWWv-Yrd*qfq$^nW*ri#smF#j_bMsDR z7&N!j=rTCyjo3Fera98VfxiH5b!1#^bng?5+(Y=pgfiwPe4$)`fP&>TVEqndtR0gk z+oy*f-AlL_jnEhy)8Z!gSx$*Pny|qkH!&BUyqp%?upe_1Pdc?gi*aTfFn)I8(C&kY zdF%xAD#|XvQ@rT+I5!9Gkn0?>6kFx-XwHr9JwlaU!}+|%y+>FsdXJ22+SKTg>(yp- zQdP$|r$x(B>wP@Ed=sF2+95awWUwFZPVC53rCx+ 
zeB2>$L@V#IPfp3xPH`-5W`6mYQ)~<=%^a=dqfW6lPUd|1h*Jm;DQP=G4arjuu_)<= zW_$Ut1LUqIbNPS+doVxRN$iucFVa2h9SyqB>8(H6tM;v7i-{-N}xr&3&z96WNoKbFL_9k zn!2CE@qrF{dB|>^mCraG2Kfk8+hMTakmPiTr!n@Oj#nmf9xJsZG`BF&mN;DIi5apc z=Nxj20}VUVF+Ur;Dk#r5wTag$wl>}8kePIdr?Iq&b+nT+82=B>xl;TO<1dQui~C}a z#~z7|#-!*cqA!f@iYk$hM;?hBi1@>w3BNcz77oMvf8|g|@QL8dgWCiD6?k7@9*D&s zfq+0jARrJB2nYmzx)3-F4f^$W zT;@+NRcnPgFm*$;sK#y7!6NF&!Q)i18%?vRMvc<+rD^UU290)RNLZ7964`m!Zk~r5f|(_9Fo0BIU_FM&mHzC_bm2qoB?YuqOxP1Vgp0m7Hh7Z)C zD+x5tq@0>TT#Y7`oGppe`)4`04342C(PqrGFws$gn#~Amyd@x|z#L(F<_rvo90#)n z_G_x4ab?8XzqL!wq?2K;_Dw7 z*mt`d(kZb=6MEdypRdf#EUN*xjpD$ujeg?gZ}QN^-&?F!VX`O4h%y?n#l05{Av~I} z)uRa|Fjp;S0Cl(zAaF*~dFV|zf*%;7Z}t#fC-e}#)BPGryIE^EX>UdU|9JR=Qv9Rw zTVqee-Wr>Wt&M&>dLf#Q`rzz;F|sB6UHDr35eNtb1Ofs9fq+0jARrJB2nYnO9sNAQ5|9^Lcs;maHA}SRJ2Ru*4>Mu+orpbW5R_OxN)}21^K~g((ZFm$M4xnylk1 z4O}3NrK{|2-y?PM90MCyV5RXKwWD=>f`Q2%f_~v#OL!M@18F8^K)s=Un#5u8@vRJ|tH1Ox&C0fB%(Kp-Fx5C{ka1Ofs9fxs0-AOf{*hQA%~HvxY)!rvR + + + + 1.2 + + + + + + + + + + + + + definition + + + + + + + + + + + + + + + + + + + + + + + + + + Grouping classes that can be excluded + + + + + + + + + Term not to be used for direct annotation + + + + + + + + + Term not to be used for direct manual annotation + + + + + + + + + Aspergillus GO slim + + + + + + + + + Candida GO slim + + + + + + + + + ChEMBL protein targets summary + + + + + + + + + Generic GO slim + + + + + + + + + GOA and proteome slim + + + + + + + + + Metagenomics GO slim + + + + + + + + + PIR GO slim + + + + + + + + + Plant GO slim + + + + + + + + + Fission yeast GO slim + + + + + + + + + synapse GO slim + + + + + + + + + Viral GO slim + + + + + + + + + Yeast GO slim + + + + + + + + + Prokaryotic GO subset + + + + + + + + + Catalytic activity terms in need of attention + + + + + + + + + label approved by the SynGO project + + + + + + + + + + Systematic synonym + + + + + + + + + Terms created by TermGenie that do not follow a template and require additional vetting by editors + + + + + + + + + Viral overhaul 
terms + + + + + + + + + subset_property + + + + + + + + synonym_type_property + + + + + + + + has_alternative_id + + + + + + + + database_cross_reference + + + + + + + + has_exact_synonym + + + + + + + + has_obo_format_version + + + + + + + + has_obo_namespace + + + + + + + + has_related_synonym + + + + + + + + has_scope + + + + + + + + has_synonym_type + + + + + + + + + + + + + + in_subset + + + + + + + + shorthand + + + + + + + + + + + + + + + + + + + + + + + + + + + BFO:0000050 + external + part_of + part_of + part of + + + + + + + + + + + + + + + + + + + The part of a cell or its extracellular environment in which a gene product is located. A gene product may be located in one or more parts of a cell and its location may be as specific as a particular macromolecular complex, that is, a stable, persistent association of macromolecules that function together. + GO:0008372 + NIF_Subcellular:sao-1337158144 + NIF_Subcellular:sao1337158144 + cell or subcellular entity + cellular component + cellular_component + subcellular entity + GO:0005575 + + + + + + + + + + Note that, in addition to forming the root of the cellular component ontology, this term is recommended for use for the annotation of gene products whose cellular component is unknown. Note that when this term is used for annotation, it indicates that no information was available about the cellular component of the gene product annotated as of the date the annotation was made; the evidence code ND, no data, is used to indicate this. + cellular_component + + + + + The part of a cell or its extracellular environment in which a gene product is located. A gene product may be located in one or more parts of a cell and its location may be as specific as a particular macromolecular complex, that is, a stable, persistent association of macromolecules that function together. 
+ GOC:go_curators + NIF_Subcellular:sao-1337158144 + + + + + cellular component + + + + + + subcellular entity + NIF_Subcellular:nlx_subcell_100315 + + + + + + + + + The living contents of a cell; the matter contained within (but not including) the plasma membrane, usually taken to exclude large vacuoles and masses of secretory or ingested material. In eukaryotes it includes the nucleus and cytoplasm. + Wikipedia:Intracellular + internal to cell + protoplasm + cellular_component + nucleocytoplasm + protoplast + GO:0005622 + + + + + + + intracellular + + + + + The living contents of a cell; the matter contained within (but not including) the plasma membrane, usually taken to exclude large vacuoles and masses of secretory or ingested material. In eukaryotes it includes the nucleus and cytoplasm. + ISBN:0198506732 + + + + + nucleocytoplasm + GOC:mah + + + + + protoplast + GOC:mah + + + + + + + + + The basic structural and functional unit of all organisms. Includes the plasma membrane and any external encapsulating structures such as the cell wall and cell envelope. + cell and encapsulating structures + NIF_Subcellular:sao1813327414 + Wikipedia:Cell_(biology) + cellular_component + GO:0005623 + + + + + cell + + + + + The basic structural and functional unit of all organisms. Includes the plasma membrane and any external encapsulating structures such as the cell wall and cell envelope. + GOC:go_curators + + + + + + + + + A membrane-bounded organelle of eukaryotic cells in which chromosomes are housed and replicated. In most cells, the nucleus contains all of the cell's chromosomes except the organellar chromosomes, and is the site of RNA synthesis and processing. In some species, or in specialized cell types, RNA metabolism or DNA replication may be absent. 
+ NIF_Subcellular:sao1702920020 + Wikipedia:Cell_nucleus + cell nucleus + cellular_component + GO:0005634 + + + + + + + + + nucleus + + + + + A membrane-bounded organelle of eukaryotic cells in which chromosomes are housed and replicated. In most cells, the nucleus contains all of the cell's chromosomes except the organellar chromosomes, and is the site of RNA synthesis and processing. In some species, or in specialized cell types, RNA metabolism or DNA replication may be absent. + GOC:go_curators + + + + + cell nucleus + + + + + + + + + + + true + + + + + + + + + Organized structure of distinctive morphology and function. Includes the nucleus, mitochondria, plastids, vacuoles, vesicles, ribosomes and the cytoskeleton, and prokaryotic structures such as anammoxosomes and pirellulosomes. Excludes the plasma membrane. + NIF_Subcellular:sao1539965131 + Wikipedia:Organelle + cellular_component + GO:0043226 + + + + + organelle + + + + + Organized structure of distinctive morphology and function. Includes the nucleus, mitochondria, plastids, vacuoles, vesicles, ribosomes and the cytoskeleton, and prokaryotic structures such as anammoxosomes and pirellulosomes. Excludes the plasma membrane. + GOC:go_curators + + + + + + + + + Organized structure of distinctive morphology and function, bounded by a single or double lipid bilayer membrane. Includes the nucleus, mitochondria, plastids, vacuoles, and vesicles. Excludes the plasma membrane. + NIF_Subcellular:sao414196390 + membrane-enclosed organelle + cellular_component + GO:0043227 + + membrane-bounded organelle + + + + + Organized structure of distinctive morphology and function, bounded by a single or double lipid bilayer membrane. Includes the nucleus, mitochondria, plastids, vacuoles, and vesicles. Excludes the plasma membrane. + GOC:go_curators + + + + + + + + + + Organized structure of distinctive morphology and function, occurring within the cell. 
Includes the nucleus, mitochondria, plastids, vacuoles, vesicles, ribosomes and the cytoskeleton. Excludes the plasma membrane. + cellular_component + GO:0043229 + + + intracellular organelle + + + + + Organized structure of distinctive morphology and function, occurring within the cell. Includes the nucleus, mitochondria, plastids, vacuoles, vesicles, ribosomes and the cytoskeleton. Excludes the plasma membrane. + GOC:go_curators + + + + + + + + + + Organized structure of distinctive morphology and function, bounded by a single or double lipid bilayer membrane and occurring within the cell. Includes the nucleus, mitochondria, plastids, vacuoles, and vesicles. Excludes the plasma membrane. + intracellular membrane-enclosed organelle + cellular_component + GO:0043231 + + + intracellular membrane-bounded organelle + + + + + Organized structure of distinctive morphology and function, bounded by a single or double lipid bilayer membrane and occurring within the cell. Includes the nucleus, mitochondria, plastids, vacuoles, and vesicles. Excludes the plasma membrane. + GOC:go_curators + + + + + + + + + + + + + + + + + + + + + + + + + + Any constituent part of the living contents of a cell; the matter contained within (but not including) the plasma membrane, usually taken to exclude large vacuoles and masses of secretory or ingested material. In eukaryotes it includes the nucleus and cytoplasm. + cellular_component + GO:0044424 + + + + Note that this term is in the subset of terms that should not be used for direct gene product annotation. Instead, select a child term or, if no appropriate child term exists, please request a new term. Direct annotations to this term may be amended during annotation QC. + intracellular part + + + + + Any constituent part of the living contents of a cell; the matter contained within (but not including) the plasma membrane, usually taken to exclude large vacuoles and masses of secretory or ingested material. 
In eukaryotes it includes the nucleus and cytoplasm. + GOC:jl + + + + + + + + + + + + + + + + + + + + + + + + + + Any constituent part of a cell, the basic structural and functional unit of all organisms. + NIF_Subcellular:sao628508602 + cellular subcomponent + cellular_component + protoplast + GO:0044464 + + + + + Note that this term is in the subset of terms that should not be used for direct gene product annotation. Instead, select a child term or, if no appropriate child term exists, please request a new term. Direct annotations to this term may be amended during annotation QC. + cell part + + + + + Any constituent part of a cell, the basic structural and functional unit of all organisms. + GOC:jl + + + + + cellular subcomponent + NIF_Subcellular:sao628508602 + + + + + protoplast + GOC:mah + + + + + + + diff --git a/tests/test_basic_sqla.py b/tests/test_basic_sqla.py new file mode 100644 index 0000000..040ddd9 --- /dev/null +++ b/tests/test_basic_sqla.py @@ -0,0 +1,30 @@ +import unittest + +import logging +import unittest +import os +from semsql.sqla.owl import ClassNode, OwlEquivalentClassStatement, RdfsSubClassOfStatement, OwlSomeValuesFrom +from sqlalchemy import Column, ForeignKey, Integer, String, Text +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import relationship, sessionmaker +from sqlalchemy import create_engine + + + +cwd = os.path.abspath(os.path.dirname(__file__)) +DB_DIR = os.path.join(cwd, 'inputs') +OUTPUT_DIR = os.path.join(cwd, 'outputs') + + +class OwlQueryTestCase(unittest.TestCase): + path = os.path.join(DB_DIR, 'go-nucleus.db') + engine = create_engine(f"sqlite:///{path}") + Session = sessionmaker(bind=engine) + session = Session() + q = session.query(RdfsSubClassOfStatement).\ + join(OwlSomeValuesFrom, RdfsSubClassOfStatement.object == OwlSomeValuesFrom.restriction) + for row in q.all(): + print(f'Row = {row}') + print(row.subject) + + diff --git a/utils/gaf.header.tsv b/utils/gaf.header.tsv new file mode 
100644 index 0000000..48fdc10 --- /dev/null +++ b/utils/gaf.header.tsv @@ -0,0 +1 @@ +db local_id db_object_symbol qualifiers ontology_class_ref supporting_references evidence_type with_or_from aspect db_object_name db_object_synonyms db_object_type db_object_taxon annotation_date assigned_by annotation_extensions gene_product_form diff --git a/utils/gaf2tsv b/utils/gaf2tsv new file mode 100755 index 0000000..e1ec449 --- /dev/null +++ b/utils/gaf2tsv @@ -0,0 +1,4 @@ +#!/bin/sh +SCRIPTPATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" +cat $SCRIPTPATH/gaf.header.tsv +grep -v ^\! $1