-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
31 changed files
with
1,517 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
.idea | ||
owl/ | ||
download/ | ||
db/ | ||
docs/ | ||
load*-* | ||
schemaload*-* | ||
venv | ||
bin/ | ||
target/ | ||
test.db | ||
inferences/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,117 @@ | ||
# Ontologies to build; the `all` rule expands each name to an all-<name> goal.
ONTS = obi mondo go envo ro hp mp zfa wbphenotype ecto upheno uberon_cm doid chebi pr

# Named groups of SQL module base names. ALL_SQL is the list that gets mapped
# to sql/<name>.sql files and concatenated into sql/all.sql further down.
OWL_SQL = rdf owl
OBO_SQL = $(OWL_SQL) obo-checks
RG_SQL = $(OWL_SQL) relation-graph
ALL_SQL = $(OWL_SQL) relation-graph obo-checks
|
||
|
||
# Build every ontology listed in ONTS.
# `all` names no file, so it is declared phony. The all-% goals are deliberately
# NOT added to .PHONY: make skips implicit (pattern) rule search for phony
# targets, which would stop the all-% rule below from ever matching.
.PHONY: all
all: $(patsubst %,all-%,$(ONTS))

# Everything for one ontology (stem = ontology name): the rdftab load stamp,
# the SQL views stamp, the inferred-edge load stamp and the problems report.
all-%: target/%.load target/%.views inferences/%.load reports/%.problems.tsv
	echo $*
|
||
# Remove the files under target/ and the SQLite database for one ontology.
# `rm -f` is used so that an already-clean tree (unmatched glob or missing db)
# does not make the first command fail and abort before the db is removed.
realclean-%:
	rm -f target/$*.*
	rm -f db/$*.db
|
||
# sql/all.sql is the concatenation, in ALL_SQL order, of the individual
# sql/<module>.sql files.
ALL_SQL_FILES = $(addprefix sql/,$(addsuffix .sql,$(ALL_SQL)))
sql/all.sql: $(ALL_SQL_FILES)
	cat $^ >$@
|
||
# Run sql/all.sql against db/<name>.db with statement echoing switched on, then
# leave a `schemaload-<name>` stamp file in the working directory on success.
schemaload-%: db/%.db sql/all.sql
	sqlite3 -cmd ".echo on" $< < sql/all.sql && touch $@
# --- | ||
# sqlite db creation and loading | ||
# --- | ||
# Stamp recording that db/<name>.db has been created.
# The stamp file itself must be written: when a recipe never creates its own
# target, make considers that target updated on every run and re-runs every
# rule that depends on it.
target/%.created:
	mkdir -p db $(@D)
	touch db/$*.db && touch $@
# Create the database by piping prefixes/prefix.sql and sql/rdftab.sql into
# sqlite3. A non-zero sqlite3 exit is reported as "ALREADY LOADED" and does not
# fail the rule, because the trailing `|| echo` always succeeds.
db/%.db: prefixes/prefix.sql sql/rdftab.sql
	cat $^ | sqlite3 $@ && echo OK || echo ALREADY LOADED
# Never auto-delete databases (as intermediates or after an interrupted run).
.PRECIOUS: db/%.db
|
||
# Print every row of `problems` for one ontology database.
# The query is given as the SQL argument instead of via -cmd: -cmd only runs a
# command before sqlite3 starts processing its input, so with no SQL argument
# sqlite3 would go on to read standard input after printing the rows.
problems-%: db/%.db schemaload-%
	sqlite3 $< "SELECT * FROM problems"
# Feed owl/<name>.owl to rdftab, loading it into db/<name>.db; the target is a
# stamp file touched only when rdftab succeeds.
target/%.load: target/%.created owl/%.owl
	./bin/rdftab db/$*.db < $(word 2,$^) && touch $@
.PRECIOUS: target/%.load
|
||
# Create db/<name>.db by feeding prefixes/prefix.sql to sqlite3.
# NOTE(review): another db/%.db pattern rule (with an additional
# sql/rdftab.sql prerequisite) appears earlier in this file — confirm which
# one is intended to be in effect.
db/%.db: prefixes/prefix.sql
	sqlite3 $@ < $<
.PRECIOUS: db/%.db
# Apply sql/all.sql to the database, then stamp target/<name>.views.
# `;` (not `&&`) is kept on purpose so the stamp is written even if sqlite3
# exits non-zero — NOTE(review): presumably to tolerate re-running the view
# definitions against an existing database; confirm.
target/%.views: db/%.db sql/all.sql
	sqlite3 -cmd ".echo on" $< < sql/all.sql ; touch $@
# Protect this rule's own stamp; the original line repeated target/%.load,
# which is already declared precious next to its own rule.
.PRECIOUS: target/%.views
|
||
|
||
# --- | ||
# Inferences | ||
# --- | ||
# We use relation-graph | ||
# Run relation-graph over owl/<name>.owl for property BFO_0000050.
# $@ receives the redundant output; the same command also writes the
# non-redundant output to inferences/<name>-nr.ttl as a side product.
inferences/%-inf.ttl: owl/%.owl
	relation-graph \
	  --ontology-file $< \
	  --redundant-output-file $@ \
	  --non-redundant-output-file inferences/$*-nr.ttl \
	  --property http://purl.obolibrary.org/obo/BFO_0000050
.PRECIOUS: inferences/%-inf.ttl
|
||
# Convert the relation-graph Turtle output to the .owl file with ROBOT.
inferences/%-inf.owl: inferences/%-inf.ttl
	robot convert --input $< --output $@
.PRECIOUS: inferences/%-inf.owl
# Flatten the inferred .owl file to a tab-separated subject/predicate/object
# table by round-tripping it through a scratch SQLite database ($@.db):
#   1. load prefixes/prefix.sql into the scratch db
#   2. load the .owl file into it with rdftab
#   3. dump the `statements` rows (with a header line) to $@.tmp
#   4. rename the scratch db to $@.db.old and move $@.tmp into place
inferences/%-inf.tsv: inferences/%-inf.owl
	sqlite3 $@.db < prefixes/prefix.sql && \
	  ./bin/rdftab $@.db < $< && \
	  sqlite3 $@.db -cmd '.separator "\t"' -cmd '.header on' "SELECT subject,predicate,object FROM statements " > $@.tmp && \
	  mv $@.db $@.db.old && \
	  mv $@.tmp $@
.PRECIOUS: inferences/%-inf.tsv
|
||
# Import the inferred-edge TSV into the `entailed_edge` table of the ontology
# database, using tab as the separator; the target is a stamp file.
inferences/%.load: db/%.db inferences/%-inf.tsv
	sqlite3 $< -cmd '.separator "\t"' '.import $(word 2,$^) entailed_edge' && touch $@
.PRECIOUS: inferences/%.load
|
||
|
||
# --- | ||
# Reports | ||
# --- | ||
|
||
# Dump every row of `problems` from the ontology database into a report file.
# The output goes to a temp file and is renamed only on success, so a failed
# query cannot leave an empty report that make then considers up to date.
reports/%.problems.tsv: db/%.db target/%.views
	mkdir -p $(@D)
	sqlite3 $< "SELECT * FROM problems" > $@.tmp && mv $@.tmp $@
|
||
|
||
# --- | ||
# Downloads | ||
# --- | ||
|
||
# Download an ontology from its OBO PURL.
# -f makes curl exit non-zero on an HTTP error instead of saving the server's
# error page as the ontology; the temp file + mv keeps a failed or interrupted
# download from leaving a truncated target behind.
owl/%.owl:
	curl -L -s -f http://purl.obolibrary.org/obo/$*.owl > $@.tmp && mv $@.tmp $@
.PRECIOUS: owl/%.owl

# GO is fetched from the go-plus extension file rather than the default PURL;
# same failure handling as the generic rule above.
owl/go.owl:
	curl -L -s -f http://purl.obolibrary.org/obo/go/extensions/go-plus.owl > $@.tmp && mv $@.tmp $@
|
||
# --- | ||
# GO Demo | ||
# --- | ||
# Download a gzipped GAF file, decompress it and convert it with
# ./utils/gaf2tsv. The result is written to a temp file and renamed on success
# so a failed conversion does not leave a partial target. curl's -f stops HTTP
# error pages being piped onward; note that under plain sh only the exit
# status of the last pipeline stage is checked.
demo/gaf/%.gaf.tsv:
	curl -L -s -f http://current.geneontology.org/annotations/$*.gaf.gz | gzip -dc | ./utils/gaf2tsv > $@.tmp && mv $@.tmp $@
# Import the converted GAF rows into the `gaf` table of db/go.db (tab
# separated) and leave a loadgaf-<name> stamp file.
loadgaf-%: demo/gaf/%.gaf.tsv
	sqlite3 db/go.db -cmd '.separator "\t"' '.import $< gaf' && touch $@
|
||
# Fetch the UniProt id-mapping file. wget -O creates its output file even when
# the transfer fails, so download to a temp name and rename only on success.
download/idmapping.dat.gz:
	mkdir -p $(@D)
	wget https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz -O $@.tmp && mv $@.tmp $@
|
||
# Directory searched (recursively) for *.ttl model files.
CAMDIR = ../noctua-models/models/
# Convert each .ttl file under CAMDIR to RDF/XML with riot and load it into
# db/go.db with rdftab.
# The file name is handed to the inner shell as "$1" instead of being spliced
# into the command string, so names containing spaces or shell metacharacters
# are passed through intact. `$$` is make's escape for a literal `$`.
.PHONY: loadcams
loadcams:
	find $(CAMDIR) -name "*.ttl" -exec sh -c 'riot --out rdfxml "$$1" | ./bin/rdftab db/go.db' sh {} \;
|
||
# --- | ||
# Experimental: sqlalchemy bindings | ||
# --- | ||
# Generate SQLAlchemy model code from the db/foo.db schema with sqlacodegen.
# Written via a temp file so a failed run does not leave an empty module that
# make would treat as up to date.
semsql/sqlaviews.py: db/foo.db
	sqlacodegen sqlite:///$< > $@.tmp && mv $@.tmp $@
|
||
# --- | ||
# Schema | ||
# --- | ||
|
||
# TODO: markdown gen should make modular output
# Generate markdown docs for one schema module into docs/, then rename the
# generated index so successive modules do not overwrite each other's
# docs/index.md.
markdown-%: src/schema/%.yaml
	gen-markdown --no-mergeimports -d docs $< && mv docs/index.md docs/$*_index.md
# Build the per-module docs first, then the top-level semsql docs.
# Only `markdown` is declared phony: listing markdown-rdf/markdown-owl in
# .PHONY would disable the pattern-rule lookup that builds them.
.PHONY: markdown
markdown: markdown-rdf markdown-owl
	gen-markdown --no-mergeimports -d docs src/schema/semsql.yaml
|
||
# Load owl/<name>.owl into db/<name>.db with rdftab and leave a load-<name>
# stamp file in the working directory on success.
load-%: db/%.db owl/%.owl
	./bin/rdftab $< < $(word 2,$^) && touch $@
# Convenience goal for (re)generating the DDL; it names no file, so it is
# declared phony.
.PHONY: gen-ddl
gen-ddl: ddl/rdf.sql
# Build the DDL for one schema module: gen-sqlddl output first, then the
# output of semsql/sqlutils.py appended, assembled in a temp file and moved
# into place only when both steps succeed.
ddl/%.sql: src/schema/%.yaml
	gen-sqlddl --no-use-foreign-keys $< > $@.tmp && \
	  python semsql/sqlutils.py $< >> $@.tmp && \
	  mv $@.tmp $@
|
||
# Generate the SQLAlchemy module for one schema: gen-sqlddl is asked to write
# it directly to the target via --sqla-file.
semsql/sqla/%.py: src/schema/%.yaml
	gen-sqlddl --no-use-foreign-keys --sqla-file $@ $<
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/sh
# Create a virtualenv in ./venv, activate it, and put the current directory
# on PYTHONPATH.
python -m venv venv
# `.` is the POSIX spelling of `source`; `source` itself is not available in
# every /bin/sh (e.g. dash), which the shebang above selects.
. venv/bin/activate
export PYTHONPATH=.:$PYTHONPATH
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
site_name: "Semantic SQL" | ||
theme: readthedocs | ||
nav: | ||
- Home: index.md | ||
- RDF: rdf_index.md | ||
- OWL: owl_index.md | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
linkml | ||
mkdocs | ||
sqlalchemy | ||
sqlacodegen |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
from sqlalchemy import Column, Index, Table, Text | ||
from sqlalchemy.sql.sqltypes import NullType | ||
from sqlalchemy.ext.declarative import declarative_base | ||
|
||
Base = declarative_base() | ||
metadata = Base.metadata | ||
|
||
class BlankNode(Base):
    """ORM mapping for the ``blank_node`` relation."""
    __tablename__ = 'blank_node'
    # Single text column, also used as the mapper's primary key.
    id = Column(Text, primary_key=True)
|
||
class ClassNode(Base):
    """ORM mapping for the ``class_node`` relation."""
    __tablename__ = 'class_node'
    # Single text column, also used as the mapper's primary key.
    id = Column(Text, primary_key=True)
|
||
class IriNode(Base):
    """ORM mapping for the ``iri_node`` relation."""
    __tablename__ = 'iri_node'
    # Single text column, also used as the mapper's primary key.
    id = Column(Text, primary_key=True)
|
||
class NamedIndividualNode(Base):
    """ORM mapping for the ``named_individual_node`` relation."""
    __tablename__ = 'named_individual_node'
    # Single text column, also used as the mapper's primary key.
    id = Column(Text, primary_key=True)
|
||
class Node(Base):
    """ORM mapping for the ``node`` relation."""
    __tablename__ = 'node'
    # Single text column, also used as the mapper's primary key.
    id = Column(Text, primary_key=True)
|
||
class OntologyNode(Base):
    """ORM mapping for the ``ontology_node`` relation."""
    __tablename__ = 'ontology_node'
    # Single text column, also used as the mapper's primary key.
    id = Column(Text, primary_key=True)
|
||
class OwlEquivalentClassStatement(Base):
    """ORM mapping for the ``owl_equivalent_class_statement`` relation."""
    __tablename__ = 'owl_equivalent_class_statement'
    # Every column is flagged primary_key, so the mapper's identity key is the
    # full tuple of all six text columns.
    stanza = Column(Text, primary_key=True)
    predicate = Column(Text, primary_key=True)
    value = Column(Text, primary_key=True)
    language = Column(Text, primary_key=True)
    subject = Column(Text, primary_key=True)
    object = Column(Text, primary_key=True)
|
||
class OwlRestriction(Base):
    """ORM mapping for the ``owl_restriction`` relation."""
    __tablename__ = 'owl_restriction'
    # All three text columns together form the mapper's composite primary key.
    restriction = Column(Text, primary_key=True)
    on_property = Column(Text, primary_key=True)
    filler = Column(Text, primary_key=True)
|
||
class OwlSomeValuesFrom(Base):
    """
    ORM mapping for the ``owl_some_values_from`` relation.

    Has the same three columns as ``OwlRestriction``.
    """
    __tablename__ = 'owl_some_values_from'
    # All three text columns together form the mapper's composite primary key.
    restriction = Column(Text, primary_key=True)
    on_property = Column(Text, primary_key=True)
    filler = Column(Text, primary_key=True)
|
||
class Prefix(Base):
    """
    Maps CURIEs to URIs

    ORM mapping for the ``prefix`` relation.
    """
    __tablename__ = 'prefix'
    # Both text columns together form the mapper's composite primary key.
    prefix = Column(Text, primary_key=True)
    base = Column(Text, primary_key=True)
|
||
class RdfTypeStatement(Base):
    """
    A statement that indicates the asserted type of the subject entity

    ORM mapping for the ``rdf_type_statement`` relation.
    """
    __tablename__ = 'rdf_type_statement'
    # Every column is flagged primary_key, so the mapper's identity key is the
    # full tuple of all six text columns.
    stanza = Column(Text, primary_key=True)
    subject = Column(Text, primary_key=True)
    predicate = Column(Text, primary_key=True)
    value = Column(Text, primary_key=True)
    language = Column(Text, primary_key=True)
    object = Column(Text, primary_key=True)
|
||
class RdfsLabelStatement(Base):
    """ORM mapping for the ``rdfs_label_statement`` relation."""
    __tablename__ = 'rdfs_label_statement'
    # Every column is flagged primary_key, so the mapper's identity key is the
    # full tuple of all six text columns.
    stanza = Column(Text, primary_key=True)
    subject = Column(Text, primary_key=True)
    predicate = Column(Text, primary_key=True)
    object = Column(Text, primary_key=True)
    value = Column(Text, primary_key=True)
    language = Column(Text, primary_key=True)
|
||
class RdfsSubClassOfStatement(Base):
    """ORM mapping for the ``rdfs_subClassOf_statement`` relation."""
    # Note the mixed-case table name, unlike the other mappings in this module.
    __tablename__ = 'rdfs_subClassOf_statement'
    # Every column is flagged primary_key, so the mapper's identity key is the
    # full tuple of all six text columns.
    stanza = Column(Text, primary_key=True)
    predicate = Column(Text, primary_key=True)
    value = Column(Text, primary_key=True)
    language = Column(Text, primary_key=True)
    subject = Column(Text, primary_key=True)
    object = Column(Text, primary_key=True)
|
||
class Statements(Base):
    """
    Represents an RDF triple

    ORM mapping for the ``statements`` relation.
    """
    __tablename__ = 'statements'
    # Every column is flagged primary_key, so the mapper's identity key is the
    # full tuple of all six text columns.
    stanza = Column(Text, primary_key=True)
    subject = Column(Text, primary_key=True)
    predicate = Column(Text, primary_key=True)
    object = Column(Text, primary_key=True)
    value = Column(Text, primary_key=True)
    language = Column(Text, primary_key=True)
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import click | ||
from linkml_model import SchemaDefinition | ||
from linkml.utils.formatutils import underscore | ||
from linkml.utils.schemaloader import load_raw_schema, SchemaLoader | ||
|
||
VIEW_CODE = 'sqlview>>' | ||
|
||
def generate_views_from_linkml(schema: SchemaDefinition, drop_tables=True) -> None:
    """
    Generates SQL VIEW statements from hints in LinkML schema

    View hints are encoded in the comments of each class: any comment that
    starts with the ``sqlview>>`` marker contributes one SELECT statement.
    When a class has several hints they are combined with UNION. The SQL is
    printed to standard output; nothing is returned.

    :param schema: LinkML schema containing hints
    :param drop_tables: if true, emit a ``DROP TABLE`` for the class's table
        before each ``CREATE VIEW``
    """
    for cn, c in schema.classes.items():
        sql_table = underscore(cn)
        views = []
        # Tolerate a class whose comments attribute is unset.
        for cmt in (c.comments or []):
            cmt = cmt.strip().rstrip(';')
            if cmt.startswith(VIEW_CODE):
                # Drop only the leading marker; replace() would also delete
                # any later occurrence inside the SQL text itself.
                views.append(cmt[len(VIEW_CODE):].strip())
        if views:
            print()
            if drop_tables:
                print(f'DROP TABLE {sql_table};')
            # The separator needs surrounding spaces: each view was stripped
            # above, so a bare "UNION" would fuse with the neighbouring SELECTs
            # ("... FROM aUNIONSELECT ...") and produce invalid SQL.
            print(f'CREATE VIEW {sql_table} AS {" UNION ".join(views)};')
|
||
@click.command()
@click.argument('inputs', nargs=-1)
def cli(inputs):
    """
    Generates SQL VIEW commands from hints embedded in linkml schema
    """
    # One pass per schema path given on the command line.
    for schema_path in inputs:
        # The handle is never read; the schema is loaded from the path itself.
        with open(schema_path, 'r') as _handle:
            schema = load_raw_schema(schema_path)
            # Two SQL comment lines heading this schema's output.
            print('-- ** REWRITE TABLES AS VIEWS **')
            print(f'-- SCHEMA: {schema.id}')
            # NOTE(review): presumably resolve() merges imports into `schema`
            # in place, since its return value is not used — confirm.
            resolver = SchemaLoader(schema, mergeimports=True)
            resolver.resolve()
            generate_views_from_linkml(schema)
|
||
if __name__ == '__main__': | ||
cli() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
/** | ||
These operate over NON-NORMALIZED gaf tables, for demo purposes | ||
No attempt is yet made to parse pipe-separated fields like taxon, qualifier | ||
*/ | ||
|
||
|
||
-- gaf rows whose ontology_class_ref appears as an id in `deprecated`.
CREATE VIEW annotation_to_deprecated AS SELECT * FROM gaf WHERE ontology_class_ref IN (SELECT id FROM deprecated );

-- todo: include redundant
-- Each gaf row joined to every entailed_edge whose subject is the row's
-- ontology_class_ref; the edge's predicate and object are exposed as
-- inferred_predicate and ancestor_term.
CREATE VIEW entailed_gaf AS SELECT gaf.*, e.predicate AS inferred_predicate, e.object AS ancestor_term FROM gaf JOIN entailed_edge AS e ON (gaf.ontology_class_ref = e.subject);

-- stats
-- Row count of gaf per db_object_taxon value.
CREATE VIEW num_annotation_by_taxon AS SELECT db_object_taxon, count(*) AS num_annotations FROM gaf GROUP BY db_object_taxon;
-- Number of distinct ontology_class_ref values per db_object_taxon value.
CREATE VIEW num_term_by_taxon AS SELECT db_object_taxon, count(DISTINCT ontology_class_ref) AS num_terms_annotated FROM gaf GROUP BY db_object_taxon;
Oops, something went wrong.