Skip to content

Commit

Permalink
Merge pull request #36 from INCATools/poetry
Browse files Browse the repository at this point in the history
poetry
  • Loading branch information
cmungall authored Apr 30, 2022
2 parents e969fa8 + 7e68b14 commit 0e1a0e3
Show file tree
Hide file tree
Showing 51 changed files with 515 additions and 4,176 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,6 @@ relation-graph/*
*/.DS_Store
inferences/*
node_modules/*
demo/
*pyc
tmp/
14 changes: 7 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ LABEL maintainer="[email protected]" \
ENV JAVA_HOME="/usr"
WORKDIR /tools
ENV PATH "/tools/:$PATH"
COPY requirements.txt /tools/
COPY pyproject.toml /tools/
COPY ./utils/create-semsql-db.sh /tools/
##COPY utils/* /tools/

Expand Down Expand Up @@ -55,7 +55,7 @@ RUN apt-get update &&\
#ENV PATH "/tools/node_modules/obographviz/bin/:$PATH"

###### ROBOT ######
ENV ROBOT v1.8.1
ENV ROBOT v1.8.3
ARG ROBOT_JAR=https://github.com/ontodev/robot/releases/download/$ROBOT/robot.jar
ENV ROBOT_JAR ${ROBOT_JAR}
# LAYERSIZE ~66MB
Expand All @@ -69,17 +69,17 @@ RUN wget $ROBOT_JAR -O /tools/robot.jar && \
ENV COURSIER_CACHE "/tools/.coursier-cache"

###### JENA ######
ENV JENA 3.12.0
ENV JENA 4.4.0
RUN wget http://archive.apache.org/dist/jena/binaries/apache-jena-$JENA.tar.gz -O- | tar xzC /tools
ENV PATH "/tools/apache-jena-$JENA/bin:$PATH"

###### relation-graph ######
ENV RGVERSION=1.1
ENV RGVERSION=2.2.0
ENV PATH "/tools/relation-graph/bin:$PATH"
# LAYERSIZE ~200MB
RUN wget -nv https://github.com/balhoff/relation-graph/releases/download/v$RGVERSION/relation-graph-$RGVERSION.tgz \
&& tar -zxvf relation-graph-$RGVERSION.tgz \
&& mv relation-graph-$RGVERSION /tools/relation-graph \
RUN wget -nv https://github.com/balhoff/relation-graph/releases/download/v$RGVERSION/relation-graph-cli-$RGVERSION.tgz \
&& tar -zxvf relation-graph-cli-$RGVERSION.tgz \
&& mv relation-graph-cli-$RGVERSION /tools/relation-graph \
&& chmod +x /tools/relation-graph

### TODO REVIEW THIS. As we speak, jq is officially still stalled at 1.5, but for the walk function, we
Expand Down
42 changes: 26 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
RUN = poetry run

ALL_OBO_ONTS := $(shell cat reports/obo.tsv)
SELECTED_ONTS = obi mondo go envo ro hp mp zfa wbphenotype ecto upheno uberon_cm doid chebi pr wbphenotype fbbt dron

Expand All @@ -15,7 +17,7 @@ all-%: db/%.db
# ---
test: test_build unittest
unittest:
python -s -m unittest tests/test_*.py
$(RUN) python -s -m unittest

# Copy the test ontology tests/inputs/<name>.owl into the owl/ directory.
cp-%: tests/inputs/%.owl
cp $< owl/
Expand All @@ -25,6 +27,7 @@ test_build: setup_tests $(patsubst %, test-build-%,$(TEST_ONTOLOGIES))
# Build db/<name>.db from owl/<name>.owl with create-semsql-db.sh, then copy
# the resulting database into tests/inputs/. Requires inferences/<name>-inf.tsv
# to be built first.
test-build-%: inferences/%-inf.tsv
./utils/create-semsql-db.sh -v -f -r -d db/$*.db owl/$*.owl && cp db/$*.db tests/inputs/

# copy from tests/input to staging area
setup_tests: $(patsubst %, cp-%,$(TEST_ONTOLOGIES))

realclean-%:
Expand All @@ -39,8 +42,8 @@ db/%.db: owl/%.owl inferences/%-inf.tsv bin/rdftab
./utils/create-semsql-db.sh -v -f -d $@ $<
.PRECIOUS: db/%.db

foo/%.db: owl/%.owl inferences/%-inf.tsv bin/rdftab
./utils/create-semsql-db.sh -v -f -d $@ $<
#foo/%.db: owl/%.owl inferences/%-inf.tsv bin/rdftab
# ./utils/create-semsql-db.sh -v -f -d $@ $<

# ---
### RDFTab
Expand All @@ -61,9 +64,9 @@ bin/rdftab:
curl -L -o $@ $(RDFTAB_URL)
chmod +x $@

RG_VERSION=2.0
RG_VERSION=2.2.0
bin/relation-graph:
curl -L -s https://github.com/balhoff/relation-graph/releases/download/v$(RG_VERSION)/relation-graph-$(RG_VERSION).tgz | tar -zxv && mv relation-graph-$(RG_VERSION) relation-graph && (cd bin && ln -s ../relation-graph/bin/relation-graph)
curl -L -s https://github.com/balhoff/relation-graph/releases/download/v$(RG_VERSION)/relation-graph-cli-$(RG_VERSION).tgz | tar -zxv && mv relation-graph-cli-$(RG_VERSION) relation-graph && (cd bin && ln -s ../relation-graph/bin/relation-graph)

# ---
# OBO Registry
Expand Down Expand Up @@ -94,7 +97,7 @@ list-onts:
#RG_PROPS = --property http://purl.obolibrary.org/obo/BFO_0000050
RG_PROPS =

# we still want to do graph walking even when incoherent
# we still want to do graph walking even when ontology is incoherent
# Run `robot remove --axioms disjoint` on owl/<name>.owl and write the result
# to inferences/<name>-no-disjoint.owl. The output is marked .PRECIOUS so make
# does not delete it as an intermediate file.
inferences/%-no-disjoint.owl: owl/%.owl
robot remove -i $< --axioms disjoint -o $@
.PRECIOUS: inferences/%-no-disjoint.owl
Expand All @@ -104,6 +107,8 @@ inferences/%-inf.ttl: inferences/%-no-disjoint.owl
.PRECIOUS: inferences/%-inf.ttl

# currently tedious to get this back into a TSV that can be loaded into sqlite...
# https://github.com/balhoff/relation-graph/issues/123
# https://github.com/balhoff/relation-graph/issues/25
# Convert inferences/<name>-inf.ttl to RDF/XML with riot. Output goes to a
# temporary file that is renamed only if riot succeeds, so a failed conversion
# never leaves a partial target behind.
inferences/%-inf.owl: inferences/%-inf.ttl
riot --out RDFXML $< > $@.tmp && mv $@.tmp $@
.PRECIOUS: inferences/%-inf.owl
Expand Down Expand Up @@ -145,6 +150,7 @@ prefixes/prefixes.csv: prefixes/prefixes_curated.csv prefixes/obo_prefixes.csv
# Downloads
# ---

# download OWL, ensuring converted to RDF/XML
owl/%.owl:
# curl -L -s http://purl.obolibrary.org/obo/$*.owl > $@.tmp && mv $@.tmp $@
robot merge -I http://purl.obolibrary.org/obo/$*.owl -o $@
Expand All @@ -157,11 +163,12 @@ owl/monarch.owl:
robot merge -I http://purl.obolibrary.org/obo/upheno/monarch.owl -o $@

#fma.owl:#
http://purl.org/sig/ont/fma.owl
# http://purl.org/sig/ont/fma.owl

# ---
# GO Demo
# ---
# TODO: move this
# Download <name>.gaf.gz from current.geneontology.org, decompress it, and
# pipe it through ./utils/gaf2tsv to produce the target TSV.
demo/gaf/%.gaf.tsv:
curl -L -s http://current.geneontology.org/annotations/$*.gaf.gz | gzip -dc | ./utils/gaf2tsv > $@
demo/gaf/%.gpi.tsv:
Expand Down Expand Up @@ -203,28 +210,31 @@ MODULES = rdf owl obo omo relation_graph semsql

# TODO: markdown gen should make modular output
markdown-%: src/schema/%.yaml
gen-markdown --no-mergeimports -d docs $< && mv docs/index.md docs/$*_index.md
$(RUN) gen-markdown --no-mergeimports -d docs $< && mv docs/index.md docs/$*_index.md
markdown: $(patsubst %, markdown-%, $(MODULES))
gen-markdown --no-mergeimports -d docs src/schema/semsql.yaml
$(RUN) gen-markdown --no-mergeimports -d docs src/schema/semsql.yaml

# Run gen-project (through $(RUN)) on the semsql schema.
# NOTE(review): "-d project" is presumably the output directory -- confirm against gen-project's help.
gen-project: src/schema/semsql.yaml
$(RUN) gen-project $< -d project

# Create SQL Create Table statements from linkml
GENDDL = gen-sqlddl --dialect sqlite --no-use-foreign-keys
GENDDL = $(RUN) gen-sqlddl --dialect sqlite --no-use-foreign-keys
gen-ddl: $(patsubst %, ddl/%.sql, $(MODULES))
ddl/%.sql: src/schema/%.yaml
$(GENDDL) $< > $@.tmp && \
python semsql/sqlutils/viewgen.py $< >> $@.tmp && \
$(RUN) python semsql/sqlutils/viewgen.py $< >> $@.tmp && \
mv $@.tmp $@

reports/query-%.sql: src/schema/%.yaml
python semsql/sqlutils/reportgen.py $< > $@
$(RUN) python semsql/sqlutils/reportgen.py $< > $@

# Generate SQL Alchemy
gen-sqla: $(patsubst %, semsql/sqla/%.py, $(MODULES))

# make SQL Alchemy models
# requires linkml 1.2.5
semsql/sqla/%.py: src/schema/%.yaml
gen-sqla --no-use-foreign-keys $< > $@
$(RUN) gen-sqla --no-use-foreign-keys $< > $@

# DOCKER

Expand All @@ -233,19 +243,19 @@ VERSION = "v0.0.1"
IM=cmungall/semantic-sql

docker-build-no-cache:
@docker build --build-arg ODK_VERSION=$(VERSION) $(ROBOT_JAR_ARGS) --no-cache -t $(IM):$(VERSION) . \
@docker build --build-arg SEMSQL_VERSION=$(VERSION) $(ROBOT_JAR_ARGS) --no-cache -t $(IM):$(VERSION) . \
&& docker tag $(IM):$(VERSION) $(IM):latest && docker tag $(IM):$(VERSION) $(DEV):latest && \
docker build -f docker/odklite/Dockerfile -t $(IMLITE):$(VERSION) . \
&& docker tag $(IMLITE):$(VERSION) $(IMLITE):latest && cd docker/robot/ && make docker-build

docker-build:
@docker build --build-arg ODK_VERSION=$(VERSION) $(ROBOT_JAR_ARGS) -t $(IM):$(VERSION) . \
@docker build --build-arg SEMSQL_VERSION=$(VERSION) $(ROBOT_JAR_ARGS) -t $(IM):$(VERSION) . \
&& docker tag $(IM):$(VERSION) $(IM) && docker tag $(IM):$(VERSION) $(DEV):latest && \
docker build -f docker/odklite/Dockerfile -t $(IMLITE):$(VERSION) . \
&& docker tag $(IMLITE):$(VERSION) $(IMLITE):latest && cd docker/robot/ && make docker-build

docker-build-dev:
@docker build --build-arg ODK_VERSION=$(VERSION) -t $(DEV):$(VERSION) . \
@docker build --build-arg SEMSQL_VERSION=$(VERSION) -t $(DEV):$(VERSION) . \
&& docker tag $(DEV):$(VERSION) $(DEV):latest

docker-clean:
Expand Down
55 changes: 11 additions & 44 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
# semantic-sql
# Semantic-SQL: standard SQL views for ontologies

This is an experimental repo that provides useful tools for working
with RDF, OWL, and ontologies using SQL databases, as a performant and
composable alternative to SPARQL.
This repo provides SQL **schema definitions** for working with
ontologies, together with tools for loading them from RDF/OWL files.

Currently only sqlite is supported, but this would be easy to adapt to postgres
These SQL databases can then be used with the [ontology-access-kit](https://github.com/INCATools/ontology-access-kit).

It leverages [rdftab.rs](https://github.com/ontodev/rdftab.rs) but can be used independently.

Expand All @@ -20,7 +19,8 @@ The basic idea is:
Basic lexical query:

```sql
$ sqlite db/hp.db
$ wget https://s3.amazonaws.com/bbop-sqlite/hp.db -O hp.db
$ sqlite hp.db
sqlite>
select * from rdfs_label_statement where value like 'Abnormality of %';
```
Expand Down Expand Up @@ -54,19 +54,7 @@ caveats:
- the version of the schema may be different from this repo
- some dbs may have additional tables loaded; e.g go.db may have gafs loaded

You can easily build a sqlite db from OWL yourself, see below:

## In flux

Some parts of this repo are in-flux, see https://github.com/cmungall/semantic-sql/issues/4

One particularly confusing thing is that there are two sets of semi-redundant VIEW definitions in 2 folders:

* sql/
* ddl/

The ones in ddl are generated from the YAML, and they will eventually replace the ones in sql/

You can easily build a sqlite db from OWL yourself; see below.

## Requirements

Expand Down Expand Up @@ -168,31 +156,14 @@ sqlite> select count(*) from rdfs_subclass_of_statement;
## Python
```bash
pip install -r requirements.txt
```
for visualization, install: https://github.com/cmungall/obographviz
```bash
npm install -g graphviz
```
visualize all terms starting with the string "nucle" and their ancestors using obographviz, with subClassOf as a containment relation:
See:
[ontology-access-kit](https://github.com/INCATools/ontology-access-kit)
```bash
subgraph-d tests/inputs/go-nucleus.db -m label nucle% -f viz -p s,BFO:0000050 \
-s conf/obograph-style.json -C 'containmentRelations: [rdfs:subClassOf]'
runoak -i db/envo.db search t~biome
```
Generates:
![image](https://user-images.githubusercontent.com/50745/119427094-4659d580-bcbf-11eb-8c79-ed8559ed4886.png)
## Modules
Expand Down Expand Up @@ -240,10 +211,6 @@ views for querying ontologies such as GO, that incorporate critical
information in existential axioms, the view `edge` provides a union of
subclass between named classes and subclasses of existentials.
### OBO-Checks
This is an experiment to try and replicate ROBOT checks. See below
### GO
## Validation
Expand Down Expand Up @@ -278,7 +245,7 @@ variety of performant tools can be written.
## Schema
See [LinkML Docs](https://cmungall.github.io/semantic-sql/)
See [LinkML Docs](https://incatools.github.io/semantic-sql/)
SQL views can be generated automatically. For now the linkml schema can be used to explore the structure
Expand Down
2 changes: 1 addition & 1 deletion bin/relation-graph
21 changes: 20 additions & 1 deletion ddl/omo.sql
Original file line number Diff line number Diff line change
Expand Up @@ -249,13 +249,17 @@
-- * Slot: filler Description: This is Null for a self-restriction
-- * Slot: id Description: An identifier for an element. Note blank node ids are not unique across databases
-- # Class: "owl_complex_axiom" Description: "An axiom that is composed of two or more statements"
-- * Slot: subject Description:
-- * Slot: predicate Description:
-- * Slot: object Description: Note the range of this slot is always a node. If the triple represents a literal, instead value will be populated
-- # Class: "owl_subclass_of_some_values_from" Description: "Composition of subClassOf and SomeValuesFrom"
-- * Slot: subject Description: the class C in the axiom C subClassOf P some D
-- * Slot: predicate Description: the predicate P in the axiom C subClassOf P some D
-- * Slot: object Description: the class D in the axiom C subClassOf P some D
-- # Class: "owl_equivalent_to_intersection_member" Description: "Composition of `OwlEquivalentClass`, `OwlIntersectionOf`, and `RdfListMember`; `C = X1 and ... and Xn`"
-- * Slot: subject Description: the defined class
-- * Slot: object Description: a class expression that forms the defining expression
-- * Slot: predicate Description:
-- # Class: "prefix" Description: "Maps CURIEs to URIs"
-- * Slot: prefix Description: A standardized prefix such as 'GO' or 'rdf' or 'FlyBase'
-- * Slot: base Description: The base URI a prefix will expand to
Expand Down Expand Up @@ -723,14 +727,20 @@ CREATE TABLE owl_has_self (
id TEXT,
PRIMARY KEY (id)
);
-- Table for class "owl_complex_axiom": an axiom that is composed of two or more statements.
-- Columns subject, predicate and object are all TEXT; no primary key is declared.
CREATE TABLE owl_complex_axiom (
subject TEXT,
predicate TEXT,
object TEXT
);
-- Table for class "owl_subclass_of_some_values_from" (axioms of the form C subClassOf P some D):
-- subject is the class C, predicate is P, and object is the class D.
CREATE TABLE owl_subclass_of_some_values_from (
subject TEXT,
predicate TEXT,
object TEXT
);
CREATE TABLE owl_equivalent_to_intersection_member (
subject TEXT,
object TEXT
object TEXT,
predicate TEXT
);
CREATE TABLE prefix (
prefix TEXT,
Expand Down Expand Up @@ -933,6 +943,9 @@ CREATE VIEW has_narrow_synonym_statement AS SELECT * FROM statements WHERE predi
-- Replace the has_related_synonym_statement table with a view of the same name
-- selecting statements whose predicate is oio:hasRelatedSynonym.
DROP TABLE has_related_synonym_statement;
CREATE VIEW has_related_synonym_statement AS SELECT * FROM statements WHERE predicate='oio:hasRelatedSynonym';

-- Replace the has_synonym_statement table with a view of the same name that is
-- the UNION (duplicates removed) of the exact, broad, narrow and related synonym views.
DROP TABLE has_synonym_statement;
CREATE VIEW has_synonym_statement AS SELECT * FROM has_exact_synonym_statement UNION SELECT * FROM has_broad_synonym_statement UNION SELECT * FROM has_narrow_synonym_statement UNION SELECT * FROM has_related_synonym_statement;

-- Replace the has_exact_match_statement table with a view of the same name
-- selecting statements whose predicate is skos:hasExactMatch.
DROP TABLE has_exact_match_statement;
CREATE VIEW has_exact_match_statement AS SELECT * FROM statements WHERE predicate='skos:hasExactMatch';

Expand All @@ -945,9 +958,15 @@ CREATE VIEW has_narrow_match_statement AS SELECT * FROM statements WHERE predica
-- Replace the has_related_match_statement table with a view of the same name
-- selecting statements whose predicate is skos:hasRelatedMatch.
DROP TABLE has_related_match_statement;
CREATE VIEW has_related_match_statement AS SELECT * FROM statements WHERE predicate='skos:hasRelatedMatch';

-- Replace the has_match_statement table with a view of the same name that is
-- the UNION (duplicates removed) of the exact, broad, narrow and related match views.
DROP TABLE has_match_statement;
CREATE VIEW has_match_statement AS SELECT * FROM has_exact_match_statement UNION SELECT * FROM has_broad_match_statement UNION SELECT * FROM has_narrow_match_statement UNION SELECT * FROM has_related_match_statement;

-- Replace the has_dbxref_statement table with a view of the same name
-- selecting statements whose predicate is oio:hasDbXref.
DROP TABLE has_dbxref_statement;
CREATE VIEW has_dbxref_statement AS SELECT * FROM statements WHERE predicate='oio:hasDbXref';

-- Replace the has_mapping_statement table with a view of the same name that is
-- the UNION (duplicates removed) of has_match_statement and has_dbxref_statement.
DROP TABLE has_mapping_statement;
CREATE VIEW has_mapping_statement AS SELECT * FROM has_match_statement UNION SELECT * FROM has_dbxref_statement;

-- Replace the axiom_dbxref_annotation table with a view of the same name
-- selecting owl_axiom_annotation rows whose annotation_predicate is oio:hasDbXref.
DROP TABLE axiom_dbxref_annotation;
CREATE VIEW axiom_dbxref_annotation AS SELECT * FROM owl_axiom_annotation WHERE annotation_predicate = 'oio:hasDbXref';

Expand Down
Loading

0 comments on commit 0e1a0e3

Please sign in to comment.