Skip to content

Commit

Permalink
Add named graphs to SPARQL interface. Add SPARQL diagnostics
Browse files Browse the repository at this point in the history
  • Loading branch information
hsolbrig committed Feb 17, 2019
1 parent 6bf1c94 commit c3e2d53
Show file tree
Hide file tree
Showing 37 changed files with 3,453 additions and 2,210 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ urllib3 = "*"
ShExJSG = ">=0.5.6"
CFGraph = ">=0.2.1"
PyShExC = ">=0.5.4"
sparql-slurper = ">=0.2.0"
sparql-slurper = ">=0.2.1"
sparqlwrapper = "*"

[requires]
Expand Down
6 changes: 3 additions & 3 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ This package is a reasonably literal implementation of the [Shape Expressions La
* 0.7.1 -- Fix issue 26
* 0.7.2 -- Upgrade error reporting
* 0.7.3 -- Report using namespaces, enhance PrefixLib to inject into a module
* 0.7.4 -- Added '-ps', '-pr', '-gn', '-pb' options to CLI

## Installation
```bash
Expand All @@ -53,7 +54,7 @@ Unfortunately, however, `rdflib-jsonld` is NOT compatible with the bleeding edge
> shexeval -h
usage: shexeval [-h] [-f FORMAT] [-s START] [-ut] [-sp STARTPREDICATE]
[-fn FOCUS] [-A] [-d] [-ss] [-cf] [-sq SPARQL] [-se]
[--stopafter STOPAFTER]
[--stopafter STOPAFTER] [-ps] [-pr] [-gn GRAPHNAME] [-pb]
rdf shex

positional arguments:
Expand Down Expand Up @@ -81,6 +82,13 @@ optional arguments:
-se, --stoponerror Stop on an error
--stopafter STOPAFTER
Stop after N nodes
-ps, --printsparql Print SPARQL queries as they are executed
-pr, --printsparqlresults
Print SPARQL query and results
-gn GRAPHNAME, --graphname GRAPHNAME
Specific SPARQL graph to query - use '' for any named
graph
-pb, --persistbnodes Treat BNodes as persistent in SPARQL endpoint
```
## Documentation
Expand Down
140 changes: 128 additions & 12 deletions notebooks/CLI.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from pyshex.shex_evaluator import evaluate_cli as shexeval"
"from pyshex.shex_evaluator import evaluate_cli as shexeval, PrefixLibrary"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -36,7 +36,7 @@
"text": [
"usage: shexeval [-h] [-f FORMAT] [-s START] [-ut] [-sp STARTPREDICATE]\n",
" [-fn FOCUS] [-A] [-d] [-ss] [-cf] [-sq SPARQL] [-se]\n",
" [--stopafter STOPAFTER]\n",
" [--stopafter STOPAFTER] [-ps] [-pr] [-gn GRAPHNAME] [-pb]\n",
" rdf shex\n",
"\n",
"positional arguments:\n",
Expand All @@ -63,7 +63,14 @@
" SPARQL query to generate focus nodes\n",
" -se, --stoponerror Stop on an error\n",
" --stopafter STOPAFTER\n",
" Stop after N nodes\n"
" Stop after N nodes\n",
" -ps, --printsparql Print SPARQL queries as they are executed\n",
" -pr, --printsparqlresults\n",
" Print SPARQL query and results\n",
" -gn GRAPHNAME, --graphname GRAPHNAME\n",
" Specific SPARQL graph to query - use '' for any named\n",
" graph\n",
" -pb, --persistbnodes Treat BNodes as persistent in SPARQL endpoint\n"
]
},
{
Expand All @@ -90,7 +97,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -99,7 +106,7 @@
"0"
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -119,7 +126,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -158,7 +165,7 @@
"1"
]
},
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -169,12 +176,121 @@
" \"-fn http://hl7.org/fhir/Observation/example-haplotype2\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Debug Biolink model against NCATS Red knowledge graph\n",
"The following example selects every item of type Protein and validates it against the biolink-model shex definition.\n",
"Parameters:\n",
"* http://graphdb.dumontierlab.com/repositories/ncats-red-kg - SPARQL endpoint\n",
"* https://raw.githubusercontent.com/biolink/biolink-model/master/shex/biolink-modelnc.shex - ShEx \n",
"* -ss Use SparqlSlurper\n",
"* -gn '' Don't slurp the default graph (if there is a name in the quotes, restrict it to that graph)\n",
"* -pr Print Slurper query results\n",
"* -ps Print Slurper queries\n",
"* -ut The RDF types of the subjects are the start nodes in the ShEx\n",
"* -se Stop on the first error\n",
"* -sq 'select ?item where{?item a <http://w3id.org/biolink/vocab/Protein>}' - the initial SPARQL query"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"SPARQL:\n",
"select ?item where{?item a <http://w3id.org/biolink/vocab/Protein>}\n",
"\thttp://identifiers.org/uniprot/P00734\n",
"\thttp://identifiers.org/uniprot/P00533\n",
"\thttp://identifiers.org/uniprot/O75015\n",
"\thttp://identifiers.org/uniprot/P00736\n",
"\thttp://identifiers.org/uniprot/P02745\n",
"\thttp://identifiers.org/uniprot/P02746\n",
"\thttp://identifiers.org/uniprot/P02747\n",
"\thttp://identifiers.org/uniprot/P08637\n",
"\thttp://identifiers.org/uniprot/P09871\n",
"\thttp://identifiers.org/uniprot/P12314\n",
"\n",
"\t ...\n",
"\n",
"\n",
"SPARQL: (SELECT ?s ?p ?o {graph ?g {<http://identifiers.org/uniprot/P00734> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?o}}) (0.25 secs) - 3 triples\n",
"RESULTS:\n",
"\t<http://identifiers.org/uniprot/P00734> a <http://w3id.org/biolink/vocab/Protein> .\n",
"SPARQL: (SELECT ?s ?p ?o {graph ?g {<http://identifiers.org/uniprot/P00734> ?p ?o}}) (0.25 secs) - 42 triples\n",
"RESULTS:\n",
"\t<http://identifiers.org/uniprot/P00734> a ns1:Protein ;\n",
"\t ns1:id <http://identifiers.org/uniprot/P00734>,\n",
"\t \"P00734\" ;\n",
"\t ns1:name \"10xCbxE-F2(25-622)\",\n",
"\t \"10xCbxE-F2(44-327)\",\n",
"\t \"10xCbxE-F2(44-622)\",\n",
"\t \"F2(25-622)\",\n",
"\t \"prothrombin (factor II) propeptide\",\n",
"\t \"thrombin heavy chain\",\n",
"\t \"thrombin light chain\" ;\n",
"\t ns1:part_of <http://identifiers.org/wikipathways/WP1818_r101320>,\n",
"\t <http://identifiers.org/wikipathways/WP1884_r101399>,\n",
"\t <http://identifiers.org/wikipathways/WP1929_r101361>,\n",
"\t <http://identifiers.org/wikipathways/WP2762_r101409>,\n",
"\t <http://identifiers.org/wikipathways/WP4419_r101720>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/a6d12>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/b607a>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/b9727>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/d6bb0>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/da774>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/e0f44>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/e7688>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/eddd5>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/f0c44>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/Complex/f7a7d>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/WP/Interaction/ccf28>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1818_r101320/WP/Interaction/f8e40>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1884_r101399/Complex/a8341>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1884_r101399/Complex/e1e8e>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP1929_r101361/Complex/eb678>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP2762_r101409/WP/Interaction/b538d>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP2762_r101409/WP/Interaction/da6fc>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP2762_r101409/WP/Interaction/fc444>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP4419_r101720/Complex/d0cd7>,\n",
"\t <http://rdf.wikipathways.org/Pathway/WP4419_r101720/Complex/f1433> ;\n",
"\t ns1:same_as <http://identifiers.org/ncbigene/2147>,\n",
"\t <http://identifiers.org/uniprot/C9JV37>,\n",
"\t <http://identifiers.org/uniprot/E9PIT3>,\n",
"\t <http://identifiers.org/uniprot/P00734> ;\n",
"\t ns1:systematic_synonym <http://identifiers.org/hgnc.symbol/F2> .\n",
"Errors:\n",
" Focus: http://identifiers.org/uniprot/P00734\n",
" Start: http://w3id.org/biolink/vocab/Protein\n",
" Reason: Testing <http://identifiers.org/uniprot/P00734> against shape http://w3id.org/biolink/vocab/NamedThing\n",
" Triples:\n",
" <http://identifiers.org/uniprot/P00734> ns1:id <http://identifiers.org/uniprot/P00734> .\n",
" <http://identifiers.org/uniprot/P00734> ns1:id \"P00734\" .\n",
" 2 triples exceeds max {0,1}\n"
]
},
{
"data": {
"text/plain": [
"1"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"shexeval([\"http://graphdb.dumontierlab.com/repositories/ncats-red-kg\",\n",
" \"https://raw.githubusercontent.com/biolink/biolink-model/master/shex/biolink-modelnc.shex\",\n",
" \"-ss\", \"-gn\", \"\", \"-pr\", \"-ps\", \"-ut\", \"-se\", \"-sq\",\n",
" \"select ?item where{?item a <http://w3id.org/biolink/vocab/Protein>}\"])"
]
}
],
"metadata": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def matchesCardinality(cntxt: Context, T: RDFGraph, expr: Union[ShExJ.tripleExpr
_fail_triples(cntxt, T)
cntxt.fail_reason = f" {len(T)} triples less than {cardinality_text}"
else:
cntxt.fail_reason = f" No matching triples found for predicate {expr.predicate}"
cntxt.fail_reason = f" No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
return False
elif 0 <= max_ < len(T):
_fail_triples(cntxt, T)
Expand All @@ -238,7 +238,7 @@ def _fail_triples(cntxt: Context, T: RDFGraph) -> None:
tlist = list(T)
if len(tlist):
cntxt.fail_reason = "Triples:"
for t in tlist:
for t in sorted(tlist):
cntxt.fail_reason = f" {cntxt.n3_mapper.n3(t)}"
if len(tlist) > 5:
cntxt.fail_reason = " ... "
Expand Down
31 changes: 25 additions & 6 deletions pyshex/shex_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ShExJSG import ShExJ, ShExC
from rdflib import Graph, URIRef, RDF
from rdflib.util import guess_format
from sparql_slurper import SlurpyGraph
from sparql_slurper import SlurpyGraph, QueryResultPrinter

from pyshex import PrefixLibrary
from pyshex.shape_expressions_language.p5_2_validation_definition import isValid
Expand Down Expand Up @@ -281,6 +281,11 @@ def genargs(prog: Optional[str] = None) -> ArgumentParser:
parser.add_argument("-sq", "--sparql", help="SPARQL query to generate focus nodes")
parser.add_argument("-se", "--stoponerror", help="Stop on an error", action="store_true")
parser.add_argument("--stopafter", help="Stop after N nodes", type=int)
parser.add_argument("-ps", "--printsparql", help="Print SPARQL queries as they are executed", action="store_true")
parser.add_argument("-pr", "--printsparqlresults", help="Print SPARQL query and results", action="store_true")
parser.add_argument("-gn", "--graphname", help="Specific SPARQL graph to query - use '' for any named graph")
parser.add_argument("-pb", "--persistbnodes", help="Treat BNodes as persistent in SPARQL endpoint",
action="store_true")
return parser


Expand All @@ -291,16 +296,28 @@ def evaluate_cli(argv: Optional[Union[str, List[str]]] = None, prog: Optional[st
if opts.sparql:
opts.slurper = True
if opts.slurper and opts.flattener:
print("Error: Cannot combine slurper and flattener graphs")
print("Error: Cannot combine slurper and flattener graphs", file=sys.stderr)
return 2
if not opts.sparql and not opts.slurper and \
(opts.printsparql or opts.printsparqlresults or opts.graphname is not None or opts.persistbnodes):
print("Error: printsparql, pringsparqlresults, graphname and persistbnodes are SPQARQL only",
file=sys.stderr)
if not opts.format:
opts.format = guess_format(opts.rdf)
if not opts.format:
print('Error: Cannot determine RDF format from file name - use "--format" option')
print('Error: Cannot determine RDF format from file name - use "--format" option', file=sys.stderr)
return 3
if opts.slurper:
g = SlurpyGraph(opts.rdf)
g.persistent_bnodes = True
if opts.printsparql:
g.debug_slurps = True
if opts.printsparqlresults:
g.debug_slurps = True
g.add_result_hook(QueryResultPrinter)
if opts.graphname is not None:
g.graph_name = opts.graphname
if opts.persistbnodes:
g.persistent_bnodes = True
else:
g = CFGraph() if opts.flattener else Graph()
if '\n' in opts.rdf or '\r' in opts.rdf:
Expand All @@ -309,7 +326,8 @@ def evaluate_cli(argv: Optional[Union[str, List[str]]] = None, prog: Optional[st
g.load(opts.rdf, format=opts.format)

if not (opts.focus or opts.allsubjects or opts.sparql):
print('Error: You must specify one or more graph focus nodes, supply a SPARQL query, or use the "-A" option')
print('Error: You must specify one or more graph focus nodes, supply a SPARQL query, or use the "-A" option',
file=sys.stderr)
return 4

start = []
Expand All @@ -327,7 +345,8 @@ def evaluate_cli(argv: Optional[Union[str, List[str]]] = None, prog: Optional[st
opts.focus = []
elif not isinstance(opts.focus, list):
opts.focus = [opts.focus]
opts.focus += list(SPARQLQuery(opts.rdf, opts.sparql, ).focus_nodes())
opts.focus += list(SPARQLQuery(opts.rdf, opts.sparql, print_query=opts.printsparql,
print_results=opts.printsparqlresults).focus_nodes())

def result_sink(rslt: EvaluationResult) -> bool:
if not rslt.result:
Expand Down
14 changes: 13 additions & 1 deletion pyshex/utils/sparql_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,16 @@


class SPARQLQuery:
def __init__(self, sparql_endpoint: str, sparql_file_uri_or_text: str, ) -> None:
def __init__(self, sparql_endpoint: str, sparql_file_uri_or_text: str,
print_query: bool=False, print_results: bool=False) -> None:
""" Set up the query to run
:param sparql_endpoint: URL of sparql endpoint
:param sparql_file_uri_or_text: URI, filename or SPARQL text
:param print_query: Print the text of the SPARQL query
:param print_results: Print query results
"""
self.print_results = print_results
if '\n' in sparql_file_uri_or_text or '\r' in sparql_file_uri_or_text or ' ' in sparql_file_uri_or_text:
self.query = sparql_file_uri_or_text
elif ':/' in sparql_file_uri_or_text:
Expand All @@ -23,6 +27,9 @@ def __init__(self, sparql_endpoint: str, sparql_file_uri_or_text: str, ) -> None
else:
with open(sparql_file_uri_or_text) as f:
self.query = f.read()
if print_query:
print("SPARQL:")
print(self.query)
self.endpoint = SPARQLWrapper(sparql_endpoint)
self.endpoint.setQuery(self.query)
self.endpoint.setReturnFormat(JSON)
Expand All @@ -31,4 +38,9 @@ def focus_nodes(self) -> List[URIRef]:
result = self.endpoint.query()

processed_results = jsonasobj.load(result.response)
if self.print_results:
print('\t' + ('\n\t'.join([row.item.value for row in processed_results.results.bindings[:10]])))
if len(processed_results.results.bindings) > 10:
print('\n\t ...')
print('\n')
return [URIRef(row.item.value) for row in processed_results.results.bindings]
Loading

0 comments on commit c3e2d53

Please sign in to comment.