diff --git a/notebooks/Commands/Relationships.ipynb b/notebooks/Commands/Relationships.ipynb
index 50037f3f9..b1ec428aa 100644
--- a/notebooks/Commands/Relationships.ipynb
+++ b/notebooks/Commands/Relationships.ipynb
@@ -10,7 +10,9 @@
"This notebook is intended as a supplement to the [main OAK CLI docs](https://incatools.github.io/ontology-access-kit/cli.html).\n",
"\n",
"This notebook provides examples for the `relationships` command, which can be used to lookup direct and inferred relationships\n",
- "between entities in ontologies\n",
+ "between entities in ontologies.\n",
+ "\n",
+ "Overall background on the concepts here can be found in the [OAK Guide to Graphs and Relationships](https://incatools.github.io/ontology-access-kit/guide/relationships-and-graphs.html).\n",
"\n",
"## Help Option\n",
"\n",
@@ -19,9 +21,14 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 28,
"id": "97ed8cee",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T22:17:52.798918Z",
+ "start_time": "2024-04-19T22:17:50.223140Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -68,8 +75,18 @@
" runoak -i uberon.db relationships -p RO:0002178 .desc//p=i \"artery\" .and\r\n",
" .desc//p=i,p \"limb\"\r\n",
"\r\n",
+ " More examples:\r\n",
+ "\r\n",
+ " https://github.com/INCATools/ontology-access-\r\n",
+ " kit/blob/main/notebooks/Commands/Relationships.ipynb\r\n",
+ "\r\n",
+ " Python API:\r\n",
+ "\r\n",
+ " https://incatools.github.io/ontology-access-kit/interfaces/basic\r\n",
+ "\r\n",
"Options:\r\n",
- " -p, --predicates TEXT A comma-separated list of predicates\r\n",
+ " -p, --predicates TEXT A comma-separated list of predicates. This\r\n",
+ " may be a shorthand (i, p) or CURIE\r\n",
" --direction [up|down|both] direction of traversal over edges, which up\r\n",
" is subject to object, down is object to\r\n",
" subject.\r\n",
@@ -85,6 +102,10 @@
" --include-entailed / --no-include-entailed\r\n",
" Include entailed indirect relationships\r\n",
" [default: no-include-entailed]\r\n",
+ " --non-redundant-entailed / --no-non-redundant-entailed\r\n",
+ " Include entailed but exclude entailed\r\n",
+ " redundant relationships [default: no-non-\r\n",
+ " redundant-entailed]\r\n",
" --include-tbox / --no-include-tbox\r\n",
" Include class-class relationships (subclass\r\n",
" and existentials) [default: include-tbox]\r\n",
@@ -92,6 +113,9 @@
" Include instance relationships (class and\r\n",
" object property assertions) [default:\r\n",
" include-abox]\r\n",
+ " --include-metadata / --no-include-metadata\r\n",
+ " Include metadata (axiom annotations)\r\n",
+ " [default: no-include-metadata]\r\n",
" --help Show this message and exit.\r\n"
]
}
@@ -107,14 +131,19 @@
"source": [
"## Set up an alias\n",
"\n",
- "For convenience we will set up an alias for use in this notebook"
+ "For convenience we will set up an alias for use in this notebook. This will allow us to use `uberon ...` rather than `runoak -i sqlite:obo:uberon ...` for the rest of the notebook."
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 2,
"id": "29d2249a",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:06:44.192590Z",
+ "start_time": "2024-04-19T19:06:44.188656Z"
+ }
+ },
"outputs": [],
"source": [
"alias uberon runoak -i sqlite:obo:uberon"
@@ -125,25 +154,32 @@
"id": "1a7c69d7",
"metadata": {},
"source": [
- "## Direct relationships for a subject term"
+ "## Direct relationships for a subject term\n",
+ "\n",
+ "First we will look up the direct [asserted](https://incatools.github.io/ontology-access-kit/glossary.html#term-Asserted) relationships in Uberon with `finger` as the subject term."
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 3,
"id": "2c406bc1",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:06:47.918434Z",
+ "start_time": "2024-04-19T19:06:44.193418Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002102\tforelimb\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:5002389\tmanual digit plus metapodial segment\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\n"
+ "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002102\tforelimb\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:5002389\tmanual digit plus metapodial segment\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\r\n"
]
}
],
@@ -151,36 +187,41 @@
"uberon relationships finger"
]
},
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Like most OAK commands, the `relationships` command can take lists of labels, lists of IDs, or even complex query terms (which might themselves involve graphs)."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "860ef175aa0e1bb8"
+ },
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
"id": "70acf6ae",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:10:02.524364Z",
+ "start_time": "2024-04-19T19:09:59.249190Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r",
- "\r\n",
- "UBERON:0001466\tpedal digit\tBFO:0000050\tpart of\tUBERON:0002387\tpes\r",
- "\r\n",
- "UBERON:0001466\tpedal digit\tBFO:0000050\tpart of\tUBERON:0012142\tpedal digitopodium region\r",
- "\r\n",
- "UBERON:0001466\tpedal digit\tBFO:0000050\tpart of\tUBERON:5001466\tpedal digit plus metapodial segment\r",
- "\r\n",
- "UBERON:0001466\tpedal digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\r",
- "\r\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002102\tforelimb\r",
- "\r\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r",
- "\r\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r",
- "\r\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:5002389\tmanual digit plus metapodial segment\r",
- "\r\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\r",
- "\r\n"
+ "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r\n",
+ "UBERON:0001466\tpedal digit\tBFO:0000050\tpart of\tUBERON:0002387\tpes\r\n",
+ "UBERON:0001466\tpedal digit\tBFO:0000050\tpart of\tUBERON:0012142\tpedal digitopodium region\r\n",
+ "UBERON:0001466\tpedal digit\tBFO:0000050\tpart of\tUBERON:5001466\tpedal digit plus metapodial segment\r\n",
+ "UBERON:0001466\tpedal digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002102\tforelimb\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:5002389\tmanual digit plus metapodial segment\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\r\n"
]
}
],
@@ -188,81 +229,96 @@
"uberon relationships finger toe"
]
},
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Next we will show all direct relationships for all is-a descendants of `finger`."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "747e2b9c7333997b"
+ },
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 6,
"id": "84dc3c97",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:10:29.447982Z",
+ "start_time": "2024-04-19T19:10:24.216377Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\n",
- "UBERON:0001463\tmanual digit 1\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0001463\tmanual digit 1\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\n",
- "UBERON:0001463\tmanual digit 1\tBFO:0000050\tpart of\tUBERON:5001463\tmanual digit 1 plus metapodial segment\n",
- "UBERON:0001463\tmanual digit 1\tBSPO:0001113\tpreaxialmost part of\tUBERON:0002398\tmanus\n",
- "UBERON:0001463\tmanual digit 1\trdfs:subClassOf\tNone\tUBERON:0006048\tdigit 1\n",
- "UBERON:0001463\tmanual digit 1\trdfs:subClassOf\tNone\tUBERON:0019231\tmanual digit 1 or 5\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002102\tforelimb\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\n",
- "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:5002389\tmanual digit plus metapodial segment\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\n",
- "UBERON:0003622\tmanual digit 2\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0003622\tmanual digit 2\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\n",
- "UBERON:0003622\tmanual digit 2\tBFO:0000050\tpart of\tUBERON:5003622\tmanual digit 2 plus metapodial segment\n",
- "UBERON:0003622\tmanual digit 2\trdfs:subClassOf\tNone\tUBERON:0006049\tdigit 2\n",
- "UBERON:0003622\tmanual digit 2\trdfs:subClassOf\tNone\tUBERON:0019232\tmanual digit 2, 3 or 4\n",
- "UBERON:0003623\tmanual digit 3\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0003623\tmanual digit 3\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\n",
- "UBERON:0003623\tmanual digit 3\tBFO:0000050\tpart of\tUBERON:5003623\tmanual digit 3 plus metapodial segment\n",
- "UBERON:0003623\tmanual digit 3\trdfs:subClassOf\tNone\tUBERON:0006050\tdigit 3\n",
- "UBERON:0003623\tmanual digit 3\trdfs:subClassOf\tNone\tUBERON:0019232\tmanual digit 2, 3 or 4\n",
- "UBERON:0003624\tmanual digit 4\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0003624\tmanual digit 4\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\n",
- "UBERON:0003624\tmanual digit 4\tBFO:0000050\tpart of\tUBERON:5003624\tmanual digit 4 plus metapodial segment\n",
- "UBERON:0003624\tmanual digit 4\trdfs:subClassOf\tNone\tUBERON:0006051\tdigit 4\n",
- "UBERON:0003624\tmanual digit 4\trdfs:subClassOf\tNone\tUBERON:0019232\tmanual digit 2, 3 or 4\n",
- "UBERON:0003625\tmanual digit 5\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0003625\tmanual digit 5\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\n",
- "UBERON:0003625\tmanual digit 5\tBFO:0000050\tpart of\tUBERON:5003625\tmanual digit 5 plus metapodial segment\n",
- "UBERON:0003625\tmanual digit 5\trdfs:subClassOf\tNone\tUBERON:0006052\tdigit 5\n",
- "UBERON:0003625\tmanual digit 5\trdfs:subClassOf\tNone\tUBERON:0019231\tmanual digit 1 or 5\n",
- "UBERON:0008444\twebbed manual digit\tBFO:0000050\tpart of\tUBERON:0008441\twebbed manus\n",
- "UBERON:0008444\twebbed manual digit\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0008444\twebbed manual digit\trdfs:subClassOf\tNone\tUBERON:0008443\twebbed digit\n",
- "UBERON:0011981\tmanual digit 6\tBFO:0000050\tpart of\tUBERON:5011981\tmanual digit 6 plus metapodial segment\n",
- "UBERON:0011981\tmanual digit 6\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0011981\tmanual digit 6\trdfs:subClassOf\tNone\tUBERON:0016856\tdigit 6\n",
- "UBERON:0011982\tmanual digit 7\tBFO:0000050\tpart of\tUBERON:5011982\tmanual digit 7 plus metapodial segment\n",
- "UBERON:0011982\tmanual digit 7\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0011982\tmanual digit 7\trdfs:subClassOf\tNone\tUBERON:0016857\tdigit 7\n",
- "UBERON:0011983\tmanual digit 8\tBFO:0000050\tpart of\tUBERON:5011983\tmanual digit 8 plus metapodial segment\n",
- "UBERON:0011983\tmanual digit 8\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0011983\tmanual digit 8\trdfs:subClassOf\tNone\tUBERON:0016858\tdigit 8\n",
- "UBERON:0012260\talular digit\tBFO:0000050\tpart of\tUBERON:5012260\talular digit plus metapodial segment\n",
- "UBERON:0012260\talular digit\tBSPO:0001113\tpreaxialmost part of\tUBERON:0002398\tmanus\n",
- "UBERON:0012260\talular digit\tRO:0002160\tonly in taxon\tNCBITaxon:8782\tAves\n",
- "UBERON:0012260\talular digit\tRO:0002254\thas developmental contribution from\tUBERON:0005692\tmanual digit 2 mesenchyme\n",
- "UBERON:0012260\talular digit\tRO:0002254\thas developmental contribution from\tUBERON:0010564\tmanual digit 1 mesenchyme\n",
- "UBERON:0012260\talular digit\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0012261\tmanual major digit (Aves)\tBFO:0000050\tpart of\tUBERON:5012261\tmanual major digit (Aves) plus metapodial segment\n",
- "UBERON:0012261\tmanual major digit (Aves)\tRO:0002160\tonly in taxon\tNCBITaxon:8782\tAves\n",
- "UBERON:0012261\tmanual major digit (Aves)\tRO:0002254\thas developmental contribution from\tUBERON:0005693\tmanual digit 3 mesenchyme\n",
- "UBERON:0012261\tmanual major digit (Aves)\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0012262\tmanual minor digit (Aves)\tBFO:0000050\tpart of\tUBERON:5012262\tmanual minor digit (Aves) plus metapodial segment\n",
- "UBERON:0012262\tmanual minor digit (Aves)\tBSPO:0001115\tpostaxialmost part of\tUBERON:0002398\tmanus\n",
- "UBERON:0012262\tmanual minor digit (Aves)\tRO:0002160\tonly in taxon\tNCBITaxon:8782\tAves\n",
- "UBERON:0012262\tmanual minor digit (Aves)\tRO:0002254\thas developmental contribution from\tUBERON:0005694\tmanual digit 4 mesenchyme\n",
- "UBERON:0012262\tmanual minor digit (Aves)\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0019231\tmanual digit 1 or 5\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0019231\tmanual digit 1 or 5\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0019231\tmanual digit 1 or 5\trdfs:subClassOf\tNone\tUBERON:0019221\tdigit 1 or 5\n",
- "UBERON:0019232\tmanual digit 2, 3 or 4\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\n",
- "UBERON:0019232\tmanual digit 2, 3 or 4\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0019232\tmanual digit 2, 3 or 4\trdfs:subClassOf\tNone\tUBERON:0019222\tdigit 2, 3 or 4\n"
+ "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r\n",
+ "UBERON:0001463\tmanual digit 1\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0001463\tmanual digit 1\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r\n",
+ "UBERON:0001463\tmanual digit 1\tBFO:0000050\tpart of\tUBERON:5001463\tmanual digit 1 plus metapodial segment\r\n",
+ "UBERON:0001463\tmanual digit 1\tBSPO:0001113\tpreaxialmost part of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0001463\tmanual digit 1\trdfs:subClassOf\tNone\tUBERON:0006048\tdigit 1\r\n",
+ "UBERON:0001463\tmanual digit 1\trdfs:subClassOf\tNone\tUBERON:0019231\tmanual digit 1 or 5\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002102\tforelimb\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r\n",
+ "UBERON:0002389\tmanual digit\tBFO:0000050\tpart of\tUBERON:5002389\tmanual digit plus metapodial segment\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\r\n",
+ "UBERON:0003622\tmanual digit 2\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0003622\tmanual digit 2\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r\n",
+ "UBERON:0003622\tmanual digit 2\tBFO:0000050\tpart of\tUBERON:5003622\tmanual digit 2 plus metapodial segment\r\n",
+ "UBERON:0003622\tmanual digit 2\trdfs:subClassOf\tNone\tUBERON:0006049\tdigit 2\r\n",
+ "UBERON:0003622\tmanual digit 2\trdfs:subClassOf\tNone\tUBERON:0019232\tmanual digit 2, 3 or 4\r\n",
+ "UBERON:0003623\tmanual digit 3\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0003623\tmanual digit 3\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r\n",
+ "UBERON:0003623\tmanual digit 3\tBFO:0000050\tpart of\tUBERON:5003623\tmanual digit 3 plus metapodial segment\r\n",
+ "UBERON:0003623\tmanual digit 3\trdfs:subClassOf\tNone\tUBERON:0006050\tdigit 3\r\n",
+ "UBERON:0003623\tmanual digit 3\trdfs:subClassOf\tNone\tUBERON:0019232\tmanual digit 2, 3 or 4\r\n",
+ "UBERON:0003624\tmanual digit 4\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0003624\tmanual digit 4\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r\n",
+ "UBERON:0003624\tmanual digit 4\tBFO:0000050\tpart of\tUBERON:5003624\tmanual digit 4 plus metapodial segment\r\n",
+ "UBERON:0003624\tmanual digit 4\trdfs:subClassOf\tNone\tUBERON:0006051\tdigit 4\r\n",
+ "UBERON:0003624\tmanual digit 4\trdfs:subClassOf\tNone\tUBERON:0019232\tmanual digit 2, 3 or 4\r\n",
+ "UBERON:0003625\tmanual digit 5\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0003625\tmanual digit 5\tBFO:0000050\tpart of\tUBERON:0012141\tmanual digitopodium region\r\n",
+ "UBERON:0003625\tmanual digit 5\tBFO:0000050\tpart of\tUBERON:5003625\tmanual digit 5 plus metapodial segment\r\n",
+ "UBERON:0003625\tmanual digit 5\trdfs:subClassOf\tNone\tUBERON:0006052\tdigit 5\r\n",
+ "UBERON:0003625\tmanual digit 5\trdfs:subClassOf\tNone\tUBERON:0019231\tmanual digit 1 or 5\r\n",
+ "UBERON:0008444\twebbed manual digit\tBFO:0000050\tpart of\tUBERON:0008441\twebbed manus\r\n",
+ "UBERON:0008444\twebbed manual digit\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0008444\twebbed manual digit\trdfs:subClassOf\tNone\tUBERON:0008443\twebbed digit\r\n",
+ "UBERON:0011981\tmanual digit 6\tBFO:0000050\tpart of\tUBERON:5011981\tmanual digit 6 plus metapodial segment\r\n",
+ "UBERON:0011981\tmanual digit 6\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0011981\tmanual digit 6\trdfs:subClassOf\tNone\tUBERON:0016856\tdigit 6\r\n",
+ "UBERON:0011982\tmanual digit 7\tBFO:0000050\tpart of\tUBERON:5011982\tmanual digit 7 plus metapodial segment\r\n",
+ "UBERON:0011982\tmanual digit 7\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0011982\tmanual digit 7\trdfs:subClassOf\tNone\tUBERON:0016857\tdigit 7\r\n",
+ "UBERON:0011983\tmanual digit 8\tBFO:0000050\tpart of\tUBERON:5011983\tmanual digit 8 plus metapodial segment\r\n",
+ "UBERON:0011983\tmanual digit 8\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0011983\tmanual digit 8\trdfs:subClassOf\tNone\tUBERON:0016858\tdigit 8\r\n",
+ "UBERON:0012260\talular digit\tBFO:0000050\tpart of\tUBERON:5012260\talular digit plus metapodial segment\r\n",
+ "UBERON:0012260\talular digit\tBSPO:0001113\tpreaxialmost part of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0012260\talular digit\tRO:0002160\tonly in taxon\tNCBITaxon:8782\tAves\r\n",
+ "UBERON:0012260\talular digit\tRO:0002254\thas developmental contribution from\tUBERON:0005692\tmanual digit 2 mesenchyme\r\n",
+ "UBERON:0012260\talular digit\tRO:0002254\thas developmental contribution from\tUBERON:0010564\tmanual digit 1 mesenchyme\r\n",
+ "UBERON:0012260\talular digit\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0012261\tmanual major digit (Aves)\tBFO:0000050\tpart of\tUBERON:5012261\tmanual major digit (Aves) plus metapodial segment\r\n",
+ "UBERON:0012261\tmanual major digit (Aves)\tRO:0002160\tonly in taxon\tNCBITaxon:8782\tAves\r\n",
+ "UBERON:0012261\tmanual major digit (Aves)\tRO:0002254\thas developmental contribution from\tUBERON:0005693\tmanual digit 3 mesenchyme\r\n",
+ "UBERON:0012261\tmanual major digit (Aves)\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0012262\tmanual minor digit (Aves)\tBFO:0000050\tpart of\tUBERON:5012262\tmanual minor digit (Aves) plus metapodial segment\r\n",
+ "UBERON:0012262\tmanual minor digit (Aves)\tBSPO:0001115\tpostaxialmost part of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0012262\tmanual minor digit (Aves)\tRO:0002160\tonly in taxon\tNCBITaxon:8782\tAves\r\n",
+ "UBERON:0012262\tmanual minor digit (Aves)\tRO:0002254\thas developmental contribution from\tUBERON:0005694\tmanual digit 4 mesenchyme\r\n",
+ "UBERON:0012262\tmanual minor digit (Aves)\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0019231\tmanual digit 1 or 5\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0019231\tmanual digit 1 or 5\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0019231\tmanual digit 1 or 5\trdfs:subClassOf\tNone\tUBERON:0019221\tdigit 1 or 5\r\n",
+ "UBERON:0019232\tmanual digit 2, 3 or 4\tBFO:0000050\tpart of\tUBERON:0002398\tmanus\r\n",
+ "UBERON:0019232\tmanual digit 2, 3 or 4\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0019232\tmanual digit 2, 3 or 4\trdfs:subClassOf\tNone\tUBERON:0019222\tdigit 2, 3 or 4\r\n"
]
}
],
@@ -270,11 +326,28 @@
"uberon relationships .desc//p=i finger"
]
},
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We can write this out to a file and explore it using pandas.\n",
+ "\n",
+ "(We use pandas here because it is convenient in Jupyter notebooks, but if you were running this on the command line you could use any TSV or tabular tool you like.)"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "edf4f8d768c91e3f"
+ },
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"id": "7d7574ba",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:11:20.714954Z",
+ "start_time": "2024-04-19T19:11:15.352017Z"
+ }
+ },
"outputs": [],
"source": [
"uberon relationships .desc//p=i finger -o output/finger-relationships.tsv"
@@ -282,9 +355,14 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 8,
"id": "cf691c34",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:11:20.968690Z",
+ "start_time": "2024-04-19T19:11:20.715483Z"
+ }
+ },
"outputs": [],
"source": [
"import pandas as pd"
@@ -292,188 +370,21 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"id": "c68535b0",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:11:20.993998Z",
+ "start_time": "2024-04-19T19:11:20.969074Z"
+ }
+ },
"outputs": [
{
"data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " subject | \n",
- " subject_label | \n",
- " predicate | \n",
- " predicate_label | \n",
- " object | \n",
- " object_label | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " UBERON:0001463 | \n",
- " manual digit 1 | \n",
- " BFO:0000050 | \n",
- " part of | \n",
- " UBERON:0002398 | \n",
- " manus | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " UBERON:0001463 | \n",
- " manual digit 1 | \n",
- " BFO:0000050 | \n",
- " part of | \n",
- " UBERON:0012141 | \n",
- " manual digitopodium region | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " UBERON:0001463 | \n",
- " manual digit 1 | \n",
- " BFO:0000050 | \n",
- " part of | \n",
- " UBERON:5001463 | \n",
- " manual digit 1 plus metapodial segment | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " UBERON:0001463 | \n",
- " manual digit 1 | \n",
- " BSPO:0001113 | \n",
- " preaxialmost part of | \n",
- " UBERON:0002398 | \n",
- " manus | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " UBERON:0001463 | \n",
- " manual digit 1 | \n",
- " rdfs:subClassOf | \n",
- " None | \n",
- " UBERON:0006048 | \n",
- " digit 1 | \n",
- "
\n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- "
\n",
- " \n",
- " 59 | \n",
- " UBERON:0019231 | \n",
- " manual digit 1 or 5 | \n",
- " rdfs:subClassOf | \n",
- " None | \n",
- " UBERON:0002389 | \n",
- " manual digit | \n",
- "
\n",
- " \n",
- " 60 | \n",
- " UBERON:0019231 | \n",
- " manual digit 1 or 5 | \n",
- " rdfs:subClassOf | \n",
- " None | \n",
- " UBERON:0019221 | \n",
- " digit 1 or 5 | \n",
- "
\n",
- " \n",
- " 61 | \n",
- " UBERON:0019232 | \n",
- " manual digit 2, 3 or 4 | \n",
- " BFO:0000050 | \n",
- " part of | \n",
- " UBERON:0002398 | \n",
- " manus | \n",
- "
\n",
- " \n",
- " 62 | \n",
- " UBERON:0019232 | \n",
- " manual digit 2, 3 or 4 | \n",
- " rdfs:subClassOf | \n",
- " None | \n",
- " UBERON:0002389 | \n",
- " manual digit | \n",
- "
\n",
- " \n",
- " 63 | \n",
- " UBERON:0019232 | \n",
- " manual digit 2, 3 or 4 | \n",
- " rdfs:subClassOf | \n",
- " None | \n",
- " UBERON:0019222 | \n",
- " digit 2, 3 or 4 | \n",
- "
\n",
- " \n",
- "
\n",
- "
64 rows × 6 columns
\n",
- "
"
- ],
- "text/plain": [
- " subject subject_label predicate \\\n",
- "0 UBERON:0001463 manual digit 1 BFO:0000050 \n",
- "1 UBERON:0001463 manual digit 1 BFO:0000050 \n",
- "2 UBERON:0001463 manual digit 1 BFO:0000050 \n",
- "3 UBERON:0001463 manual digit 1 BSPO:0001113 \n",
- "4 UBERON:0001463 manual digit 1 rdfs:subClassOf \n",
- ".. ... ... ... \n",
- "59 UBERON:0019231 manual digit 1 or 5 rdfs:subClassOf \n",
- "60 UBERON:0019231 manual digit 1 or 5 rdfs:subClassOf \n",
- "61 UBERON:0019232 manual digit 2, 3 or 4 BFO:0000050 \n",
- "62 UBERON:0019232 manual digit 2, 3 or 4 rdfs:subClassOf \n",
- "63 UBERON:0019232 manual digit 2, 3 or 4 rdfs:subClassOf \n",
- "\n",
- " predicate_label object \\\n",
- "0 part of UBERON:0002398 \n",
- "1 part of UBERON:0012141 \n",
- "2 part of UBERON:5001463 \n",
- "3 preaxialmost part of UBERON:0002398 \n",
- "4 None UBERON:0006048 \n",
- ".. ... ... \n",
- "59 None UBERON:0002389 \n",
- "60 None UBERON:0019221 \n",
- "61 part of UBERON:0002398 \n",
- "62 None UBERON:0002389 \n",
- "63 None UBERON:0019222 \n",
- "\n",
- " object_label \n",
- "0 manus \n",
- "1 manual digitopodium region \n",
- "2 manual digit 1 plus metapodial segment \n",
- "3 manus \n",
- "4 digit 1 \n",
- ".. ... \n",
- "59 manual digit \n",
- "60 digit 1 or 5 \n",
- "61 manus \n",
- "62 manual digit \n",
- "63 digit 2, 3 or 4 \n",
- "\n",
- "[64 rows x 6 columns]"
- ]
+ "text/plain": " subject subject_label predicate \\\n0 UBERON:0001463 manual digit 1 BFO:0000050 \n1 UBERON:0001463 manual digit 1 BFO:0000050 \n2 UBERON:0001463 manual digit 1 BFO:0000050 \n3 UBERON:0001463 manual digit 1 BSPO:0001113 \n4 UBERON:0001463 manual digit 1 rdfs:subClassOf \n.. ... ... ... \n59 UBERON:0019231 manual digit 1 or 5 rdfs:subClassOf \n60 UBERON:0019231 manual digit 1 or 5 rdfs:subClassOf \n61 UBERON:0019232 manual digit 2, 3 or 4 BFO:0000050 \n62 UBERON:0019232 manual digit 2, 3 or 4 rdfs:subClassOf \n63 UBERON:0019232 manual digit 2, 3 or 4 rdfs:subClassOf \n\n predicate_label object \\\n0 part of UBERON:0002398 \n1 part of UBERON:0012141 \n2 part of UBERON:5001463 \n3 preaxialmost part of UBERON:0002398 \n4 NaN UBERON:0006048 \n.. ... ... \n59 NaN UBERON:0002389 \n60 NaN UBERON:0019221 \n61 part of UBERON:0002398 \n62 NaN UBERON:0002389 \n63 NaN UBERON:0019222 \n\n object_label \n0 manus \n1 manual digitopodium region \n2 manual digit 1 plus metapodial segment \n3 manus \n4 digit 1 \n.. ... \n59 manual digit \n60 digit 1 or 5 \n61 manus \n62 manual digit \n63 digit 2, 3 or 4 \n\n[64 rows x 6 columns]",
+ "text/html": "\n\n
\n \n \n | \n subject | \n subject_label | \n predicate | \n predicate_label | \n object | \n object_label | \n
\n \n \n \n 0 | \n UBERON:0001463 | \n manual digit 1 | \n BFO:0000050 | \n part of | \n UBERON:0002398 | \n manus | \n
\n \n 1 | \n UBERON:0001463 | \n manual digit 1 | \n BFO:0000050 | \n part of | \n UBERON:0012141 | \n manual digitopodium region | \n
\n \n 2 | \n UBERON:0001463 | \n manual digit 1 | \n BFO:0000050 | \n part of | \n UBERON:5001463 | \n manual digit 1 plus metapodial segment | \n
\n \n 3 | \n UBERON:0001463 | \n manual digit 1 | \n BSPO:0001113 | \n preaxialmost part of | \n UBERON:0002398 | \n manus | \n
\n \n 4 | \n UBERON:0001463 | \n manual digit 1 | \n rdfs:subClassOf | \n NaN | \n UBERON:0006048 | \n digit 1 | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 59 | \n UBERON:0019231 | \n manual digit 1 or 5 | \n rdfs:subClassOf | \n NaN | \n UBERON:0002389 | \n manual digit | \n
\n \n 60 | \n UBERON:0019231 | \n manual digit 1 or 5 | \n rdfs:subClassOf | \n NaN | \n UBERON:0019221 | \n digit 1 or 5 | \n
\n \n 61 | \n UBERON:0019232 | \n manual digit 2, 3 or 4 | \n BFO:0000050 | \n part of | \n UBERON:0002398 | \n manus | \n
\n \n 62 | \n UBERON:0019232 | \n manual digit 2, 3 or 4 | \n rdfs:subClassOf | \n NaN | \n UBERON:0002389 | \n manual digit | \n
\n \n 63 | \n UBERON:0019232 | \n manual digit 2, 3 or 4 | \n rdfs:subClassOf | \n NaN | \n UBERON:0019222 | \n digit 2, 3 or 4 | \n
\n \n
\n
64 rows × 6 columns
\n
"
},
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -488,34 +399,44 @@
"id": "9611f196",
"metadata": {},
"source": [
- "## Entailments"
+ "## Entailments\n",
+ "\n",
+ "Next we will look at [Entailed](https://incatools.github.io/ontology-access-kit/glossary.html#term-Entailed) relationships.\n",
+ "\n",
+ "You are encouraged to consult the OAK guide and glossary here, but the basic idea is that computing entailed relationships\n",
+ "involves \"walking up\" the relationship graph, following a specified set of [predicates](https://incatools.github.io/ontology-access-kit/glossary.html#term-Predicate).\n",
+ "\n",
+ "First we'll look at the is-a ancestors of `finger`. Note that the results here should be the same as those from the `ancestors` command:"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 10,
"id": "38a93c7d",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:12:50.442368Z",
+ "start_time": "2024-04-19T19:12:48.002563Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tBFO:0000001\tentity\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tBFO:0000002\tcontinuant\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tBFO:0000004\tindependent continuant\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tBFO:0000040\tmaterial entity\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tCARO:0000000\tanatomical entity\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tCARO:0030000\tbiological entity\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0000061\tanatomical structure\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0000465\tmaterial anatomical entity\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0000475\torganism subdivision\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0001062\tanatomical entity\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0005881\tautopodial extension\n",
- "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0010000\tmulticellular anatomical structure\n"
+ "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tBFO:0000001\tentity\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tBFO:0000002\tcontinuant\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tBFO:0000004\tindependent continuant\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tBFO:0000040\tmaterial entity\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0000061\tanatomical structure\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0000465\tmaterial anatomical entity\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0000475\torganism subdivision\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0001062\tanatomical entity\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002389\tmanual digit\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0002544\tdigit\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0005881\tautopodial extension\r\n",
+ "UBERON:0002389\tmanual digit\trdfs:subClassOf\tNone\tUBERON:0010000\tmulticellular anatomical structure\r\n"
]
}
],
@@ -523,31 +444,280 @@
"uberon relationships finger --include-entailed -p i"
]
},
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Next we'll include a wider range of predicates. We'll also switch our example to `trigeminal ganglion`."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "a94e156d1089947"
+ },
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 18,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tBFO:0000001\tentity\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tBFO:0000002\tcontinuant\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tBFO:0000004\tindependent continuant\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tBFO:0000040\tmaterial entity\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tRO:0002577\tsystem\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0000033\thead\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0000061\tanatomical structure\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0000153\tanterior region of body\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0000465\tmaterial anatomical entity\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0000467\tanatomical system\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0000468\tmulticellular organism\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0000475\torganism subdivision\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0001016\tnervous system\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0001062\tanatomical entity\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0007811\tcraniocervical region\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0010000\tmulticellular anatomical structure\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0011676\tsubdivision of organism along main body axis\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0013701\tmain body axis\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0013702\tbody proper\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tBFO:0000001\tentity\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tBFO:0000002\tcontinuant\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tBFO:0000004\tindependent continuant\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tBFO:0000040\tmaterial entity\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0000045\tganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0000061\tanatomical structure\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0000465\tmaterial anatomical entity\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001062\tanatomical entity\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001675\ttrigeminal ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001714\tcranial ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001800\tsensory ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0004121\tectoderm-derived structure\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0010313\tneural crest-derived structure\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0010314\tstructure with developmental contribution from neural crest\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "uberon relationships \"trigeminal ganglion\" --include-entailed -p i,p"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:26:17.932863Z",
+ "start_time": "2024-04-19T19:26:15.284612Z"
+ }
+ },
+ "id": "55d23e285bea8695"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We can see that the query above returns a lot of entailed relationships! Usually we wouldn't show this as a table to a user - instead we might use the `viz` command to show all individual direct relationships for all ancestors."
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "788270cee6bad9ca"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "outputs": [],
+ "source": [
+ "uberon viz -p i,p \"trigeminal ganglion\" -o output/trigeminal-ganglion-graph.png"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-04-19T19:27:47.848838Z",
+ "start_time": "2024-04-19T19:27:41.902477Z"
+ }
+ },
+ "id": "60354b36cc783e26"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "![img](output/trigeminal-ganglion-graph.png)"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "3fdad80da90a38aa"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "This is a standard way of communicating a complex bundle of relationships. But is there a way of getting the *non-redundant* informative entailed relationships in a more concise way?"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "30177ec6b27b30c0"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Non-redundant entailed relationships\n",
+ "\n",
+ "Is there a way to get the most relevant information in a more concise way, as a table?\n",
+ "\n",
+ "Let's consider the term \"trigeminal ganglion\" again. Let's look at its direct relationships:"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "914b45da1e52ef8d"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
"id": "767d220d",
- "metadata": {},
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-19T22:09:26.325020Z",
+ "start_time": "2024-04-19T22:09:22.451109Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "RO:0002131 ! overlaps\r\n"
+ "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001714\tcranial ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001800\tsensory ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0010313\tneural crest-derived structure\r\n"
]
}
],
"source": [
- "uberon info overlaps"
+ "uberon relationships \"trigeminal ganglion\" -p i,p"
]
},
+ {
+ "cell_type": "markdown",
+ "source": [
+ "These are all correct but don't tell us what this ganglion is a part of. Using the `--include-entailed` option above gives **too much** information.\n",
+ "\n",
+ "OAK now has a `--non-redundant-entailed` option which effectively \"rolls down\" the entailed relationships for each predicate:"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "cd462f9b9e3f413c"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tRO:0002577\tsystem\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0000033\thead\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tUBERON:0001016\tnervous system\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001714\tcranial ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001800\tsensory ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0010313\tneural crest-derived structure\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "uberon relationships --non-redundant-entailed \"trigeminal ganglion\" -p i,p"
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-04-19T22:09:41.578453Z",
+ "start_time": "2024-04-19T22:09:38.750423Z"
+ }
+ },
+ "id": "23cee5fd3df60b5c"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Note that even though 3 part-parents are provided, these are all technically non-redundant, as they are all \"proper\" overlaps (the `system` term is odd, but this is an artefact of RO imports, and in fact uberon doesn't place 'nervous system' under 'system')"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "64031e20457ec269"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "We can do this for other relationships too:"
+ ],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "a550c9cc75bb8043"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "subject\tsubject_label\tpredicate\tpredicate_label\tobject\tobject_label\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000050\tpart of\tRO:0002577\tsystem\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tBFO:0000051\thas part\tUBERON:0003714\tneural tissue\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002131\toverlaps\tRO:0002577\tsystem\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002131\toverlaps\tUBERON:0003714\tneural tissue\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002162\tin taxon\tNCBITaxon:7742\tVertebrata \r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002170\tconnected to\tUBERON:0001027\tsensory nerve\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002170\tconnected to\tUBERON:0001645\ttrigeminal nerve\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002202\tdevelops from\tUBERON:0006304\tfuture trigeminal ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002207\tdirectly develops from\tUBERON:0006304\tfuture trigeminal ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002225\tdevelops from part of\tUBERON:0000922\tembryo\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002254\thas developmental contribution from\tUBERON:0006304\tfuture trigeminal ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002258\tdevelopmentally preceded by\tUBERON:0006304\tfuture trigeminal ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002328\tfunctionally related to\tGO:0019226\ttransmission of nerve impulse\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002329\tpart of structure that is capable of\tGO:0050877\tnervous system process\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002473\tcomposed primarily of\tUBERON:0003714\tneural tissue\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002494\ttransformation of\tUBERON:0006304\tfuture trigeminal ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002495\timmediate transformation of\tUBERON:0006304\tfuture trigeminal ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002496\texistence starts during or after\tUBERON:0000110\tneurula stage\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002496\texistence starts during or after\tUBERON:0000111\torganogenesis stage\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002497\texistence ends during or before\tUBERON:0000066\tfully formed stage\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tRO:0002584\thas part structure that is capable of\tGO:0019226\ttransmission of nerve impulse\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001714\tcranial ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0001800\tsensory ganglion\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\trdfs:subClassOf\tNone\tUBERON:0010313\tneural crest-derived structure\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tuberon/core#extends:fibers_into\tNone\tUBERON:0001027\tsensory nerve\r\n",
+ "UBERON:0001675\ttrigeminal ganglion\tuberon/core#extends:fibers_into\tNone\tUBERON:0001645\ttrigeminal nerve\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "uberon relationships --non-redundant-entailed \"trigeminal ganglion\" "
+ ],
+ "metadata": {
+ "collapsed": false,
+ "ExecuteTime": {
+ "end_time": "2024-04-19T22:14:43.961052Z",
+ "start_time": "2024-04-19T22:14:40.508242Z"
+ }
+ },
+ "id": "ffccb89f65f605c3"
+ },
{
"cell_type": "code",
"execution_count": null,
- "id": "1f51f46a",
- "metadata": {},
"outputs": [],
- "source": []
+ "source": [],
+ "metadata": {
+ "collapsed": false
+ },
+ "id": "5dd76e24bdc3fea2"
}
],
"metadata": {
diff --git a/notebooks/Commands/ValidateDefinitions.ipynb b/notebooks/Commands/ValidateDefinitions.ipynb
index ee56f95d7..1852030a3 100644
--- a/notebooks/Commands/ValidateDefinitions.ipynb
+++ b/notebooks/Commands/ValidateDefinitions.ipynb
@@ -19,7 +19,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 16,
"id": "c223f678-f82f-4b06-8e19-1a5b7323e571",
"metadata": {
"ExecuteTime": {
@@ -32,64 +32,64 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Usage: runoak validate-definitions [OPTIONS] [TERMS]...\r\n",
- "\r\n",
- " Checks presence and structure of text definitions.\r\n",
- "\r\n",
- " To run:\r\n",
- "\r\n",
- " runoak validate-definitions -i db/uberon.db -o results.tsv\r\n",
- "\r\n",
- " By default this will apply basic text mining of text definitions to check\r\n",
- " against machine actionable OBO text definition guideline rules. This can\r\n",
- " result in an initial lag - to skip this, and ONLY perform checks for\r\n",
- " *presence* of definitions, use --skip-text-annotation:\r\n",
- "\r\n",
- " Example: -------\r\n",
- "\r\n",
- " runoak validate-definitions -i db/uberon.db --skip-text-annotation\r\n",
- "\r\n",
- " Like most OAK commands, this accepts lists of terms or term queries as\r\n",
- " arguments. You can pass in a CURIE list to selectively validate individual\r\n",
- " classes\r\n",
- "\r\n",
- " Example: -------\r\n",
- "\r\n",
- " runoak validate-definitions -i db/cl.db CL:0002053\r\n",
- "\r\n",
- " Only on CL identifiers:\r\n",
- "\r\n",
- " runoak validate-definitions -i db/cl.db i^CL:\r\n",
- "\r\n",
- " Only on neuron hierarchy:\r\n",
- "\r\n",
- " runoak validate-definitions -i db/cl.db .desc//p=i neuron\r\n",
- "\r\n",
- " Output format:\r\n",
- "\r\n",
- " This command emits objects conforming to the OAK validation datamodel. See\r\n",
- " https://incatools.github.io/ontology-access-kit/datamodels for more on OAK\r\n",
- " datamodels.\r\n",
- "\r\n",
- " The default serialization of the datamodel is CSV.\r\n",
- "\r\n",
- " Notes: -----\r\n",
- "\r\n",
- " This command is largely redundant with the validate command, but is useful\r\n",
- " for targeted validation focused solely on definitions\r\n",
- "\r\n",
- "Options:\r\n",
- " --skip-text-annotation / --no-skip-text-annotation\r\n",
- " If true, do not parse text annotations\r\n",
- " [default: no-skip-text-annotation]\r\n",
- " -C, --configuration-file TEXT Path to a configuration file. This is\r\n",
- " typically a YAML file, but may be a JSON\r\n",
- " file\r\n",
- " --adapter-mapping TEXT Multiple prefix=selector pairs, e.g.\r\n",
- " --adapter-mapping uberon=db/uberon.db\r\n",
- " -O, --output-type TEXT Desired output type\r\n",
- " -o, --output FILENAME Output file, e.g. obo file\r\n",
- " --help Show this message and exit.\r\n"
+ "Usage: runoak validate-definitions [OPTIONS] [TERMS]...\n",
+ "\n",
+ " Checks presence and structure of text definitions.\n",
+ "\n",
+ " To run:\n",
+ "\n",
+ " runoak validate-definitions -i db/uberon.db -o results.tsv\n",
+ "\n",
+ " By default this will apply basic text mining of text definitions to check\n",
+ " against machine actionable OBO text definition guideline rules. This can\n",
+ " result in an initial lag - to skip this, and ONLY perform checks for\n",
+ " *presence* of definitions, use --skip-text-annotation:\n",
+ "\n",
+ " Example: -------\n",
+ "\n",
+ " runoak validate-definitions -i db/uberon.db --skip-text-annotation\n",
+ "\n",
+ " Like most OAK commands, this accepts lists of terms or term queries as\n",
+ " arguments. You can pass in a CURIE list to selectively validate individual\n",
+ " classes\n",
+ "\n",
+ " Example: -------\n",
+ "\n",
+ " runoak validate-definitions -i db/cl.db CL:0002053\n",
+ "\n",
+ " Only on CL identifiers:\n",
+ "\n",
+ " runoak validate-definitions -i db/cl.db i^CL:\n",
+ "\n",
+ " Only on neuron hierarchy:\n",
+ "\n",
+ " runoak validate-definitions -i db/cl.db .desc//p=i neuron\n",
+ "\n",
+ " Output format:\n",
+ "\n",
+ " This command emits objects conforming to the OAK validation datamodel. See\n",
+ " https://incatools.github.io/ontology-access-kit/datamodels for more on OAK\n",
+ " datamodels.\n",
+ "\n",
+ " The default serialization of the datamodel is CSV.\n",
+ "\n",
+ " Notes: -----\n",
+ "\n",
+ " This command is largely redundant with the validate command, but is useful\n",
+ " for targeted validation focused solely on definitions\n",
+ "\n",
+ "Options:\n",
+ " --skip-text-annotation / --no-skip-text-annotation\n",
+ " If true, do not parse text annotations\n",
+ " [default: no-skip-text-annotation]\n",
+ " -C, --configuration-file TEXT Path to a configuration file. This is\n",
+ " typically a YAML file, but may be a JSON\n",
+ " file\n",
+ " --adapter-mapping TEXT Multiple prefix=selector pairs, e.g.\n",
+ " --adapter-mapping uberon=db/uberon.db\n",
+ " -O, --output-type TEXT Desired output type\n",
+ " -o, --output FILENAME Output file, e.g. obo file\n",
+ " --help Show this message and exit.\n"
]
}
],
@@ -111,7 +111,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 17,
"id": "c9b86e52-87a7-449c-baac-81981e7ce632",
"metadata": {
"ExecuteTime": {
@@ -121,11 +121,18 @@
},
"outputs": [],
"source": [
- "!runoak --stacktrace -i simpleobo:input/validate-defs-test.obo validate-definitions -C input/validate-definition-conf.yaml .desc//p=i \"cellular_component\" -o output/validate-definitions.output.tsv"
+ "!runoak -i simpleobo:input/validate-defs-test.obo validate-definitions -C input/validate-definition-conf.yaml .desc//p=i \"cellular_component\" -o output/validate-definitions.output.tsv"
]
},
{
"cell_type": "markdown",
+ "id": "27c1668fc8d1a8de",
+ "metadata": {
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"source": [
"The output is a TSV file with a summary of the issues found.\n",
"\n",
@@ -134,15 +141,11 @@
"\n",
"If you were actually using this on the command line you may prefer to use your own TSV processing tools,\n",
"or to simply load into google sheets."
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "27c1668fc8d1a8de"
+ ]
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 18,
"id": "5fc9b15d-cc81-400a-8660-f92491baa120",
"metadata": {
"ExecuteTime": {
@@ -153,10 +156,665 @@
"outputs": [
{
"data": {
- "text/plain": " type subject subject_label \\\n0 oaklib.om:DCC#Any GO:0005634 nucleus \n1 oaklib.om:DCC#S3 GO:0043227 membrane-bounded organelle \n2 oaklib.om:DCC#S11 GO:0043227 membrane-bounded organelle \n3 oaklib.om:DCC#Any GO:0110165 cellular anatomical entity \n4 oaklib.om:DCC#S3 GO:0099568 cytoplasmic region \n5 oaklib.om:DCC#S1 GO:0005737 cytoplasm \n6 oaklib.om:DCC#S3 GO:0043229 intracellular organelle \n7 oaklib.om:DCC#S11 GO:0043229 intracellular organelle \n8 oaklib.om:DCC#S11 GO:0043229 intracellular organelle \n9 oaklib.om:DCC#Any GO:0005886 plasma membrane \n10 oaklib.om:DCC#Any GO:0098590 plasma membrane region \n11 oaklib.om:DCC#S3 GO:9999998 fake term for testing pmid type \n12 oaklib.om:DCC#S1 GO:0005773 vacuole \n13 oaklib.om:DCC#S3 GO:9999999 fake term for testing retraction \n14 oaklib.om:DCC#S3 GO:0031975 envelope \n15 oaklib.om:DCC#S3 GO:0005575 cellular_component \n16 oaklib.om:DCC#S11 GO:0031090 organelle membrane \n17 oaklib.om:DCC#Any GO:0034357 photosynthetic membrane \n18 oaklib.om:DCC#S11 GO:0031965 nuclear membrane \n19 oaklib.om:DCC#S11 GO:0005938 cell cortex \n20 oaklib.om:DCC#S11 GO:0005938 cell cortex \n21 oaklib.om:DCC#S0 GO:0012505 endomembrane system \n22 oaklib.om:DCC#S7 GO:0009579 thylakoid \n23 oaklib.om:DCC#S3 GO:0031967 organelle envelope \n24 oaklib.om:DCC#S3 GO:0043226 organelle \n25 oaklib.om:DCC#S3 GO:0071944 cell periphery \n26 oaklib.om:DCC#S3 GO:0043231 intracellular membrane-bounded organelle \n27 oaklib.om:DCC#S11 GO:0043231 intracellular membrane-bounded organelle \n28 oaklib.om:DCC#S11 GO:0043231 intracellular membrane-bounded organelle \n29 oaklib.om:DCC#S3 GO:0016020 membrane \n30 oaklib.om:DCC#S3 GO:0099738 cell cortex region \n31 oaklib.om:DCC#S11 GO:0099738 cell cortex region \n32 oaklib.om:DCC#Any GO:0005635 nuclear envelope \n33 oaklib.om:DCC#S3 GO:0005622 intracellular anatomical structure \n34 oaklib.om:DCC#S20.1 GO:9999998 fake term for testing pmid type \n35 oaklib.om:DCC#S20.2 GO:9999999 fake term 
for testing retraction \n\n severity instantiates predicate object \\\n0 INFO NaN IAO:0000115 NaN \n1 WARNING NaN IAO:0000115 NaN \n2 NaN NaN IAO:0000115 NaN \n3 INFO NaN IAO:0000115 NaN \n4 WARNING NaN IAO:0000115 NaN \n5 NaN NaN IAO:0000115 NaN \n6 WARNING NaN IAO:0000115 NaN \n7 NaN NaN IAO:0000115 NaN \n8 NaN NaN IAO:0000115 NaN \n9 INFO NaN IAO:0000115 NaN \n10 INFO NaN IAO:0000115 NaN \n11 WARNING NaN IAO:0000115 NaN \n12 NaN NaN IAO:0000115 NaN \n13 WARNING NaN IAO:0000115 NaN \n14 WARNING NaN IAO:0000115 NaN \n15 WARNING NaN IAO:0000115 NaN \n16 NaN NaN IAO:0000115 NaN \n17 INFO NaN IAO:0000115 NaN \n18 NaN NaN IAO:0000115 NaN \n19 NaN NaN IAO:0000115 NaN \n20 NaN NaN IAO:0000115 NaN \n21 ERROR NaN IAO:0000115 NaN \n22 NaN NaN IAO:0000115 NaN \n23 WARNING NaN IAO:0000115 NaN \n24 WARNING NaN IAO:0000115 NaN \n25 WARNING NaN IAO:0000115 NaN \n26 WARNING NaN IAO:0000115 NaN \n27 NaN NaN IAO:0000115 NaN \n28 NaN NaN IAO:0000115 NaN \n29 WARNING NaN IAO:0000115 NaN \n30 NaN NaN IAO:0000115 NaN \n31 NaN NaN IAO:0000115 NaN \n32 INFO NaN IAO:0000115 NaN \n33 WARNING NaN IAO:0000115 NaN \n34 ERROR NaN IAO:0000115 PMID:9999999999999 \n35 ERROR NaN IAO:0000115 PMID:19717156 \n\n object_str source \\\n0 A membrane-bounded organelle of eukaryotic cel... NaN \n1 Organized structure of distinctive morphology ... NaN \n2 NaN NaN \n3 A part of a cellular organism that is either a... NaN \n4 Any (proper) part of the cytoplasm of a single... NaN \n5 NaN NaN \n6 Organized structure of distinctive morphology ... NaN \n7 NaN NaN \n8 NaN NaN \n9 The membrane surrounding a cell that separates... NaN \n10 A membrane that is a (regional) part of the pl... NaN \n11 fake definition to test retracted typo in refe... NaN \n12 NaN NaN \n13 fake definition to test retracted reference NaN \n14 A multilayered structure surrounding all or pa... NaN \n15 A location, relative to cellular compartments ... NaN \n16 is one of the two lipid bilayers of an organel... 
NaN \n17 A membrane enriched in complexes formed of rea... NaN \n18 envelope NaN \n19 region of a cell NaN \n20 lies just beneath the plasma membrane and ofte... NaN \n21 NaN NaN \n22 The structure in a plant cell that is known as... NaN \n23 A double membrane structure enclosing an organ... NaN \n24 Organized structure of distinctive morphology ... NaN \n25 The part of a cell encompassing the cell corte... NaN \n26 Organized structure of distinctive morphology ... NaN \n27 NaN NaN \n28 NaN NaN \n29 A lipid bilayer along with all the proteins an... NaN \n30 complete extent of cell cortex NaN \n31 underlies some some region of the plasma membrane NaN \n32 A double lipid bilayer that is part of the nuc... NaN \n33 A component of a cell contained within (but no... NaN \n34 NaN NaN \n35 NaN NaN \n\n info \n0 No problems with definition \n1 Cannot parse genus and differentia \n2 Logical definition element not found in text: ... \n3 No problems with definition \n4 Cannot parse genus and differentia \n5 Definiendum should not appear at the start \n6 Cannot parse genus and differentia \n7 Logical definition element not found in text: ... \n8 Logical definition element not found in text: ... \n9 No problems with definition \n10 No problems with definition \n11 Cannot parse genus and differentia \n12 Definiendum should not appear at the start \n13 Cannot parse genus and differentia \n14 Cannot parse genus and differentia \n15 Cannot parse genus and differentia \n16 Logical definition element not found in text: ... \n17 No problems with definition \n18 Logical definition element not found in text: ... \n19 Logical definition element not found in text: ... \n20 Logical definition element not found in text: ... 
\n21 Missing text definition \n22 Circular, thylakoid (GO:0009579 in definition \n23 Cannot parse genus and differentia \n24 Cannot parse genus and differentia \n25 Cannot parse genus and differentia \n26 Cannot parse genus and differentia \n27 Logical definition element not found in text: ... \n28 Logical definition element not found in text: ... \n29 Cannot parse genus and differentia \n30 Did not match whole text: cell cortex < comple... \n31 Wrong position, 'cell cortex' not in 'underlie... \n32 No problems with definition \n33 Cannot parse genus and differentia \n34 publication not found: PMID:9999999999999 \n35 publication is retracted: A role for plasma tr... ",
- "text/html": "\n\n
\n \n \n | \n type | \n subject | \n subject_label | \n severity | \n instantiates | \n predicate | \n object | \n object_str | \n source | \n info | \n
\n \n \n \n 0 | \n oaklib.om:DCC#Any | \n GO:0005634 | \n nucleus | \n INFO | \n NaN | \n IAO:0000115 | \n NaN | \n A membrane-bounded organelle of eukaryotic cel... | \n NaN | \n No problems with definition | \n
\n \n 1 | \n oaklib.om:DCC#S3 | \n GO:0043227 | \n membrane-bounded organelle | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n Organized structure of distinctive morphology ... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 2 | \n oaklib.om:DCC#S11 | \n GO:0043227 | \n membrane-bounded organelle | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n NaN | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 3 | \n oaklib.om:DCC#Any | \n GO:0110165 | \n cellular anatomical entity | \n INFO | \n NaN | \n IAO:0000115 | \n NaN | \n A part of a cellular organism that is either a... | \n NaN | \n No problems with definition | \n
\n \n 4 | \n oaklib.om:DCC#S3 | \n GO:0099568 | \n cytoplasmic region | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n Any (proper) part of the cytoplasm of a single... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 5 | \n oaklib.om:DCC#S1 | \n GO:0005737 | \n cytoplasm | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n NaN | \n NaN | \n Definiendum should not appear at the start | \n
\n \n 6 | \n oaklib.om:DCC#S3 | \n GO:0043229 | \n intracellular organelle | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n Organized structure of distinctive morphology ... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 7 | \n oaklib.om:DCC#S11 | \n GO:0043229 | \n intracellular organelle | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n NaN | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 8 | \n oaklib.om:DCC#S11 | \n GO:0043229 | \n intracellular organelle | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n NaN | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 9 | \n oaklib.om:DCC#Any | \n GO:0005886 | \n plasma membrane | \n INFO | \n NaN | \n IAO:0000115 | \n NaN | \n The membrane surrounding a cell that separates... | \n NaN | \n No problems with definition | \n
\n \n 10 | \n oaklib.om:DCC#Any | \n GO:0098590 | \n plasma membrane region | \n INFO | \n NaN | \n IAO:0000115 | \n NaN | \n A membrane that is a (regional) part of the pl... | \n NaN | \n No problems with definition | \n
\n \n 11 | \n oaklib.om:DCC#S3 | \n GO:9999998 | \n fake term for testing pmid type | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n fake definition to test retracted typo in refe... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 12 | \n oaklib.om:DCC#S1 | \n GO:0005773 | \n vacuole | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n NaN | \n NaN | \n Definiendum should not appear at the start | \n
\n \n 13 | \n oaklib.om:DCC#S3 | \n GO:9999999 | \n fake term for testing retraction | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n fake definition to test retracted reference | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 14 | \n oaklib.om:DCC#S3 | \n GO:0031975 | \n envelope | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n A multilayered structure surrounding all or pa... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 15 | \n oaklib.om:DCC#S3 | \n GO:0005575 | \n cellular_component | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n A location, relative to cellular compartments ... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 16 | \n oaklib.om:DCC#S11 | \n GO:0031090 | \n organelle membrane | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n is one of the two lipid bilayers of an organel... | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 17 | \n oaklib.om:DCC#Any | \n GO:0034357 | \n photosynthetic membrane | \n INFO | \n NaN | \n IAO:0000115 | \n NaN | \n A membrane enriched in complexes formed of rea... | \n NaN | \n No problems with definition | \n
\n \n 18 | \n oaklib.om:DCC#S11 | \n GO:0031965 | \n nuclear membrane | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n envelope | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 19 | \n oaklib.om:DCC#S11 | \n GO:0005938 | \n cell cortex | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n region of a cell | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 20 | \n oaklib.om:DCC#S11 | \n GO:0005938 | \n cell cortex | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n lies just beneath the plasma membrane and ofte... | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 21 | \n oaklib.om:DCC#S0 | \n GO:0012505 | \n endomembrane system | \n ERROR | \n NaN | \n IAO:0000115 | \n NaN | \n NaN | \n NaN | \n Missing text definition | \n
\n \n 22 | \n oaklib.om:DCC#S7 | \n GO:0009579 | \n thylakoid | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n The structure in a plant cell that is known as... | \n NaN | \n Circular, thylakoid (GO:0009579 in definition | \n
\n \n 23 | \n oaklib.om:DCC#S3 | \n GO:0031967 | \n organelle envelope | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n A double membrane structure enclosing an organ... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 24 | \n oaklib.om:DCC#S3 | \n GO:0043226 | \n organelle | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n Organized structure of distinctive morphology ... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 25 | \n oaklib.om:DCC#S3 | \n GO:0071944 | \n cell periphery | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n The part of a cell encompassing the cell corte... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 26 | \n oaklib.om:DCC#S3 | \n GO:0043231 | \n intracellular membrane-bounded organelle | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n Organized structure of distinctive morphology ... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 27 | \n oaklib.om:DCC#S11 | \n GO:0043231 | \n intracellular membrane-bounded organelle | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n NaN | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 28 | \n oaklib.om:DCC#S11 | \n GO:0043231 | \n intracellular membrane-bounded organelle | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n NaN | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 29 | \n oaklib.om:DCC#S3 | \n GO:0016020 | \n membrane | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n A lipid bilayer along with all the proteins an... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 30 | \n oaklib.om:DCC#S3 | \n GO:0099738 | \n cell cortex region | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n complete extent of cell cortex | \n NaN | \n Did not match whole text: cell cortex < comple... | \n
\n \n 31 | \n oaklib.om:DCC#S11 | \n GO:0099738 | \n cell cortex region | \n NaN | \n NaN | \n IAO:0000115 | \n NaN | \n underlies some some region of the plasma membrane | \n NaN | \n Wrong position, 'cell cortex' not in 'underlie... | \n
\n \n 32 | \n oaklib.om:DCC#Any | \n GO:0005635 | \n nuclear envelope | \n INFO | \n NaN | \n IAO:0000115 | \n NaN | \n A double lipid bilayer that is part of the nuc... | \n NaN | \n No problems with definition | \n
\n \n 33 | \n oaklib.om:DCC#S3 | \n GO:0005622 | \n intracellular anatomical structure | \n WARNING | \n NaN | \n IAO:0000115 | \n NaN | \n A component of a cell contained within (but no... | \n NaN | \n Cannot parse genus and differentia | \n
\n \n 34 | \n oaklib.om:DCC#S20.1 | \n GO:9999998 | \n fake term for testing pmid type | \n ERROR | \n NaN | \n IAO:0000115 | \n PMID:9999999999999 | \n NaN | \n NaN | \n publication not found: PMID:9999999999999 | \n
\n \n 35 | \n oaklib.om:DCC#S20.2 | \n GO:9999999 | \n fake term for testing retraction | \n ERROR | \n NaN | \n IAO:0000115 | \n PMID:19717156 | \n NaN | \n NaN | \n publication is retracted: A role for plasma tr... | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " subject | \n",
+ " subject_label | \n",
+ " severity | \n",
+ " instantiates | \n",
+ " predicate | \n",
+ " object | \n",
+ " object_str | \n",
+ " source | \n",
+ " info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043231 | \n",
+ " intracellular membrane-bounded organelle | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043231 | \n",
+ " intracellular membrane-bounded organelle | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043231 | \n",
+ " intracellular membrane-bounded organelle | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0099568 | \n",
+ " cytoplasmic region | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " Any (proper) part of the cytoplasm of a single... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0099738 | \n",
+ " cell cortex region | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " complete extent of cell cortex | \n",
+ " NaN | \n",
+ " Did not match whole text: cell cortex < comple... | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0099738 | \n",
+ " cell cortex region | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " underlies some some region of the plasma membrane | \n",
+ " NaN | \n",
+ " Wrong position, 'cell cortex' not in 'underlie... | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0071944 | \n",
+ " cell periphery | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " The part of a cell encompassing the cell corte... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0031090 | \n",
+ " organelle membrane | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " is one of the two lipid bilayers of an organel... | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043229 | \n",
+ " intracellular organelle | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043229 | \n",
+ " intracellular organelle | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043229 | \n",
+ " intracellular organelle | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0031967 | \n",
+ " organelle envelope | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A double membrane structure enclosing an organ... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0031975 | \n",
+ " envelope | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A multilayered structure surrounding all or pa... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0098590 | \n",
+ " plasma membrane region | \n",
+ " INFO | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A membrane that is a (regional) part of the pl... | \n",
+ " NaN | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " oaklib.om:DCC#S0 | \n",
+ " GO:0012505 | \n",
+ " endomembrane system | \n",
+ " ERROR | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Missing text definition | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0005622 | \n",
+ " intracellular anatomical structure | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A component of a cell contained within (but no... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:9999998 | \n",
+ " fake term for testing pmid type | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " fake definition to test retracted typo in refe... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043227 | \n",
+ " membrane-bounded organelle | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043227 | \n",
+ " membrane-bounded organelle | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0005938 | \n",
+ " cell cortex | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " region of a cell | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0005938 | \n",
+ " cell cortex | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " lies just beneath the plasma membrane and ofte... | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " oaklib.om:DCC#S7 | \n",
+ " GO:0009579 | \n",
+ " thylakoid | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " The structure in a plant cell that is known as... | \n",
+ " NaN | \n",
+ " Circular, thylakoid (GO:0009579 in definition | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:9999999 | \n",
+ " fake term for testing retraction | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " fake definition to test retracted reference | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0005575 | \n",
+ " cellular_component | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A location, relative to cellular compartments ... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0005634 | \n",
+ " nucleus | \n",
+ " INFO | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A membrane-bounded organelle of eukaryotic cel... | \n",
+ " NaN | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0016020 | \n",
+ " membrane | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A lipid bilayer along with all the proteins an... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0110165 | \n",
+ " cellular anatomical entity | \n",
+ " INFO | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A part of a cellular organism that is either a... | \n",
+ " NaN | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0005635 | \n",
+ " nuclear envelope | \n",
+ " INFO | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A double lipid bilayer that is part of the nuc... | \n",
+ " NaN | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0005886 | \n",
+ " plasma membrane | \n",
+ " INFO | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " The membrane surrounding a cell that separates... | \n",
+ " NaN | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " oaklib.om:DCC#S1 | \n",
+ " GO:0005773 | \n",
+ " vacuole | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Definiendum should not appear at the start | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0031965 | \n",
+ " nuclear membrane | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " envelope | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " oaklib.om:DCC#S1 | \n",
+ " GO:0005737 | \n",
+ " cytoplasm | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Definiendum should not appear at the start | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0034357 | \n",
+ " photosynthetic membrane | \n",
+ " INFO | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " A membrane enriched in complexes formed of rea... | \n",
+ " NaN | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043226 | \n",
+ " organelle | \n",
+ " WARNING | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " NaN | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " NaN | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " oaklib.om:DCC#S20.1 | \n",
+ " GO:9999998 | \n",
+ " fake term for testing pmid type | \n",
+ " ERROR | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " PMID:9999999999999 | \n",
+ " fake definition to test retracted typo in refe... | \n",
+ " NaN | \n",
+ " publication not found: PMID:9999999999999 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " oaklib.om:DCC#S20.2 | \n",
+ " GO:9999999 | \n",
+ " fake term for testing retraction | \n",
+ " ERROR | \n",
+ " NaN | \n",
+ " IAO:0000115 | \n",
+ " PMID:19717156 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " publication is retracted: A role for plasma tr... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type subject subject_label \\\n",
+ "0 oaklib.om:DCC#S3 GO:0043231 intracellular membrane-bounded organelle \n",
+ "1 oaklib.om:DCC#S11 GO:0043231 intracellular membrane-bounded organelle \n",
+ "2 oaklib.om:DCC#S11 GO:0043231 intracellular membrane-bounded organelle \n",
+ "3 oaklib.om:DCC#S3 GO:0099568 cytoplasmic region \n",
+ "4 oaklib.om:DCC#S3 GO:0099738 cell cortex region \n",
+ "5 oaklib.om:DCC#S11 GO:0099738 cell cortex region \n",
+ "6 oaklib.om:DCC#S3 GO:0071944 cell periphery \n",
+ "7 oaklib.om:DCC#S11 GO:0031090 organelle membrane \n",
+ "8 oaklib.om:DCC#S3 GO:0043229 intracellular organelle \n",
+ "9 oaklib.om:DCC#S11 GO:0043229 intracellular organelle \n",
+ "10 oaklib.om:DCC#S11 GO:0043229 intracellular organelle \n",
+ "11 oaklib.om:DCC#S3 GO:0031967 organelle envelope \n",
+ "12 oaklib.om:DCC#S3 GO:0031975 envelope \n",
+ "13 oaklib.om:DCC#Any GO:0098590 plasma membrane region \n",
+ "14 oaklib.om:DCC#S0 GO:0012505 endomembrane system \n",
+ "15 oaklib.om:DCC#S3 GO:0005622 intracellular anatomical structure \n",
+ "16 oaklib.om:DCC#S3 GO:9999998 fake term for testing pmid type \n",
+ "17 oaklib.om:DCC#S3 GO:0043227 membrane-bounded organelle \n",
+ "18 oaklib.om:DCC#S11 GO:0043227 membrane-bounded organelle \n",
+ "19 oaklib.om:DCC#S11 GO:0005938 cell cortex \n",
+ "20 oaklib.om:DCC#S11 GO:0005938 cell cortex \n",
+ "21 oaklib.om:DCC#S7 GO:0009579 thylakoid \n",
+ "22 oaklib.om:DCC#S3 GO:9999999 fake term for testing retraction \n",
+ "23 oaklib.om:DCC#S3 GO:0005575 cellular_component \n",
+ "24 oaklib.om:DCC#Any GO:0005634 nucleus \n",
+ "25 oaklib.om:DCC#S3 GO:0016020 membrane \n",
+ "26 oaklib.om:DCC#Any GO:0110165 cellular anatomical entity \n",
+ "27 oaklib.om:DCC#Any GO:0005635 nuclear envelope \n",
+ "28 oaklib.om:DCC#Any GO:0005886 plasma membrane \n",
+ "29 oaklib.om:DCC#S1 GO:0005773 vacuole \n",
+ "30 oaklib.om:DCC#S11 GO:0031965 nuclear membrane \n",
+ "31 oaklib.om:DCC#S1 GO:0005737 cytoplasm \n",
+ "32 oaklib.om:DCC#Any GO:0034357 photosynthetic membrane \n",
+ "33 oaklib.om:DCC#S3 GO:0043226 organelle \n",
+ "34 oaklib.om:DCC#S20.1 GO:9999998 fake term for testing pmid type \n",
+ "35 oaklib.om:DCC#S20.2 GO:9999999 fake term for testing retraction \n",
+ "\n",
+ " severity instantiates predicate object \\\n",
+ "0 WARNING NaN IAO:0000115 NaN \n",
+ "1 NaN NaN IAO:0000115 NaN \n",
+ "2 NaN NaN IAO:0000115 NaN \n",
+ "3 WARNING NaN IAO:0000115 NaN \n",
+ "4 NaN NaN IAO:0000115 NaN \n",
+ "5 NaN NaN IAO:0000115 NaN \n",
+ "6 WARNING NaN IAO:0000115 NaN \n",
+ "7 NaN NaN IAO:0000115 NaN \n",
+ "8 WARNING NaN IAO:0000115 NaN \n",
+ "9 NaN NaN IAO:0000115 NaN \n",
+ "10 NaN NaN IAO:0000115 NaN \n",
+ "11 WARNING NaN IAO:0000115 NaN \n",
+ "12 WARNING NaN IAO:0000115 NaN \n",
+ "13 INFO NaN IAO:0000115 NaN \n",
+ "14 ERROR NaN IAO:0000115 NaN \n",
+ "15 WARNING NaN IAO:0000115 NaN \n",
+ "16 WARNING NaN IAO:0000115 NaN \n",
+ "17 WARNING NaN IAO:0000115 NaN \n",
+ "18 NaN NaN IAO:0000115 NaN \n",
+ "19 NaN NaN IAO:0000115 NaN \n",
+ "20 NaN NaN IAO:0000115 NaN \n",
+ "21 NaN NaN IAO:0000115 NaN \n",
+ "22 WARNING NaN IAO:0000115 NaN \n",
+ "23 WARNING NaN IAO:0000115 NaN \n",
+ "24 INFO NaN IAO:0000115 NaN \n",
+ "25 WARNING NaN IAO:0000115 NaN \n",
+ "26 INFO NaN IAO:0000115 NaN \n",
+ "27 INFO NaN IAO:0000115 NaN \n",
+ "28 INFO NaN IAO:0000115 NaN \n",
+ "29 NaN NaN IAO:0000115 NaN \n",
+ "30 NaN NaN IAO:0000115 NaN \n",
+ "31 NaN NaN IAO:0000115 NaN \n",
+ "32 INFO NaN IAO:0000115 NaN \n",
+ "33 WARNING NaN IAO:0000115 NaN \n",
+ "34 ERROR NaN IAO:0000115 PMID:9999999999999 \n",
+ "35 ERROR NaN IAO:0000115 PMID:19717156 \n",
+ "\n",
+ " object_str source \\\n",
+ "0 Organized structure of distinctive morphology ... NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 Any (proper) part of the cytoplasm of a single... NaN \n",
+ "4 complete extent of cell cortex NaN \n",
+ "5 underlies some some region of the plasma membrane NaN \n",
+ "6 The part of a cell encompassing the cell corte... NaN \n",
+ "7 is one of the two lipid bilayers of an organel... NaN \n",
+ "8 Organized structure of distinctive morphology ... NaN \n",
+ "9 NaN NaN \n",
+ "10 NaN NaN \n",
+ "11 A double membrane structure enclosing an organ... NaN \n",
+ "12 A multilayered structure surrounding all or pa... NaN \n",
+ "13 A membrane that is a (regional) part of the pl... NaN \n",
+ "14 NaN NaN \n",
+ "15 A component of a cell contained within (but no... NaN \n",
+ "16 fake definition to test retracted typo in refe... NaN \n",
+ "17 Organized structure of distinctive morphology ... NaN \n",
+ "18 NaN NaN \n",
+ "19 region of a cell NaN \n",
+ "20 lies just beneath the plasma membrane and ofte... NaN \n",
+ "21 The structure in a plant cell that is known as... NaN \n",
+ "22 fake definition to test retracted reference NaN \n",
+ "23 A location, relative to cellular compartments ... NaN \n",
+ "24 A membrane-bounded organelle of eukaryotic cel... NaN \n",
+ "25 A lipid bilayer along with all the proteins an... NaN \n",
+ "26 A part of a cellular organism that is either a... NaN \n",
+ "27 A double lipid bilayer that is part of the nuc... NaN \n",
+ "28 The membrane surrounding a cell that separates... NaN \n",
+ "29 NaN NaN \n",
+ "30 envelope NaN \n",
+ "31 NaN NaN \n",
+ "32 A membrane enriched in complexes formed of rea... NaN \n",
+ "33 Organized structure of distinctive morphology ... NaN \n",
+ "34 fake definition to test retracted typo in refe... NaN \n",
+ "35 NaN NaN \n",
+ "\n",
+ " info \n",
+ "0 Cannot parse genus and differentia \n",
+ "1 Logical definition element not found in text: ... \n",
+ "2 Logical definition element not found in text: ... \n",
+ "3 Cannot parse genus and differentia \n",
+ "4 Did not match whole text: cell cortex < comple... \n",
+ "5 Wrong position, 'cell cortex' not in 'underlie... \n",
+ "6 Cannot parse genus and differentia \n",
+ "7 Logical definition element not found in text: ... \n",
+ "8 Cannot parse genus and differentia \n",
+ "9 Logical definition element not found in text: ... \n",
+ "10 Logical definition element not found in text: ... \n",
+ "11 Cannot parse genus and differentia \n",
+ "12 Cannot parse genus and differentia \n",
+ "13 No problems with definition \n",
+ "14 Missing text definition \n",
+ "15 Cannot parse genus and differentia \n",
+ "16 Cannot parse genus and differentia \n",
+ "17 Cannot parse genus and differentia \n",
+ "18 Logical definition element not found in text: ... \n",
+ "19 Logical definition element not found in text: ... \n",
+ "20 Logical definition element not found in text: ... \n",
+ "21 Circular, thylakoid (GO:0009579 in definition \n",
+ "22 Cannot parse genus and differentia \n",
+ "23 Cannot parse genus and differentia \n",
+ "24 No problems with definition \n",
+ "25 Cannot parse genus and differentia \n",
+ "26 No problems with definition \n",
+ "27 No problems with definition \n",
+ "28 No problems with definition \n",
+ "29 Definiendum should not appear at the start \n",
+ "30 Logical definition element not found in text: ... \n",
+ "31 Definiendum should not appear at the start \n",
+ "32 No problems with definition \n",
+ "33 Cannot parse genus and differentia \n",
+ "34 publication not found: PMID:9999999999999 \n",
+ "35 publication is retracted: A role for plasma tr... "
+ ]
},
- "execution_count": 3,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -181,7 +839,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 19,
"id": "421c556c-df3e-4281-914b-613e3d467036",
"metadata": {
"ExecuteTime": {
@@ -192,9 +850,13 @@
"outputs": [
{
"data": {
- "text/plain": "array(['oaklib.om:DCC#Any', 'oaklib.om:DCC#S3', 'oaklib.om:DCC#S11',\n 'oaklib.om:DCC#S1', 'oaklib.om:DCC#S0', 'oaklib.om:DCC#S7',\n 'oaklib.om:DCC#S20.1', 'oaklib.om:DCC#S20.2'], dtype=object)"
+ "text/plain": [
+ "array(['oaklib.om:DCC#S3', 'oaklib.om:DCC#S11', 'oaklib.om:DCC#Any',\n",
+ " 'oaklib.om:DCC#S0', 'oaklib.om:DCC#S7', 'oaklib.om:DCC#S1',\n",
+ " 'oaklib.om:DCC#S20.1', 'oaklib.om:DCC#S20.2'], dtype=object)"
+ ]
},
- "execution_count": 4,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -205,7 +867,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 20,
"id": "aea2cfe0-70bf-4b76-89e2-2bfdbdd3a084",
"metadata": {
"ExecuteTime": {
@@ -216,10 +878,87 @@
"outputs": [
{
"data": {
- "text/plain": " type counts\n0 oaklib.om:DCC#Any 6\n1 oaklib.om:DCC#S0 1\n2 oaklib.om:DCC#S1 2\n3 oaklib.om:DCC#S11 10\n4 oaklib.om:DCC#S20.1 1\n5 oaklib.om:DCC#S20.2 1\n6 oaklib.om:DCC#S3 14\n7 oaklib.om:DCC#S7 1",
- "text/html": "\n\n
\n \n \n | \n type | \n counts | \n
\n \n \n \n 0 | \n oaklib.om:DCC#Any | \n 6 | \n
\n \n 1 | \n oaklib.om:DCC#S0 | \n 1 | \n
\n \n 2 | \n oaklib.om:DCC#S1 | \n 2 | \n
\n \n 3 | \n oaklib.om:DCC#S11 | \n 10 | \n
\n \n 4 | \n oaklib.om:DCC#S20.1 | \n 1 | \n
\n \n 5 | \n oaklib.om:DCC#S20.2 | \n 1 | \n
\n \n 6 | \n oaklib.om:DCC#S3 | \n 14 | \n
\n \n 7 | \n oaklib.om:DCC#S7 | \n 1 | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " counts | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " oaklib.om:DCC#S0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " oaklib.om:DCC#S1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " oaklib.om:DCC#S20.1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " oaklib.om:DCC#S20.2 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " 14 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " oaklib.om:DCC#S7 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type counts\n",
+ "0 oaklib.om:DCC#Any 6\n",
+ "1 oaklib.om:DCC#S0 1\n",
+ "2 oaklib.om:DCC#S1 2\n",
+ "3 oaklib.om:DCC#S11 10\n",
+ "4 oaklib.om:DCC#S20.1 1\n",
+ "5 oaklib.om:DCC#S20.2 1\n",
+ "6 oaklib.om:DCC#S3 14\n",
+ "7 oaklib.om:DCC#S7 1"
+ ]
},
- "execution_count": 5,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -230,24 +969,470 @@
},
{
"cell_type": "markdown",
- "source": [
- "Next we'll filter out less informative columns"
- ],
+ "id": "f28d70f482239b30",
"metadata": {
- "collapsed": false
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
- "id": "f28d70f482239b30"
+ "source": [
+ "Next we'll filter out less informative columns"
+ ]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 21,
+ "id": "c1df05dd32082e69",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-15T00:50:30.994801Z",
+ "start_time": "2024-04-15T00:50:30.971926Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"outputs": [
{
"data": {
- "text/plain": " type subject subject_label \\\n0 oaklib.om:DCC#Any GO:0005634 nucleus \n1 oaklib.om:DCC#S3 GO:0043227 membrane-bounded organelle \n2 oaklib.om:DCC#S11 GO:0043227 membrane-bounded organelle \n3 oaklib.om:DCC#Any GO:0110165 cellular anatomical entity \n4 oaklib.om:DCC#S3 GO:0099568 cytoplasmic region \n5 oaklib.om:DCC#S1 GO:0005737 cytoplasm \n6 oaklib.om:DCC#S3 GO:0043229 intracellular organelle \n7 oaklib.om:DCC#S11 GO:0043229 intracellular organelle \n8 oaklib.om:DCC#S11 GO:0043229 intracellular organelle \n9 oaklib.om:DCC#Any GO:0005886 plasma membrane \n10 oaklib.om:DCC#Any GO:0098590 plasma membrane region \n11 oaklib.om:DCC#S3 GO:9999998 fake term for testing pmid type \n12 oaklib.om:DCC#S1 GO:0005773 vacuole \n13 oaklib.om:DCC#S3 GO:9999999 fake term for testing retraction \n14 oaklib.om:DCC#S3 GO:0031975 envelope \n15 oaklib.om:DCC#S3 GO:0005575 cellular_component \n16 oaklib.om:DCC#S11 GO:0031090 organelle membrane \n17 oaklib.om:DCC#Any GO:0034357 photosynthetic membrane \n18 oaklib.om:DCC#S11 GO:0031965 nuclear membrane \n19 oaklib.om:DCC#S11 GO:0005938 cell cortex \n20 oaklib.om:DCC#S11 GO:0005938 cell cortex \n21 oaklib.om:DCC#S0 GO:0012505 endomembrane system \n22 oaklib.om:DCC#S7 GO:0009579 thylakoid \n23 oaklib.om:DCC#S3 GO:0031967 organelle envelope \n24 oaklib.om:DCC#S3 GO:0043226 organelle \n25 oaklib.om:DCC#S3 GO:0071944 cell periphery \n26 oaklib.om:DCC#S3 GO:0043231 intracellular membrane-bounded organelle \n27 oaklib.om:DCC#S11 GO:0043231 intracellular membrane-bounded organelle \n28 oaklib.om:DCC#S11 GO:0043231 intracellular membrane-bounded organelle \n29 oaklib.om:DCC#S3 GO:0016020 membrane \n30 oaklib.om:DCC#S3 GO:0099738 cell cortex region \n31 oaklib.om:DCC#S11 GO:0099738 cell cortex region \n32 oaklib.om:DCC#Any GO:0005635 nuclear envelope \n33 oaklib.om:DCC#S3 GO:0005622 intracellular anatomical structure \n34 oaklib.om:DCC#S20.1 GO:9999998 fake term for testing pmid type \n35 oaklib.om:DCC#S20.2 GO:9999999 fake term 
for testing retraction \n\n object_str \\\n0 A membrane-bounded organelle of eukaryotic cel... \n1 Organized structure of distinctive morphology ... \n2 NaN \n3 A part of a cellular organism that is either a... \n4 Any (proper) part of the cytoplasm of a single... \n5 NaN \n6 Organized structure of distinctive morphology ... \n7 NaN \n8 NaN \n9 The membrane surrounding a cell that separates... \n10 A membrane that is a (regional) part of the pl... \n11 fake definition to test retracted typo in refe... \n12 NaN \n13 fake definition to test retracted reference \n14 A multilayered structure surrounding all or pa... \n15 A location, relative to cellular compartments ... \n16 is one of the two lipid bilayers of an organel... \n17 A membrane enriched in complexes formed of rea... \n18 envelope \n19 region of a cell \n20 lies just beneath the plasma membrane and ofte... \n21 NaN \n22 The structure in a plant cell that is known as... \n23 A double membrane structure enclosing an organ... \n24 Organized structure of distinctive morphology ... \n25 The part of a cell encompassing the cell corte... \n26 Organized structure of distinctive morphology ... \n27 NaN \n28 NaN \n29 A lipid bilayer along with all the proteins an... \n30 complete extent of cell cortex \n31 underlies some some region of the plasma membrane \n32 A double lipid bilayer that is part of the nuc... \n33 A component of a cell contained within (but no... \n34 NaN \n35 NaN \n\n info \n0 No problems with definition \n1 Cannot parse genus and differentia \n2 Logical definition element not found in text: ... \n3 No problems with definition \n4 Cannot parse genus and differentia \n5 Definiendum should not appear at the start \n6 Cannot parse genus and differentia \n7 Logical definition element not found in text: ... \n8 Logical definition element not found in text: ... 
\n9 No problems with definition \n10 No problems with definition \n11 Cannot parse genus and differentia \n12 Definiendum should not appear at the start \n13 Cannot parse genus and differentia \n14 Cannot parse genus and differentia \n15 Cannot parse genus and differentia \n16 Logical definition element not found in text: ... \n17 No problems with definition \n18 Logical definition element not found in text: ... \n19 Logical definition element not found in text: ... \n20 Logical definition element not found in text: ... \n21 Missing text definition \n22 Circular, thylakoid (GO:0009579 in definition \n23 Cannot parse genus and differentia \n24 Cannot parse genus and differentia \n25 Cannot parse genus and differentia \n26 Cannot parse genus and differentia \n27 Logical definition element not found in text: ... \n28 Logical definition element not found in text: ... \n29 Cannot parse genus and differentia \n30 Did not match whole text: cell cortex < comple... \n31 Wrong position, 'cell cortex' not in 'underlie... \n32 No problems with definition \n33 Cannot parse genus and differentia \n34 publication not found: PMID:9999999999999 \n35 publication is retracted: A role for plasma tr... ",
- "text/html": "\n\n
\n \n \n | \n type | \n subject | \n subject_label | \n object_str | \n info | \n
\n \n \n \n 0 | \n oaklib.om:DCC#Any | \n GO:0005634 | \n nucleus | \n A membrane-bounded organelle of eukaryotic cel... | \n No problems with definition | \n
\n \n 1 | \n oaklib.om:DCC#S3 | \n GO:0043227 | \n membrane-bounded organelle | \n Organized structure of distinctive morphology ... | \n Cannot parse genus and differentia | \n
\n \n 2 | \n oaklib.om:DCC#S11 | \n GO:0043227 | \n membrane-bounded organelle | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 3 | \n oaklib.om:DCC#Any | \n GO:0110165 | \n cellular anatomical entity | \n A part of a cellular organism that is either a... | \n No problems with definition | \n
\n \n 4 | \n oaklib.om:DCC#S3 | \n GO:0099568 | \n cytoplasmic region | \n Any (proper) part of the cytoplasm of a single... | \n Cannot parse genus and differentia | \n
\n \n 5 | \n oaklib.om:DCC#S1 | \n GO:0005737 | \n cytoplasm | \n NaN | \n Definiendum should not appear at the start | \n
\n \n 6 | \n oaklib.om:DCC#S3 | \n GO:0043229 | \n intracellular organelle | \n Organized structure of distinctive morphology ... | \n Cannot parse genus and differentia | \n
\n \n 7 | \n oaklib.om:DCC#S11 | \n GO:0043229 | \n intracellular organelle | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 8 | \n oaklib.om:DCC#S11 | \n GO:0043229 | \n intracellular organelle | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 9 | \n oaklib.om:DCC#Any | \n GO:0005886 | \n plasma membrane | \n The membrane surrounding a cell that separates... | \n No problems with definition | \n
\n \n 10 | \n oaklib.om:DCC#Any | \n GO:0098590 | \n plasma membrane region | \n A membrane that is a (regional) part of the pl... | \n No problems with definition | \n
\n \n 11 | \n oaklib.om:DCC#S3 | \n GO:9999998 | \n fake term for testing pmid type | \n fake definition to test retracted typo in refe... | \n Cannot parse genus and differentia | \n
\n \n 12 | \n oaklib.om:DCC#S1 | \n GO:0005773 | \n vacuole | \n NaN | \n Definiendum should not appear at the start | \n
\n \n 13 | \n oaklib.om:DCC#S3 | \n GO:9999999 | \n fake term for testing retraction | \n fake definition to test retracted reference | \n Cannot parse genus and differentia | \n
\n \n 14 | \n oaklib.om:DCC#S3 | \n GO:0031975 | \n envelope | \n A multilayered structure surrounding all or pa... | \n Cannot parse genus and differentia | \n
\n \n 15 | \n oaklib.om:DCC#S3 | \n GO:0005575 | \n cellular_component | \n A location, relative to cellular compartments ... | \n Cannot parse genus and differentia | \n
\n \n 16 | \n oaklib.om:DCC#S11 | \n GO:0031090 | \n organelle membrane | \n is one of the two lipid bilayers of an organel... | \n Logical definition element not found in text: ... | \n
\n \n 17 | \n oaklib.om:DCC#Any | \n GO:0034357 | \n photosynthetic membrane | \n A membrane enriched in complexes formed of rea... | \n No problems with definition | \n
\n \n 18 | \n oaklib.om:DCC#S11 | \n GO:0031965 | \n nuclear membrane | \n envelope | \n Logical definition element not found in text: ... | \n
\n \n 19 | \n oaklib.om:DCC#S11 | \n GO:0005938 | \n cell cortex | \n region of a cell | \n Logical definition element not found in text: ... | \n
\n \n 20 | \n oaklib.om:DCC#S11 | \n GO:0005938 | \n cell cortex | \n lies just beneath the plasma membrane and ofte... | \n Logical definition element not found in text: ... | \n
\n \n 21 | \n oaklib.om:DCC#S0 | \n GO:0012505 | \n endomembrane system | \n NaN | \n Missing text definition | \n
\n \n 22 | \n oaklib.om:DCC#S7 | \n GO:0009579 | \n thylakoid | \n The structure in a plant cell that is known as... | \n Circular, thylakoid (GO:0009579 in definition | \n
\n \n 23 | \n oaklib.om:DCC#S3 | \n GO:0031967 | \n organelle envelope | \n A double membrane structure enclosing an organ... | \n Cannot parse genus and differentia | \n
\n \n 24 | \n oaklib.om:DCC#S3 | \n GO:0043226 | \n organelle | \n Organized structure of distinctive morphology ... | \n Cannot parse genus and differentia | \n
\n \n 25 | \n oaklib.om:DCC#S3 | \n GO:0071944 | \n cell periphery | \n The part of a cell encompassing the cell corte... | \n Cannot parse genus and differentia | \n
\n \n 26 | \n oaklib.om:DCC#S3 | \n GO:0043231 | \n intracellular membrane-bounded organelle | \n Organized structure of distinctive morphology ... | \n Cannot parse genus and differentia | \n
\n \n 27 | \n oaklib.om:DCC#S11 | \n GO:0043231 | \n intracellular membrane-bounded organelle | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 28 | \n oaklib.om:DCC#S11 | \n GO:0043231 | \n intracellular membrane-bounded organelle | \n NaN | \n Logical definition element not found in text: ... | \n
\n \n 29 | \n oaklib.om:DCC#S3 | \n GO:0016020 | \n membrane | \n A lipid bilayer along with all the proteins an... | \n Cannot parse genus and differentia | \n
\n \n 30 | \n oaklib.om:DCC#S3 | \n GO:0099738 | \n cell cortex region | \n complete extent of cell cortex | \n Did not match whole text: cell cortex < comple... | \n
\n \n 31 | \n oaklib.om:DCC#S11 | \n GO:0099738 | \n cell cortex region | \n underlies some some region of the plasma membrane | \n Wrong position, 'cell cortex' not in 'underlie... | \n
\n \n 32 | \n oaklib.om:DCC#Any | \n GO:0005635 | \n nuclear envelope | \n A double lipid bilayer that is part of the nuc... | \n No problems with definition | \n
\n \n 33 | \n oaklib.om:DCC#S3 | \n GO:0005622 | \n intracellular anatomical structure | \n A component of a cell contained within (but no... | \n Cannot parse genus and differentia | \n
\n \n 34 | \n oaklib.om:DCC#S20.1 | \n GO:9999998 | \n fake term for testing pmid type | \n NaN | \n publication not found: PMID:9999999999999 | \n
\n \n 35 | \n oaklib.om:DCC#S20.2 | \n GO:9999999 | \n fake term for testing retraction | \n NaN | \n publication is retracted: A role for plasma tr... | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " subject | \n",
+ " subject_label | \n",
+ " object_str | \n",
+ " info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043231 | \n",
+ " intracellular membrane-bounded organelle | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043231 | \n",
+ " intracellular membrane-bounded organelle | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043231 | \n",
+ " intracellular membrane-bounded organelle | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0099568 | \n",
+ " cytoplasmic region | \n",
+ " Any (proper) part of the cytoplasm of a single... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0099738 | \n",
+ " cell cortex region | \n",
+ " complete extent of cell cortex | \n",
+ " Did not match whole text: cell cortex < comple... | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0099738 | \n",
+ " cell cortex region | \n",
+ " underlies some some region of the plasma membrane | \n",
+ " Wrong position, 'cell cortex' not in 'underlie... | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0071944 | \n",
+ " cell periphery | \n",
+ " The part of a cell encompassing the cell corte... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0031090 | \n",
+ " organelle membrane | \n",
+ " is one of the two lipid bilayers of an organel... | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043229 | \n",
+ " intracellular organelle | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043229 | \n",
+ " intracellular organelle | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043229 | \n",
+ " intracellular organelle | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0031967 | \n",
+ " organelle envelope | \n",
+ " A double membrane structure enclosing an organ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0031975 | \n",
+ " envelope | \n",
+ " A multilayered structure surrounding all or pa... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0098590 | \n",
+ " plasma membrane region | \n",
+ " A membrane that is a (regional) part of the pl... | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " oaklib.om:DCC#S0 | \n",
+ " GO:0012505 | \n",
+ " endomembrane system | \n",
+ " NaN | \n",
+ " Missing text definition | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0005622 | \n",
+ " intracellular anatomical structure | \n",
+ " A component of a cell contained within (but no... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:9999998 | \n",
+ " fake term for testing pmid type | \n",
+ " fake definition to test retracted typo in refe... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043227 | \n",
+ " membrane-bounded organelle | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0043227 | \n",
+ " membrane-bounded organelle | \n",
+ " NaN | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0005938 | \n",
+ " cell cortex | \n",
+ " region of a cell | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0005938 | \n",
+ " cell cortex | \n",
+ " lies just beneath the plasma membrane and ofte... | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " oaklib.om:DCC#S7 | \n",
+ " GO:0009579 | \n",
+ " thylakoid | \n",
+ " The structure in a plant cell that is known as... | \n",
+ " Circular, thylakoid (GO:0009579 in definition | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:9999999 | \n",
+ " fake term for testing retraction | \n",
+ " fake definition to test retracted reference | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0005575 | \n",
+ " cellular_component | \n",
+ " A location, relative to cellular compartments ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0005634 | \n",
+ " nucleus | \n",
+ " A membrane-bounded organelle of eukaryotic cel... | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0016020 | \n",
+ " membrane | \n",
+ " A lipid bilayer along with all the proteins an... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0110165 | \n",
+ " cellular anatomical entity | \n",
+ " A part of a cellular organism that is either a... | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0005635 | \n",
+ " nuclear envelope | \n",
+ " A double lipid bilayer that is part of the nuc... | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0005886 | \n",
+ " plasma membrane | \n",
+ " The membrane surrounding a cell that separates... | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " oaklib.om:DCC#S1 | \n",
+ " GO:0005773 | \n",
+ " vacuole | \n",
+ " NaN | \n",
+ " Definiendum should not appear at the start | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " oaklib.om:DCC#S11 | \n",
+ " GO:0031965 | \n",
+ " nuclear membrane | \n",
+ " envelope | \n",
+ " Logical definition element not found in text: ... | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " oaklib.om:DCC#S1 | \n",
+ " GO:0005737 | \n",
+ " cytoplasm | \n",
+ " NaN | \n",
+ " Definiendum should not appear at the start | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " oaklib.om:DCC#Any | \n",
+ " GO:0034357 | \n",
+ " photosynthetic membrane | \n",
+ " A membrane enriched in complexes formed of rea... | \n",
+ " No problems with definition | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043226 | \n",
+ " organelle | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " oaklib.om:DCC#S20.1 | \n",
+ " GO:9999998 | \n",
+ " fake term for testing pmid type | \n",
+ " fake definition to test retracted typo in refe... | \n",
+ " publication not found: PMID:9999999999999 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " oaklib.om:DCC#S20.2 | \n",
+ " GO:9999999 | \n",
+ " fake term for testing retraction | \n",
+ " NaN | \n",
+ " publication is retracted: A role for plasma tr... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type subject subject_label \\\n",
+ "0 oaklib.om:DCC#S3 GO:0043231 intracellular membrane-bounded organelle \n",
+ "1 oaklib.om:DCC#S11 GO:0043231 intracellular membrane-bounded organelle \n",
+ "2 oaklib.om:DCC#S11 GO:0043231 intracellular membrane-bounded organelle \n",
+ "3 oaklib.om:DCC#S3 GO:0099568 cytoplasmic region \n",
+ "4 oaklib.om:DCC#S3 GO:0099738 cell cortex region \n",
+ "5 oaklib.om:DCC#S11 GO:0099738 cell cortex region \n",
+ "6 oaklib.om:DCC#S3 GO:0071944 cell periphery \n",
+ "7 oaklib.om:DCC#S11 GO:0031090 organelle membrane \n",
+ "8 oaklib.om:DCC#S3 GO:0043229 intracellular organelle \n",
+ "9 oaklib.om:DCC#S11 GO:0043229 intracellular organelle \n",
+ "10 oaklib.om:DCC#S11 GO:0043229 intracellular organelle \n",
+ "11 oaklib.om:DCC#S3 GO:0031967 organelle envelope \n",
+ "12 oaklib.om:DCC#S3 GO:0031975 envelope \n",
+ "13 oaklib.om:DCC#Any GO:0098590 plasma membrane region \n",
+ "14 oaklib.om:DCC#S0 GO:0012505 endomembrane system \n",
+ "15 oaklib.om:DCC#S3 GO:0005622 intracellular anatomical structure \n",
+ "16 oaklib.om:DCC#S3 GO:9999998 fake term for testing pmid type \n",
+ "17 oaklib.om:DCC#S3 GO:0043227 membrane-bounded organelle \n",
+ "18 oaklib.om:DCC#S11 GO:0043227 membrane-bounded organelle \n",
+ "19 oaklib.om:DCC#S11 GO:0005938 cell cortex \n",
+ "20 oaklib.om:DCC#S11 GO:0005938 cell cortex \n",
+ "21 oaklib.om:DCC#S7 GO:0009579 thylakoid \n",
+ "22 oaklib.om:DCC#S3 GO:9999999 fake term for testing retraction \n",
+ "23 oaklib.om:DCC#S3 GO:0005575 cellular_component \n",
+ "24 oaklib.om:DCC#Any GO:0005634 nucleus \n",
+ "25 oaklib.om:DCC#S3 GO:0016020 membrane \n",
+ "26 oaklib.om:DCC#Any GO:0110165 cellular anatomical entity \n",
+ "27 oaklib.om:DCC#Any GO:0005635 nuclear envelope \n",
+ "28 oaklib.om:DCC#Any GO:0005886 plasma membrane \n",
+ "29 oaklib.om:DCC#S1 GO:0005773 vacuole \n",
+ "30 oaklib.om:DCC#S11 GO:0031965 nuclear membrane \n",
+ "31 oaklib.om:DCC#S1 GO:0005737 cytoplasm \n",
+ "32 oaklib.om:DCC#Any GO:0034357 photosynthetic membrane \n",
+ "33 oaklib.om:DCC#S3 GO:0043226 organelle \n",
+ "34 oaklib.om:DCC#S20.1 GO:9999998 fake term for testing pmid type \n",
+ "35 oaklib.om:DCC#S20.2 GO:9999999 fake term for testing retraction \n",
+ "\n",
+ " object_str \\\n",
+ "0 Organized structure of distinctive morphology ... \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 Any (proper) part of the cytoplasm of a single... \n",
+ "4 complete extent of cell cortex \n",
+ "5 underlies some some region of the plasma membrane \n",
+ "6 The part of a cell encompassing the cell corte... \n",
+ "7 is one of the two lipid bilayers of an organel... \n",
+ "8 Organized structure of distinctive morphology ... \n",
+ "9 NaN \n",
+ "10 NaN \n",
+ "11 A double membrane structure enclosing an organ... \n",
+ "12 A multilayered structure surrounding all or pa... \n",
+ "13 A membrane that is a (regional) part of the pl... \n",
+ "14 NaN \n",
+ "15 A component of a cell contained within (but no... \n",
+ "16 fake definition to test retracted typo in refe... \n",
+ "17 Organized structure of distinctive morphology ... \n",
+ "18 NaN \n",
+ "19 region of a cell \n",
+ "20 lies just beneath the plasma membrane and ofte... \n",
+ "21 The structure in a plant cell that is known as... \n",
+ "22 fake definition to test retracted reference \n",
+ "23 A location, relative to cellular compartments ... \n",
+ "24 A membrane-bounded organelle of eukaryotic cel... \n",
+ "25 A lipid bilayer along with all the proteins an... \n",
+ "26 A part of a cellular organism that is either a... \n",
+ "27 A double lipid bilayer that is part of the nuc... \n",
+ "28 The membrane surrounding a cell that separates... \n",
+ "29 NaN \n",
+ "30 envelope \n",
+ "31 NaN \n",
+ "32 A membrane enriched in complexes formed of rea... \n",
+ "33 Organized structure of distinctive morphology ... \n",
+ "34 fake definition to test retracted typo in refe... \n",
+ "35 NaN \n",
+ "\n",
+ " info \n",
+ "0 Cannot parse genus and differentia \n",
+ "1 Logical definition element not found in text: ... \n",
+ "2 Logical definition element not found in text: ... \n",
+ "3 Cannot parse genus and differentia \n",
+ "4 Did not match whole text: cell cortex < comple... \n",
+ "5 Wrong position, 'cell cortex' not in 'underlie... \n",
+ "6 Cannot parse genus and differentia \n",
+ "7 Logical definition element not found in text: ... \n",
+ "8 Cannot parse genus and differentia \n",
+ "9 Logical definition element not found in text: ... \n",
+ "10 Logical definition element not found in text: ... \n",
+ "11 Cannot parse genus and differentia \n",
+ "12 Cannot parse genus and differentia \n",
+ "13 No problems with definition \n",
+ "14 Missing text definition \n",
+ "15 Cannot parse genus and differentia \n",
+ "16 Cannot parse genus and differentia \n",
+ "17 Cannot parse genus and differentia \n",
+ "18 Logical definition element not found in text: ... \n",
+ "19 Logical definition element not found in text: ... \n",
+ "20 Logical definition element not found in text: ... \n",
+ "21 Circular, thylakoid (GO:0009579 in definition \n",
+ "22 Cannot parse genus and differentia \n",
+ "23 Cannot parse genus and differentia \n",
+ "24 No problems with definition \n",
+ "25 Cannot parse genus and differentia \n",
+ "26 No problems with definition \n",
+ "27 No problems with definition \n",
+ "28 No problems with definition \n",
+ "29 Definiendum should not appear at the start \n",
+ "30 Logical definition element not found in text: ... \n",
+ "31 Definiendum should not appear at the start \n",
+ "32 No problems with definition \n",
+ "33 Cannot parse genus and differentia \n",
+ "34 publication not found: PMID:9999999999999 \n",
+ "35 publication is retracted: A role for plasma tr... "
+ ]
},
- "execution_count": 6,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -255,264 +1440,731 @@
"source": [
"df = df[[\"type\", \"subject\", \"subject_label\", \"object_str\", \"info\"]]\n",
"df"
- ],
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8ad6ef24d0daf11f",
"metadata": {
"collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-04-15T00:50:30.994801Z",
- "start_time": "2024-04-15T00:50:30.971926Z"
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "c1df05dd32082e69"
- },
- {
- "cell_type": "markdown",
"source": [
"## Missing Definitions\n",
"\n",
"This is the most trivial way to fail a definition check - not to include one. We can see all the missing definitions:\n"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "8ad6ef24d0daf11f"
+ ]
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 22,
+ "id": "381e7c7da587668e",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-15T00:50:31.048081Z",
+ "start_time": "2024-04-15T00:50:30.979466Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"outputs": [
{
"data": {
- "text/plain": " type subject subject_label object_str \\\n21 oaklib.om:DCC#S0 GO:0012505 endomembrane system NaN \n\n info \n21 Missing text definition ",
- "text/html": "\n\n
\n \n \n | \n type | \n subject | \n subject_label | \n object_str | \n info | \n
\n \n \n \n 21 | \n oaklib.om:DCC#S0 | \n GO:0012505 | \n endomembrane system | \n NaN | \n Missing text definition | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " subject | \n",
+ " subject_label | \n",
+ " object_str | \n",
+ " info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 14 | \n",
+ " oaklib.om:DCC#S0 | \n",
+ " GO:0012505 | \n",
+ " endomembrane system | \n",
+ " NaN | \n",
+ " Missing text definition | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type subject subject_label object_str \\\n",
+ "14 oaklib.om:DCC#S0 GO:0012505 endomembrane system NaN \n",
+ "\n",
+ " info \n",
+ "14 Missing text definition "
+ ]
},
- "execution_count": 7,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df[\"type\"] == \"oaklib.om:DCC#S0\"]\n"
- ],
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f8844c7876451383",
"metadata": {
"collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-04-15T00:50:31.048081Z",
- "start_time": "2024-04-15T00:50:30.979466Z"
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "381e7c7da587668e"
- },
- {
- "cell_type": "markdown",
"source": [
"Of course, in the real ontology this term has a definition"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "f8844c7876451383"
+ ]
},
{
"cell_type": "markdown",
+ "id": "c098cdf7a5665add",
+ "metadata": {
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"source": [
"## Non genus-differentia structure\n",
"\n",
"The OAK validate definitions command follows [SRS]( https://philpapers.org/archive/SEPGFW.pdf) and assumes good definitions follow genus-differentia structure.\n",
"\n",
"We can see the ones that fail this (S3):"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "c098cdf7a5665add"
+ ]
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 23,
+ "id": "9cf1490c83491596",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-15T00:50:31.052182Z",
+ "start_time": "2024-04-15T00:50:30.987744Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"outputs": [
{
"data": {
- "text/plain": " type subject subject_label \\\n1 oaklib.om:DCC#S3 GO:0043227 membrane-bounded organelle \n4 oaklib.om:DCC#S3 GO:0099568 cytoplasmic region \n6 oaklib.om:DCC#S3 GO:0043229 intracellular organelle \n11 oaklib.om:DCC#S3 GO:9999998 fake term for testing pmid type \n13 oaklib.om:DCC#S3 GO:9999999 fake term for testing retraction \n14 oaklib.om:DCC#S3 GO:0031975 envelope \n15 oaklib.om:DCC#S3 GO:0005575 cellular_component \n23 oaklib.om:DCC#S3 GO:0031967 organelle envelope \n24 oaklib.om:DCC#S3 GO:0043226 organelle \n25 oaklib.om:DCC#S3 GO:0071944 cell periphery \n26 oaklib.om:DCC#S3 GO:0043231 intracellular membrane-bounded organelle \n29 oaklib.om:DCC#S3 GO:0016020 membrane \n30 oaklib.om:DCC#S3 GO:0099738 cell cortex region \n33 oaklib.om:DCC#S3 GO:0005622 intracellular anatomical structure \n\n object_str \\\n1 Organized structure of distinctive morphology ... \n4 Any (proper) part of the cytoplasm of a single... \n6 Organized structure of distinctive morphology ... \n11 fake definition to test retracted typo in refe... \n13 fake definition to test retracted reference \n14 A multilayered structure surrounding all or pa... \n15 A location, relative to cellular compartments ... \n23 A double membrane structure enclosing an organ... \n24 Organized structure of distinctive morphology ... \n25 The part of a cell encompassing the cell corte... \n26 Organized structure of distinctive morphology ... \n29 A lipid bilayer along with all the proteins an... \n30 complete extent of cell cortex \n33 A component of a cell contained within (but no... 
\n\n info \n1 Cannot parse genus and differentia \n4 Cannot parse genus and differentia \n6 Cannot parse genus and differentia \n11 Cannot parse genus and differentia \n13 Cannot parse genus and differentia \n14 Cannot parse genus and differentia \n15 Cannot parse genus and differentia \n23 Cannot parse genus and differentia \n24 Cannot parse genus and differentia \n25 Cannot parse genus and differentia \n26 Cannot parse genus and differentia \n29 Cannot parse genus and differentia \n30 Did not match whole text: cell cortex < comple... \n33 Cannot parse genus and differentia ",
- "text/html": "\n\n
\n \n \n | \n type | \n subject | \n subject_label | \n object_str | \n info | \n
\n \n \n \n 1 | \n oaklib.om:DCC#S3 | \n GO:0043227 | \n membrane-bounded organelle | \n Organized structure of distinctive morphology ... | \n Cannot parse genus and differentia | \n
\n \n 4 | \n oaklib.om:DCC#S3 | \n GO:0099568 | \n cytoplasmic region | \n Any (proper) part of the cytoplasm of a single... | \n Cannot parse genus and differentia | \n
\n \n 6 | \n oaklib.om:DCC#S3 | \n GO:0043229 | \n intracellular organelle | \n Organized structure of distinctive morphology ... | \n Cannot parse genus and differentia | \n
\n \n 11 | \n oaklib.om:DCC#S3 | \n GO:9999998 | \n fake term for testing pmid type | \n fake definition to test retracted typo in refe... | \n Cannot parse genus and differentia | \n
\n \n 13 | \n oaklib.om:DCC#S3 | \n GO:9999999 | \n fake term for testing retraction | \n fake definition to test retracted reference | \n Cannot parse genus and differentia | \n
\n \n 14 | \n oaklib.om:DCC#S3 | \n GO:0031975 | \n envelope | \n A multilayered structure surrounding all or pa... | \n Cannot parse genus and differentia | \n
\n \n 15 | \n oaklib.om:DCC#S3 | \n GO:0005575 | \n cellular_component | \n A location, relative to cellular compartments ... | \n Cannot parse genus and differentia | \n
\n \n 23 | \n oaklib.om:DCC#S3 | \n GO:0031967 | \n organelle envelope | \n A double membrane structure enclosing an organ... | \n Cannot parse genus and differentia | \n
\n \n 24 | \n oaklib.om:DCC#S3 | \n GO:0043226 | \n organelle | \n Organized structure of distinctive morphology ... | \n Cannot parse genus and differentia | \n
\n \n 25 | \n oaklib.om:DCC#S3 | \n GO:0071944 | \n cell periphery | \n The part of a cell encompassing the cell corte... | \n Cannot parse genus and differentia | \n
\n \n 26 | \n oaklib.om:DCC#S3 | \n GO:0043231 | \n intracellular membrane-bounded organelle | \n Organized structure of distinctive morphology ... | \n Cannot parse genus and differentia | \n
\n \n 29 | \n oaklib.om:DCC#S3 | \n GO:0016020 | \n membrane | \n A lipid bilayer along with all the proteins an... | \n Cannot parse genus and differentia | \n
\n \n 30 | \n oaklib.om:DCC#S3 | \n GO:0099738 | \n cell cortex region | \n complete extent of cell cortex | \n Did not match whole text: cell cortex < comple... | \n
\n \n 33 | \n oaklib.om:DCC#S3 | \n GO:0005622 | \n intracellular anatomical structure | \n A component of a cell contained within (but no... | \n Cannot parse genus and differentia | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " subject | \n",
+ " subject_label | \n",
+ " object_str | \n",
+ " info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043231 | \n",
+ " intracellular membrane-bounded organelle | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0099568 | \n",
+ " cytoplasmic region | \n",
+ " Any (proper) part of the cytoplasm of a single... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0099738 | \n",
+ " cell cortex region | \n",
+ " complete extent of cell cortex | \n",
+ " Did not match whole text: cell cortex < comple... | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0071944 | \n",
+ " cell periphery | \n",
+ " The part of a cell encompassing the cell corte... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043229 | \n",
+ " intracellular organelle | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0031967 | \n",
+ " organelle envelope | \n",
+ " A double membrane structure enclosing an organ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0031975 | \n",
+ " envelope | \n",
+ " A multilayered structure surrounding all or pa... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0005622 | \n",
+ " intracellular anatomical structure | \n",
+ " A component of a cell contained within (but no... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:9999998 | \n",
+ " fake term for testing pmid type | \n",
+ " fake definition to test retracted typo in refe... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043227 | \n",
+ " membrane-bounded organelle | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:9999999 | \n",
+ " fake term for testing retraction | \n",
+ " fake definition to test retracted reference | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0005575 | \n",
+ " cellular_component | \n",
+ " A location, relative to cellular compartments ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0016020 | \n",
+ " membrane | \n",
+ " A lipid bilayer along with all the proteins an... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " oaklib.om:DCC#S3 | \n",
+ " GO:0043226 | \n",
+ " organelle | \n",
+ " Organized structure of distinctive morphology ... | \n",
+ " Cannot parse genus and differentia | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type subject subject_label \\\n",
+ "0 oaklib.om:DCC#S3 GO:0043231 intracellular membrane-bounded organelle \n",
+ "3 oaklib.om:DCC#S3 GO:0099568 cytoplasmic region \n",
+ "4 oaklib.om:DCC#S3 GO:0099738 cell cortex region \n",
+ "6 oaklib.om:DCC#S3 GO:0071944 cell periphery \n",
+ "8 oaklib.om:DCC#S3 GO:0043229 intracellular organelle \n",
+ "11 oaklib.om:DCC#S3 GO:0031967 organelle envelope \n",
+ "12 oaklib.om:DCC#S3 GO:0031975 envelope \n",
+ "15 oaklib.om:DCC#S3 GO:0005622 intracellular anatomical structure \n",
+ "16 oaklib.om:DCC#S3 GO:9999998 fake term for testing pmid type \n",
+ "17 oaklib.om:DCC#S3 GO:0043227 membrane-bounded organelle \n",
+ "22 oaklib.om:DCC#S3 GO:9999999 fake term for testing retraction \n",
+ "23 oaklib.om:DCC#S3 GO:0005575 cellular_component \n",
+ "25 oaklib.om:DCC#S3 GO:0016020 membrane \n",
+ "33 oaklib.om:DCC#S3 GO:0043226 organelle \n",
+ "\n",
+ " object_str \\\n",
+ "0 Organized structure of distinctive morphology ... \n",
+ "3 Any (proper) part of the cytoplasm of a single... \n",
+ "4 complete extent of cell cortex \n",
+ "6 The part of a cell encompassing the cell corte... \n",
+ "8 Organized structure of distinctive morphology ... \n",
+ "11 A double membrane structure enclosing an organ... \n",
+ "12 A multilayered structure surrounding all or pa... \n",
+ "15 A component of a cell contained within (but no... \n",
+ "16 fake definition to test retracted typo in refe... \n",
+ "17 Organized structure of distinctive morphology ... \n",
+ "22 fake definition to test retracted reference \n",
+ "23 A location, relative to cellular compartments ... \n",
+ "25 A lipid bilayer along with all the proteins an... \n",
+ "33 Organized structure of distinctive morphology ... \n",
+ "\n",
+ " info \n",
+ "0 Cannot parse genus and differentia \n",
+ "3 Cannot parse genus and differentia \n",
+ "4 Did not match whole text: cell cortex < comple... \n",
+ "6 Cannot parse genus and differentia \n",
+ "8 Cannot parse genus and differentia \n",
+ "11 Cannot parse genus and differentia \n",
+ "12 Cannot parse genus and differentia \n",
+ "15 Cannot parse genus and differentia \n",
+ "16 Cannot parse genus and differentia \n",
+ "17 Cannot parse genus and differentia \n",
+ "22 Cannot parse genus and differentia \n",
+ "23 Cannot parse genus and differentia \n",
+ "25 Cannot parse genus and differentia \n",
+ "33 Cannot parse genus and differentia "
+ ]
},
- "execution_count": 8,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df[\"type\"] == \"oaklib.om:DCC#S3\"]"
- ],
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "27f9e7b747b071de",
"metadata": {
"collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-04-15T00:50:31.052182Z",
- "start_time": "2024-04-15T00:50:30.987744Z"
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "9cf1490c83491596"
- },
- {
- "cell_type": "markdown",
"source": [
"Many of these are actual definitions rather than ones manipulated for test purposes.\n",
"\n",
"There is room for valid disagreement about whether rewriting some of these following genus-differentia form would improve things for either users or annotators. Arguably at least the subtypes of organelle could simply state how they are differentiated from organelles in general rather than repeating the somewhat wordy _\"Organized structure of distinctive morphology...\"_"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "27f9e7b747b071de"
+ ]
},
{
"cell_type": "markdown",
- "source": [
- "## Circular definitions"
- ],
+ "id": "c56d3a9c531e5a09",
"metadata": {
- "collapsed": false
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
},
- "id": "c56d3a9c531e5a09"
+ "source": [
+ "## Circular definitions"
+ ]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 24,
+ "id": "adcbad5fae63e7fb",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-15T00:50:31.052559Z",
+ "start_time": "2024-04-15T00:50:30.994899Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"outputs": [
{
"data": {
- "text/plain": " type subject subject_label \\\n22 oaklib.om:DCC#S7 GO:0009579 thylakoid \n\n object_str \\\n22 The structure in a plant cell that is known as... \n\n info \n22 Circular, thylakoid (GO:0009579 in definition ",
- "text/html": "\n\n
\n \n \n | \n type | \n subject | \n subject_label | \n object_str | \n info | \n
\n \n \n \n 22 | \n oaklib.om:DCC#S7 | \n GO:0009579 | \n thylakoid | \n The structure in a plant cell that is known as... | \n Circular, thylakoid (GO:0009579 in definition | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " subject | \n",
+ " subject_label | \n",
+ " object_str | \n",
+ " info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 21 | \n",
+ " oaklib.om:DCC#S7 | \n",
+ " GO:0009579 | \n",
+ " thylakoid | \n",
+ " The structure in a plant cell that is known as... | \n",
+ " Circular, thylakoid (GO:0009579 in definition | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type subject subject_label \\\n",
+ "21 oaklib.om:DCC#S7 GO:0009579 thylakoid \n",
+ "\n",
+ " object_str \\\n",
+ "21 The structure in a plant cell that is known as... \n",
+ "\n",
+ " info \n",
+ "21 Circular, thylakoid (GO:0009579 in definition "
+ ]
},
- "execution_count": 9,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df[\"type\"] == \"oaklib.om:DCC#S7\"]"
- ],
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "34eb55cf06afa332",
"metadata": {
"collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-04-15T00:50:31.052559Z",
- "start_time": "2024-04-15T00:50:30.994899Z"
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "adcbad5fae63e7fb"
- },
- {
- "cell_type": "markdown",
"source": [
"## Not following convention"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "34eb55cf06afa332"
+ ]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 25,
+ "id": "cf4d18796842b46",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-15T00:50:31.062863Z",
+ "start_time": "2024-04-15T00:50:31.004181Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"outputs": [
{
"data": {
- "text/plain": " type subject subject_label object_str \\\n5 oaklib.om:DCC#S1 GO:0005737 cytoplasm NaN \n12 oaklib.om:DCC#S1 GO:0005773 vacuole NaN \n\n info \n5 Definiendum should not appear at the start \n12 Definiendum should not appear at the start ",
- "text/html": "\n\n
\n \n \n | \n type | \n subject | \n subject_label | \n object_str | \n info | \n
\n \n \n \n 5 | \n oaklib.om:DCC#S1 | \n GO:0005737 | \n cytoplasm | \n NaN | \n Definiendum should not appear at the start | \n
\n \n 12 | \n oaklib.om:DCC#S1 | \n GO:0005773 | \n vacuole | \n NaN | \n Definiendum should not appear at the start | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " subject | \n",
+ " subject_label | \n",
+ " object_str | \n",
+ " info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 29 | \n",
+ " oaklib.om:DCC#S1 | \n",
+ " GO:0005773 | \n",
+ " vacuole | \n",
+ " NaN | \n",
+ " Definiendum should not appear at the start | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " oaklib.om:DCC#S1 | \n",
+ " GO:0005737 | \n",
+ " cytoplasm | \n",
+ " NaN | \n",
+ " Definiendum should not appear at the start | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type subject subject_label object_str \\\n",
+ "29 oaklib.om:DCC#S1 GO:0005773 vacuole NaN \n",
+ "31 oaklib.om:DCC#S1 GO:0005737 cytoplasm NaN \n",
+ "\n",
+ " info \n",
+ "29 Definiendum should not appear at the start \n",
+ "31 Definiendum should not appear at the start "
+ ]
},
- "execution_count": 10,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df[\"type\"] == \"oaklib.om:DCC#S1\"]"
- ],
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4c5189bd46804bd8",
"metadata": {
"collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-04-15T00:50:31.062863Z",
- "start_time": "2024-04-15T00:50:31.004181Z"
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "cf4d18796842b46"
- },
- {
- "cell_type": "markdown",
"source": [
"## Definition Reference Issues\n",
"\n",
"### Typos in PMIDs\n"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "4c5189bd46804bd8"
+ ]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 26,
+ "id": "35e1f10deba2c6c9",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-15T00:51:38.780848Z",
+ "start_time": "2024-04-15T00:51:38.770256Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"outputs": [
{
"data": {
- "text/plain": " type subject subject_label \\\n34 oaklib.om:DCC#S20.1 GO:9999998 fake term for testing pmid type \n\n object_str info \n34 NaN publication not found: PMID:9999999999999 ",
- "text/html": "\n\n
\n \n \n | \n type | \n subject | \n subject_label | \n object_str | \n info | \n
\n \n \n \n 34 | \n oaklib.om:DCC#S20.1 | \n GO:9999998 | \n fake term for testing pmid type | \n NaN | \n publication not found: PMID:9999999999999 | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " subject | \n",
+ " subject_label | \n",
+ " object_str | \n",
+ " info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 34 | \n",
+ " oaklib.om:DCC#S20.1 | \n",
+ " GO:9999998 | \n",
+ " fake term for testing pmid type | \n",
+ " fake definition to test retracted typo in refe... | \n",
+ " publication not found: PMID:9999999999999 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type subject subject_label \\\n",
+ "34 oaklib.om:DCC#S20.1 GO:9999998 fake term for testing pmid type \n",
+ "\n",
+ " object_str \\\n",
+ "34 fake definition to test retracted typo in refe... \n",
+ "\n",
+ " info \n",
+ "34 publication not found: PMID:9999999999999 "
+ ]
},
- "execution_count": 11,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df[\"type\"] == \"oaklib.om:DCC#S20.1\"]\n"
- ],
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7a288d8fc507acc4",
"metadata": {
"collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-04-15T00:51:38.780848Z",
- "start_time": "2024-04-15T00:51:38.770256Z"
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "35e1f10deba2c6c9"
- },
- {
- "cell_type": "markdown",
"source": [
"### Retracted publications"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "7a288d8fc507acc4"
+ ]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 27,
+ "id": "f5245d99ab0864d5",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-15T00:52:02.693591Z",
+ "start_time": "2024-04-15T00:52:02.687692Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"outputs": [
{
"data": {
- "text/plain": " type subject subject_label \\\n35 oaklib.om:DCC#S20.2 GO:9999999 fake term for testing retraction \n\n object_str info \n35 NaN publication is retracted: A role for plasma tr... ",
- "text/html": "\n\n
\n \n \n | \n type | \n subject | \n subject_label | \n object_str | \n info | \n
\n \n \n \n 35 | \n oaklib.om:DCC#S20.2 | \n GO:9999999 | \n fake term for testing retraction | \n NaN | \n publication is retracted: A role for plasma tr... | \n
\n \n
\n
"
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " type | \n",
+ " subject | \n",
+ " subject_label | \n",
+ " object_str | \n",
+ " info | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 35 | \n",
+ " oaklib.om:DCC#S20.2 | \n",
+ " GO:9999999 | \n",
+ " fake term for testing retraction | \n",
+ " NaN | \n",
+ " publication is retracted: A role for plasma tr... | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " type subject subject_label \\\n",
+ "35 oaklib.om:DCC#S20.2 GO:9999999 fake term for testing retraction \n",
+ "\n",
+ " object_str info \n",
+ "35 NaN publication is retracted: A role for plasma tr... "
+ ]
},
- "execution_count": 12,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df[\"type\"] == \"oaklib.om:DCC#S20.2\"]\n"
- ],
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7e8d97bc6e6c20b0",
"metadata": {
"collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-04-15T00:52:02.693591Z",
- "start_time": "2024-04-15T00:52:02.687692Z"
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "f5245d99ab0864d5"
- },
- {
- "cell_type": "markdown",
"source": [
"# Using LLMs to validate definitions\n",
"\n",
@@ -539,48 +2191,61 @@
"\n",
" - the publication [PMID:9708911](https://pubmed.ncbi.nlm.nih.gov/9708911/)\n",
" - the RHEA reaction [RHEA:27794](https://www.rhea-db.org/reaction?id=27794)"
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "7e8d97bc6e6c20b0"
+ ]
},
{
"cell_type": "code",
- "execution_count": 5,
- "outputs": [],
- "source": [
- "!runoak --stacktrace -i llm:{claude-3-opus}:simpleobo:input/validate-defs-test.obo validate-definitions -C input/validate-definition-conf.yaml GO:0000010 -O yaml -o output/validate-definitions.llm.yaml"
- ],
+ "execution_count": 13,
+ "id": "4e29eb9d8ff5df4c",
"metadata": {
- "collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-15T01:00:28.475900Z",
"start_time": "2024-04-15T01:00:13.437742Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "4e29eb9d8ff5df4c"
+ "outputs": [],
+ "source": [
+ "!runoak --stacktrace -i llm:{claude-3-opus}:simpleobo:input/validate-defs-test.obo validate-definitions -C input/validate-definition-conf.yaml GO:0000010 -O yaml -o output/validate-definitions.llm.yaml"
+ ]
},
{
"cell_type": "code",
- "execution_count": 7,
- "outputs": [],
- "source": [
- "import yaml\n",
- "report = yaml.safe_load(open(\"output/validate-definitions.llm.yaml\"))"
- ],
+ "execution_count": 14,
+ "id": "69f6da5532285cf9",
"metadata": {
- "collapsed": false,
"ExecuteTime": {
"end_time": "2024-04-15T01:01:41.771699Z",
"start_time": "2024-04-15T01:01:41.744373Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "69f6da5532285cf9"
+ "outputs": [],
+ "source": [
+ "import yaml\n",
+ "report = yaml.safe_load(open(\"output/validate-definitions.llm.yaml\"))"
+ ]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 15,
+ "id": "b35f8ffab12b1b6b",
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2024-04-15T01:09:34.475682Z",
+ "start_time": "2024-04-15T01:09:34.465369Z"
+ },
+ "collapsed": false,
+ "jupyter": {
+ "outputs_hidden": false
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -588,7 +2253,7 @@
"text": [
"type: https://w3id.org/oak/ontology-metadata/DCC.S20\n",
"subject: GO:0000010\n",
- "severity: ERROR\n",
+ "severity: INFO\n",
"predicate: IAO:0000115\n",
"object_str: \n",
" id: PMID:9708911\n",
@@ -611,13 +2276,15 @@
" \n",
"\n",
"info: \n",
- " The definition for the term \"trans-hexaprenyltranstransferase activity\" has a LOW level of alignment with the cited reference PMID:9708911.\n",
+ " The term \"trans-hexaprenyltranstransferase activity\" has a HIGH level of alignment with the cited reference PMID:9708911. The abstract supports the definition well, as evidenced by these key points:\n",
+ " \n",
+ " 1. The study examines the importance of the side chain length of ubiquinone (UQ) in Saccharomyces cerevisiae, which directly relates to the activity of trans-hexaprenyltranstransferase.\n",
" \n",
- " The abstract does not specifically mention the term \"trans-hexaprenyltranstransferase activity\". It discusses the biological significance of the side chain length of ubiquinone in Saccharomyces cerevisiae, and how different prenyl diphosphate synthases were expressed in a COQ1 mutant defective for hexaprenyl diphosphate synthesis to produce UQs with different isoprenoid chain lengths.\n",
+ " 2. The abstract mentions \"hexaprenyl diphosphate synthesis\" in S. cerevisiae, which is the product of trans-hexaprenyltranstransferase activity.\n",
" \n",
- " While the study involves manipulating the synthesis of hexaprenyl diphosphate, which is likely catalyzed by trans-hexaprenyltranstransferase, the enzyme itself is not explicitly mentioned or studied in the abstract.\n",
+ " 3. The study found that the original species of UQ (UQ-6) had maximum functionality in yeast cells, suggesting a preference for the hexaprenyl side chain length produced by trans-hexaprenyltranstransferase.\n",
" \n",
- " The definition seems too specific for the cited reference, as the abstract does not directly investigate or discuss the activity of trans-hexaprenyltranstransferase.\n",
+ " No sections of the abstract misalign with or contradict the term definition. The definition is appropriately specific, focusing on the enzyme's activity without providing additional details about its structure or cellular role.\n",
"\n",
"definition: \n",
" Catalysis of the reaction: (2E,6E)-farnesyl diphosphate + 4 isopentenyl diphosphate = 4 diphosphate + all-trans-heptaprenyl diphosphate.\n",
@@ -636,25 +2303,32 @@
" print(f\"{k}: {v}\")\n",
"\n",
" "
- ],
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "233f8a645b3517f2",
"metadata": {
"collapsed": false,
- "ExecuteTime": {
- "end_time": "2024-04-15T01:09:34.475682Z",
- "start_time": "2024-04-15T01:09:34.465369Z"
+ "jupyter": {
+ "outputs_hidden": false
}
},
- "id": "b35f8ffab12b1b6b"
+ "source": [
+ "__COMMENTARY__\n",
+ "\n",
+ "Note that as this is an LLM the output differs every time!\n",
+ "\n",
+ "In some cases, the LLM fails to see that the paper is indeed about trans-hexaprenyltranstransferase activity; even then, the output is useful as it shows us that the abstract is not directly about this activity."
+ ]
},
{
- "cell_type": "markdown",
- "source": [
- "While in this case, the LLM is failing to see that the paper is indeed about trans-hexaprenyltranstransferase activity, the output is useful as it shows us that the abstract is not directly about this activity."
- ],
- "metadata": {
- "collapsed": false
- },
- "id": "233f8a645b3517f2"
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df16f8ef-a274-4c8c-a1a5-bbef76597842",
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
diff --git a/notebooks/Commands/output/trigeminal-ganglion-graph.png b/notebooks/Commands/output/trigeminal-ganglion-graph.png
new file mode 100644
index 000000000..07ad6b2bc
Binary files /dev/null and b/notebooks/Commands/output/trigeminal-ganglion-graph.png differ
diff --git a/src/oaklib/cli.py b/src/oaklib/cli.py
index ff183193f..90282d18d 100644
--- a/src/oaklib/cli.py
+++ b/src/oaklib/cli.py
@@ -1,6 +1,6 @@
"""
Command Line Interface to OAK
------------------------------
+-----------------------------
Executed using "runoak" command
"""
@@ -1071,7 +1071,7 @@ def search(terms, output_type: str, autolabel, output: TextIO):
Searches ontology for entities that have a label, alias, or other property matching a search term.
Example:
- -------
+
runoak -i uberon.obo search limb
This uses the Pronto implementation to load uberon from disk, and does a basic substring
@@ -1136,11 +1136,11 @@ def subsets(output: str):
Shows information on subsets
Example:
- -------
+
runoak -i obolibrary:go.obo subsets
Example:
- -------
+
runoak -i cl.owl subsets
For background on subsets, see https://incatools.github.io/ontology-access-kit/concepts.html#subsets
@@ -1149,7 +1149,7 @@ def subsets(output: str):
terms (directly) in goslim_generic in GO:
Example:
- -------
+
runoak -i sqlite:obo:go info .in goslim_generic
Python API:
@@ -1157,7 +1157,7 @@ def subsets(output: str):
https://incatools.github.io/ontology-access-kit/interfaces/basic
See Also:
- --------
+
term-subsets command, which shows relationships of terms to subsets
"""
@@ -1192,25 +1192,25 @@ def obsoletes(
Shows all obsolete entities.
Example:
- -------
+
runoak -i obolibrary:go.obo obsoletes
To exclude *merged terms*, use the ``--no-include-merged`` flag
Example:
- -------
+
runoak -i obolibrary:go.obo obsoletes --no-include-merged
To show migration relationships, use the ``--show-migration-relationships`` flag
Example:
- -------
+
runoak -i obolibrary:go.obo obsoletes --show-migration-relationships
You can also specify terms to show obsoletes for:
Example:
- -------
+
runoak -i obolibrary:go.obo obsoletes --show-migration-relationships GO:0000187 GO:0000188
More examples:
@@ -1285,7 +1285,7 @@ def statistics(
Shows all descriptive/summary statistics
Example:
- -------
+
runoak -i sqlite:obo:pr statistics
By default, this will show combined summary statistics for all terms
@@ -1299,7 +1299,7 @@ def statistics(
- by prefix (e.g. GO, PR, CL, OBI)
Example:
- -------
+
runoak -i sqlite:obo:pr statistics -p oio:hasOBONamespace
Note: the oio:hasOBONamespace is *not* the same as the ID prefix, it is
@@ -1334,7 +1334,7 @@ def statistics(
option.
Example:
- -------
+
runoak -i v2.obo statistics --group-by-obo-namespace --compare-with v1.obo
This will also include change stats broken down by KGCL change types. If
@@ -1451,7 +1451,7 @@ def ontology_versions(ontologies, output: str, all: bool):
Currently only implemented for BioPortal
Example:
- -------
+
runoak -i bioportal: ontology-versions mp
All ontologies:
@@ -1489,19 +1489,19 @@ def ontology_metadata(ontologies, output_type: str, output: str, all: bool):
Shows ontology metadata
Example:
- -------
+
runoak -i bioportal: ontology-metadata obi uberon foodon
Use the ``--all`` option to show all ontologies
Example:
- -------
+
runoak -i bioportal: ontology-metadata --all
By default the output is YAML. You can get the results as TSV:
Example:
- -------
+
runoak -i bioportal: ontology-metadata --all -O csv
.. warning::
@@ -1547,7 +1547,7 @@ def term_metadata(terms, predicates, additional_metadata: bool, output_type: str
Shows term metadata.
Example:
- -------
+
runoak -i sqlite:obo:uberon term-metadata lung heart
You can filter the results for only selected predicates:
@@ -1666,14 +1666,14 @@ def annotate(
in these cases the endpoint functionality is used:
Example:
- -------
+
runoak -i bioportal: annotate "enlarged nucleus in T-cells from peripheral blood"
For other endpoints, the built-in OAK annotator is used. This currently uses a basic
algorithm based on lexical matching.
Example:
- -------
+
runoak -i sqlite:obo:cl annotate "enlarged nucleus in T-cells from peripheral blood"
Using the builtin annotator can be slow, as the lexical index is re-built every time.
@@ -1687,7 +1687,7 @@ def annotate(
as gilda only performs grounding.
Example:
- -------
+
runoak -i gilda: annotate -W BRCA2
Aliases can be listed in the output by setting the flag
@@ -1830,7 +1830,7 @@ def viz(
This requires that `obographviz `_ is installed.
Example:
- -------
+
runoak -i sqlite:cl.db viz CL:4023094
Same query on ubergraph:
@@ -1983,7 +1983,7 @@ def tree(
For general instructions, see the viz command, which this is analogous too.
Example:
- -------
+
runoak -i envo.db tree ENVO:00000372 -p i,p
This produces output like:
@@ -2002,7 +2002,7 @@ def tree(
You can use the --gap-fill option to create a minimal tree:
Example:
- -------
+
runoak -i envo.db tree --gap-fill 'pyroclastic shield volcano' 'subglacial volcano' volcano -p i
This will show the tree containing only these terms, and the most direct inferred relationships between them.
@@ -2011,7 +2011,7 @@ def tree(
the most informative intermediate classes:
Example:
- -------
+
runoak -i envo.db tree --add-mrcas --gap-fill 'pyroclastic shield volcano'\
'subglacial volcano' 'mud volcano' -p i
@@ -2126,7 +2126,7 @@ def ancestors(
a parent includes all relationship types, not just is-a.
Example:
- -------
+
runoak -i cl.owl ancestors CL:4023094
This will show ancestry over the full relationship graph. Like any relational
@@ -2250,19 +2250,19 @@ def paths(
List all paths between one or more start curies.
Example:
- -------
+
runoak -i sqlite:obo:go paths -p i,p 'nuclear membrane'
This shows all shortest paths from nuclear membrane to all ancestors
Example:
- -------
+
runoak -i sqlite:obo:go paths -p i,p 'nuclear membrane' --target cytoplasm
This shows shortest paths between two nodes
Example:
- -------
+
runoak -i sqlite:obo:go paths -p i,p 'nuclear membrane' 'thylakoid' --target cytoplasm 'thylakoid membrane'
This shows all shortest paths between 4 combinations of starts and ends
@@ -2276,7 +2276,7 @@ def paths(
You can also pass in weights for each predicate, used when calculating shortest paths.
Example:
- -------
+
runoak -i sqlite:obo:go paths -p i,p 'nuclear membrane' --target cytoplasm \
--predicate-weights "{i: 0.0001, p: 999}"
@@ -2287,7 +2287,7 @@ def paths(
This command can be combined with others to visualize the paths.
Example:
- -------
+
alias go="runoak -i sqlite:obo:go"
go paths -p i,p 'nuclear membrane' --target cytoplasm --narrow | go viz --fill-gaps -
@@ -2436,7 +2436,7 @@ def siblings(terms, predicates, output_type: str, output: str):
List all siblings of a specified term or terms
Example:
- -------
+
runoak -i cl.owl siblings CL:4023094
Note that siblings is by default over ALL relationship types, so we recommend
@@ -2477,11 +2477,11 @@ def descendants(
List all descendants of a term
Example:
- -------
+
runoak -i sqlite:obo:obi descendants assay -p i
Example:
- -------
+
runoak -i sqlite:obo:uberon descendants heart -p i,p
This is the inverse of the 'ancestors' command; see the documentation for
@@ -2531,11 +2531,11 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs):
Exports (dumps) the entire contents of an ontology.
Example:
- -------
+
runoak -i pato.obo dump -o pato.json -O json
Example:
- -------
+
runoak -i pato.owl dump -o pato.ttl -O turtle
You can also pass in a JSON configuration file to parameterize the dump process.
@@ -2545,7 +2545,7 @@ def dump(terms, output, output_type: str, config_file: str = None, **kwargs):
https://incatools.github.io/ontology-access-kit/converters/obo-graph-to-fhir.html
Example:
- -------
+
runoak -i pato.owl dump -o pato.ttl -O fhirjson -c fhir_config.json -o pato.fhir.json
Currently each implementation only supports a subset of formats.
@@ -2590,11 +2590,11 @@ def transform(terms, transform, output, output_type: str, config_file: str = Non
Transforms an ontology
Example:
- -------
+
runoak -i pato.obo dump -o pato.json -O json
Example:
- -------
+
runoak -i pato.owl dump -o pato.ttl -O turtle
You can also pass in a JSON configuration file to parameterize the dump process.
@@ -2604,7 +2604,7 @@ def transform(terms, transform, output, output_type: str, config_file: str = Non
https://incatools.github.io/ontology-access-kit/converters/obo-graph-to-fhir.html
Example:
- -------
+
runoak -i pato.owl dump -o pato.ttl -O fhirjson -c fhir_config.json -o pato.fhir.json
Currently each implementation only supports a subset of formats.
@@ -2679,7 +2679,7 @@ def prefixes(terms, used_only: bool, output, output_type: str):
prefix maps.
Example:
- -------
+
runoak --named-prefix-map prefixcc prefixes
If an ontology is loaded, then --used-only can be used to restrict to
@@ -2789,7 +2789,7 @@ def similarity_pair(terms, predicates, autolabel: bool, output: TextIO, output_t
Note: We recommend always specifying explicit predicate lists
Example:
- -------
+
runoak -i ubergraph: similarity-pair -p i,p CL:0000540 CL:0000000
You can omit predicates if you like but be warned this may yield
@@ -2895,7 +2895,7 @@ def similarity(
- via explicit lists of terms or queries
Example:
- -------
+
runoak -i hp.db similarity -p i --set1-file HPO-TERMS1 --set2-file HPO-TERMS2 -O csv
This will compare every term in TERMS1 vs TERMS2
@@ -2903,13 +2903,13 @@ def similarity(
Alternatively standard OAK term queries can be used, with "@" separating the two lists
Example:
- -------
+
runoak -i hp.db similarity -p i TERM_1 TERM_2 ... TERM_N @ TERM_N+1 ... TERM_M
The .all term syntax can be used to select all terms in an ontology
Example:
- -------
+
runoak -i ma.db similarity -p i,p .all @ .all
This can be mixed with other term selectors; for example to calculate the similarity of "neuron"
@@ -3012,7 +3012,7 @@ def termset_similarity(
This calculates a similarity matrix for two sets of terms.
Example:
- -------
+
runoak -i go.db termset-similarity -p i,p nucleus membrane @ "nuclear membrane" vacuole -p i,p
Python API:
@@ -3067,7 +3067,7 @@ def information_content(
Show information content for term or list of terms
Example:
- -------
+
runoak -i cl.db information-content -p i .all
Like all OAK commands that operate over graphs, the graph traversal is controlled
@@ -3116,7 +3116,7 @@ def info(terms, output: TextIO, display: str, output_type: str):
Show information on term or set of terms
Example:
- -------
+
runoak -i sqlite:obo:cl info CL:4023094
The default output is minimal, showing only ID and label
@@ -3175,7 +3175,7 @@ def languages():
Show available languages
Example:
- -------
+
runoak languages
"""
@@ -3209,13 +3209,13 @@ def labels(
Show labels for term or list of terms
Example:
- -------
+
runoak -i cl.owl labels CL:4023093 CL:4023094
You can use the ".all" selector to show all labels:
Example:
- -------
+
runoak -i cl.owl labels .all
(this may be blocked for remote endpoints)
@@ -3231,13 +3231,13 @@ def labels(
a particular language.
Example:
- -------
+
runoak --preferred-language fr -i sqlite:obo:hpinternational labels .ancestors HP:0020110
You can also query for all languages, and see these pivoted:
Example:
- -------
+
runoak -i sqlite:obo:hpinternational labels .ancestors HP:0020110 --pivot-languages
Python API:
@@ -3321,14 +3321,14 @@ def definitions(
Show textual definitions for term or set of terms
Example:
- -------
+
runoak -i sqlite:obo:envo definitions 'tropical biome' 'temperate biome'
You can use the ".all" selector to show all definitions for all terms in the ontology:
Example:
- -------
+
runoak -i sqlite:obo:envo definitions .all
@@ -3409,6 +3409,12 @@ def definitions(
show_default=True,
help="Include entailed indirect relationships",
)
+@click.option(
+ "--non-redundant-entailed/--no-non-redundant-entailed",
+ default=False,
+ show_default=True,
+ help="Include entailed but exclude entailed redundant relationships",
+)
@click.option(
"--include-tbox/--no-include-tbox",
default=True,
@@ -3439,6 +3445,7 @@ def relationships(
include_entailed: bool,
include_tbox: bool,
include_abox: bool,
+ non_redundant_entailed: bool,
include_metadata: bool,
):
"""
@@ -3447,25 +3454,25 @@ def relationships(
By default, this shows all relationships where the input term(s) are the *subjects*
Example:
- -------
+
runoak -i cl.db relationships CL:4023094
Like all OAK commands, a label can be passed instead of a CURIE
Example:
- -------
+
runoak -i cl.db relationships neuron
To reverse the direction, and query where the search term(s) are *objects*, use the --direction flag:
Example:
- -------
+
runoak -i cl.db relationships --direction down neuron
Multiple terms can be passed
Example:
- -------
+
runoak -i uberon.db relationships heart liver lung
And like all OAK commands, a query can be passed rather than an explicit term list
@@ -3509,6 +3516,15 @@ def relationships(
include_tbox=include_tbox,
include_entailed=include_entailed,
)
+ if non_redundant_entailed:
+ if not isinstance(impl, OboGraphInterface):
+ raise NotImplementedError(f"Cannot execute this using {impl} of type {type(impl)}")
+ up_it = impl.non_redundant_entailed_relationships(
+ subjects=curies,
+ predicates=actual_predicates,
+ include_abox=include_abox,
+ include_tbox=include_tbox,
+ )
if direction is None or direction == Direction.up.value:
it = up_it
elif direction == Direction.down.value:
@@ -3615,7 +3631,7 @@ def logical_definitions(
You can also specify CSV to generate a flattened form of this.
Example:
- -------
+
pato logical-definitions .all --output-type csv
You can optionally choose to "--matrix-axes" to transform the output to a matrix form.
@@ -3623,12 +3639,12 @@ def logical_definitions(
type: "f" for filler, "p" for predicate, "g" for genus, "d" for defined class.
Example:
- -------
+
- Each property/predicate is a column
- For repeated properties, columns of the form prop_1, prop_2, ... are generated
Example:
- -------
+
pato logical-definitions .all --matrix-axes d,p --output-type csv
This will generate a row for each defined class with a logical definition, with columns
@@ -3752,7 +3768,7 @@ def disjoints(
serialization:
Example:
- -------
+
runoak -i sqlite:obo:uberon disjoints
Note that this will include pairwise disjoints, setwise disjoints,
@@ -3761,13 +3777,13 @@ def disjoints(
A tabular format can be easier to browse, and includes labels by default:
Example:
- -------
+
runoak -i sqlite:obo:uberon disjoints --autolabel -O csv
To perform this on a subset:
Example:
- -------
+
runoak -i sqlite:obo:cl disjoints --autolabel -O csv .desc//p=i "immune cell"
Data model:
@@ -3869,7 +3885,7 @@ def terms(output: str, owl_type, filter_obsoletes: bool):
List all terms in the ontology
Example:
- -------
+
runoak -i db/cob.db terms
All terms without obsoletes:
@@ -3921,7 +3937,7 @@ def roots(output: str, output_type: str, predicates: str, has_prefix: str, annot
and parameterizing
Example:
- -------
+
runoak -i db/cob.db roots
This command is a wrapper onto the "roots" command in the BasicOntologyInterface.
@@ -3958,7 +3974,7 @@ def leafs(output: str, predicates: str, filter_obsoletes: bool):
Note that the default is to return the roots of the relation graph over *all* predicates
Example:
- -------
+
runoak -i db/cob.db leafs
This command is a wrapper onto the "leafs" command in the BasicOntologyInterface.
@@ -3990,7 +4006,7 @@ def singletons(output: str, predicates: str, filter_obsoletes: bool):
Obsoletes are filtered by default
Example:
- -------
+
runoak -i db/cob.db singletons
This command is a wrapper onto the "singletons" command in the BasicOntologyInterface.
@@ -4027,7 +4043,7 @@ def mappings(terms, maps_to_source, autolabel: bool, output, output_type, mapper
List all mappings encoded in the ontology
Example:
- -------
+
runoak -i sqlite:obo:envo mappings
The default output is SSSOM YAML. To use the (canonical) csv format:
@@ -4099,7 +4115,7 @@ def normalize(terms, maps_to_source, autolabel: bool, output, output_type):
Normalize all input identifiers.
Example:
- -------
+
runoak -i translator: normalize HGNC:1 HGNC:2 -M NCBIGene
Python API:
@@ -4142,7 +4158,7 @@ def aliases(terms, output, output_type, obo_model):
List aliases for a term or set of terms.
Example:
- -------
+
runoak -i ubergraph:uberon aliases UBERON:0001988
TERMS should be either an explicit list of terms or queries, or can be a selector query,
@@ -4200,7 +4216,7 @@ def term_subsets(terms, output, output_type):
List subsets for a term or set of terms.
Example:
- -------
+
runoak -i sqlite:obo:uberon term-subsets heart lung
Python API:
@@ -4228,7 +4244,7 @@ def expand_subsets(subsets: list, output, predicates):
For each subset provide a mapping of each term in the ontology to a subset
Example:
- -------
+
runoak -i db/pato.db expand-subsets attribute_slim value_slim
"""
@@ -4287,7 +4303,7 @@ def axioms(terms, output: str, output_type: str, axiom_type: str, about: str, re
Filters axioms
Example:
- -------
+
runoak -i cl.ofn axiom
The above will write all axioms.
@@ -4295,7 +4311,7 @@ def axioms(terms, output: str, output_type: str, axiom_type: str, about: str, re
You can filter by axiom type:
Example:
- -------
+
runoak -i cl.ofn axiom --axiom-type SubClassOf
Note this currently only works with the funowl adapter, on functional syntax files
@@ -4366,11 +4382,11 @@ def taxon_constraints(
of NCBI Taxonomy
Example:
- -------
+
runoak -i db/go.db taxon-constraints GO:0034357 --include-redundant -p i,p
Example:
- -------
+
runoak -i sqlite:obo:uberon taxon-constraints UBERON:0003884 UBERON:0003941 -p i,p
More examples:
@@ -4432,7 +4448,7 @@ def apply_taxon_constraints(
separated by periods.
Example:
- -------
+
runoak -i db/go.db apply-taxon-constraints -p i,p GO:0005743 only NCBITaxon:2759
never NCBITaxon:2 . GO:0005634 only NCBITaxon:2
@@ -4444,7 +4460,7 @@ def apply_taxon_constraints(
GO:0000229,Gain|NCBITaxon:1(root);>Loss|NCBITaxon:2759(Eukaryota);
Example:
- -------
+
runoak -i db/go.db eval-taxon-constraints -p i,p -E tests/input/go-evo-gains-losses.csv
More examples:
@@ -4544,7 +4560,7 @@ def associations(
Lookup associations from or to entities.
Example:
- -------
+
runoak -i sqlite:obo:hp -g test.hpoa -G hpoa associations
The above will show all associations
@@ -4553,7 +4569,7 @@ def associations(
terms or term queries, plus the closure predicate(s), e.g.
Example:
- -------
+
runoak -i sqlite:obo:hp -g test.hpoa -G hpoa associations -p i HP:0001392
This shows all annotations either to "Abnormality of the liver" (HP:0001392), or
@@ -4567,7 +4583,7 @@ def associations(
For example, the go-dictybase-input-spec combines go plus dictybase associations.
Example:
- -------
+
runoak --i src/oaklib/conf/go-dictybase-input-spec.yaml associations -p i,p GO:0008104
More examples:
@@ -4702,7 +4718,7 @@ def associations_counts(
Count associations, grouped by subject or object
Example:
- -------
+
runoak -i sqlite:obo:hp -g test.hpoa -G hpoa associations-counts
This will default to summarzing by objects (HPO term), showing the number
@@ -4712,13 +4728,13 @@ def associations_counts(
the closure predicate(s), e.g.
Example:
- -------
+
runoak -i sqlite:obo:hp -g test.hpoa -G hpoa associations -p i
You can also group by other fields
Example:
- -------
+
runoak -i sqlite:obo:hp -g test.hpoa -G hpoa associations-counts --group-by subject
This will show the number of associations for each disease.
@@ -4820,7 +4836,7 @@ def associations_matrix(
See: Wood V., Carbon S., et al, https://royalsocietypublishing.org/doi/10.1098/rsob.200149
Example:
- -------
+
runoak -i amigo:NCBITaxon:9606 associations-matrix -p i,p GO:0042416 GO:0014046
@@ -4892,7 +4908,7 @@ def rollup(
sub-groups.
Example:
- -------
+
runoak -i sqlite:go.db -g wb.gaf -G gaf rollup \
--object-group GO:0032502,GO:0007568,GO:0048869,GO:0098727 \
--object-group GO:0008152,GO:0009056,GO:0044238,GO:1901275 \
@@ -5042,7 +5058,7 @@ def enrichment(
associations, return the terms that are over-represented in the sample set.
Example:
- -------
+
runoak -i sqlite:obo:uberon -g gene2anat.txt -G g2t enrichment -U my-genes.txt -O csv
This runs an enrichment using Uberon on my-genes.txt, using the gene2anat.txt file as the
@@ -5131,7 +5147,7 @@ def diff_associations(
Diffs two association sources.
Example:
- -------
+
runoak -i sqlite:obo:go -G gaf diff-associations \
--old-date ${date1} --new-date ${date2} \
-g "${download_dir}/${group}-${date1}.gaf" \
@@ -5230,7 +5246,7 @@ def validate(
Implementation notes: Currently only works on SQLite
Example:
- -------
+
runoak -i db/ecto.db validate -o results.tsv
The default validation performed is structural (conformance to the ontology_metadata schema)
@@ -5241,7 +5257,7 @@ def validate(
To run these, pass --no-skip-ontology-rules
Example:
- -------
+
runoak -i db/uberon.db validate --skip-structural-validation --no-skip-ontology-rules
For more information, see the OAK how-to guide:
@@ -5383,7 +5399,7 @@ def validate_definitions(
checks for *presence* of definitions, use --skip-text-annotation:
Example:
- -------
+
runoak validate-definitions -i db/uberon.db --skip-text-annotation
@@ -5392,7 +5408,7 @@ def validate_definitions(
individual classes
Example:
- -------
+
runoak validate-definitions -i db/cl.db CL:0002053
@@ -5633,7 +5649,7 @@ def migrate_curies(curie_pairs, replace: bool, output_type, output: str):
source and the target
Example:
- -------
+
runoak -i db/uberon.db migrate-curies --replace SRC1=TGT1 SRC2=TGT2
This command is a wrapper onto the "migrate_curies" command in the PatcherInterface
@@ -5665,7 +5681,7 @@ def set_apikey(endpoint, keyval):
Sets an API key
Example:
- -------
+
oak set-apikey -e bioportal MY-KEY-VALUE
This is stored in an OS-dependent path
@@ -5757,7 +5773,7 @@ def lexmatch(
Performs lexical matching between pairs of terms in one more more ontologies.
Examples
- --------
+
runoak -i foo.obo lexmatch -o foo.sssom.tsv
In this example, the input ontology file is assumed to contain all pairs of terms to be mapped.
@@ -5935,7 +5951,7 @@ def diff(
Compute difference between two ontologies.
Example:
- -------
+
runoak -i foo.obo diff -X bar.obo -o diff.yaml
This will produce a list of Changes that are required to go from the main input ontology (--input)
@@ -5963,7 +5979,7 @@ def diff(
the GO uses the oio:hasOBONamespace property to partition classes into 3 categories.
Example:
- -------
+
runoak -i go.obo diff -X go-new.obo -o diff.yaml --statistics --group-by-property oio:hasOBONamespace
This will produce a YAML dictionary, with outer keys being the values of the oio:hasOBONamespace property,
@@ -6068,7 +6084,7 @@ def apply(
https://github.com/INCATools/kgcl
Example:
- -------
+
runoak -i cl.owl.ttl apply "rename CL:0000561 to 'amacrine neuron'" -o cl.owl.ttl -O ttl
On an obo format file:
@@ -6082,7 +6098,7 @@ def apply(
-o cl.owl.ttl -O ttl
Warning:
- -------
+
This command is still experimental. Some things to bear in mind:
- for some ontologies, CURIEs may not work, instead specify a full URI surrounded by <>s
@@ -6162,7 +6178,7 @@ def apply_obsolete(output, output_type, expand: bool, terms, **kwargs):
Sets an ontology element to be obsolete
Example:
- -------
+
runoak -i my.obo apply-obsolete MY:0002200 -o my-modified.obo
Multiple terms can be passed, as labels, IDs, or using OAK queries:
@@ -6217,7 +6233,7 @@ def lint(output, output_type, report_format, dry_run: bool):
By default, changes will be applied
Example:
- -------
+
runoak -i my.obo lint
This can be executed in dry-run mode, in which case changes are not applied:
@@ -6306,7 +6322,7 @@ def diff_via_mappings(
command will perform a structural comparison of all mapped pairs of terms
Example:
- -------
+
runoak -i sqlite:obo:uberon diff-via-mappings --other-input sqlite:obo:zfa --source UBERON --source ZFA -O csv
Note the above command does not have any mapping file specified; the mappings that are distributed within
@@ -6453,7 +6469,7 @@ def fill_table(
headers for a table of ontology elements (see later for configuration when you don't follow conventions)
Example:
- -------
+
runoak -i cl.owl.ttl fill-table my-table.tsv
(any implementation can be used)
@@ -6493,7 +6509,7 @@ def fill_table(
given a TSV with columns cl_identifier and cl_display_label you can say:
Example:
- -------
+
runoak -i cl.owl.ttl fill-table \
--relation "{primary_key: cl_identifier, dependent_column: cl_display_label, relation: label}"
@@ -6615,14 +6631,14 @@ def generate_synonyms(terms, rules_file, apply_patch, patch, patch_format, outpu
applied. Pass the `--patch` argument to lso get the patch file in KGCL format.
Example:
- -------
+
runoak -i foo.obo generate-synonyms -R foo_rules.yaml --patch patch.kgcl --apply-patch -o foo_syn.obo
If the `apply-patch` flag is NOT set then the main input will be KGCL commands
Example:
- -------
+
runoak -i foo.obo generate-synonyms -R foo_rules.yaml -o changes.kgcl
@@ -6693,7 +6709,7 @@ def generate_lexical_replacements(
applied. Pass the `--patch` argument to lso get the patch file in KGCL format.
Example:
- -------
+
runoak -i foo.obo generate-lexical-replacements -R foo_rules.yaml\
--patch patch.kgcl --apply-patch -o foo_syn.obo
@@ -6701,7 +6717,7 @@ def generate_lexical_replacements(
If the `apply-patch` flag is NOT set then the main input will be KGCL commands
Example:
- -------
+
runoak -i foo.obo generate-lexical-replacements -R foo_rules.yaml -o changes.kgcl
@@ -6709,7 +6725,7 @@ def generate_lexical_replacements(
You can also pass the expressions directly as YAML
Example:
- -------
+
runoak -i foo.obo generate-lexical-replacements \
-Y '{match: "nuclear (\\w+)", replacement: "\\1 nucleus"}' .all
@@ -6782,13 +6798,13 @@ def generate_definitions(terms, apply_patch, patch, patch_format, output, output
Currently this only works with the llm extension.
Example:
- -------
+
runoak -i llm:sqlite:obo:foodon generate-definitions FOODON:03315258
The --style-hints option can be used to provide hints to the definition generator.
Example:
- -------
+
runoak -i llm:sqlite:obo:foodon generate-definitions FOODON:03315258 \
--style-hints "Write the definition in the style of a pretentious food critic"
@@ -6972,7 +6988,7 @@ def generate_disjoints(
Generate candidate disjointness axioms.
Example:
- -------
+
runoak -i sqlite:obo:iao generate-disjoints -O obo
To generate spatial disjointness axioms:
diff --git a/src/oaklib/interfaces/obograph_interface.py b/src/oaklib/interfaces/obograph_interface.py
index 2165c3f51..05b67fbbc 100644
--- a/src/oaklib/interfaces/obograph_interface.py
+++ b/src/oaklib/interfaces/obograph_interface.py
@@ -259,6 +259,38 @@ def descendant_graph(
self.transitive_query_cache[key] = g
return g
+    def non_redundant_entailed_relationships(
+        self,
+        predicates: List[PRED_CURIE] = None,
+        **kwargs,
+    ) -> Iterator[RELATIONSHIP]:
+        """
+        Yields the entailed relationships that are not redundant with one another.
+
+        All entailed relationships matching the query are grouped by
+        (subject, predicate). Within each group, an object is treated as
+        redundant if it is a non-reflexive ancestor (over ``predicates``) of
+        another object in the same group; only the remaining objects are
+        yielded. Relationships whose subject and object are identical are
+        always skipped.
+
+        See https://github.com/INCATools/ontology-access-kit/issues/739
+
+        :param predicates: predicates used both to select the entailed
+            relationships and to traverse ancestors when testing redundancy
+        :param kwargs: same as relationships; any ``include_entailed`` value is
+            ignored because entailed relationships are always requested
+        :return: iterator over non-redundant (subject, predicate, object) triples
+        """
+        # Entailment is forced on below, so discard any caller-supplied value
+        # rather than passing the keyword twice.
+        kwargs.pop("include_entailed", None)
+        objs_by_sp = defaultdict(list)
+        for s, p, o in self.relationships(predicates=predicates, include_entailed=True, **kwargs):
+            if s != o:
+                objs_by_sp[(s, p)].append(o)
+        # The same object can appear in many (subject, predicate) groups;
+        # memoize its ancestors so each object is traversed at most once per call.
+        ancestor_cache = {}
+        for (s, p), objs in objs_by_sp.items():
+            redundant = set()
+            for o in objs:
+                if o not in ancestor_cache:
+                    ancestor_cache[o] = set(
+                        self.ancestors(o, predicates=predicates, reflexive=False)
+                    )
+                redundant |= ancestor_cache[o]
+            for o in objs:
+                if o not in redundant:
+                    yield s, p, o
+
def ancestors(
self,
start_curies: Union[CURIE, List[CURIE]],
diff --git a/tests/__init__.py b/tests/__init__.py
index 6309277f9..f3d13c68e 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -46,6 +46,8 @@ def output_path(fn: str) -> str:
NUCLEUS = "GO:0005634"
ORGANELLE_MEMBRANE = "GO:0031090"
NUCLEAR_ENVELOPE = "GO:0005635"
+ORGANELLE_ENVELOPE = "GO:0031967"
+ENVELOPE = "GO:0031975"
THYLAKOID = "GO:0009579"
ATOM = "CHEBI:33250"
INTERNEURON = "CL:0000099"
diff --git a/tests/test_implementations/__init__.py b/tests/test_implementations/__init__.py
index 18c45bffb..0abd97455 100644
--- a/tests/test_implementations/__init__.py
+++ b/tests/test_implementations/__init__.py
@@ -96,6 +96,7 @@
CYTOPLASM,
CYTOPLASMIC_REGION,
ENDOMEMBRANE_SYSTEM,
+ ENVELOPE,
EUKARYOTA,
FAKE_ID,
FUNGI,
@@ -120,6 +121,8 @@
NUCLEUS,
OPISTHOKONTA,
ORGANELLE,
+ ORGANELLE_ENVELOPE,
+ ORGANELLE_MEMBRANE,
PHENOTYPIC_ABNORMALITY,
PHOTORECEPTOR_OUTER_SEGMENT,
PHOTOSYNTHETIC_MEMBRANE,
@@ -615,6 +618,42 @@ def test_relationships(self, oi: BasicOntologyInterface, ignore_annotation_edges
irels = list(oi.incoming_relationships(o, predicates=[p]))
test.assertIn((p, s), irels)
+    def test_entailed_relationships(self, oi: OboGraphInterface):
+        """
+        Checks non-redundant entailed relationships for compliance.
+
+        For each case, queries ``non_redundant_entailed_relationships`` for a
+        single subject and a list of predicates, then compares the objects
+        returned for each predicate against the expected set.
+
+        :param oi: implementation under test
+        :return:
+        """
+        test = self.test
+        # each case: (subject, predicates to query, expected objects per predicate)
+        cases = [
+            (
+                NUCLEAR_MEMBRANE,
+                [IS_A, PART_OF],
+                {IS_A: {ORGANELLE_MEMBRANE}, PART_OF: {NUCLEAR_ENVELOPE}},
+            ),
+            (
+                NUCLEAR_MEMBRANE,
+                [IS_A, OVERLAPS],
+                {IS_A: {ORGANELLE_MEMBRANE}, OVERLAPS: {NUCLEAR_ENVELOPE}},
+            ),
+            (NUCLEAR_MEMBRANE, [IS_A], {IS_A: {ORGANELLE_MEMBRANE}}),
+            (
+                NUCLEAR_MEMBRANE,
+                [PART_OF],
+                {PART_OF: {NUCLEAR_ENVELOPE, ORGANELLE_ENVELOPE, ENVELOPE}},
+            ),
+        ]
+        for subject, query_preds, expected_objs in cases:
+            logging.info(f"TESTS FOR {subject}")
+            triples = list(
+                oi.non_redundant_entailed_relationships(
+                    subjects=[subject], predicates=query_preds
+                )
+            )
+            observed = {pred: set() for pred in query_preds}
+            for s, p, o in triples:
+                # a predicate that was not requested surfaces here as a KeyError
+                observed[p].add(o)
+                assert s == subject
+            for pred in query_preds:
+                test.assertCountEqual(expected_objs[pred], observed[pred])
+
def test_rbox_relationships(self, oi: BasicOntologyInterface):
"""
Tests relationships between relationship types
diff --git a/tests/test_implementations/test_simple_obo.py b/tests/test_implementations/test_simple_obo.py
index b7328a10e..a1eced5f7 100644
--- a/tests/test_implementations/test_simple_obo.py
+++ b/tests/test_implementations/test_simple_obo.py
@@ -114,6 +114,10 @@ def test_relationships_extra(self):
def test_relationships(self):
self.compliance_tester.test_relationships(self.oi)
+    @unittest.skip("Contents of go-nucleus file need to be aligned")
+    def test_entailed_relationships(self):
+        """Run the shared entailed-relationships compliance checks (currently skipped)."""
+        tester = self.compliance_tester
+        tester.test_entailed_relationships(self.oi)
+
def test_rbox_relationships(self):
self.compliance_tester.test_rbox_relationships(self.oi)
diff --git a/tests/test_implementations/test_sqldb.py b/tests/test_implementations/test_sqldb.py
index 010ca026a..62bb68584 100644
--- a/tests/test_implementations/test_sqldb.py
+++ b/tests/test_implementations/test_sqldb.py
@@ -90,6 +90,10 @@ def test_relationships(self):
oi = SqlImplementation(OntologyResource(slug=f"sqlite:///{str(DB)}"))
self.compliance_tester.test_relationships(oi, ignore_annotation_edges=False)
+    def test_entailed_relationships(self):
+        """
+        Run the shared entailed-relationships compliance checks against an
+        implementation opened on the SQLite file referenced by ``DB``.
+        """
+        resource = OntologyResource(slug=f"sqlite:///{str(DB)}")
+        self.compliance_tester.test_entailed_relationships(SqlImplementation(resource))
+
def test_relationships_chunking(self):
"""
Tests behavior for chunking relationship queries