Skip to content

Commit

Permalink
Rerun slim.ipynb with corrected disease names
Browse files Browse the repository at this point in the history
  • Loading branch information
dhimmel committed Mar 23, 2016
1 parent 94e3eb0 commit bb2b282
Showing 1 changed file with 108 additions and 46 deletions.
154 changes: 108 additions & 46 deletions slim.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create useful datasets for DO Slim -- a non-redundant set of diseases"
]
},
{
"cell_type": "code",
"execution_count": 1,
Expand Down Expand Up @@ -55,21 +62,21 @@
" <tr>\n",
" <th>0</th>\n",
" <td>DOID:2531</td>\n",
" <td>Hematologic cancer</td>\n",
" <td>hematologic cancer</td>\n",
" <td>DOcancerslim</td>\n",
" <td>neoplastic</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DOID:1319</td>\n",
" <td>Brain cancer</td>\n",
" <td>brain cancer</td>\n",
" <td>DOcancerslim</td>\n",
" <td>neoplastic</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>DOID:1324</td>\n",
" <td>Lung cancer</td>\n",
" <td>lung cancer</td>\n",
" <td>DOcancerslim</td>\n",
" <td>neoplastic</td>\n",
" </tr>\n",
Expand All @@ -79,9 +86,9 @@
],
"text/plain": [
" doid name source pathophysiology\n",
"0 DOID:2531 Hematologic cancer DOcancerslim neoplastic\n",
"1 DOID:1319 Brain cancer DOcancerslim neoplastic\n",
"2 DOID:1324 Lung cancer DOcancerslim neoplastic"
"0 DOID:2531 hematologic cancer DOcancerslim neoplastic\n",
"1 DOID:1319 brain cancer DOcancerslim neoplastic\n",
"2 DOID:1324 lung cancer DOcancerslim neoplastic"
]
},
"execution_count": 3,
Expand All @@ -99,25 +106,46 @@
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [],
"source": [
"# Convert DO slim names to those in the ontology\n",
"id_to_name = {node.id: node.name for node in dox}\n",
"slim_df.name = slim_df.doid.map(id_to_name).tolist()\n",
"slim_df.to_csv(path, sep='\\t', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>doid</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>pathophysiology</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>DOID:9917</td>\n",
" <td>pleural cancer</td>\n",
" <td>DOcancerslim</td>\n",
" <td>neoplastic</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" doid name source pathophysiology\n",
"55 DOID:9917 pleural cancer DOcancerslim neoplastic"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# check for terms that are in slim but not in the DO\n",
"all_doids = set(do.get_term_ids())\n",
Expand All @@ -129,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {
"collapsed": false
},
Expand All @@ -146,7 +174,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {
"collapsed": false
},
Expand All @@ -161,11 +189,63 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>doid_code</th>\n",
" <th>doid_name</th>\n",
" <th>resource</th>\n",
" <th>resource_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>DOID:2531</td>\n",
" <td>hematologic cancer</td>\n",
" <td>CSP</td>\n",
" <td>2004-1600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DOID:2531</td>\n",
" <td>hematologic cancer</td>\n",
" <td>CSP</td>\n",
" <td>2004-1803</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>DOID:2531</td>\n",
" <td>hematologic cancer</td>\n",
" <td>CSP</td>\n",
" <td>2004-2820</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" doid_code doid_name resource resource_id\n",
"0 DOID:2531 hematologic cancer CSP 2004-1600\n",
"1 DOID:2531 hematologic cancer CSP 2004-1803\n",
"2 DOID:2531 hematologic cancer CSP 2004-2820"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"slim_df = slim_df.rename(columns={'doid': 'doid_code'})\n",
"slim_map_unprop_df = slim_df[['doid_code']].merge(map_unprop_df)\n",
Expand All @@ -175,7 +255,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {
"collapsed": false
},
Expand All @@ -190,16 +270,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {
"collapsed": false
},
Expand All @@ -219,15 +290,6 @@
"path = os.path.join('data', 'slim-terms-prop.tsv')\n",
"slim_prop_df.to_csv(path, sep='\\t', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit bb2b282

Please sign in to comment.