Skip to content

Commit

Permalink
Export disease descriptions to a TSV
Browse files Browse the repository at this point in the history
  • Loading branch information
dhimmel committed Sep 9, 2016
1 parent 75050ea commit 052ffcc
Show file tree
Hide file tree
Showing 2 changed files with 6,640 additions and 24 deletions.
89 changes: 65 additions & 24 deletions DO-xrefs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,14 @@
"source": [
"import os\n",
"import csv\n",
"import re\n",
"\n",
"import networkx\n",
"import pandas\n",
"\n",
"import do_tools"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 2,
Expand All @@ -47,16 +40,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {
"collapsed": false
},
Expand All @@ -69,12 +53,68 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": []
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>disease_id</th>\n",
" <th>name</th>\n",
" <th>description</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1509</th>\n",
" <td>DOID:0001816</td>\n",
" <td>angiosarcoma</td>\n",
" <td>A malignant vascular tumor that results_in rap...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3043</th>\n",
" <td>DOID:0002116</td>\n",
" <td>pterygium</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" disease_id name \\\n",
"1509 DOID:0001816 angiosarcoma \n",
"3043 DOID:0002116 pterygium \n",
"\n",
" description \n",
"1509 A malignant vascular tumor that results_in rap... \n",
"3043 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build and export a table of disease descriptions.\n",
"# The description is the text between the double quotes that open a term's definition;\n",
"# terms whose definition does not start with a quoted string get an empty description.\n",
"pattern = re.compile(r'^\"(.*?)\"')\n",
"rows = []\n",
"for term in dox:\n",
"    match = pattern.search(term.definition)\n",
"    if match:\n",
"        description = match.group(1)\n",
"    else:\n",
"        description = ''\n",
"    rows.append((term.id, term.name, description))\n",
"\n",
"# One row per term, ordered by disease identifier\n",
"description_df = pandas.DataFrame(rows, columns=['disease_id', 'name', 'description'])\n",
"description_df = description_df.sort_values('disease_id')\n",
"\n",
"# Write the tab-separated export (without the index column), then preview the first two rows\n",
"description_df.to_csv('data/description.tsv', sep='\\t', index=False)\n",
"description_df.head(2)"
]
},
{
"cell_type": "code",
Expand Down Expand Up @@ -210,8 +250,9 @@
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
Expand All @@ -225,7 +266,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.0"
"version": "3.5.2"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 052ffcc

Please sign in to comment.