Updated gnomon --> gnomonicus

oxfordmmm · Sep 20, 2022 · bbee266 · bbee266
1 parent b3edc23
commit bbee266
Show file tree

Hide file tree

Showing 5 changed files with 65 additions and 38 deletions.
diff --git a/.gitignore b/.gitignore
@@ -129,4 +129,4 @@ dmypy.json
 .pyre/
 
 #Example output folder
-gnomon-out
+gnomonicus-out
diff --git a/README.md b/README.md
@@ -1,2 +1,22 @@
 # AMR-catalogue-demo
-A Jupyter notebook to demonstrate usage of `piezo`, `gumpy` and `gnomon` with different AMR catalogues
+A Jupyter notebook to demonstrate usage of `piezo`, `gumpy` and `gnomonicus` with different AMR catalogues
+
+## Requirements
+Requirements which are not auto-installed:
+* Python >= 3.8
+* pip
+* Jupyter notebook
+
+## Install
+Due to memory limitations, this cannot be run using Binder, so install it locally or on a VM with >=4GB RAM.
+
+Optional virtual environment:
+```
+python -m virtualenv env
+source env/bin/activate
+```
+
+Install requirements:
+```
+pip install -r requirements.txt
+```
diff --git a/demo.ipynb b/demo.ipynb
@@ -5,7 +5,7 @@
    "metadata": {},
    "source": [
     "# AMR catalogue demo\n",
-    "This notebook should show how to utilise different AMR catalogues within `piezo` and `gnomon` to produce predictions.\n"
+    "This notebook should show how to utilise different AMR catalogues within `piezo` and `gnomonicus` to produce predictions.\n"
    ]
   },
   {
@@ -18,7 +18,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -27,7 +27,7 @@
        "{'RIF': 'R'}"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 1,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -53,7 +53,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -66,7 +66,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -107,7 +107,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -129,7 +129,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -155,34 +155,42 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Gnomon\n",
-    "Above shows how simple this makes the process programatically, but `gnomon` can be utilised to produce these automatically through a single command line call, as well as providing an API for programatic interface."
+    "## gnomonicus\n",
+    "The above shows how simple this makes the process programmatically, but `gnomonicus` can be utilised to produce these automatically through a single command line call, as well as providing an API for programmatic interfacing."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "['zsh:1: command not found: gnomon']"
+       "['',\n",
+       " '  0%|          | 0/4 [00:00<?, ?it/s]',\n",
+       " ' 50%|█████     | 2/4 [00:00<00:00,  7.95it/s]',\n",
+       " ' 75%|███████▌  | 3/4 [00:00<00:00,  5.13it/s]',\n",
+       " '100%|██████████| 4/4 [00:00<00:00,  4.46it/s]',\n",
+       " '100%|██████████| 4/4 [00:00<00:00,  4.90it/s]',\n",
+       " '',\n",
+       " '  0%|          | 0/4 [00:00<?, ?it/s]',\n",
+       " '100%|██████████| 4/4 [00:00<00:00, 252.23it/s]']"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "#Produce an antibiogram in a single command line call\n",
-    "!!gnomon --genome_object tuberculosis_amr_catalogues/catalogues/NC_000962.3/NC_000962.3.gbk --catalogue tuberculosis_amr_catalogues/catalogues/NC_000962.3/WHO-UCN-GTB-PCI-2021.7.GARC.csv --vcf_file XDR_demo.vcf --output_dir ./gnomon-out --json"
+    "!!gnomonicus --genome_object tuberculosis_amr_catalogues/catalogues/NC_000962.3/NC_000962.3.gbk --catalogue tuberculosis_amr_catalogues/catalogues/NC_000962.3/WHO-UCN-GTB-PCI-2021.7.GARC.csv --vcf_file XDR_demo.vcf --output_dir ./gnomonicus-out --json"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -204,16 +212,16 @@
     }
    ],
    "source": [
-    "#Checking the output files produced by gnomon\n",
+    "#Checking the output files produced by gnomonicus\n",
     "import pandas as pd\n",
     "\n",
     "#The genome variants\n",
-    "print(pd.read_csv(\"gnomon-out/XDR_demo.variants.csv\"))"
+    "print(pd.read_csv(\"gnomonicus-out/XDR_demo.variants.csv\"))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -250,12 +258,12 @@
    ],
    "source": [
     "#The gene mutations\n",
-    "print(pd.read_csv(\"gnomon-out/XDR_demo.mutations.csv\"))"
+    "print(pd.read_csv(\"gnomonicus-out/XDR_demo.mutations.csv\"))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -274,12 +282,12 @@
    ],
    "source": [
     "#The antibiogram\n",
-    "print(pd.read_csv(\"gnomon-out/XDR_demo.effects.csv\"))"
+    "print(pd.read_csv(\"gnomonicus-out/XDR_demo.effects.csv\"))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -307,7 +315,7 @@
    "source": [
     "#The output JSON\n",
     "import json\n",
-    "out = json.load(open(\"gnomon-out/XDR_demo.gnomon-out.json\"))\n",
+    "out = json.load(open(\"gnomonicus-out/XDR_demo.gnomonicus-out.json\"))\n",
     "\n",
     "#Get the effects of mutations on levofloxacin, as well as predicted phenotype\n",
     "print(json.dumps(out['data']['EFFECTS']['LEV'], indent=2))"
@@ -318,19 +326,19 @@
    "metadata": {},
    "source": [
     "## More advanced\n",
-    "As `gnomon` writes output files, we can go back to samples previously processed and produce new predictions with a new catalogue without having to actually re-process entirely. This kind of processing is likely not worthwhile for a single sample, but avoiding generating mutations will provide significant speed up for multiple samples."
+    "As `gnomonicus` writes output files, we can go back to samples previously processed and produce new predictions with a new catalogue without having to actually re-process entirely. This kind of processing is likely not worthwhile for a single sample, but avoiding generating mutations will provide a significant speed-up for multiple samples."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████| 4/4 [00:00<00:00, 1563.87it/s]"
+      "100%|██████████| 4/4 [00:00<00:00, 931.39it/s]"
      ]
     },
     {
@@ -350,7 +358,7 @@
     }
    ],
    "source": [
-    "import gnomon\n",
+    "import gnomonicus\n",
     "import os\n",
     "\n",
     "#We need a dict of reference genes\n",
@@ -359,28 +367,28 @@
     "\n",
     "#The VCF stem (which is used as a sample ID)\n",
     "vcfStem = os.path.split(\"XDR_demo.vcf\")[-1].split(\".\")[0]\n",
-    "outputDir = 'gnomon-out'\n",
+    "outputDir = 'gnomonicus-out'\n",
     "\n",
-    "#Read the mutations produced by gnomon the last time\n",
-    "mutations = pd.read_csv(\"gnomon-out/XDR_demo.mutations.csv\")\n",
+    "#Read the mutations produced by gnomonicus the last time\n",
+    "mutations = pd.read_csv(\"gnomonicus-out/XDR_demo.mutations.csv\")\n",
     "\n",
     "#Re-make the effects using a new catalogue (in this case, the NEJM catalogue)\n",
     "catalogue = piezo.ResistanceCatalogue(\"tuberculosis_amr_catalogues/catalogues/NC_000962.3/NC_000962.3_NEJM2018_v1.1_GARC1_RUS.csv\")\n",
     "\n",
-    "effects, _ = gnomon.populateEffects(None, outputDir, catalogue, mutations, referenceGenes, vcfStem)\n",
+    "effects, _ = gnomonicus.populateEffects(None, outputDir, catalogue, mutations, referenceGenes, vcfStem)\n",
     "\n",
     "print(effects)\n",
     "\n",
     "#Update the JSON output with this too\n",
-    "variants = pd.read_csv(\"gnomon-out/XDR_demo.variants.csv\")\n",
+    "variants = pd.read_csv(\"gnomonicus-out/XDR_demo.variants.csv\")\n",
     "\n",
-    "gnomon.saveJSON(variants, mutations, effects, \"gnomon-out\", vcfStem, \"RUS\", \"1.0.0\")"
+    "gnomonicus.saveJSON(variants, mutations, effects, \"gnomonicus-out\", vcfStem, \"RUS\", \"1.0.0\")"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.10.5 64-bit",
+   "display_name": "Python 3.10.6 ('env': venv)",
    "language": "python",
    "name": "python3"
   },
@@ -399,7 +407,7 @@
   "orig_nbformat": 4,
   "vscode": {
    "interpreter": {
-    "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
+    "hash": "cb77f519de5f0cdaae3257050ab6445f82395812815df62702308f51a6d03027"
    }
   }
  },

diff --git a/requirements.txt b/requirements.txt
@@ -3,5 +3,5 @@ setuptools>=42
 importlib-metadata
 gumpy>=1.0.11
 piezo>=0.3
-gnomonicus==1.0.0
+gnomonicus==1.0.1
 pandas
diff --git a/runtime.txt b/runtime.txt