From 87a60aa9afc82fcc0a9ff4e6258a39b584c6a8fc Mon Sep 17 00:00:00 2001 From: cmungall Date: Wed, 24 May 2023 12:48:14 -0700 Subject: [PATCH] more tasks --- notebooks/Enrichment-Results-Analysis.ipynb | 607 +----------------- src/ontogpt/cli.py | 77 ++- src/ontogpt/engines/reasoner_engine.py | 135 +++- src/ontogpt/io/yaml_wrapper.py | 20 +- src/ontogpt/ontex/extractor.py | 585 +++++++++++++++-- .../test_knowledge_engines/test_reasoning.py | 39 +- tests/unit/test_ontex/test_extract.py | 42 +- tox.ini | 1 + 8 files changed, 787 insertions(+), 719 deletions(-) diff --git a/notebooks/Enrichment-Results-Analysis.ipynb b/notebooks/Enrichment-Results-Analysis.ipynb index e8f411628..6580c33f5 100644 --- a/notebooks/Enrichment-Results-Analysis.ipynb +++ b/notebooks/Enrichment-Results-Analysis.ipynb @@ -991,609 +991,20 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 1, "id": "30174f04", "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/nc/m4tx21912kv1b8nk3zzx9plr0000gn/T/ipykernel_27125/3852654709.py:1: FutureWarning: this method is deprecated in favour of `Styler.hide(axis=\"index\")`\n", - " df[[SOURCE_GENESET, GENESET_SIZE]].drop_duplicates().style.hide_index()\n" + "ename": "NameError", + "evalue": "name 'df' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mdf\u001b[49m[[SOURCE_GENESET, GENESET_SIZE]]\u001b[38;5;241m.\u001b[39mdrop_duplicates()\u001b[38;5;241m.\u001b[39mhide_index()\n", + "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined" ] - }, - { - "data": { - "text/html": [ - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - 
" \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
source genesetgeneset_size
EDS19
EDS18
FA19
FA18
HALLMARK_ADIPOGENESIS200
HALLMARK_ADIPOGENESIS180
HALLMARK_ALLOGRAFT_REJECTION200
HALLMARK_ALLOGRAFT_REJECTION180
HALLMARK_ANDROGEN_RESPONSE101
HALLMARK_ANDROGEN_RESPONSE90
HALLMARK_ANGIOGENESIS36
HALLMARK_ANGIOGENESIS33
HALLMARK_APICAL_JUNCTION200
HALLMARK_APICAL_JUNCTION180
HALLMARK_APICAL_SURFACE44
HALLMARK_APICAL_SURFACE40
HALLMARK_APOPTOSIS161
HALLMARK_APOPTOSIS145
HALLMARK_BILE_ACID_METABOLISM112
HALLMARK_BILE_ACID_METABOLISM101
HALLMARK_CHOLESTEROL_HOMEOSTASIS74
HALLMARK_CHOLESTEROL_HOMEOSTASIS67
HALLMARK_COAGULATION138
HALLMARK_COAGULATION125
HALLMARK_COMPLEMENT200
HALLMARK_COMPLEMENT180
HALLMARK_DNA_REPAIR150
HALLMARK_DNA_REPAIR135
HALLMARK_E2F_TARGETS200
HALLMARK_E2F_TARGETS180
HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION200
HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION180
HALLMARK_ESTROGEN_RESPONSE_EARLY200
HALLMARK_ESTROGEN_RESPONSE_EARLY180
HALLMARK_ESTROGEN_RESPONSE_LATE200
HALLMARK_ESTROGEN_RESPONSE_LATE180
HALLMARK_FATTY_ACID_METABOLISM158
HALLMARK_FATTY_ACID_METABOLISM143
HALLMARK_G2M_CHECKPOINT200
HALLMARK_G2M_CHECKPOINT180
HALLMARK_GLYCOLYSIS200
HALLMARK_GLYCOLYSIS180
HALLMARK_HEDGEHOG_SIGNALING36
HALLMARK_HEDGEHOG_SIGNALING33
HALLMARK_HEME_METABOLISM200
HALLMARK_HEME_METABOLISM180
HALLMARK_HYPOXIA200
HALLMARK_HYPOXIA180
HALLMARK_IL2_STAT5_SIGNALING199
HALLMARK_IL2_STAT5_SIGNALING179
HALLMARK_IL6_JAK_STAT3_SIGNALING87
HALLMARK_IL6_JAK_STAT3_SIGNALING79
HALLMARK_INFLAMMATORY_RESPONSE200
HALLMARK_INFLAMMATORY_RESPONSE180
HALLMARK_INTERFERON_ALPHA_RESPONSE97
HALLMARK_INTERFERON_ALPHA_RESPONSE88
HALLMARK_INTERFERON_GAMMA_RESPONSE200
HALLMARK_INTERFERON_GAMMA_RESPONSE180
HALLMARK_KRAS_SIGNALING_DN200
HALLMARK_KRAS_SIGNALING_DN180
HALLMARK_KRAS_SIGNALING_UP200
HALLMARK_KRAS_SIGNALING_UP180
HALLMARK_MITOTIC_SPINDLE199
HALLMARK_MITOTIC_SPINDLE180
HALLMARK_MTORC1_SIGNALING200
HALLMARK_MTORC1_SIGNALING180
HALLMARK_MYC_TARGETS_V1200
HALLMARK_MYC_TARGETS_V1180
HALLMARK_MYC_TARGETS_V258
HALLMARK_MYC_TARGETS_V253
HALLMARK_MYOGENESIS200
HALLMARK_MYOGENESIS180
HALLMARK_NOTCH_SIGNALING32
HALLMARK_NOTCH_SIGNALING29
HALLMARK_OXIDATIVE_PHOSPHORYLATION200
HALLMARK_OXIDATIVE_PHOSPHORYLATION180
HALLMARK_P53_PATHWAY200
HALLMARK_P53_PATHWAY180
HALLMARK_PANCREAS_BETA_CELLS40
HALLMARK_PANCREAS_BETA_CELLS36
HALLMARK_PEROXISOME104
HALLMARK_PEROXISOME94
HALLMARK_PI3K_AKT_MTOR_SIGNALING105
HALLMARK_PI3K_AKT_MTOR_SIGNALING95
HALLMARK_PROTEIN_SECRETION96
HALLMARK_PROTEIN_SECRETION87
HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY49
HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY45
HALLMARK_SPERMATOGENESIS135
HALLMARK_SPERMATOGENESIS122
HALLMARK_TGF_BETA_SIGNALING54
HALLMARK_TGF_BETA_SIGNALING49
HALLMARK_TNFA_SIGNALING_VIA_NFKB200
HALLMARK_TNFA_SIGNALING_VIA_NFKB180
HALLMARK_UNFOLDED_PROTEIN_RESPONSE113
HALLMARK_UNFOLDED_PROTEIN_RESPONSE101
HALLMARK_UV_RESPONSE_DN144
HALLMARK_UV_RESPONSE_DN130
HALLMARK_UV_RESPONSE_UP158
HALLMARK_UV_RESPONSE_UP143
HALLMARK_WNT_BETA_CATENIN_SIGNALING42
HALLMARK_WNT_BETA_CATENIN_SIGNALING38
T cell proliferation72
T cell proliferation65
Yamanaka-TFs4
Yamanaka-TFs3
amigo-example36
amigo-example32
bicluster_RNAseqDB_0158
bicluster_RNAseqDB_0134
bicluster_RNAseqDB_100252
bicluster_RNAseqDB_100243
glycolysis-gocam10
glycolysis-gocam9
term-GO:000721228
term-GO:000721226
endocytosis16
endocytosis15
go-postsynapse-calcium-transmembrane33
go-postsynapse-calcium-transmembrane30
go-reg-autophagy-pkra17
go-reg-autophagy-pkra16
hydrolase activity, hydrolyzing O-glycosyl compounds91
hydrolase activity, hydrolyzing O-glycosyl compounds81
ig-receptor-binding-202291
ig-receptor-binding-202282
meiosis I54
meiosis I46
molecular sequestering30
molecular sequestering27
mtorc1200
mtorc1180
peroxisome8
peroxisome5
progeria4
progeria3
regulation of presynaptic membrane potential30
regulation of presynaptic membrane potential27
sensory ataxia15
sensory ataxia14
tf-downreg-colorectal51
tf-downreg-colorectal46
\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ diff --git a/src/ontogpt/cli.py b/src/ontogpt/cli.py index af58c61cc..7b11a7d85 100644 --- a/src/ontogpt/cli.py +++ b/src/ontogpt/cli.py @@ -3,7 +3,7 @@ import logging import pickle import sys -from copy import copy +from copy import copy, deepcopy from dataclasses import dataclass from io import BytesIO, TextIOWrapper from pathlib import Path @@ -34,6 +34,7 @@ from ontogpt.engines.synonym_engine import SynonymEngine from ontogpt.evaluation.enrichment.eval_enrichment import EvalEnrichment from ontogpt.evaluation.resolver import create_evaluator +from ontogpt.io.csv_wrapper import write_obj_as_csv from ontogpt.io.html_exporter import HTMLExporter from ontogpt.io.markdown_exporter import MarkdownExporter from ontogpt.utils.gene_set_utils import ( @@ -136,7 +137,7 @@ def write_extraction( output_format_options = click.option( "-O", "--output-format", - type=click.Choice(["json", "yaml", "pickle", "md", "html", "owl", "turtle"]), + type=click.Choice(["json", "yaml", "pickle", "md", "html", "owl", "turtle", "jsonl"]), default="yaml", help="Output format.", ) @@ -806,22 +807,59 @@ def entity_similarity(terms, ontology, output, model, output_format, **kwargs): @main.command() @inputfile_option +@output_option_txt +@model_option +@click.option("--task-file") @click.option("--task-type") +@click.option("--tsv-output") +@click.option("--all-methods/--no-all-methods", default=False) @click.option("--explain/--no-explain", default=False) +@click.option("--evaluate/--no-evaluate", default=False) @click.argument("terms", nargs=-1) -def reason(terms, inputfile, explain, task_type, **kwargs): +def reason( + terms, + inputfile, + model, + task_file, + explain, + task_type, + output, + tsv_output, + all_methods, + evaluate, + **kwargs, +): """Reason.""" - reasoner = ReasonerEngine() - adapter = get_adapter(inputfile) - if not isinstance(adapter, OboGraphInterface): - raise ValueError("Only OBO graphs supported") - ex = extractor.OntologyExtractor(adapter=adapter) - # ex.use_identifiers = True - task = ex.create_task(task_type=task_type, parameters=list(terms)) - task.include_explanations = explain - print(yaml.dump(task.dict(), sort_keys=False)) - result = reasoner.reason(task=task) - print(yaml.dump(result.dict(), sort_keys=False)) + reasoner = ReasonerEngine(model=model) + if task_file: + tc = extractor.TaskCollection.load(task_file) + else: + adapter = get_adapter(inputfile) + if not isinstance(adapter, OboGraphInterface): + raise ValueError("Only OBO graphs supported") + ex = extractor.OntologyExtractor(adapter=adapter) + # ex.use_identifiers = True + task = ex.create_task(task_type=task_type, parameters=list(terms)) + tc = extractor.TaskCollection(tasks=[task]) + if all_methods: + tasks = [] + print(f"Cloning {len(tc.tasks)} tasks") + for core_task in tc.tasks: + for m in extractor.GPTReasonMethodType: + print(f"Cloning {m}") + task = deepcopy(core_task) + task.method = m + task.init_method() + tasks.append(task) + tc.tasks = tasks + print(f"New {len(tc.tasks)} tasks") + else: + for task in tc.tasks: + task.include_explanations = explain + resultset = reasoner.reason_multiple(tc, evaluate=evaluate) + dump_minimal_yaml(resultset.dict(), file=output) + if tsv_output: + write_obj_as_csv(resultset.results, tsv_output) @main.command() @@ -979,20 +1017,19 @@ def parse(template, input): @model_option @click.option("-m", "match", help="Match string to use for filtering.") 
@click.option("-D", "database", help="Path to sqlite database.") -def dump_completions(engine, match, database, output, output_format): +def dump_completions(model, match, database, output, output_format): """Dump cached completions.""" - logging.info(f"Creating for {engine}") client = OpenAIClient() if database: client.cache_db_path = database if output_format == "jsonl": writer = jsonlines.Writer(output) - for engine, prompt, completion in client.cached_completions(match): - writer.write(dict(engine=engine, prompt=prompt, completion=completion)) + for _engine, prompt, completion in client.cached_completions(match): + writer.write(dict(engine=model, prompt=prompt, completion=completion)) elif output_format == "yaml": - for engine, prompt, completion in client.cached_completions(match): + for _engine, prompt, completion in client.cached_completions(match): output.write( - dump_minimal_yaml(dict(engine=engine, prompt=prompt, completion=completion)) + dump_minimal_yaml(dict(engine=model, prompt=prompt, completion=completion)) ) else: output.write("# Cached Completions:\n") diff --git a/src/ontogpt/engines/reasoner_engine.py b/src/ontogpt/engines/reasoner_engine.py index 33ab91a03..8939da330 100644 --- a/src/ontogpt/engines/reasoner_engine.py +++ b/src/ontogpt/engines/reasoner_engine.py @@ -8,8 +8,15 @@ from jinja2 import Template from pydantic import BaseModel -from ontogpt.engines.knowledge_engine import KnowledgeEngine -from ontogpt.ontex.extractor import Answer, Axiom, Explanation, Task +from ontogpt.engines.knowledge_engine import MODEL_GPT_4, KnowledgeEngine +from ontogpt.ontex.extractor import ( + Answer, + Axiom, + Explanation, + GPTReasonMethodType, + Task, + TaskCollection, +) from ontogpt.prompts.reasoning import DEFAULT_REASONING_PROMPT from ontogpt.utils.parse_utils import split_on_one_of @@ -30,7 +37,11 @@ class ReasonerResult(BaseModel): """The result of a reason query.""" name: Optional[str] = None + completed: Optional[bool] = True task_name: Optional[str] = None + task_type: Optional[str] = None + method: Optional[GPTReasonMethodType] = None + model: Optional[str] = None description: Optional[str] = None answers: Optional[List[Answer]] = None prompt: Optional[str] = None @@ -40,6 +51,22 @@ class ReasonerResult(BaseModel): false_negatives: Optional[List[str]] = None num_false_positives: Optional[int] = None num_false_negatives: Optional[int] = None + num_true_positives: Optional[int] = None + num_true_negatives: Optional[int] = None + precision: Optional[float] = None + recall: Optional[float] = None + f1_score: Optional[float] = None + len_shortest_explanation: Optional[int] = None + + class Config: + """Pydantic config.""" + + use_enum_values = True + + +class ReasonerResultSet(BaseModel): + name: str = None + results: List[ReasonerResult] @dataclass @@ -109,7 +136,11 @@ class ReasonerEngine(KnowledgeEngine): """ - def reason(self, task: Task, template_path=None) -> ReasonerResult: + completion_length = 250 + + def reason( + self, task: Task, template_path=None, strict=False, evaluate: bool = None + ) -> ReasonerResult: """Reason over axioms and query entailments.""" if template_path is None: template_path = DEFAULT_REASONING_PROMPT @@ -126,18 +157,60 @@ def reason(self, task: Task, template_path=None) -> ReasonerResult: query=task.query, examples=task.examples, ) + completion_length = self.completion_length + if task.method == GPTReasonMethodType.EXPLANATION: + completion_length *= 2 + elif task.method == GPTReasonMethodType.CHAIN_OF_THOUGHT: + completion_length *= 2 
logger.info(f"Prompt: {prompt}") - payload = self.client.complete(prompt) - if task.has_multiple_answers: - elements = payload.split("- ") - answers = [self._parse_single_answer(e, task) for e in elements] + prompt_length = len(self.encoding.encode(prompt)) + 10 + max_len_total = 4097 + if self.model == MODEL_GPT_4: + max_len_total = 8193 + max_len = max_len_total - completion_length + completed = True + logger.info(f"PROMPT LENGTH: {prompt_length} [max={max_len}]") + if prompt_length > max_len: + if strict: + raise ValueError(f"Prompt length ({prompt_length}) exceeds maximum ({max_len})") + answers = [] + completed = False else: - answers = [self._parse_single_answer(payload, task)] - answers = flatten_list(answers) - rr = ReasonerResult(prompt=prompt, completion=payload, answers=[a for a in answers if a]) + payload = self.client.complete(prompt, max_tokens=completion_length) + if task.has_multiple_answers: + elements = payload.split("- ") + answers = [self._parse_single_answer(e, task) for e in elements] + else: + answers = [self._parse_single_answer(payload, task)] + answers = flatten_list(answers) + result = ReasonerResult( + completed=completed, + task_name=task.name, + task_type=task.type, + method=task.method, + len_shortest_explanation=task.len_shortest_explanation, + model=self.model, + prompt=prompt, + completion=payload, + answers=[a for a in answers if a], + ) + result.name = f"{task.name}-{task.method.value}-{self.model}" + if not task.answers and evaluate: + raise ValueError(f"Cannot evaluate without expected answers: {task}") if task.answers is not None: - self.evaluate(rr, task) - return rr + self.evaluate(result, task) + return result + + def reason_multiple(self, task_collection: TaskCollection, **kwargs) -> ReasonerResultSet: + """ + Reason over multiple tasks. 
+
+        :param task_collection: the tasks to run through the reasoner
+        :param kwargs: additional arguments passed through to :meth:`reason`
+        :return: a ReasonerResultSet with one ReasonerResult per task
+        """
+        results = [self.reason(task, **kwargs) for task in task_collection.tasks]
+        return ReasonerResultSet(results=results)

     def _parse_single_answer(
         self, payload: str, task: Task
@@ -186,24 +259,26 @@ def _parse_single_answer(

     def evaluate(self, result: ReasonerResult, task: Task):
         """Evaluate result against task."""
-        task_answer_texts = {t.text for t in task.answers}
+        positives = {t.text for t in task.answers}
         result_answer_texts = {a.text for a in result.answers}
-        ixn = task_answer_texts.intersection(result_answer_texts)
-        all_texts = task_answer_texts.union(result_answer_texts)
-        if len(all_texts) == 0:
-            j_score = 0.0
-        else:
-            j_score = len(ixn) / len(all_texts)
-        result.jaccard_score = j_score
-        result.false_positives = list(result_answer_texts - task_answer_texts)
-        result.false_negatives = list(task_answer_texts - result_answer_texts)
+        ixn = positives.intersection(result_answer_texts)
+        all_texts = positives.union(result_answer_texts)
+        result.false_positives = list(result_answer_texts - positives)
+        result.false_negatives = list(positives - result_answer_texts)
         result.num_false_positives = len(result.false_positives)
         result.num_false_negatives = len(result.false_negatives)
-        if not result.task_name:
-            result.task_name = task.name
-        if not result.name:
-            result.name = task.name
-            if task.chain_of_thought:
-                result.name += "-cot"
-            if task.include_explanations:
-                result.name += "-expl"
+        result.num_true_positives = len(ixn)
+        num_predicted = result.num_true_positives + result.num_false_positives
+        # guard against division by zero when the model returned no answers
+        if num_predicted == 0:
+            result.precision = 0.0
+        else:
+            result.precision = result.num_true_positives / num_predicted
+        result.recall = result.num_true_positives / len(positives) if positives else 0.0
+        if len(all_texts) == 0:
+            result.jaccard_score = 0.0
+        else:
+            result.jaccard_score = len(ixn) / len(all_texts)
+        if result.num_true_positives == 0:
+            result.f1_score = 0.0
+        else:
+            result.f1_score = (
+                2 * (result.precision * result.recall) / (result.precision + result.recall)
+            )
diff --git a/src/ontogpt/io/yaml_wrapper.py b/src/ontogpt/io/yaml_wrapper.py
index a21ddf389..916d79e24 100644
--- a/src/ontogpt/io/yaml_wrapper.py
+++ b/src/ontogpt/io/yaml_wrapper.py
@@ -1,7 +1,7 @@
 """YAML Wrapper."""
 import io
 import logging
-from typing import Any
+from typing import Any, Optional, TextIO

 import pydantic
 from ruamel.yaml import YAML, RoundTripRepresenter
@@ -34,18 +34,20 @@ def eliminate_empty(obj: Any, preserve=False) -> Any:


 def repr_str(dumper: RoundTripRepresenter, data: str):
-    if '\n' in data:
-        return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
-    return dumper.represent_scalar('tag:yaml.org,2002:str', data)
+    if "\n" in data:
+        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
+    return dumper.represent_scalar("tag:yaml.org,2002:str", data)


-def dump_minimal_yaml(obj: Any, minimize=True) -> str:
+def dump_minimal_yaml(obj: Any, minimize=True, file: Optional[TextIO] = None) -> Optional[str]:
     """Dump a YAML string, but eliminating Nones and empty lists and dicts."""
     yaml = YAML()
     yaml.representer.add_representer(str, repr_str)
     yaml.default_flow_style = False
     yaml.indent(sequence=4, offset=2)
-    f = io.StringIO()
-    yaml.dump(eliminate_empty(obj, not minimize), f)
-    return f.getvalue()
-
+    if not file:
+        file = io.StringIO()
+        yaml.dump(eliminate_empty(obj, not minimize), file)
+        return file.getvalue()
+    else:
+        yaml.dump(eliminate_empty(obj, not minimize), file)
diff --git a/src/ontogpt/ontex/extractor.py b/src/ontogpt/ontex/extractor.py
index a3fb45aaa..f3b079b7d 100644
--- 
a/src/ontogpt/ontex/extractor.py +++ b/src/ontogpt/ontex/extractor.py @@ -1,17 +1,27 @@ """Tools to extract sub-ontologies and reasoner tasks.""" +import logging import random import re +import sys +import uuid +from collections import defaultdict from dataclasses import dataclass -from typing import Any, ClassVar, List, Optional, Tuple, Type, Union +from enum import Enum +from pathlib import Path +from typing import Any, ClassVar, List, Literal, Optional, TextIO, Tuple, Type, Union import inflection -from oaklib.datamodels.vocabulary import DISJOINT_WITH, IS_A, OWL_CLASS +import yaml +from oaklib.datamodels.vocabulary import DISJOINT_WITH, IS_A, OWL_CLASS, PART_OF from oaklib.interfaces import OboGraphInterface from oaklib.interfaces.basic_ontology_interface import RELATIONSHIP from oaklib.interfaces.obograph_interface import GraphTraversalMethod from oaklib.interfaces.semsim_interface import SemanticSimilarityInterface from oaklib.types import CURIE, PRED_CURIE -from pydantic import BaseModel +from oaklib.utilities.obograph_utils import shortest_paths +from pydantic import BaseModel, Field + +logger = logging.getLogger(__name__) class Axiom(BaseModel): @@ -24,12 +34,15 @@ class Axiom(BaseModel): class Ontology(BaseModel): """An ontology is a collection of axioms.""" + name: Optional[str] = None axioms: List[Axiom] """All axioms in the ontology""" terms: List[CURIE] = None predicates: List[PRED_CURIE] = None + comments: Optional[List[str]] = None + class Query(BaseModel): """Query.""" @@ -46,7 +59,7 @@ class Query(BaseModel): class Explanation(BaseModel): """The collection of axioms that entail some explained axiom.""" - axioms: List[Axiom] + axioms: List[Axiom] = [] text: Optional[str] = None comments: Optional[List[str]] = None @@ -63,6 +76,13 @@ class Answer(BaseModel): explanations: Optional[List[Explanation]] = None """All explanations for the answer.""" + def shortest_explanation(self) -> Optional[Explanation]: + """Return the shortest explanation for the answer.""" + if not self.explanations: + return Explanation(axioms=[Axiom(text="No explanation found")]) + shortest = min(self.explanations, key=lambda x: len(x.axioms)) + return shortest + class ObjectAnswer(Answer): """Answer that is an object, e.g class.""" @@ -76,6 +96,12 @@ class ClassAnswer(Answer): _value_domain = "The name of the class." +class InstanceAnswer(Answer): + """Answer that is an OWL individual.""" + + _value_domain = "The name of the individual." + + class BooleanAnswer(Answer): """Answer that is a boolean, e.g. true or false.""" @@ -102,6 +128,12 @@ class Example(BaseModel): query_answers: Optional[List[ExampleQueryAnswers]] = None +class GPTReasonMethodType(str, Enum): + BASIC = "basic" + EXPLANATION = "explanation" + CHAIN_OF_THOUGHT = "chain_of_thought" + + class Task(BaseModel): """ A task is a query on an ontology that has a set of defined answers. 
@@ -110,6 +142,9 @@ class Task(BaseModel):
     """

     _query_format: ClassVar[str] = None
+
+    type: Literal["Task"] = Field("Task")
+
     has_multiple_answers: ClassVar[bool] = True

     ontology: Ontology
@@ -120,6 +155,8 @@ class Task(BaseModel):
     examples: Optional[List[Example]] = None
     description: Optional[str] = None

+    method: Optional[GPTReasonMethodType] = None
+
     include_explanations: Optional[bool] = False
     """If true then completing the task must involve providing explanations for each answer."""

@@ -129,13 +166,52 @@ class Task(BaseModel):
     abductive: Optional[bool] = False
     """If true then the task is to find explanations for answers that are given."""

+    shortest_explanation: Optional[Explanation] = None
+
+    len_shortest_explanation: Optional[int] = None
+
+    class Config:
+        """Pydantic configuration."""
+
+        use_enum_values = True
+
     def populate(self) -> None:
         qf = self._query_format
         for example in self.examples:
             for query_answer in example.query_answers:
-                query_answer.query.text = qf.format(params=query_answer.query.parameters)
+                if not query_answer.query.text:
+                    query_answer.query.text = qf.format(params=query_answer.query.parameters)
         if not self.query.text:
             self.query.text = qf.format(params=self.query.parameters)
+        if not self.answers:
+            self.shortest_explanation = None
+            self.len_shortest_explanation = 0
+        else:
+            most_complex_answer = max(
+                self.answers, key=lambda x: len(x.shortest_explanation().axioms)
+            )
+            self.shortest_explanation = most_complex_answer.shortest_explanation()
+            self.len_shortest_explanation = len(self.shortest_explanation.axioms)
+        if not self.name:
+            self.name = f"{self.type}-{uuid.uuid4()}"
+        self.init_method()
+
+    def init_method(self):
+        if self.method:
+            logger.info(f"Initializing method for {self.name}")
+            if not isinstance(self.method, GPTReasonMethodType):
+                self.method = GPTReasonMethodType(self.method)
+            if self.method == GPTReasonMethodType.EXPLANATION:
+                self.include_explanations = True
+            elif self.method == GPTReasonMethodType.CHAIN_OF_THOUGHT:
+                self.chain_of_thought = True
+        else:
+            if self.include_explanations:
+                self.method = GPTReasonMethodType.EXPLANATION
+            elif self.chain_of_thought:
+                self.method = GPTReasonMethodType.CHAIN_OF_THOUGHT
+            else:
+                self.method = GPTReasonMethodType.BASIC


 class OntologyCoherencyTask(Task):
@@ -154,6 +230,8 @@ class OntologyCoherencyTask(Task):
     List all unsatisfiable classes that can be found with this rule.
     If there are no unsatisfiable classes, just write NONE."""

+    type: Literal["OntologyCoherencyTask"] = Field("OntologyCoherencyTask")
+
     has_multiple_answers = False

     answers: Optional[List[ClassAnswer]] = None
@@ -230,6 +308,8 @@ class EntailedIndirectSuperClassTask(Task):
     Do not include direct (one-hop) superclasses.
     """

+    type: Literal["EntailedIndirectSuperClassTask"] = Field("EntailedIndirectSuperClassTask")
+
     answers: Optional[List[ClassAnswer]] = None

     examples: Optional[List[Example]] = [
@@ -302,14 +382,123 @@ class EntailedIndirectSuperClassTask(Task):
     ]


+class EntailedTransitiveSuperClassTask(Task):
+    """A task to determine all transitive superclasses of a class."""
+
+    _query_format = """
+    What are the transitive superclasses of {params[0]}?
+    Include answers entailed by the transitivity of SubClassOf.
+    Also include direct (one-hop) superclasses.
+ """ + + type: Literal["EntailedTransitiveSuperClassTask"] = Field("EntailedTransitiveSuperClassTask") + + answers: Optional[List[ClassAnswer]] = None + + examples: Optional[List[Example]] = [ + Example( + ontology=Ontology( + axioms=[ + Axiom(text="E2 SubClassOf E"), + Axiom(text="E SubClassOf B"), + Axiom(text="B SubClassOf A"), + Axiom(text="C SubClassOf A"), + Axiom(text="D SubClassOf B"), + ] + ), + query_answers=[ + ExampleQueryAnswers( + query=Query(parameters=["E"]), + answers=[ + ClassAnswer( + text="A", + explanations=[ + Explanation( + text="""A is an entailed superclass of E because + E SubClassOf B, and B SubClassOf A, and SubClassOf is + transitive.""", + axioms=[ + Axiom(text="E SubClassOf B"), + Axiom(text="B SubClassOf A"), + ], + ) + ], + ), + ClassAnswer( + text="B", + explanations=[ + Explanation( + text="""B is an indirect entailed superclass of E because + it is already asserted.""", + axioms=[ + Axiom(text="B SubClassOf A"), + ], + ) + ], + ), + ], + ), + ExampleQueryAnswers( + query=Query(parameters=["E2"]), + answers=[ + ClassAnswer( + text="A", + explanations=[ + Explanation( + text="""A is an indirect entailed superclass of E2 because + E2 SubClassOf E, and E SubClassOf B, and B SubClassOf A, + and because SubClassOf is transitive.""", + axioms=[ + Axiom(text="E2 SubClassOf E"), + Axiom(text="E SubClassOf B"), + Axiom(text="B SubClassOf A"), + ], + ) + ], + ), + ClassAnswer( + text="B", + explanations=[ + Explanation( + text="""B is an entailed superclass of E2 because + E2 SubClassOf E, and E SubClassOf B, and because SubClassOf + is transitive.""", + axioms=[ + Axiom(text="E SubClassOf B"), + Axiom(text="E2 SubClassOf E"), + ], + ) + ], + ), + ClassAnswer( + text="E", + explanations=[ + Explanation( + text="""E is an entailed superclass of E2 because + it is directly asserted.""", + axioms=[ + Axiom(text="E2 SubClassOf E"), + ], + ) + ], + ), + ], + ), + ], + ) + ] + + class EntailedSubClassOfExpressionTask(Task): """A task to determine the subclasses of a class expression.""" _query_format = """ - What are the entailed subclasses of the expression {params[0]} Some {params[0]}? + What are the entailed subclasses of the expression {params[0]} Some {params[1]}? Include indirect (transitive) descendants. """ + type: Literal["EntailedSubClassOfExpressionTask"] = Field("EntailedSubClassOfExpressionTask") + answers: Optional[List[ClassAnswer]] = None examples: Optional[List[Example]] = [ @@ -410,6 +599,8 @@ class EntailedDirectSuperClassTask(Task): Make use of all axioms in the provided ontology. """ + type: Literal["EntailedDirectSuperClassTask"] = Field("EntailedDirectSuperClassTask") + answers: Optional[List[ClassAnswer]] = None # TODO: examples @@ -422,6 +613,8 @@ class MostRecentCommonSubsumerTask(Task): What are the most specific common entailed superclasses of {params[0]} and {params[1]}?. 
""" + type: Literal["MostRecentCommonSubsumerTask"] = Field("MostRecentCommonSubsumerTask") + answers: Optional[List[ClassAnswer]] = None examples: Optional[List[Example]] = [ @@ -451,7 +644,7 @@ class MostRecentCommonSubsumerTask(Task): Axiom(text="E2 SubClassOf E"), Axiom(text="E SubClassOf B"), Axiom(text="D SubClassOf B"), - ] + ], ) ], ), @@ -474,7 +667,7 @@ class MostRecentCommonSubsumerTask(Task): Axiom(text="B SubClassOf A"), Axiom(text="C SubClassOf B"), Axiom(text="B SubClassOf A"), - ] + ], ) ], ), @@ -491,7 +684,7 @@ class MostRecentCommonSubsumerTask(Task): trivially E2 SubClassOf E""", axioms=[ Axiom(text="E2 SubClassOf E"), - ] + ], ) ], ), @@ -502,6 +695,157 @@ class MostRecentCommonSubsumerTask(Task): ] +class ABoxPropertyChainPlusTransitivityTask(Task): + """A task to infer assertions over property chains and transitvity in aboxes.""" + + _query_format = """ + What instances satisfy {params[0]} {params[1]} ?. + Make use of property chain axioms of the form + PROPERTY1 o PROPERTY2 SubPropertyOf PROPERTY3. + This means that if x PROPERTY1 y and y PROPERTY2 z then x PROPERTY3 z. + Also make use of transitivity axioms of the form + PROPERTY type TransitiveProperty. + This means that if x PROPERTY y and y PROPERTY z then x PROPERTY z. + """ + + type: Literal["ABoxPropertyChainTask"] = Field("ABoxPropertyChainTask") + + answers: Optional[List[InstanceAnswer]] = None + + # TODO: examples + + examples: Optional[List[Example]] = [ + Example( + ontology=Ontology( + axioms=[ + Axiom(text="p1 o p2 SubPropertyOf p3"), + Axiom(text="p1 type TransitiveProperty"), + Axiom(text="i0 p1 i1"), + Axiom(text="i1 p1 i2"), + Axiom(text="i2 p2 i3"), + Axiom(text="i3 p1 i4"), + ], + comments=["""a chain of two transitive properties followed by a property chain."""], + ), + query_answers=[ + ExampleQueryAnswers( + query=Query(parameters=["i0", "p3"]), + answers=[ + InstanceAnswer( + text="i3", + explanations=[ + Explanation( + text="""i0 p3 i3 because + i0 p1 i1 and i1 p1 i2 and p1 is transitive, so i0 p1 i2. 
+ i2 p2 i3 and p1 o p2 SubPropertyOf p3, so i0 p3 i3""", + axioms=[ + Axiom(text="i0 p1 i1"), + Axiom(text="i1 p1 i2"), + Axiom(text="p1 type TransitiveProperty"), + Axiom(text="i2 p2 i3"), + Axiom(text="p1 o p2 SubPropertyOf p3"), + ], + ) + ], + ), + InstanceAnswer( + text="i2", + explanations=[ + Explanation( + text="""i0 p3 i2 because + i0 p1 i1 and i1 p1 i2 and p1 is transitive, so i0 p1 i2.""", + axioms=[ + Axiom(text="i0 p1 i1"), + Axiom(text="i1 p1 i2"), + Axiom(text="p1 type TransitiveProperty"), + ], + ) + ], + ), + ], + ), + ExampleQueryAnswers( + query=Query(parameters=["i1", "p3"]), + answers=[ + InstanceAnswer( + text="i3", + explanations=[ + Explanation( + text="""i1 p3 i3 because + i1 p1 i2 and + i2 p2 i3 and p1 o p2 SubPropertyOf p3, so i1 p3 i3""", + axioms=[ + Axiom(text="i1 p1 i2"), + Axiom(text="i2 p2 i3"), + Axiom(text="p1 o p2 SubPropertyOf p3"), + ], + ) + ], + ), + ], + ), + ExampleQueryAnswers( + query=Query(parameters=["i0", "p1"]), + answers=[ + InstanceAnswer( + text="i1", + explanations=[ + Explanation( + text="""i0 p1 i1 is directly asserted""", + axioms=[ + Axiom(text="i0 p1 i1"), + ], + ) + ], + ), + InstanceAnswer( + text="i2", + explanations=[ + Explanation( + text="""i0 p1 i2 because + i0 p1 i1 and i1 p1 i2 and p1 is transitive, + so i0 p1 i2.""", + axioms=[ + Axiom(text="i0 p1 i1"), + Axiom(text="i1 p1 i2"), + Axiom(text="p1 type TransitiveProperty"), + ], + ) + ], + ), + ], + ), + ], + ) + ] + + +class TaskCollection(BaseModel): + tasks: List[Task] = None + + @staticmethod + def load(file_or_object: Union[dict, str, Path, TextIO]): + if isinstance(file_or_object, Path): + file_or_object = str(file_or_object) + if isinstance(file_or_object, str): + with open(file_or_object) as f: + tc_dict = yaml.safe_load(f) + else: + tc_dict = yaml.safe_load(file_or_object) + current_module = sys.modules[__name__] + tasks = [] + for task_dict in tc_dict["tasks"]: + typ = task_dict["type"] + cls = current_module.__dict__[typ] + task = cls(**task_dict) + if not isinstance(task.method, GPTReasonMethodType): + # TODO: figure how to get pydantic to do this + task.method = GPTReasonMethodType(task.method) + tasks.append(task) + tc_dict["tasks"] = tasks + return TaskCollection(**tc_dict) + + @dataclass class OntologyExtractor: """ @@ -534,6 +878,25 @@ def create_task( task.populate() return task + def create_random_tasks( + self, num_tasks_per_type: int = 10, methods: List = None + ) -> TaskCollection: + if methods is None: + methods = [ + self.extract_indirect_superclasses_task, + self.extract_transitive_superclasses_task, + self.extract_most_recent_common_subsumers_task, + self.extract_subclass_of_expression_task, + self.extract_incoherent_ontology_task, + ] + objs = [] + for method in methods: + for _n in range(num_tasks_per_type): + task = method(select_random=True) + objs.append(task) + logger.info(f" {task.name}") + return TaskCollection(tasks=objs) + def extract_ontology( self, terms: List[CURIE], @@ -551,8 +914,9 @@ def extract_ontology( if predicates is None: predicates = [IS_A] adapter = self.adapter + onts = list(adapter.ontologies()) ancs = list(adapter.ancestors(terms, predicates=predicates)) - if roots is not None: + if roots: roots = set(roots) ancs = [ t for t in ancs if roots.intersection(adapter.ancestors(t, predicates=predicates)) @@ -561,6 +925,8 @@ def extract_ontology( already_have = set() terms = set() used_predicates = set() + if not ancs: + raise ValueError(f"No ancestors found for {terms} over {predicates}") for t in ancs: for rel in 
adapter.relationships([t], predicates=predicates): if rel in already_have: @@ -575,7 +941,13 @@ def extract_ontology( continue axioms.append(self._axiom(rel)) already_have.add(rel) - ontology = Ontology(axioms=axioms, terms=terms, predicates=used_predicates) + if not axioms: + raise ValueError( + f"No axioms found for ancestors {ancs} over {predicates} (roots={roots})" + ) + ontology = Ontology( + name="-".join(onts), axioms=axioms, terms=terms, predicates=used_predicates + ) return ontology def extract_indirect_superclasses_task( @@ -613,23 +985,21 @@ def extract_indirect_superclasses_task( subclass_ancestors = list(adapter.ancestors(subclass, predicates=predicates)) terms = [subclass] + siblings ontology = self.extract_ontology(terms, roots) - answers = [] if roots is not None: roots = set(roots) subclass_parents = {r[2] for r in adapter.relationships([subclass], predicates=predicates)} - for anc in subclass_ancestors: + + def _filter(anc: CURIE) -> bool: + if anc == subclass: + return True + if anc in subclass_parents: + return True if roots is not None: if not roots.intersection(adapter.ancestors(anc, predicates=predicates)): - continue - if anc in subclass_parents or anc == subclass: - # exclude direct - continue - explanations = [ - Explanation(axioms=[self._axiom((s, IS_A, x)), self._axiom((x, IS_A, o))]) - for s, o, x in adapter.paths([subclass], [anc], predicates=predicates) - if s != x and x != o - ] - answers.append(ClassAnswer(text=self._name(anc), explanations=explanations)) + return True + + filtered_ancestors = [anc for anc in subclass_ancestors if not _filter(anc)] + answers = self._answers_from_ancestors(subclass, filtered_ancestors, predicates=predicates) task = EntailedIndirectSuperClassTask( ontology=ontology, query=Query(parameters=[self._name(subclass)]), @@ -639,34 +1009,110 @@ def extract_indirect_superclasses_task( task.populate() return task - - def extract_most_recent_common_subsumers_task( + def _answers_from_ancestors( + self, start: CURIE, ends: List[CURIE], predicates: List[PRED_CURIE] + ) -> List[ClassAnswer]: + graph = self.adapter.ancestor_graph([start], predicates=predicates) + answer_map = defaultdict(list) + for _s, end, path in shortest_paths(graph, [start], ends, directed=True): + axioms = [] + for i in range(len(path) - 1): + axioms.append(self._axiom((path[i], IS_A, path[i + 1]))) + answer_map[end].append(Explanation(axioms=axioms)) + return [ClassAnswer(text=self._name(end), explanations=answer_map[end]) for end in ends] + + def extract_transitive_superclasses_task( self, - subclass1: CURIE, - subclass2: CURIE, - siblings: List[CURIE], + subclass: CURIE = None, + siblings: List[CURIE] = None, roots: Optional[List[CURIE]] = None, predicates: Optional[List[PRED_CURIE]] = None, + select_random=False, **kwargs, - ) -> MostRecentCommonSubsumerTask: + ) -> EntailedTransitiveSuperClassTask: """ - Extract a task for finding all MRCAs of a pair of classes. + Extract a task for finding all transitive superclasses of a class. + + >>> from oaklib import get_adapter + >>> from ontogpt.ontex.extractor import OntologyExtractor + >>> adapter = get_adapter("sqlite:obo:go") + >>> extractor = OntologyExtractor(adapter=adapter) + >>> task = extractor.extract_transitive_superclasses_task( + ... subclass="GO:0005634", siblings=["GO:0005773"], roots=["GO:0043226"] + ... 
)
+        :param subclass: the main focus of the query
+        :param siblings: other terms to include (to make the task harder)
+        :param roots: only include descendants of these terms
+        :return: An EntailedTransitiveSuperClassTask
         """
         if predicates is None:
             predicates = [IS_A]
         adapter = self.adapter
-        subclass1_ancestors = list(adapter.ancestors(subclass1, predicates=predicates))
-        subclass2_ancestors = list(adapter.ancestors(subclass2, predicates=predicates))
+        if select_random:
+            all_classes = list(adapter.entities(filter_obsoletes=True, owl_type=OWL_CLASS))
+            subclass = random.choice(all_classes)
+            siblings = random.sample(all_classes, 3)
+        subclass_ancestors = list(adapter.ancestors(subclass, predicates=predicates))
+        terms = [subclass] + siblings
+        ontology = self.extract_ontology(terms, roots)
+        answers = []
+        if roots is not None:
+            roots = set(roots)
+
+        def _filter(anc: CURIE) -> bool:
+            if anc == subclass:
+                return True
+            if roots is not None:
+                if not roots.intersection(adapter.ancestors(anc, predicates=predicates)):
+                    return True
+
+        filtered_ancestors = [anc for anc in subclass_ancestors if not _filter(anc)]
+        answers = self._answers_from_ancestors(subclass, filtered_ancestors, predicates=predicates)
+        task = EntailedTransitiveSuperClassTask(
+            ontology=ontology,
+            query=Query(parameters=[self._name(subclass)]),
+            answers=answers,
+            **kwargs,
+        )
+        task.populate()
+        return task
+
+    def extract_most_recent_common_subsumers_task(
+        self,
+        subclass1: CURIE = None,
+        subclass2: CURIE = None,
+        siblings: List[CURIE] = None,
+        roots: Optional[List[CURIE]] = None,
+        predicates: Optional[List[PRED_CURIE]] = None,
+        select_random=False,
+        **kwargs,
+    ) -> MostRecentCommonSubsumerTask:
+        """Extract a task for finding all MRCAs of a pair of classes."""
+        if predicates is None:
+            predicates = [IS_A]
+        adapter = self.adapter
+        if select_random:
+            all_classes = list(adapter.entities(filter_obsoletes=True, owl_type=OWL_CLASS))
+            subclass1 = random.choice(all_classes)
+            subclass2 = random.choice(all_classes)
+            siblings = random.sample(all_classes, 2)
         terms = [subclass1, subclass2] + siblings
         ontology = self.extract_ontology(terms, roots)
         answers = []
         if not isinstance(adapter, SemanticSimilarityInterface):
             raise ValueError("Adapter must implement SemanticSimilarityInterface")
-        mrcas = list(adapter.most_recent_common_ancestors(subclass1, subclass2, predicates=predicates))
+        mrcas = list(
+            adapter.most_recent_common_ancestors(subclass1, subclass2, predicates=predicates)
+        )
         for mrca in mrcas:
             explanations = [
-                Explanation(axioms=[self._axiom((mrca, IS_A, subclass1)), self._axiom((mrca, IS_A, subclass2))])
+                Explanation(
+                    axioms=[
+                        self._axiom((subclass1, IS_A, mrca)),
+                        self._axiom((subclass2, IS_A, mrca)),
+                    ]
+                )
             ]
             answers.append(ClassAnswer(text=self._name(mrca), explanations=explanations))
         task = MostRecentCommonSubsumerTask(
@@ -680,32 +1126,59 @@ def extract_most_recent_common_subsumers_task(

     def extract_subclass_of_expression_task(
         self,
-        superclass: CURIE,
-        predicate: PRED_CURIE,
-        siblings: List[CURIE],
+        superclass: CURIE = None,
+        predicate: PRED_CURIE = None,
+        siblings: List[CURIE] = None,
         predicates: Optional[List[PRED_CURIE]] = None,
+        select_random=False,
         **kwargs,
     ) -> EntailedSubClassOfExpressionTask:
-        if predicates is None:
-            predicates = [IS_A, predicate]
         adapter = self.adapter
+        if predicate is None:
+            predicate = PART_OF
+        if not predicates:
+            predicates = [IS_A, predicate]
+        if select_random:
+            all_classes = list(adapter.entities(filter_obsoletes=True, owl_type=OWL_CLASS))
+
siblings = random.sample(all_classes, 2) + n = 0 + while True: + superclass = random.choice(all_classes) + descendants = list(adapter.descendants(superclass, predicates=predicates)) + isa_descendants = list(adapter.descendants(superclass, predicates=[IS_A])) + if ( + len(descendants) < 15 + and len(descendants) > 0 + and len(descendants) != len(isa_descendants) + ): + break + n += 1 + if n > 100: + raise ValueError( + f"Could not find suitable parent (ontology MUST have {predicate}" + ) + logger.info(f"Extracting subclass of expression task for {superclass}, preds={predicates}") descendants = list(adapter.descendants(superclass, predicates=predicates)) + isa_descendants = list(adapter.descendants(superclass, predicates=[IS_A])) terms = descendants + siblings roots = [superclass] + siblings - ontology = self.extract_ontology(terms, roots) + ontology = self.extract_ontology(terms, roots, predicates=predicates) answers = [] if roots is not None: roots = set(roots) for desc in descendants: if desc == superclass: continue + if desc in isa_descendants: + # TODO: Reflexive scenario + continue # if desc not in ontology.terms: # continue explanations = [] answers.append(ClassAnswer(text=self._name(desc), explanations=explanations)) task = EntailedSubClassOfExpressionTask( ontology=ontology, - query=Query(parameters=[self._name(superclass)]), + query=Query(parameters=[self._name(predicate), self._name(superclass)]), answers=answers, **kwargs, ) @@ -714,11 +1187,12 @@ def extract_subclass_of_expression_task( def extract_incoherent_ontology_task( self, - incoherents: List[CURIE], - siblings: List[CURIE], - disjoints: List[Tuple[CURIE, CURIE]], - spiked_relationships: List[RELATIONSHIP], + incoherents: List[CURIE] = None, + siblings: List[CURIE] = None, + disjoints: List[Tuple[CURIE, CURIE]] = None, + spiked_relationships: List[RELATIONSHIP] = None, roots: Optional[List[CURIE]] = None, + select_random=False, **kwargs, ) -> OntologyCoherencyTask: """ @@ -732,6 +1206,27 @@ def extract_incoherent_ontology_task( :return: """ adapter = self.adapter + if select_random: + all_classes = list(adapter.entities(filter_obsoletes=True, owl_type=OWL_CLASS)) + siblings = random.sample(all_classes, 2) + candidates = [] + for c in all_classes: + parents = {rel[2] for rel in adapter.relationships(subjects=[c], predicates=[IS_A])} + if len(parents) > 1: + candidates.append((c, parents)) + if len(candidates) == 0: + raise ValueError("No suitable candidates") + root_incoherent, parents = random.choice(candidates) + incoherents = [ + random.choice(list(adapter.descendants(root_incoherent, predicates=[IS_A]))) + ] + parents = list(parents) + random.shuffle(parents) + disjoints = [(parents[0], parents[1])] + if not incoherents or not siblings or not disjoints: + raise ValueError("Must specify incoherents, siblings, and disjoints") + if not spiked_relationships: + spiked_relationships = [] terms = incoherents + siblings for s, _p, o in spiked_relationships: terms += [s, o] diff --git a/tests/integration/test_knowledge_engines/test_reasoning.py b/tests/integration/test_knowledge_engines/test_reasoning.py index 9989d28e5..efcfa1527 100644 --- a/tests/integration/test_knowledge_engines/test_reasoning.py +++ b/tests/integration/test_knowledge_engines/test_reasoning.py @@ -7,9 +7,8 @@ from oaklib import get_adapter from oaklib.datamodels.vocabulary import IS_A, PART_OF from oaklib.interfaces.obograph_interface import OboGraphInterface -from pydantic import BaseModel -from ontogpt.engines.reasoner_engine import ReasonerEngine, 
ReasonerResult +from ontogpt.engines.reasoner_engine import ReasonerEngine, ReasonerResult, ReasonerResultSet from ontogpt.io.csv_wrapper import write_obj_as_csv from ontogpt.io.yaml_wrapper import dump_minimal_yaml from ontogpt.ontex import extractor @@ -40,11 +39,6 @@ logger.setLevel(level=logging.INFO) -class ReasonerResultSet(BaseModel): - name: str - results: List[ReasonerResult] - - class TestReasoning(unittest.TestCase): """Test ability to convert from OAK to native HALO form.""" @@ -67,15 +61,30 @@ def save(self, results: List[ReasonerResult], name: str): def tasks(self) -> Iterator[Task]: extractor = self.extractor - yield extractor.extract_indirect_superclasses_task( - name="random", - select_random=True, + # yield extractor.extract_indirect_superclasses_task( + # name="random", + # select_random=True, + # ) + yield extractor.extract_transitive_superclasses_task( + name="transitive-ancestor-nucleus", + subclass=NUCLEUS, + siblings=[VACUOLE], + roots=[ORGANELLE], ) yield extractor.extract_indirect_superclasses_task( - name="ancestor-nucleus", subclass=NUCLEUS, siblings=[VACUOLE], roots=[ORGANELLE] + name="indirect-ancestor-nucleus", + subclass=NUCLEUS, + siblings=[VACUOLE], + roots=[ORGANELLE], + ) + yield extractor.extract_transitive_superclasses_task( + name="transitive-ancestor-nuclear-membrane", + subclass=IMBO, + siblings=[NUCLEUS], + roots=[ORGANELLE, BIOLOGICAL_PROCESS], ) yield extractor.extract_indirect_superclasses_task( - name="ancestor-nuclear-membrane", + name="indirect-ancestor-nuclear-membrane", subclass=IMBO, siblings=[NUCLEUS], roots=[ORGANELLE, BIOLOGICAL_PROCESS], @@ -94,7 +103,10 @@ def tasks(self) -> Iterator[Task]: ) yield extractor.extract_most_recent_common_subsumers_task( name="mrca-nucleus-vacuole", - subclass1=NUCLEUS, subclass2=VACUOLE, siblings=[NUCLEAR_MEMBRANE], roots=[] + subclass1=NUCLEUS, + subclass2=VACUOLE, + siblings=[NUCLEAR_MEMBRANE], + roots=[], ) yield extractor.extract_subclass_of_expression_task( name="subclass-of-part-of-nuclear-envelope", @@ -136,6 +148,7 @@ def test_reason(self): print(yaml.dump(result.dict(), sort_keys=False)) print(result.prompt) results.append(result) + ReasonerResultSet(results=[result]) for result in results: print( f"Result: {result.jaccard_score} {result.false_positives} {result.false_negatives}" diff --git a/tests/unit/test_ontex/test_extract.py b/tests/unit/test_ontex/test_extract.py index 5402cd1ca..c5e0a437c 100644 --- a/tests/unit/test_ontex/test_extract.py +++ b/tests/unit/test_ontex/test_extract.py @@ -8,11 +8,14 @@ from oaklib.datamodels.vocabulary import IS_A, PART_OF from oaklib.interfaces.obograph_interface import OboGraphInterface +from ontogpt.io.yaml_wrapper import dump_minimal_yaml from ontogpt.ontex import extractor -from ontogpt.ontex.extractor import OntologyExtractor, Task +from ontogpt.ontex.extractor import OntologyExtractor, Task, TaskCollection from tests import ( CELLULAR_ANATOMICAL_ENTITY, ENVELOPE, + FUNGI, + IMBO, INPUT_DIR, INTRACELLULAR_ORGANELLE, MEMBRANE_BOUNDED_ORGANELLE, @@ -20,7 +23,8 @@ NUCLEAR_MEMBRANE, NUCLEUS, ORGANELLE, - VACUOLE, IMBO, + OUTPUT_DIR, + VACUOLE, ) TEST_ONTOLOGY_OAK = INPUT_DIR / "go-nucleus.db" @@ -42,7 +46,10 @@ def setUp(self) -> None: def cases(self) -> Iterator[Tuple[Task, List[str]]]: extractor = self.extractor - yield extractor.extract_indirect_superclasses_task(select_random=True), None + # yield extractor.extract_indirect_superclasses_task(select_random=True), None + yield extractor.extract_transitive_superclasses_task( + subclass=NUCLEUS, 
siblings=[VACUOLE], roots=[ORGANELLE] + ), [ORGANELLE, IMBO, INTRACELLULAR_ORGANELLE, MEMBRANE_BOUNDED_ORGANELLE] yield extractor.extract_indirect_superclasses_task( subclass=NUCLEUS, siblings=[VACUOLE], roots=[ORGANELLE] ), [ORGANELLE, INTRACELLULAR_ORGANELLE, MEMBRANE_BOUNDED_ORGANELLE] @@ -61,12 +68,39 @@ def cases(self) -> Iterator[Tuple[Task, List[str]]]: predicate=PART_OF, siblings=[VACUOLE], ), [NUCLEAR_MEMBRANE, NUCLEAR_ENVELOPE] + yield extractor.extract_subclass_of_expression_task( + superclass=IMBO, + predicate=PART_OF, + siblings=[FUNGI], + ), [NUCLEAR_MEMBRANE, NUCLEAR_ENVELOPE] def test_extract(self): - """Test extract seed ontology.""" + """Test extract tasks.""" extractor = self.extractor for task, expected in self.cases(): + if not task.ontology.axioms: + raise ValueError(f"Task {task} has no axioms") print(yaml.dump(task.dict(), sort_keys=False)) answer_texts = [a.text for a in task.answers] if expected is not None: self.assertCountEqual(answer_texts, [extractor._name(x) for x in expected]) + + def test_random(self): + """Test extract random tasks.""" + extractor = self.extractor + tc = extractor.create_random_tasks(20) + for task in tc.tasks: + if not task.answers: + print(f"Task {task} has no answers") + # raise ValueError(f"Task {task} has no answers") + if not task.ontology.axioms: + raise ValueError(f"Task {task} has no axioms") + # raise ValueError(f"Task {task} has no axioms") + path = OUTPUT_DIR / "random-reasoner-tasks.yaml" + with open(path, "w") as f: + f.write(dump_minimal_yaml(tc)) + tc = TaskCollection.load(path) + task_types = {type(obj) for obj in tc.tasks} + print(len(tc.tasks)) + print(task_types) + self.assertEqual(len(task_types), 5) diff --git a/tox.ini b/tox.ini index f29b8c631..107dcd6be 100644 --- a/tox.ini +++ b/tox.ini @@ -101,6 +101,7 @@ ignore = S101 # Use of assert detected. The enclosed code will be removed when compiling to optimised byte code. S113 # Requests call without timeout S110 # Try, Except, Pass detected. + S311
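
For illustration, the reasoner-task datamodel introduced in extractor.py above can
also be driven by hand; a minimal sketch (the axiom texts and the answer are
invented, everything else follows the classes in the diff):

    from ontogpt.ontex.extractor import (
        Axiom,
        ClassAnswer,
        EntailedIndirectSuperClassTask,
        Ontology,
        Query,
    )

    task = EntailedIndirectSuperClassTask(
        ontology=Ontology(
            axioms=[
                Axiom(text="nucleus SubClassOf organelle"),
                Axiom(text="organelle SubClassOf cellular_component"),
            ]
        ),
        query=Query(parameters=["nucleus"]),
        answers=[ClassAnswer(text="cellular_component")],  # indirect superclass only
    )
    task.populate()  # fills in the query text, a uuid-based name, and the method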
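
A sketch of the batch workflow that the extended `reason` command wraps, assuming
an OpenAI key is configured; the ontology selector and model name are placeholders:

    from oaklib import get_adapter

    from ontogpt.engines.reasoner_engine import ReasonerEngine
    from ontogpt.io.yaml_wrapper import dump_minimal_yaml
    from ontogpt.ontex.extractor import OntologyExtractor, TaskCollection

    # sample randomized tasks (with expected answers) from an ontology
    extractor = OntologyExtractor(adapter=get_adapter("sqlite:obo:go"))
    tc = extractor.create_random_tasks(5)

    # tasks round-trip through YAML, the format consumed by --task-file
    with open("tasks.yaml", "w") as f:
        f.write(dump_minimal_yaml(tc))
    tc = TaskCollection.load("tasks.yaml")

    # run each task through the model and score it against the expected answers
    reasoner = ReasonerEngine(model="gpt-3.5-turbo")
    resultset = reasoner.reason_multiple(tc, evaluate=True)
    for r in resultset.results:
        print(r.name, r.precision, r.recall, r.f1_score)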
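
The equivalent invocation through the CLI (--task-file, --all-methods, --evaluate,
and --tsv-output come from the diff above; the --model and -o spellings are assumed
from the shared option decorators and may differ):

    ontogpt reason --task-file tasks.yaml --all-methods --evaluate \
        --tsv-output results.tsv -o results.yaml --model gpt-3.5-turbo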