diff --git a/README.md b/README.md
index c62d027..3e595c1 100644
--- a/README.md
+++ b/README.md
@@ -116,6 +116,9 @@ data.get_results()
data.get_results_binary()
+# get R2 values, coefficients, and coefficient p-values of the fitted model for each edge (one row per TF-target pair)
+data.get_model_stats()
+
```
The expected run time for the installation and running the demo dataset on a "normal" desktop computer is around 3~5 minutes.
diff --git a/dev_tests.ipynb b/dev_tests.ipynb
new file mode 100644
index 0000000..1c16fad
--- /dev/null
+++ b/dev_tests.ipynb
@@ -0,0 +1,1746 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "d03d274e-6792-4bbf-93bf-b8c7259c1d7f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext autoreload\n",
+ "%autoreload 2\n",
+ "import pandas as pd\n",
+ "from src.dysregnet.dysregnet import run"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "b6408bef-4768-42c3-88e3-21e250102240",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "meta = pd.read_csv(\"test_data/tpm_meta.csv\")\n",
+ "expr = pd.read_csv(\"test_data/tpm.csv\")\n",
+ "grn=pd.read_csv(\"test_data/HTRIdb_data.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "0b876315-e883-450e-95a4-88b7caf76085",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "meta[\"birth_days_to\"] = meta[\"birth_days_to\"].fillna(meta[\"birth_days_to\"].mean())\n",
+ "meta[\"race\"] = meta[\"race\"].fillna(\"not reported\")\n",
+ "meta[\"race\"] = meta[\"race\"].replace({\"[Unknown]\": \"not reported\", \"[Not Evaluated]\":\"not reported\"})\n",
+ "\n",
+ "expr = expr.set_index(expr.columns[0])\n",
+ "expr = expr.T\n",
+ "expr.insert(0, \"sample\", expr.index)\n",
+ "assert all(expr.iloc[:, 0].values == meta.iloc[:,0].values)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "208046d3-5d5f-4ea4-aae7-c575094bc88e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sample | \n",
+ " _PATIENT | \n",
+ " cancer type abbreviation | \n",
+ " age_at_initial_pathologic_diagnosis | \n",
+ " gender | \n",
+ " race | \n",
+ " ajcc_pathologic_tumor_stage | \n",
+ " clinical_stage | \n",
+ " histological_type | \n",
+ " histological_grade | \n",
+ " ... | \n",
+ " DSS.time | \n",
+ " DFI | \n",
+ " DFI.time | \n",
+ " PFI | \n",
+ " PFI.time | \n",
+ " Redaction | \n",
+ " sample_type_id | \n",
+ " sample_type | \n",
+ " _primary_disease | \n",
+ " condition | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " TCGA-55-7995-01 | \n",
+ " TCGA-55-7995 | \n",
+ " LUAD | \n",
+ " 73.0 | \n",
+ " FEMALE | \n",
+ " WHITE | \n",
+ " Stage IA | \n",
+ " NaN | \n",
+ " Lung Adenocarcinoma | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 889.0 | \n",
+ " 1.0 | \n",
+ " 468.0 | \n",
+ " 1.0 | \n",
+ " 468.0 | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ " Primary Tumor | \n",
+ " lung adenocarcinoma | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " TCGA-38-4625-11 | \n",
+ " TCGA-38-4625 | \n",
+ " LUAD | \n",
+ " 66.0 | \n",
+ " FEMALE | \n",
+ " WHITE | \n",
+ " Stage IB | \n",
+ " NaN | \n",
+ " Lung Adenocarcinoma | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2973.0 | \n",
+ " 0.0 | \n",
+ " 2973.0 | \n",
+ " 0.0 | \n",
+ " 2973.0 | \n",
+ " NaN | \n",
+ " 11.0 | \n",
+ " Solid Tissue Normal | \n",
+ " lung adenocarcinoma | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " TCGA-69-7761-01 | \n",
+ " TCGA-69-7761 | \n",
+ " LUAD | \n",
+ " 84.0 | \n",
+ " MALE | \n",
+ " WHITE | \n",
+ " Stage IB | \n",
+ " NaN | \n",
+ " Lung Adenocarcinoma | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 186.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.0 | \n",
+ " 186.0 | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ " Primary Tumor | \n",
+ " lung adenocarcinoma | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " TCGA-67-6216-01 | \n",
+ " TCGA-67-6216 | \n",
+ " LUAD | \n",
+ " 57.0 | \n",
+ " FEMALE | \n",
+ " WHITE | \n",
+ " Stage IA | \n",
+ " NaN | \n",
+ " Lung Adenocarcinoma | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 141.0 | \n",
+ " 0.0 | \n",
+ " 141.0 | \n",
+ " 0.0 | \n",
+ " 141.0 | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ " Primary Tumor | \n",
+ " lung adenocarcinoma | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " TCGA-44-6148-01 | \n",
+ " TCGA-44-6148 | \n",
+ " LUAD | \n",
+ " 60.0 | \n",
+ " MALE | \n",
+ " WHITE | \n",
+ " Stage IA | \n",
+ " NaN | \n",
+ " Lung Adenocarcinoma | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 704.0 | \n",
+ " 0.0 | \n",
+ " 704.0 | \n",
+ " 0.0 | \n",
+ " 704.0 | \n",
+ " NaN | \n",
+ " 1.0 | \n",
+ " Primary Tumor | \n",
+ " lung adenocarcinoma | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 38 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sample _PATIENT cancer type abbreviation \\\n",
+ "0 TCGA-55-7995-01 TCGA-55-7995 LUAD \n",
+ "1 TCGA-38-4625-11 TCGA-38-4625 LUAD \n",
+ "2 TCGA-69-7761-01 TCGA-69-7761 LUAD \n",
+ "3 TCGA-67-6216-01 TCGA-67-6216 LUAD \n",
+ "4 TCGA-44-6148-01 TCGA-44-6148 LUAD \n",
+ "\n",
+ " age_at_initial_pathologic_diagnosis gender race \\\n",
+ "0 73.0 FEMALE WHITE \n",
+ "1 66.0 FEMALE WHITE \n",
+ "2 84.0 MALE WHITE \n",
+ "3 57.0 FEMALE WHITE \n",
+ "4 60.0 MALE WHITE \n",
+ "\n",
+ " ajcc_pathologic_tumor_stage clinical_stage histological_type \\\n",
+ "0 Stage IA NaN Lung Adenocarcinoma \n",
+ "1 Stage IB NaN Lung Adenocarcinoma \n",
+ "2 Stage IB NaN Lung Adenocarcinoma \n",
+ "3 Stage IA NaN Lung Adenocarcinoma \n",
+ "4 Stage IA NaN Lung Adenocarcinoma \n",
+ "\n",
+ " histological_grade ... DSS.time DFI DFI.time PFI PFI.time Redaction \\\n",
+ "0 NaN ... 889.0 1.0 468.0 1.0 468.0 NaN \n",
+ "1 NaN ... 2973.0 0.0 2973.0 0.0 2973.0 NaN \n",
+ "2 NaN ... 186.0 NaN NaN 0.0 186.0 NaN \n",
+ "3 NaN ... 141.0 0.0 141.0 0.0 141.0 NaN \n",
+ "4 NaN ... 704.0 0.0 704.0 0.0 704.0 NaN \n",
+ "\n",
+ " sample_type_id sample_type _primary_disease condition \n",
+ "0 1.0 Primary Tumor lung adenocarcinoma 1 \n",
+ "1 11.0 Solid Tissue Normal lung adenocarcinoma 0 \n",
+ "2 1.0 Primary Tumor lung adenocarcinoma 1 \n",
+ "3 1.0 Primary Tumor lung adenocarcinoma 1 \n",
+ "4 1.0 Primary Tumor lung adenocarcinoma 1 \n",
+ "\n",
+ "[5 rows x 38 columns]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "meta.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "35febb56-1441-4049-a452-450c25afebb1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " sample | \n",
+ " sample | \n",
+ " RP11-34P13.7 | \n",
+ " RP11-34P13.8 | \n",
+ " CICP27 | \n",
+ " RP11-34P13.15 | \n",
+ " RP11-34P13.16 | \n",
+ " RP11-34P13.13 | \n",
+ " FO538757.2 | \n",
+ " AP006222.2 | \n",
+ " RP4-669L17.10 | \n",
+ " ... | \n",
+ " MT-CO2 | \n",
+ " MT-ATP8 | \n",
+ " MT-ATP6 | \n",
+ " MT-CO3 | \n",
+ " MT-ND3 | \n",
+ " MT-ND4L | \n",
+ " MT-ND4 | \n",
+ " MT-ND5 | \n",
+ " MT-ND6 | \n",
+ " MT-CYB | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " TCGA-55-7995-01 | \n",
+ " TCGA-55-7995-01 | \n",
+ " 0.496253 | \n",
+ " 0.562163 | \n",
+ " 1.819967 | \n",
+ " 1.945791 | \n",
+ " 1.013340 | \n",
+ " 1.489258 | \n",
+ " 2.809733 | \n",
+ " 2.720060 | \n",
+ " 1.448061 | \n",
+ " ... | \n",
+ " 0.620082 | \n",
+ " -0.891616 | \n",
+ " 0.031685 | \n",
+ " 0.590567 | \n",
+ " -0.847172 | \n",
+ " 0.537382 | \n",
+ " 0.866038 | \n",
+ " 0.486255 | \n",
+ " -0.036586 | \n",
+ " -0.059644 | \n",
+ "
\n",
+ " \n",
+ " TCGA-38-4625-11 | \n",
+ " TCGA-38-4625-11 | \n",
+ " -0.110067 | \n",
+ " 0.014913 | \n",
+ " -1.962869 | \n",
+ " -0.813442 | \n",
+ " -0.416048 | \n",
+ " -1.713702 | \n",
+ " -0.940562 | \n",
+ " -1.039181 | \n",
+ " -1.737634 | \n",
+ " ... | \n",
+ " 0.151399 | \n",
+ " -0.685961 | \n",
+ " -0.306375 | \n",
+ " 0.289113 | \n",
+ " 0.895153 | \n",
+ " -0.455624 | \n",
+ " -0.069085 | \n",
+ " -0.627751 | \n",
+ " -0.722210 | \n",
+ " -0.002905 | \n",
+ "
\n",
+ " \n",
+ " TCGA-69-7761-01 | \n",
+ " TCGA-69-7761-01 | \n",
+ " -0.110067 | \n",
+ " 0.578951 | \n",
+ " 0.341960 | \n",
+ " -0.030543 | \n",
+ " 0.101305 | \n",
+ " 0.602601 | \n",
+ " -1.764752 | \n",
+ " -1.116434 | \n",
+ " 0.600953 | \n",
+ " ... | \n",
+ " -0.695560 | \n",
+ " 0.842728 | \n",
+ " 0.008627 | \n",
+ " -0.768548 | \n",
+ " -2.694695 | \n",
+ " -0.043072 | \n",
+ " -0.480471 | \n",
+ " -0.274686 | \n",
+ " -0.205755 | \n",
+ " -1.255776 | \n",
+ "
\n",
+ " \n",
+ " TCGA-67-6216-01 | \n",
+ " TCGA-67-6216-01 | \n",
+ " -0.414540 | \n",
+ " -2.486586 | \n",
+ " -0.101231 | \n",
+ " 1.162652 | \n",
+ " 0.742126 | \n",
+ " 0.936864 | \n",
+ " -1.340492 | \n",
+ " -1.116434 | \n",
+ " 0.747482 | \n",
+ " ... | \n",
+ " 0.152220 | \n",
+ " -0.215530 | \n",
+ " 0.056840 | \n",
+ " 0.926111 | \n",
+ " -2.544535 | \n",
+ " 0.081287 | \n",
+ " -0.070977 | \n",
+ " -0.378933 | \n",
+ " -1.056571 | \n",
+ " -0.535812 | \n",
+ "
\n",
+ " \n",
+ " TCGA-44-6148-01 | \n",
+ " TCGA-44-6148-01 | \n",
+ " 1.098818 | \n",
+ " 0.595261 | \n",
+ " 0.728725 | \n",
+ " 0.670473 | \n",
+ " 0.439598 | \n",
+ " 0.729149 | \n",
+ " 0.512986 | \n",
+ " 1.445461 | \n",
+ " 0.868339 | \n",
+ " ... | \n",
+ " 0.323585 | \n",
+ " -0.281548 | \n",
+ " 0.094381 | \n",
+ " 0.249448 | \n",
+ " 0.895798 | \n",
+ " 0.406246 | \n",
+ " 0.483969 | \n",
+ " -0.844608 | \n",
+ " -1.144351 | \n",
+ " -0.227255 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 22579 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ "sample sample RP11-34P13.7 RP11-34P13.8 CICP27 \\\n",
+ "TCGA-55-7995-01 TCGA-55-7995-01 0.496253 0.562163 1.819967 \n",
+ "TCGA-38-4625-11 TCGA-38-4625-11 -0.110067 0.014913 -1.962869 \n",
+ "TCGA-69-7761-01 TCGA-69-7761-01 -0.110067 0.578951 0.341960 \n",
+ "TCGA-67-6216-01 TCGA-67-6216-01 -0.414540 -2.486586 -0.101231 \n",
+ "TCGA-44-6148-01 TCGA-44-6148-01 1.098818 0.595261 0.728725 \n",
+ "\n",
+ "sample RP11-34P13.15 RP11-34P13.16 RP11-34P13.13 FO538757.2 \\\n",
+ "TCGA-55-7995-01 1.945791 1.013340 1.489258 2.809733 \n",
+ "TCGA-38-4625-11 -0.813442 -0.416048 -1.713702 -0.940562 \n",
+ "TCGA-69-7761-01 -0.030543 0.101305 0.602601 -1.764752 \n",
+ "TCGA-67-6216-01 1.162652 0.742126 0.936864 -1.340492 \n",
+ "TCGA-44-6148-01 0.670473 0.439598 0.729149 0.512986 \n",
+ "\n",
+ "sample AP006222.2 RP4-669L17.10 ... MT-CO2 MT-ATP8 MT-ATP6 \\\n",
+ "TCGA-55-7995-01 2.720060 1.448061 ... 0.620082 -0.891616 0.031685 \n",
+ "TCGA-38-4625-11 -1.039181 -1.737634 ... 0.151399 -0.685961 -0.306375 \n",
+ "TCGA-69-7761-01 -1.116434 0.600953 ... -0.695560 0.842728 0.008627 \n",
+ "TCGA-67-6216-01 -1.116434 0.747482 ... 0.152220 -0.215530 0.056840 \n",
+ "TCGA-44-6148-01 1.445461 0.868339 ... 0.323585 -0.281548 0.094381 \n",
+ "\n",
+ "sample MT-CO3 MT-ND3 MT-ND4L MT-ND4 MT-ND5 MT-ND6 \\\n",
+ "TCGA-55-7995-01 0.590567 -0.847172 0.537382 0.866038 0.486255 -0.036586 \n",
+ "TCGA-38-4625-11 0.289113 0.895153 -0.455624 -0.069085 -0.627751 -0.722210 \n",
+ "TCGA-69-7761-01 -0.768548 -2.694695 -0.043072 -0.480471 -0.274686 -0.205755 \n",
+ "TCGA-67-6216-01 0.926111 -2.544535 0.081287 -0.070977 -0.378933 -1.056571 \n",
+ "TCGA-44-6148-01 0.249448 0.895798 0.406246 0.483969 -0.844608 -1.144351 \n",
+ "\n",
+ "sample MT-CYB \n",
+ "TCGA-55-7995-01 -0.059644 \n",
+ "TCGA-38-4625-11 -0.002905 \n",
+ "TCGA-69-7761-01 -1.255776 \n",
+ "TCGA-67-6216-01 -0.535812 \n",
+ "TCGA-44-6148-01 -0.227255 \n",
+ "\n",
+ "[5 rows x 22579 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "expr.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "9dd72783-5b5d-488d-937e-f1d34535b29a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " SYMBOL_TF | \n",
+ " SYMBOL_TG | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " PARP1 | \n",
+ " BRCA2 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " AHR | \n",
+ " CYP1A1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " AHR | \n",
+ " CYP1A2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " AHR | \n",
+ " CYP1B1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " AHR | \n",
+ " FOS | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " SYMBOL_TF SYMBOL_TG\n",
+ "0 PARP1 BRCA2\n",
+ "1 AHR CYP1A1\n",
+ "2 AHR CYP1A2\n",
+ "3 AHR CYP1B1\n",
+ "4 AHR FOS"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "grn.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "abda2742-f17d-440c-b5b2-e021d2c26f9c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "CatCov=['gender'] \n",
+ "ConCov=['birth_days_to',]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "62b921d3-27e0-4f54-a77b-d3b35d5eedfb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "14979it [00:45, 332.12it/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "data=run(expression_data=expr,\n",
+ " meta=meta, \n",
+ " CatCov=CatCov,\n",
+ " ConCov=ConCov,\n",
+ " GRN=grn,\n",
+ " conCol='condition')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "c3aede6a-7044-456e-affd-3804f54eb9d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " (PARP1, BRCA2) | \n",
+ " (AHR, CYP1B1) | \n",
+ " (AHR, FOS) | \n",
+ " (AHR, SOS1) | \n",
+ " (AHR, UGT1A6) | \n",
+ " (AR, AADAC) | \n",
+ " (AR, ABCA1) | \n",
+ " (AR, ABCA2) | \n",
+ " (AR, ABCF1) | \n",
+ " (AR, ABCA4) | \n",
+ " ... | \n",
+ " (ZNF419, CDKN2A) | \n",
+ " (ZNF671, CDKN2A) | \n",
+ " (THAP7, CDKN2A) | \n",
+ " (FOXP2, PLAUR) | \n",
+ " (FOXP2, CNTNAP2) | \n",
+ " (ZNF653, CDKN2A) | \n",
+ " (E2F7, SP1) | \n",
+ " (ZNF417, CDKN2A) | \n",
+ " (ZNF384, CDKN2A) | \n",
+ " (ZNF384, COL1A1) | \n",
+ "
\n",
+ " \n",
+ " patient id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " TCGA-55-7995-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -11.1 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-69-7761-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-67-6216-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 6.9 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -5.3 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-44-6148-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-71-8520-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 4.8 | \n",
+ " 0.0 | \n",
+ " 5.5 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 4.7 | \n",
+ " 4.7 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " TCGA-69-7763-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-78-7150-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 5.4 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 5.3 | \n",
+ " 4.1 | \n",
+ " 5.4 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 4.9 | \n",
+ " 5.3 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-MP-A4TI-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -8.3 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-44-6145-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " -8.5 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-05-4427-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 6.0 | \n",
+ " -11.4 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
515 rows × 14979 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " (PARP1, BRCA2) (AHR, CYP1B1) (AHR, FOS) (AHR, SOS1) \\\n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 0.0 0.0 0.0 0.0 \n",
+ "... ... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ " (AHR, UGT1A6) (AR, AADAC) (AR, ABCA1) (AR, ABCA2) \\\n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 6.9 0.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 0.0 0.0 0.0 0.0 \n",
+ "... ... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 0.0 5.4 0.0 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ " (AR, ABCF1) (AR, ABCA4) ... (ZNF419, CDKN2A) \\\n",
+ "patient id ... \n",
+ "TCGA-55-7995-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-67-6216-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-71-8520-01 0.0 0.0 ... 4.8 \n",
+ "... ... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-78-7150-01 0.0 0.0 ... 5.3 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 ... 0.0 \n",
+ "\n",
+ " (ZNF671, CDKN2A) (THAP7, CDKN2A) (FOXP2, PLAUR) \\\n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 0.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 0.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 0.0 5.5 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 4.1 5.4 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 0.0 \n",
+ "\n",
+ " (FOXP2, CNTNAP2) (ZNF653, CDKN2A) (E2F7, SP1) \\\n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 -11.1 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 -5.3 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 0.0 0.0 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 0.0 0.0 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 -8.3 0.0 \n",
+ "TCGA-44-6145-01 0.0 -8.5 0.0 \n",
+ "TCGA-05-4427-01 6.0 -11.4 0.0 \n",
+ "\n",
+ " (ZNF417, CDKN2A) (ZNF384, CDKN2A) (ZNF384, COL1A1) \n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 0.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 0.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 4.7 4.7 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 4.9 5.3 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 0.0 \n",
+ "\n",
+ "[515 rows x 14979 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.get_results()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "e719292a-4d97-426a-b1aa-c64e1f0b1837",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " R2 | \n",
+ " coef_intercept | \n",
+ " coef_TF | \n",
+ " coef_birth_days_to | \n",
+ " coef_gender_MALE | \n",
+ " pval_intercept | \n",
+ " pval_TF | \n",
+ " pval_birth_days_to | \n",
+ " pval_gender_MALE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " (PARP1, BRCA2) | \n",
+ " 0.166337 | \n",
+ " 1.515785 | \n",
+ " 0.330729 | \n",
+ " 0.000068 | \n",
+ " 0.285308 | \n",
+ " 0.087795 | \n",
+ " 1.234803e-02 | \n",
+ " 0.056502 | \n",
+ " 0.263960 | \n",
+ "
\n",
+ " \n",
+ " (AHR, CYP1B1) | \n",
+ " 0.078480 | \n",
+ " 1.125920 | \n",
+ " -0.042942 | \n",
+ " 0.000053 | \n",
+ " 0.344663 | \n",
+ " 0.216261 | \n",
+ " 7.472208e-01 | \n",
+ " 0.147472 | \n",
+ " 0.203034 | \n",
+ "
\n",
+ " \n",
+ " (AHR, FOS) | \n",
+ " 0.125917 | \n",
+ " -0.998160 | \n",
+ " 0.297254 | \n",
+ " -0.000041 | \n",
+ " 0.046922 | \n",
+ " 0.259778 | \n",
+ " 2.512678e-02 | \n",
+ " 0.250962 | \n",
+ " 0.857749 | \n",
+ "
\n",
+ " \n",
+ " (AHR, SOS1) | \n",
+ " 0.545301 | \n",
+ " -0.985723 | \n",
+ " 0.696733 | \n",
+ " -0.000040 | \n",
+ " 0.082682 | \n",
+ " 0.124716 | \n",
+ " 6.121357e-10 | \n",
+ " 0.123721 | \n",
+ " 0.661683 | \n",
+ "
\n",
+ " \n",
+ " (AHR, UGT1A6) | \n",
+ " 0.146786 | \n",
+ " -0.752809 | \n",
+ " -0.174668 | \n",
+ " -0.000041 | \n",
+ " -0.562913 | \n",
+ " 0.388505 | \n",
+ " 1.764701e-01 | \n",
+ " 0.239152 | \n",
+ " 0.033036 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " (ZNF653, CDKN2A) | \n",
+ " 0.333566 | \n",
+ " -2.066818 | \n",
+ " -0.020549 | \n",
+ " -0.000100 | \n",
+ " -0.810059 | \n",
+ " 0.009235 | \n",
+ " 8.554798e-01 | \n",
+ " 0.001918 | \n",
+ " 0.000677 | \n",
+ "
\n",
+ " \n",
+ " (E2F7, SP1) | \n",
+ " 0.100177 | \n",
+ " -1.700884 | \n",
+ " 0.162334 | \n",
+ " -0.000067 | \n",
+ " 0.194240 | \n",
+ " 0.058594 | \n",
+ " 2.147244e-01 | \n",
+ " 0.062178 | \n",
+ " 0.460722 | \n",
+ "
\n",
+ " \n",
+ " (ZNF417, CDKN2A) | \n",
+ " 0.337528 | \n",
+ " -1.990619 | \n",
+ " 0.066682 | \n",
+ " -0.000097 | \n",
+ " -0.824056 | \n",
+ " 0.010785 | \n",
+ " 5.495458e-01 | \n",
+ " 0.002148 | \n",
+ " 0.000572 | \n",
+ "
\n",
+ " \n",
+ " (ZNF384, CDKN2A) | \n",
+ " 0.340745 | \n",
+ " -1.883079 | \n",
+ " 0.090320 | \n",
+ " -0.000093 | \n",
+ " -0.833861 | \n",
+ " 0.018186 | \n",
+ " 4.297584e-01 | \n",
+ " 0.004001 | \n",
+ " 0.000505 | \n",
+ "
\n",
+ " \n",
+ " (ZNF384, COL1A1) | \n",
+ " 0.144069 | \n",
+ " -0.571788 | \n",
+ " 0.348782 | \n",
+ " -0.000025 | \n",
+ " -0.045509 | \n",
+ " 0.519196 | \n",
+ " 9.296426e-03 | \n",
+ " 0.488815 | \n",
+ " 0.860108 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
14979 rows × 9 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " R2 coef_intercept coef_TF coef_birth_days_to \\\n",
+ "(PARP1, BRCA2) 0.166337 1.515785 0.330729 0.000068 \n",
+ "(AHR, CYP1B1) 0.078480 1.125920 -0.042942 0.000053 \n",
+ "(AHR, FOS) 0.125917 -0.998160 0.297254 -0.000041 \n",
+ "(AHR, SOS1) 0.545301 -0.985723 0.696733 -0.000040 \n",
+ "(AHR, UGT1A6) 0.146786 -0.752809 -0.174668 -0.000041 \n",
+ "... ... ... ... ... \n",
+ "(ZNF653, CDKN2A) 0.333566 -2.066818 -0.020549 -0.000100 \n",
+ "(E2F7, SP1) 0.100177 -1.700884 0.162334 -0.000067 \n",
+ "(ZNF417, CDKN2A) 0.337528 -1.990619 0.066682 -0.000097 \n",
+ "(ZNF384, CDKN2A) 0.340745 -1.883079 0.090320 -0.000093 \n",
+ "(ZNF384, COL1A1) 0.144069 -0.571788 0.348782 -0.000025 \n",
+ "\n",
+ " coef_gender_MALE pval_intercept pval_TF \\\n",
+ "(PARP1, BRCA2) 0.285308 0.087795 1.234803e-02 \n",
+ "(AHR, CYP1B1) 0.344663 0.216261 7.472208e-01 \n",
+ "(AHR, FOS) 0.046922 0.259778 2.512678e-02 \n",
+ "(AHR, SOS1) 0.082682 0.124716 6.121357e-10 \n",
+ "(AHR, UGT1A6) -0.562913 0.388505 1.764701e-01 \n",
+ "... ... ... ... \n",
+ "(ZNF653, CDKN2A) -0.810059 0.009235 8.554798e-01 \n",
+ "(E2F7, SP1) 0.194240 0.058594 2.147244e-01 \n",
+ "(ZNF417, CDKN2A) -0.824056 0.010785 5.495458e-01 \n",
+ "(ZNF384, CDKN2A) -0.833861 0.018186 4.297584e-01 \n",
+ "(ZNF384, COL1A1) -0.045509 0.519196 9.296426e-03 \n",
+ "\n",
+ " pval_birth_days_to pval_gender_MALE \n",
+ "(PARP1, BRCA2) 0.056502 0.263960 \n",
+ "(AHR, CYP1B1) 0.147472 0.203034 \n",
+ "(AHR, FOS) 0.250962 0.857749 \n",
+ "(AHR, SOS1) 0.123721 0.661683 \n",
+ "(AHR, UGT1A6) 0.239152 0.033036 \n",
+ "... ... ... \n",
+ "(ZNF653, CDKN2A) 0.001918 0.000677 \n",
+ "(E2F7, SP1) 0.062178 0.460722 \n",
+ "(ZNF417, CDKN2A) 0.002148 0.000572 \n",
+ "(ZNF384, CDKN2A) 0.004001 0.000505 \n",
+ "(ZNF384, COL1A1) 0.488815 0.860108 \n",
+ "\n",
+ "[14979 rows x 9 columns]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.get_model_stats()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "3aacbc4e-8f13-48ef-849a-7404f45e9573",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " (PARP1, BRCA2) | \n",
+ " (AHR, CYP1B1) | \n",
+ " (AHR, FOS) | \n",
+ " (AHR, SOS1) | \n",
+ " (AHR, UGT1A6) | \n",
+ " (AR, AADAC) | \n",
+ " (AR, ABCA1) | \n",
+ " (AR, ABCA2) | \n",
+ " (AR, ABCF1) | \n",
+ " (AR, ABCA4) | \n",
+ " ... | \n",
+ " (ZNF419, CDKN2A) | \n",
+ " (ZNF671, CDKN2A) | \n",
+ " (THAP7, CDKN2A) | \n",
+ " (FOXP2, PLAUR) | \n",
+ " (FOXP2, CNTNAP2) | \n",
+ " (ZNF653, CDKN2A) | \n",
+ " (E2F7, SP1) | \n",
+ " (ZNF417, CDKN2A) | \n",
+ " (ZNF384, CDKN2A) | \n",
+ " (ZNF384, COL1A1) | \n",
+ "
\n",
+ " \n",
+ " patient id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " TCGA-55-7995-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-69-7761-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-67-6216-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-44-6148-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-71-8520-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " TCGA-69-7763-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-78-7150-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-MP-A4TI-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-44-6145-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " TCGA-05-4427-01 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
515 rows × 14979 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " (PARP1, BRCA2) (AHR, CYP1B1) (AHR, FOS) (AHR, SOS1) \\\n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 0.0 0.0 0.0 0.0 \n",
+ "... ... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ " (AHR, UGT1A6) (AR, AADAC) (AR, ABCA1) (AR, ABCA2) \\\n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 1.0 0.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 0.0 0.0 0.0 0.0 \n",
+ "... ... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 0.0 1.0 0.0 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 0.0 0.0 \n",
+ "\n",
+ " (AR, ABCF1) (AR, ABCA4) ... (ZNF419, CDKN2A) \\\n",
+ "patient id ... \n",
+ "TCGA-55-7995-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-67-6216-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-71-8520-01 0.0 0.0 ... 1.0 \n",
+ "... ... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-78-7150-01 0.0 0.0 ... 1.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 ... 0.0 \n",
+ "\n",
+ " (ZNF671, CDKN2A) (THAP7, CDKN2A) (FOXP2, PLAUR) \\\n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 0.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 0.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 0.0 1.0 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 1.0 1.0 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 0.0 \n",
+ "\n",
+ " (FOXP2, CNTNAP2) (ZNF653, CDKN2A) (E2F7, SP1) \\\n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 1.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 1.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 0.0 0.0 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 0.0 0.0 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 1.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 1.0 0.0 \n",
+ "TCGA-05-4427-01 1.0 1.0 0.0 \n",
+ "\n",
+ " (ZNF417, CDKN2A) (ZNF384, CDKN2A) (ZNF384, COL1A1) \n",
+ "patient id \n",
+ "TCGA-55-7995-01 0.0 0.0 0.0 \n",
+ "TCGA-69-7761-01 0.0 0.0 0.0 \n",
+ "TCGA-67-6216-01 0.0 0.0 0.0 \n",
+ "TCGA-44-6148-01 0.0 0.0 0.0 \n",
+ "TCGA-71-8520-01 1.0 1.0 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-69-7763-01 0.0 0.0 0.0 \n",
+ "TCGA-78-7150-01 1.0 1.0 0.0 \n",
+ "TCGA-MP-A4TI-01 0.0 0.0 0.0 \n",
+ "TCGA-44-6145-01 0.0 0.0 0.0 \n",
+ "TCGA-05-4427-01 0.0 0.0 0.0 \n",
+ "\n",
+ "[515 rows x 14979 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.get_results_binary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a77d69fa-9401-466a-a20a-29d738649016",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/setup.py b/setup.py
index f406a04..227998b 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
setup(name='dysregnet',
- version='0.0.3',
+ version='0.0.4',
description='DysRegNet',
long_description=README,
long_description_content_type="text/markdown",
@@ -27,7 +27,8 @@
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Topic :: Scientific/Engineering :: Bio-Informatics",
],
- packages=find_packages(),
+ package_dir = {'': 'src'},
+ packages=['dysregnet'],
include_package_data=True,
python_requires='>=3.7',
install_requires=[
diff --git a/dysregnet/__init__.py b/src/dysregnet/__init__.py
similarity index 100%
rename from dysregnet/__init__.py
rename to src/dysregnet/__init__.py
diff --git a/dysregnet/dysregnet.py b/src/dysregnet/dysregnet.py
similarity index 96%
rename from dysregnet/dysregnet.py
rename to src/dysregnet/dysregnet.py
index a2e045a..3459a73 100644
--- a/dysregnet/dysregnet.py
+++ b/src/dysregnet/dysregnet.py
@@ -57,7 +57,7 @@ def __init__(self,
List of continuous covariates. They should match the name of their columns in meta Dataframe.
- zscoring: boolean, default: True
+ zscoring: boolean, default: False
zscoring of expression data (if needed).
bonferroni_alpha: Float
@@ -138,7 +138,7 @@ def __init__(self,
self.cov_df,self.expr, self.control, self.case = functions.process_data(self)
- self.results=functions.dyregnet_model(self)
+ self.results, self.model_stats = functions.dyregnet_model(self)
@@ -151,11 +151,14 @@ def get_results(self):
def get_results_binary(self):
res_binary=self.results.copy()
- res_binary=res_binary.set_index('patient id')
res_binary = res_binary.where(res_binary==0, other=1)
return res_binary
+
+ def get_model_stats(self):
+ return self.model_stats
+
diff --git a/dysregnet/functions.py b/src/dysregnet/functions.py
similarity index 74%
rename from dysregnet/functions.py
rename to src/dysregnet/functions.py
index 8a423f8..22d8a49 100644
--- a/dysregnet/functions.py
+++ b/src/dysregnet/functions.py
@@ -11,7 +11,7 @@
def process_data(data):
- # process covariates and desing martic
+ # process covariates and design matrix
all_covariates= data.CatCov + data.ConCov
@@ -32,7 +32,7 @@ def process_data(data):
# process categorial covariate
# drop_first is important to avoid multicollinear
- cov_df=pd.get_dummies(cov_df, columns=data.CatCov, drop_first=True)
+ cov_df=pd.get_dummies(cov_df, columns=data.CatCov, drop_first=True, dtype=int)
@@ -67,63 +67,67 @@ def dyregnet_model(data):
case=data.expr.loc[data.case]
covariate_name=[]
- edges={}
- edges['patient id']=list(case.index)
+ edges = {}
+ edges['patient id']=list(case.index.values)
+ model_stats = {}
for tup in tqdm(data.GRN.itertuples()):
# pvalues for the same edge for all patients
edge = (tup[1],tup[2])
# skip self loops
- if edge[0]!=edge[1]:
-
+ if edge[0] != edge[1]:
+
# prepare control for fitting model
- x_train = control[ [edge[0]] + covariate_name ].values
+ x_train = control[ [edge[0]] + covariate_name ]
+ x_train = sm.add_constant(x_train) # add bias
y_train = control[edge[1]].values
# fit the model
- reg = LinearRegression().fit(x_train, y_train)
+ model = sm.OLS(y_train, x_train)
+ results = model.fit()
+
+ model_stats[edge] = [results.rsquared] + list(results.params.values) + list(results.pvalues.values)
+
- #get residuals of control
- resid_control =reg.predict(x_train) - y_train
+ # get residuals of control
+ resid_control = results.predict(x_train) - y_train
-
# test data (case or condition)
- x_test = case[ [edge[0]]+ covariate_name ].values
+ x_test = case[ [edge[0]]+ covariate_name ]
+ x_test = sm.add_constant(x_test) # add bias
y_test = case[edge[1]].values
-
# define residue for cases
- resid_case = reg.predict(x_test) - y_test
-
-
+ resid_case = results.predict(x_test) - y_test
+
# condition of direction
- cond=True
- direction= np.sign(reg.coef_[0])
+ cond = True
+ direction = np.sign(results.params[1])
# two sided p_value as default
# if direction_condition is false calculate, two sided p value
- sides=2
+ sides = 2
if data.direction_condition:
- cond=( direction * resid_case )>0
+ cond = ( direction * resid_case ) > 0
# if direction_condition is true only calculate one sided p value
- sides=1
+ sides = 1
# calculate zscore
- zscore=(resid_case-resid_control.mean())/resid_control.std()
+ zscore= (resid_case - resid_control.mean()) / resid_control.std()
# Quality check of the fitness (optionally and must be provided by user)
- if (data.R2_threshold is not None) and ( data.R2_threshold > reg.score(x_train, y_train) ):
+ if (data.R2_threshold is not None) and ( data.R2_threshold > results.rsquared ):
# model fit is not that good on training
# shrink the zscores
edges[edge]= [0.0] * len(zscore)
@@ -160,8 +164,15 @@ def dyregnet_model(data):
zscore[~valid]=0.0
- edges[edge]=np.round(zscore, 1)
+ edges[edge] = np.round(zscore, 1)
+
+
- data=pd.DataFrame.from_dict(edges)
+ results = pd.DataFrame.from_dict(edges)
+ results = results.set_index('patient id')
+
+ model_stats_cols = ["R2"] + ["coef_" + coef for coef in ["intercept", "TF"] + covariate_name] + ["pval_" + coef for coef in ["intercept", "TF"] + covariate_name]
+ model_stats = pd.DataFrame([model_stats[edge] for edge in results.columns], index=results.columns, columns=model_stats_cols)
+
- return data
+ return results, model_stats
diff --git a/test.ipynb b/test.ipynb
index 86a0bbd..aa080da 100644
--- a/test.ipynb
+++ b/test.ipynb
@@ -5,9 +5,19 @@
"execution_count": 1,
"id": "b15e0cd9-bcec-47ca-b095-326d208de825",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.0.4\n"
+ ]
+ }
+ ],
"source": [
- "import dysregnet"
+ "import dysregnet\n",
+ "import importlib.metadata\n",
+ "print(importlib.metadata.version('dysregnet'))"
]
},
{
@@ -554,7 +564,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "14162it [00:37, 381.60it/s]\n"
+ "14162it [00:50, 280.14it/s]\n"
]
}
],
@@ -604,7 +614,6 @@
" \n",
" \n",
" | \n",
- " patient id | \n",
" (PARP1, BRCA2) | \n",
" (AHR, CYP1B1) | \n",
" (AHR, FOS) | \n",
@@ -614,6 +623,7 @@
" (AR, ABCA2) | \n",
" (AR, ABCF1) | \n",
" (AR, ABCA4) | \n",
+ " (AR, ABL1) | \n",
" ... | \n",
" (ZNF419, CDKN2A) | \n",
" (ZNF671, CDKN2A) | \n",
@@ -626,11 +636,35 @@
" (ZNF384, CDKN2A) | \n",
" (ZNF384, COL1A1) | \n",
"
\n",
+ " \n",
+ " patient id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
" \n",
" \n",
" \n",
- " 0 | \n",
- " TCGA-3C-AAAU-01 | \n",
+ " TCGA-3C-AAAU-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -653,8 +687,8 @@
" 0.0 | \n",
"
\n",
" \n",
- " 1 | \n",
- " TCGA-3C-AALI-01 | \n",
+ " TCGA-3C-AALI-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -677,8 +711,8 @@
" 0.0 | \n",
"
\n",
" \n",
- " 2 | \n",
- " TCGA-3C-AALJ-01 | \n",
+ " TCGA-3C-AALJ-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -701,8 +735,8 @@
" 0.0 | \n",
"
\n",
" \n",
- " 3 | \n",
- " TCGA-3C-AALK-01 | \n",
+ " TCGA-3C-AALK-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -725,8 +759,8 @@
" 0.0 | \n",
"
\n",
" \n",
- " 4 | \n",
- " TCGA-4H-AAAK-01 | \n",
+ " TCGA-4H-AAAK-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -773,8 +807,8 @@
" ... | \n",
"
\n",
" \n",
- " 1093 | \n",
- " TCGA-WT-AB44-01 | \n",
+ " TCGA-WT-AB44-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -797,8 +831,8 @@
" 0.0 | \n",
"
\n",
" \n",
- " 1094 | \n",
- " TCGA-XX-A899-01 | \n",
+ " TCGA-XX-A899-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -821,8 +855,8 @@
" 0.0 | \n",
"
\n",
" \n",
- " 1095 | \n",
- " TCGA-XX-A89A-01 | \n",
+ " TCGA-XX-A89A-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -845,8 +879,8 @@
" 0.0 | \n",
"
\n",
" \n",
- " 1096 | \n",
- " TCGA-Z7-A8R5-01 | \n",
+ " TCGA-Z7-A8R5-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -869,8 +903,8 @@
" 0.0 | \n",
"
\n",
" \n",
- " 1097 | \n",
- " TCGA-Z7-A8R6-01 | \n",
+ " TCGA-Z7-A8R6-01 | \n",
+ " 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
@@ -894,76 +928,95 @@
"
\n",
" \n",
"\n",
- "1098 rows × 14148 columns
\n",
+ "1098 rows × 14147 columns
\n",
""
],
"text/plain": [
- " patient id (PARP1, BRCA2) (AHR, CYP1B1) (AHR, FOS) (AHR, SOS1) \\\n",
- "0 TCGA-3C-AAAU-01 0.0 0.0 0.0 0.0 \n",
- "1 TCGA-3C-AALI-01 0.0 0.0 0.0 0.0 \n",
- "2 TCGA-3C-AALJ-01 0.0 0.0 0.0 0.0 \n",
- "3 TCGA-3C-AALK-01 0.0 0.0 0.0 0.0 \n",
- "4 TCGA-4H-AAAK-01 0.0 0.0 0.0 0.0 \n",
- "... ... ... ... ... ... \n",
- "1093 TCGA-WT-AB44-01 0.0 0.0 0.0 0.0 \n",
- "1094 TCGA-XX-A899-01 0.0 0.0 0.0 0.0 \n",
- "1095 TCGA-XX-A89A-01 0.0 0.0 0.0 0.0 \n",
- "1096 TCGA-Z7-A8R5-01 0.0 0.0 0.0 0.0 \n",
- "1097 TCGA-Z7-A8R6-01 0.0 0.0 0.0 0.0 \n",
+ " (PARP1, BRCA2) (AHR, CYP1B1) (AHR, FOS) (AHR, SOS1) \\\n",
+ "patient id \n",
+ "TCGA-3C-AAAU-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALI-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALJ-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALK-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-4H-AAAK-01 0.0 0.0 0.0 0.0 \n",
+ "... ... ... ... ... \n",
+ "TCGA-WT-AB44-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-XX-A899-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-XX-A89A-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R5-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R6-01 0.0 0.0 0.0 0.0 \n",
"\n",
- " (AHR, UGT1A6) (AR, ABCA1) (AR, ABCA2) (AR, ABCF1) (AR, ABCA4) ... \\\n",
- "0 0.0 0.0 0.0 0.0 0.0 ... \n",
- "1 0.0 0.0 0.0 0.0 0.0 ... \n",
- "2 0.0 0.0 0.0 0.0 0.0 ... \n",
- "3 0.0 0.0 0.0 0.0 0.0 ... \n",
- "4 0.0 0.0 0.0 0.0 0.0 ... \n",
- "... ... ... ... ... ... ... \n",
- "1093 0.0 0.0 0.0 0.0 0.0 ... \n",
- "1094 0.0 0.0 0.0 0.0 0.0 ... \n",
- "1095 0.0 0.0 0.0 0.0 0.0 ... \n",
- "1096 0.0 0.0 0.0 0.0 0.0 ... \n",
- "1097 0.0 0.0 0.0 0.0 0.0 ... \n",
+ " (AHR, UGT1A6) (AR, ABCA1) (AR, ABCA2) (AR, ABCF1) \\\n",
+ "patient id \n",
+ "TCGA-3C-AAAU-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALI-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALJ-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALK-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-4H-AAAK-01 0.0 0.0 0.0 0.0 \n",
+ "... ... ... ... ... \n",
+ "TCGA-WT-AB44-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-XX-A899-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-XX-A89A-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R5-01 0.0 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R6-01 0.0 0.0 0.0 0.0 \n",
"\n",
- " (ZNF419, CDKN2A) (ZNF671, CDKN2A) (THAP7, CDKN2A) (FOXP2, PLAUR) \\\n",
- "0 0.0 0.0 0.0 0.0 \n",
- "1 0.0 0.0 0.0 0.0 \n",
- "2 0.0 0.0 0.0 0.0 \n",
- "3 0.0 0.0 0.0 0.0 \n",
- "4 0.0 0.0 0.0 0.0 \n",
- "... ... ... ... ... \n",
- "1093 0.0 0.0 0.0 0.0 \n",
- "1094 0.0 0.0 0.0 0.0 \n",
- "1095 0.0 0.0 0.0 0.0 \n",
- "1096 0.0 0.0 0.0 0.0 \n",
- "1097 0.0 0.0 0.0 0.0 \n",
+ " (AR, ABCA4) (AR, ABL1) ... (ZNF419, CDKN2A) \\\n",
+ "patient id ... \n",
+ "TCGA-3C-AAAU-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-3C-AALI-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-3C-AALJ-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-3C-AALK-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-4H-AAAK-01 0.0 0.0 ... 0.0 \n",
+ "... ... ... ... ... \n",
+ "TCGA-WT-AB44-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-XX-A899-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-XX-A89A-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-Z7-A8R5-01 0.0 0.0 ... 0.0 \n",
+ "TCGA-Z7-A8R6-01 0.0 0.0 ... 0.0 \n",
"\n",
- " (FOXP2, CNTNAP2) (ZNF653, CDKN2A) (E2F7, SP1) (ZNF417, CDKN2A) \\\n",
- "0 0.0 0.0 0.0 0.0 \n",
- "1 0.0 0.0 0.0 0.0 \n",
- "2 0.0 0.0 0.0 0.0 \n",
- "3 0.0 0.0 0.0 0.0 \n",
- "4 0.0 0.0 0.0 0.0 \n",
- "... ... ... ... ... \n",
- "1093 0.0 0.0 0.0 0.0 \n",
- "1094 0.0 0.0 0.0 0.0 \n",
- "1095 0.0 0.0 0.0 0.0 \n",
- "1096 0.0 0.0 0.0 0.0 \n",
- "1097 0.0 0.0 0.0 0.0 \n",
+ " (ZNF671, CDKN2A) (THAP7, CDKN2A) (FOXP2, PLAUR) \\\n",
+ "patient id \n",
+ "TCGA-3C-AAAU-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALI-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALJ-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALK-01 0.0 0.0 0.0 \n",
+ "TCGA-4H-AAAK-01 0.0 0.0 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-WT-AB44-01 0.0 0.0 0.0 \n",
+ "TCGA-XX-A899-01 0.0 0.0 0.0 \n",
+ "TCGA-XX-A89A-01 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R5-01 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R6-01 0.0 0.0 0.0 \n",
"\n",
- " (ZNF384, CDKN2A) (ZNF384, COL1A1) \n",
- "0 0.0 0.0 \n",
- "1 0.0 0.0 \n",
- "2 0.0 0.0 \n",
- "3 0.0 0.0 \n",
- "4 0.0 0.0 \n",
- "... ... ... \n",
- "1093 0.0 0.0 \n",
- "1094 0.0 0.0 \n",
- "1095 0.0 0.0 \n",
- "1096 0.0 0.0 \n",
- "1097 0.0 0.0 \n",
+ " (FOXP2, CNTNAP2) (ZNF653, CDKN2A) (E2F7, SP1) \\\n",
+ "patient id \n",
+ "TCGA-3C-AAAU-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALI-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALJ-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALK-01 0.0 0.0 0.0 \n",
+ "TCGA-4H-AAAK-01 0.0 0.0 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-WT-AB44-01 0.0 0.0 0.0 \n",
+ "TCGA-XX-A899-01 0.0 0.0 0.0 \n",
+ "TCGA-XX-A89A-01 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R5-01 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R6-01 0.0 0.0 0.0 \n",
"\n",
- "[1098 rows x 14148 columns]"
+ " (ZNF417, CDKN2A) (ZNF384, CDKN2A) (ZNF384, COL1A1) \n",
+ "patient id \n",
+ "TCGA-3C-AAAU-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALI-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALJ-01 0.0 0.0 0.0 \n",
+ "TCGA-3C-AALK-01 0.0 0.0 0.0 \n",
+ "TCGA-4H-AAAK-01 0.0 0.0 0.0 \n",
+ "... ... ... ... \n",
+ "TCGA-WT-AB44-01 0.0 0.0 0.0 \n",
+ "TCGA-XX-A899-01 0.0 0.0 0.0 \n",
+ "TCGA-XX-A89A-01 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R5-01 0.0 0.0 0.0 \n",
+ "TCGA-Z7-A8R6-01 0.0 0.0 0.0 \n",
+ "\n",
+ "[1098 rows x 14147 columns]"
]
},
"execution_count": 9,
@@ -1418,11 +1471,379 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"id": "0aa5c03b-1886-4b06-a6dd-c7933a00046c",
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " R2 | \n",
+ " coef_intercept | \n",
+ " coef_TF | \n",
+ " coef_birth_days_to | \n",
+ " coef_race_ASIAN | \n",
+ " coef_race_BLACK OR AFRICAN AMERICAN | \n",
+ " coef_race_WHITE | \n",
+ " coef_race_[Not Evaluated] | \n",
+ " coef_gender_MALE | \n",
+ " pval_intercept | \n",
+ " pval_TF | \n",
+ " pval_birth_days_to | \n",
+ " pval_race_ASIAN | \n",
+ " pval_race_BLACK OR AFRICAN AMERICAN | \n",
+ " pval_race_WHITE | \n",
+ " pval_race_[Not Evaluated] | \n",
+ " pval_gender_MALE | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " (PARP1, BRCA2) | \n",
+ " 0.518944 | \n",
+ " -0.628582 | \n",
+ " 1.009024 | \n",
+ " 0.000016 | \n",
+ " 1.641409 | \n",
+ " 1.672660 | \n",
+ " 1.696522 | \n",
+ " 0.0 | \n",
+ " -0.119685 | \n",
+ " 0.387606 | \n",
+ " 2.496552e-14 | \n",
+ " 0.196470 | \n",
+ " 0.079281 | \n",
+ " 0.019698 | \n",
+ " 0.010310 | \n",
+ " NaN | \n",
+ " 0.852860 | \n",
+ "
\n",
+ " \n",
+ " (AHR, CYP1B1) | \n",
+ " 0.460382 | \n",
+ " -0.372820 | \n",
+ " 0.524032 | \n",
+ " -0.000007 | \n",
+ " 0.767510 | \n",
+ " 0.131428 | \n",
+ " 0.002362 | \n",
+ " 0.0 | \n",
+ " -0.155764 | \n",
+ " 0.421093 | \n",
+ " 2.162187e-14 | \n",
+ " 0.355266 | \n",
+ " 0.212239 | \n",
+ " 0.782895 | \n",
+ " 0.995718 | \n",
+ " NaN | \n",
+ " 0.716738 | \n",
+ "
\n",
+ " \n",
+ " (AHR, FOS) | \n",
+ " 0.075604 | \n",
+ " 1.447006 | \n",
+ " 0.005936 | \n",
+ " -0.000021 | \n",
+ " -1.688957 | \n",
+ " -0.004656 | \n",
+ " -0.260388 | \n",
+ " 0.0 | \n",
+ " 0.997901 | \n",
+ " 0.077100 | \n",
+ " 9.545845e-01 | \n",
+ " 0.132889 | \n",
+ " 0.118768 | \n",
+ " 0.995564 | \n",
+ " 0.736247 | \n",
+ " NaN | \n",
+ " 0.187290 | \n",
+ "
\n",
+ " \n",
+ " (AHR, SOS1) | \n",
+ " 0.469212 | \n",
+ " 0.675038 | \n",
+ " 0.764064 | \n",
+ " 0.000008 | \n",
+ " -0.986869 | \n",
+ " -0.397666 | \n",
+ " -0.218486 | \n",
+ " 0.0 | \n",
+ " 1.340383 | \n",
+ " 0.319415 | \n",
+ " 2.434483e-14 | \n",
+ " 0.491512 | \n",
+ " 0.272152 | \n",
+ " 0.568683 | \n",
+ " 0.734236 | \n",
+ " NaN | \n",
+ " 0.034528 | \n",
+ "
\n",
+ " \n",
+ " (AHR, UGT1A6) | \n",
+ " 0.071785 | \n",
+ " -1.422249 | \n",
+ " 0.133097 | \n",
+ " -0.000007 | \n",
+ " -0.751396 | \n",
+ " 0.789607 | \n",
+ " 1.100960 | \n",
+ " 0.0 | \n",
+ " 0.563332 | \n",
+ " 0.147520 | \n",
+ " 2.895810e-01 | \n",
+ " 0.675375 | \n",
+ " 0.561933 | \n",
+ " 0.433673 | \n",
+ " 0.237741 | \n",
+ " NaN | \n",
+ " 0.534613 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " (ZNF653, CDKN2A) | \n",
+ " 0.261828 | \n",
+ " -2.962304 | \n",
+ " 0.195867 | \n",
+ " -0.000037 | \n",
+ " 0.956503 | \n",
+ " 1.237842 | \n",
+ " 1.233544 | \n",
+ " 0.0 | \n",
+ " -1.645633 | \n",
+ " 0.000009 | \n",
+ " 1.319707e-04 | \n",
+ " 0.000767 | \n",
+ " 0.250047 | \n",
+ " 0.052591 | \n",
+ " 0.038164 | \n",
+ " NaN | \n",
+ " 0.006131 | \n",
+ "
\n",
+ " \n",
+ " (E2F7, SP1) | \n",
+ " 0.231473 | \n",
+ " 0.375956 | \n",
+ " 0.302768 | \n",
+ " 0.000028 | \n",
+ " 0.878721 | \n",
+ " 1.194801 | \n",
+ " 1.148187 | \n",
+ " 0.0 | \n",
+ " 0.181981 | \n",
+ " 0.576601 | \n",
+ " 5.622973e-05 | \n",
+ " 0.016079 | \n",
+ " 0.318238 | \n",
+ " 0.078094 | \n",
+ " 0.070756 | \n",
+ " NaN | \n",
+ " 0.771183 | \n",
+ "
\n",
+ " \n",
+ " (ZNF417, CDKN2A) | \n",
+ " 0.183978 | \n",
+ " -2.813077 | \n",
+ " 0.155436 | \n",
+ " -0.000037 | \n",
+ " 0.672192 | \n",
+ " 0.860186 | \n",
+ " 0.882694 | \n",
+ " 0.0 | \n",
+ " -1.724703 | \n",
+ " 0.000058 | \n",
+ " 4.457482e-02 | \n",
+ " 0.001722 | \n",
+ " 0.451491 | \n",
+ " 0.219116 | \n",
+ " 0.170465 | \n",
+ " NaN | \n",
+ " 0.006266 | \n",
+ "
\n",
+ " \n",
+ " (ZNF384, CDKN2A) | \n",
+ " 0.165512 | \n",
+ " -3.017923 | \n",
+ " 0.120197 | \n",
+ " -0.000036 | \n",
+ " 0.915799 | \n",
+ " 1.166994 | \n",
+ " 1.129410 | \n",
+ " 0.0 | \n",
+ " -1.811391 | \n",
+ " 0.000019 | \n",
+ " 1.957493e-01 | \n",
+ " 0.003156 | \n",
+ " 0.303410 | \n",
+ " 0.087805 | \n",
+ " 0.075205 | \n",
+ " NaN | \n",
+ " 0.004527 | \n",
+ "
\n",
+ " \n",
+ " (ZNF384, COL1A1) | \n",
+ " 0.035308 | \n",
+ " -2.388996 | \n",
+ " 0.016159 | \n",
+ " -0.000002 | \n",
+ " 2.266961 | \n",
+ " 1.227011 | \n",
+ " 0.839028 | \n",
+ " 0.0 | \n",
+ " 0.528321 | \n",
+ " 0.029983 | \n",
+ " 9.138044e-01 | \n",
+ " 0.914840 | \n",
+ " 0.115510 | \n",
+ " 0.263972 | \n",
+ " 0.409823 | \n",
+ " NaN | \n",
+ " 0.601052 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
14147 rows × 17 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " R2 coef_intercept coef_TF coef_birth_days_to \\\n",
+ "(PARP1, BRCA2) 0.518944 -0.628582 1.009024 0.000016 \n",
+ "(AHR, CYP1B1) 0.460382 -0.372820 0.524032 -0.000007 \n",
+ "(AHR, FOS) 0.075604 1.447006 0.005936 -0.000021 \n",
+ "(AHR, SOS1) 0.469212 0.675038 0.764064 0.000008 \n",
+ "(AHR, UGT1A6) 0.071785 -1.422249 0.133097 -0.000007 \n",
+ "... ... ... ... ... \n",
+ "(ZNF653, CDKN2A) 0.261828 -2.962304 0.195867 -0.000037 \n",
+ "(E2F7, SP1) 0.231473 0.375956 0.302768 0.000028 \n",
+ "(ZNF417, CDKN2A) 0.183978 -2.813077 0.155436 -0.000037 \n",
+ "(ZNF384, CDKN2A) 0.165512 -3.017923 0.120197 -0.000036 \n",
+ "(ZNF384, COL1A1) 0.035308 -2.388996 0.016159 -0.000002 \n",
+ "\n",
+ " coef_race_ASIAN coef_race_BLACK OR AFRICAN AMERICAN \\\n",
+ "(PARP1, BRCA2) 1.641409 1.672660 \n",
+ "(AHR, CYP1B1) 0.767510 0.131428 \n",
+ "(AHR, FOS) -1.688957 -0.004656 \n",
+ "(AHR, SOS1) -0.986869 -0.397666 \n",
+ "(AHR, UGT1A6) -0.751396 0.789607 \n",
+ "... ... ... \n",
+ "(ZNF653, CDKN2A) 0.956503 1.237842 \n",
+ "(E2F7, SP1) 0.878721 1.194801 \n",
+ "(ZNF417, CDKN2A) 0.672192 0.860186 \n",
+ "(ZNF384, CDKN2A) 0.915799 1.166994 \n",
+ "(ZNF384, COL1A1) 2.266961 1.227011 \n",
+ "\n",
+ " coef_race_WHITE coef_race_[Not Evaluated] \\\n",
+ "(PARP1, BRCA2) 1.696522 0.0 \n",
+ "(AHR, CYP1B1) 0.002362 0.0 \n",
+ "(AHR, FOS) -0.260388 0.0 \n",
+ "(AHR, SOS1) -0.218486 0.0 \n",
+ "(AHR, UGT1A6) 1.100960 0.0 \n",
+ "... ... ... \n",
+ "(ZNF653, CDKN2A) 1.233544 0.0 \n",
+ "(E2F7, SP1) 1.148187 0.0 \n",
+ "(ZNF417, CDKN2A) 0.882694 0.0 \n",
+ "(ZNF384, CDKN2A) 1.129410 0.0 \n",
+ "(ZNF384, COL1A1) 0.839028 0.0 \n",
+ "\n",
+ " coef_gender_MALE pval_intercept pval_TF \\\n",
+ "(PARP1, BRCA2) -0.119685 0.387606 2.496552e-14 \n",
+ "(AHR, CYP1B1) -0.155764 0.421093 2.162187e-14 \n",
+ "(AHR, FOS) 0.997901 0.077100 9.545845e-01 \n",
+ "(AHR, SOS1) 1.340383 0.319415 2.434483e-14 \n",
+ "(AHR, UGT1A6) 0.563332 0.147520 2.895810e-01 \n",
+ "... ... ... ... \n",
+ "(ZNF653, CDKN2A) -1.645633 0.000009 1.319707e-04 \n",
+ "(E2F7, SP1) 0.181981 0.576601 5.622973e-05 \n",
+ "(ZNF417, CDKN2A) -1.724703 0.000058 4.457482e-02 \n",
+ "(ZNF384, CDKN2A) -1.811391 0.000019 1.957493e-01 \n",
+ "(ZNF384, COL1A1) 0.528321 0.029983 9.138044e-01 \n",
+ "\n",
+ " pval_birth_days_to pval_race_ASIAN \\\n",
+ "(PARP1, BRCA2) 0.196470 0.079281 \n",
+ "(AHR, CYP1B1) 0.355266 0.212239 \n",
+ "(AHR, FOS) 0.132889 0.118768 \n",
+ "(AHR, SOS1) 0.491512 0.272152 \n",
+ "(AHR, UGT1A6) 0.675375 0.561933 \n",
+ "... ... ... \n",
+ "(ZNF653, CDKN2A) 0.000767 0.250047 \n",
+ "(E2F7, SP1) 0.016079 0.318238 \n",
+ "(ZNF417, CDKN2A) 0.001722 0.451491 \n",
+ "(ZNF384, CDKN2A) 0.003156 0.303410 \n",
+ "(ZNF384, COL1A1) 0.914840 0.115510 \n",
+ "\n",
+ " pval_race_BLACK OR AFRICAN AMERICAN pval_race_WHITE \\\n",
+ "(PARP1, BRCA2) 0.019698 0.010310 \n",
+ "(AHR, CYP1B1) 0.782895 0.995718 \n",
+ "(AHR, FOS) 0.995564 0.736247 \n",
+ "(AHR, SOS1) 0.568683 0.734236 \n",
+ "(AHR, UGT1A6) 0.433673 0.237741 \n",
+ "... ... ... \n",
+ "(ZNF653, CDKN2A) 0.052591 0.038164 \n",
+ "(E2F7, SP1) 0.078094 0.070756 \n",
+ "(ZNF417, CDKN2A) 0.219116 0.170465 \n",
+ "(ZNF384, CDKN2A) 0.087805 0.075205 \n",
+ "(ZNF384, COL1A1) 0.263972 0.409823 \n",
+ "\n",
+ " pval_race_[Not Evaluated] pval_gender_MALE \n",
+ "(PARP1, BRCA2) NaN 0.852860 \n",
+ "(AHR, CYP1B1) NaN 0.716738 \n",
+ "(AHR, FOS) NaN 0.187290 \n",
+ "(AHR, SOS1) NaN 0.034528 \n",
+ "(AHR, UGT1A6) NaN 0.534613 \n",
+ "... ... ... \n",
+ "(ZNF653, CDKN2A) NaN 0.006131 \n",
+ "(E2F7, SP1) NaN 0.771183 \n",
+ "(ZNF417, CDKN2A) NaN 0.006266 \n",
+ "(ZNF384, CDKN2A) NaN 0.004527 \n",
+ "(ZNF384, COL1A1) NaN 0.601052 \n",
+ "\n",
+ "[14147 rows x 17 columns]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.get_model_stats()"
+ ]
}
],
"metadata": {
@@ -1441,7 +1862,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.2"
+ "version": "3.11.3"
}
},
"nbformat": 4,