Merge pull request #81 from ArcInstitute/dev

code improvements and bug fixes
ArcInstitute · Jul 25, 2024 · aa63c69 · aa63c69
2 parents 77c097a + ba01f00
commit aa63c69
Show file tree

Hide file tree

Showing 20 changed files with 287 additions and 208 deletions.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
@@ -16,7 +16,7 @@ jobs:
       fail-fast: false
       matrix:
         os-version: ["ubuntu-latest"]
-        python-version: ["3.9"] # ["3.8", "3.9", "3.10"]
+        python-version: ["3.9", "3.10", "3.11"] #, "3.12"]
 
     steps:
     - uses: actions/checkout@v3

diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -15,7 +15,7 @@ jobs:
       fail-fast: false
       matrix:
         os-version: ["ubuntu-latest"]
-        python-version: ["3.9"] # ["3.8", "3.9", "3.10"]
+        python-version: ["3.11"] # ["3.8", "3.9", "3.10"]
 
     steps:
     - uses: actions/checkout@v3

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -36,15 +36,15 @@
     "seaborn",
     "scanpy",
     "anndata",
-    "screenpro",
+    # "screenpro",
     "pyarrow",
     "biobear",
     "click",
     "polars",
     "biobear",
     "numba",
-    "bokeh",
-    "pydeseq2",
+    # "bokeh",
+    # "pydeseq2",
     "watermark"
 ]
 

diff --git a/docs/source/phenotype.md b/docs/source/phenotype.md
@@ -1,22 +1,26 @@
 # Phenotype calculation modules
 
+## Phenotype score calculation
+
 Log ratio of {math}`y` vs {math}`x`:
 
 ```{math}
 \Delta =
 \log(\frac
-    {\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)} + 1}
-    {\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)} + 1}
+    {\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)}}
+    {\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)}}
 )
 ```
 
 -   {math}`y \rightarrow` condition {math}`x` (e.g. treated samples)
--   {math}`x \rightarrow` condition {math}`y` (e.g. {math}`t_{0}` samples)
+-   {math}`x \rightarrow` condition {math}`y` (e.g. {math}`t_{0}` samples, or untreated samples)
 -   {math}`a \rightarrow` number of library elements with sgRNAs targeting {math}`T`
 -   {math}`b \rightarrow` number of biological replicates, {math}`R` (e.g. 2 or 3)
 -   {math}`N_{x}` \| {math}`N_{y} \rightarrow` read counts normalized for sequencing
     depth in condition {math}`x` or {math}`y`
 
+___
+
 Here is a formula for V3 library with single library element per gene
 (i.e. dual sgRNAs in one construct targeting same gene).
 
@@ -40,6 +44,8 @@ Phenotype score for each {math}`T` comparing {math}`y` vs {math}`x`:
 -   {math}`d_{growth} \rightarrow` growth factor to normalize the phenotype
     score.
 
+## Phenotype statistics calculation
+
 Statistical test comparing {math}`y` vs {math}`x` per each target, {math}`T`:
 
 ```{math}
@@ -58,10 +64,39 @@ module](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_r
 > This is a test for the null hypothesis that two related or repeated
 > samples have identical average (expected) values).
 
+## Combined score calculation
+
+```{math}
+\text{combined score} = \left( \dfrac{T_{\text{phenotype score}}}{\sigma{\text{(negative controls)}}} \right) \times -\log_{10}(\text{pvalue})
+```
+
 ___
 
 ```{eval-rst}
 .. automodule:: screenpro.phenoscore
    :members:
    :show-inheritance:
+
+```
+
+## Other related modules and functions
+
+```{eval-rst}
+
+.. automodule:: screenpro.phenoscore.phenostat
+   :members:
+   :show-inheritance:
+
+.. automodule:: screenpro.phenoscore.delta
+    :members:
+    :show-inheritance:
+
+.. automodule:: screenpro.phenoscore.deseq
+    :members:
+    :show-inheritance:
+
+.. automodule:: screenpro.phenoscore.annotate
+    :members:
+    :show-inheritance:
+
 ```
diff --git a/environment.yml b/environment.yml
@@ -4,7 +4,7 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - python=3.9
+  - python>=3.9,<=3.12
   - scanpy
   - python-igraph
   - leidenalg
@@ -14,15 +14,15 @@ dependencies:
   - scipy
   - matplotlib<3.7
   - seaborn
-  - pyarrow
   - bokeh
   - ipykernel
   - mscorefonts
   - rust>=1.72
   - pip
   - pip:
       - polars
-      - biobear
+      - pyarrow<17.0
+      - biobear>=0.20.3,<0.21
       - numba
       - pydeseq2
       - simple_colors

diff --git a/screenpro/__init__.py b/screenpro/__init__.py
@@ -18,16 +18,19 @@
 - datasets: API for accessing pre-processed datasets
 '''
 
-from . import phenoscore as ps
-from . import preprocessing as pp
 from . import ngs
-from . import assays
 from . import load
-from . import visualize as viz
+from . import preprocessing as pp
+from . import phenoscore as ps
+from . import assays
+from . import plotting as pl
+from . import dashboard
 
 from .ngs import GuideCounter
 from .assays import PooledScreens, GImaps
+from .dashboard import DrugScreenDashboard
+
 
-__version__ = "0.4.5"
+__version__ = "0.4.6"
 __author__ = "Abe Arab"
 __email__ = '[email protected]' # "[email protected]"
diff --git a/screenpro/assays.py b/screenpro/assays.py
@@ -16,6 +16,8 @@
 from .phenoscore import runPhenoScore, runPhenoScoreForReplicate
 from .preprocessing import addPseudoCount, findLowCounts, normalizeSeqDepth
 from .phenoscore.annotate import annotateScoreTable, hit_dict
+
+import warnings
 from copy import copy
 
 
@@ -158,29 +160,38 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col='
             run_name (str): name for the phenotype calculation run
             **kwargs: additional arguments to pass to runPhenoScore
         """
-        growth_factor_table = self._calculateGrowthFactor(
-            untreated = untreated, treated = treated, db_rate_col = db_rate_col
-        )
+        if 'pop_doublings' not in self.adata.obs.columns or db_rate_col == None:
+            warnings.warn('No doubling rate information provided.')
+            db_untreated = 1
+            db_treated = 1
+            db_treated_vs_untreated = 1
+            growth_factor_table = None
+
+        else:
+            growth_factor_table = self._calculateGrowthFactor(
+                untreated = untreated, treated = treated, db_rate_col = db_rate_col
+            )
 
-        db_untreated=growth_factor_table.query(f'score=="gamma"')['growth_factor'].mean()
-        db_treated=growth_factor_table.query(f'score=="tau"')['growth_factor'].mean()
+            db_untreated=growth_factor_table.query(f'score=="gamma"')['growth_factor'].mean()
+            db_treated=growth_factor_table.query(f'score=="tau"')['growth_factor'].mean()
+            db_treated_vs_untreated = np.abs(db_untreated - db_treated)
 
         # calculate phenotype scores: gamma, tau, rho
         gamma_name, gamma = runPhenoScore(
-            self.adata, cond1=t0, cond2=untreated, growth_rate=db_untreated,
+            self.adata, cond_ref=t0, cond_test=untreated, growth_rate=db_untreated,
             n_reps=self.n_reps,
             transformation=self.fc_transformation, test=self.test, score_level=score_level,
             **kwargs
         )
         tau_name, tau = runPhenoScore(
-            self.adata, cond1=t0, cond2=treated, growth_rate=db_treated,
+            self.adata, cond_ref=t0, cond_test=treated, growth_rate=db_treated,
             n_reps=self.n_reps,
             transformation=self.fc_transformation, test=self.test, score_level=score_level,
             **kwargs
         )
         # TO-DO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0.
         rho_name, rho = runPhenoScore(
-            self.adata, cond1=untreated, cond2=treated, growth_rate=np.abs(db_untreated - db_treated),
+            self.adata, cond_ref=untreated, cond_test=treated, growth_rate=db_treated_vs_untreated,
             n_reps=self.n_reps,
             transformation=self.fc_transformation, test=self.test, score_level=score_level,
             **kwargs
@@ -197,27 +208,28 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col='
         self._add_phenotype_results(f'tau:{tau_name}')
         self._add_phenotype_results(f'rho:{rho_name}')
 
-        # get replicate level phenotype scores
-        pdata_df = pd.concat([
-            runPhenoScoreForReplicate(
-                self.adata, x_label = x_label, y_label = y_label, score = score_label,
-                transformation=self.fc_transformation, 
-                growth_factor_table=growth_factor_table,
-                **kwargs
-            ).add_prefix(f'{score_label}_')
-
-            for x_label, y_label, score_label in [
-                ('T0', untreated, 'gamma'),
-                ('T0', treated, 'tau'),
-                (untreated, treated, 'rho')
-            ]
-        ],axis=1).T
-        # add .pdata
-        self.pdata = ad.AnnData(
-            X = pdata_df,
-            obs = growth_factor_table.loc[pdata_df.index,:],
-            var=self.adata.var
-        )
+        if growth_factor_table:
+            # get replicate level phenotype scores
+            pdata_df = pd.concat([
+                runPhenoScoreForReplicate(
+                    self.adata, x_label = x_label, y_label = y_label, score = score_label,
+                    transformation=self.fc_transformation, 
+                    growth_factor_table=growth_factor_table,
+                    **kwargs
+                ).add_prefix(f'{score_label}_')
+
+                for x_label, y_label, score_label in [
+                    ('T0', untreated, 'gamma'),
+                    ('T0', treated, 'tau'),
+                    (untreated, treated, 'rho')
+                ]
+            ],axis=1).T
+            # add .pdata
+            self.pdata = ad.AnnData(
+                X = pdata_df,
+                obs = growth_factor_table.loc[pdata_df.index,:],
+                var=self.adata.var
+            )
 
     def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs):
         """
@@ -232,7 +244,7 @@ def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None
         """
         # calculate phenotype scores
         delta_name, delta = runPhenoScore(
-            self.adata, cond1=low_bin, cond2=high_bin, n_reps=self.n_reps,
+            self.adata, cond_ref=low_bin, cond_test=high_bin, n_reps=self.n_reps,
             transformation=self.fc_transformation, test=self.test, score_level=score_level,
             **kwargs
         )

diff --git a/screenpro/visualize/dashboard.py → screenpro/dashboard/__init__.py b/screenpro/visualize/dashboard.py → screenpro/dashboard/__init__.py
@@ -1,3 +1,9 @@
+## Copyright (c) 2022-2024 ScreenPro2 Development Team.
+## All rights reserved.
+## Gilbart Lab, UCSF / Arc Institute.
+## Multi-Omics Tech Center, Arc Insititue.
+
+
 import numpy as np
 import pandas as pd
 import bokeh