Skip to content

Commit

Permalink
Merge pull request #81 from ArcInstitute/dev
Browse files Browse the repository at this point in the history
code improvements and bug fixes
  • Loading branch information
abearab authored Jul 25, 2024
2 parents 77c097a + ba01f00 commit aa63c69
Show file tree
Hide file tree
Showing 20 changed files with 287 additions and 208 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
fail-fast: false
matrix:
os-version: ["ubuntu-latest"]
python-version: ["3.9"] # ["3.8", "3.9", "3.10"]
python-version: ["3.9", "3.10", "3.11"] #, "3.12"]

steps:
- uses: actions/checkout@v3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
fail-fast: false
matrix:
os-version: ["ubuntu-latest"]
python-version: ["3.9"] # ["3.8", "3.9", "3.10"]
python-version: ["3.11"] # ["3.8", "3.9", "3.10"]

steps:
- uses: actions/checkout@v3
Expand Down
6 changes: 3 additions & 3 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@
"seaborn",
"scanpy",
"anndata",
"screenpro",
# "screenpro",
"pyarrow",
"biobear",
"click",
"polars",
"biobear",
"numba",
"bokeh",
"pydeseq2",
# "bokeh",
# "pydeseq2",
"watermark"
]

Expand Down
41 changes: 38 additions & 3 deletions docs/source/phenotype.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
# Phenotype calculation modules

## Phenotype score calculation

Log ratio of {math}`y` vs {math}`x`:

```{math}
\Delta =
\log(\frac
{\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)} + 1}
{\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)} + 1}
{\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)}}
{\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)}}
)
```

- {math}`y \rightarrow` condition {math}`x` (e.g. treated samples)
- {math}`x \rightarrow` condition {math}`y` (e.g. {math}`t_{0}` samples)
- {math}`x \rightarrow` condition {math}`y` (e.g. {math}`t_{0}` samples, or untreated samples)
- {math}`a \rightarrow` number of library elements with sgRNAs targeting {math}`T`
- {math}`b \rightarrow` number of biological replicates, {math}`R` (e.g. 2 or 3)
- {math}`N_{x}` \| {math}`N_{y} \rightarrow` read counts normalized for sequencing
depth in condition {math}`x` or {math}`y`

___

Here is a formula for V3 library with single library element per gene
(i.e. dual sgRNAs in one construct targeting same gene).

Expand All @@ -40,6 +44,8 @@ Phenotype score for each {math}`T` comparing {math}`y` vs {math}`x`:
- {math}`d_{growth} \rightarrow` growth factor to normalize the phenotype
score.

## Phenotype statistics calculation

Statistical test comparing {math}`y` vs {math}`x` per each target, {math}`T`:

```{math}
Expand All @@ -58,10 +64,39 @@ module](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_r
> This is a test for the null hypothesis that two related or repeated
> samples have identical average (expected) values).
## Combined score calculation

```{math}
\text{combined score} = \left( \dfrac{T_{\text{phenotype score}}}{\sigma{\text{(negative controls)}}} \right) \times -\log_{10}(\text{pvalue})
```

___

```{eval-rst}
.. automodule:: screenpro.phenoscore
:members:
:show-inheritance:
```

## Other related modules and functions

```{eval-rst}
.. automodule:: screenpro.phenoscore.phenostat
:members:
:show-inheritance:
.. automodule:: screenpro.phenoscore.delta
:members:
:show-inheritance:
.. automodule:: screenpro.phenoscore.deseq
:members:
:show-inheritance:
.. automodule:: screenpro.phenoscore.annotate
:members:
:show-inheritance:
```
6 changes: 3 additions & 3 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- conda-forge
- bioconda
dependencies:
- python=3.9
- python>=3.9,<=3.12
- scanpy
- python-igraph
- leidenalg
Expand All @@ -14,15 +14,15 @@ dependencies:
- scipy
- matplotlib<3.7
- seaborn
- pyarrow
- bokeh
- ipykernel
- mscorefonts
- rust>=1.72
- pip
- pip:
- polars
- biobear
- pyarrow<17.0
- biobear>=0.20.3,<0.21
- numba
- pydeseq2
- simple_colors
Expand Down
13 changes: 8 additions & 5 deletions screenpro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,19 @@
- datasets: API for accessing pre-processed datasets
'''

from . import phenoscore as ps
from . import preprocessing as pp
from . import ngs
from . import assays
from . import load
from . import visualize as viz
from . import preprocessing as pp
from . import phenoscore as ps
from . import assays
from . import plotting as pl
from . import dashboard

from .ngs import GuideCounter
from .assays import PooledScreens, GImaps
from .dashboard import DrugScreenDashboard


__version__ = "0.4.5"
__version__ = "0.4.6"
__author__ = "Abe Arab"
__email__ = '[email protected]' # "[email protected]"
72 changes: 42 additions & 30 deletions screenpro/assays.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from .phenoscore import runPhenoScore, runPhenoScoreForReplicate
from .preprocessing import addPseudoCount, findLowCounts, normalizeSeqDepth
from .phenoscore.annotate import annotateScoreTable, hit_dict

import warnings
from copy import copy


Expand Down Expand Up @@ -158,29 +160,38 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col='
run_name (str): name for the phenotype calculation run
**kwargs: additional arguments to pass to runPhenoScore
"""
growth_factor_table = self._calculateGrowthFactor(
untreated = untreated, treated = treated, db_rate_col = db_rate_col
)
if 'pop_doublings' not in self.adata.obs.columns or db_rate_col == None:
warnings.warn('No doubling rate information provided.')
db_untreated = 1
db_treated = 1
db_treated_vs_untreated = 1
growth_factor_table = None

else:
growth_factor_table = self._calculateGrowthFactor(
untreated = untreated, treated = treated, db_rate_col = db_rate_col
)

db_untreated=growth_factor_table.query(f'score=="gamma"')['growth_factor'].mean()
db_treated=growth_factor_table.query(f'score=="tau"')['growth_factor'].mean()
db_untreated=growth_factor_table.query(f'score=="gamma"')['growth_factor'].mean()
db_treated=growth_factor_table.query(f'score=="tau"')['growth_factor'].mean()
db_treated_vs_untreated = np.abs(db_untreated - db_treated)

# calculate phenotype scores: gamma, tau, rho
gamma_name, gamma = runPhenoScore(
self.adata, cond1=t0, cond2=untreated, growth_rate=db_untreated,
self.adata, cond_ref=t0, cond_test=untreated, growth_rate=db_untreated,
n_reps=self.n_reps,
transformation=self.fc_transformation, test=self.test, score_level=score_level,
**kwargs
)
tau_name, tau = runPhenoScore(
self.adata, cond1=t0, cond2=treated, growth_rate=db_treated,
self.adata, cond_ref=t0, cond_test=treated, growth_rate=db_treated,
n_reps=self.n_reps,
transformation=self.fc_transformation, test=self.test, score_level=score_level,
**kwargs
)
# TO-DO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0.
rho_name, rho = runPhenoScore(
self.adata, cond1=untreated, cond2=treated, growth_rate=np.abs(db_untreated - db_treated),
self.adata, cond_ref=untreated, cond_test=treated, growth_rate=db_treated_vs_untreated,
n_reps=self.n_reps,
transformation=self.fc_transformation, test=self.test, score_level=score_level,
**kwargs
Expand All @@ -197,27 +208,28 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col='
self._add_phenotype_results(f'tau:{tau_name}')
self._add_phenotype_results(f'rho:{rho_name}')

# get replicate level phenotype scores
pdata_df = pd.concat([
runPhenoScoreForReplicate(
self.adata, x_label = x_label, y_label = y_label, score = score_label,
transformation=self.fc_transformation,
growth_factor_table=growth_factor_table,
**kwargs
).add_prefix(f'{score_label}_')

for x_label, y_label, score_label in [
('T0', untreated, 'gamma'),
('T0', treated, 'tau'),
(untreated, treated, 'rho')
]
],axis=1).T
# add .pdata
self.pdata = ad.AnnData(
X = pdata_df,
obs = growth_factor_table.loc[pdata_df.index,:],
var=self.adata.var
)
if growth_factor_table:
# get replicate level phenotype scores
pdata_df = pd.concat([
runPhenoScoreForReplicate(
self.adata, x_label = x_label, y_label = y_label, score = score_label,
transformation=self.fc_transformation,
growth_factor_table=growth_factor_table,
**kwargs
).add_prefix(f'{score_label}_')

for x_label, y_label, score_label in [
('T0', untreated, 'gamma'),
('T0', treated, 'tau'),
(untreated, treated, 'rho')
]
],axis=1).T
# add .pdata
self.pdata = ad.AnnData(
X = pdata_df,
obs = growth_factor_table.loc[pdata_df.index,:],
var=self.adata.var
)

def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs):
"""
Expand All @@ -232,7 +244,7 @@ def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None
"""
# calculate phenotype scores
delta_name, delta = runPhenoScore(
self.adata, cond1=low_bin, cond2=high_bin, n_reps=self.n_reps,
self.adata, cond_ref=low_bin, cond_test=high_bin, n_reps=self.n_reps,
transformation=self.fc_transformation, test=self.test, score_level=score_level,
**kwargs
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## Copyright (c) 2022-2024 ScreenPro2 Development Team.
## All rights reserved.
## Gilbart Lab, UCSF / Arc Institute.
## Multi-Omics Tech Center, Arc Insititue.


import numpy as np
import pandas as pd
import bokeh
Expand Down
Loading

0 comments on commit aa63c69

Please sign in to comment.