Skip to content

Commit

Permalink
Merge pull request #97 from ArcInstitute/dev
Browse files Browse the repository at this point in the history
bug fixes in `buildPhenotypeData`
  • Loading branch information
abearab authored Sep 24, 2024
2 parents 7777e1b + 0685fec commit a9c3cca
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 15 deletions.
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies:
- mscorefonts
- rust>=1.72
- polars>0.20
- pycairo
- pip
- pip:
- pyarrow
Expand Down
19 changes: 7 additions & 12 deletions screenpro/assays/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,16 +345,6 @@ def buildPhenotypeData(self, run_name='auto',db_rate_col='pop_doubling', **kwarg
raise ValueError('Only `compare_reps` run_name is supported for now!')

untreated = self.phenotypes[run_name]['config']['untreated']
treated = self.phenotypes[run_name]['config']['treated']

if type(treated) != list: treated = [treated]

if db_rate_col:
#TODO: fix `_calculateGrowthFactor` and `_getTreatmentDoublingRate`
growth_factor_table = self._calculateGrowthFactor(
untreated = untreated, treated = treated,
db_rate_col = db_rate_col
)

pdata_list = []

Expand All @@ -363,10 +353,16 @@ def buildPhenotypeData(self, run_name='auto',db_rate_col='pop_doubling', **kwarg
score_tag, comparison = phenotype_name.split(':')
cond_test, cond_ref = comparison.split('_vs_')

#TODO: fix `_calculateGrowthFactor` and `_getTreatmentDoublingRate`
if db_rate_col:
growth_rate_reps=growth_factor_table.query(
growth_rate_reps = self._calculateGrowthFactor(
untreated = untreated,
treated = cond_test, # should be part of "treated" list!
db_rate_col = db_rate_col
).query(
f'score=="{score_tag}"'
).set_index('replicate')['growth_factor'].to_dict()

else:
growth_rate_reps=None

Expand All @@ -376,7 +372,6 @@ def buildPhenotypeData(self, run_name='auto',db_rate_col='pop_doubling', **kwarg
growth_rate_reps=growth_rate_reps,
**kwargs
)
# obs = growth_factor_table.loc[pdata_df.index,:],

pdata_list.append(pdata)

Expand Down
8 changes: 5 additions & 3 deletions screenpro/phenoscore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@
from .phenostat import matrixStat, multipleTestsCorrection


def runPhenoScore(adata, cond_ref, cond_test, score_level, var_names='target', test='ttest',
growth_rate=1, n_reps='auto', keep_top_n = None, collapse_var=False,
def runPhenoScore(adata, cond_ref, cond_test, score_level,
var_names='target', collapse_var=False,
test='ttest', growth_rate=1, n_reps='auto', keep_top_n = None,
num_pseudogenes='auto', pseudogene_size='auto',
count_layer=None, count_filter_type='mean', count_filter_threshold=40,
ctrl_label='negative_control'
Expand All @@ -39,6 +40,7 @@ def runPhenoScore(adata, cond_ref, cond_test, score_level, var_names='target', t
cond_test (str): condition test
score_level (str): score level
var_names (str): variable names to use as index in the result dataframe
collapse_var (str or bool): name of the result column to use for the `getBestTargetByTSS` function; default is False (False or None skips this step)
test (str): test to use for calculating p-value ('MW': Mann-Whitney U rank; 'ttest' : t-test)
growth_rate (int): growth rate
n_reps (int or str): number of replicates, or 'auto' (the default)
Expand Down Expand Up @@ -127,7 +129,7 @@ def runPhenoScore(adata, cond_ref, cond_test, score_level, var_names='target', t
filter_threshold=count_filter_threshold
)

# get best best transcript as lowest p-value for each target
# get the best transcript as lowest p-value for each target
if collapse_var not in [False, None]:
if collapse_var not in result.columns:
raise ValueError(f'collapse_var "{collapse_var}" not found in result columns.')
Expand Down

0 comments on commit a9c3cca

Please sign in to comment.