Skip to content

Commit

Permalink
Adapt etest minimal pvalue
Browse files Browse the repository at this point in the history
Smallest pvalue now 1 divided by runs instead of zero.
  • Loading branch information
stefanpeidli committed Feb 22, 2023
1 parent f4331ed commit 1211ca0
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions package/src/scperturb/etest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@
from sklearn.metrics import pairwise_distances
from .edistance import edist

def etest(adata, obs_key='perturbation', obsm_key='X_pca', dist='sqeuclidean', control='control', alpha=0.05, runs=100, correction_method='holm-sidak', verbose=True):
# TODO make etest allow for multiple controls (accept list of controls)
# TODO make etest allow to use correction factor (divide by N-1 instead of N)

def etest(adata, obs_key='perturbation', obsm_key='X_pca', dist='sqeuclidean',
control='control', alpha=0.05, runs=1000,
correction_method='holm-sidak', correction_factor=False,
verbose=True):
"""Performs Monte Carlo permutation test with E-distance as test statistic.
Tests for each group of cells defined in adata.obs[obs_key] if it is significantly
different from control based on the E-distance in adata.obsm[obsm_key] space.
Expand All @@ -33,6 +39,8 @@ def etest(adata, obs_key='perturbation', obsm_key='X_pca', dist='sqeuclidean', c
We do not recommend going lower than `100` and suggest between `100` and `10000` iterations.
correction_method: `None` or any valid method for statsmodels.stats.multitest.multipletests (default: `'holm-sidak'`)
Method used for multiple-testing correction, since we are testing each group in `adata.obs[obs_key]`.
correction_factor: `bool` (default: `False`)
Whether to make the estimator for sigma less biased by dividing by N-1 instead of N (analogous to sample versus population variance).
verbose: `bool` (default: `True`)
Whether to show a progress bar iterating over all groups.
Expand Down Expand Up @@ -116,8 +124,10 @@ def etest(adata, obs_key='perturbation', obsm_key='X_pca', dist='sqeuclidean', c
df = df.sort_index()

# Evaluate test (hypothesis vs null hypothesis)
results = pd.concat([r['edist'] - df['edist'] for r in res], axis=1) > 0 # count times shuffling resulted in larger e-distance
pvalues = np.sum(results, axis=1) / runs
# Count, per row, how many shuffled runs produced a larger E-distance than
# the observed one (each concatenated column corresponds to one entry of `res`).
results = pd.concat([r['edist'] - df['edist'] for r in res], axis=1) > 0
# Clamp the count to at least 1 so the smallest reportable p-value is
# 1 / runs instead of 0. np.maximum is element-wise; np.min(x, 1) would
# interpret the 1 as the `axis` argument of a reduction, not as a lower bound.
n_failure = np.maximum(np.sum(results, axis=1), 1)
pvalues = n_failure / runs

# Apply multiple testing correction
significant_adj, pvalue_adj, _, _ = multipletests(pvalues, alpha=alpha, method=correction_method)
Expand Down

0 comments on commit 1211ca0

Please sign in to comment.