diff --git a/docs/environment.yaml b/docs/environment.yaml index 0a39f58..22dbb0e 100644 --- a/docs/environment.yaml +++ b/docs/environment.yaml @@ -22,11 +22,13 @@ dependencies: - sphinx=5.3.0 - sphinx_rtd_theme=1.1.1 - sphinxcontrib-bibtex + - polars>0.20 - pip - pip: - - polars + - pyarrow - biobear - numba - pydeseq2 - simple_colors + - adjustText - watermark diff --git a/environment.yml b/environment.yml index 5cefad6..2313478 100644 --- a/environment.yml +++ b/environment.yml @@ -18,12 +18,13 @@ dependencies: - ipykernel - mscorefonts - rust>=1.72 + - polars>0.20 - pip - pip: - - polars - pyarrow - biobear - numba - pydeseq2 - simple_colors + - adjustText - watermark diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 6893079..b79006f 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -31,6 +31,6 @@ from .dashboard import DrugScreenDashboard -__version__ = "0.4.13" +__version__ = "0.4.14" __author__ = "Abe Arab" __email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com" diff --git a/screenpro/phenoscore/_annotate.py b/screenpro/phenoscore/_annotate.py index 8684cc8..e7993d7 100644 --- a/screenpro/phenoscore/_annotate.py +++ b/screenpro/phenoscore/_annotate.py @@ -21,7 +21,7 @@ } -def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='negative_control'): +def getCombinedScore(df_in, score_col='score', pvalue_col='pvalue', target_col='target', ctrl_label='negative_control'): """ Calculate the combined score column based on the given phenotypic scores and p-values. Combined score is calculated as: @@ -29,7 +29,7 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='neg $combined\_score = \frac{score}{pseudo\_sd} \times -\log_{10}(pvalue)$ Parameters: - df (pandas.DataFrame): The input DataFrame. + df_in (pandas.DataFrame): The input DataFrame. score_col (str): The column name for the individual scores. Default is 'score'. pvalue_col (str): The column name for the p-values. Default is 'pvalue'. target_col (str): The column name for the target variable. Default is 'target'. @@ -39,18 +39,22 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='neg Returns: pandas.Series: The calculated combined score column. """ - if 'target' not in df.columns: - raise ValueError('Column "target" not found in the input DataFrame.') + # make a copy of input dataframe + df = df_in.copy() + + for col in [score_col, pvalue_col, target_col]: + if col not in df.columns: + raise ValueError(f'Column "{col}" not found in the input DataFrame.') # calculate pseudo_sd - pseudo_sd = df[df['target'].eq(ctrl_label)][score_col].tolist() + pseudo_sd = df[df[target_col].eq(ctrl_label)][score_col].tolist() pseudo_sd = np.std(pseudo_sd) # calculate combined score return df[score_col]/pseudo_sd * -np.log10(df[pvalue_col]) -def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='negative_control'): +def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col='score', pvalue_col='pvalue', target_col='target', ctrl_label='negative_control'): """ Annotate the given score tabel @@ -60,49 +64,47 @@ def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalu up_hit (str): up hit label down_hit (str): down hit label threshold (int): threshold value - score_col (str): score column name - pvalue_col (str): pvalue column name - ctrl_label (str): control label value + score_col (str): score column name. Default is 'score'. + target_col (str): column name for the target variable. Default is 'target'. + pvalue_col (str): pvalue column name. Default is 'pvalue'. + ctrl_label (str): control label value. Default is 'negative_control'. Returns: pd.DataFrame: annotated score dataframe """ - if score_col is None: score_col = 'score' - if pvalue_col is None: pvalue_col = 'pvalue' + # make a copy of input dataframe + df = df_in.copy() - sel = ['target',score_col, pvalue_col] - - for col in sel: - if col not in df_in.columns: + for col in [score_col, pvalue_col, target_col]: + if col not in df.columns: raise ValueError(f'Column "{col}" not found in the input DataFrame.') - - # make a copy of input dataframe - df = df_in[sel].copy() - # # rename/reformat columns - # df.columns = ['target', 'score', 'pvalue'] + df[score_col] = df[score_col].astype(float) df[pvalue_col] = df[pvalue_col].astype(float) # add combined score column - df['combined_score'] = getCombinedScore(df, score_col, pvalue_col, ctrl_label) + df['combined_score'] = getCombinedScore( + df, + score_col=score_col, pvalue_col=pvalue_col, target_col=target_col, + ctrl_label=ctrl_label) # add label column df['label'] = '.' # annotate hits: up df.loc[ - (df[score_col] > 0) & (~df['target'].eq(ctrl_label)) & + (df[score_col] > 0) & (~df[target_col].eq(ctrl_label)) & (df['combined_score'] >= threshold), 'label' ] = up_hit # annotate hits: down df.loc[ - (df[score_col] < 0) & (~df['target'].eq(ctrl_label)) & + (df[score_col] < 0) & (~df[target_col].eq(ctrl_label)) & (df['combined_score'] <= -threshold), 'label' ] = down_hit # annotate control - df.loc[df['target'].eq(ctrl_label), 'label'] = ctrl_label + df.loc[df[target_col].eq(ctrl_label), 'label'] = ctrl_label # annotate non-hit df.loc[df['label'] == '.', 'label'] = 'target_non_hit' diff --git a/screenpro/phenoscore/delta.py b/screenpro/phenoscore/delta.py index dcef007..da26261 100644 --- a/screenpro/phenoscore/delta.py +++ b/screenpro/phenoscore/delta.py @@ -153,10 +153,10 @@ def compareByTargetGroup(adata, df_cond_ref, df_cond_test, keep_top_n, var_names # combine results into a dataframe result = pd.concat([ - pd.Series(scores, name='score'), - pd.Series(p_values, name=f'{test} pvalue'), - pd.Series(adj_p_values, name='BH adj_pvalue'), - pd.Series(target_sizes, name='number_of_guide_elements'), + pd.Series(scores, name='score', dtype=float), + pd.Series(p_values, name=f'{test} pvalue', dtype=float), + pd.Series(adj_p_values, name='BH adj_pvalue', dtype=float), + pd.Series(target_sizes, name='number_of_guide_elements', dtype=int), ], axis=1) # add targets information diff --git a/screenpro/plotting/_rank.py b/screenpro/plotting/_rank.py index eddf858..bd11b64 100644 --- a/screenpro/plotting/_rank.py +++ b/screenpro/plotting/_rank.py @@ -1,5 +1,7 @@ import pandas as pd import matplotlib.pyplot as plt + +from adjustText import adjust_text from ._utils import yellow_blue @@ -57,8 +59,23 @@ def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_ ax.plot(highlight_ranks['Rank'], highlight_ranks[rank_col], 'o', color=highlight_color, markersize=dot_size * highlight_size_factor) if highlight_values['text'] is not False: + texts = [] for i, row in highlight_ranks.iterrows(): - ax.text(row['Rank'] + .01, row[rank_col] + .001, row[name_col], fontsize=txt_font_size, color=highlight_color, ha='right') + t = ax.text( + row['Rank'] + .01, + row[rank_col] + .001, + row[name_col], + fontsize=txt_font_size, + color=highlight_color, + ha='right' + ) + texts.append(t) + + adjust_text( + texts, + arrowprops=dict(arrowstyle='-', color=highlight_color, lw=0.5), + ax=ax + ) # Add labels and title ax.set_xlabel(xlabel)