Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

minor fixes #95

Merged
merged 9 commits into from
Sep 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@ dependencies:
- sphinx=5.3.0
- sphinx_rtd_theme=1.1.1
- sphinxcontrib-bibtex
- polars>0.20
- pip
- pip:
- polars
- pyarrow
- biobear
- numba
- pydeseq2
- simple_colors
- adjustText
- watermark
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ dependencies:
- ipykernel
- mscorefonts
- rust>=1.72
- polars>0.20
- pip
- pip:
- polars
- pyarrow
- biobear
- numba
- pydeseq2
- simple_colors
- adjustText
- watermark
2 changes: 1 addition & 1 deletion screenpro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@
from .dashboard import DrugScreenDashboard


__version__ = "0.4.13"
__version__ = "0.4.14"
__author__ = "Abe Arab"
__email__ = '[email protected]' # "[email protected]"
50 changes: 26 additions & 24 deletions screenpro/phenoscore/_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
}


def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='negative_control'):
def getCombinedScore(df_in, score_col='score', pvalue_col='pvalue', target_col='target', ctrl_label='negative_control'):
"""
Calculate the combined score column based on the given phenotypic scores and p-values.
Combined score is calculated as:

$combined\_score = \frac{score}{pseudo\_sd} \times -\log_{10}(pvalue)$

Parameters:
df (pandas.DataFrame): The input DataFrame.
df_in (pandas.DataFrame): The input DataFrame.
score_col (str): The column name for the individual scores. Default is 'score'.
pvalue_col (str): The column name for the p-values. Default is 'pvalue'.
target_col (str): The column name for the target variable. Default is 'target'.
Expand All @@ -39,18 +39,22 @@ def getCombinedScore(df, score_col='score', pvalue_col='pvalue', ctrl_label='neg
Returns:
pandas.Series: The calculated combined score column.
"""
if 'target' not in df.columns:
raise ValueError('Column "target" not found in the input DataFrame.')
# make a copy of input dataframe
df = df_in.copy()

for col in [score_col, pvalue_col, target_col]:
if col not in df.columns:
raise ValueError(f'Column "{col}" not found in the input DataFrame.')

# calculate pseudo_sd
pseudo_sd = df[df['target'].eq(ctrl_label)][score_col].tolist()
pseudo_sd = df[df[target_col].eq(ctrl_label)][score_col].tolist()
pseudo_sd = np.std(pseudo_sd)

# calculate combined score
return df[score_col]/pseudo_sd * -np.log10(df[pvalue_col])


def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalue_col=None, ctrl_label='negative_control'):
def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col='score', pvalue_col='pvalue', target_col='target', ctrl_label='negative_control'):
"""
Annotate the given score table

Expand All @@ -60,49 +64,47 @@ def annotateScoreTable(df_in, up_hit, down_hit, threshold, score_col=None, pvalu
up_hit (str): up hit label
down_hit (str): down hit label
threshold (int): threshold value
score_col (str): score column name
pvalue_col (str): pvalue column name
ctrl_label (str): control label value
score_col (str): score column name. Default is 'score'.
target_col (str): column name for the target variable. Default is 'target'.
pvalue_col (str): pvalue column name. Default is 'pvalue'.
ctrl_label (str): control label value. Default is 'negative_control'.

Returns:
pd.DataFrame: annotated score dataframe
"""
if score_col is None: score_col = 'score'
if pvalue_col is None: pvalue_col = 'pvalue'
# make a copy of input dataframe
df = df_in.copy()

sel = ['target',score_col, pvalue_col]

for col in sel:
if col not in df_in.columns:
for col in [score_col, pvalue_col, target_col]:
if col not in df.columns:
raise ValueError(f'Column "{col}" not found in the input DataFrame.')

# make a copy of input dataframe
df = df_in[sel].copy()
# # rename/reformat columns
# df.columns = ['target', 'score', 'pvalue']

df[score_col] = df[score_col].astype(float)
df[pvalue_col] = df[pvalue_col].astype(float)

# add combined score column
df['combined_score'] = getCombinedScore(df, score_col, pvalue_col, ctrl_label)
df['combined_score'] = getCombinedScore(
df,
score_col=score_col, pvalue_col=pvalue_col, target_col=target_col,
ctrl_label=ctrl_label)

# add label column
df['label'] = '.'

# annotate hits: up
df.loc[
(df[score_col] > 0) & (~df['target'].eq(ctrl_label)) &
(df[score_col] > 0) & (~df[target_col].eq(ctrl_label)) &
(df['combined_score'] >= threshold), 'label'
] = up_hit

# annotate hits: down
df.loc[
(df[score_col] < 0) & (~df['target'].eq(ctrl_label)) &
(df[score_col] < 0) & (~df[target_col].eq(ctrl_label)) &
(df['combined_score'] <= -threshold), 'label'
] = down_hit

# annotate control
df.loc[df['target'].eq(ctrl_label), 'label'] = ctrl_label
df.loc[df[target_col].eq(ctrl_label), 'label'] = ctrl_label

# annotate non-hit
df.loc[df['label'] == '.', 'label'] = 'target_non_hit'
Expand Down
8 changes: 4 additions & 4 deletions screenpro/phenoscore/delta.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@ def compareByTargetGroup(adata, df_cond_ref, df_cond_test, keep_top_n, var_names

# combine results into a dataframe
result = pd.concat([
pd.Series(scores, name='score'),
pd.Series(p_values, name=f'{test} pvalue'),
pd.Series(adj_p_values, name='BH adj_pvalue'),
pd.Series(target_sizes, name='number_of_guide_elements'),
pd.Series(scores, name='score', dtype=float),
pd.Series(p_values, name=f'{test} pvalue', dtype=float),
pd.Series(adj_p_values, name='BH adj_pvalue', dtype=float),
pd.Series(target_sizes, name='number_of_guide_elements', dtype=int),
], axis=1)

# add targets information
Expand Down
19 changes: 18 additions & 1 deletion screenpro/plotting/_rank.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pandas as pd
import matplotlib.pyplot as plt

from adjustText import adjust_text
from ._utils import yellow_blue


Expand Down Expand Up @@ -57,8 +59,23 @@ def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_
ax.plot(highlight_ranks['Rank'], highlight_ranks[rank_col], 'o', color=highlight_color, markersize=dot_size * highlight_size_factor)

if highlight_values['text'] is not False:
texts = []
for i, row in highlight_ranks.iterrows():
ax.text(row['Rank'] + .01, row[rank_col] + .001, row[name_col], fontsize=txt_font_size, color=highlight_color, ha='right')
t = ax.text(
row['Rank'] + .01,
row[rank_col] + .001,
row[name_col],
fontsize=txt_font_size,
color=highlight_color,
ha='right'
)
texts.append(t)

adjust_text(
texts,
arrowprops=dict(arrowstyle='-', color=highlight_color, lw=0.5),
ax=ax
)

# Add labels and title
ax.set_xlabel(xlabel)
Expand Down
Loading