From b36085dce3eca956ab4ea44e4f8efceb2a55b375 Mon Sep 17 00:00:00 2001 From: abearab Date: Tue, 16 Jul 2024 19:21:48 -0700 Subject: [PATCH 01/34] reorganize plotting features --- screenpro/__init__.py | 11 +- .../dashboard.py => dashboard/__init__.py} | 6 + screenpro/plotting/__init__.py | 9 + screenpro/{visualize => plotting}/qc_plots.py | 2 - screenpro/{visualize => plotting}/utils.py | 3 + screenpro/plotting/volcano.py | 149 +++++++++ screenpro/visualize/__init__.py | 308 ------------------ screenpro/visualize/rank.py | 70 ---- 8 files changed, 174 insertions(+), 384 deletions(-) rename screenpro/{visualize/dashboard.py => dashboard/__init__.py} (98%) create mode 100644 screenpro/plotting/__init__.py rename screenpro/{visualize => plotting}/qc_plots.py (99%) rename screenpro/{visualize => plotting}/utils.py (97%) create mode 100644 screenpro/plotting/volcano.py delete mode 100644 screenpro/visualize/__init__.py delete mode 100644 screenpro/visualize/rank.py diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 13ad8d1..f51c26b 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -18,15 +18,18 @@ - datasets: API for accessing pre-processed datasets ''' -from . import phenoscore as ps -from . import preprocessing as pp from . import ngs -from . import assays from . import load -from . import visualize as viz +from . import preprocessing as pp +from . import phenoscore as ps +from . import assays +from . import plotting as pl +from . import dashboard from .ngs import GuideCounter from .assays import PooledScreens, GImaps +from .dashboard import DrugScreenDashboard + __version__ = "0.4.5" __author__ = "Abe Arab" diff --git a/screenpro/visualize/dashboard.py b/screenpro/dashboard/__init__.py similarity index 98% rename from screenpro/visualize/dashboard.py rename to screenpro/dashboard/__init__.py index a36851a..594392d 100644 --- a/screenpro/visualize/dashboard.py +++ b/screenpro/dashboard/__init__.py @@ -1,3 +1,9 @@ +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. + + import numpy as np import pandas as pd import bokeh diff --git a/screenpro/plotting/__init__.py b/screenpro/plotting/__init__.py new file mode 100644 index 0000000..ccb5576 --- /dev/null +++ b/screenpro/plotting/__init__.py @@ -0,0 +1,9 @@ +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. + +import numpy as np +import scanpy as sc +from .qc_plots import plotReplicateScatter +from .rank import rankPlot diff --git a/screenpro/visualize/qc_plots.py b/screenpro/plotting/qc_plots.py similarity index 99% rename from screenpro/visualize/qc_plots.py rename to screenpro/plotting/qc_plots.py index 5f00606..dab57cc 100644 --- a/screenpro/visualize/qc_plots.py +++ b/screenpro/plotting/qc_plots.py @@ -38,5 +38,3 @@ def plotReplicateScatter(ax, adat_in, x, y, title, min_val=None, max_val=None, l ax.get_legend().remove() ax.grid(False) - - diff --git a/screenpro/visualize/utils.py b/screenpro/plotting/utils.py similarity index 97% rename from screenpro/visualize/utils.py rename to screenpro/plotting/utils.py index 84b9c7e..34fa620 100644 --- a/screenpro/visualize/utils.py +++ b/screenpro/plotting/utils.py @@ -20,6 +20,9 @@ 'YlBu', [(0, '#0000ff'), (.49, '#000000'), (.51, '#000000'), (1, '#ffff00')]) yellow_blue.set_bad('#999999', 1) +up_hit_color = '#fcae91' +down_hit_color = '#bdd7e7' + # plt.rcParams['font.sans-serif'] = [ # 'Helvetica', 'Arial', 'Verdana', 'Bitstream Vera Sans' # ] diff --git a/screenpro/plotting/volcano.py b/screenpro/plotting/volcano.py new file mode 100644 index 0000000..3aeba09 --- /dev/null +++ b/screenpro/plotting/volcano.py @@ -0,0 +1,149 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from .utils import yellow_blue, up_hit_color, down_hit_color + + +class ScatterPlot: + def __init__( + self, df, up_hit, down_hit, ax, + score_col, pvalue_col, xlabel, ylabel, + ctrl_label='negative_control', + dot_size=1, + xlims='auto', + ylims='auto', + **args + ): + if f'-log10({pvalue_col})' not in df.columns: + df[f'-log10({pvalue_col})'] = np.log10(df[pvalue_col]) * -1 + + if xlims == 'auto': + xlims = (df[score_col].min() - 0.1, df[score_col].max() + 0.1) + if ylims == 'auto': + ylims = (df[f'-log10({pvalue_col})'].min() - 0.1, df[f'-log10({pvalue_col})'].max() + 0.1) + + # Scatter plot for each category + ax.scatter( df.loc[df['label'] == 'target_non_hit', score_col], + df.loc[df['label'] == 'target_non_hit', f'-log10({pvalue_col})'], + alpha=0.1, s=dot_size, c='black', label='target_non_hit', + **args) + + ax.scatter( df.loc[df['label'] == up_hit, score_col], + df.loc[df['label'] == up_hit, f'-log10({pvalue_col})'], + alpha=0.9, s=dot_size, c=up_hit_color, label=up_hit, + **args) + + ax.scatter( df.loc[df['label'] == down_hit, score_col], + df.loc[df['label'] == down_hit, f'-log10({pvalue_col})'], + alpha=0.9, s=dot_size, c=down_hit_color, label=down_hit, + **args) + + ax.scatter( df.loc[df['label'] == ctrl_label, score_col], + df.loc[df['label'] == ctrl_label, f'-log10({pvalue_col})'], + alpha=0.1, s=dot_size, c='gray', label=ctrl_label, + **args) + + # Set x-axis and y-axis labels + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + + # Set x-axis limits + ax.set_xlim(xlims) + + # Set y-axis limits + ax.set_ylim(ylims) + + # Add legend + ax.legend() + + return ax + + def _label_by_color(self, ax, df_in, label, + x_col, y_col, + size=2, size_txt="auto", + edgecolors='black', facecolors='black', + textcolor='black', + t_x=.5, t_y=-0.1, **args): + + df = df_in.copy() + target_data = df[df['target'] == label] + + # Scatter plot for labeled data + ax.scatter( + target_data[x_col], target_data[y_col], + s=size, linewidth=0.5, + edgecolors=edgecolors, + facecolors=facecolors, label='target', + **args + ) + + if size_txt == None: + pass + elif size_txt == 'auto': + size_txt = size * 2 + else: + # Annotate the points + for i, _ in enumerate(target_data['target']): + txt = target_data['target'].iloc[i] + ax.annotate(txt, (target_data[x_col].iloc[i] + t_x, target_data[y_col].iloc[i] + t_y), + color=textcolor, size=size_txt) + + def label_as_black(self, ax, df_in, label, + x_col='score', y_col='-log10(pvalue)', + size=2, size_txt="auto", + t_x=.5, t_y=-0.1, + **args): + self._label_by_color(ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='black', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) + + def label_sensitivity_hit(self, ax, df_in, label, + x_col='score', y_col='-log10(pvalue)', + size=2, size_txt="auto", + t_x=.5, t_y=-0.1, + **args): + self._label_by_color(ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='#3182bd', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) + + def label_resistance_hit(self, ax, df_in, label, + x_col='score', y_col='-log10(pvalue)', + size=2, size_txt="auto", + t_x=.5, t_y=-0.1, + **args): + self._label_by_color(ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='#de2d26', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) + + +class VolcanoPlot(ScatterPlot): + def __init__( + self, df, up_hit, down_hit, ax, + score_col='score', pvalue_col='pvalue', + xlabel='phenotype score', + ylabel='-log10(pvalue)', + ctrl_label='negative_control', + dot_size=1, + xlims='auto', + ylims='auto', + **args + ): + super().__init__(df, up_hit, down_hit, ax, + score_col=score_col, pvalue_col=pvalue_col, + xlabel=xlabel, ylabel=ylabel, + ctrl_label=ctrl_label, + dot_size=dot_size, + xlims=xlims, ylims=ylims, + **args) diff --git a/screenpro/visualize/__init__.py b/screenpro/visualize/__init__.py deleted file mode 100644 index 92c5a5a..0000000 --- a/screenpro/visualize/__init__.py +++ /dev/null @@ -1,308 +0,0 @@ -## Copyright (c) 2022-2024 ScreenPro2 Development Team. -## All rights reserved. -## Gilbart Lab, UCSF / Arc Institute. -## Multi-Omics Tech Center, Arc Insititue. - -import numpy as np -import scanpy as sc -from .qc_plots import plotReplicateScatter -from .rank import rankPlot - - -## Phenotype plotting functions -class DrugScreenPlotter: - def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label='negative_control',run_name='auto'): - self.screen = screen - self.threshold = threshold - self.ctrl_label = ctrl_label - self.run_name = run_name - self.gamma_score_name = f'gamma:{untreated}_vs_{t0}' - self.rho_score_name = f'rho:{treated}_vs_{untreated}' - self.tau_score_name = f'tau:{treated}_vs_{t0}' - - def _label_by_color(self, ax, df_in, label, - x_col, y_col, - size=2, size_txt="auto", - edgecolors='black', facecolors='black', - textcolor='black', - t_x=.5, t_y=-0.1, **args): - - df = df_in.copy() - target_data = df[df['target'] == label] - - # Scatter plot for labeled data - ax.scatter( - target_data[x_col], target_data[y_col], - s=size, linewidth=0.5, - edgecolors=edgecolors, - facecolors=facecolors, label='target', - **args - ) - - if size_txt == None: - pass - elif size_txt == 'auto': - size_txt = size * 2 - else: - # Annotate the points - for i, _ in enumerate(target_data['target']): - txt = target_data['target'].iloc[i] - ax.annotate(txt, (target_data[x_col].iloc[i] + t_x, target_data[y_col].iloc[i] + t_y), - color=textcolor, size=size_txt) - - def _prep_data(self, score_col='score', pvalue_col='pvalue'): - - gamma = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.gamma_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - - gamma[f'-log10({pvalue_col})'] = np.log10(gamma[pvalue_col]) * -1 - - tau = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.tau_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - tau[f'-log10({pvalue_col})'] = np.log10(tau[pvalue_col]) * -1 - - rho = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.rho_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - rho[f'-log10({pvalue_col})'] = np.log10(rho[pvalue_col]) * -1 - - return gamma, tau, rho - - def _volcano( - self, ax, df, up_hit, down_hit, - score_col='score', pvalue_col='pvalue', - xlabel='phenotype score', - ylabel='-log10(pvalue)', - dot_size=1, - xlims='auto', - ylims='auto', - **args - ): - - if f'-log10({pvalue_col})' not in df.columns: - df[f'-log10({pvalue_col})'] = np.log10(df[pvalue_col]) * -1 - - if xlims == 'auto': - xlims = (df[score_col].min() - 0.1, df[score_col].max() + 0.1) - if ylims == 'auto': - ylims = (df[f'-log10({pvalue_col})'].min() - 0.1, df[f'-log10({pvalue_col})'].max() + 0.1) - - # Scatter plot for each category - ax.scatter( df.loc[df['label'] == 'target_non_hit', score_col], - df.loc[df['label'] == 'target_non_hit', f'-log10({pvalue_col})'], - alpha=0.1, s=dot_size, c='black', label='target_non_hit', - **args) - - ax.scatter( df.loc[df['label'] == up_hit, score_col], - df.loc[df['label'] == up_hit, f'-log10({pvalue_col})'], - alpha=0.9, s=dot_size, c='#fcae91', label=up_hit, - **args) - - ax.scatter( df.loc[df['label'] == down_hit, score_col], - df.loc[df['label'] == down_hit, f'-log10({pvalue_col})'], - alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit, - **args) - - ax.scatter( df.loc[df['label'] == self.ctrl_label, score_col], - df.loc[df['label'] == self.ctrl_label, f'-log10({pvalue_col})'], - alpha=0.1, s=dot_size, c='gray', label=self.ctrl_label, - **args) - - # Set x-axis and y-axis labels - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - - # Set x-axis limits - ax.set_xlim(xlims) - - # Set y-axis limits - ax.set_ylim(ylims) - - # Add legend - ax.legend() - - def drawVolcanoRho( - self, ax, - rho_df=None, - dot_size=1, - score_col='score', - pvalue_col='pvalue', - xlabel='auto', - ylabel='-log10(pvalue)', - xlims='auto', - ylims='auto', - **args - ): - if rho_df is None: - _, _, rho_df = self._prep_data() - if xlabel == 'auto': - xlabel = self.rho_score_name.replace(':', ': ').replace('_', ' ') - - self._volcano(ax, rho_df, - up_hit='resistance_hit', down_hit='sensitivity_hit', - score_col=score_col, pvalue_col=pvalue_col, - xlabel=xlabel, ylabel=ylabel, - dot_size=dot_size, xlims=xlims, ylims=ylims, - **args) - - def drawVolcanoGamma( - self, ax, - gamma_df=None, - dot_size=1, - score_col='score', - pvalue_col='pvalue', - xlabel='auto', - ylabel='-log10(pvalue)', - xlims='auto', - ylims='auto', - **args - ): - if gamma_df is None: - gamma_df, _, _ = self._prep_data() - if xlabel == 'auto': - xlabel = self.gamma_score_name.replace(':', ': ').replace('_', ' ') - - self._volcano(ax, gamma_df, - up_hit='up_hit', down_hit='essential_hit', - score_col=score_col, pvalue_col=pvalue_col, - xlabel=xlabel, ylabel=ylabel, - dot_size=dot_size, xlims=xlims, ylims=ylims, - **args) - - def drawVolcanoTau( - self, ax, - tau_df=None, - dot_size=1, - score_col='score', - pvalue_col='pvalue', - xlabel='auto', - ylabel='-log10(pvalue)', - xlims='auto', - ylims='auto', - **args - ): - if tau_df is None: - _, tau_df, _, = self._prep_data() - if xlabel == 'auto': - xlabel = self.tau_score_name.replace(':', ': ').replace('_', ' ') - - self._volcano(ax, tau_df, - up_hit='up_hit', down_hit='down_hit', - score_col=score_col, pvalue_col=pvalue_col, - xlabel=xlabel, ylabel=ylabel, - dot_size=dot_size, xlims=xlims, ylims=ylims, - **args) - - def drawRhoGammaScatter( - self, ax, - rho_df=None, gamma_df=None, - dot_size=1, - score_col='score', - xlabel='auto', - ylabel='auto', - xlims='auto', - ylims='auto', - **args - ): - #TODO: fix by making a single dataframe with both rho and gamma scores - if rho_df is None: - _, _, rho_df = self._prep_data() - if gamma_df is None: - gamma_df, _, _ = self._prep_data() - - if xlabel == 'auto': - xlabel = self.rho_score_name.replace(':', ': ').replace('_', ' ') - if ylabel == 'auto': - ylabel = self.gamma_score_name.replace(':', ': ').replace('_', ' ') - - # color by rho score labels - up_hit = 'resistance_hit' - down_hit = 'sensitivity_hit' - - # Scatter plot for each category - ax.scatter( rho_df.loc[rho_df['label'] == 'target_non_hit', score_col], - gamma_df.loc[rho_df['label'] == 'target_non_hit', score_col], - alpha=0.1, s=dot_size, c='black', label='target_non_hit', - **args) - - ax.scatter( rho_df.loc[rho_df['label'] == up_hit, score_col], - gamma_df.loc[rho_df['label'] == up_hit, score_col], - alpha=0.9, s=dot_size, c='#fcae91', label=up_hit, - **args) - - ax.scatter( rho_df.loc[rho_df['label'] == down_hit, score_col], - gamma_df.loc[rho_df['label'] == down_hit, score_col], - alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit, - **args) - - ax.scatter( rho_df.loc[rho_df['label'] == self.ctrl_label, score_col], - gamma_df.loc[rho_df['label'] == self.ctrl_label, score_col], - alpha=0.1, s=dot_size, c='gray', label=self.ctrl_label, - **args) - - # Set x-axis and y-axis labels - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - - # Set x-axis limits - ax.set_xlim(xlims) - ax.set_ylim(ylims) - - # Add legend - ax.legend() - - def label_as_black(self, ax, df_in, label, - x_col='score', y_col='-log10(pvalue)', - size=2, size_txt="auto", - t_x=.5, t_y=-0.1, - **args): - self._label_by_color(ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='black', - textcolor='black', - t_x=t_x, t_y=t_y, - **args) - - def label_sensitivity_hit(self, ax, df_in, label, - x_col='score', y_col='-log10(pvalue)', - size=2, size_txt="auto", - t_x=.5, t_y=-0.1, - **args): - self._label_by_color(ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='#3182bd', - textcolor='black', - t_x=t_x, t_y=t_y, - **args) - - def label_resistance_hit(self, ax, df_in, label, - x_col='score', y_col='-log10(pvalue)', - size=2, size_txt="auto", - t_x=.5, t_y=-0.1, - **args): - self._label_by_color(ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='#de2d26', - textcolor='black', - t_x=t_x, t_y=t_y, - **args) diff --git a/screenpro/visualize/rank.py b/screenpro/visualize/rank.py deleted file mode 100644 index b3707c7..0000000 --- a/screenpro/visualize/rank.py +++ /dev/null @@ -1,70 +0,0 @@ -import pandas as pd -import matplotlib.pyplot as plt -from .utils import yellow_blue - - -def rankPlot(df, rank_col, color_col=None, name_col='target', highlight_values_dict=None, xlabel='Rank', ylabel='Values', title='Rank Plot', ax=None, dot_size=1.5, highlight_size_factor=100, **args): - """ - Plot the ranks against their values with specified color. - - Args: - df (DataFrame): The input DataFrame. - rank_col (str): The column name containing the values to be ranked. - color_col (str): The column name containing the values to be used for color coding. Default is None. - name_col (str, optional): The column name containing the names of the values. Default is 'target'. - highlight_values_dict (dict, optional): A dictionary specifying the values to be highlighted. - The keys are the highlight colors and the values are dictionaries with 'genes' and 'text' keys. - 'genes' is a list of values to be highlighted and 'text' is a boolean indicating whether to display - the names of the highlighted values. Default is None. - xlabel (str, optional): The label for the x-axis. Default is 'Rank'. - ylabel (str, optional): The label for the y-axis. Default is 'Values'. - title (str, optional): The title of the plot. Default is 'Rank Plot'. - ax (matplotlib.axes.Axes, optional): The axis object to plot on. If not provided, a new axis will be created. - dot_size (float, optional): The size of the dots in the scatter plot. Default is 1.5. - highlight_size_factor (int, optional): The size factor for the highlighted dots. Default is 100. - **args: Additional keyword arguments to be passed to the scatter plot. - - Returns: - matplotlib.axes.Axes: The axis object containing the plot. - """ - # Create a new DataFrame with the values and their corresponding ranks - rank_df = df.copy() - rank_df['Rank'] = rank_df[rank_col].rank() - rank_df.sort_values('Rank', inplace=True) - - # Use a color that is suitable for publications - if color_col is None: - color_col = 'darkgray' - - # If no axis is provided, create one - if ax is None: - _, ax = plt.subplots() - - # Plot the ranks against their values with specified color - rank_df.plot.scatter( - 'Rank', rank_col, marker='o', - colormap=yellow_blue, - s=dot_size, - c=color_col, ax=ax, - colorbar=False, - **args - ) - - if highlight_values_dict is not None: - for highlight_color, highlight_values in highlight_values_dict.items(): - highlight_ranks = rank_df[rank_df[name_col].isin(highlight_values['genes'])] - ax.plot(highlight_ranks['Rank'], highlight_ranks[rank_col], 'o', color=highlight_color, markersize=dot_size * highlight_size_factor) - - if highlight_values['text'] is not False: - for i, row in highlight_ranks.iterrows(): - ax.text(row['Rank'] + .01, row[rank_col] + .001, row[name_col], fontsize=8, color=highlight_color, ha='right') - - # Add labels and title - ax.set_xlabel(xlabel) - ax.set_ylabel(ylabel) - ax.set_title(title) - - # Customize the grid lines for a clean look - ax.grid(False) - - return ax From 7f41f2ead1d55c1a60a4e7b1311be97cbc867a98 Mon Sep 17 00:00:00 2001 From: abearab Date: Tue, 16 Jul 2024 19:39:41 -0700 Subject: [PATCH 02/34] reorganize plotting features --- screenpro/plotting/__init__.py | 4 ++-- screenpro/plotting/{volcano.py => pheno_plots.py} | 7 ++++--- screenpro/plotting/utils.py | 2 -- 3 files changed, 6 insertions(+), 7 deletions(-) rename screenpro/plotting/{volcano.py => pheno_plots.py} (96%) diff --git a/screenpro/plotting/__init__.py b/screenpro/plotting/__init__.py index ccb5576..d3dcd1e 100644 --- a/screenpro/plotting/__init__.py +++ b/screenpro/plotting/__init__.py @@ -5,5 +5,5 @@ import numpy as np import scanpy as sc -from .qc_plots import plotReplicateScatter -from .rank import rankPlot +from .qc_plots import * +from .pheno_plots import * diff --git a/screenpro/plotting/volcano.py b/screenpro/plotting/pheno_plots.py similarity index 96% rename from screenpro/plotting/volcano.py rename to screenpro/plotting/pheno_plots.py index 3aeba09..33b191c 100644 --- a/screenpro/plotting/volcano.py +++ b/screenpro/plotting/pheno_plots.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd import matplotlib.pyplot as plt -from .utils import yellow_blue, up_hit_color, down_hit_color +from .utils import yellow_blue class ScatterPlot: @@ -30,12 +30,12 @@ def __init__( ax.scatter( df.loc[df['label'] == up_hit, score_col], df.loc[df['label'] == up_hit, f'-log10({pvalue_col})'], - alpha=0.9, s=dot_size, c=up_hit_color, label=up_hit, + alpha=0.9, s=dot_size, c='#fcae91', label=up_hit, **args) ax.scatter( df.loc[df['label'] == down_hit, score_col], df.loc[df['label'] == down_hit, f'-log10({pvalue_col})'], - alpha=0.9, s=dot_size, c=down_hit_color, label=down_hit, + alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit, **args) ax.scatter( df.loc[df['label'] == ctrl_label, score_col], @@ -147,3 +147,4 @@ def __init__( dot_size=dot_size, xlims=xlims, ylims=ylims, **args) + diff --git a/screenpro/plotting/utils.py b/screenpro/plotting/utils.py index 34fa620..316f460 100644 --- a/screenpro/plotting/utils.py +++ b/screenpro/plotting/utils.py @@ -20,8 +20,6 @@ 'YlBu', [(0, '#0000ff'), (.49, '#000000'), (.51, '#000000'), (1, '#ffff00')]) yellow_blue.set_bad('#999999', 1) -up_hit_color = '#fcae91' -down_hit_color = '#bdd7e7' # plt.rcParams['font.sans-serif'] = [ # 'Helvetica', 'Arial', 'Verdana', 'Bitstream Vera Sans' From c2447bf0737f6fb2c7805d4e649ef2242ce739ed Mon Sep 17 00:00:00 2001 From: abearab Date: Tue, 16 Jul 2024 19:46:24 -0700 Subject: [PATCH 03/34] reorganize plotting features --- screenpro/plotting/_rank.py | 70 ++++++++++++++++++++++ screenpro/plotting/{utils.py => _utils.py} | 0 2 files changed, 70 insertions(+) create mode 100644 screenpro/plotting/_rank.py rename screenpro/plotting/{utils.py => _utils.py} (100%) diff --git a/screenpro/plotting/_rank.py b/screenpro/plotting/_rank.py new file mode 100644 index 0000000..7eb04b9 --- /dev/null +++ b/screenpro/plotting/_rank.py @@ -0,0 +1,70 @@ +import pandas as pd +import matplotlib.pyplot as plt +from ._utils import yellow_blue + + +def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_dict=None, xlabel='Rank', ylabel='Values', title='Rank Plot', ax=None, dot_size=1.5, highlight_size_factor=100, **args): + """ + Plot the ranks against their values with specified color. + + Args: + df (DataFrame): The input DataFrame. + rank_col (str): The column name containing the values to be ranked. + color_col (str): The column name containing the values to be used for color coding. Default is None. + name_col (str, optional): The column name containing the names of the values. Default is 'target'. + highlight_values_dict (dict, optional): A dictionary specifying the values to be highlighted. + The keys are the highlight colors and the values are dictionaries with 'genes' and 'text' keys. + 'genes' is a list of values to be highlighted and 'text' is a boolean indicating whether to display + the names of the highlighted values. Default is None. + xlabel (str, optional): The label for the x-axis. Default is 'Rank'. + ylabel (str, optional): The label for the y-axis. Default is 'Values'. + title (str, optional): The title of the plot. Default is 'Rank Plot'. + ax (matplotlib.axes.Axes, optional): The axis object to plot on. If not provided, a new axis will be created. + dot_size (float, optional): The size of the dots in the scatter plot. Default is 1.5. + highlight_size_factor (int, optional): The size factor for the highlighted dots. Default is 100. + **args: Additional keyword arguments to be passed to the scatter plot. + + Returns: + matplotlib.axes.Axes: The axis object containing the plot. + """ + # Create a new DataFrame with the values and their corresponding ranks + rank_df = df.copy() + rank_df['Rank'] = rank_df[rank_col].rank() + rank_df.sort_values('Rank', inplace=True) + + # Use a color that is suitable for publications + if color_col is None: + color_col = 'darkgray' + + # If no axis is provided, create one + if ax is None: + _, ax = plt.subplots() + + # Plot the ranks against their values with specified color + rank_df.plot.scatter( + 'Rank', rank_col, marker='o', + colormap=yellow_blue, + s=dot_size, + c=color_col, ax=ax, + colorbar=False, + **args + ) + + if highlight_values_dict is not None: + for highlight_color, highlight_values in highlight_values_dict.items(): + highlight_ranks = rank_df[rank_df[name_col].isin(highlight_values['genes'])] + ax.plot(highlight_ranks['Rank'], highlight_ranks[rank_col], 'o', color=highlight_color, markersize=dot_size * highlight_size_factor) + + if highlight_values['text'] is not False: + for i, row in highlight_ranks.iterrows(): + ax.text(row['Rank'] + .01, row[rank_col] + .001, row[name_col], fontsize=8, color=highlight_color, ha='right') + + # Add labels and title + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + ax.set_title(title) + + # Customize the grid lines for a clean look + ax.grid(False) + + return ax \ No newline at end of file diff --git a/screenpro/plotting/utils.py b/screenpro/plotting/_utils.py similarity index 100% rename from screenpro/plotting/utils.py rename to screenpro/plotting/_utils.py From 5cb38ddec80fce5b90173a647ed8e3007ba07994 Mon Sep 17 00:00:00 2001 From: abearab Date: Tue, 16 Jul 2024 19:54:39 -0700 Subject: [PATCH 04/34] reorganize plotting features --- screenpro/plotting/pheno_plots.py | 272 +++++++++++++++++++++++------- 1 file changed, 213 insertions(+), 59 deletions(-) diff --git a/screenpro/plotting/pheno_plots.py b/screenpro/plotting/pheno_plots.py index 33b191c..4dcaff7 100644 --- a/screenpro/plotting/pheno_plots.py +++ b/screenpro/plotting/pheno_plots.py @@ -1,19 +1,97 @@ import numpy as np import pandas as pd import matplotlib.pyplot as plt -from .utils import yellow_blue +from ._utils import yellow_blue +from ._rank import rank_plot -class ScatterPlot: - def __init__( - self, df, up_hit, down_hit, ax, - score_col, pvalue_col, xlabel, ylabel, - ctrl_label='negative_control', +## Phenotype plotting functions +class DrugScreenPlotter: + def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label='negative_control',run_name='auto'): + self.screen = screen + self.threshold = threshold + self.ctrl_label = ctrl_label + self.run_name = run_name + self.gamma_score_name = f'gamma:{untreated}_vs_{t0}' + self.rho_score_name = f'rho:{treated}_vs_{untreated}' + self.tau_score_name = f'tau:{treated}_vs_{t0}' + + def _label_by_color(self, ax, df_in, label, + x_col, y_col, + size=2, size_txt="auto", + edgecolors='black', facecolors='black', + textcolor='black', + t_x=.5, t_y=-0.1, **args): + + df = df_in.copy() + target_data = df[df['target'] == label] + + # Scatter plot for labeled data + ax.scatter( + target_data[x_col], target_data[y_col], + s=size, linewidth=0.5, + edgecolors=edgecolors, + facecolors=facecolors, label='target', + **args + ) + + if size_txt == None: + pass + elif size_txt == 'auto': + size_txt = size * 2 + else: + # Annotate the points + for i, _ in enumerate(target_data['target']): + txt = target_data['target'].iloc[i] + ax.annotate(txt, (target_data[x_col].iloc[i] + t_x, target_data[y_col].iloc[i] + t_y), + color=textcolor, size=size_txt) + + def _prep_data(self, score_col='score', pvalue_col='pvalue'): + + gamma = self.screen.getPhenotypeScores( + run_name=self.run_name, + score_name=self.gamma_score_name, + threshold=self.threshold, + ctrl_label=self.ctrl_label, + score_col=score_col, + pvalue_col=pvalue_col + ) + + gamma[f'-log10({pvalue_col})'] = np.log10(gamma[pvalue_col]) * -1 + + tau = self.screen.getPhenotypeScores( + run_name=self.run_name, + score_name=self.tau_score_name, + threshold=self.threshold, + ctrl_label=self.ctrl_label, + score_col=score_col, + pvalue_col=pvalue_col + ) + tau[f'-log10({pvalue_col})'] = np.log10(tau[pvalue_col]) * -1 + + rho = self.screen.getPhenotypeScores( + run_name=self.run_name, + score_name=self.rho_score_name, + threshold=self.threshold, + ctrl_label=self.ctrl_label, + score_col=score_col, + pvalue_col=pvalue_col + ) + rho[f'-log10({pvalue_col})'] = np.log10(rho[pvalue_col]) * -1 + + return gamma, tau, rho + + def _volcano( + self, ax, df, up_hit, down_hit, + score_col='score', pvalue_col='pvalue', + xlabel='phenotype score', + ylabel='-log10(pvalue)', dot_size=1, xlims='auto', ylims='auto', **args ): + if f'-log10({pvalue_col})' not in df.columns: df[f'-log10({pvalue_col})'] = np.log10(df[pvalue_col]) * -1 @@ -38,9 +116,9 @@ def __init__( alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit, **args) - ax.scatter( df.loc[df['label'] == ctrl_label, score_col], - df.loc[df['label'] == ctrl_label, f'-log10({pvalue_col})'], - alpha=0.1, s=dot_size, c='gray', label=ctrl_label, + ax.scatter( df.loc[df['label'] == self.ctrl_label, score_col], + df.loc[df['label'] == self.ctrl_label, f'-log10({pvalue_col})'], + alpha=0.1, s=dot_size, c='gray', label=self.ctrl_label, **args) # Set x-axis and y-axis labels @@ -56,38 +134,136 @@ def __init__( # Add legend ax.legend() - return ax - - def _label_by_color(self, ax, df_in, label, - x_col, y_col, - size=2, size_txt="auto", - edgecolors='black', facecolors='black', - textcolor='black', - t_x=.5, t_y=-0.1, **args): + def drawVolcanoRho( + self, ax, + rho_df=None, + dot_size=1, + score_col='score', + pvalue_col='pvalue', + xlabel='auto', + ylabel='-log10(pvalue)', + xlims='auto', + ylims='auto', + **args + ): + if rho_df is None: + _, _, rho_df = self._prep_data() + if xlabel == 'auto': + xlabel = self.rho_score_name.replace(':', ': ').replace('_', ' ') - df = df_in.copy() - target_data = df[df['target'] == label] + self._volcano(ax, rho_df, + up_hit='resistance_hit', down_hit='sensitivity_hit', + score_col=score_col, pvalue_col=pvalue_col, + xlabel=xlabel, ylabel=ylabel, + dot_size=dot_size, xlims=xlims, ylims=ylims, + **args) + + def drawVolcanoGamma( + self, ax, + gamma_df=None, + dot_size=1, + score_col='score', + pvalue_col='pvalue', + xlabel='auto', + ylabel='-log10(pvalue)', + xlims='auto', + ylims='auto', + **args + ): + if gamma_df is None: + gamma_df, _, _ = self._prep_data() + if xlabel == 'auto': + xlabel = self.gamma_score_name.replace(':', ': ').replace('_', ' ') + + self._volcano(ax, gamma_df, + up_hit='up_hit', down_hit='essential_hit', + score_col=score_col, pvalue_col=pvalue_col, + xlabel=xlabel, ylabel=ylabel, + dot_size=dot_size, xlims=xlims, ylims=ylims, + **args) + + def drawVolcanoTau( + self, ax, + tau_df=None, + dot_size=1, + score_col='score', + pvalue_col='pvalue', + xlabel='auto', + ylabel='-log10(pvalue)', + xlims='auto', + ylims='auto', + **args + ): + if tau_df is None: + _, tau_df, _, = self._prep_data() + if xlabel == 'auto': + xlabel = self.tau_score_name.replace(':', ': ').replace('_', ' ') + + self._volcano(ax, tau_df, + up_hit='up_hit', down_hit='down_hit', + score_col=score_col, pvalue_col=pvalue_col, + xlabel=xlabel, ylabel=ylabel, + dot_size=dot_size, xlims=xlims, ylims=ylims, + **args) - # Scatter plot for labeled data - ax.scatter( - target_data[x_col], target_data[y_col], - s=size, linewidth=0.5, - edgecolors=edgecolors, - facecolors=facecolors, label='target', + def drawRhoGammaScatter( + self, ax, + rho_df=None, gamma_df=None, + dot_size=1, + score_col='score', + xlabel='auto', + ylabel='auto', + xlims='auto', + ylims='auto', **args - ) + ): + #TODO: fix by making a single dataframe with both rho and gamma scores + if rho_df is None: + _, _, rho_df = self._prep_data() + if gamma_df is None: + gamma_df, _, _ = self._prep_data() - if size_txt == None: - pass - elif size_txt == 'auto': - size_txt = size * 2 - else: - # Annotate the points - for i, _ in enumerate(target_data['target']): - txt = target_data['target'].iloc[i] - ax.annotate(txt, (target_data[x_col].iloc[i] + t_x, target_data[y_col].iloc[i] + t_y), - color=textcolor, size=size_txt) + if xlabel == 'auto': + xlabel = self.rho_score_name.replace(':', ': ').replace('_', ' ') + if ylabel == 'auto': + ylabel = self.gamma_score_name.replace(':', ': ').replace('_', ' ') + + # color by rho score labels + up_hit = 'resistance_hit' + down_hit = 'sensitivity_hit' + + # Scatter plot for each category + ax.scatter( rho_df.loc[rho_df['label'] == 'target_non_hit', score_col], + gamma_df.loc[rho_df['label'] == 'target_non_hit', score_col], + alpha=0.1, s=dot_size, c='black', label='target_non_hit', + **args) + + ax.scatter( rho_df.loc[rho_df['label'] == up_hit, score_col], + gamma_df.loc[rho_df['label'] == up_hit, score_col], + alpha=0.9, s=dot_size, c='#fcae91', label=up_hit, + **args) + + ax.scatter( rho_df.loc[rho_df['label'] == down_hit, score_col], + gamma_df.loc[rho_df['label'] == down_hit, score_col], + alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit, + **args) + + ax.scatter( rho_df.loc[rho_df['label'] == self.ctrl_label, score_col], + gamma_df.loc[rho_df['label'] == self.ctrl_label, score_col], + alpha=0.1, s=dot_size, c='gray', label=self.ctrl_label, + **args) + + # Set x-axis and y-axis labels + ax.set_xlabel(xlabel) + ax.set_ylabel(ylabel) + # Set x-axis limits + ax.set_xlim(xlims) + ax.set_ylim(ylims) + + # Add legend + ax.legend() + def label_as_black(self, ax, df_in, label, x_col='score', y_col='-log10(pvalue)', size=2, size_txt="auto", @@ -125,26 +301,4 @@ def label_resistance_hit(self, ax, df_in, label, edgecolors='black', facecolors='#de2d26', textcolor='black', t_x=t_x, t_y=t_y, - **args) - - -class VolcanoPlot(ScatterPlot): - def __init__( - self, df, up_hit, down_hit, ax, - score_col='score', pvalue_col='pvalue', - xlabel='phenotype score', - ylabel='-log10(pvalue)', - ctrl_label='negative_control', - dot_size=1, - xlims='auto', - ylims='auto', - **args - ): - super().__init__(df, up_hit, down_hit, ax, - score_col=score_col, pvalue_col=pvalue_col, - xlabel=xlabel, ylabel=ylabel, - ctrl_label=ctrl_label, - dot_size=dot_size, - xlims=xlims, ylims=ylims, - **args) - + **args) \ No newline at end of file From aa354ebf67c83ee9c3580dca858a32ab71ee0b83 Mon Sep 17 00:00:00 2001 From: abearab Date: Tue, 16 Jul 2024 21:37:24 -0700 Subject: [PATCH 05/34] reorganize plotting features --- screenpro/plotting/pheno_plots.py | 218 ++++++++++++++++-------------- screenpro/plotting/qc_plots.py | 7 +- 2 files changed, 119 insertions(+), 106 deletions(-) diff --git a/screenpro/plotting/pheno_plots.py b/screenpro/plotting/pheno_plots.py index 4dcaff7..585bad6 100644 --- a/screenpro/plotting/pheno_plots.py +++ b/screenpro/plotting/pheno_plots.py @@ -5,90 +5,15 @@ from ._rank import rank_plot -## Phenotype plotting functions -class DrugScreenPlotter: - def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label='negative_control',run_name='auto'): - self.screen = screen - self.threshold = threshold - self.ctrl_label = ctrl_label - self.run_name = run_name - self.gamma_score_name = f'gamma:{untreated}_vs_{t0}' - self.rho_score_name = f'rho:{treated}_vs_{untreated}' - self.tau_score_name = f'tau:{treated}_vs_{t0}' - - def _label_by_color(self, ax, df_in, label, - x_col, y_col, - size=2, size_txt="auto", - edgecolors='black', facecolors='black', - textcolor='black', - t_x=.5, t_y=-0.1, **args): - - df = df_in.copy() - target_data = df[df['target'] == label] - - # Scatter plot for labeled data - ax.scatter( - target_data[x_col], target_data[y_col], - s=size, linewidth=0.5, - edgecolors=edgecolors, - facecolors=facecolors, label='target', - **args - ) - - if size_txt == None: - pass - elif size_txt == 'auto': - size_txt = size * 2 - else: - # Annotate the points - for i, _ in enumerate(target_data['target']): - txt = target_data['target'].iloc[i] - ax.annotate(txt, (target_data[x_col].iloc[i] + t_x, target_data[y_col].iloc[i] + t_y), - color=textcolor, size=size_txt) - - def _prep_data(self, score_col='score', pvalue_col='pvalue'): - - gamma = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.gamma_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - - gamma[f'-log10({pvalue_col})'] = np.log10(gamma[pvalue_col]) * -1 - - tau = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.tau_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - tau[f'-log10({pvalue_col})'] = np.log10(tau[pvalue_col]) * -1 - - rho = self.screen.getPhenotypeScores( - run_name=self.run_name, - score_name=self.rho_score_name, - threshold=self.threshold, - ctrl_label=self.ctrl_label, - score_col=score_col, - pvalue_col=pvalue_col - ) - rho[f'-log10({pvalue_col})'] = np.log10(rho[pvalue_col]) * -1 - - return gamma, tau, rho - - def _volcano( - self, ax, df, up_hit, down_hit, +def volcano_plot( + ax, df, up_hit, down_hit, score_col='score', pvalue_col='pvalue', xlabel='phenotype score', ylabel='-log10(pvalue)', dot_size=1, xlims='auto', ylims='auto', + ctrl_label='negative_control', **args ): @@ -116,9 +41,9 @@ def _volcano( alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit, **args) - ax.scatter( df.loc[df['label'] == self.ctrl_label, score_col], - df.loc[df['label'] == self.ctrl_label, f'-log10({pvalue_col})'], - alpha=0.1, s=dot_size, c='gray', label=self.ctrl_label, + ax.scatter( df.loc[df['label'] == ctrl_label, score_col], + df.loc[df['label'] == ctrl_label, f'-log10({pvalue_col})'], + alpha=0.1, s=dot_size, c='gray', label=ctrl_label, **args) # Set x-axis and y-axis labels @@ -134,6 +59,83 @@ def _volcano( # Add legend ax.legend() + +def label_by_color(ax, df_in, label, + x_col, y_col, + size=2, size_txt="auto", + edgecolors='black', facecolors='black', + textcolor='black', + t_x=.5, t_y=-0.1, **args): + + df = df_in.copy() + target_data = df[df['target'] == label] + + # Scatter plot for labeled data + ax.scatter( + target_data[x_col], target_data[y_col], + s=size, linewidth=0.5, + edgecolors=edgecolors, + facecolors=facecolors, label='target', + **args + ) + + if size_txt == None: + pass + elif size_txt == 'auto': + size_txt = size * 2 + else: + # Annotate the points + for i, _ in enumerate(target_data['target']): + txt = target_data['target'].iloc[i] + ax.annotate(txt, (target_data[x_col].iloc[i] + t_x, target_data[y_col].iloc[i] + t_y), + color=textcolor, size=size_txt) + + +class DrugScreenPlotter: + def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label='negative_control',run_name='auto'): + self.screen = screen + self.threshold = threshold + self.ctrl_label = ctrl_label + self.run_name = run_name + self.gamma_score_name = f'gamma:{untreated}_vs_{t0}' + self.rho_score_name = f'rho:{treated}_vs_{untreated}' + self.tau_score_name = f'tau:{treated}_vs_{t0}' + + def _prep_data(self, score_col='score', pvalue_col='pvalue'): + + gamma = self.screen.getPhenotypeScores( + run_name=self.run_name, + score_name=self.gamma_score_name, + threshold=self.threshold, + ctrl_label=self.ctrl_label, + score_col=score_col, + pvalue_col=pvalue_col + ) + + gamma[f'-log10({pvalue_col})'] = np.log10(gamma[pvalue_col]) * -1 + + tau = self.screen.getPhenotypeScores( + run_name=self.run_name, + score_name=self.tau_score_name, + threshold=self.threshold, + ctrl_label=self.ctrl_label, + score_col=score_col, + pvalue_col=pvalue_col + ) + tau[f'-log10({pvalue_col})'] = np.log10(tau[pvalue_col]) * -1 + + rho = self.screen.getPhenotypeScores( + run_name=self.run_name, + score_name=self.rho_score_name, + threshold=self.threshold, + ctrl_label=self.ctrl_label, + score_col=score_col, + pvalue_col=pvalue_col + ) + rho[f'-log10({pvalue_col})'] = np.log10(rho[pvalue_col]) * -1 + + return gamma, tau, rho + def drawVolcanoRho( self, ax, rho_df=None, @@ -151,11 +153,12 @@ def drawVolcanoRho( if xlabel == 'auto': xlabel = self.rho_score_name.replace(':', ': ').replace('_', ' ') - self._volcano(ax, rho_df, + volcano_plot(ax, rho_df, up_hit='resistance_hit', down_hit='sensitivity_hit', score_col=score_col, pvalue_col=pvalue_col, xlabel=xlabel, ylabel=ylabel, dot_size=dot_size, xlims=xlims, ylims=ylims, + ctrl_label=self.ctrl_label, **args) def drawVolcanoGamma( @@ -175,11 +178,12 @@ def drawVolcanoGamma( if xlabel == 'auto': xlabel = self.gamma_score_name.replace(':', ': ').replace('_', ' ') - self._volcano(ax, gamma_df, + volcano_plot(ax, gamma_df, up_hit='up_hit', down_hit='essential_hit', score_col=score_col, pvalue_col=pvalue_col, xlabel=xlabel, ylabel=ylabel, dot_size=dot_size, xlims=xlims, ylims=ylims, + ctrl_label=self.ctrl_label, **args) def drawVolcanoTau( @@ -199,11 +203,12 @@ def drawVolcanoTau( if xlabel == 'auto': xlabel = self.tau_score_name.replace(':', ': ').replace('_', ' ') - self._volcano(ax, tau_df, + volcano_plot(ax, tau_df, up_hit='up_hit', down_hit='down_hit', score_col=score_col, pvalue_col=pvalue_col, xlabel=xlabel, ylabel=ylabel, dot_size=dot_size, xlims=xlims, ylims=ylims, + ctrl_label=self.ctrl_label, **args) def drawRhoGammaScatter( @@ -269,36 +274,39 @@ def label_as_black(self, ax, df_in, label, size=2, size_txt="auto", t_x=.5, t_y=-0.1, **args): - self._label_by_color(ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='black', - textcolor='black', - t_x=t_x, t_y=t_y, - **args) + label_by_color( + ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='black', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) def label_sensitivity_hit(self, ax, df_in, label, x_col='score', y_col='-log10(pvalue)', size=2, size_txt="auto", t_x=.5, t_y=-0.1, **args): - self._label_by_color(ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='#3182bd', - textcolor='black', - t_x=t_x, t_y=t_y, - **args) + label_by_color( + ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='#3182bd', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) def label_resistance_hit(self, ax, df_in, label, x_col='score', y_col='-log10(pvalue)', size=2, size_txt="auto", t_x=.5, t_y=-0.1, **args): - self._label_by_color(ax, df_in, label, - x_col=x_col, y_col=y_col, - size=size, size_txt=size_txt, - edgecolors='black', facecolors='#de2d26', - textcolor='black', - t_x=t_x, t_y=t_y, - **args) \ No newline at end of file + label_by_color( + ax, df_in, label, + x_col=x_col, y_col=y_col, + size=size, size_txt=size_txt, + edgecolors='black', facecolors='#de2d26', + textcolor='black', + t_x=t_x, t_y=t_y, + **args) diff --git a/screenpro/plotting/qc_plots.py b/screenpro/plotting/qc_plots.py index dab57cc..594c964 100644 --- a/screenpro/plotting/qc_plots.py +++ b/screenpro/plotting/qc_plots.py @@ -1,6 +1,11 @@ import numpy as np import scanpy as sc -from .utils import almost_black +from ._utils import almost_black + + +## Histogram of guide counts distribution +def plotCountDistribution(ax, adat, title, **args): + pass ## Scatter plot of replicates From e22f3ab5455c98b934f6e06e847af5bb88d78a18 Mon Sep 17 00:00:00 2001 From: abearab Date: Tue, 16 Jul 2024 21:42:23 -0700 Subject: [PATCH 06/34] fix imports --- screenpro/plotting/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/screenpro/plotting/__init__.py b/screenpro/plotting/__init__.py index d3dcd1e..b8cb64c 100644 --- a/screenpro/plotting/__init__.py +++ b/screenpro/plotting/__init__.py @@ -5,5 +5,6 @@ import numpy as np import scanpy as sc -from .qc_plots import * -from .pheno_plots import * +from .qc_plots import plotReplicateScatter, plotCountDistribution +from .pheno_plots import volcano_plot, label_by_color +from .pheno_plots import DrugScreenPlotter \ No newline at end of file From 1c3869859a0838f49865e313e62c470ab957c454 Mon Sep 17 00:00:00 2001 From: abearab Date: Tue, 16 Jul 2024 21:55:41 -0700 Subject: [PATCH 07/34] return rank_df --- screenpro/plotting/_rank.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/plotting/_rank.py b/screenpro/plotting/_rank.py index 7eb04b9..b836c4c 100644 --- a/screenpro/plotting/_rank.py +++ b/screenpro/plotting/_rank.py @@ -67,4 +67,4 @@ def rank_plot(df, rank_col, color_col=None, name_col='target', highlight_values_ # Customize the grid lines for a clean look ax.grid(False) - return ax \ No newline at end of file + return rank_df, ax \ No newline at end of file From 4b3e01cb6b6368b0714dbbe141881bce2b8ace60 Mon Sep 17 00:00:00 2001 From: abearab Date: Tue, 16 Jul 2024 22:27:21 -0700 Subject: [PATCH 08/34] update docs --- docs/source/phenotype.md | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/docs/source/phenotype.md b/docs/source/phenotype.md index 29a3c96..9d54b76 100644 --- a/docs/source/phenotype.md +++ b/docs/source/phenotype.md @@ -1,22 +1,26 @@ # Phenotype calculation modules +## Phenotype score calculation + Log ratio of {math}`y` vs {math}`x`: ```{math} \Delta = \log(\frac - {\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)} + 1} - {\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)} + 1} + {\begin{bmatrix}{N_{y}}\end{bmatrix}_{(a,b)}} + {\begin{bmatrix}{N_{x}}\end{bmatrix}_{(a,b)}} ) ``` - {math}`y \rightarrow` condition {math}`x` (e.g. treated samples) -- {math}`x \rightarrow` condition {math}`y` (e.g. {math}`t_{0}` samples) +- {math}`x \rightarrow` condition {math}`y` (e.g. {math}`t_{0}` samples, or untreated samples) - {math}`a \rightarrow` number of library elements with sgRNAs targeting {math}`T` - {math}`b \rightarrow` number of biological replicates, {math}`R` (e.g. 2 or 3) - {math}`N_{x}` \| {math}`N_{y} \rightarrow` read counts normalized for sequencing depth in condition {math}`x` or {math}`y` +___ + Here is a formula for V3 library with single library element per gene (i.e. dual sgRNAs in one construct targeting same gene). @@ -40,6 +44,8 @@ Phenotype score for each {math}`T` comparing {math}`y` vs {math}`x`: - {math}`d_{growth} \rightarrow` growth factor to normalize the phenotype score. +## Phenotype statistics calculation + Statistical test comparing {math}`y` vs {math}`x` per each target, {math}`T`: ```{math} @@ -64,4 +70,21 @@ ___ .. automodule:: screenpro.phenoscore :members: :show-inheritance: + +.. automodule:: screenpro.phenotype.phenostat + :members: + :show-inheritance: + +.. automodule:: screenpro.phenotype.delta + :members: + :show-inheritance: + +.. automodule:: screenpro.phenotype.deseq + :members: + :show-inheritance: + +.. automodule:: screenpro.phenotype.annotate + :members: + :show-inheritance: + ``` From bd63f542f94732df3936ca9b1d79fc19ae69f276 Mon Sep 17 00:00:00 2001 From: abearab Date: Wed, 17 Jul 2024 01:23:36 -0700 Subject: [PATCH 09/34] mend --- docs/source/conf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index da6f885..1648f75 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -36,15 +36,15 @@ "seaborn", "scanpy", "anndata", - "screenpro", + # "screenpro", "pyarrow", "biobear", "click", "polars", "biobear", "numba", - "bokeh", - "pydeseq2", + # "bokeh", + # "pydeseq2", "watermark" ] From b791b098f918211b851dff0fd126d92d86e83abf Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 01:22:16 -0700 Subject: [PATCH 10/34] optional argument allow skipping doubling rate `calculateDrugScreen` --- screenpro/assays.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/screenpro/assays.py b/screenpro/assays.py index d05ea03..efe0b6b 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -16,6 +16,8 @@ from .phenoscore import runPhenoScore, runPhenoScoreForReplicate from .preprocessing import addPseudoCount, findLowCounts, normalizeSeqDepth from .phenoscore.annotate import annotateScoreTable, hit_dict + +import warnings from copy import copy @@ -158,12 +160,20 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col=' run_name (str): name for the phenotype calculation run **kwargs: additional arguments to pass to runPhenoScore """ - growth_factor_table = self._calculateGrowthFactor( - untreated = untreated, treated = treated, db_rate_col = db_rate_col - ) + if 'pop_doublings' not in self.adata.obs.columns or db_rate_col == None: + warnings.warn('No doubling rate information provided.') + db_untreated = 1 + db_treated = 1 + db_treated_vs_untreated = 1 + + else: + growth_factor_table = self._calculateGrowthFactor( + untreated = untreated, treated = treated, db_rate_col = db_rate_col + ) - db_untreated=growth_factor_table.query(f'score=="gamma"')['growth_factor'].mean() - db_treated=growth_factor_table.query(f'score=="tau"')['growth_factor'].mean() + db_untreated=growth_factor_table.query(f'score=="gamma"')['growth_factor'].mean() + db_treated=growth_factor_table.query(f'score=="tau"')['growth_factor'].mean() + db_treated_vs_untreated = np.abs(db_untreated - db_treated) # calculate phenotype scores: gamma, tau, rho gamma_name, gamma = runPhenoScore( @@ -180,7 +190,7 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col=' ) # TO-DO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0. rho_name, rho = runPhenoScore( - self.adata, cond1=untreated, cond2=treated, growth_rate=np.abs(db_untreated - db_treated), + self.adata, cond1=untreated, cond2=treated, growth_rate=db_treated_vs_untreated, n_reps=self.n_reps, transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs From 6481196ca2857a7d5fa94cc584bb37777b149d35 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 01:53:42 -0700 Subject: [PATCH 11/34] improve argument --- screenpro/assays.py | 8 ++--- screenpro/phenoscore/__init__.py | 55 ++++++++++++++++++-------------- 2 files changed, 35 insertions(+), 28 deletions(-) diff --git a/screenpro/assays.py b/screenpro/assays.py index efe0b6b..5e86a60 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -177,20 +177,20 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col=' # calculate phenotype scores: gamma, tau, rho gamma_name, gamma = runPhenoScore( - self.adata, cond1=t0, cond2=untreated, growth_rate=db_untreated, + self.adata, cond_ref=t0, cond_test=untreated, growth_rate=db_untreated, n_reps=self.n_reps, transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs ) tau_name, tau = runPhenoScore( - self.adata, cond1=t0, cond2=treated, growth_rate=db_treated, + self.adata, cond_ref=t0, cond_test=treated, growth_rate=db_treated, n_reps=self.n_reps, transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs ) # TO-DO: warning / error if db_untreated and db_treated are too close, i.e. growth_rate ~= 0. rho_name, rho = runPhenoScore( - self.adata, cond1=untreated, cond2=treated, growth_rate=db_treated_vs_untreated, + self.adata, cond_ref=untreated, cond_test=treated, growth_rate=db_treated_vs_untreated, n_reps=self.n_reps, transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs @@ -242,7 +242,7 @@ def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None """ # calculate phenotype scores delta_name, delta = runPhenoScore( - self.adata, cond1=low_bin, cond2=high_bin, n_reps=self.n_reps, + self.adata, cond_ref=low_bin, cond_test=high_bin, n_reps=self.n_reps, transformation=self.fc_transformation, test=self.test, score_level=score_level, **kwargs ) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 581d3e8..780a899 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -19,7 +19,7 @@ from .phenostat import multipleTestsCorrection -def generatePseudoGeneAnnData(adata, num_pseudogenes='auto', pseudogene_size='auto', ctrl_label='negative_control'): +def _generatePseudoGeneAnnData(adata, num_pseudogenes='auto', pseudogene_size='auto', ctrl_label='negative_control'): """Generate pseudogenes from negative control elements in the library. Args: @@ -68,15 +68,15 @@ def generatePseudoGeneAnnData(adata, num_pseudogenes='auto', pseudogene_size='au return out -def runPhenoScore(adata, cond1, cond2, transformation, score_level, test, - growth_rate=1, n_reps=2, keep_top_n = None,num_pseudogenes='auto', pseudogene_size='auto', +def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, + growth_rate=1, n_reps='auto', keep_top_n = None,num_pseudogenes='auto', pseudogene_size='auto', count_layer=None, ctrl_label='negative_control'): - """Calculate phenotype score and p-values when comparing `cond2` vs `cond1`. + """Calculate phenotype score and p-values when comparing `cond_test` vs `cond_ref`. Args: adata (AnnData): AnnData object - cond1 (str): condition 1 - cond2 (str): condition 2 + cond_ref (str): condition 1 + cond_test (str): condition 2 transformation (str): transformation to use for calculating score test (str): test to use for calculating p-value ('MW': Mann-Whitney U rank; 'ttest' : t-test) score_level (str): score level @@ -93,8 +93,15 @@ def runPhenoScore(adata, cond1, cond2, transformation, score_level, test, pd.DataFrame: result dataframe """ # format result name - result_name = f'{cond2}_vs_{cond1}' - print(f'\t{cond2} vs {cond1}') + result_name = f'{cond_test}_vs_{cond_ref}' + print(f'\t{cond_test} vs {cond_ref}') + + # set n_reps if not provided + if n_reps == 'auto': + n_reps = min( + adata.obs.query(f'condition=="{cond_ref}"').shape[0], + adata.obs.query(f'condition=="{cond_test}"').shape[0] + ) # check if count_layer exists if count_layer is None: @@ -113,16 +120,16 @@ def runPhenoScore(adata, cond1, cond2, transformation, score_level, test, # calc phenotype score and p-value if score_level in ['compare_reps']: # prep counts for phenoScore calculation - df_cond1 = adata[adata.obs.query(f'condition=="{cond1}"').index[:n_reps],].to_df(count_layer).T - df_cond2 = adata[adata.obs.query(f'condition=="{cond2}"').index[:n_reps],].to_df(count_layer).T + df_cond_ref = adata[adata.obs.query(f'condition=="{cond_ref}"').index[:n_reps],].to_df(count_layer).T + df_cond_test = adata[adata.obs.query(f'condition=="{cond_test}"').index[:n_reps],].to_df(count_layer).T # convert to numpy arrays - x = df_cond1.to_numpy() - y = df_cond2.to_numpy() + x = df_cond_ref.to_numpy() + y = df_cond_test.to_numpy() # get control values - x_ctrl = df_cond1[adata.var.targetType.eq(ctrl_label)].to_numpy() - y_ctrl = df_cond2[adata.var.targetType.eq(ctrl_label)].to_numpy() + x_ctrl = df_cond_ref[adata.var.targetType.eq(ctrl_label)].to_numpy() + y_ctrl = df_cond_test[adata.var.targetType.eq(ctrl_label)].to_numpy() # calculate growth score and p_value scores, p_values = matrixTest( @@ -151,20 +158,20 @@ def runPhenoScore(adata, cond1, cond2, transformation, score_level, test, adata0 = adata.copy() # prep counts for phenoScore calculation - df_cond1 = adata0[adata0.obs.query(f'condition=="{cond1}"').index].to_df().T - df_cond2 = adata0[adata0.obs.query(f'condition=="{cond2}"').index].to_df().T + df_cond_ref = adata0[adata0.obs.query(f'condition=="{cond_ref}"').index].to_df().T + df_cond_test = adata0[adata0.obs.query(f'condition=="{cond_test}"').index].to_df().T # get control values - x_ctrl = df_cond1[adata0.var.targetType.eq(ctrl_label)].to_numpy() - y_ctrl = df_cond2[adata0.var.targetType.eq(ctrl_label)].to_numpy() - del df_cond1, df_cond2 + x_ctrl = df_cond_ref[adata0.var.targetType.eq(ctrl_label)].to_numpy() + y_ctrl = df_cond_test[adata0.var.targetType.eq(ctrl_label)].to_numpy() + del df_cond_ref, df_cond_test - adata_pseudo = generatePseudoGeneAnnData(adata0, num_pseudogenes=num_pseudogenes, pseudogene_size=pseudogene_size, ctrl_label=ctrl_label) + adata_pseudo = _generatePseudoGeneAnnData(adata0, num_pseudogenes=num_pseudogenes, pseudogene_size=pseudogene_size, ctrl_label=ctrl_label) adata = ad.concat([adata0[:,~adata0.var.targetType.eq(ctrl_label)], adata_pseudo], axis=1) adata.obs = adata0.obs.copy() # prep counts for phenoScore calculation - df_cond1 = adata[adata.obs.query(f'condition=="{cond1}"').index].to_df().T - df_cond2 = adata[adata.obs.query(f'condition=="{cond2}"').index].to_df().T + df_cond_ref = adata[adata.obs.query(f'condition=="{cond_ref}"').index].to_df().T + df_cond_test = adata[adata.obs.query(f'condition=="{cond_test}"').index].to_df().T targets = [] scores = [] @@ -173,8 +180,8 @@ def runPhenoScore(adata, cond1, cond2, transformation, score_level, test, # group by target genes or pseudogenes to aggregate counts for score calculation for target_name, target_group in adata.var.groupby('target'): # convert to numpy arrays - x = df_cond1.loc[target_group.index,:] - y = df_cond2.loc[target_group.index,:] + x = df_cond_ref.loc[target_group.index,:] + y = df_cond_test.loc[target_group.index,:] # Sort and find top n guide per target, see #18 if keep_top_n: x = x.sort_values(x.columns.to_list(), ascending=False) From 5938c7e749ed7549b1082a0f743eb0873a80f215 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 02:06:10 -0700 Subject: [PATCH 12/34] mend --- screenpro/phenoscore/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 780a899..74c0bb2 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -75,8 +75,8 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, Args: adata (AnnData): AnnData object - cond_ref (str): condition 1 - cond_test (str): condition 2 + cond_ref (str): condition reference + cond_test (str): condition test transformation (str): transformation to use for calculating score test (str): test to use for calculating p-value ('MW': Mann-Whitney U rank; 'ttest' : t-test) score_level (str): score level From c901e987504f8dfb035c1a4d899a257cc1243fa8 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 02:14:09 -0700 Subject: [PATCH 13/34] fix a bug use `pd.Series()` --- screenpro/phenoscore/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 74c0bb2..684b77d 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -206,8 +206,8 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, targets.append(target_name) # get mean scores and p-values across replicates - scores = [np.mean(s) for s in scores] - p_values = [np.mean(p) for p in p_values] + scores = pd.Series([np.mean(s) for s in scores]) + p_values = pd.Series([np.mean(p) for p in p_values]) # get adjusted p-values adj_p_values = multipleTestsCorrection(p_values) From 129eb08c97d0b1ebffc4c5b274f41221d2139870 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:07:38 -0700 Subject: [PATCH 14/34] mend --- screenpro/phenoscore/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 684b77d..3857f52 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -205,10 +205,6 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, p_values.append(target_p_values) targets.append(target_name) - # get mean scores and p-values across replicates - scores = pd.Series([np.mean(s) for s in scores]) - p_values = pd.Series([np.mean(p) for p in p_values]) - # get adjusted p-values adj_p_values = multipleTestsCorrection(p_values) From 993d375319f476e09754986af8fd35b089853918 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:10:20 -0700 Subject: [PATCH 15/34] temp commit --- screenpro/phenoscore/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 3857f52..ccd1d4b 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -205,14 +205,14 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, p_values.append(target_p_values) targets.append(target_name) - # get adjusted p-values - adj_p_values = multipleTestsCorrection(p_values) + # # get adjusted p-values + # adj_p_values = multipleTestsCorrection(p_values) # combine results into a dataframe result = pd.concat([ pd.Series(scores, index=targets, name='score'), pd.Series(p_values, index=targets, name=f'{test} pvalue'), - pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), + # pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), ], axis=1) else: From ad9be71d635ce88c89bac63a03a96dcf8b08101b Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:16:33 -0700 Subject: [PATCH 16/34] fix a bug set axis as None for test level == 'all' https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_rel.html --- screenpro/phenoscore/__init__.py | 6 +++--- screenpro/phenoscore/phenostat.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index ccd1d4b..3857f52 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -205,14 +205,14 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, p_values.append(target_p_values) targets.append(target_name) - # # get adjusted p-values - # adj_p_values = multipleTestsCorrection(p_values) + # get adjusted p-values + adj_p_values = multipleTestsCorrection(p_values) # combine results into a dataframe result = pd.concat([ pd.Series(scores, index=targets, name='score'), pd.Series(p_values, index=targets, name=f'{test} pvalue'), - # pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), + pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), ], axis=1) else: diff --git a/screenpro/phenoscore/phenostat.py b/screenpro/phenoscore/phenostat.py index ef270d7..58aec8e 100644 --- a/screenpro/phenoscore/phenostat.py +++ b/screenpro/phenoscore/phenostat.py @@ -37,7 +37,7 @@ def matrixStat(x, y, test, level): p_value = ttest_rel(y, x, axis=0)[1] elif level == 'all': # average across all values - p_value = ttest_rel(y, x)[1] + p_value = ttest_rel(y, x, axis=None)[1] else: raise ValueError(f'Level "{level}" not recognized') return p_value From dad417b833024dffd48a1936276caf8288691c47 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:24:32 -0700 Subject: [PATCH 17/34] temp commit --- screenpro/phenoscore/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 3857f52..0fbc2f1 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -205,8 +205,8 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, p_values.append(target_p_values) targets.append(target_name) - # get adjusted p-values - adj_p_values = multipleTestsCorrection(p_values) + # # get adjusted p-values + # adj_p_values = multipleTestsCorrection(p_values) # combine results into a dataframe result = pd.concat([ From b979d7de1583a67f53acefc751c6d72356ca9b40 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:25:54 -0700 Subject: [PATCH 18/34] mend --- screenpro/phenoscore/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 0fbc2f1..ccd1d4b 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -212,7 +212,7 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, result = pd.concat([ pd.Series(scores, index=targets, name='score'), pd.Series(p_values, index=targets, name=f'{test} pvalue'), - pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), + # pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), ], axis=1) else: From cd3c3efd44a059d341bc894e3684173eb283d7e8 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:31:39 -0700 Subject: [PATCH 19/34] fix a bug --- screenpro/assays.py | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/screenpro/assays.py b/screenpro/assays.py index 5e86a60..672f454 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -165,6 +165,7 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col=' db_untreated = 1 db_treated = 1 db_treated_vs_untreated = 1 + growth_factor_table = None else: growth_factor_table = self._calculateGrowthFactor( @@ -207,27 +208,28 @@ def calculateDrugScreen(self, t0, untreated, treated, score_level, db_rate_col=' self._add_phenotype_results(f'tau:{tau_name}') self._add_phenotype_results(f'rho:{rho_name}') - # get replicate level phenotype scores - pdata_df = pd.concat([ - runPhenoScoreForReplicate( - self.adata, x_label = x_label, y_label = y_label, score = score_label, - transformation=self.fc_transformation, - growth_factor_table=growth_factor_table, - **kwargs - ).add_prefix(f'{score_label}_') - - for x_label, y_label, score_label in [ - ('T0', untreated, 'gamma'), - ('T0', treated, 'tau'), - (untreated, treated, 'rho') - ] - ],axis=1).T - # add .pdata - self.pdata = ad.AnnData( - X = pdata_df, - obs = growth_factor_table.loc[pdata_df.index,:], - var=self.adata.var - ) + if growth_factor_table: + # get replicate level phenotype scores + pdata_df = pd.concat([ + runPhenoScoreForReplicate( + self.adata, x_label = x_label, y_label = y_label, score = score_label, + transformation=self.fc_transformation, + growth_factor_table=growth_factor_table, + **kwargs + ).add_prefix(f'{score_label}_') + + for x_label, y_label, score_label in [ + ('T0', untreated, 'gamma'), + ('T0', treated, 'tau'), + (untreated, treated, 'rho') + ] + ],axis=1).T + # add .pdata + self.pdata = ad.AnnData( + X = pdata_df, + obs = growth_factor_table.loc[pdata_df.index,:], + var=self.adata.var + ) def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None, **kwargs): """ From cd9c9890abf690b2568430b63199a9ace06f312e Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:33:05 -0700 Subject: [PATCH 20/34] use `np.array` --- screenpro/phenoscore/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index ccd1d4b..7694695 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -205,14 +205,17 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, p_values.append(target_p_values) targets.append(target_name) - # # get adjusted p-values - # adj_p_values = multipleTestsCorrection(p_values) + scores = np.array(scores) + p_values = np.array(p_values) + + # get adjusted p-values + adj_p_values = multipleTestsCorrection(p_values) # combine results into a dataframe result = pd.concat([ pd.Series(scores, index=targets, name='score'), pd.Series(p_values, index=targets, name=f'{test} pvalue'), - # pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), + pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), ], axis=1) else: From ef8660195b2e3f4a51db5617480946ad26555c18 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:38:11 -0700 Subject: [PATCH 21/34] mend --- screenpro/phenoscore/__init__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 7694695..1527408 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -204,12 +204,9 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, scores.append(target_scores) p_values.append(target_p_values) targets.append(target_name) - - scores = np.array(scores) - p_values = np.array(p_values) # get adjusted p-values - adj_p_values = multipleTestsCorrection(p_values) + adj_p_values = multipleTestsCorrection(np.array(p_values)) # combine results into a dataframe result = pd.concat([ From 0de43adad017738781af8614964d359453f1a33b Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:49:29 -0700 Subject: [PATCH 22/34] fix a bug --- screenpro/phenoscore/delta.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/screenpro/phenoscore/delta.py b/screenpro/phenoscore/delta.py index 75366df..dcaab97 100644 --- a/screenpro/phenoscore/delta.py +++ b/screenpro/phenoscore/delta.py @@ -20,15 +20,13 @@ def calculateDelta(x, y, transformation, level): np.array: array of log ratio values """ # check if transformation is implemented - if transformation not in ['log2', 'log2(x+1)', 'log10', 'log1p']: + if transformation not in ['log2', 'log10', 'log1p']: raise ValueError(f'transformation "{transformation}" not recognized') if level == 'all': # average across all values if transformation == 'log2': - return np.log2(y) - np.log2(x) - elif transformation == 'log2(x+1)': - return np.mean(np.log2(y+1) - np.log2(x+1)) + return np.mean(np.log2(y) - np.log2(x)) elif transformation == 'log10': return np.mean(np.log10(y) - np.log10(x)) elif transformation == 'log1p': @@ -36,9 +34,7 @@ def calculateDelta(x, y, transformation, level): elif level == 'row': # average across rows if transformation == 'log2': - return np.log2(y) - np.log2(x) - elif transformation == 'log2(x+1)': - return np.mean(np.log2(y+1) - np.log2(x+1), axis=0) + return np.mean(np.log2(y) - np.log2(x), axis=0) elif transformation == 'log10': return np.mean(np.log10(y) - np.log10(x), axis=0) elif transformation == 'log1p': @@ -47,8 +43,6 @@ def calculateDelta(x, y, transformation, level): # average across columns if transformation == 'log2': return np.mean(np.log2(y) - np.log2(x), axis=1) - elif transformation == 'log2(x+1)': - return np.mean(np.log2(y+1) - np.log2(x+1), axis=1) elif transformation == 'log10': return np.mean(np.log10(y) - np.log10(x), axis=1) elif transformation == 'log1p': From 1483cf70f116293b8a597b9f4620be10518108c2 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:51:53 -0700 Subject: [PATCH 23/34] update docs file --- docs/source/phenotype.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/source/phenotype.md b/docs/source/phenotype.md index 9d54b76..64633cd 100644 --- a/docs/source/phenotype.md +++ b/docs/source/phenotype.md @@ -64,6 +64,12 @@ module](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_r > This is a test for the null hypothesis that two related or repeated > samples have identical average (expected) values). +# + +```{math} +\text{combined score} = \left( \dfrac{T_{\text{phenotype score}}}{\sigma{\text{(negative controls)}}} \right) \times -\log_{10}(\text{pvalue})$ +``` + ___ ```{eval-rst} @@ -71,19 +77,19 @@ ___ :members: :show-inheritance: -.. automodule:: screenpro.phenotype.phenostat +.. automodule:: screenpro.phenoscore.phenostat :members: :show-inheritance: -.. automodule:: screenpro.phenotype.delta +.. automodule:: screenpro.phenoscore.delta :members: :show-inheritance: -.. automodule:: screenpro.phenotype.deseq +.. automodule:: screenpro.phenoscore.deseq :members: :show-inheritance: -.. automodule:: screenpro.phenotype.annotate +.. automodule:: screenpro.phenoscore.annotate :members: :show-inheritance: From e9f03ab9d9698e39d80e93fd7cced8f3b14aa34a Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 03:55:31 -0700 Subject: [PATCH 24/34] mv tests folder --- {screenpro/tests => tests}/__init__.py | 0 {screenpro/tests => tests}/test_fastq2count.py | 0 {screenpro/tests => tests}/test_phenoscore.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {screenpro/tests => tests}/__init__.py (100%) rename {screenpro/tests => tests}/test_fastq2count.py (100%) rename {screenpro/tests => tests}/test_phenoscore.py (100%) diff --git a/screenpro/tests/__init__.py b/tests/__init__.py similarity index 100% rename from screenpro/tests/__init__.py rename to tests/__init__.py diff --git a/screenpro/tests/test_fastq2count.py b/tests/test_fastq2count.py similarity index 100% rename from screenpro/tests/test_fastq2count.py rename to tests/test_fastq2count.py diff --git a/screenpro/tests/test_phenoscore.py b/tests/test_phenoscore.py similarity index 100% rename from screenpro/tests/test_phenoscore.py rename to tests/test_phenoscore.py From 53194171d9375305847a71ff0adef3b9d13803a3 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 04:00:55 -0700 Subject: [PATCH 25/34] add `target` column --- screenpro/phenoscore/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/screenpro/phenoscore/__init__.py b/screenpro/phenoscore/__init__.py index 1527408..e4ac013 100644 --- a/screenpro/phenoscore/__init__.py +++ b/screenpro/phenoscore/__init__.py @@ -112,7 +112,7 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, adata.X = adata.layers[count_layer].copy() # evaluate library table to get targets and riase error if not present - required_columns = ['target', 'sequence'] + required_columns = ['target'] #, 'sequence'] missing_columns = list(set(required_columns) - set(adata.var.columns)) if len(missing_columns) > 0: raise ValueError(f"Missing required columns in library table: {missing_columns}") @@ -210,10 +210,15 @@ def runPhenoScore(adata, cond_ref, cond_test, transformation, score_level, test, # combine results into a dataframe result = pd.concat([ + pd.Series(targets, index=targets, name='target'), pd.Series(scores, index=targets, name='score'), pd.Series(p_values, index=targets, name=f'{test} pvalue'), pd.Series(adj_p_values, index=targets, name='BH adj_pvalue'), ], axis=1) + + # rename pseudo genes in target column to `ctrl_label` + result['target'] = result['target'].apply(lambda x: ctrl_label if 'pseudo' in x else x) + else: raise ValueError(f'score_level "{score_level}" not recognized. Currently, "compare_reps" and "compare_guides" are supported.') From f5af200ce40cd1d54b930b5f779d02f9d6817603 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 04:04:53 -0700 Subject: [PATCH 26/34] bump version 0.4.6 --- screenpro/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/__init__.py b/screenpro/__init__.py index f51c26b..6d155ab 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -31,6 +31,6 @@ from .dashboard import DrugScreenDashboard -__version__ = "0.4.5" +__version__ = "0.4.6" __author__ = "Abe Arab" __email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com" From d8d70cb60420543cd842754c136b7a920ec2a770 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 04:13:26 -0700 Subject: [PATCH 27/34] mend --- tests/test_phenoscore.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_phenoscore.py b/tests/test_phenoscore.py index 8738d2c..297ab6d 100644 --- a/tests/test_phenoscore.py +++ b/tests/test_phenoscore.py @@ -45,9 +45,9 @@ def test_runPhenoScore(): # run function result_name, result = runPhenoScore( adata=adat, - cond1='A', - cond2='B', - transformation='log2(x+1)', + cond_ref='A', + cond_test='B', + transformation='log2', test='ttest', score_level='compare_reps', growth_rate=1, From 52753cef49c0f29e9df15d3219d867952c004424 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 04:49:11 -0700 Subject: [PATCH 28/34] change version ranges --- environment.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 5ac5774..e72d61b 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - python=3.9 + - python=>3.9,<3.12 - scanpy - python-igraph - leidenalg @@ -14,7 +14,6 @@ dependencies: - scipy - matplotlib<3.7 - seaborn - - pyarrow - bokeh - ipykernel - mscorefonts From f66471f0df7d9a82320a035eac702a1d719ae9ff Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 05:13:02 -0700 Subject: [PATCH 29/34] mend --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index e72d61b..70aa7d7 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: - conda-forge - bioconda dependencies: - - python=>3.9,<3.12 + - python>=3.9,<=3.12 - scanpy - python-igraph - leidenalg From 68cc8bada7688132c3c1b14bd6e011e021b67fc8 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 05:23:39 -0700 Subject: [PATCH 30/34] add more python versions --- .github/workflows/python-package.yml | 2 +- .github/workflows/python-publish.yml | 2 +- setup.py | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 169ecba..9ae180c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os-version: ["ubuntu-latest"] - python-version: ["3.9"] # ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 1c23099..c0ff647 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -15,7 +15,7 @@ jobs: fail-fast: false matrix: os-version: ["ubuntu-latest"] - python-version: ["3.9"] # ["3.8", "3.9", "3.10"] + python-version: ["3.11"] # ["3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v3 diff --git a/setup.py b/setup.py index 21d4cfb..973102f 100644 --- a/setup.py +++ b/setup.py @@ -29,5 +29,8 @@ 'License :: OSI Approved :: MIT License', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + ] ) From 410112aadb68c831b15d5e4efe85c42894d5315b Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 05:25:41 -0700 Subject: [PATCH 31/34] set version range try to fix failed test similar to https://github.com/pola-rs/polars/issues/8546 --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 70aa7d7..6face60 100644 --- a/environment.yml +++ b/environment.yml @@ -21,6 +21,7 @@ dependencies: - pip - pip: - polars + - pyarrow<17.0 - biobear - numba - pydeseq2 From ea30738ba8c3b15f4cf28eeb92e3f3767c15ad40 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 05:27:22 -0700 Subject: [PATCH 32/34] mend --- docs/source/phenotype.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/source/phenotype.md b/docs/source/phenotype.md index 64633cd..09e60f0 100644 --- a/docs/source/phenotype.md +++ b/docs/source/phenotype.md @@ -64,10 +64,10 @@ module](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_r > This is a test for the null hypothesis that two related or repeated > samples have identical average (expected) values). -# +## Combined score calculation ```{math} -\text{combined score} = \left( \dfrac{T_{\text{phenotype score}}}{\sigma{\text{(negative controls)}}} \right) \times -\log_{10}(\text{pvalue})$ +\text{combined score} = \left( \dfrac{T_{\text{phenotype score}}}{\sigma{\text{(negative controls)}}} \right) \times -\log_{10}(\text{pvalue}) ``` ___ @@ -77,6 +77,12 @@ ___ :members: :show-inheritance: +``` + +## Other related modules and functions + +```{eval-rst} + .. automodule:: screenpro.phenoscore.phenostat :members: :show-inheritance: From 6f18a01a98a50b2f95b246715f58709f9509b23a Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 15:09:05 -0700 Subject: [PATCH 33/34] mend --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 9ae180c..565bafa 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os-version: ["ubuntu-latest"] - python-version: ["3.9", "3.10", "3.11", "3.12"] + python-version: ["3.9", "3.10", "3.11"] #, "3.12"] steps: - uses: actions/checkout@v3 From ba01f000489090bd85ae8c58bbc90ccf0d4fc231 Mon Sep 17 00:00:00 2001 From: abearab Date: Thu, 25 Jul 2024 15:11:12 -0700 Subject: [PATCH 34/34] set version ranges for `biobear` --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 6face60..bd9689a 100644 --- a/environment.yml +++ b/environment.yml @@ -22,7 +22,7 @@ dependencies: - pip: - polars - pyarrow<17.0 - - biobear + - biobear>=0.20.3,<0.21 - numba - pydeseq2 - simple_colors