From 47aac80b584754d360fc20b60afaa2c9aeabb6d4 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Tue, 21 May 2024 13:07:52 -0700 Subject: [PATCH 01/27] add option to only highlight dots --- screenpro/plotting.py | 54 ++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/screenpro/plotting.py b/screenpro/plotting.py index 99f2f56..df33575 100644 --- a/screenpro/plotting.py +++ b/screenpro/plotting.py @@ -106,7 +106,7 @@ def plot_volcano(ax, df_in, threshold, up_hit='resistance_hit', down_hit='sensit ax.legend() -def label_as_black(ax, df_in, label, threshold, size=2, size_txt=None, +def label_as_black(ax, df_in, label, threshold, size=2, size_txt="auto", ctrl_label = 'no-targeting', t_x=.5, t_y=-0.1): df = prep_data(df_in, threshold, ctrl_label) @@ -117,17 +117,19 @@ def label_as_black(ax, df_in, label, threshold, size=2, size_txt=None, ax.scatter(target_data['score'], target_data['-log10(pvalue)'], s=size, linewidth=0.5, edgecolors='black', facecolors='black', label='target') - if not size_txt: + if size_txt == None: + pass + elif size_txt == 'auto': size_txt = size * 2 + else: + # Annotate the points + for i, _ in enumerate(target_data['target']): + txt = target_data['target'].iloc[i] + ax.annotate(txt, (target_data['score'].iloc[i] + t_x, target_data['-log10(pvalue)'].iloc[i] + t_y), + color='black', size=size_txt) - # Annotate the points - for i, _ in enumerate(target_data['target']): - txt = target_data['target'].iloc[i] - ax.annotate(txt, (target_data['score'].iloc[i] + t_x, target_data['-log10(pvalue)'].iloc[i] + t_y), - color='black', size=size_txt) - -def label_sensitivity_hit(ax, df_in, label, threshold, size=2, size_txt=None, +def label_sensitivity_hit(ax, df_in, label, threshold, size=2, size_txt="auto", ctrl_label = 'no-targeting', t_x=.5, t_y=-0.1): df = prep_data(df_in, threshold, ctrl_label) @@ -138,17 +140,19 @@ def label_sensitivity_hit(ax, df_in, label, threshold, size=2, size_txt=None, ax.scatter(target_data['score'], target_data['-log10(pvalue)'], s=size, linewidth=0.5, edgecolors='black', facecolors='#3182bd', label='target') - if not size_txt: + if size_txt == None: + pass + elif size_txt == 'auto': size_txt = size * 2 - - # Annotate the points - for i, _ in enumerate(target_data['target']): - txt = target_data['target'].iloc[i] - ax.annotate(txt, (target_data['score'].iloc[i] + t_x, target_data['-log10(pvalue)'].iloc[i] + t_y), - color='black', size=size_txt) + else: + # Annotate the points + for i, _ in enumerate(target_data['target']): + txt = target_data['target'].iloc[i] + ax.annotate(txt, (target_data['score'].iloc[i] + t_x, target_data['-log10(pvalue)'].iloc[i] + t_y), + color='black', size=size_txt) -def label_resistance_hit(ax, df_in, label, threshold, size=2, size_txt=None, +def label_resistance_hit(ax, df_in, label, threshold, size=2, size_txt="auto", ctrl_label = 'no-targeting', t_x=.5, t_y=-0.1): df = prep_data(df_in, threshold, ctrl_label) @@ -159,14 +163,16 @@ def label_resistance_hit(ax, df_in, label, threshold, size=2, size_txt=None, ax.scatter(target_data['score'], target_data['-log10(pvalue)'], s=size, linewidth=0.5, edgecolors='black', facecolors='#de2d26', label='target') - if not size_txt: + if size_txt == None: + pass + elif size_txt == 'auto': size_txt = size * 2 - - # Annotate the points - for i, _ in enumerate(target_data['target']): - txt = target_data['target'].iloc[i] - ax.annotate(txt, (target_data['score'].iloc[i] + t_x, target_data['-log10(pvalue)'].iloc[i] + t_y), - color='black', size=size_txt) + else: + # Annotate the points + for i, _ in enumerate(target_data['target']): + txt = target_data['target'].iloc[i] + ax.annotate(txt, (target_data['score'].iloc[i] + t_x, target_data['-log10(pvalue)'].iloc[i] + t_y), + color='black', size=size_txt) def plotReplicateScatter(ax, adat_in, x, y, title, min_val=None, max_val=None, log_transform=True): From 8340fef9a1c9dc4ae004c72a9b14d6f1aba2c167 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Wed, 22 May 2024 00:01:17 -0700 Subject: [PATCH 02/27] add `getAnnotatedTable` function --- screenpro/assays.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/screenpro/assays.py b/screenpro/assays.py index 3ffb958..cebc303 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -216,6 +216,50 @@ def getPhenotypeScores(self, run_name, score_name, threshold=5, ctrl_label='negC return out + def getAnnotatedTable(self, run_name, threshold=5, ctrl_label='negCtrl', target_col='target',pvalue_column='ttest pvalue', score_column='score'): + hit_dict = { + 'gamma':{ + 'up_hit':'up_hit', + 'down_hit':'essential_hit' + }, + 'tau':{ + 'up_hit':'up_hit', + 'down_hit':'down_hit' + }, + 'rho':{ + 'up_hit':'resistance_hit', + 'down_hit':'sensitivity_hit' + } + } + + keep_col = [target_col, score_column, pvalue_column] + + scores = {score for score, col in self.phenotypes[run_name].columns} + sort_var = self.adata.var.sort_values(['targetType','target']).index.to_list() + + df_list = {} + for score in scores: + score_tag = score.split(':')[0] + # get label + df_label = ann_score_df( + self.phenotypes[run_name][score].loc[:,keep_col], + up_hit=hit_dict[score_tag]['up_hit'], + down_hit=hit_dict[score_tag]['down_hit'], + ctrl_label=ctrl_label, + threshold=threshold + )['label'] + # get replicate phe + df_phe_reps = self.pdata[self.pdata.obs.score.eq(score_tag)].to_df().T + + # make table + df = pd.concat([self.phenotypes['compare_reps'][score], df_phe_reps, df_label],axis=1).loc[sort_var,:] + + df_list.update({score:df}) + + out = pd.concat(df_list,axis=1) + + return out + class GImaps(object): pass \ No newline at end of file From f1492922cf45c77c0bca721ab56fc8e5f849affc Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Wed, 22 May 2024 00:07:13 -0700 Subject: [PATCH 03/27] mend --- screenpro/assays.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/screenpro/assays.py b/screenpro/assays.py index cebc303..ebceeef 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -190,7 +190,7 @@ def calculateFlowBasedScreen(self, low_bin, high_bin, score_level, run_name=None # save phenotype name for reference self._add_phenotype_results(f'delta:{delta_name}') - def getPhenotypeScores(self, run_name, score_name, threshold=5, ctrl_label='negCtrl', target_col='target',pvalue_column='ttest pvalue', score_column='score'): + def getPhenotypeScores(self, score_name, run_name='auto', threshold=5, ctrl_label='negCtrl', target_col='target',pvalue_column='ttest pvalue', score_column='score'): """ Get phenotype scores for a given score level @@ -203,9 +203,19 @@ def getPhenotypeScores(self, run_name, score_name, threshold=5, ctrl_label='negC pvalue_column (str): column name for the p-value, default is 'ttest pvalue' score_column (str): column name for the score, default is 'score' """ + if run_name == 'auto': + if len(list(self.phenotypes.keys())) == 1: + run_name = list(self.phenotypes.keys())[0] + else: + raise ValueError( + 'Multiple phenotype calculation runs found.' + 'Please specify run_name. Available runs: ' + '' + ', '.join(self.phenotypes.keys()) + ) + if score_name not in self.phenotype_names: raise ValueError(f"Phenotype '{score_name}' not found in self.phenotype_names") - + keep_col = [target_col, score_column, pvalue_column] out = ann_score_df( @@ -216,7 +226,7 @@ def getPhenotypeScores(self, run_name, score_name, threshold=5, ctrl_label='negC return out - def getAnnotatedTable(self, run_name, threshold=5, ctrl_label='negCtrl', target_col='target',pvalue_column='ttest pvalue', score_column='score'): + def getAnnotatedTable(self, run_name='auto', threshold=5, ctrl_label='negCtrl', target_col='target',pvalue_column='ttest pvalue', score_column='score'): hit_dict = { 'gamma':{ 'up_hit':'up_hit', @@ -232,6 +242,16 @@ def getAnnotatedTable(self, run_name, threshold=5, ctrl_label='negCtrl', target_ } } + if run_name == 'auto': + if len(list(self.phenotypes.keys())) == 1: + run_name = list(self.phenotypes.keys())[0] + else: + raise ValueError( + 'Multiple phenotype calculation runs found.' + 'Please specify run_name. Available runs: ' + '' + ', '.join(self.phenotypes.keys()) + ) + keep_col = [target_col, score_column, pvalue_column] scores = {score for score, col in self.phenotypes[run_name].columns} From 987f83e21c3d9ccf61df9028e6e966c81bf5e24c Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Wed, 22 May 2024 01:11:35 -0700 Subject: [PATCH 04/27] fix filter rule --- screenpro/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/utils.py b/screenpro/utils.py index 681652f..18fa169 100644 --- a/screenpro/utils.py +++ b/screenpro/utils.py @@ -35,7 +35,7 @@ def find_low_counts(adata, filter_type='either', minimum_reads=50): """ count_bin = adata.X >= minimum_reads if filter_type == 'either': - out = adata[:, count_bin.any(axis=0)].copy() + out = adata[:, ~(~count_bin.all(axis=0))].copy() elif filter_type == 'all': out = adata[:, count_bin.all(axis=0)].copy() From cd9d99ad895291b2b3c193b8a269107b1b100342 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Wed, 22 May 2024 02:48:51 -0700 Subject: [PATCH 05/27] add `sum` option for count filter --- screenpro/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/screenpro/utils.py b/screenpro/utils.py index 18fa169..c02214c 100644 --- a/screenpro/utils.py +++ b/screenpro/utils.py @@ -38,7 +38,9 @@ def find_low_counts(adata, filter_type='either', minimum_reads=50): out = adata[:, ~(~count_bin.all(axis=0))].copy() elif filter_type == 'all': out = adata[:, count_bin.all(axis=0)].copy() - + elif filter_type == 'sum': + out = adata[:, adata.to_df().sum(axis=0) >= minimum_reads].copy() + # print the number of removed variables n_removed = adata.shape[1] - out.shape[1] print( From 60654cf50c62f4fd510c3599303c753cc7d12e96 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Wed, 22 May 2024 02:51:38 -0700 Subject: [PATCH 06/27] mend --- screenpro/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/utils.py b/screenpro/utils.py index c02214c..e792b00 100644 --- a/screenpro/utils.py +++ b/screenpro/utils.py @@ -44,7 +44,7 @@ def find_low_counts(adata, filter_type='either', minimum_reads=50): # print the number of removed variables n_removed = adata.shape[1] - out.shape[1] print( - f"{n_removed} variables with less than {minimum_reads} reads in {filter_type} replicates / experiment" + f"{n_removed} variables with less than {minimum_reads} reads are removed. (filter_type:{filter_type})" ) adata.var['low_count'] = ~adata.var.index.isin(out.var.index.to_list()) From e12e4e687234461833d75429777f779d92d222ba Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Wed, 22 May 2024 02:59:13 -0700 Subject: [PATCH 07/27] mend --- screenpro/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/utils.py b/screenpro/utils.py index e792b00..736ea0c 100644 --- a/screenpro/utils.py +++ b/screenpro/utils.py @@ -44,7 +44,7 @@ def find_low_counts(adata, filter_type='either', minimum_reads=50): # print the number of removed variables n_removed = adata.shape[1] - out.shape[1] print( - f"{n_removed} variables with less than {minimum_reads} reads are removed. (filter_type:{filter_type})" + f"{n_removed} variables with less than {minimum_reads} reads (filter_type:{filter_type})" ) adata.var['low_count'] = ~adata.var.index.isin(out.var.index.to_list()) From 26a2383a9ffd0a9fb0dc3efab53b6dfd4d107d63 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Wed, 22 May 2024 03:04:44 -0700 Subject: [PATCH 08/27] mend --- screenpro/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/utils.py b/screenpro/utils.py index 736ea0c..2e8218f 100644 --- a/screenpro/utils.py +++ b/screenpro/utils.py @@ -44,7 +44,7 @@ def find_low_counts(adata, filter_type='either', minimum_reads=50): # print the number of removed variables n_removed = adata.shape[1] - out.shape[1] print( - f"{n_removed} variables with less than {minimum_reads} reads (filter_type:{filter_type})" + f"{n_removed} variables with less than {minimum_reads} reads (filter_type: '{filter_type}')" ) adata.var['low_count'] = ~adata.var.index.isin(out.var.index.to_list()) From 70bd9e933bfb15724ebdf6ba9ff5cc2459887480 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Wed, 22 May 2024 20:05:23 -0700 Subject: [PATCH 09/27] fix value filter --- screenpro/plotting.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/screenpro/plotting.py b/screenpro/plotting.py index df33575..da7ed43 100644 --- a/screenpro/plotting.py +++ b/screenpro/plotting.py @@ -88,9 +88,9 @@ def plot_volcano(ax, df_in, threshold, up_hit='resistance_hit', down_hit='sensit alpha=0.9, s=dot_size, c='#fcae91', label=up_hit) ax.scatter(df.loc[df['label'] == down_hit, 'score'], df.loc[df['label'] == down_hit, '-log10(pvalue)'], alpha=0.9, s=dot_size, c='#bdd7e7', label=down_hit) - ax.scatter(df.loc[df['label'] == 'non-targeting', 'score'], - df.loc[df['label'] == 'non-targeting', '-log10(pvalue)'], - alpha=0.1, s=dot_size, c='gray', label='non-targeting') + ax.scatter(df.loc[df['label'] == ctrl_label, 'score'], + df.loc[df['label'] == ctrl_label, '-log10(pvalue)'], + alpha=0.1, s=dot_size, c='gray', label=ctrl_label) # Set x-axis and y-axis labels ax.set_xlabel('phenotype score') From 080734088cc5cdd103cce18b80e4a515af8fc561 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 01:13:44 -0700 Subject: [PATCH 10/27] update hit labels --- screenpro/assays.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/screenpro/assays.py b/screenpro/assays.py index ebceeef..5dd8ed0 100644 --- a/screenpro/assays.py +++ b/screenpro/assays.py @@ -203,6 +203,21 @@ def getPhenotypeScores(self, score_name, run_name='auto', threshold=5, ctrl_labe pvalue_column (str): column name for the p-value, default is 'ttest pvalue' score_column (str): column name for the score, default is 'score' """ + hit_dict = { + 'gamma':{ + 'up_hit':'up_hit', + 'down_hit':'essential_hit' + }, + 'tau':{ + 'up_hit':'up_hit', + 'down_hit':'down_hit' + }, + 'rho':{ + 'up_hit':'resistance_hit', + 'down_hit':'sensitivity_hit' + } + } + if run_name == 'auto': if len(list(self.phenotypes.keys())) == 1: run_name = list(self.phenotypes.keys())[0] @@ -217,10 +232,12 @@ def getPhenotypeScores(self, score_name, run_name='auto', threshold=5, ctrl_labe raise ValueError(f"Phenotype '{score_name}' not found in self.phenotype_names") keep_col = [target_col, score_column, pvalue_column] - + score_tag = score_name.split(':')[0] out = ann_score_df( self.phenotypes[run_name][score_name].loc[:,keep_col], ctrl_label=ctrl_label, + up_hit=hit_dict[score_tag]['up_hit'], + down_hit=hit_dict[score_tag]['down_hit'], threshold=threshold ) @@ -254,15 +271,15 @@ def getAnnotatedTable(self, run_name='auto', threshold=5, ctrl_label='negCtrl', keep_col = [target_col, score_column, pvalue_column] - scores = {score for score, col in self.phenotypes[run_name].columns} + score_names = {s for s, col in self.phenotypes[run_name].columns} sort_var = self.adata.var.sort_values(['targetType','target']).index.to_list() df_list = {} - for score in scores: - score_tag = score.split(':')[0] + for score_name in score_names: + score_tag = score_name.split(':')[0] # get label df_label = ann_score_df( - self.phenotypes[run_name][score].loc[:,keep_col], + self.phenotypes[run_name][score_name].loc[:,keep_col], up_hit=hit_dict[score_tag]['up_hit'], down_hit=hit_dict[score_tag]['down_hit'], ctrl_label=ctrl_label, @@ -272,9 +289,11 @@ def getAnnotatedTable(self, run_name='auto', threshold=5, ctrl_label='negCtrl', df_phe_reps = self.pdata[self.pdata.obs.score.eq(score_tag)].to_df().T # make table - df = pd.concat([self.phenotypes['compare_reps'][score], df_phe_reps, df_label],axis=1).loc[sort_var,:] + df = pd.concat([ + self.phenotypes['compare_reps'][score_name], df_phe_reps, df_label + ],axis=1).loc[sort_var,:] - df_list.update({score:df}) + df_list.update({score_name:df}) out = pd.concat(df_list,axis=1) From c586b7bca22c7017583e4475603c0f7ba4144d64 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:36:05 -0700 Subject: [PATCH 11/27] add codes for interactive plots --- screenpro/dashboard.py | 256 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) create mode 100644 screenpro/dashboard.py diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py new file mode 100644 index 0000000..f71cb5d --- /dev/null +++ b/screenpro/dashboard.py @@ -0,0 +1,256 @@ +## Copyright (c) 2022-2024 ScreenPro2 Development Team. +## All rights reserved. +## Gilbart Lab, UCSF / Arc Institute. +## Multi-Omics Tech Center, Arc Insititue. + +import pandas as pd +import bokeh + + +class DrugScreenDashboard: + + def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label='negative_control',run_name='auto'): + self.threshold = threshold + self.ctrl_label = ctrl_label + self.run_name = run_name + self.gamma_score_name = f'gamma:{untreated}_vs_{T0}' + self.rho_score_name = f'rho:{treated}_vs_{untreated}' + self.plots = {} + + def _prep_data(self): + + gamma = screen.getPhenotypeScores( + run_name=self.run_name, + score_name=self.gamma_score_name, + ctrl_label=self.ctrl_label, + threshold=self.threshold, + ) + + rho = screen.getPhenotypeScores( + run_name=self.run_name, + score_name=self.rho_score_name, + ctrl_label=self.ctrl_label, + threshold=self.threshold + ) + + df = pd.DataFrame({ + 'target': rho['target'], + 'rho_score': rho['score'], + 'rho_pvalue': rho['pvalue'], + 'rho_label': rho['label'], + '-log10(rho_pvalue)': np.log10(rho['pvalue']) * -1, + 'gamma_score': gamma.loc[rho.index,'score'], + 'gamma_pvalue': gamma.loc[rho.index,'pvalue'], + 'gamma_label': gamma.loc[rho.index,'label'], + '-log10(gamma_pvalue)': np.log10(gamma.loc[rho.index,'pvalue']) * -1, + }) + + return df + + def _plot_scatter( + self, + x_source,y_source, + xaxis_label,yaxis_label, + up_hit, down_hit, + hit_label_col, + x_min, x_max, y_min, y_max, + title='', + dot_size=1, + width=500, height=400, + toolbar_location='below', + ): + + df = self._prep_data() + + # if filter_labels is not None: + # df = df[df['label'].isin(filter_labels)] + + TOOLS = "box_select,box_zoom,lasso_select,reset,save,wheel_zoom,pan,copy,undo,redo,reset,examine,fullscreen" + + # create a new plot with a specific size + TOOLTIPS = [ + ("name", "@target"), + ("rho score", "@rho_score"), + ("rho p-value", "@rho_pvalue"), + ("rho label", "@rho_label"), + ("gamma score", "@gamma_score"), + ("gamma p-value", "@gamma_pvalue"), + ("gamma label", "@gamma_label"), + ] + p = bokeh.plotting.figure( + sizing_mode="stretch_width", + tools=TOOLS, + tooltips=TOOLTIPS, + toolbar_location=toolbar_location, + title=title, + max_width=width, height=height, + ) + p.toolbar.autohide = True + + source = bokeh.models.ColumnDataSource( + df.loc[df[hit_label_col] == 'target_non_hit',:] + ) + p.scatter( + x=x_source, y=y_source, + source=source, + alpha=0.2, + size=dot_size * 1.2, + color='gray', + legend_label='target_non_hit', + name='circles' + ) + + # size_mapper=bokeh.models.LinearInterpolator( + # x=[df['1/gamma_score'].min(),df['1/gamma_score'].max()], + # y=[1,100] + # ) + + source = bokeh.models.ColumnDataSource( + df.loc[df[hit_label_col] == up_hit,:] + ) + p.scatter( + x=x_source, y=y_source, + source=source, + alpha=0.8, + size=dot_size * 1.2, + # size={'field':'1/gamma_score','transform':size_mapper}, + color='#fcae91', + legend_label=up_hit, + name='circles' + ) + + source = bokeh.models.ColumnDataSource( + df.loc[df[hit_label_col] == down_hit,:] + ) + p.scatter( + x=x_source, y=y_source, + source=source, + alpha=0.8, + # size={'field':'1/gamma_score','transform':size_mapper}, + size=dot_size * 1.2, + color='#bdd7e7', + legend_label=down_hit, + name='circles' + ) + + source = bokeh.models.ColumnDataSource( + df.loc[df[hit_label_col] == self.ctrl_label,:] + ) + p.scatter( + x=x_source, y=y_source, + source=source, + alpha=0.2, + size=dot_size*0.8, + color='silver', + legend_label=self.ctrl_label, + name='circles' + ) + + # Set x-axis and y-axis labels + p.xaxis.axis_label = xaxis_label + p.xaxis.axis_label_text_font_style = 'normal' + p.yaxis.axis_label = yaxis_label + p.yaxis.axis_label_text_font_style = 'normal' + + # Set x-axis limits + p.x_range.start = x_min + p.x_range.end = x_max + + # Set y-axis limits + p.y_range.start = y_min + p.y_range.end = y_max + + # Add legend + p.legend.location = "top_left" + + p.title.text = title + p.title.align = 'center' + p.title.text_font_size = '12pt' + p.title.text_font_style = 'bold' + + return p + + def _get_html(self, p): + html = bokeh.embed.file_html(p, bokeh.resources.CDN, "") + return html + + def RhoVolcanoPlot( + self, + x_source='rho_score', y_source='-log10(rho_pvalue)', + xaxis_label='phenotype score', + yaxis_label='-log10(p-value)', + up_hit='resistance_hit', down_hit='sensitivity_hit', + hit_label_col='rho_label', + x_min=-2.5, x_max=2.5, y_min=0, y_max=5.5, + return_html=True, + **kwargs + ): + p = self._plot_scatter( + x_source, y_source, + xaxis_label, yaxis_label, + up_hit, down_hit, + hit_label_col, + x_min, x_max, y_min, y_max, + **kwargs + ) + + if return_html: + return self._get_html(p) + + self.plots.update( + {'RhoVolcanoPlot': p} + ) + + def GammaVolcanoPlot( + self, + x_source='gamma_score', y_source='-log10(gamma_pvalue)', + xaxis_label='phenotype score', + yaxis_label='-log10(p-value)', + up_hit='up_hit', down_hit='essential_hit', + hit_label_col='gamma_label', + x_min=-2.5, x_max=2.5, y_min=0, y_max=5.5, + return_html=True, + **kwargs + ): + p = self._plot_scatter( + x_source, y_source, + xaxis_label, yaxis_label, + up_hit, down_hit, + hit_label_col, + x_min, x_max, y_min, y_max, + **kwargs + ) + + if return_html: + return self._get_html(p) + + self.plots.update( + {'GammaVolcanoPlot': p} + ) + + def RhoGammaScatter( + self, + x_source='rho_score', y_source='gamma_score', + xaxis_label='rho score', + yaxis_label='gamma score', + up_hit='resistance_hit', down_hit='sensitivity_hit', + hit_label_col='rho_label', + return_html=True, + x_min=-2.5, x_max=2.5, y_min=-2.5, y_max=2.5, + **kwargs + ): + p = self._plot_scatter( + x_source, y_source, + xaxis_label, yaxis_label, + up_hit, down_hit, + hit_label_col, + x_min, x_max, y_min, y_max, + **kwargs + ) + + if return_html: + return self._get_html(p) + + self.plots.update( + {'GammaRhoScatter': p} + ) From 9523b838faa0ecdefde34009cf8bf44c1f578143 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:36:17 -0700 Subject: [PATCH 12/27] add bokeh --- docs/source/conf.py | 1 + environment.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/source/conf.py b/docs/source/conf.py index 7cb5d44..70bdce6 100755 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -43,6 +43,7 @@ "polars", "biobear", "numba", + "bokeh", "pydeseq2", "watermark" ] diff --git a/environment.yml b/environment.yml index 4ee05be..5ac5774 100644 --- a/environment.yml +++ b/environment.yml @@ -15,6 +15,7 @@ dependencies: - matplotlib<3.7 - seaborn - pyarrow + - bokeh - ipykernel - mscorefonts - rust>=1.72 From bb836b66cecece9e73703713436a694f805a9ecc Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:46:10 -0700 Subject: [PATCH 13/27] add `DataDashboard` class --- screenpro/dashboard.py | 50 +++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index f71cb5d..6ea1199 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -7,7 +7,33 @@ import bokeh -class DrugScreenDashboard: +class DataDashboard: + + def __init__(self): + pass + + def _new_plot(self,TOOLTIPS,width,height,toolbar_location): + + TOOLS = "box_select,box_zoom,lasso_select,reset,save,wheel_zoom,pan,copy,undo,redo,reset,examine,fullscreen" + + # create a new plot with a specific size + p = bokeh.plotting.figure( + sizing_mode="stretch_width", + tools=TOOLS, + tooltips=TOOLTIPS, + toolbar_location=toolbar_location, + title=title, + max_width=width, height=height, + ) + p.toolbar.autohide = True + return p + + def _get_html(self, p): + html = bokeh.embed.file_html(p, bokeh.resources.CDN, "") + return html + + +class DrugScreenDashboard(DataDashboard): def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label='negative_control',run_name='auto'): self.threshold = threshold @@ -16,6 +42,7 @@ def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label= self.gamma_score_name = f'gamma:{untreated}_vs_{T0}' self.rho_score_name = f'rho:{treated}_vs_{untreated}' self.plots = {} + super().__init__(self) def _prep_data(self): @@ -62,12 +89,6 @@ def _plot_scatter( df = self._prep_data() - # if filter_labels is not None: - # df = df[df['label'].isin(filter_labels)] - - TOOLS = "box_select,box_zoom,lasso_select,reset,save,wheel_zoom,pan,copy,undo,redo,reset,examine,fullscreen" - - # create a new plot with a specific size TOOLTIPS = [ ("name", "@target"), ("rho score", "@rho_score"), @@ -77,15 +98,8 @@ def _plot_scatter( ("gamma p-value", "@gamma_pvalue"), ("gamma label", "@gamma_label"), ] - p = bokeh.plotting.figure( - sizing_mode="stretch_width", - tools=TOOLS, - tooltips=TOOLTIPS, - toolbar_location=toolbar_location, - title=title, - max_width=width, height=height, - ) - p.toolbar.autohide = True + + p = self._new_plot(TOOLTIPS,width,height,toolbar_location) source = bokeh.models.ColumnDataSource( df.loc[df[hit_label_col] == 'target_non_hit',:] @@ -169,10 +183,6 @@ def _plot_scatter( p.title.text_font_style = 'bold' return p - - def _get_html(self, p): - html = bokeh.embed.file_html(p, bokeh.resources.CDN, "") - return html def RhoVolcanoPlot( self, From 987277b0ae9d7d5b6ed8a69fd380f530d126fe84 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:46:24 -0700 Subject: [PATCH 14/27] import dashboard module --- screenpro/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 69ca0c5..63c4240 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -1,11 +1,14 @@ from . import plotting as pl from . import phenoscore as ps + from . import utils from . import ngs from . import load +from . import dashboard + from .ngs import Counter from .assays import PooledScreens, GImaps __version__ = "0.3.2" __author__ = "Abe Arab" -__email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com" \ No newline at end of file +__email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com" From 70ac8099340fe3d6607b753dfe85a0ada4d6d9b9 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:48:51 -0700 Subject: [PATCH 15/27] mend --- screenpro/dashboard.py | 1 + 1 file changed, 1 insertion(+) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index 6ea1199..6d387f5 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -5,6 +5,7 @@ import pandas as pd import bokeh +import bokeh.plotting class DataDashboard: From b0eb44ad543d53ec742bfe0ef8b5bdabae13a251 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:50:12 -0700 Subject: [PATCH 16/27] mend --- screenpro/dashboard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index 6d387f5..759b5ce 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -40,7 +40,7 @@ def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label= self.threshold = threshold self.ctrl_label = ctrl_label self.run_name = run_name - self.gamma_score_name = f'gamma:{untreated}_vs_{T0}' + self.gamma_score_name = f'gamma:{untreated}_vs_{t0}' self.rho_score_name = f'rho:{treated}_vs_{untreated}' self.plots = {} super().__init__(self) From fde96e08c9e9becb5c961313c9a9ddcfbfbed862 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:51:58 -0700 Subject: [PATCH 17/27] mend --- screenpro/dashboard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index 759b5ce..dd91824 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -43,7 +43,7 @@ def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label= self.gamma_score_name = f'gamma:{untreated}_vs_{t0}' self.rho_score_name = f'rho:{treated}_vs_{untreated}' self.plots = {} - super().__init__(self) + super().__init__() def _prep_data(self): From ef35c51b4abf6ada35945b5a5813989dfb7432bd Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:54:46 -0700 Subject: [PATCH 18/27] mend --- screenpro/dashboard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index dd91824..1189655 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -47,7 +47,7 @@ def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label= def _prep_data(self): - gamma = screen.getPhenotypeScores( + gamma = self.screen.getPhenotypeScores( run_name=self.run_name, score_name=self.gamma_score_name, ctrl_label=self.ctrl_label, From 8a229f61eb314dfcf84fde54bbe3fa590363f47b Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:56:35 -0700 Subject: [PATCH 19/27] mend --- screenpro/dashboard.py | 1 + 1 file changed, 1 insertion(+) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index 1189655..bb21f4d 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -37,6 +37,7 @@ def _get_html(self, p): class DrugScreenDashboard(DataDashboard): def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label='negative_control',run_name='auto'): + self.screen = screen self.threshold = threshold self.ctrl_label = ctrl_label self.run_name = run_name From 84144110e6c9fc5e4de64ff2f5494115474a0682 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:58:23 -0700 Subject: [PATCH 20/27] mend --- screenpro/dashboard.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index bb21f4d..7cfaf01 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -46,9 +46,9 @@ def __init__(self, screen, treated, untreated, t0='T0', threshold=3, ctrl_label= self.plots = {} super().__init__() - def _prep_data(self): + def _prep_data(self,screen): - gamma = self.screen.getPhenotypeScores( + gamma = screen.getPhenotypeScores( run_name=self.run_name, score_name=self.gamma_score_name, ctrl_label=self.ctrl_label, @@ -89,7 +89,7 @@ def _plot_scatter( toolbar_location='below', ): - df = self._prep_data() + df = self._prep_data(self.screen) TOOLTIPS = [ ("name", "@target"), From 2883fd14b90176ad7cfca5fef3e1d5d6385c8ae0 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 13:59:05 -0700 Subject: [PATCH 21/27] mend --- screenpro/dashboard.py | 1 + 1 file changed, 1 insertion(+) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index 7cfaf01..cd1efba 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -3,6 +3,7 @@ ## Gilbart Lab, UCSF / Arc Institute. ## Multi-Omics Tech Center, Arc Insititue. +improt numpy as np import pandas as pd import bokeh import bokeh.plotting From 894ca8ca1b35767b51006d154508f36a1255a4e9 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 14:00:26 -0700 Subject: [PATCH 22/27] fix typo --- screenpro/dashboard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index cd1efba..f9efd9b 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -3,7 +3,7 @@ ## Gilbart Lab, UCSF / Arc Institute. ## Multi-Omics Tech Center, Arc Insititue. -improt numpy as np +import numpy as np import pandas as pd import bokeh import bokeh.plotting From 96fdf002e27f0271bd8dfe8a5311b7db506bb5f6 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 14:03:13 -0700 Subject: [PATCH 23/27] mend --- screenpro/dashboard.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index f9efd9b..1f5826b 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -14,7 +14,7 @@ class DataDashboard: def __init__(self): pass - def _new_plot(self,TOOLTIPS,width,height,toolbar_location): + def _new_plot(self,title,tooltips,width,height,toolbar_location): TOOLS = "box_select,box_zoom,lasso_select,reset,save,wheel_zoom,pan,copy,undo,redo,reset,examine,fullscreen" @@ -22,7 +22,7 @@ def _new_plot(self,TOOLTIPS,width,height,toolbar_location): p = bokeh.plotting.figure( sizing_mode="stretch_width", tools=TOOLS, - tooltips=TOOLTIPS, + tooltips=tooltips, toolbar_location=toolbar_location, title=title, max_width=width, height=height, @@ -102,7 +102,13 @@ def _plot_scatter( ("gamma label", "@gamma_label"), ] - p = self._new_plot(TOOLTIPS,width,height,toolbar_location) + p = self._new_plot( + title=title, + tooltips=TOOLTIPS, + width=width, + height=height, + toolbar_location=toolbar_location + ) source = bokeh.models.ColumnDataSource( df.loc[df[hit_label_col] == 'target_non_hit',:] From 5cea9b67536109f8ec1bbbf25a699c4873ef3058 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 14:09:14 -0700 Subject: [PATCH 24/27] improve axis limits --- screenpro/dashboard.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index 1f5826b..bbc8878 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -92,6 +92,11 @@ def _plot_scatter( df = self._prep_data(self.screen) + if y_max == 'auto': y_max = df[y_source].max() * 1.2 + if x_max == 'auto': x_max = df[x_source].max() * 1.2 + if y_min == 'auto': y_min = df[y_source].min() * 1.2 + if x_min == 'auto': x_min = df[x_source].min() * 1.2 + TOOLTIPS = [ ("name", "@target"), ("rho score", "@rho_score"), @@ -200,7 +205,7 @@ def RhoVolcanoPlot( yaxis_label='-log10(p-value)', up_hit='resistance_hit', down_hit='sensitivity_hit', hit_label_col='rho_label', - x_min=-2.5, x_max=2.5, y_min=0, y_max=5.5, + x_min=-2.5, x_max=2.5, y_min=0, y_max='auto', return_html=True, **kwargs ): @@ -227,7 +232,7 @@ def GammaVolcanoPlot( yaxis_label='-log10(p-value)', up_hit='up_hit', down_hit='essential_hit', hit_label_col='gamma_label', - x_min=-2.5, x_max=2.5, y_min=0, y_max=5.5, + x_min=-2.5, x_max=2.5, y_min=0, y_max='auto', return_html=True, **kwargs ): From b5bc4e61979177d71aca00c7b4e20a7f0984e264 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 14:11:31 -0700 Subject: [PATCH 25/27] add `legend_loc` --- screenpro/dashboard.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index bbc8878..066d787 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -88,6 +88,7 @@ def _plot_scatter( dot_size=1, width=500, height=400, toolbar_location='below', + legend_loc="top_left" ): df = self._prep_data(self.screen) @@ -189,7 +190,7 @@ def _plot_scatter( p.y_range.end = y_max # Add legend - p.legend.location = "top_left" + p.legend.location = legend_loc p.title.text = title p.title.align = 'center' From d7b175ae6dd2ac267fb2b8031f1e637506206201 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 14:13:52 -0700 Subject: [PATCH 26/27] mend --- screenpro/dashboard.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/screenpro/dashboard.py b/screenpro/dashboard.py index 066d787..d709e37 100644 --- a/screenpro/dashboard.py +++ b/screenpro/dashboard.py @@ -190,7 +190,10 @@ def _plot_scatter( p.y_range.end = y_max # Add legend - p.legend.location = legend_loc + if legend_loc == False or legend_loc == None: + p.legend.visible = False + else: + p.legend.location = legend_loc p.title.text = title p.title.align = 'center' From d2377f6b9fa2db53ebbe4c7860a5ff10d6888764 Mon Sep 17 00:00:00 2001 From: "Abolfazl (Abe)" Date: Thu, 23 May 2024 15:06:05 -0700 Subject: [PATCH 27/27] bump version 0.3.3 --- screenpro/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/screenpro/__init__.py b/screenpro/__init__.py index 63c4240..4e4dabe 100644 --- a/screenpro/__init__.py +++ b/screenpro/__init__.py @@ -9,6 +9,6 @@ from .ngs import Counter from .assays import PooledScreens, GImaps -__version__ = "0.3.2" +__version__ = "0.3.3" __author__ = "Abe Arab" __email__ = 'abea@arcinstitute.org' # "abarbiology@gmail.com"