diff --git a/GRN/.gitignore b/GRN/.gitignore index d4b7ad3..a2e915d 100644 --- a/GRN/.gitignore +++ b/GRN/.gitignore @@ -10,15 +10,25 @@ explore data/shared data/joint_cortex data/joint_pons +data/joint_cortex_extended +data/joint_pons_extended shiny_bookmarks +data/ct_e10 data/ct_e12 data/ct_e12_not_needed +data/ct_e13 data/ct_e15 +data/ct_e16 +data/ct_e18 data/ct_p0 data/ct_p3 data/ct_p6 +data/po_e10 data/po_e12 +data/po_e13 data/po_e15 +data/po_e16 +data/po_e18 data/po_p0 data/po_p3 data/po_p6 diff --git a/GRN/data/data.json b/GRN/data/data.json index 7b5efc1..f891a76 100644 --- a/GRN/data/data.json +++ b/GRN/data/data.json @@ -1,11 +1,18 @@ { "shared": [ - { - "file": "metadata_20190716.tsv", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/metadata/metadata_20190716.tsv", - "description": "The whole metadata with important columns as Cluster (matching all the data),Cell_type (the full name of the cell type),Age (the time point of the sample),Colour (a hex value to use as a colour for that cluster)", - "contents": "Dataframe with cluster metadata." + { + "file": "metadata_20210710_with_qc.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/metadata_extended/metadata_20210710_with_qc.tsv", + "description": "For extended mouse brain atlas (joint cortex and pons analysis uses these cluster labels as the latest labelling): the whole metadata with important columns as Cluster (matching all the data),Cell_type (the full name of the cell type),Age (the time point of the sample),Colour (a hex value to use as a colour for that cluster)", + "contents": "Dataframe with cluster metadata (extended mouse brain atlas)." + + }, + { + "file": "metadata_20201028_with_qc.tsv", + "path": "selin.jessa/from_hydra/atlas/data/metadata_extended/metadata_20201028_with_qc.tsv", + "description": "For extended mouse brain atlas (per sample analysis uses these cluster labels as the latest labelling): the whole metadata with important columns as Cluster (matching all the data),Cell_type (the full name of the cell type),Age (the time point of the sample),Colour (a hex value to use as a colour for that cluster)", + "contents": "Dataframe with cluster metadata (extended mouse brain atlas)." }, { @@ -14,6 +21,13 @@ "description": "TF input into the SCENIC pipeline. Actives TFs in each dataset represents a subset of this.", "contents": "Text file with each TF on one line" + }, + { + "file": "palette_ID_20210710_joint_clustering.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_mouse_extended/palette_ID_20210710_joint_clustering.Rds", + "description": "Colour palette for the joint clusters in the extended mouse dataset. Values are colour hex codes and names are the cluster labels.", + "contents": "Named vector" + }, { "file": "common_prep.Rda", @@ -21,163 +35,218 @@ "contents": "R object containing colour palettes, and heatmap annotations", "script": "data_prep.R" + }, + { + "file": "timeseries_proportion_plots.Rda", + "description": "Ribbon plot of the proportion of each cluster across developmental time, for forebrain and pons.", + "contents": "List of ggplot objects ", + "script": "data_prep.R" } ], -"joint_cortex": [ - { - "file": "cortex_prep.Rda", + +"joint_cortex_extended": [ + { + "file": "cortex_extended_prep.Rda", "description": "Rda file containing datasets generated in data_prep.R, to be loaded at the beginning of app.R to optimize run-time speed and efficiency. ", - "contents": "Several R objects: forebrain_data, TF_and_ext, TF_active, metadata, tf_df, cell_metadata_cortex,binary_activity data", + "contents": "Several R objects generated in the script for visualising the joint cortex extended dataset in the app", "script": "data_prep.R" }, { - "file": "joint_cortex.regulon_activity_per_joint_cluster.feather", - "description": "feather file containing average TF activity per joint cluster instead of per sample cluster in the forebrain.", - "contents": "a data frame with a joint cluster column and columns corresponding to TFs and their activity in each cluster.", - "script": "data_prep.R" - - }, - { - "file": "Forebrain_join.2D.tsv", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex/Forebrain_join.2D.tsv", - "description": "Cell-level information for forebrain data, including corresponding clusters and UMAP/tSNE/PCA coordinates, used to plot scatterplots", + "file": "joint_cortex_extended.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex_extended/joint_cortex_extended.metadata.tsv", + "description": "Cell-level information for extended forebrain dataset, including corresponding clusters and UMAP/tSNE/PCA coordinates, used to plot scatterplots", "contents": "Data frame with columns, 'Cell' 'Sample' 'Sample_cluster', followed by columns for embedding coordinates." - }, - { - "file": "Jessa2019_Table_2b_joint_cortex_metadata.tsv", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/metadata/Jessa2019_Table_2b_joint_cortex_metadata.tsv", - "description": "Joint_cluster-level information for forebrain data, including color palatte for each cluster, used to color scatter plots", - "contents": "Data frame with columns, 'Cluster_number' 'Color' 'Cell_type' 'Sample' etc., followed by summary info for each cluster." - }, { - "file": "joint_cortex.regulon_target_info.Rds", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex/joint_cortex.regulon_target_info.Rds", + "file": "joint_cortex_extended.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex_extended/joint_cortex_extended.regulon_target_info.Rds", "description": "Rds file containing data specific for the first tab, where the table is generated from this data file and used for plotting the cytoscape network to see correlations of different tfs with different/same genes related, note that the tf contai", "contents": "Rds data specific each transcription factor and its corresponding genes and activity data, weight, highconfAnnotation data, motifs." }, { - "file": "joint_cortex.regulon_activity_per_cluster.feather", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex/joint_cortex.regulon_activity_per_cluster.feather", - "description": "A feather file of tf activity with respect to each cluster that is used for reading by a certain column to optimize speed, used for plotting tab2 (clustering and heatmap) ", + "file": "joint_cortex_extended.regulon_activity_per_cluster.joint_extended.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex_extended/joint_cortex_extended.regulon_activity_per_cluster.joint_extended.feather", + "description": "A feather file of tf activity with respect to each cluster in the joint space", "contents": "A dataframe, the first column is cluster, subsequent columns correspond to active TF (TF_active) the values describe the NES(activity score of TF in each cluster)," }, { - "file": "joint_cortex.regulon_activity_per_cell.feather", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex/joint_cortex.regulon_activity_per_cell.feather", + "file": "joint_cortex_extended.regulon_activity_per_cluster.per_sample.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex_extended/joint_cortex_extended.regulon_activity_per_cluster.per_sample.feather", + "description": "A feather file of tf activity with respect to each cluster in the per sample space ", + "contents": "A dataframe, the first column is cluster, subsequent columns correspond to active TF (TF_active) the values describe the NES(activity score of TF in each cluster)," + + }, + { + + "file": "joint_cortex_extended.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex_extended/joint_cortex_extended.regulon_activity_per_cell.feather", "description": "A feather file of tf activity with respect to each cell that is used for reading by a certain column to optimize speed, used for plotting tab2(clustering and heatmap) ", "contents": "A dataframe, the first column is cluster, subsequent columns correspond to active TF (TF_active) the values describe the NES(activity score of TF in each cluster)," }, { - "file": "joint_cortex.active_regulons.Rds", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex/joint_cortex.active_regulons.Rds", + "file": "joint_cortex_extended.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_cortex_extended/joint_cortex_extended.active_regulons.Rds", "description": "Rds file that is read as tibble containing all tf names with suffix(extended and weight) in the dataset of the second/third tab, note that the tfs are not the same as the first tab data. This vector contains fewer tf than the data in the first tab", "contents": "A character vector(read as tibble later) containing all tf names with suffix(extended and weight) in the dataset of the second/third tab." }, + { + "file": "joint_cortex_extended.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/joint_cortex_extended.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + }, + { + "file": "dendrogram_order_joint_extended_forebrain.Rds", + "path": "howard.li/from_hydra/dendrograms/output/dendrogram_figures/dendrogram_order_joint_extended_forebrain.Rds", + "description": "vector of all clusters in the joint cortex extended dataset ordered in the order of the extended joint cortex dendrogram", + "contents": "A character vector of clusters" + }, { - "file": "joint_cortex.binaryRegulonActivity_nonDupl.Rds", - "path": "alexis.blanchetcohen/analyses/njabado/single_cell/mouse_and_human_development/regulon_analysis/scenic/samples/normal_brain/joint_cortex/cluster_indiv/int/4.2_binaryRegulonActivity_nonDupl.Rds", - "description": "Binary activity of every tf in each cell, used for plotting cortex timeseries, we also use the rownames to generate a dataframe that saves all the tf names", + "file": "4.2_binaryRegulonActivity_nonDupl_cortex_extended.Rds", + "path": "selin.jessa/from_hydra/atlas/2020-12_SCENIC_extended_dataset/results/joint_cortex_extended/int/4.2_binaryRegulonActivity_nonDupl.Rds", + "description": "Binary activity of every tf in each cell, used for plotting pons timeseries, we also use the rownames to generate a dataframe that saves all the tf names", "contents": "A giant matrix with columns of each cell and rows of tfs, with 1 or 0 denoting the presence of that tf in the cell." - }, - { - "file": "joint_cortex.active_specific_tf.Rds", - "path": "howard.li/from_hydra/active_and_specific/data/joint_cortex.active_specific_tf.Rds", - "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", - "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" } - ], - - "joint_pons": [ - { - "file": "pons_prep.Rda", +"joint_pons_extended": [ + { + "file": "pons_extended_prep.Rda", "description": "Rda file containing datasets generated in data_prep.R, to be loaded at the beginning of app.R to optimize run-time speed and efficiency. ", - "contents": "Several R objects: pons_data, TF_and_ext, TF_active, metadata, tf_df, cell_metadata_pons,binary_activity data", + "contents": "Several R objects generated in the script for visualising the joint pons extended dataset in the app", "script": "data_prep.R" }, { - "file": "joint_pons.regulon_activity_per_joint_cluster.feather", - "description": "feather file containing average TF activity per joint cluster instead of per sample cluster in the pons.", - "contents": "a data frame with a joint cluster column and columns corresponding to TFs and their activity in each cluster.", - "script": "data_prep.R" - - }, - { - "file": "Jessa2019_Table_2c_joint_pons_metadata.tsv", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/metadata/Jessa2019_Table_2c_joint_pons_metadata.tsv", - "description": "Joint_cluster-level information for forebrain data, including color palatte for each cluster, used to color scatter plots", - "contents": "Data frame with columns, 'Cluster_number' 'Color' 'Cell_type' 'Sample' etc., followed by summary info for each cluster." - - }, - { - "file": "Pons_join.2D.tsv", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons/Pons_join.2D.tsv", - "description": "Cell-level information for pons data, including corresponding clusters and UMAP/tSNE/PCA coordinates, used to plot scatterplots", + "file": "joint_pons_extended.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons_extended/joint_pons_extended.metadata.tsv", + "description": "Cell-level information for extended pons dataset, including corresponding clusters and UMAP/tSNE/PCA coordinates, used to plot scatterplots", "contents": "Data frame with columns, 'Cell' 'Sample' 'Sample_cluster', followed by columns for embedding coordinates." }, { - "file": "joint_pons.regulon_target_info.Rds", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons/joint_pons.regulon_target_info.Rds", + "file": "joint_pons_extended.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons_extended/joint_pons_extended.regulon_target_info.Rds", "description": "Rds file containing data specific for the first tab, where the table is generated from this data file and used for plotting the cytoscape network to see correlations of different tfs with different/same genes related, note that the tf contai", "contents": "Rds data specific each transcription factor and its corresponding genes and activity data, weight, highconfAnnotation data, motifs." }, { - "file": "joint_pons.regulon_activity_per_cluster.feather", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons/joint_pons.regulon_activity_per_cluster.feather", - "description": "A feather file of tf activity with respect to each cluster that is used for reading by a certain column to optimize speed, used for plotting tab2 (clustering and heatmap) ", + "file": "joint_pons_extended.regulon_activity_per_cluster.joint_extended.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons_extended/joint_pons_extended.regulon_activity_per_cluster.joint_extended.feather", + "description": "A feather file of tf activity with respect to each cluster in the joint space", + "contents": "A dataframe, the first column is cluster, subsequent columns correspond to active TF (TF_active) the values describe the NES(activity score of TF in each cluster)," + + }, + { + + "file": "joint_pons_extended.regulon_activity_per_cluster.per_sample.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons_extended/joint_pons_extended.regulon_activity_per_cluster.per_sample.feather", + "description": "A feather file of tf activity with respect to each cluster in the per sample space ", "contents": "A dataframe, the first column is cluster, subsequent columns correspond to active TF (TF_active) the values describe the NES(activity score of TF in each cluster)," }, { - "file": "joint_pons.regulon_activity_per_cell.feather", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons/joint_pons.regulon_activity_per_cell.feather", + "file": "joint_pons_extended.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons_extended/joint_pons_extended.regulon_activity_per_cell.feather", "description": "A feather file of tf activity with respect to each cell that is used for reading by a certain column to optimize speed, used for plotting tab2(clustering and heatmap) ", "contents": "A dataframe, the first column is cluster, subsequent columns correspond to active TF (TF_active) the values describe the NES(activity score of TF in each cluster)," }, { - "file": "joint_pons.active_regulons.Rds", - "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons/joint_pons.active_regulons.Rds", + "file": "joint_pons_extended.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/joint_pons_extended/joint_pons_extended.active_regulons.Rds", "description": "Rds file that is read as tibble containing all tf names with suffix(extended and weight) in the dataset of the second/third tab, note that the tfs are not the same as the first tab data. This vector contains fewer tf than the data in the first tab", "contents": "A character vector(read as tibble later) containing all tf names with suffix(extended and weight) in the dataset of the second/third tab." }, + { + "file": "joint_pons_extended.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/joint_pons_extended.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + }, + { + "file": "dendrogram_order_joint_extended_pons.Rds", + "path": "howard.li/from_hydra/dendrograms/output/dendrogram_figures/dendrogram_order_joint_extended_pons.Rds", + "description": "vector of all clusters in the joint cortex extended dataset ordered in the order of the extended joint cortex dendrogram", + "contents": "A character vector of clusters" + }, { - "file": "joint_pons.binaryRegulonActivity_nonDupl.Rds", - "path": "alexis.blanchetcohen/analyses/njabado/single_cell/mouse_and_human_development/regulon_analysis/scenic/samples/normal_brain/joint_pons/cluster_indiv/int/4.2_binaryRegulonActivity_nonDupl.Rds", + "file": "4.2_binaryRegulonActivity_nonDupl_pons_extended.Rds", + "path": "selin.jessa/from_hydra/atlas/2020-12_SCENIC_extended_dataset/results/joint_pons_extended/int/4.2_binaryRegulonActivity_nonDupl.Rds", "description": "Binary activity of every tf in each cell, used for plotting pons timeseries, we also use the rownames to generate a dataframe that saves all the tf names", "contents": "A giant matrix with columns of each cell and rows of tfs, with 1 or 0 denoting the presence of that tf in the cell." - }, + } + ], + + +"ct_e10": [ + { + "file": "ct_e10.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e10/ct_e10.metadata.tsv", + "description": "Per cell metadata for this timepoint with cluster assignments and embedding coordinates", + "contents": "Dataframe with cell metadata. - Has multiple cluster assignments" + + }, + { + "file": "ct_e10.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e10/ct_e10.active_regulons.Rds", + "description": "Active TFs in this sample", + "contents": "Character list with TF names" + + }, + { + "file": "ct_e10.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e10/ct_e10.regulon_target_info.Rds", + "description": "Active TFs in this sample and their gene target information: number of motifs, GENIE3 weight, best motif, motif logo URL", + "contents": "data frame" + + }, + { + "file": "ct_e10.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e10/ct_e10.regulon_activity_per_cell.feather", + "description": "Activity of each TF in each cell in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "ct_e10.regulon_activity_per_cluster.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e10/ct_e10.regulon_activity_per_cluster.feather", + "description": "Activity of each TF in each cluster in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "ct_e10_prep.Rda", + "description": "Colour palettes and cluster annotations used for heatmaps, GRN, info table, dr-plots", + "contents": "R list object containing 3 tibbles and one vector; see details in data_prep.R", + "script": "data_prep.R" + + }, { - "file": "joint_pons.active_specific_tf.Rds", - "path": "howard.li/from_hydra/active_and_specific/data/joint_pons.active_specific_tf.Rds", + "file": "ct_e10.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/ct_e10.active_specific_tf.Rds", "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" } - - ], - + ], "ct_e12": [ { "file": "ct_e12.metadata.tsv", @@ -228,6 +297,56 @@ "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" } ], +"ct_e13": [ + { + "file": "ct_e13.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e13/ct_e13.metadata.tsv", + "description": "Per cell metadata for this timepoint with cluster assignments and embedding coordinates", + "contents": "Dataframe with cell metadata. - Has multiple cluster assignments" + + }, + { + "file": "ct_e13.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e13/ct_e13.active_regulons.Rds", + "description": "Active TFs in this sample", + "contents": "Character list with TF names" + + }, + { + "file": "ct_e13.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e13/ct_e13.regulon_target_info.Rds", + "description": "Active TFs in this sample and their gene target information: number of motifs, GENIE3 weight, best motif, motif logo URL", + "contents": "data frame" + + }, + { + "file": "ct_e13.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e13/ct_e13.regulon_activity_per_cell.feather", + "description": "Activity of each TF in each cell in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "ct_e13.regulon_activity_per_cluster.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e13/ct_e13.regulon_activity_per_cluster.feather", + "description": "Activity of each TF in each cluster in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "ct_e13_prep.Rda", + "description": "Colour palettes and cluster annotations used for heatmaps, GRN, info table, dr-plots", + "contents": "R list object containing 3 tibbles and one vector; see details in data_prep.R", + "script": "data_prep.R" + + }, + { + "file": "ct_e13.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/ct_e13.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + } + ], "ct_e15": [ { "file": "ct_e15.metadata.tsv", @@ -278,6 +397,106 @@ "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" } ], +"ct_e16": [ + { + "file": "ct_e16.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e16/ct_e16.metadata.tsv", + "description": "Per cell metadata for this timepoint with cluster assignments and embedding coordinates", + "contents": "Dataframe with cell metadata. - Has multiple cluster assignments" + + }, + { + "file": "ct_e16.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e16/ct_e16.active_regulons.Rds", + "description": "Active TFs in this sample", + "contents": "Character list with TF names" + + }, + { + "file": "ct_e16.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e16/ct_e16.regulon_target_info.Rds", + "description": "Active TFs in this sample and their gene target information: number of motifs, GENIE3 weight, best motif, motif logo URL", + "contents": "data frame" + + }, + { + "file": "ct_e16.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e16/ct_e16.regulon_activity_per_cell.feather", + "description": "Activity of each TF in each cell in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "ct_e16.regulon_activity_per_cluster.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e16/ct_e16.regulon_activity_per_cluster.feather", + "description": "Activity of each TF in each cluster in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "ct_e16_prep.Rda", + "description": "Colour palettes and cluster annotations used for heatmaps, GRN, info table, dr-plots", + "contents": "R list object containing 3 tibbles and one vector; see details in data_prep.R", + "script": "data_prep.R" + + }, + { + "file": "ct_e16.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/ct_e16.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + } + ], +"ct_e18": [ + { + "file": "ct_e18.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e18/ct_e18.metadata.tsv", + "description": "Per cell metadata for this timepoint with cluster assignments and embedding coordinates", + "contents": "Dataframe with cell metadata. - Has multiple cluster assignments" + + }, + { + "file": "ct_e18.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e18/ct_e18.active_regulons.Rds", + "description": "Active TFs in this sample", + "contents": "Character list with TF names" + + }, + { + "file": "ct_e18.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e18/ct_e18.regulon_target_info.Rds", + "description": "Active TFs in this sample and their gene target information: number of motifs, GENIE3 weight, best motif, motif logo URL", + "contents": "data frame" + + }, + { + "file": "ct_e18.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e18/ct_e18.regulon_activity_per_cell.feather", + "description": "Activity of each TF in each cell in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "ct_e18.regulon_activity_per_cluster.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/ct_e18/ct_e18.regulon_activity_per_cluster.feather", + "description": "Activity of each TF in each cluster in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "ct_e18_prep.Rda", + "description": "Colour palettes and cluster annotations used for heatmaps, GRN, info table, dr-plots", + "contents": "R list object containing 3 tibbles and one vector; see details in data_prep.R", + "script": "data_prep.R" + + }, + { + "file": "ct_e18.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/ct_e18.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + } + ], "ct_p0": [ { "file": "ct_p0.metadata.tsv", @@ -428,6 +647,56 @@ "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" } ], +"po_e10": [ + { + "file": "po_e10.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e10/po_e10.metadata.tsv", + "description": "Per cell metadata for this timepoint with cluster assignments and embedding coordinates", + "contents": "Dataframe with cell metadata. - Has multiple cluster assignments" + + }, + { + "file": "po_e10.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e10/po_e10.active_regulons.Rds", + "description": "Active TFs in this sample", + "contents": "Character list with TF names" + + }, + { + "file": "po_e10.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e10/po_e10.regulon_target_info.Rds", + "description": "Active TFs in this sample and their gene target information: number of motifs, GENIE3 weight, best motif, motif logo URL", + "contents": "data frame" + + }, + { + "file": "po_e10.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e10/po_e10.regulon_activity_per_cell.feather", + "description": "Activity of each TF in each cell in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "po_e10.regulon_activity_per_cluster.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e10/po_e10.regulon_activity_per_cluster.feather", + "description": "Activity of each TF in each cluster in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "po_e10_prep.Rda", + "description": "Colour palettes and cluster annotations used for heatmaps, GRN, info table, dr-plots", + "contents": "R list object containing 3 tibbles and one vector; see details in data_prep.R", + "script": "data_prep.R" + + }, + { + "file": "po_e10.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/po_e10.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + } + ], "po_e12": [ { "file": "po_e12.metadata.tsv", @@ -478,6 +747,56 @@ "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" } ], +"po_e13": [ + { + "file": "po_e13.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e13/po_e13.metadata.tsv", + "description": "Per cell metadata for this timepoint with cluster assignments and embedding coordinates", + "contents": "Dataframe with cell metadata. - Has multiple cluster assignments" + + }, + { + "file": "po_e13.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e13/po_e13.active_regulons.Rds", + "description": "Active TFs in this sample", + "contents": "Character list with TF names" + + }, + { + "file": "po_e13.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e13/po_e13.regulon_target_info.Rds", + "description": "Active TFs in this sample and their gene target information: number of motifs, GENIE3 weight, best motif, motif logo URL", + "contents": "data frame" + + }, + { + "file": "po_e13.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e13/po_e13.regulon_activity_per_cell.feather", + "description": "Activity of each TF in each cell in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "po_e13.regulon_activity_per_cluster.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e13/po_e13.regulon_activity_per_cluster.feather", + "description": "Activity of each TF in each cluster in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "po_e13_prep.Rda", + "description": "Colour palettes and cluster annotations used for heatmaps, GRN, info table, dr-plots", + "contents": "R list object containing 3 tibbles and one vector; see details in data_prep.R", + "script": "data_prep.R" + + }, + { + "file": "po_e13.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/po_e13.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + } + ], "po_e15": [ { "file": "po_e15.metadata.tsv", @@ -528,6 +847,106 @@ "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" } ], +"po_e16": [ + { + "file": "po_e16.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e16/po_e16.metadata.tsv", + "description": "Per cell metadata for this timepoint with cluster assignments and embedding coordinates", + "contents": "Dataframe with cell metadata. - Has multiple cluster assignments" + + }, + { + "file": "po_e16.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e16/po_e16.active_regulons.Rds", + "description": "Active TFs in this sample", + "contents": "Character list with TF names" + + }, + { + "file": "po_e16.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e16/po_e16.regulon_target_info.Rds", + "description": "Active TFs in this sample and their gene target information: number of motifs, GENIE3 weight, best motif, motif logo URL", + "contents": "data frame" + + }, + { + "file": "po_e16.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e16/po_e16.regulon_activity_per_cell.feather", + "description": "Activity of each TF in each cell in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "po_e16.regulon_activity_per_cluster.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e16/po_e16.regulon_activity_per_cluster.feather", + "description": "Activity of each TF in each cluster in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "po_e16_prep.Rda", + "description": "Colour palettes and cluster annotations used for heatmaps, GRN, info table, dr-plots", + "contents": "R list object containing 3 tibbles and one vector; see details in data_prep.R", + "script": "data_prep.R" + + }, + { + "file": "po_e16.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/po_e16.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + } + ], +"po_e18": [ + { + "file": "po_e18.metadata.tsv", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e18/po_e18.metadata.tsv", + "description": "Per cell metadata for this timepoint with cluster assignments and embedding coordinates", + "contents": "Dataframe with cell metadata. - Has multiple cluster assignments" + + }, + { + "file": "po_e18.active_regulons.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e18/po_e18.active_regulons.Rds", + "description": "Active TFs in this sample", + "contents": "Character list with TF names" + + }, + { + "file": "po_e18.regulon_target_info.Rds", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e18/po_e18.regulon_target_info.Rds", + "description": "Active TFs in this sample and their gene target information: number of motifs, GENIE3 weight, best motif, motif logo URL", + "contents": "data frame" + + }, + { + "file": "po_e18.regulon_activity_per_cell.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e18/po_e18.regulon_activity_per_cell.feather", + "description": "Activity of each TF in each cell in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "po_e18.regulon_activity_per_cluster.feather", + "path": "selin.jessa/from_hydra/single_cell/scDev_data/data/po_e18/po_e18.regulon_activity_per_cluster.feather", + "description": "Activity of each TF in each cluster in this sample", + "contents": "A feather file containing a data frame" + + }, + { + "file": "po_e18_prep.Rda", + "description": "Colour palettes and cluster annotations used for heatmaps, GRN, info table, dr-plots", + "contents": "R list object containing 3 tibbles and one vector; see details in data_prep.R", + "script": "data_prep.R" + + }, + { + "file": "po_e18.active_specific_tf.Rds", + "path": "howard.li/from_hydra/active_and_specific/data/po_e18.active_specific_tf.Rds", + "description": "data calculating the activity and specificity of each TF in each cluster in the sample, used to plot plots in the active and specific tab of the app", + "contents": "A list containing: a matrix with clusters as observations and AUC of tfs as variables, a matrix with clusters as observations and fold change of the average AUC of a tf in a given cluster compared to all other clusters, a large list the same length as the number of clusters in the sample with each element as a tibble containing AUC, and FC for that cluster" + } + ], "po_p0": [ { "file": "po_p0.metadata.tsv", diff --git a/GRN/data/data_prep.R b/GRN/data/data_prep.R index b4be2c7..ee6efea 100644 --- a/GRN/data/data_prep.R +++ b/GRN/data/data_prep.R @@ -8,14 +8,60 @@ library(glue) library(ggplot2) source("../functions.R") -# ———————————————————————————————————color palette———————————————————————————————————————— -# make color palette -metadata <- read_tsv("shared/metadata_20190716.tsv") +# --------------------color palettes------------------------- + +message("@ color palettes") + +# make color palettes using the cluster label column and the colour label in the metadata data frame +# color palette for ggplot plots are named vectors with cluster colour as names and colour hex codes as values +# colour palette for pheatmap is a list of one element named "Cluster", this element is a named vector + +#different metadata files are loaded here because different SCENIC datasets (non-extended, extended, per-time point) +#have cluster labels generated at different dates + +#metadata <- read_tsv("shared/metadata_20190716.tsv") #for the non-extended dataset + +metadata_extended <- read_tsv("shared/metadata_20210710_with_qc.tsv") #for the extended dataset in the joint space + +#maps the per sample cluster label to a more broad cluster label +#F-e10_VRGC maps to RGC +#used in timeseries +lvl2_cluster_extended <- metadata_extended %>% select(Label, Level2_type) %>% deframe + +#even borader cluster mapping, for use in the heatmap +lvl1_cluster_extended <- metadata_extended %>% select(Label, Level1_type) %>% deframe + +metadata_per_sample <- read_tsv("shared/metadata_20201028_with_qc.tsv") #for the per-timepoint analyses + +#used in timeseries +lvl2_cluster_per_sample <- metadata_per_sample %>% select(Label, Level2_type) %>% deframe + +#even borader cluster mapping, for use in the heatmap +lvl1_cluster_per_sample <- metadata_per_sample %>% select(Label, Level1_type) %>% deframe + +cell_ontological_class_labels_from_lvl2 <- metadata_per_sample %>% filter(!is.na(Level2_type)) %>% + select(Level2_type, Cell_ontological_class) %>% deframe + +cell_ontological_class_labels_from_per_sample_cluster <- metadata_per_sample %>% + select(Label, Cell_ontological_class) %>% deframe + + +lvl2_cluster_labels <- c(lvl2_cluster_extended, lvl2_cluster_per_sample) + +lvl1_cluster_labels <- c(lvl1_cluster_extended, lvl1_cluster_per_sample) + +cell_onto_label <- c(cell_ontological_class_labels_from_per_sample_cluster, + cell_ontological_class_labels_from_lvl2) +#per_sample data colour palette +colour_palette_per_sample <- metadata_per_sample %>% select(Label, Colour) %>% deframe() +colour_palette_per_sample_space <- colour_palette_per_sample +names(colour_palette_per_sample_space) <- gsub("_", " ", names(colour_palette_per_sample)) # color palette for heatmap -colour_palette_cluster <- metadata %>% +colour_palette_cluster <- metadata_extended %>% + mutate(Label = gsub("_EXCLUDE", "", Label)) %>% # use gsub to change all contents in Cluster (cluster name format) - mutate(Cluster = gsub("_", " ", Cluster)) %>% + mutate(Cluster = gsub("_", " ", Label)) %>% # Get two columns select(Cluster, Colour) %>% # Convert to vector of colours, where the first column gives the names @@ -25,11 +71,10 @@ colour_palette_cluster <- metadata %>% colour_palette_cluster_underscore <- colour_palette_cluster names(colour_palette_cluster_underscore) <- gsub(" ", "_", names(colour_palette_cluster)) -all_tf_list <- scan("shared/Mus_musculus_TF_one_TF_per_line.txt", character()) # color palette for timeseries plot, tab3 -colour_palette <- metadata %>% - mutate(Cluster = gsub("_", " ", Cluster)) %>% +colour_palette <- metadata_extended %>% + mutate(Cluster = gsub("_", " ", Label)) %>% separate(Cluster, into = c("Prefix", "Cluster"), sep = " ") %>% # Get two columns select(Cluster, Colour) %>% @@ -46,133 +91,205 @@ hm_anno <- makePheatmapAnno(colour_palette_cluster, "Cluster") hm_anno_new <- makePheatmapAnno(colour_palette, "Cluster") # this is used in: annotation_colors = hm_anno_new$side_colors, in both heatmaps (by cluster/cells) +#------------------------tf_input------------------- +#this is list of the TF input into SCENIC, all active TF identified in each dataset is a subset of this +all_tf_list <- scan("shared/Mus_musculus_TF_one_TF_per_line.txt", character()) -# ———————————————————————————————————Cortex data———————————————————————————————————————— -forebrain_data <- read_tsv("joint_cortex/Forebrain_join.2D.tsv") %>% # for UMAP cluster - mutate(Sample_cluster = str_replace(Sample_cluster," ","_")) -# clean some samples with space in between ... - -TF_active <- as_tibble(read_rds("joint_cortex/joint_cortex.active_regulons.Rds")) - -# These datasets describe TF and genes that are target of TFs, don't have ext suffix -TF_target_gene <- as_tibble(read_rds("joint_cortex/joint_cortex.regulon_target_info.Rds")) %>% +message("@ extended metadata") + +# -----------------------joint_cortex_extended----------------------------------- +#joint_cortex_extended metadata containing DR coordinates +forebrain_data_extended <- read_tsv("joint_cortex_extended/joint_cortex_extended.metadata.tsv", + col_types = cols(.default = col_character(), + Joint_extended_PC_1 = col_double(), + Joint_extended_PC_2 = col_double(), + Joint_extended_tSNE_1 = col_double(), + Joint_extended_tSNE_2 = col_double(), + Joint_extended_UMAP_1 = col_double(), + Joint_extended_UMAP_2 = col_double())) %>% + transmute("Cell" = Cell_clean, "Sample" = Sample, "Sample_cluster" = ID_20210710_with_exclude, + "Joint_cluster_number" = Joint_extended_cluster, "Joint_cluster" = ID_20210710_joint_clustering, + "PC1" = Joint_extended_PC_1, "PC2" = Joint_extended_PC_2, + "tSNE_1" = Joint_extended_tSNE_1, "tSNE_2" = Joint_extended_tSNE_2, + "UMAP1" = Joint_extended_UMAP_1, "UMAP2" = Joint_extended_UMAP_2) #renames different columns to be more clear + +#TFs that are active in this dataset +TF_active_cortex_extended <- as_tibble(read_rds("joint_cortex_extended/joint_cortex_extended.active_regulons.Rds")) + +#Dataframe containing TF-target gene pair in each row with info from the GRNBoost portion of SCENIC +#Use: GRN tab, Gene target info Table Tab +TF_target_gene_joint_cortex_extended <- as_tibble(read_rds( + "joint_cortex_extended/joint_cortex_extended.regulon_target_info.Rds")) %>% select(-logo) -unique_TF <- unique(TF_target_gene[["TF"]]) - -#reads metadata file for color palette of clustering by region -forebrain_cluster_palette <- read_tsv("joint_cortex/Jessa2019_Table_2b_joint_cortex_metadata.tsv") -forebrain_cluster_palette <- forebrain_cluster_palette %>% select(Cluster, Colour) %>% deframe() - -TF_and_ext <- identify_tf(TF_active) - -timeseries_input_meta_cortex <- create_metadata_timeseries(forebrain_data, "cortex") - -# metadata specific for each cell, corresponding to the activity data -#cell_metadata_cortex_prep <- read_tsv("joint_cortex/joint_cortex.metadata.tsv") +unique_TF_cortex_extended <- unique(TF_target_gene_joint_cortex_extended[["TF"]]) -#cell_metadata_cortex_test <- create_cell_metadata_cortex(forebrain_data) +#Converts between TF's gene symbol and the name of its regulon produced by SCENIC +#regulon name format: {gene symbol}_{extended} ({number of genes in the regulon}) +#extended regulon indicates if low confidence regulatory relationships are included in the regulon +# ex: Gene symbol: Arx Regulon name: Arx_extended (21g) +TF_and_ext_cortex_extended <- identify_tf(TF_active_cortex_extended) -#cell_metadata_cortex <- create_cell_metadata(cell_metadata_cortex_prep) -# activity for cortex timeseries graph data -binary_activity <- readRDS("joint_cortex/joint_cortex.binaryRegulonActivity_nonDupl.Rds") -tf_df <- as_tibble(rownames(binary_activity)) #a dataframe that contains all the tf with -# best representation of its identity in the binary_activity dataset +# binarized activity matrix for time-series ribbon plot +binary_activity_cortex_extended <- readRDS("joint_cortex_extended/4.2_binaryRegulonActivity_nonDupl_cortex_extended.Rds") +colnames(binary_activity_cortex_extended) <- gsub("_.","", colnames(binary_activity_cortex_extended)) +tf_df_cortex_extended <- as_tibble(rownames(binary_activity_cortex_extended)) -l <- c() -l_nexist_cortex <- c() -for (tf in unique_TF){ - tf_after <- translate_tf(tf, tf_df) - if(tf_after !=FALSE ){ - l <- c(l, tf) - } - else{l_nexist_cortex<- c(l_nexist_cortex,tf)} -} +#metadata file for time-series ribbon plot +timeseries_input_meta_cortex_extended <- create_metadata_timeseries(forebrain_data_extended, "cortex", lvl2_cluster_labels) -# ----------------------------------Pons data------------------------------------------------------- -pons_data <- read_tsv("joint_pons/Pons_join.2D.tsv") # for UMAP cluster - -TF_active_pon <- as_tibble(read_rds("joint_pons/joint_pons.active_regulons.Rds")) +data_cortex_extended <- list( + "cell_metadata" = forebrain_data_extended, + "TF_and_ext" = TF_and_ext_cortex_extended, + "TF_target_gene_info" = TF_target_gene_joint_cortex_extended, + "unique_active_TFs_bare" = unique_TF_cortex_extended, + "active_TFs" = TF_active_cortex_extended, + "timeseries_input_meta" = timeseries_input_meta_cortex_extended, + "binary_active_TFs" = tf_df_cortex_extended, + "binary_activity" = binary_activity_cortex_extended + +) -# These datasets describe TF and genes that are target of TFs, don't have ext suffix -TF_target_gene_pon <- as_tibble(read_rds("joint_pons/joint_pons.regulon_target_info.Rds")) %>% +save(data_cortex_extended, file = "joint_cortex_extended/cortex_extended_prep.Rda") + +# -----------------------joint_pons_extended----------------------------------- +#every element identical to the cortex_extended data +pons_data_extended <- read_tsv("joint_pons_extended/joint_pons_extended.metadata.tsv", + col_types = cols(.default = col_character(), + Joint_extended_PC_1 = col_double(), + Joint_extended_PC_2 = col_double(), + Joint_extended_tSNE_1 = col_double(), + Joint_extended_tSNE_2 = col_double(), + Joint_extended_UMAP_1 = col_double(), + Joint_extended_UMAP_2 = col_double())) %>% + transmute("Cell" = Cell_clean, "Sample" = Sample, "Sample_cluster" = ID_20210710_with_exclude, + "Joint_cluster_number" = Joint_extended_cluster, "Joint_cluster" = ID_20210710_joint_clustering, + "PC1" = Joint_extended_PC_1, "PC2" = Joint_extended_PC_2, + "tSNE_1" = Joint_extended_tSNE_1, "tSNE_2" = Joint_extended_tSNE_2, + "UMAP1" = Joint_extended_UMAP_1, "UMAP2" = Joint_extended_UMAP_2) + +TF_active_pons_extended <- as_tibble(read_rds("joint_pons_extended/joint_pons_extended.active_regulons.Rds")) + +TF_target_gene_joint_pons_extended <- as_tibble(read_rds( + "joint_pons_extended/joint_pons_extended.regulon_target_info.Rds")) %>% select(-logo) -unique_TF_pon <- unique(TF_target_gene_pon[["TF"]]) - -#reads metadata file for color palette of clustering by region -pons_cluster_palette <- read_tsv("joint_pons/Jessa2019_Table_2c_joint_pons_metadata.tsv") -pons_cluster_palette <- pons_cluster_palette %>% select(Cluster, Colour) %>% deframe() +unique_TF_pons_extended <- unique(TF_target_gene_joint_pons_extended[["TF"]]) -TF_and_ext_pon <- identify_tf(TF_active_pon) +TF_and_ext_pons_extended <- identify_tf(TF_active_pons_extended) -timeseries_input_meta_pons <- create_metadata_timeseries(pons_data,"pons") %>% - filter(Cell != "___po_e12_TACGGGCGTCAAGCGA") -# filter out the extra cell -# remove the extra line to make the number of cells the same as the binary activity pon data -# to correctly make the timeseires ribbon plot +timeseries_input_meta_pons_extended <- create_metadata_timeseries(pons_data_extended, "pons", lvl2_cluster_labels) -# activity for cortex timeseries graph data -binary_activity_pon <- readRDS("joint_pons/joint_pons.binaryRegulonActivity_nonDupl.Rds") -tf_df_pon <- as_tibble(rownames(binary_activity_pon)) #a dataframe that contains all the tf with -# best representation of its identity in the binary_activity dataset - -l <- c() -l_nexist_pons <- c() -for (tf in unique_TF_pon){ - tf_after <- translate_tf(tf, tf_df_pon) - if(tf_after !=FALSE ){ - l <- c(l, tf) - } - else{l_nexist_pons<- c(l_nexist_pons,tf)} -} +binary_activity_pons_extended <- readRDS("joint_pons_extended/4.2_binaryRegulonActivity_nonDupl_pons_extended.Rds") +colnames(binary_activity_pons_extended) <- gsub("_.","", colnames(binary_activity_pons_extended)) +tf_df_pons_extended <- as_tibble(rownames(binary_activity_pons_extended)) -# make two lists containing same name (will be assigned to a reactive list), -# then we can use the same name to code -data_cortex <- list( - "cell_metadata" = forebrain_data, - "TF_and_ext" = TF_and_ext, - "TF_target_gene_info" = TF_target_gene, - "unique_active_TFs_bare" = unique_TF, - "active_TFs" = TF_active, - "binary_active_TFs" = tf_df, - "timeseries_input_meta" = timeseries_input_meta_cortex, - "binary_activity" = binary_activity, - "tfs_not_exist_timeseries" = l_nexist_cortex, - "cluster_palette" = forebrain_cluster_palette +data_pons_extended <- list( + "cell_metadata" = pons_data_extended, + "TF_and_ext" = TF_and_ext_pons_extended, + "TF_target_gene_info" = TF_target_gene_joint_pons_extended, + "unique_active_TFs_bare" = unique_TF_pons_extended, + "active_TFs" = TF_active_pons_extended, + "timeseries_input_meta" = timeseries_input_meta_pons_extended, + "binary_active_TFs" = tf_df_pons_extended, + "binary_activity" = binary_activity_pons_extended + ) - -data_pons <- list( - "cell_metadata" = pons_data, - "TF_and_ext" = TF_and_ext_pon, - "TF_target_gene_info" = TF_target_gene_pon, - "unique_active_TFs_bare" = unique_TF_pon, - "active_TFs" = TF_active_pon, - "binary_active_TFs" = tf_df_pon, - "timeseries_input_meta" = timeseries_input_meta_pons, - "binary_activity" = binary_activity_pon, - "tfs_not_exist_timeseries" = l_nexist_pons, - "cluster_palette" = pons_cluster_palette +save(data_pons_extended, file = "joint_pons_extended/pons_extended_prep.Rda") + +message("@ master color palettes") + +#------------------------master color palette---------------------------------- +#When I was adding various different data sets generated at different times with different formats, +#colour palettes were super annoying. +#The master palette contains colours for all cluster labels regardless of when the cluster labels were assigned or +#if the labels have underscores or not +#Used in all ggplots in the app + +#Note: all cluster labels should have the same format now, but will keep using this in case I missed something + +extended_mouse_joint_cluster_palette <- readRDS("shared/palette_ID_20210710_joint_clustering.Rds") + +#palette for the level2 cluster labels from metadata_extended +palette_broad_clusters <- c("RGC" = "#ffcc00", + "Telencephalic progenitors" = "#FFE5AF", + "Inhibitory neurons" = "#135ca0", + "Neuronal IPC" = "#f77750", + "Excitatory neurons" = "#840200", + "Meninges" = "#dbd2d7", + "Unresolved" = "gray50", + "Myeloid" = "gray50", + "Endothelial" = "#636363", + "Thalamic precursors" = "gray50", + "Other neurons" = "#c18ba0", + "Cortical hem" = "#FFE5AF", + "Thalamic neurons" = "#805f91", + "Pericytes" = "#857e89", + "Microglia" = "#aca2b2", + "Gliogenic progenitors" = "#d5d98b", + "Astrocytes" = "#00a385", + "Choroid plexus" = "#1ba02a", + "Oligodendrocytes" = "#d5d98b", + "Macrophages" = "#86778e", + "Ependymal" = "#8ee5cf", + "Neurons" = "#ff9385", + "Schwann cells" = "#4b6d34", + "Vascular smooth muscle" = "#665f5f", + "Hindbrain progenitors" = "#ffe5af", + "Vascular leptomeningeal" = "#ceb9c5", + "Mixed progenitors" = "#ffe500" + ) +lvl1_cluster_palette <- c("Progenitors" = "#ffbda3", + "Neurons" = "#135ca0", + "Leptomeningeal" = "#ceb9c5", + "Unresolved" = "gray50", + "Blood" = "gray90", + "Endothelial" = "#636363", + "Vascular" = "#665f5f", + "Immune" = "#86778e", + "Glia" = "#00a385") + +cell_ontological_class_palette <- c("RGC" = "#ffcc00", + "Glial progenitors" = "#d5d98b", + "OPC" = "#e0de53", + "Proliferating OPC" = "#e6f957", + "Oligodendrocytes" = "#b4e04e", + "Astrocytes" = "#00a385", + "Ependymal" = "#8ee5cf", + "Neuronal progenitors" = "#ffbda3", + "Neurons" = "#135ca0", + "Immune" = "gray50", + "Vascular & other" = "gray70", + "Normal" = "gray90") master_palette <- c(hm_anno_new$side_colors$Cluster, # per-timepoint cluster with timepoint removed colour_palette_cluster, # per-timepoint cluster, with spaces - colour_palette_cluster_underscore, # per-timepoint cluster, with underscores - forebrain_cluster_palette, # joint clustering, forebrain - pons_cluster_palette) # joint clustering, pons -master_palette <- list("Cluster" = master_palette) + colour_palette_cluster_underscore, # per-timepoint cluster, with underscores + #forebrain_cluster_palette, # joint clustering, forebrain + #pons_cluster_palette, # joint clustering, pons + extended_mouse_joint_cluster_palette, + colour_palette_per_sample_space, + colour_palette_per_sample) + +master_palette <- list("Cluster" = master_palette, + "Broad Cluster" = palette_broad_clusters, + "Broader Cluster" = lvl1_cluster_palette, + "Cell Ontology Class" = cell_ontological_class_palette) + +message("@ time point data") #---------------------time_point data---------------------------------------------- -#use a loop for this +#Data processing for each time point is the same for (reg in c("ct", "po")){ - for(tp in c("e12", "e15", "p0", "p3", "p6")){ + for(tp in c("e10", "e12", "e13", "e15", "e16", "e18", "p0", "p3", "p6")){ TF_target_gene_info <- as_tibble(read_rds(glue("{reg}_{tp}/{reg}_{tp}.regulon_target_info.Rds"))) %>% select(-logo) @@ -181,129 +298,81 @@ for (reg in c("ct", "po")){ TF_active <- as_tibble(read_rds(glue("{reg}_{tp}/{reg}_{tp}.active_regulons.Rds"))) TF_and_ext <- identify_tf(TF_active) + #metadata with DR coords cell_data <- read_tsv(glue("{reg}_{tp}/{reg}_{tp}.metadata.tsv")) - black_list_cells <- cell_data %>% select(Cell, ID_20190715_with_blacklist) %>% - filter(grepl("BLACKLISTED", ID_20190715_with_blacklist)) %>% select(Cell) %>% + #indicates cells that should not be included in plots because they belong to a blacklisted cluster + #Used to filter the TF activity per cell feather in the create_activity_data function + black_list_cells <- cell_data %>% select(cell, ID_20201028_with_exclude) %>% + filter(grepl("EXCLUDE", ID_20201028_with_exclude)) %>% select(cell) %>% deframe() - #print(black_list_cells) - - cell_data <- cell_data %>% filter(!grepl("BLACKLISTED", ID_20190715_with_blacklist)) + cell_data <- cell_data %>% filter(!grepl("EXCLUDE", ID_20201028_with_exclude)) - awoo <- switch(reg, "ct" = "F-", "po" = "P-") - - dr_palette <- metadata %>% - separate(Cluster, into = c("Timepoint", "Cluster"), sep = "_") %>% - filter(Timepoint == glue("{awoo}{tp}")) %>% - unite(col = "Cluster", c("Timepoint", "Cluster"), sep = "_") %>% - select(Cluster, Colour) %>% - deframe() - x <- list( "TF_target_gene_info" = TF_target_gene_info, "TF_and_ext" = TF_and_ext, "cell_metadata" = cell_data, "bad_cells" = black_list_cells, - "cluster_palette" = dr_palette, "unique_TF" = unique_TF ) - #print(x) saveRDS(x, file = glue("{reg}_{tp}/{reg}_{tp}_prep.Rds")) } } -#-----------------------------forebrain joint cluster regulon activity data for heatmap------------ -forebrain_regulon_activity_data <- - read_feather("joint_cortex/joint_cortex.regulon_activity_per_cell.feather") - -forebrain_joint_cluster_info <- forebrain_data %>% select(Cell, Joint_cluster) - -forebrain_cluster_regulon_data <- - inner_join(forebrain_regulon_activity_data, forebrain_joint_cluster_info, by = "Cell") - -forebrain_cluster_regulon_data <- forebrain_cluster_regulon_data %>% group_by(Joint_cluster) %>% - summarize_if(is.numeric, mean) - -write_feather(forebrain_cluster_regulon_data, - path = "joint_cortex/joint_cortex.regulon_activity_per_joint_cluster.feather") - -#-----------------------------pons joint cluster regulon activity data for heatmap------------ -pons_regulon_activity_data <- - read_feather("joint_pons/joint_pons.regulon_activity_per_cell.feather") - -pons_joint_cluster_info <- pons_data %>% select(Cell, Joint_cluster) - -pons_cluster_regulon_data <- - inner_join(pons_regulon_activity_data, pons_joint_cluster_info, by = "Cell") - -pons_cluster_regulon_data <- pons_cluster_regulon_data %>% group_by(Joint_cluster) %>% - summarize_if(is.numeric, mean) - -write_feather(pons_cluster_regulon_data, - path = "joint_pons/joint_pons.regulon_activity_per_joint_cluster.feather") -# ---------------------------cortex data----------------------------- -save(data_cortex, file = "joint_cortex/cortex_prep.Rda") - - -# -----------------------------pons data----------------------------- -save(data_pons, file = "joint_pons/pons_prep.Rda") - +message("@ shared") # -----------------------------shared data----------------------------- save(colour_palette_cluster, - hm_anno, hm_anno_new, colour_palette, all_tf_list, master_palette, file = "shared/common_prep.Rda") + hm_anno, hm_anno_new, colour_palette, all_tf_list, + master_palette, palette_broad_clusters, + lvl1_cluster_labels, lvl2_cluster_labels, cell_onto_label, + file = "shared/common_prep.Rda") + +message("@ ribbon") #-----------------cell proportion over time ribbon plot-------------------- -forebrain_fraction <- forebrain_data %>% select(Cell, Sample, Sample_cluster) %>% - filter(!grepl("BLACKLIST", Sample_cluster)) %>% +#calculates the fraction of cells of the total that belong to each cluster +forebrain_fraction <- forebrain_data_extended %>% select(Cell, Sample, Sample_cluster) %>% + filter(!grepl("EXCLUDE", Sample_cluster)) + +forebrain_fraction <- forebrain_fraction %>% + mutate(broad_cluster = recode(forebrain_fraction$Sample_cluster, !!!lvl2_cluster_labels)) %>% separate(Sample_cluster, into = c("tp", "Cluster"), sep = "_") %>% group_by(Sample) %>% mutate (total_in_tp = n()) %>% - ungroup() %>% group_by(Sample, Cluster) %>% + ungroup() %>% group_by(Sample, broad_cluster) %>% mutate(frac = n()/total_in_tp) %>% - ungroup() %>% group_by(tp) + ungroup() %>% group_by(Sample) + -forebrain_clusters <- forebrain_fraction$Cluster %>% unique +forebrain_clusters <- forebrain_fraction$broad_cluster %>% unique -unique_forebrain_fraction <- forebrain_fraction %>% select(-Cell) %>% - distinct() %>% select(-Sample, -total_in_tp) +#removes duplicate rows such that each row corresponds to data for one cluster at one timepoint +unique_forebrain_fraction <- forebrain_fraction %>% select(-Cell, -Cluster) %>% + distinct() %>% select(-total_in_tp, -tp) -tp <- unique_forebrain_fraction$tp %>% unique() +#not all clusters appear in all timepoints, each timepoint needs to have a complete set of clusters for ribbon plot to look right +#adds any missing clusters in each timepoint and sets its fraction value to 0 unique_forebrain_fraction_complete <- unique_forebrain_fraction %>% - mutate(Cluster = factor(Cluster, levels = unique(.$Cluster))) %>% - complete(Cluster, nesting(tp), fill = list(frac = 0)) - -# for (i in tp){ -# tp_clusters <- unique_forebrain_fraction %>% ungroup() %>% -# filter(tp == i) %>% pull(Cluster) - #print(tp_clusters) -# clust_not_in <- forebrain_clusters[!(forebrain_clusters %in% tp_clusters)] - #print(clust_not_in) -# to_add <- tibble(tp = rep(i, length(clust_not_in)), Cluster = clust_not_in, frac = rep(0, length(clust_not_in))) - # for(j in clust_not_in){ - # row <- tibble(tp = i, Cluster = j, frac = 0) - # } -# print(to_add) - - # unique_forebrain_fraction2 <- rbind(unique_forebrain_fraction, to_add) -# } - -# unique_forebrain_fraction_complete %>% ungroup() %>% group_by(tp) + mutate(broad_cluster = factor(broad_cluster, levels = unique(.$broad_cluster))) %>% + complete(broad_cluster, nesting(Sample), fill = list(frac = 0)) + + unique_forebrain_fraction_complete$xpos = group_indices(unique_forebrain_fraction_complete) forebrain_plot <- unique_forebrain_fraction_complete %>% - ggplot(aes(x = xpos, y = frac, fill = Cluster)) + + ggplot(aes(x = xpos, y = frac, fill = broad_cluster)) + geom_area(stat = "identity", show.legend = FALSE) + - scale_fill_manual(values = colour_palette, drop = FALSE, name = "") + - scale_x_continuous(breaks = c(1,2,3,4,5), - labels = c("E12.5", "E15.5", "P0", "P3", "P6"), - limits = c(1, 5)) + + scale_fill_manual(values = palette_broad_clusters, drop = FALSE, name = "") + + scale_x_continuous(breaks = c(1,2,3,4,5,6,7,8,9), + labels = c("E10.5", "E12.5", "E13.5", "E15.5", "E16.5", "E18.5", "P0", "P3", "P6"), + limits = c(1, 9)) + labs(x = "Developmental Age", y = "Proportion") + guides(fill = guide_legend(ncol = 5)) + theme_min() + @@ -311,52 +380,42 @@ forebrain_plot <- unique_forebrain_fraction_complete %>% #same for pons -pons_fraction <- pons_data %>% select(Cell, Sample, Sample_cluster) %>% - filter(!grepl("BLACKLIST", Sample_cluster)) %>% + +pons_fraction <- pons_data_extended %>% select(Cell, Sample, Sample_cluster) %>% + filter(!grepl("EXCLUDE", Sample_cluster)) + +# for pons metadata, some cells are labelled E15.5 for sample, and E12.5 for the cluster labels +# making the assumption here that the sample label is right and the cluster label is wrong +# in the code: group_by(Sample) and doing the total cell and proportion calculations based on assumption that +# the sample label is correct. All these confused cells are put with the E15.5 cells. +pons_fraction <- pons_fraction %>% + mutate(broad_cluster = recode(pons_fraction$Sample_cluster, !!!lvl2_cluster_labels)) %>% separate(Sample_cluster, into = c("tp", "Cluster"), sep = "_") %>% - group_by(tp) %>% mutate (total_in_tp = n()) %>% - ungroup() %>% group_by(Sample, Cluster) %>% + group_by(Sample) %>% mutate (total_in_tp = n()) %>% + ungroup() %>% group_by(Sample, broad_cluster) %>% mutate(frac = n()/total_in_tp, number = n()) %>% - ungroup() %>% group_by(tp) + ungroup() %>% group_by(Sample) -pons_clusters <- pons_fraction$Cluster %>% unique -unique_pons_fraction <- pons_fraction %>% select(-Cell) %>% - distinct() %>% select(-Sample, -total_in_tp) +unique_pons_fraction <- pons_fraction %>% select(-Cell, -Cluster) %>% + distinct() %>% select(-total_in_tp, -tp) -test <- unique_pons_fraction %>% group_by(tp) %>% summarize(sum(frac)) - -tp <- unique_pons_fraction$tp %>% unique() unique_pons_fraction_complete <- unique_pons_fraction %>% - mutate(Cluster = factor(Cluster, levels = unique(.$Cluster))) %>% - complete(Cluster, nesting(tp), fill = list(frac = 0)) - - -# for (i in tp){ -# tp_clusters <- unique_pons_fraction %>% ungroup() %>% -# filter(tp == i) %>% pull(Cluster) - #print(tp_clusters) -# clust_not_in <- pons_clusters[!(pons_clusters %in% tp_clusters)] - #print(clust_not_in) -# to_add <- tibble(tp = rep(i, length(clust_not_in)), Cluster = clust_not_in, frac = rep(0, length(clust_not_in))) - # for(j in clust_not_in){ - # row <- tibble(tp = i, Cluster = j, frac = 0) - # } -# unique_pons_fraction <- rbind(unique_pons_fraction, to_add) -#} - -# unique_pons_fraction %>% ungroup() %>% group_by(tp) + mutate(broad_cluster = factor(broad_cluster, levels = unique(.$broad_cluster))) %>% + complete(broad_cluster, nesting(Sample), fill = list(frac = 0)) + + unique_pons_fraction_complete$xpos = group_indices(unique_pons_fraction_complete) pons_plot <- unique_pons_fraction_complete %>% - ggplot(aes(x = xpos, y = frac, fill = Cluster)) + + ggplot(aes(x = xpos, y = frac, fill = broad_cluster)) + geom_area(stat = "identity", show.legend = FALSE) + - scale_fill_manual(values = colour_palette, drop = FALSE, name = "") + - scale_x_continuous(breaks = c(1,2,3,4,5), - labels = c("E12.5", "E15.5", "P0", "P3", "P6"), - limits = c(1, 5)) + + scale_fill_manual(values = palette_broad_clusters, drop = FALSE, name = "") + + scale_x_continuous(breaks = c(1,2,3,4,5,6,7,8,9), + labels = c("E10.5", "E12.5", "E13.5", "E15.5", "E16.5", "E18.5", "P0", "P3", "P6"), + limits = c(1, 9)) + labs(x = "Developmental Age", y = "Proportion") + guides(fill = guide_legend(ncol = 5)) + theme_min() + @@ -369,5 +428,5 @@ timeseries_proportion_plots <- list( save(timeseries_proportion_plots, file = "shared/timeseries_proportion_plots.Rda") - +message("@ done.") diff --git a/GRN/functions.R b/GRN/functions.R index a04c2ec..0b0e0ab 100644 --- a/GRN/functions.R +++ b/GRN/functions.R @@ -1,4 +1,5 @@ ##----------------------------ggplot style--------------------------------------------- +#theme for plots, adopted from Selin in the Clusters App theme_min <- function(base_size = 11, base_family = "", border_colour = "black", border_size = 1) { @@ -28,8 +29,10 @@ theme_min <- function(base_size = 11, base_family = "", #----------------------------TF information table--------------------------------------------- -#function to add a column to data-table containing the HTML code necessary to display the motif logo -addMotifPic <- function(subset_data){ #need to comment this and test to see if it works when I have wifi +#function to add a column to data-table containing the HTML necessary to display the binding motif logo +#for use in the Table tab +#' @param subset_data dataframe containing user selected TFs and its predicted regulatory relationships +addMotifPic <- function(subset_data){ subset_data <- mutate(subset_data, motif_logo = bestMotif) for (i in 1:nrow(subset_data)){ @@ -41,182 +44,24 @@ addMotifPic <- function(subset_data){ #need to comment this and test to see if i return(subset_data) } -#------------------------------Bubble plot-------------------------------------- -#' Prepare input for bubble_plot -#' -#' Load gene expression data from feather, tidy & optionally scale expression, -#' and return the dataframe required as input for the bubble_plot() function -#' -#' @param gene Character vector, one or more genes of interest to plot -#' @param scale Logical, whether or not to linearly scale gene expression across -#' clusters to [0,1] to improve visualization. Default: TRUE -#' @param show_mean Logical, whether or not to display the mean expression of -#' given genes in a new bubble plot line. Default: FALSE -#' TODO: Use this to provide an option to download the underlying data. -#' -#' @examples -#' bubble_prep("Dlx1") -#' bubble_prep <- function(tf, -#' scale = TRUE, -#' show_mean = FALSE, -#' region) { -#' -#' # Load the mean expression of genes across clusters, given gene of interest -#' file_path <- glue("data/joint_{region}/joint_cortex.regulon_activity_per_cluster.feather") -#' exp <- read_feather(path = file_path, -#' columns = c("Cluster", tf)) #%>% -#' #dendogram_order from hydra file, commented out for now because no dendogram -#' #filter(Cluster %in% dendrogram_order) -#' -#' # Scale expression of each gene linearly across clusters to [0, 1] -#' if (scale) { -#' -#' exp <- exp %>% -#' as.data.frame() %>% -#' tibble::column_to_rownames(var = "Cluster") %>% -#' apply(2, scales::rescale, to = c(0, 1)) %>% -#' as.data.frame %>% -#' tibble::rownames_to_column(var = "Cluster") -#' -#' } -#' -#' # Convert to long / tidy format with columns: Cluster, Gene, Expression -#' exp <- exp %>% -#' gather(., "TF", "Activity", 2:ncol(.)) -#' -#' # Probably don't need this in the TF plot -#' # # Load the proportion of cells in each cluster in which each gene was detected, -#' # # and convert to long / tidy format with columns: Cluster, Gene, Pct1 -#' # pct1 <- read_feather("data/joint_mouse/pct1_per_ID_20190715_cluster.feather", -#' # columns = c("Cluster", gene)) %>% -#' # gather(., "Gene", "Pct1", 2:ncol(.)) -#' -#' # # Join with cluster metadata -#' # df <- left_join(exp, pct1, by = c("Cluster", "Gene")) %>% -#' # left_join(metadata, by = c("Cluster" = "Cluster_nounderscore")) -#' -#' # Tidy data for plotting -#' df <- df %>% -#' -#' # Order genes to match order input by user -#' mutate(TF = factor(TF, levels = rev(TF))) %>% -#' arrange(TF) %>% -#' -#' # Pad gene names so that the plot takes up a more standardized -#' # width; to roughly the the # of characters in the gene w/ longest name -#' # However, letters take up more pixels than spaces, so do less padding -#' # for genes with longer names -#' # TODO: Test the (commented) third line inside mutate() and adjust padding as required -#' mutate(TF_padded = case_when( -#' str_length(TF) <= 5 ~ str_pad(TF, 15, side = 'right', pad = " "), -#' between(str_length(TF), 5, 8) ~ str_pad(TF, 12, side = 'right', pad = " ") -#' #, str_length(Gene) > 8 ~ str_pad(Gene, 9, side = 'right', pad = " ") -#' ) -#' ) %>% -#' mutate(TF_padded = factor(TF_padded, levels = unique(.$TF_padded))) %>% -#' -#' # Order the clusters on the x-axis to match the dendrogram image -#' mutate(Cluster = factor(Cluster, levels = dendrogram_order)) %>% -#' -#' filter(!is.na(Cluster)) %>% -#' -#' # Convert NAs (undetected genes) to 0s -- this ensures all -#' # clusters have a value for all genes, so that all clusters are plot, -#' # even if the gene was undetected -#' replace_na(list(Expression = 0, Pct1 = 0)) %>% -#' -#' # Keep columns -#' select(Gene, Cluster, Sample, Cell_type, Cell_class, N_cells, Expression, Pct1, Sample, Colour, Gene_padded) -#' -#' # Create & append set of rows containing mean expression over all selected genes -#' if(show_mean) { -#' -#' # Create mean expression rows, preserving information for tooltip -#' mean_exp <- df %>% -#' group_by(Cluster, Sample, Cell_type, N_cells, Cell_class, Colour) %>% -#' summarize(#Gene = "MEAN", -#' #Cluster = Cluster, -#' #Sample = Sample, -#' #Cell_type = Cell_type, -#' #Cell_class = Cell_class, -#' #N_cells = N_cells, -#' Expression = mean(Expression) -#' #Pct1 = mean(Pct1), -#' #Colour = Colour, -#' #Gene_padded = "MEAN" -#' ) %>% -#' # Remove the Pct1 value from the mean expression -#' # and label the mean expression -#' mutate(Pct1 = 1, Gene = "MEAN", Gene_padded = "MEAN") -#' -#' # Add the rows containing mean expression to the original dataframe, -#' # removing duplicate rows and ordering them once more by user input, -#' # except the mean which is placed at the bottom -#' gene_order_padded <- levels(df$Gene_padded) -#' df <- bind_rows(df, mean_exp) %>% -#' distinct(.) %>% -#' mutate(Gene_padded = factor(Gene_padded, levels = c("MEAN", gene_order_padded))) -#' -#' } -#' -#' return(df) -#' -#' } -#' -#' -#' #' Bubbleplot of gene expression -#' #' -#' #' Generate a bubble plot for genes of interest across clusters in the mouse -#' #' dendrogram, where bubble colour encodes the mean expression in each cluster -#' #' and bubble size encodes the proportion of cells where each gene is detected -#' #' -#' #' @param df Data frame as returned by bubble_prep(), with require columns Cluster, -#' #' Gene_padded, Pct1, and Expression -#' #' -#' #' @return ggplot2 object -#' #' -#' #' @examples -#' #' bubble_prep("Dlx1") %>% bubbleplot() -#' #' -#' #' @export -#' bubble_plot <- function(df, max_point_size) { -#' -#' # Generate plot -#' p1 <- df %>% -#' ggplot(aes(x = Cluster, y = Gene_padded)) + -#' geom_point(aes(size = Pct1, colour = Expression), alpha = 0.8) + -#' scale_size_area(max_size = max_point_size) + -#' scale_color_gradientn(colours = tail(rdbu, 70)) + -#' theme_min() + -#' ylab(NULL) + -#' theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, -#' colour = joint_mouse_palette, size = rel(0.7)), -#' panel.grid.major.x = element_line(colour = "grey90"), -#' panel.border = element_blank(), -#' axis.ticks.x = element_blank(), -#' axis.ticks.y = element_blank(), -#' # Do not show the legend because it is included in the static -#' # dendrogram image displayed above the bubbleplot -#' legend.position = "bottom") + -#' # Put gene labels on the right hand side to improve alignment -#' scale_y_discrete(position = "right") -#' -#' return(p1) -#' -#' } + #----------------------------ggNet visualisation--------------------------------------------- #function to create an igraph object #' @param tf the user input vector of transcription factors #' @param tf_target_gene_info a data frame containing information for each association #' between a TF and a target gene; created in data_prep.R, loaded in global.R and used depending #' on the region input in input_new() -make_network <- function(tf, tf_target_gene_info, gene_list){ - #add a step to select only the transcription factors that are in the list +make_network <- function(gene_input, tf_target_gene_info, gene_list, network_by_target_gene = FALSE){ + #create edgelist - edges <- tf_target_gene_info %>% select(TF, gene, nMotifs, Genie3Weight.weight) %>% - #and filter it to only the transcription factors that are the input - filter(TF %in% tf) - #print(edges) + edges <- tf_target_gene_info %>% select(TF, gene, nMotifs, starts_with("Genie3Weight")) #%>% + # filter(TF %in% tf) + + #and filter it to only the transcription factors or genes that are the input + + if(network_by_target_gene){edges <- edges %>% filter(gene %in% gene_input)} #at this point, this is a matrix relating genes nad + else{edges <- edges %>% filter(TF %in% gene_input)} #TFs + #create node list #the nodes in this case are all the TFs from user input and all the genes that are regulated by the #transcription factors @@ -235,7 +80,13 @@ make_network <- function(tf, tf_target_gene_info, gene_list){ set_vertex_attr("Gene_Type", index = gene_list_in_network, "Input Target Genes") %>% set_vertex_attr("Gene_Type", index = unique_TF, "TF") -} +} + +#plots igraph network using ggNet +#' @param net igraph object made with make_network function +#' @param labelNodes User input that allows all nodes to be labelled; if false, only TF nodes are labelled +#' @param tf list of TFs input by user; used to label just the TF nodes if labelNodes is false + plot_network <- function(net, labelNodes, tf){ #print(net) if(labelNodes){ @@ -265,79 +116,7 @@ plot_network <- function(net, labelNodes, tf){ } -# --------------------------OBSOLETE-cytoscape network visualization---------------------------------------------- -# function to create network - -#' Create rcytoscape network data -#' -#' Takes a vector input that contains user selected TFs and output a list of nodeData and edgeData -#' which will be used for createCytoscapeJsNetwork -#' A good to visualize correlations among multiple TFs -#' -#' @param tf one single tf name character -#' @param TF_target_gene TF_target_gene data, specific for cortex/pon -#' @param unique_TF unique_TF data, specific for cortex/pon -#' -#' @return a list of nodeData and edgeData that are required for generating a rcytoscapejs network object -#' -#' @examples -#' TF <- c("Arx","Lef1") -#' # Note that TF_target_gene and unique_TF will be saved in data_cortex list, by data_prep.R -#' nodeData <- create_network(TF, TF_target_gene_pon, unique_TF)$nodes -#' edgeData <- create_network(TF, TF_target_gene_pon, unique_TF)$edges -#' network <- createCytoscapeJsNetwork(nodeData, edgeData) -#' rcytoscapejs2(network$nodes, network$edges) -#' -# create_network <- function(tf, TF_target_gene, unique_TF, pathway_genes = c(), -# shrink_gray = FALSE){ -# TF_interest <- filter(TF_target_gene, TF %in% tf)[["TF"]] -# gene_target <- filter(TF_target_gene, TF %in% tf)[["gene"]] -# -# source <- TF_interest -# target <- gene_target -# -# id <- c(TF_interest, gene_target) -# name <- id -# nodeData <- data.frame(id,name, stringsAsFactors = FALSE) -# edgeData <- data.frame(source, target, stringsAsFactors = FALSE) -# -# #unique_TF <- unique(TF_target_gene[["TF"]]) -# -# mutual_target <- edgeData %>% -# # a character vector that indicates the nodes that are target of multiple selected TFs -# count(target) %>% -# filter(n > 1 & !target %in% tf ) %>% -# .[[1]] -# -# nodeData <- nodeData %>% -# # you can customize the color using the case_when structure easily, -# # check the tfs in id column that exist in your vector, then you can control its size, -# # shape and color easily -# mutate(color = case_when(id %in% tf ~ "#9d4097", # orange -# # orange nodes are tfs that are active in this region -# id %in% pathway_genes ~ "green", -# id %in% unique_TF ~ "#D6604D", -# id %in% mutual_target ~ "#4fafc6", -# TRUE ~ "lightgrey")) %>% -# mutate(height = case_when(id %in% tf ~ "100", -# TRUE ~ "70")) %>% -# mutate(width = case_when(id %in% tf ~ "100", -# TRUE ~ "70")) -# -# if(shrink_gray){ -# nodeData <- nodeData %>% -# mutate(height = case_when(color %in% "lightgrey" ~ "40", -# TRUE ~ "70")) %>% -# mutate(width = case_when(color %in% "lightgrey" ~ "40", -# TRUE ~ "70")) -# -# } -# -# return(list(nodes = nodeData, -# edges = edgeData -# )) -# } # --------------------------------Helper functions---------------------------------------------------- #' Identify transcription factor data type #' @@ -448,6 +227,11 @@ tf_ext <- function(TF, TF_and_ext){ filter(TF_and_ext, type==TF & ext=="ext")[[1,1]] } + +#'@param TF a user selected TF list +#'@param TF_ref a list of TFs that are in the dataset +#' +#'subsets TF based on if each element appears in TF_ref check_tf_input <- function(TF, TF_ref){ TF_in_data <- TF[TF %in% TF_ref] TF_not_data <- TF[!(TF %in% TF_ref)] @@ -457,22 +241,52 @@ check_tf_input <- function(TF, TF_ref){ ) } +#'@param tf a user selected TF list, gene symbols +#'@param TF_and_ext a list of TFs in the dataset and whether the regulon is a regular or extended regulon +#' +#'Converts gene symbols in tf to the regulon name that is used in the TF activity feather files transform_tf_input <- function(tf, tf_and_ext){ tf_to_read <- character(0) for(TF in tf){ # tf is input tf list, could contain many tfs if(has_regular(TF, tf_and_ext)){ - tf_to_read[TF] <- tf_regular(TF, tf_and_ext) - } # a helper to read the corresponding data + tf_to_read[TF] <- tf_regular(TF, tf_and_ext) #if the TF has a non-extended/regular regulon, then the regular regulong name is used + } else{ - tf_to_read[TF] <- tf_ext(TF, tf_and_ext) + tf_to_read[TF] <- tf_ext(TF, tf_and_ext) #gets the extended regulon name for the TF } } names(tf_to_read) <- NULL return(tf_to_read) } + +#' Determine if a background colour is dark enough to warrant white text +#' +#' @param hex_color String, colour in hex colour format e.g. #000000 +#' +#' @return TRUE if the colour is dark enough (arbitrary) +dark <- function(hex_color) { + + red <- substr(hex_color, 2, 2) + green <- substr(hex_color, 4, 4) + blue <- substr(hex_color, 6, 6) + dark_nums <- c(0:8) + + if ((red %in% dark_nums && blue %in% dark_nums) || + (red %in% dark_nums && green %in% dark_nums) || + (green %in% dark_nums && blue %in% dark_nums)) { + + return(TRUE) + + } else { + + return(FALSE) + + } +} + # --------------------------------Create data for plots-------------------------------- # NOTE: TF_and_ext is a dataframe (loaded already) that created in order to identify # whether the TF data is a regular TF (with high confidence annotation) @@ -481,42 +295,41 @@ transform_tf_input <- function(tf, tf_and_ext){ # if we have no data related to this tf, we will either give an error message or do nothing -#' create Cell/Cluster activity data -#' -#' Make activity data used in tab2 by either Cell or Cluster, the method would be provided by -#' user's input in Shiny app -#' This function uses feather file that will be read by a certain col to maximize speed, -#' so we switch the paths of the feather file for different brain region + #' #' @param tf character vector, containing one or more TF names #' @param method either by Cell --> use cell data, or by cluster --> use cluster data, #' this should be a string indicating the column name +#' @param region cortex or pons #' @param TF_and_ext TF_and_ext data, specific for cortex/pons +#' @param timepoint user input developmental timepoint, only used if looking at per sample data +#' @param per_sample boolean +#' @param bad_cells list of cells that belong to excluded/blacklisted clusters that needs to be filtered out #' @return a dataframe that has a column containing all the cell names and columns of the input tfs #' the corresponding activity #' data value (NES) #' -#' @examples -#' create_activity_data("Arx", "Cell", "cortex", TF_and_ext) -#' create_activity_data("Pax6", "Cluster", "pons", TF_and_ext_pon) +#' +#' This function subsets a TF activity feather (basically a dataframe with cell/cluster identifiers as rows +#' and regulons as columns) based on the user TF input, region of interest, timepoint (if looking at +#' per-sample data), and the cell/cluster identification method (methd can be joint for joint space cluster, +#' cell for TF activity per cell, or cluster for TF activity per sample clustering) + create_activity_data <- function(tf, method, region, TF_and_ext, timepoint = NULL, per_sample = FALSE, bad_cells = ""){ - # use the feature of feather data to read certain col to optimize speed - #if(tf_exist(tf, TF_and_ext) != TRUE){return("TF does not exist")} tp <- timepoint - #building path of the feather object to read if the per sample toggle is on - #method should be joint if per_sample is true - #print(per_sample) - #print("ahhh") + #building path of the feather file to read if the per sample toggle is on + + # set up the path of the feather file to read depending on region and cluster or cell if(per_sample == TRUE){ reg <- switch(region, "cortex" = "ct", "pons" = "po") - #time <- substring(timepoint, 3) commented out for now + meth <- str_to_lower(method) #per_sample should only be true for method: "joint" or "Cell" #if its "Cell", turn to lower case and use for DR plots #if its "joint", then its for per-sample heatmap and load feather @@ -528,27 +341,37 @@ create_activity_data <- function(tf, method, region, TF_and_ext, path <- glue("data/{reg}_{timepoint}/{reg}_{timepoint}.regulon_activity_per_{meth}.feather") } + # set up the path of the feather file to read depending on region and cluster or cell + #per_sample is false here so read rether files for the joint_extended data else{ - if(!region %in% c("cortex", "pons")) return("Wrong usage: region should be either cortex/pons") + + if(!region %in% c("cortex", "pons")) return("Wrong usage: region should be either cortex/pons") #sanity check + if(method == "joint"){ - method2 <- "joint_cluster" + path <- glue('data/joint_{region}_extended/joint_{region}_extended.regulon_activity_per_cluster.joint_extended.feather') } + else if (method == "Cluster"){ + path <- glue('data/joint_{region}_extended/joint_{region}_extended.regulon_activity_per_cluster.per_sample.feather') + } + #method is Cell else{ - method2 <- str_to_lower(method) + path <- glue('data/joint_{region}_extended/joint_{region}_extended.regulon_activity_per_cell.feather') } - # set up the path of the feather file to read dependingo on region and cluster or cell - path <- glue('data/joint_{region}/joint_{region}.regulon_activity_per_{method2}.feather') + } # case-insensitive checking and reading in the first column which corresponds to cell or cluster label - #why even do this if method2 object has all lowercase already? + # the first step just reads in the cell or cluster labels column -> collectively called cell_col + #the TF columns are read after if(str_detect(method,"(?i)Cell")){ cell_col <- read_feather(path, "Cell") } else if(str_detect(method,"(?i)Cluster")){ - cell_col <- read_feather(path,"Cluster") + cell_col <- read_feather(path,"ID_20210710") + colnames(cell_col) <- "Cluster" } else if(str_detect(method,"(?i)joint")){ + #per sample data has a different name for the cluster column compared to the joint_extended data if(per_sample){ ID <- "ID_20201028" cell_col <- read_feather(path, ID) @@ -556,20 +379,26 @@ create_activity_data <- function(tf, method, region, TF_and_ext, method <- "Cluster" } else{ - cell_col <- read_feather(path, "Joint_cluster") + cell_col <- read_feather(path, "ID_20210710_joint_clustering") + colnames(cell_col) <- "Joint_cluster" method <- "Joint_cluster" } } - else{return("Wrong usage, method should be Cell/Cluster")} #this should never return + else{return("Wrong usage, method should be Cell/Cluster")} #this should never return when called from the app - # add certain tf activity data to the Cell column + #Read TF activity columns and add to the cell/cluster identifier column read above #loops through each factor in tf input and checks to see if the entry has regular or ext forms and extracts - #data prioritizing the regular factor data and not extended + #data prioritizing the regular regulon data and not extended #puts each regular or ext TF name in a list + + #activity is the matrix that will contain a subset of the entire feather depending on what TFs the user selected activity <- cell_col - #print(tf) + + + #buids tf_to_read vector: a list of regulon names that should be read from the feather file + for(TF in tf){ # tf is input tf list, could contain many tfs tf_to_read <- TF #wouldnt this line combined with the if else if statement double up the data for TF #with both regular and ext forms @@ -582,32 +411,31 @@ create_activity_data <- function(tf, method, region, TF_and_ext, else{ next # means we don't have that data, we jump over it and do nothing } - #both outcomes are the same here? why is there an if else statement - #reads the data from file specificed in path using the tf_to_read list - #should check what the tf_to_read list actually says + + #reads the data from file specified in path using the tf_to_read list col <- read_feather(path,tf_to_read) - + activity <- add_column(activity,col) } activity %>% - select(method, everything()) # move method col to start + select(method, everything()) # move the cell/cluster identifier column to start if(!(is.null(tp)) & per_sample == FALSE & identical(method, "Cluster")){ #when timepoint has an input and the input is not All #split column, select rows for the corresponding timepoints using filter - if(identical(tp, "F-All") || identical(tp, "P-All")){ + if(identical(tp, "F-All Time-Points") || identical(tp, "P-All Time-Points")){ } else{ - activity <- separate(activity, Cluster, into = c("Timepoint", "Cluster"), sep = " ") + activity <- separate(activity, Cluster, into = c("Timepoint", "Cluster"), sep = "_") activity <- filter(activity, Timepoint == tp) - activity <- unite(activity, Cluster, Timepoint, Cluster, sep = " ") + activity <- unite(activity, Cluster, Timepoint, Cluster, sep = "_") } } else if(identical(method, "Cell") & per_sample){ #gets rid of blacklisted cells in the per sample data - #print(method) + activity <- activity %>% filter(!(Cell %in% bad_cells)) - #print(str(activity)) + } activity } @@ -642,120 +470,105 @@ makePheatmapAnno <- function(palette, column) { #' @param method #' @param region #' @param TF_and_ext -#' @param brain_data either forebrain_data or pon_data, eventually will be saved by data_prep.R -#' and loaded at the beginning of app.R as an element in a list -#' -#' @return -#' @export +#' @param timepoint +#' @param per_sample +#' +#' params same as create_activity_data inputs because the function is called in this one #' -#' @examples -#' plot_heatmap(c("Arx","Lef1"), "Cluster","cortex", TF_and_ext,forebrain_data) -#' plot_heatmap(c("Arx","Lef1"), "Cell","cortex", TF_and_ext,forebrain_data) -#' plot_heatmap(c("Pax6","Lef1"), "Cluster","pons", TF_and_ext_pon, pon_data) -#' plot_heatmap(c("Pax6","Lef1"), "Cell","pons", TF_and_ext_pon,pon_data) + +# #' -plot_heatmap <- function(tf, method, region, TF_and_ext, #brain_data, cell_plot_num = 300, +plot_heatmap <- function(tf, method, region, TF_and_ext, timepoint = NULL, per_sample = FALSE){ # sanity checking if(!region %in% c("cortex", "pons")) return("Wrong usage: region should be either cortex/pons") if(!method %in% c("Cell","Cluster", "joint")) return("Wrong usage, method should be Cell/Cluster") - # if(method == "Cell"){ - # # 1. create the activity data for plotting - # act_cell <- create_activity_data(tf, "Cell",region, TF_and_ext) %>% - # mutate(Cluster = gsub("_"," ",brain_data[["Sample_cluster"]])) %>% - # filter(!grepl("BLACKLIST", Cluster)) %>% # filter out bad samples - # sample_n(cell_plot_num) %>% # randomly sample it - # tibble::column_to_rownames(var = "Cell") # make that column name as row name ... - # - # anno_row_cell <- select(act_cell, Cluster) - # # change the anno_row, since we change the color palettes - # new_anno_row_cell <- anno_row_cell %>% - # mutate(Cluster = gsub(pattern = ".* ", replacement = "", Cluster)) - # rownames(new_anno_row_cell) <- rownames(anno_row_cell) # re-assign the rownames - # - # act <- select(act_cell, -Cluster) # must remove Cluster data before plotting - # - # # customized for plotting by cell - # anno_col <- new_anno_row_cell # assign to the same variable for plotting - # cell_width_plot <- 2 - # show_colname_plot <- FALSE - # } + + #generates heatmap for the per sample clusters in the joint_extended datasets if(method == "Cluster"){ - act <- create_activity_data(tf, "Cluster",region, TF_and_ext, timepoint) - #sample_n(cluster_plot_num) %>% # randomly sample it - #str(act) + act <- create_activity_data(tf, "Cluster",region, TF_and_ext, timepoint) + act <- column_to_rownames(act, var = "Cluster") # make that column name as row name ... - # change the anno_row, since we change the color palettes - new_anno_row <- hm_anno$anno_row %>% - mutate(Cluster = gsub(pattern = ".* ", replacement = "", Cluster)) - rownames(new_anno_row) <- rownames(hm_anno$anno_row) # re-assign the rownames + # hm_anno$anno_row contains the cluster labels of all clusters in the joint_extended datasets + # used to colour the cluster by a palette + new_anno_row <- hm_anno$anno_row + + rownames(new_anno_row) <- gsub(" ", "_", rownames(hm_anno$anno_row)) # re-assign the rownames # note that the rownames correspond to the col names of the matrix t(act_cluster) # customized for plotting by cluster - anno_col <- new_anno_row # this is loaded by data_prep.R - #print(anno_col) - cell_width_plot <- 20 - cell_height_plot <- 20 - if (identical(timepoint, "F-All") || identical(timepoint, "P-All")){ - cell_width_plot <- 7 - cell_height_plot <- 10 - } + + #anno_col contains the mapping from individual clusters to broader cluster classifications + #for the purpose of colour coding columns in the heatmap + #the each column is coloured by its individual per-sample cluster and by a braoder cell ontology class + #the following steps maps the per-sample cluster labels first to a broader lvl2 labels + #and then maps lvl2 labels to the cell ontology labels + #this is because the cell ontology labels are pulled from the metadata_20201028_with_qc file whlie + #the persample labels for the extended datasets are pulled from the metadata_20210710_with_qc file + #the formats of certain per-sample labels are different between these files so its easier to map with the + #lvl2 labels as an intermediary + anno_col <- new_anno_row %>% + mutate('Broad Cluster' = recode(rownames(new_anno_row), !!!lvl2_cluster_labels)) + + anno_col <- anno_col %>% mutate('Cell Ontology Class' = + recode(get('Broad Cluster'), !!!cell_onto_label)) %>% + select(-'Broad Cluster') + # print(anno_col) + + rownames(anno_col) + show_colname_plot <- TRUE title <- glue('Transcription Factor Regulon Activity at Developmental Time: {timepoint}') } - else if(method == "joint"){ #plot heat map by joint cluster + else if(method == "joint"){ #plot heat map by joint cluster for the joint_extended dataset, or for the per-sample analyses act <- create_activity_data(tf, "joint", region, TF_and_ext, per_sample = per_sample, - timepoint = timepoint) #%>% - #filter(!grepl("BLACKLISTED", Cluster)) - - print(act) + timepoint = timepoint) if(per_sample == TRUE){ #filter out the exclude clusters act <- act %>% filter(!grepl("EXCLUDE", act$Cluster)) - #print(act) + col_to_row <- "Cluster" - new_anno_row <- hm_anno$anno_row %>% - mutate(Cluster = gsub(pattern = ".* ", replacement = "", Cluster)) - rownames(new_anno_row) <- gsub(" ", "_", rownames(hm_anno$anno_row)) - # print(new_anno_row) + + #generate the labels for the clusters in the data-set so that the palette can be properly displayed + new_anno_row <- act %>% mutate(rownames = Cluster) %>% + column_to_rownames("rownames") %>% select(Cluster) %>% + mutate('Cell Ontology Class' = recode(act$Cluster, !!!cell_onto_label)) + + # new_anno_row <- new_anno_row %>% mutate('Cell Ontological Classification' = + # recode(get('Broad Cluster'), !!!cell_ontological_class_labels)) %>% + # select(-'Broad Cluster') + # + # print(new_anno_row) } else{ col_to_row <- "Joint_cluster" new_anno_row <- act %>% mutate(Cluster = Joint_cluster) %>% column_to_rownames("Joint_cluster") %>% select(Cluster) - #print(new_anno_row) - - #%>% - # column_to_rownames("Cluster") - #row.names(new_anno_row) <- new_anno_row$Cluster } act <- act %>% column_to_rownames(var = col_to_row) - - # change the anno_row, since we change the color palettes - # new_anno_row <- hm_anno$anno_row %>% - # mutate(Cluster = gsub(pattern = ".* ", replacement = "", Cluster)) - # rownames(new_anno_row) <- rownames(hm_anno$anno_row) # re-assign the rownames + # note that the rownames correspond to the col names of the matrix t(act_cluster) # customized for plotting by cluster anno_col <- new_anno_row # this is loaded by data_prep.R - #print(anno_col) - cell_width_plot <- 20 - cell_height_plot <- 20 + show_colname_plot <- TRUE title <- "Transcription Factor Regulon Activity per Cluster" } cluster_row <- FALSE - #do not do row clustering if there is only one TF selected + + #do not do row heirarchal clustering if there is only one TF selected if(length(tf) > 1){ cluster_row <- TRUE } + + pheatmap::pheatmap(t(act), show_colnames = show_colname_plot, scale = "none", @@ -767,36 +580,26 @@ plot_heatmap <- function(tf, method, region, TF_and_ext, #brain_data, cell_plot_ # change the default color annotation annotation_colors = master_palette, # loaded by data_prep.R annotation_legend = FALSE, - cellwidth = cell_width_plot, - cellheight = cell_height_plot) + cellwidth = 20, + cellheight = 20) } #----------------------------Dimension reduction--------------------------------------------- -#' Make UMAP clustering scatterplot +#' Make DR clustering scatterplot #' #' @param tf_number Either 1 or 2. In the tf input vector we get from user in Shiny app, there could be #' multiple tfs, but we only support plotting two tfs since the scatterplot is big -#' @param overall_brain_data metadata (forebrain_data or pon_data), saved in data_cortex +#' @param cell_metadata metadata containing DR coordinates , saved in data_cortex #' and data_pons #' @param cell_activity_data made by create_activity_data() function given the tf input -#' @param sample_number we eliminate half of the cell samples to relieve the burden of -#' the RAM to speed up plotting, since we have over 37000 cells(samples), we randomly sample -#' 13000 to optimize speed, but one can also specify this value to see fewer or more sample points +#' @param dim_red_type allow user to select between UMAP, tSNE, PCA DR methods #' -#' @return a UMAP scatter plot that shows in which cluster(region) the tf expresses the most +#' @return a DR scatter plot coloured by TF activity #' -#' @examples -#' tf <- c("Arx","Lef1") -#' activity_test_tf1 <- create_activity_data(tf, "Cell","cortex", data_cortex$TF_and_ext) -#' plot_UMAP(tf_number = 1,data_cortex$overall, activity_test_tf1) -#' -plot_UMAP <- function(tf_number, cell_metadata, cell_activity_data, dim_red_type){ #cell_metadata is the tsv with the + +plot_dr <- function(tf_number, cell_metadata, cell_activity_data, dim_red_type){ #cell_metadata is the tsv with the #embedding coordinates for each cell in the dataset - # if(tf_number == 1) tf_plot <- 2 # number of col, the first col is Cell, so start from 2 - # else if(tf_number == 2) tf_plot <- 3 - # else{return( - # "Wrong usage, now we only support plotting two tfs since the scatterplot is big" - # )} + tf_plot <- tf_number + 1 #replaces the above control flow @@ -804,6 +607,8 @@ plot_UMAP <- function(tf_number, cell_metadata, cell_activity_data, dim_red_type activity_tf <- cell_activity_data[,tf_plot][[1]] #extracts the TF activity from the cell_activity_data #and appends it to the cell_metadata to make cell meta with activity to plot + TF <- colnames(cell_activity_data)[tf_plot] + cell_meta_with_activity <- mutate(cell_metadata, activity_tf = activity_tf) x_axis <- switch(dim_red_type, "umap" = "UMAP1", "tsne" = "tSNE_1", "pca" = "PC1") @@ -812,13 +617,18 @@ plot_UMAP <- function(tf_number, cell_metadata, cell_activity_data, dim_red_type ggplot(data = cell_meta_with_activity, mapping = aes_string(x = x_axis, y = y_axis))+ geom_point(aes(color = activity_tf), alpha = 0.2)+ scale_color_gradient(low = "grey90", high = "red")+ - theme_min() + labs(color = 'TF Activity') + theme_min() + labs(color = 'TF Activity') + ggtitle(TF) } -#if I want to include labeled clusters, then I need to map cells to the clusters -#place a label at the mean of the umap coordinates for the cells that belong in that cluster -color_by_cluster <- function(cell_metadata, cluster_palette, dim_red_type, cluster_label, +#' @param cluster_label boolean: include ggrepel label of clusters or not +#' @param cell_metadata metadata containing DR coordinates +#' @param per_sample indicates if the function call is for a per_sample dataset +#' @param dim_red_type allow user to select between UMAP, tSNE, PCA DR methods +#' +#' @return a DR scatter plot coloured by cluster label + +color_by_cluster <- function(cell_metadata, dim_red_type, cluster_label, per_sample = FALSE){ x_axis <- switch(dim_red_type, "umap" = "UMAP1", "tsne" = "tSNE_1", "pca" = "PC1") @@ -828,20 +638,12 @@ color_by_cluster <- function(cell_metadata, cluster_palette, dim_red_type, clust var_group <- "Joint_cluster" if(per_sample){ - var_group <- "ID_20190715_with_blacklist" + var_group <- "ID_20201028_with_exclude" } - # centers <- cell_metadata %>% - # group_by(get(var_group)) %>% - # summarise(center_x = median(get(x_axis)), - # center_y = median(get(y_axis))) - - #print("step1") - #print(centers) gg <- ggplot(data = cell_metadata, mapping = aes_string(x = x_axis,y = y_axis))+ - geom_point(aes(color = get(var_group)), alpha = 0.2) + theme_min() + theme(legend.position="bottom") + - guides(fill=guide_legend(nrow=5, byrow=TRUE)) + scale_color_manual(values = cluster_palette) + - labs(color = 'Cluster Label') + geom_point(aes(color = get(var_group)), alpha = 0.2) + theme_min() + theme(legend.position="none") + + scale_color_manual(values = master_palette$Cluster) # print("step2") if(cluster_label){ @@ -864,12 +666,11 @@ color_by_cluster <- function(cell_metadata, cluster_palette, dim_red_type, clust segment.size = 0.5, arrow = arrow(length = unit(0.01, 'npc'))) } - #print("step3") return(gg) } #----------------------------Time Course Ribbon Plot--------------------------------------------- -#need to maybe change the colors, select out the numbers and rename legend, just cosmetic things + #' make cell metadata of certain region, cortex/pon #' @@ -882,46 +683,41 @@ color_by_cluster <- function(cell_metadata, cluster_palette, dim_red_type, clust #' cell_metadata_cortex <- read_tsv("data/joint_cortex/joint_cortex.metadata.tsv") #' cell_metadata_cortex <- create_cell_metadata(cell_metadata_cortex) #' -create_metadata_timeseries <- function(cell_metadata, part){ - if(part == "cortex") level <- c("Forebrain E12.5", +create_metadata_timeseries <- function(cell_metadata, part, general_cluster_labels){ + if(part == "cortex") level <- c("Forebrain E10.5", + "Forebrain E12.5", + "Forebrain E13.5", "Forebrain E15.5", + "Forebrain E16.5", + "Forebrain E18.5", "Forebrain P0", "Forebrain P3", "Forebrain P6") - else if (part == "pons") level <- c("Hindbrain E12.5", + else if (part == "pons") level <- c("Hindbrain E10.5", + "Hindbrain E12.5", + "Hindbrain E13.5", "Pons E15.5", + "Pons E16.5", + "Pons E18.5", "Pons P0", "Pons P3", "Pons P6") - else{(return("Wrong usage, input either cortex or pons"))} + else{(return("Wrong usage, input either cortex or pons"))} #sanity check + #print(level) cell_metadata %>% select(Age = Sample, Cell, Cluster = Sample_cluster) %>% # In this case, we remove the "prefix" of the Cluster column, so that we are # simply left with the abbreviation representing the cell type, so that # we can link the cells of the same cell type across ages - separate(Cluster, into = c("Prefix", "Cluster"), sep = "_") %>% + #separate(Cluster, into = c("Prefix", "Cluster"), sep = "_") %>% + #filter(!grepl("EXCLUDE", Cluster)) %>% + mutate(broad_cluster = recode(Cluster, !!!general_cluster_labels)) %>% mutate(Age = factor(Age, levels = level)) %>% arrange(Cell) } -# create_cell_metadata_pon <- function(metadata_part){ -# metadata_part %>% -# select(Age = orig.ident, Cell, Cluster = ID_20190715_with_blacklist_and_refined) %>% -# # In this case, we remove the "prefix" of the Cluster column, so that we are -# # simply left with the abbreviation representing the cell type, so that -# # we can link the cells of the same cell type across ages -# separate(Cluster, into = c("Prefix", "Cluster"), sep = "_") %>% -# mutate(Age = factor(Age, levels = c("Hindbrain E12.5", -# "Pons E15.5", -# "Pons P0", -# "Pons P3", -# "Pons P6"))) %>% -# arrange(Cell) -# -# } - #' Translate transcription factor name version #' @@ -978,72 +774,79 @@ translate_tf <- function(tf_list, tf_dataframe){ #' cell_metadata_cortex <- create_metadata_timeseries(data_cortex$cell_metadata, "cortex") #' plot_timeseries(TF,cell_metadata_cortex, binary_activity) #' -plot_timeseries <- function(TF,cell_metadata, activity, make_plotly = FALSE, show_legend = TRUE){ +plot_timeseries <- function(TF, cell_metadata, activity, make_plotly = FALSE, show_legend = TRUE){ + + cell_names <- colnames(activity) activity <- activity[TF, ] %>% - {data.frame("TF" = .)} %>% - tibble::rownames_to_column(var = "Cell") %>% # the original activity vector has names - arrange(Cell) + {data.frame("TF" = .)} + + activity$Cell <- cell_names + + activity <- activity %>% arrange(Cell) + + #there is one cell in the pons metadata file that is not in the binary activity matrix so im gonna remove it + #not sure why its here + + + cell_diff <- setdiff(cell_metadata$Cell, activity$Cell) + if(length(cell_diff) > 0) {cell_metadata <- cell_metadata %>% filter(cell_metadata$Cell != cell_diff)} + + if(!all(cell_metadata$Cell == activity$Cell)) return (-1) - # Add the TF activity to the new dataframe + # Add the binarized TF activity to the new dataframe ribbon_df <- cell_metadata ribbon_df$TF <- activity$TF ribbon_df <- ribbon_df %>% - filter(!grepl("BLACKLIST", Cluster)) + filter(!grepl("EXCLUDE", Cluster)) %>% + select(-Cluster) + ribbon_df_celltype_frac <- ribbon_df %>% group_by(Age) %>% # Total cells at each age mutate(total = n()) %>% - group_by(Age, Cluster) %>% + group_by(Age, broad_cluster) %>% # Proportion of TF+ cells per cluster, per age mutate(frac = sum(TF > 0) / total) %>% - distinct(Age, Cluster, frac) %>% - ungroup() + distinct(Age, broad_cluster, frac) %>% + ungroup() %>% group_by(Age) - ribbon_df_cum_frac <- ribbon_df %>% - group_by(Age) %>% - summarize(cumfrac = sum(TF > 0) / n()) %>% - ungroup() + ribbon_df_clusters_complete <- ribbon_df_celltype_frac %>% + mutate(broad_cluster = factor(broad_cluster, levels = unique(.$broad_cluster))) %>% + complete(broad_cluster, nesting(Age), fill = list(frac = 0)) - timepoints2 <- ribbon_df$Age - clusters <- ribbon_df$Cluster + ribbon_df_clusters_complete$xpos = group_indices(ribbon_df_clusters_complete) - df = data.frame(cluster = rep(unique(clusters), length(unique(timepoints2))), - stage = do.call(c, lapply(as.character(unique(timepoints2)), rep, times = length(unique(clusters))))) - - df$ranking = match(df$cluster, names(colours)) - df = df[order(df$stage, df$ranking),] - - df <- left_join(df, select(ribbon_df_celltype_frac, cluster = Cluster, stage = Age, frac)) %>% - mutate(frac = replace_na(frac, 0)) %>% - left_join(select(ribbon_df_cum_frac, stage = Age, cumfrac)) - - df$xpos = match(df$stage, unique(timepoints2)) - - #view(df) - - plot <- df %>% - ggplot(aes(x = xpos, y = frac, fill = cluster)) + + plot <- ribbon_df_clusters_complete %>% + ggplot(aes(x = xpos, y = frac, fill = broad_cluster)) + geom_area(stat = "identity", show.legend = show_legend) + - scale_fill_manual(values = colour_palette, drop = FALSE, name = "") + - scale_x_continuous(breaks = seq_along(unique(df$stage)), - labels = c("E12.5", "E15.5", "P0", "P3", "P6"), - limits = c(1, length(unique(df$stage)))) + + scale_fill_manual(values = palette_broad_clusters, drop = FALSE, name = "") + + scale_x_continuous(breaks = c(1,2,3,4,5,6,7,8,9), + labels = c("E10.5", "E12.5", "E13.5", "E15.5", "E16.5", "E18.5", "P0", "P3", "P6"), + limits = c(1, 9)) + labs(x = "Developmental Age", y = "Proportion", title = TF) + guides(fill = guide_legend(ncol = 5)) + theme_min() + theme(legend.position = "bottom") if(make_plotly) { - return (ggplotly(plot, tooltip = "cluster") %>% style(hoveron = "points + fills")) + return (ggplotly(plot, tooltip = "broad_cluster") %>% style(hoveron = "points + fills")) } else{return(plot)} } #--------------------------Active_specific------------------------ + +#' @param sample name of the data-set that the user wants to look at, used as a part of the path of file to read +#' @param cluster cluster of interest + +#' +#' @return a list of 3 matrices: tf_table corresponds to the table for a specific cluster and used in the By_cluster subtab +#' FC_df and AUC_df are matriices containing AUC and FC values for each gene in each cluster + active_specific_prep <- function(sample, cluster){ - path <- glue("data/{sample}/{sample}.active_specific_tf.Rds") + path <- glue("data/{sample}/{sample}.active_specific_tf.Rds") data <- readRDS(path) #print() tf_table <- data$tf_table[[cluster]] %>% @@ -1054,13 +857,23 @@ active_specific_prep <- function(sample, cluster){ send_back <- list("tf_table" = tf_table, "FC_df" = FC_df, "AUC_df" = AUC_df) - #names(data) <- cluster } + +#' @param data data generated by active_specific_prep. Specifically, the tf_table part +#' @param fc Fold change cutoff selected by the user, TFs above cutoff are labelled and displayed in the table, genes below +#' are not +#' @param cluster cluster of interest + +#' +#' @return a scatter plot of TFs + plot_scatter <- function(data, fc, cluster){ + #data used for ggrepel + #the mutate(gsub) statements are to replace the "_extended (21g)" portion with just a "+" to shorten the label to_label <- data %>% filter(AUC_FC > fc) %>% mutate(why = gsub("_extended", "+", TF)) %>% mutate(TF = gsub("\\(.+\\)", "", why)) - #print(to_label) + ggplot(data = data, mapping = aes(AUC_out, AUC_in)) + geom_point(aes(color = AUC_FC, shape = is_ext), size = 4, alpha = 0.6) + @@ -1087,16 +900,19 @@ plot_scatter <- function(data, fc, cluster){ segment.size = 0.5, arrow = NULL) } + +#' @param data data generated by active_specific prep. Specifically, the AUC_df data frame +#' @param tf cluster of interest + +#' +#' @return a list of bar plots that displays the TF ranked by AUC value in each cluster plot_bar_list <- function(data, tf){ data <- data %>% mutate(Cluster = gsub("_", " ", Cluster)) - #print(data) - palette <- hm_anno$side_colors$Cluster[names(hm_anno$side_colors$Cluster) %in% data$Cluster] - #print(palette) - #print(length(data$Cluster)) - #print(length(palette)) - #(data) - #purrr::map(tf, ~print(.x) ) + + palette <- master_palette$Cluster[names(master_palette$Cluster) %in% data$Cluster] + + # 1. loop over genes, then each index can be referred to with .x purrr::map(.x = tf, .f = ~data %>% select(.x, Cluster) %>% arrange(desc(.x)) %>% head(30) %>% @@ -1112,4 +928,117 @@ plot_bar_list <- function(data, tf){ } +#-------------Bubble-plot-------------------- +#code adapeted from the clusters app + +bubble_prep <- function(sample, tf, dend_order, scale){ + path <- glue("data/{sample}/{sample}.active_specific_tf.Rds") + data <- readRDS(path) + + AUC <- data$AUC_df %>% select(Cluster, tf) %>% + filter(Cluster %in% dend_order) + + FC <- data$FC_df %>% select(Cluster, tf) %>% + filter(Cluster %in% dend_order) + + # Scale activity of each tf linearly across clusters to [0, 1] + if (scale) { + + AUC <- AUC %>% + as.data.frame() %>% + select(-Cluster) %>% + apply(2, scales::rescale, to = c(0, 1)) %>% + as.data.frame %>% + mutate(Cluster = rownames(AUC)) %>% + select(Cluster, everything()) + + + } + + + # Convert to long / tidy format with columns: Cluster, TF, AUC + AUC <- AUC %>% + gather(., "TF", "AUC", 2:ncol(.)) + + #print(AUC) + + FC <- FC %>% + gather(., "TF", "FC", 2:ncol(.)) + + #print(AUC) + + # print(FC) + + df <- left_join(AUC, FC, by = c("Cluster", "TF")) + #print(df) + + # Tidy data for plotting + df <- df %>% + + # Order genes to match order input by user + mutate(TF = factor(TF, levels = rev(tf))) %>% + arrange(TF) %>% + + # Pad gene names so that the plot takes up a more standardized + # width; to roughly the the # of characters in the gene w/ longest name + # However, letters take up more pixels than spaces, so do less padding + # for genes with longer names + # TODO: Fix alignment of bubble plot w/ dendrogram for long gene names (issue #7) + mutate(TF_padded = case_when( + str_length(TF) <= 5 ~ str_pad(TF, 15, side = 'right', pad = " "), + str_length(TF) > 5 ~ str_pad(TF, 12, side = 'right', pad = " ") + ) + ) %>% + mutate(TF_padded = factor(TF_padded, levels = unique(.$TF_padded))) %>% + + # Order the clusters on the x-axis to match the dendrogram image + #slice(match(dend_order_joint_cortex_extended, Cluster)) %>% + + # Keep columns + select(TF, Cluster, AUC, FC, TF_padded) + + # df$Cluster <- factor(df$Cluster, levels = dend_order) + + label_palette <- hm_anno$side_colors$Cluster + names(label_palette) <- gsub(" ", "_", names(hm_anno$side_colors$Cluster)) + label_palette <- label_palette[dend_order] + + + + return(list("data" = df, "label_palette" = label_palette)) +} + +plot_bubble <- function(data, label_palette, dend_order){ + + data$Cluster <- factor(data$Cluster, levels=dend_order) + + p1 <- data %>% + ggplot(aes(x = Cluster, y = TF_padded)) + + geom_point(aes(size = FC, colour = AUC), alpha = 0.8) + + #scale_size_area(max_size = 4) + + scale_size(range = c(0.1, 4)) + + scale_color_gradientn(colours = tail(rdbu, 70)) + + theme_min() + + ylab(NULL) + + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, + colour = label_palette, + size = rel(0.7)), + panel.grid.major.x = element_line(colour = "grey90"), + panel.border = element_blank(), + axis.ticks.x = element_blank(), + axis.ticks.y = element_blank(), + legend.key.width = unit(1, "cm"), + legend.position = "bottom") + + # Put gene labels on the right hand side to improve alignment + scale_y_discrete(position = "right") + + gene_labels <- cowplot::get_y_axis(plot = p1, position = "right") + + p1 <- p1 + scale_y_discrete(labels = NULL) + + return(list(plot = p1, labels = gene_labels)) +} + + + diff --git a/GRN/global.R b/GRN/global.R index 113a161..7578fc4 100644 --- a/GRN/global.R +++ b/GRN/global.R @@ -13,8 +13,6 @@ library(ggplot2) library(cowplot) library(pheatmap) library(DT) -#library(rcytoscapejs2) # downloaded from https://github.com/uc-bd2k/rcytoscapejs2 -#could probably get rid of rcytoscape library(glue) library(GGally) library(sna) @@ -24,35 +22,53 @@ library(purrr) library(shinyWidgets) # Data -load("data/joint_cortex/cortex_prep.Rda") # a list, data_cortex -load("data/joint_pons/pons_prep.Rda") # a list, data_pons +load("data/joint_cortex_extended/cortex_extended_prep.Rda") # a list, data_cortex_extended +load("data/joint_pons_extended/pons_extended_prep.Rda") # a list, data_pons_extended load("data/shared/common_prep.Rda") # metadata and colour_palettes -data_ct_e12 <- readRDS("data/ct_e12/ct_e12_prep.Rds") # a list, data_ct_e12 +#a list of R objects for each per-sample SCENIC dataset prepared in data_prep.R +#Cortex +data_ct_e10 <- readRDS("data/ct_e10/ct_e10_prep.Rds") +data_ct_e12 <- readRDS("data/ct_e12/ct_e12_prep.Rds") +data_ct_e13 <- readRDS("data/ct_e13/ct_e13_prep.Rds") data_ct_e15 <- readRDS("data/ct_e15/ct_e15_prep.Rds") +data_ct_e16 <- readRDS("data/ct_e16/ct_e16_prep.Rds") +data_ct_e18 <- readRDS("data/ct_e18/ct_e18_prep.Rds") data_ct_p0 <- readRDS("data/ct_p0/ct_p0_prep.Rds") data_ct_p3 <- readRDS("data/ct_p3/ct_p3_prep.Rds") data_ct_p6 <- readRDS("data/ct_p6/ct_p6_prep.Rds") -data_po_e12 <- readRDS("data/po_e12/po_e12_prep.Rds") +#Pons +data_po_e10 <- readRDS("data/po_e10/po_e10_prep.Rds") +data_po_e12 <- readRDS("data/po_e12/po_e12_prep.Rds") +data_po_e13 <- readRDS("data/po_e13/po_e13_prep.Rds") data_po_e15 <- readRDS("data/po_e15/po_e15_prep.Rds") +data_po_e16 <- readRDS("data/po_e16/po_e16_prep.Rds") +data_po_e18 <- readRDS("data/po_e18/po_e18_prep.Rds") data_po_p0 <- readRDS("data/po_p0/po_p0_prep.Rds") data_po_p3 <- readRDS("data/po_p3/po_p3_prep.Rds") data_po_p6 <- readRDS("data/po_p6/po_p6_prep.Rds") -#load_plots + +#order of clusters in the dendrograms +dend_order_joint_cortex_extended <- readRDS("data/joint_cortex_extended/dendrogram_order_joint_extended_forebrain.Rds") +dend_order_joint_pons_extended <- readRDS("data/joint_pons_extended/dendrogram_order_joint_extended_pons.Rds") + +#load ribbon_plots of proportion of cells across developmental time in pons and forebrain load("data/shared/timeseries_proportion_plots.Rda") # Custom functions source("functions.R") -#allows server side save states +#allows URL save states enableBookmarking(store = "url") -#store variable accessible to all of app -dev_time_points <- c("e12", "e15", "p0", "p3", "p6") +#this vector is used repeatedly in app as the selection options for per timepoint data +#visualisation, assigned here to make it accessible to all of app +dev_time_points <- c("e10", "e12", "e13", "e15", "e16", "e18", "p0", "p3", "p6") -#overwrite pheatmap function so that the heatmap labels can be 45 degrees +#overwrite pheatmap function that draws the column names on the heatmap +#so that the heatmap labels can be 45 degrees library(grid) draw_colnames_45 <- function (coln, gaps, ...) { @@ -64,3 +80,6 @@ draw_colnames_45 <- function (coln, gaps, ...) { ## 'Overwrite' default draw_colnames with your own version assignInNamespace(x="draw_colnames", value="draw_colnames_45", ns=asNamespace("pheatmap")) + +#palette for bubble plot +rdbu <- rev(grDevices::colorRampPalette(RColorBrewer::brewer.pal(8, "RdBu"))(n = 100)) diff --git a/GRN/server.R b/GRN/server.R index 426bede..9712149 100644 --- a/GRN/server.R +++ b/GRN/server.R @@ -1,51 +1,46 @@ -#----------------------------------problem--------------------------------- -#need to check select input for TF and genes and display any that are not in the data or annotation -#then generate a list of genes or TFs to use and then supply that as the input_new()$tf input + server <- function(input, output, session) { - # Dynamic UI, change the selectInput tf lists on display depending on the brain region that is selected - updateSelectizeInput(session, inputId = "TF", choices = all_tf_list, - selected = c("Arx","Lef1"), server = TRUE) + # Dynamic UI, change the selectInput tf and gene lists on display depending on the brain region that is selected observeEvent(input$region,{ if(input$region == "cortex"){ updateSelectizeInput(session, inputId = "TF", choices = all_tf_list, selected = c("Arx","Lef1"), server = TRUE) - updateSelectizeInput(session, inputId = "gene", choices = unique(data_cortex$TF_target_gene_info$gene), + updateSelectizeInput(session, inputId = "gene", choices = unique(data_cortex_extended$TF_target_gene_info$gene), selected = c("Dlx6","Sox6"), server = TRUE ) } else{ updateSelectizeInput(session, inputId = "TF", choices = all_tf_list, selected = c("Lhx5","Pax7"), server = TRUE) - updateSelectizeInput(session, inputId = "gene", choices = unique(data_pons$TF_target_gene_info$gene), + updateSelectizeInput(session, inputId = "gene", choices = unique(data_pons_extended$TF_target_gene_info$gene), selected = c("Gad2"), server = TRUE) } - #updateRadioButtons(session, "show", selected = "stop") #resets the network visualization }) #uses the input update button to update a list of the parameters of the app for the following functions input_new <- eventReactive(input$update,{ - #data_cortex and data_pons are created by data_prep.R and loaded in global.R contains a list + #data_cortex_extended and data_pons_extended are created by data_prep.R and loaded in global.R contains a list #data files (defined in data_prep.R) l <- list() if(input$region == "cortex"){ - l <- data_cortex + l <- data_cortex_extended temp <- paste("F-", input$time, sep = "") + l$dend_order <- dend_order_joint_cortex_extended } else if(input$region == "pons"){ - l <- data_pons + l <- data_pons_extended temp <- paste("P-", input$time, sep = "") + l$dend_order <- dend_order_joint_pons_extended } - #str(input$TF) + l$tf <- input$TF l$region <- input$region - #l$input_pathway <- input$input_pathway l$method <- input$method - l$num_cell_plot <- input$num_cell_plot l$time_point <- temp l$gene <- input$gene l$label <- input$label @@ -59,10 +54,9 @@ server <- function(input, output, session) { l$as_toggle <- input$as_toggle l$fc <- input$fc l$as_tp <- input$as_tp - # l$gene_file_path <- input$file_gene$datapath - # print(l$gene_file_path) - # l has following elements with same names for both options above: - # l contains ... + l$bubble_scale <- input$bubble_scale + l$GRN_by_gene <- input$GRN_by_gene + # We will use the same name attributes to retrieve data return (l) @@ -79,8 +73,18 @@ server <- function(input, output, session) { #before the insertUI kicks in and supplies the correct time value to the path #using priorities in the observeEvent has not helped - #solved using req() lol + #UPDATE: + #solved using req() + + + #same chunk of code repeated for each tab that displays per-sample data + #observes for 2 things: what tab the app is currently displaying, and if the per-sample toggel for that tab is T or F + #If the toggle is on for the tab that is curretnly displayed, insert an UI element to select which time-point to display + #data for. + + #Organized this way such that each chunk of code corresponds to one tab, can be condensed, but done this way for ease of reading + #--------------Insert UI: Table Tab----------------- observeEvent(input$tabs, { if (input$table_toggle == TRUE & !identical(input$tabs, "Transcription Factor Target Information")){ @@ -129,6 +133,7 @@ server <- function(input, output, session) { } }, priority = 1000) + #--------------Insert UI: GRN Tab----------------- observeEvent(input$tabs, { if (input$grn_toggle == TRUE & !identical(input$tabs, "Regulatory Network Visualization")){ @@ -175,6 +180,8 @@ server <- function(input, output, session) { } }) + #--------------Insert UI: Heatmap Tab----------------- + #heatmap tab has a timepoint selection UI element by default so this just updates the selection options observeEvent(input$heatmap_toggle,{ if(input$heatmap_toggle){ updateCheckboxGroupInput(session, inputId = "method", "Cluster Method", @@ -195,8 +202,8 @@ server <- function(input, output, session) { selected = "joint") updateSelectInput(session, inputId = "time", label = "Time-point to Visualize", - choices = c("All", "e12", "e15", "p0", "p3", "p6"), - selected = "All") + choices = c("All Time-Points", dev_time_points), + selected = "All Time-Points") output$hm_data <- renderText({ "" }) @@ -204,6 +211,7 @@ server <- function(input, output, session) { }) + #--------------Insert UI: DR plot Tab----------------- observeEvent(input$tabs, { if (input$cluster_toggle == TRUE & !identical(input$tabs, "Clustering")){ @@ -250,6 +258,8 @@ server <- function(input, output, session) { } }) + #--------------Insert UI: Active/Specific Tab----------------- + #Update the active and specific tab inputs and responding to toggle observeEvent(input$tabs, { if (input$as_toggle == TRUE & !identical(input$tabs, @@ -298,13 +308,14 @@ server <- function(input, output, session) { } }) + #update inputs when toggle is turned so that the plots auto-update - observeEvent(input$grn_toggle|input$table_toggle|input$heatmap_toggle|input$cluster_toggle|input$as_toggle, { + observeEvent(input$grn_toggle|input$table_toggle|input$heatmap_toggle|input$cluster_toggle|input$as_toggle|input$bb_toggle, { click(id = "update") }, ignoreInit = TRUE, priority = 1) #updates a reactive value reg depending on the input region which is used to select the right dataset to display in the app - #uses the input_new() region because wants to be dependant on the update button + #uses the input_new() region because wants to be dependent on the update button reg <- reactive({ if(identical(input_new()$region, "cortex")){ "ct" @@ -314,20 +325,26 @@ server <- function(input, output, session) { } }) + #displays informative pop-up about the per-timepoint toggles observeEvent(input$info|input$info1|input$info2|input$info3|input$info4, { sendSweetAlert(session, title = "What Is This?", text = "Use toggle to explore data from SCENIC analyses performed on each developmental time-point individually.") }, ignoreInit = TRUE) + #Initiates a reactive value that will contain all the TF inputs + # by the user, subset by whether or not the TFs are active in the currently viewed + #dataset + tf_list <- reactiveValues( TF_in_data = NULL, TF_not_data = NULL ) + + + #Tells the user which of their inputs are not active in our dataset output$tf_check <- renderText({ - # if(identical(input$tabs, "active_specific")){ - # "" - # } + if(!length(tf_list$TF_in_data) > 0 & !is.null(tf_list$TF_in_data)){ " None of the input TFs are active in the current dataset. " } @@ -339,10 +356,12 @@ server <- function(input, output, session) { # -----------------------------Table------------------------------------------ - #filter the data, add a column for logos, then display + #Starts with the dataframe containing all regulatory relationships of all TFs + #filter the data based on TF input, add a column for gene motif logos, then display output$table1 <- renderDataTable({ # process data, filter the lines with our interested TF - + + datafile <- glue("data_{reg()}_{input$table_tp}") @@ -350,16 +369,24 @@ server <- function(input, output, session) { req(input$table_tp) + #checks to see which TFs in the use input are active and which are not, and then subset accordingly + temp <- check_tf_input(input_new()$tf, get(datafile)$unique_TF) tf_list$TF_in_data <- temp$TF_in_data tf_list$TF_not_data <- temp$TF_not_data - #print(tf_list) subset_data <- get(datafile)$TF_target_gene_info %>% dplyr::filter(TF %in% tf_list$TF_in_data) %>% - select(TF, gene, Genie3Weight.weight, highConfAnnot, nMotifs, bestMotif) + select(TF, gene, starts_with("Genie3Weight"), highConfAnnot, nMotifs, bestMotif) + + #the column name for the SCENIC inference weight has a different name depending on if the timepoint + #is in the original dataset from 2019 nat genetic or if it is the extended dataset + #so need to have a variable weight_col to rename the appropriate column in the datatable() function below + if(input$table_tp %in% c("e12", "e15", "p0", "p3", "p6")){weight_col <- "Genie3Weight.weight"} + else{weight_col <- "Genie3Weight"} + } else{ @@ -368,11 +395,11 @@ server <- function(input, output, session) { tf_list$TF_in_data <- temp$TF_in_data tf_list$TF_not_data <- temp$TF_not_data - #print(tf_list) subset_data <- input_new()$TF_target_gene_info %>% dplyr::filter(TF %in% tf_list$TF_in_data) %>% - select(TF, gene, Genie3Weight.weight, highConfAnnot, nMotifs, bestMotif) + select(TF, gene, starts_with("Genie3Weight"), highConfAnnot, nMotifs, bestMotif) + weight_col <- "Genie3Weight" } subset_data <- addMotifPic(subset_data) @@ -380,14 +407,14 @@ server <- function(input, output, session) { datatable(subset_data, escape = FALSE, colnames = c('Gene' = 'gene', 'Number of Motifs' = 'nMotifs', 'Best Motif' = 'bestMotif', - 'Strength of Association' = 'Genie3Weight.weight', + 'Strength of Association' = weight_col, 'Logo' = 'motif_logo'), rownames = FALSE) }) # -----------------------------GRN------------------------------------------ - gene_list <- reactiveValues( #makes reactive values to take in the user input gene list + gene_list <- reactiveValues( #makes reactive values to take in the user input gene list from a file data = NULL, clear = FALSE ) @@ -413,7 +440,7 @@ server <- function(input, output, session) { }, priority = 1000) output$file1_ui <- renderUI({ - input$reset ## Create a dependency with the reset button + input$reset ## Create a dependency with the reset button that clears the UI everytime the reset button is clicked fileInput( "file_gene", "Choose a CSV file containing your genes list", accept = c( @@ -424,6 +451,7 @@ server <- function(input, output, session) { placeholder = "example_list.csv" ) }) + #check if there is a user input gene_list file, if there is, use it, if not, use the selectInput genes igraph_network <- reactive ({ @@ -447,8 +475,9 @@ server <- function(input, output, session) { req(length(tf_list$TF_in_data) > 0) - make_network(tf_list$TF_in_data, get(datafile)$TF_target_gene_info, - gene_to_highlight) + + make_network(tf_list$TF_in_data, get(datafile)$TF_target_gene_info, + gene_to_highlight) } else{ @@ -458,9 +487,13 @@ server <- function(input, output, session) { tf_list$TF_not_data <- temp$TF_not_data req(length(tf_list$TF_in_data) > 0) - - make_network(tf_list$TF_in_data, input_new()$TF_target_gene_info, - gene_to_highlight) #returns an igraph network object + + + + make_network(tf_list$TF_in_data, input_new()$TF_target_gene_info, + gene_to_highlight) #returns an igraph network object + + } }) network_ggplot <- reactive({ @@ -484,20 +517,14 @@ server <- function(input, output, session) { # -----------------------------Heatmap------------------------------------------- - # output$color_hm_palette <- renderImage({ - # - # expr = list(src = "www/timeseries_color.png", #picture of colors corresponding with clusters - # alt = "This is alternate text") - # - # - # }, - # deleteFile = FALSE) + #this plot displays a heatmap of TF activity in each joint cluster when the per-sample toggle is off + #when the per_sample toggle is on, it displays the heatmap of TF activity of whatever timepoint is elected hm_joint_cluster_plot <- reactive({ req("joint" %in% input_new()$method) - if(input_new()$heatmap_toggle == TRUE){ + if(input_new()$heatmap_toggle == TRUE){ #luser is looking at per_sample data datafile <- glue("data_{reg()}_{input$time}") @@ -510,7 +537,7 @@ server <- function(input, output, session) { req(input$time != "All") plot_heatmap(tf_list$TF_in_data, "joint",input_new()$region, - get(datafile)$TF_and_ext, per_sample = input_new()$heatmap_toggle, + get(datafile)$TF_and_ext, per_sample = TRUE, timepoint = input$time) } else{ @@ -525,8 +552,10 @@ server <- function(input, output, session) { } }) - #need to adjust this, at least zoomable, change color scheme to dark blue and neon yellow - hm_sample_cluster_plot <- reactive({ #this is displaying heatmap clustering by sample cluster and not joint cluster + + #this plot shows TF activity heatmap for the joint_cortex(or pons)_extended dataset but using the sample clusers labels + #it is not displayed if the toggle is on + hm_sample_cluster_plot <- reactive({ req("Cluster" %in% input_new()$method) temp <- check_tf_input(input_new()$tf, unique(input_new()$TF_and_ext[["type"]])) @@ -541,11 +570,12 @@ server <- function(input, output, session) { }) output$heatmap_joint <- renderPlot({ - #print(str(input_new()$tf)) + req(length(input_new()$tf) != 23 ) #change this line after the tf input check has been done hm_joint_cluster_plot() }) + #changes name of the file hm_name <- reactive({ if(input$heatmap_toggle){ glue("{reg()}_{input$time}_heatmap.pdf") @@ -554,6 +584,7 @@ server <- function(input, output, session) { "heatmap_joint.pdf" } }) + output$download_hm_joint <- downloadHandler(filename = hm_name(), contentType = "application/pdf", content = function(file){ @@ -561,9 +592,20 @@ server <- function(input, output, session) { width = 20, height = 25) }) - output$heatmap_cluster <- renderPlot({ - #req(length(input_new()$tf) != 23 ) #change this line after the tf input check has been done + output$heatmap_cluster <- renderUI({ + + #adjusts plot width manually so that only the large plots have scrolling + if(input$time == "All Time-Points" & input$region == "cortex"){plot_width = '3500px'} + else if(input$time == "All Time-Points" & input$region == "pons"){plot_width = '3800px'} + else{plot_width = '800px'} + + plotOutput('heatmap_cluster_plot', width = plot_width) + }) + + output$heatmap_cluster_plot <- renderPlot({ + hm_sample_cluster_plot() + }) @@ -584,6 +626,7 @@ server <- function(input, output, session) { if(input_new()$cluster_toggle){ datafile <- glue("data_{reg()}_{input$cluster_tp}") + #print(datafile) temp <- check_tf_input(input_new()$tf, unique(get(datafile)$TF_and_ext[["type"]])) tf_list$TF_in_data <- temp$TF_in_data @@ -608,7 +651,7 @@ server <- function(input, output, session) { } }) #this part takes the TF that the user selects and subsets the feather file containing all #regulon activity data to include only the regulons that the user selects and this is entered into the - #UMAP plot function + #DR plot function @@ -621,14 +664,15 @@ server <- function(input, output, session) { req(input$cluster_tp) - plot_UMAP(tf_number = 1, get(datafile)$cell_metadata, + plot_dr(tf_number = 1, get(datafile)$cell_metadata, activity_data_cluster(), input_new()$dim_red) } else{ - plot_UMAP(tf_number = 1,input_new()$cell_metadata, + plot_dr(tf_number = 1,input_new()$cell_metadata, activity_data_cluster(), input_new()$dim_red) } }) + Umap_plot_2 <- reactive({ req(ncol(activity_data_cluster()) > 2) @@ -638,15 +682,16 @@ server <- function(input, output, session) { req(input$cluster_tp) - plot_UMAP(tf_number = 2, get(datafile)$cell_metadata, + plot_dr(tf_number = 2, get(datafile)$cell_metadata, activity_data_cluster(), input_new()$dim_red) } else{ - plot_UMAP(tf_number = 2,input_new()$cell_metadata, + plot_dr(tf_number = 2,input_new()$cell_metadata, activity_data_cluster(), input_new()$dim_red) } }) + #plots a DR plot of the current data set where points are coloured by which cluster they belong to output$color_by_cluster <- renderPlot({ if (input_new()$cluster_toggle){ @@ -654,12 +699,12 @@ server <- function(input, output, session) { req(input$cluster_tp) - color_by_cluster(get(datafile)$cell_metadata, get(datafile)$cluster_palette, + color_by_cluster(get(datafile)$cell_metadata, input_new()$dim_red, input_new()$cluster_label, per_sample = input_new()$cluster_toggle) } else{ - color_by_cluster(input_new()$cell_metadata, input_new()$cluster_palette, + color_by_cluster(input_new()$cell_metadata, input_new()$dim_red, input_new()$cluster_label) } }) @@ -690,60 +735,6 @@ server <- function(input, output, session) { # --------------------------------------Timeseries------------------------------------------- - # tf_nexist_data <- reactive({ - # tf_nexist <- "" - # for(tf in input_new()$tf){ - # if (tf %in% input_new()$tfs_not_exist_timeseries){ - # tf_nexist <- paste(tf_nexist,tf,sep = " ") - # } - # } - # }) - # tf_desc_timeseries <- reactive({ - # tf_nexist_string <- "" - # for(tf_n in input_new()$tfs_not_exist_timeseries){ - # tf_nexist_string <- paste(tf_nexist_string,tf_n,sep = " " ) - # } - # text <- glue("We do not have these followning tfs in this tab: {tf_nexist_string}") - # - # tf_nexist <- "" - # for(tf in input_new()$tf){ - # if (tf %in% input_new()$tfs_not_exist_timeseries){ - # tf_nexist <- paste(tf_nexist,tf,sep = " ") - # } - # } - # - # if(tf_nexist == ""){ - # text <- "Good! All of your input tfs exist in our timeseries activity datasets!" - # } - # else{ - # tf_nexist_string <- "" - # for(tf_n in input_new()$tfs_not_exist_timeseries){ - # tf_nexist_string <- paste(tf_nexist_string,tf_n,sep = " " ) - # } - # text <- glue('Those tfs in your input list does not not exist in our - # timeseries datasets: {tf_nexist}. - # We do not have these followning tfs in this tab: {tf_nexist_string}') - # } - # }) - - # output$tf_timeseries_desc <- renderText({ - # # tf_desc_timeseries() - # tf_nexist_string <- "" - # for(tf_n in input_new()$tfs_not_exist_timeseries){ - # tf_nexist_string <- paste(tf_nexist_string,tf_n,sep = " " ) - # } - # text <- glue("We do not have data for the following trancription: {tf_nexist_string}") - # - # - # }) - - - output$timeseries_desc <- renderText({ - text <- "Click option: double clicking a cell type in the legend displays that cell type ONLY; - single click removes that cell type from the plot. Mouse over ribbons in the plot to see the cell types. - We only support four plots of your first four transcripton factor inputs." - - }) # we must transform the TF format, from raw form (Arx) to (Arx_extended (21g)) to fetch # information @@ -758,16 +749,6 @@ server <- function(input, output, session) { translate_tf(input_new()$tf,input_new()$binary_active_TFs) }) - # ggplotly_list_plot <- reactive({ - # req(TF_transformed()) - # # binary_active_TFs is loaded at beginning by data_prep.R - # plot_list <- lapply(TF_transformed(), plot_timeseries, cell_metadata = data_cortex$timeseries_input_meta, - # activity = data_cortex$binary_activity, make_plotly = TRUE) - # # produce a list of ggplotly plots - # subplot(plot_list, nrows = 2, margin = 0.04, heights = c(0.6, 0.4), shareX = TRUE, shareY = FALSE) - # - # }) - ggplot_list_plot <- reactive({ req(TF_transformed()) @@ -776,9 +757,9 @@ server <- function(input, output, session) { plot_grid(plotlist = plot_list) }) - output$timeseries1 <- renderPlotly({ # a plotly list + output$timeseries1 <- renderPlotly({ # a plotly plot req(length(input_new()$tf)>0) - plot_timeseries(TF_transformed()[1][1], input_new()$timeseries_input_meta, input_new()$binary_activity,make_plotly = TRUE) + plot_timeseries(TF_transformed()[1][1], input_new()$timeseries_input_meta, input_new()$binary_activity, make_plotly = TRUE) }) output$timeseries2 <- renderPlot({ # a ggplot list @@ -793,12 +774,6 @@ server <- function(input, output, session) { width = 20, height = 15) }) - output$pons_timeseries <- renderImage({ - - list(src = "www/pons_timeseries.png", - alt = "This is alternate text") - - }, deleteFile = FALSE) output$cell_proportion_timeseries <- renderPlotly({ @@ -816,13 +791,6 @@ server <- function(input, output, session) { }) - # output$timeseries_color <- renderImage({ - # - # list(src = "www/timeseries_color.png", - # alt = "This is alternate text") - # - # }, - # deleteFile = FALSE) #-------------------------------------Active specific------------------- #same idea as reg but used to update the cluster list in this tab, uses the input$region because do not want @@ -835,27 +803,37 @@ server <- function(input, output, session) { "po" } }) + + #each data-set for each timepoint and for the joint_extended data has a different set of clusters that belong to it + #clust_list is a reactive value that updates whenever the data-set that the user views changes, then its used to update + #the selection options in the side-bar clust_list <- reactive({ if(input$as_toggle){ req(input$as_tp) datafile <- glue("data_{reg2()}_{input$as_tp}") - get(datafile)$cell_metadata %>% select(ID_20190715_with_blacklist) %>% unique() %>% deframe() + get(datafile)$cell_metadata %>% select(ID_20201028_with_exclude) %>% unique() %>% + filter(!grepl("EXCLUDE", ID_20201028_with_exclude)) %>% deframe() } else{ - datafile <- glue("data_{input$region}") + datafile <- glue("data_{input$region}_extended") get(datafile)$cell_metadata %>% select(Sample_cluster) %>% unique() %>% - filter(!grepl("BLACKLISTED", Sample_cluster)) %>% deframe() + filter(!grepl("EXCLUDE", Sample_cluster)) %>% deframe() } }) + + #actual updating the selection widget update_in <- observe({ updateSelectizeInput(session, inputId = "active_specific_cluster", choices = clust_list(), selected = clust_list()[1], server = TRUE) }, priority = 1000) + + #Reactive data used in the active specific tab + #This updates depending on the region, the time-point, and the cluster of interest active_specific_data <- reactive({ if(input$as_toggle){ @@ -866,8 +844,8 @@ server <- function(input, output, session) { sample_name <- glue("data_{reg()}_{input$as_tp}") } else{ - data_sample <- glue("joint_{input_new()$region}") - sample_name <- glue("data_{input_new()$region}") + data_sample <- glue("joint_{input_new()$region}_extended") + sample_name <- glue("data_{input_new()$region}_extended") } #print(input_new()$as_cluster) @@ -875,32 +853,36 @@ server <- function(input, output, session) { "data" = active_specific_prep(data_sample, input_new()$as_cluster), "name" = sample_name ) - #active_specific_prep(data_sample, input_new()$as_cluster) }) - - + #a scatter plot and a table for the active-specific by Cluster subtab output$as_clust <- renderUI({ fluidRow( column(width = 7, plotOutput("active_specific_scatter")), - column(width = 5, tableOutput("active_specific_table")) + column(width = 5, + + (div(style='height:400px;overflow-y: scroll; width:450px;overflow-x: scroll;', + tableOutput("active_specific_table")))) ) - #fluidRow(plotOutput("active_specific_cluster")) + }) + #bar plot for the active-specific by TF subtab output$as_tf <- renderUI({ plotOutput("as_bar_AUC", height = '800px') }) + #A Umap plot that only colours the points that belong in the user selected cluster + #uses the plot from color_by_cluster function and then overwrites the palette used output$active_specific_dr <- renderPlot({ data <- get(active_specific_data()$name) - #cluster <- gsub(".+_", "", input_new()$as_cluster) + awo <- hm_anno$side_colors$Cluster names(awo) <- gsub(" ", "_", names(hm_anno$side_colors$Cluster)) to_color <- replace(awo, !grepl(input_new()$as_cluster, names(awo)), "#e5e5e5") - #print(to_color) - gg <- color_by_cluster(data$cell_metadata, data$cluster_palette, - "tsne", FALSE, + + gg <- color_by_cluster(data$cell_metadata, + "umap", FALSE, per_sample = input_new()$as_toggle) if(input$as_toggle == FALSE){ gg <- gg + geom_point(aes(color = Sample_cluster), alpha = 0.2) @@ -924,6 +906,7 @@ server <- function(input, output, session) { head(4) tf_list$TF_not_data <- temp$TF_not_data + } else{ temp <- check_tf_input(input_new()$tf, unique(input_new()$TF_and_ext[["type"]])) @@ -932,8 +915,7 @@ server <- function(input, output, session) { tf_list$TF_not_data <- temp$TF_not_data } - #print(tf_list$TF_in_data) - #print(active_specific_data()) + req(length(tf_list$TF_in_data) > 0) plot_bar_list(active_specific_data()$data$AUC_df, tf_list$TF_in_data) }) @@ -973,14 +955,135 @@ server <- function(input, output, session) { 'Average AUC in Other' = AUC_out, 'AUC Fold Change' = AUC_FC ) - # datatable(data, escape = TRUE, - # colnames = c('Average AUC in Cluster' = 'AUC_in', - # 'Average AUC in Other' = 'AUC_out', - # 'AUC Fold Change' = 'AUC_FC'), - # rownames = FALSE) }) +#-------------------------Bubbles-------------------- + #code courtesy of clusters app and adapted for use + + #dendrogram image for the joint_cortex and pons extended datasets + #size and width are hardcoded to line up with the bubble plot + output$dend_image <- renderUI({ + image_source <- switch(input$region, + "cortex" = "joint_cortex_extended_tree.png", + "pons" = "joint_pons_extended_tree.png") + image_height <- switch(input$region, + "cortex" = "143", + "pons" = "120") + div(style = "margin-top: 3em; margin-bottom: -2em !important;", + fluidRow(tags$img(src = image_source, width = "1150", height = image_height)) + ) + }) + + + #reactively changes data for bubble plot depending on inputs + bubble_data <- reactive({ + + temp <- check_tf_input(input_new()$tf, unique(input_new()$TF_and_ext[["type"]])) + tf_list$TF_in_data <- temp$TF_in_data %>% transform_tf_input(input_new()$TF_and_ext) %>% + head(20) + tf_list$TF_not_data <- temp$TF_not_data + + data_sample <- glue("joint_{input_new()$region}_extended") + + + + bubble_prep(sample = data_sample, + tf = tf_list$TF_in_data, + dend_order = input_new()$dend_order, + scale = input_new()$bubble_scale) + + }) + + num_of_tf <- reactive({ + length(tf_list$TF_in_data) + }) + + # Generate the bubbleplot + output$bubble <- renderPlot({ + + + + plot_bubble(data = bubble_data()$data, + label_palette = bubble_data()$label_palette, + input_new()$dend_order)$plot # Get plot part of output + + }, + + # Choose width to align horizontally with dendrogram image + width = 1143, + + # Customize the height of the bubbleplot to scale with the number of genes which + # are being displayed, after allocating a baseline height for the x-axis & legend + + + height = function() 150 + 30 * num_of_tf() + ) + + + # Create a tooltip with cluster / expression information + # that appears when hovering over a bubble + # This was adapted from this example: https://gitlab.com/snippets/16220 + + output$bubble_hover_info <- renderUI({ + + hover <- input$bubble_hover + + + + # Find the nearest data point to the mouse hover position + point <- nearPoints(bubble_data()$data, + hover, + xvar = "Cluster", + yvar = "TF_padded", + maxpoints = 1) %>% + select(TF, Cluster, AUC, FC) + + # Hide the tooltip if mouse is not hovering over a bubble + if (nrow(point) == 0) return(NULL) + + # Create style property for tooltip + # background is set to the cluster colour, with opacity = 95% ("F2" at end of hex) + # z-index is set so we are sure are tooltip will be on top + style <- paste0("position:absolute; z-index:100; background-color: ", bubble_data()$label_palette[point$Cluster], "F2;", + "left: -350px; top: 400px; width: 350px;") + + # Set text to white if the background colour is dark, else it's black (default) + if (dark(bubble_data()$label_palette[point$Cluster])) { + style <- paste0(style, "color: #FFFFFF") + } + + # Specify text content of tooltips - special content for mean expression plot + tooltip_text <- paste0(" TF: ", point$TF, "
", + " Cluster: ", point$Cluster, "
", + " TF Activity: ", point$AUC %>% round(3), "
", + " TF Activity Fold Change: ", point$FC %>% round(3), "
") + + # Actual tooltip created as wellPanel + wellPanel( + style = style, + p(HTML(tooltip_text)) + ) + }) + + # Render the bubble plot gene labels separately with ggdraw + output$bubble_labels <- renderPlot({ + + ggdraw(plot_bubble(data = bubble_data()$data, + label_palette = bubble_data()$label_palette)$labels) # Get labels part of output + + }, + + # Set height of bubble plot gene labels to (hopefully) align with plots + height = function() 8 + 29 * num_of_tf(), + + # Max length of a gene is 200px + # NOTE: If altering this, also change the corresponding cellWidth for + # splitLayout in ui.R + width = 200 + + ) + } diff --git a/GRN/ui.R b/GRN/ui.R index 9b2296a..948a146 100644 --- a/GRN/ui.R +++ b/GRN/ui.R @@ -50,6 +50,9 @@ ui <- fluidPage( uiOutput('file1_ui'), ## instead of fileInput('file1', label = NULL) so that the file can be reset actionButton("reset", label = "Reset File"), + + #a switch to allow subsetting the GRN by target gene instead of by TF, not fully implemented so commented out for now + #materialSwitch("GRN_by_gene", "View Network by Target Gene", status = "primary"), checkboxInput(inputId = "label", label = "Label Target Gene Nodes", value = FALSE) @@ -72,9 +75,9 @@ ui <- fluidPage( selected = "joint"), selectInput(inputId = "time", label = "Time-point to Visualize", - choices = c("All","e12", "e15", "p0", "p3", "p6"), + choices = c("All Time-Points", dev_time_points), multiple = FALSE, - selected = "All") + selected = "All Time-Points") ), # -----------------DR plots --------------------------------------------- @@ -102,6 +105,14 @@ ui <- fluidPage( min = 1, max = 4, value = 1.5, step = 0.25, ticks = TRUE), #materialSwitch("dendro", "See Dendrogram", status = "success", right = TRUE) ), + +#----------------------Bubble----------------- +conditionalPanel(condition = "input.tabs == 'bubble'", + selectInput("bubble_scale", "Scaling", + choices = c("Scale activity of TFs to [0, 1]" = TRUE, + "Conserve scale across TFs" = FALSE), + selected = "Scale activity of TFs to [0, 1]"), +), # Update everything actionButton("update", label = "Update"), @@ -115,6 +126,114 @@ ui <- fluidPage( mainPanel( tabsetPanel( + #-----------------------Bubble-plot------------------- + tabPanel( + title = "Dendrogram", + tags$br(), + tags$b("This tab displays the activity and activity fold change of up to 20 TFs over each cluster in the selected brain region."), + tags$br(), + tags$br(), + p("• Clusters are ordered according to the dendrogram which represents a molecular taxonomy of all cell populations"), + + p("• Below the dendrogram, clusters are annotated by brain region, and time point."), + + p("• Bubble colour encodes the mean TF activity within the cluster, and bubble size encodes the fold change of TF activity in each cluster compared to all other clusters - effectively describing TF specificity."), + + p("• The \"Scaling\" option allows the option of normalizing mean TF activity of all TF from 0, 1 for a better visualization when different TFs have large differences in absolute AUC value. This does not change the FC values."), + + p("• Hover over each bubble to get additional details about each cluster & its expression level"), + + uiOutput("dend_image"), + + div(style = "margin-top: 2em; margin-left: 0em; margin-bottom: -5em; + overflow-x: visible; overflow-y: visible;", + + fluidRow( + # Set cellWidths equal to the actual width of each plot (server.R) + splitLayout(cellWidths = c(1148, 200), + + # Bubble plot(s) + (plotOutput("bubble", + hover = hoverOpts(id = "bubble_hover", clip = FALSE), + height = 2000) %>% ws), + + # Gene labels + # No spinner to prevent confusing user, because there is only 1 plot + (plotOutput("bubble_labels", height = 2000)) + ) + + ), + + # UI for tooltip + fluidRow( + uiOutput("bubble_hover_info")), + + ), + value = "bubble" + ), + # -----------------Heatmap --------------------------------------------- + tabPanel( + strong("This tab displays a heatmap of user selected TF activity per cluster") %>% p(), + p("• Values in the heatmap represent the mean TF activity per cluster."), + p("• Joint clusters are classified based on the combined data from every developmental + time-point in a brain region (forebrain or pons); sample cluster are identified based on data from each + individual time-point per brain region."), + p("• Use the \"Time-point to Visualise\" option to select which (if not all) time-points + to visualise in the sample cluster heatmap."), + + fluidRow( + column(width = 3, materialSwitch(inputId = "heatmap_toggle", + label = "Explore per Time-Point Heatmap", + value = FALSE, status = "success")), + column(width = 3, actionButton("info2", "What Is This?")) + ), + + htmlOutput("hm_data"), + title = "TF Activity Heatmap", + value = "Heatmap", + fluidRow( + plotOutput("heatmap_joint") + ), + + downloadButton("download_hm_joint", "Heatmap Download (PDF)"), + + (div(style='width:800px;overflow-x: scroll;', + uiOutput("heatmap_cluster"))), + + + downloadButton("download_hm_cluster", "Heatmap Download (PDF)"), + ), + + # -----------------DR plots --------------------------------------------- + tabPanel( + title = "TF Activity, by Region", + value = "Clustering", + + p("This tab displays the activity of selected transcription factors") %>% strong(), + p("• Cells are plotted in 2D according to selected dimensionality reduction algorithm"), + p("• The top row is colored by joint cluster and the bottom row is colored by transcription factor activity"), + p("• Only the first two transcriptions factors are displayed"), + fluidRow( + column(width = 3, materialSwitch(inputId = "cluster_toggle", + label = "Explore per Time-Point TF Activity", + value = FALSE, status = "success")), + column(width = 6, actionButton("info3", "What Is This?")) + ), + htmlOutput("dr_data"), + fluidRow( + column(width = 10, plotOutput("color_by_cluster", width = "6in", height = "6in")) + ), + fluidRow( + + column(width = 6, plotOutput("cluster1",width = "4.2in", height = "4in"), + downloadButton("download_UMAP_1", "Transcription Factor 1 Activity Plot (PDF)")), + + column(width = 6, plotOutput("cluster2", width = "4.2in",height = "4in"), + downloadButton("download_UMAP_2", "Transcription Factor 2 Activity Plot (PDF)")), + + ) + ), + # -----------------ggNet visualisation --------------------------------------------- tabPanel( strong("This tab displays a network visualisation of the inferred regulatory relationship between TFs and target genes.") %>% p(), @@ -136,8 +255,7 @@ ui <- fluidPage( htmlOutput("grn_data"), - #textOutput("general_desc"), # this line breaks things/ probably cause you can't have 2 general_desc - #textOutput("desc"), + plotlyOutput("network"), br(),#so the plotly doesn't overlap with the download button br(), @@ -173,100 +291,37 @@ ui <- fluidPage( value = FALSE, status = "success")), column(width = 3, actionButton("info1", "What Is This?")) ), - # materialSwitch(inputId = "table_toggle", label = "Explore per Time-Point Data", - # value = FALSE, status = "success"), + htmlOutput("table_data"), - #textOutput("general_desc"), + dataTableOutput("table1"), value = "Transcription Factor Target Information" ), - # -----------------Heatmap --------------------------------------------- - tabPanel( - strong("This tab displays a heatmap of user selected TF activity per cluster") %>% p(), - p("• Values in the heatmap represent the mean TF activity per cluster."), - p("• Joint clusters are classified based on the combined data from every developmental - time-point in a brain region (forebrain or pons); sample cluster are identified based on data from each - individual time-point per brain region."), - p("• Use the \"Time-point to Visalise\" option to select which (if not all) time-points - to visualise in the sample cluster heatmap."), - - fluidRow( - column(width = 3, materialSwitch(inputId = "heatmap_toggle", - label = "Explore per Time-Point Heatmap", - value = FALSE, status = "success")), - column(width = 3, actionButton("info2", "What Is This?")) - ), - # materialSwitch(inputId = "heatmap_toggle", - # label = "Explore per Time-Point Heatmap", - # value = FALSE, status = "success"), - htmlOutput("hm_data"), - title = "TF Activity Heatmap", - value = "Heatmap", - fluidRow( - plotOutput("heatmap_joint") - ), - downloadButton("download_hm_joint", "Heatmap Download (PDF)"), - div(style = "margin-left: 1.3em; margin-right: 1.3em;", - fluidRow( - plotOutput("heatmap_cluster") - )), - downloadButton("download_hm_cluster", "Heatmap Download (PDF)"), - #imageOutput("color_hm_palette", width = "6in", height = "4in") - ), - # -----------------DR plots --------------------------------------------- - tabPanel( - title = "TF Activity, by Region", - value = "Clustering", - - p("This tab displays the activity of selected transcription factors") %>% strong(), - p("• Cells are plotted in 2D according to selected dimensionality reduction algorithm"), - p("• The top row is colored by joint cluster and the bottom row is colored by transcription factor activity"), - p("• Only the first two transcriptions factors are displayed"), - fluidRow( - column(width = 3, materialSwitch(inputId = "cluster_toggle", - label = "Explore per Time-Point TF Activity", - value = FALSE, status = "success")), - column(width = 6, actionButton("info3", "What Is This?")) - ), - htmlOutput("dr_data"), - fluidRow( - column(width = 10, plotOutput("color_by_cluster", width = "6in", height = "7in")) - ), - fluidRow( - - column(width = 6, plotOutput("cluster1",width = "4.2in", height = "4in"), - downloadButton("download_UMAP_1", "Transcription Factor 1 Activity Plot (PDF)")), - - column(width = 6, plotOutput("cluster2", width = "4.2in",height = "4in"), - downloadButton("download_UMAP_2", "Transcription Factor 2 Activity Plot (PDF)")), - - ) - ), + + # -----------------Time Series --------------------------------------------- tabPanel("Time Series", p("This plot quantifies the proportion of cells (from 0 to 1) at each timepoint where a given TF is active, broken down by cell type, to allow for visualizing activity across time.") %>% strong(), p("• For any given cell, any given TF is considered active if its activity in that cell - is higher than a TF activity threshold."), + is higher than a TF activity threshold determined in the SCENIC pipeline."), p("• The time series for the first TF selected in the sidebar will be an interactive plot, with the remaining plots being static."), - - textOutput("timeseries_desc"), + p("Interactive Plot Options: double clicking a cell type in the legend displays that cell type ONLY; + single click removes that cell type from the plot. Mouse over ribbons in the plot to see the cell types. + We only support four plots of your first four transcripton factor inputs."), + br(), - #textOutput("tf_timeseries_desc"), fluidRow( - plotlyOutput("timeseries1"), - downloadButton("download_ribbon_1", "Timeseries ribbon plot (PDF)"), - plotOutput("timeseries2"), - #imageOutput("timeseries_color"), - #plotOutput("timeseries3"), - #plotlyOutput("timeseries4") - plotlyOutput("cell_proportion_timeseries") + plotlyOutput("timeseries1"), + downloadButton("download_ribbon_1", "Timeseries ribbon plot (PDF)"), + plotOutput("timeseries2"), + plotlyOutput("cell_proportion_timeseries") ), - #imageOutput("proportion_timeseries", width = "auto", height = "auto"), + value = "Time Series"), #-----------------------------Active specific------------------------ tabPanel("TF Activity and Specificity", @@ -299,12 +354,7 @@ ui <- fluidPage( tabPanel("By TF", uiOutput("as_tf"), value = "by_tf"), id = "as_tabs" ), - #uiOutput("as_plots"), - # fluidRow( - # column(width = 7, plotOutput("active_specific_scatter")), - # column(width = 5, tableOutput("active_specific_table")) - # - # ), + plotOutput("active_specific_bar"), downloadButton("as_bar_download", "Bar-Plot Download (PDF)"), value = "active_specific"), diff --git a/GRN/www/joint_cortex_extended_tree.png b/GRN/www/joint_cortex_extended_tree.png new file mode 100644 index 0000000..afd53aa Binary files /dev/null and b/GRN/www/joint_cortex_extended_tree.png differ diff --git a/GRN/www/joint_pons_extended_tree.png b/GRN/www/joint_pons_extended_tree.png new file mode 100644 index 0000000..a3bd3d8 Binary files /dev/null and b/GRN/www/joint_pons_extended_tree.png differ diff --git a/clusters/data/populate_data_flags.R b/clusters/data/populate_data_flags.R index 2aab582..0dbd798 100644 --- a/clusters/data/populate_data_flags.R +++ b/clusters/data/populate_data_flags.R @@ -88,7 +88,7 @@ process_file <- function(file, dir_name) { dest <- file.path(dir_name, file$file) # Produce and echo rsync command including hydra username - cmd <- glue("rsync {username}@hydra.ladydavis.ca:{src} {dest}") + cmd <- glue("rsync {username}@hydra2.ladydavis.ca:{src} {dest}") message(cmd) } else { diff --git a/clusters/functions.R b/clusters/functions.R index 53dc85e..fd2d04b 100644 --- a/clusters/functions.R +++ b/clusters/functions.R @@ -832,6 +832,7 @@ feature_plot <- function(df, #' @return a ggplot object vln <- function(df, palette, + title = NULL, scale = "width", points = FALSE, point_size = 0.4, @@ -854,7 +855,13 @@ vln <- function(df, scale_fill_manual(values = palette) + theme_min() + theme(axis.text.x = element_text(angle = 90, hjust = 1), - legend.position = "none") + legend.position = "none") + + if (!is.null(title)){ + gg <- gg + ggtitle(title) + + theme(plot.title = element_text(size = 15)) + } + if (points) gg <- gg + geom_jitter(size = point_size) diff --git a/clusters/google-analytics.html b/clusters/google-analytics.html new file mode 100644 index 0000000..3677f13 --- /dev/null +++ b/clusters/google-analytics.html @@ -0,0 +1,9 @@ + + + diff --git a/clusters/server.R b/clusters/server.R index 1c3c28b..7dbf93e 100644 --- a/clusters/server.R +++ b/clusters/server.R @@ -249,9 +249,9 @@ server <- function(input, output, session) { #### ---- Cluster info & markers table tab content ---- - # --- LOAD GENE EXPRESSION DATA --- + # --- LOAD GENE EXPRESSION / PCT DATA --- - # Generate the input dataframe for the expression table + # Generate the input dataframe for the detection rate table table_exp_input <- reactive({ # Check whether a gene was provided or not @@ -270,6 +270,9 @@ server <- function(input, output, session) { anno_genes <- setdiff(not_data_genes, not_anno_genes) } + # Set the mean as always valid since it is not being used + valid_mean <- TRUE + # Perform input validation validate( need(is.null(anno_genes), @@ -280,22 +283,16 @@ server <- function(input, output, session) { glue("\n\n\n\nThe input gene \"{not_anno_genes}\" was not found in the gene annotation.")) ) - # Only display mean if more than one gene is given AND the user requested it - valid_mean <- FALSE - if (length(input_new()$gene) > 1 && input_new()$mean_exp){ - valid_mean <- TRUE - } - # Display the first 20 genes provided as input bubble_prep(gene = head(input_new()$gene, 20), - scale = TRUE, # ALWAYS WANT SCALING [0,1] FOR TABLE + scale = TRUE, show_mean = valid_mean) }) - # --- EXPRESSION TABLE --- + # --- DETECTION RATE TABLE --- - # Display table before update button has been clicked (no gene expression either) + # Display table before update button has been clicked (no detection rate either) output$cluster_table_no_update <- renderReactable({ # Modify metadata for plotting @@ -347,7 +344,7 @@ server <- function(input, output, session) { ) }) - # Show table with cluster & expression info + # Show table with cluster & detection rate info output$cluster_table <- renderReactable({ if (length(input_new()$gene) > 0){ @@ -360,9 +357,9 @@ server <- function(input, output, session) { # Modify table for plotting table <- table_exp_input() %>% - select(-Pct1, -Gene_padded) %>% - mutate(Expression = round(Expression, 2)) %>% - spread(Gene, Expression) %>% + select(-Expression, -Gene_padded) %>% + mutate(Pct1 = round(Pct1, 2)) %>% + spread(Gene, Pct1) %>% # Select all except Colour column, rename columns for human readability, # follow bubble_input order for gene columns (saved above) select(Cluster, @@ -487,9 +484,9 @@ server <- function(input, output, session) { } }) - # Download expression table as TSV - output$download_exp_table <- - downloadHandler(filename = "mean_cluster_expression.tsv", + # Download detection rate table as TSV + output$download_pct1_table <- + downloadHandler(filename = "detection_rate_per_cluster.tsv", contentType = "text/tsv", content = function(file) { write_tsv(bubble_input() %>% select(-Gene_padded), path = file) @@ -544,7 +541,7 @@ server <- function(input, output, session) { # Validate whether a cluster is selected or not, display message to user validate( need(!is.null(getReactableState("cluster_table_no_update", "selected")), - "Please select a cluster for which to display markers in the expression table above.") + "Please select a cluster for which to display markers in the detection rate table above.") ) # Load marker and signature files for the selected cluster's region & timepoint @@ -617,7 +614,7 @@ server <- function(input, output, session) { # Validate whether a cluster is selected or not, display message to user validate( need(!is.null(getReactableState("cluster_table", "selected")), " - Please select a cluster for which to display markers in the expression table above.") + Please select a cluster for which to display markers in the detection rate table above.") ) # Load marker and signature files for the selected cluster's region & timepoint @@ -1135,14 +1132,13 @@ server <- function(input, output, session) { # Map vln() plotter (functions.R) to each timepoint, producing # one plot per timepoint - map(dr_sample_exp(), + map2(dr_sample_exp(), timepoints, ~ vln(.x, + title = .y, palette = pal, points = input_new()$vln_points) + theme(plot.margin = unit(c(0.5, 0, 1, 1.5), "cm"))) %>% - {plot_grid(plotlist = ., ncol = 1, align = "hv", - labels = timepoints, label_size = 15)} - + {plot_grid(plotlist = ., ncol = 1)} }) #### ---- Clusters ranked by expression tab content ---- diff --git a/clusters/ui.R b/clusters/ui.R index 23c4335..9d6b082 100644 --- a/clusters/ui.R +++ b/clusters/ui.R @@ -3,8 +3,12 @@ source("../www/ui_functions.R") ui <- function(request){ + bootstrapPage( + # Add google analytics + tags$head(includeHTML(("google-analytics.html"))), + # Custom styling includeCSS("../www/minimal.css"), @@ -21,12 +25,14 @@ ui <- function(request){ sidebarPanel(width = 3, - conditionalPanel(condition = '!input.upload', + tags$h4(tags$b("Input sidebar")), + + conditionalPanel(condition = "!input.upload && input.tabs != 'welcome'", # Gene input field, shared across tabs selectizeInput(inputId = "gene", label = "Gene", choices = NULL, multiple = TRUE)), - conditionalPanel(condition = 'input.upload', + conditionalPanel(condition = "input.upload && input.tabs != 'welcome'", # Gene list input with a file, shared across tabs fileInput(inputId = "genelist", label = "Gene list (.txt, .csv, or .tsv)", buttonLabel = "Browse...", @@ -34,14 +40,16 @@ ui <- function(request){ accept = c(".txt", ".csv", ".tsv"), placeholder = "No file selected")), - materialSwitch("upload", "Use gene list from file", - # status doesn't have any effect other than color scheme. See bootstrap status values - status = "success", - value = FALSE, - right = TRUE), + conditionalPanel(condition = "input.tabs != 'welcome'", + materialSwitch("upload", "Use gene list from file", + # status doesn't have any effect other than color scheme. See bootstrap status values + status = "success", + value = FALSE, + right = TRUE), + ), # Input for dendrogram tab, expression table, and ranked clusters tab - conditionalPanel(condition = "(input.tabs == 'dendrogram' || input.tabs == 'exp_table' || input.tabs == 'rank_exp') && + conditionalPanel(condition = "(input.tabs == 'dendrogram' || input.tabs == 'rank_exp') && (input.gene.length > 1 || input.upload)", materialSwitch("mean_exp", "Display mean expression over the selected genes", # status doesn't have any effect other than color scheme. See bootstrap status values @@ -64,8 +72,8 @@ ui <- function(request){ ), # Input for all tabs other than dendrogram, expression table, ranked clusters, and heatmap - conditionalPanel(condition = "input.tabs != 'dendrogram' && input.tabs != 'exp_table' - && input.tabs != 'rank_exp' && input.tabs != 'heatmap'", + conditionalPanel(condition = "input.tabs != 'dendrogram' && input.tabs != 'pct_table' + && input.tabs != 'rank_exp' && input.tabs != 'heatmap' && input.tabs != 'welcome'", # Specify the visible label as well as the internal # strings used to refer to each region, matching @@ -149,16 +157,18 @@ ui <- function(request){ ), ), - # Update button for all sidebar inputs. Coloured to differentiate - # from the bookmark button beside it - # tags$head( - # tags$style(HTML('#update{background-color:#4863A0; - # color:#FFFFFF;}')) - # ), - actionButton("update", label = "Update"), - + # Update button for all sidebar inputs. # Bookmark button to store current inputs / state of app - bookmarkButton() + conditionalPanel(condition = "input.tabs != 'welcome'", + actionButton("update", label = "Update"), + bookmarkButton(), + ), + + # Information about the input sidebar for a user on the Welcome page + conditionalPanel(condition = "input.tabs == 'welcome'", + p("Input options will appear here after navigation to an analysis tab."), + ), + ), @@ -227,7 +237,7 @@ ui <- function(request){ ), HTML("


"), - + value = "welcome" ), #### ---- Dendrogram tab output ---- @@ -297,21 +307,21 @@ ui <- function(request){ tags$br(), h3(tags$b("Cluster information")), tags$br(), - h4(tags$b("Gene expression by cluster")), + h4(tags$b("Gene detection rate by cluster")), tags$br(), - tags$b("This table compares the expression of up to 20 genes in each cluster from the mouse single-cell RNA-seq development atlas."), + tags$b("This table compares the detection rate of up to 20 genes in each cluster from the mouse single-cell RNA-seq development atlas."), tags$br(), tags$br(), p(tags$b("• Sidebar gene input is optional for this tab. "), "Cluster information and the marker table will display before any genes have been entered."), - p("• When genes are entered in the sidebar, the value in each gene's column denotes the mean expression of the gene in the specified cluster"), + p("• When genes are entered in the sidebar, the value in each gene's column denotes the detection rate of the gene in cells of the specified cluster"), - p("• When entering more than one gene, use the sidebar switch to display the gene expression averaged (mean) over all input genes in a new column"), + # p("• When entering more than one gene, use the sidebar switch to display the gene expression averaged (mean) over all input genes in a new column"), p("• Use the button(s) below the table to download TSV files of the table contents. Click \"Download cluster information table\" for the cluster information only. - A second button named \"Download gene expression table\" will display when a gene is entered into the app. This button provides a TSV file containing the + A second button named \"Download detection rate table\" will display when a gene is entered into the app. This button provides a TSV file containing the mean expression of each input gene in each cluster, as well as a percentage of cells in each cluster expressing the gene"), p("• Select a cluster using the radio button to the left of each row to view the cluster's gene markers below (sidebar update button not required)"), @@ -347,8 +357,8 @@ ui <- function(request){ # Only show expression table if update button has been pressed once or more # Unfortunately, I cannot condition on the content of gene input in this file.. conditionalPanel(condition='input.update!=0', - downloadButton("download_exp_table", - "Download gene expression table (TSV)") + downloadButton("download_pct1_table", + "Download detection rate table (TSV)") ) ) ), @@ -397,7 +407,7 @@ ui <- function(request){ HTML("


"), # Specify the value to use when checking if this tab is selected - value = "exp_table" + value = "pct_table" ), diff --git a/lifespan/README.md b/lifespan/README.md index 1050c75..1d617a8 100644 --- a/lifespan/README.md +++ b/lifespan/README.md @@ -1,2 +1,5 @@ -# braindex: Lifespan app \ No newline at end of file +# braindex: Lifespan app + +Note: This analysis is based on adaptations of code for curve +fitting and visualization of the BrainSpan data from Marie Forest and Claudia Kleinman. diff --git a/lifespan/ui.R b/lifespan/ui.R index 3a28e86..9f8a9ac 100644 --- a/lifespan/ui.R +++ b/lifespan/ui.R @@ -38,6 +38,8 @@ shinyUI(bootstrapPage( tags$br(), p("Visualize the expression of a gene of interest across the lifespan in samples from the BrainSpan project"), + + p("This analysis is based on code for curve fitting and visualization from Marie Forest and Claudia Kleinman"), p("• In the top row, each curve corresponds to the smoothed expression for one brain region"), @@ -53,6 +55,7 @@ shinyUI(bootstrapPage( ws(plotOutput("regcurves", width = "7in", height = "8in")), h3("Citation guidelines"), + p("These visualizations are based on work from Marie Forest and Claudia Kleinman, please consult for Claudia Kleinman for citation and credit"), p("Miller, J.A. et al. (2014) Transcriptional landscape of the prenatal human brain, Nature 508: 199-206. doi:10.1038/nature13185"), p("© 2010 Allen Institute for Brain Science. Allen Human Brain Atlas. Available from: https://www.brainspan.org/") )