diff --git a/.Rhistory b/.Rhistory index b923cf3..8545efc 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,27 +1,3 @@ -n_per_group = 10, -cells_per_case = 20, -cells_per_control = 20, -ncells_variation_type = "Poisson", -pval = 0.05, -foldchange = 2) -error_hierarchicell(data_summ, -method = "Pseudobulk_mean", -n_genes = 1000, -n_per_group = 10, -cells_per_case = 3, -cells_per_control = 3, -ncells_variation_type = "Poisson", -pval = 0.05, -foldchange = 2) -error_hierarchicell(data_summ, -method = "Pseudobulk_mean", -n_genes = 1000, -n_per_group = 10, -cells_per_case = 300, -cells_per_control = 300, -ncells_variation_type = "Poisson", -pval = 0.05, -foldchange = 2) error_hierarchicell(data_summ, method = "Pseudobulk_mean", n_genes = 1000, @@ -473,40 +449,64 @@ library(Hmisc) ?select ?select ?setnames -?quantile -?as.formula -library(rworkflows) -getwd() -setwd("../Power_Analysis_package/") -rworkflows::use_workflow() -getwd() -rworkflows::use_workflow() -rworkflows::use_workflow() -rworkflows::use_workflow(branches=c("master","main","devel","RELEASE_**","Rworkflows_test"),save_dir="C:/Users/salma/Power_Analysis_package") -rworkflows::use_workflow(branches=c("master","main","devel","RELEASE_**","Rworkflows_test"),save_dir="C:/Users/salma/Power_Analysis_package/.github") -here::here -here::here(".git","w") -rworkflows::use_workflow(branches=c("master","main","devel","RELEASE_**","Rworkflows_test"),save_dir="C:/Users/salma/Power_Analysis_package/.github/workflows") -getwd() -devtools::check() -getwd() -devtools::load_all() +?rowSums +Matrix::rowSums +?Matrix::rowSums +?select +library(Hmisc) +?select +?get +?names +?aes +?factor +?unique +library(ensembldb) +browseVignettes("ensembldb") +?lapply +?element_text +?duplicted +?duplicated +?length +?sapply +?as.character +?matrix +?unique +conflicts() +conflicts() +library(BiocGenerics) +conflicts() +?geom_bar_repel +library(ggrepel) +?geom_bar_repel +library(ggplot2) +geom_bar_repel +?geom_bar_repel +?get +?select +?scales +?percent +?unit +library(ggplot2) +?unit +class("hello") +test <- list(1,2,3) +test +for(j in list){ +for(j in list){ +print(j) +} +for(j in test){ +print(j) +} +test <- c(1,2,3) +for(j in test){ +print(j) +} getwd() +setwd("C:/Users/salma/Power_Analysis_package") devtools::document() -?usethis::use_vignette() -usethis::use_vignette("poweranalysis") +#usethis::use_vignette("poweranalysis") devtools::load_all() -sessionInfo() -getwd() -devtools::document() -devtools::document() -warnings() -devtools::document() -?geom_bar_repel -devtools::document() -install.packages("roxygen2") -install.packages("roxygen2") -install.packages("roxygen2") -install.packages("roxygen2") -library(roxygen2) -packageVersion("roxygen2") +devtools::install() +devtools::check() +devtools::build() diff --git a/R/correlation_analysis.r b/R/correlation_analysis.r index 7f0d91c..f1d78f0 100644 --- a/R/correlation_analysis.r +++ b/R/correlation_analysis.r @@ -23,8 +23,20 @@ correlation_analysis <- function(dataset_name="placeholder", numSubsets="placeholder", output_path=getwd()){ - # create correlation plots for each p-value cut-off - - return("TestOut") + # run plot_mean_correlation for each p-value (saving outputs) + mean_correlation_results <- plot_mean_correlation(dataset_name, + allstudies, + celltypes, + pvals, + data_names, + output_path) + + # run correlation_boxplots for each p-value (saving outputs) + boxplot_results <- correlation_boxplots(mean_correlation_results, + numRealDatasets=length(allstudies), + pvals=pvals, + output_path=output_path) + + return(list(mean_correlation_results = mean_correlation_results, boxplot_results = boxplot_results)) } \ No newline at end of file diff --git a/R/correlation_boxplots.r b/R/correlation_boxplots.r index 0b70368..aebeafa 100644 --- a/R/correlation_boxplots.r +++ b/R/correlation_boxplots.r @@ -8,7 +8,7 @@ utils::globalVariables(c("alpha")) #' @param corrMats (named) list of correlation matrices for each celltype with the final element being the mean correlation matrix, all at specified p-value #' @param numRealDatasets total number of *real* datasets (most likely the number of studies, but sometimes a study may be split e.g. into 2 brain regions, so in this case it would be the number of studies plus 1) -#' @param pval the cut-off p-value which was used to select DEGs +#' @param pvals the cut-off p-value which was used to select DEGs #' @param alphaval (alpha) transparency of the non-mean boxplots #' @param numPerms number of random permutations of the dataset used to select significant DEGs from #' @param numSubsets number of pairs of random subsets of the dataset used to select significant DEGs from @@ -19,12 +19,13 @@ utils::globalVariables(c("alpha")) #' @param fontsize_legendlabels font size for legend labels in plot #' @param fontsize_legendtitle font size for legend title in plot #' @param fontsize_facet_labels font size for facet labels +#' @param output_path base path in which outputs will be stored #' @return box plots for correlation matrices at a certain p-value cut-off, sorted by celltype and then type of correlation correlation_boxplots <- function(corrMats, numRealDatasets, - pval, + pvals, alphaval=0.25, numPerms=5, numSubsets=5, @@ -34,148 +35,163 @@ correlation_boxplots <- function(corrMats, fontsize_title=14, fontsize_legendlabels=9, fontsize_legendtitle=9, - fontsize_facet_labels=9){ + fontsize_facet_labels=9, + output_path=getwd()){ + + # outputs + output_list <- list() - # validate function input params - validate_input_parameters_correlation(corrMats=corrMats, numRealDatasets=numRealDatasets, pvalue=pval, - alphaval=alphaval, numPerms=numPerms, numSubsets=numSubsets, - sexDEGs=sexDEGs, fontsize_yaxislabels=fontsize_yaxislabels, fontsize_yaxisticks=fontsize_yaxisticks, - fontsize_title=fontsize_title, fontsize_legendlabels=fontsize_legendlabels, fontsize_legendtitle=fontsize_legendtitle, - fontsize_facet_labels=fontsize_facet_labels) + # loop over each p-value + for(pval in pvals){ + # validate function input params + validate_input_parameters_correlation(corrMats=corrMats, numRealDatasets=numRealDatasets, pvalue=pval, + alphaval=alphaval, numPerms=numPerms, numSubsets=numSubsets, + sexDEGs=sexDEGs, fontsize_yaxislabels=fontsize_yaxislabels, fontsize_yaxisticks=fontsize_yaxisticks, + fontsize_title=fontsize_title, fontsize_legendlabels=fontsize_legendlabels, fontsize_legendtitle=fontsize_legendtitle, + fontsize_facet_labels=fontsize_facet_labels, output_path=output_path) - # midCor submatrix limits - midCorLim <- numPerms + numRealDatasets - # list to hold results - corrOuts <- c() - # index - j <- 1 + # midCor submatrix limits + midCorLim <- numPerms + numRealDatasets + # list to hold results + corrOuts <- c() + # index + j <- 1 - # get lists with all correlations - for(corrMat in corrMats){ - # specify submatrices with upper/middle/lower bounds - lower <- corrMat[1:numPerms,1:numPerms] - middle <- corrMat[(numPerms+1):midCorLim,(numPerms+1):midCorLim] - upper <- corrMat[(midCorLim+1):(midCorLim+numSubsets),(midCorLim+1):(midCorLim+numSubsets)] - # convert each one to a list and remove "1" (selfcorrelation) - lower <- unique(unlist(as.list(lower))) - lower <- lower[-c(1)] - middle <- unique(unlist(as.list(middle))) - middle <- middle[-c(1)] - upper <- unique(unlist(as.list(upper))) - upper <- upper[-c(1)] - # store in list - corrOuts[[j]] <- list(lower,middle,upper) - names(corrOuts)[[j]] <- names(corrMats)[[j]] - # increment - j <- j+1 - } + # get lists with all correlations + for(corrMat in corrMats){ + # specify submatrices with upper/middle/lower bounds + lower <- corrMat[1:numPerms,1:numPerms] + middle <- corrMat[(numPerms+1):midCorLim,(numPerms+1):midCorLim] + upper <- corrMat[(midCorLim+1):(midCorLim+numSubsets),(midCorLim+1):(midCorLim+numSubsets)] + # convert each one to a list and remove "1" (selfcorrelation) + lower <- unique(unlist(as.list(lower))) + lower <- lower[-c(1)] + middle <- unique(unlist(as.list(middle))) + middle <- middle[-c(1)] + upper <- unique(unlist(as.list(upper))) + upper <- upper[-c(1)] + # store in list + corrOuts[[j]] <- list(lower,middle,upper) + names(corrOuts)[[j]] <- names(corrMats)[[j]] + # increment + j <- j+1 + } - # store in dataframe - i <- 1 - # empty dataframe - df <- data.frame() - # fill dataframe - for(out in corrOuts){ - # define variables - var1 <- replicate(length(out[[1]])+length(out[[2]])+length(out[[3]]), names(corrOuts)[[i]]) #celltype - var2 <- c(replicate(length(out[[1]]),"Random Permutations"),replicate(length(out[[2]]),"Between Study"),replicate(length(out[[3]]),"Within-study subsamples")) - val <- unlist(out) - # put in dataframe - df_new <- data.frame(var1) - df_new$var2 <- var2 - df_new$val <- val - # join - df <- rbind(df,df_new) - i <- i+1 - } + # store in dataframe + i <- 1 + # empty dataframe + df <- data.frame() + # fill dataframe + for(out in corrOuts){ + # define variables + var1 <- replicate(length(out[[1]])+length(out[[2]])+length(out[[3]]), names(corrOuts)[[i]]) #celltype + var2 <- c(replicate(length(out[[1]]),"Random Permutations"),replicate(length(out[[2]]),"Between Study"),replicate(length(out[[3]]),"Within-study subsamples")) + val <- unlist(out) + # put in dataframe + df_new <- data.frame(var1) + df_new$var2 <- var2 + df_new$val <- val + # join + df <- rbind(df,df_new) + i <- i+1 + } - df$alpha <- replicate(dim(df)[[1]],alphaval) - df$alpha <- ifelse(df$var1=="Mean", 1, ifelse(df$var1!="Mean",alphaval,alphaval)) - unique_alphas <- df[!duplicated(df[,c("var1")]),]$alpha + df$alpha <- replicate(dim(df)[[1]],alphaval) + df$alpha <- ifelse(df$var1=="Mean", 1, ifelse(df$var1!="Mean",alphaval,alphaval)) + unique_alphas <- df[!duplicated(df[,c("var1")]),]$alpha - # box plot - if(pval == 1){ - if(sexDEGs == FALSE){ - fig.plot <- ggplot(df, - aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+ - geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+ - theme_cowplot()+ - scale_colour_brewer(palette = "Set1")+ - labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("Using all DEGs (from all chromosomes)"))+ - facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+ - theme(axis.title.x=element_blank(), - axis.text.x=element_blank(), - axis.ticks.x=element_blank(), - axis.title.y = element_text(size = fontsize_yaxislabels), - axis.text.y = element_text(size = fontsize_yaxisticks), - plot.title = element_text(size = fontsize_title), - legend.text = element_text(size = fontsize_legendlabels), - legend.title = element_text(size = fontsize_legendtitle), - strip.text = element_text(size = fontsize_facet_labels))+ - scale_alpha(guide = 'none')+ - guides(fill=guide_legend(override.aes = list(alpha=unique_alphas))) + # box plot + if(pval == 1){ + if(sexDEGs == FALSE){ + fig.plot <- ggplot(df, + aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+ + geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+ + theme_cowplot()+ + scale_colour_brewer(palette = "Set1")+ + labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("Using all DEGs (from all chromosomes)"))+ + facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+ + theme(axis.title.x=element_blank(), + axis.text.x=element_blank(), + axis.ticks.x=element_blank(), + axis.title.y = element_text(size = fontsize_yaxislabels), + axis.text.y = element_text(size = fontsize_yaxisticks), + plot.title = element_text(size = fontsize_title), + legend.text = element_text(size = fontsize_legendlabels), + legend.title = element_text(size = fontsize_legendtitle), + strip.text = element_text(size = fontsize_facet_labels))+ + scale_alpha(guide = 'none')+ + guides(fill=guide_legend(override.aes = list(alpha=unique_alphas))) + }else{ + fig.plot <- ggplot(df, + aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+ + geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+ + theme_cowplot()+ + scale_colour_brewer(palette = "Set1")+ + labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("Using all DEGs (from sex chromosomes)"))+ + facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+ + theme(axis.title.x=element_blank(), + axis.text.x=element_blank(), + axis.ticks.x=element_blank(), + axis.title.y = element_text(size = fontsize_yaxislabels), + axis.text.y = element_text(size = fontsize_yaxisticks), + plot.title = element_text(size = fontsize_title), + legend.text = element_text(size = fontsize_legendlabels), + legend.title = element_text(size = fontsize_legendtitle), + strip.text = element_text(size = fontsize_facet_labels))+ + scale_alpha(guide = 'none')+ + guides(fill=guide_legend(override.aes = list(alpha=unique_alphas))) + } }else{ - fig.plot <- ggplot(df, - aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+ - geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+ - theme_cowplot()+ - scale_colour_brewer(palette = "Set1")+ - labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("Using all DEGs (from sex chromosomes)"))+ - facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+ - theme(axis.title.x=element_blank(), - axis.text.x=element_blank(), - axis.ticks.x=element_blank(), - axis.title.y = element_text(size = fontsize_yaxislabels), - axis.text.y = element_text(size = fontsize_yaxisticks), - plot.title = element_text(size = fontsize_title), - legend.text = element_text(size = fontsize_legendlabels), - legend.title = element_text(size = fontsize_legendtitle), - strip.text = element_text(size = fontsize_facet_labels))+ - scale_alpha(guide = 'none')+ - guides(fill=guide_legend(override.aes = list(alpha=unique_alphas))) + if(sexDEGs == FALSE){ + fig.plot <- ggplot(df, + aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+ + geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+ + theme_cowplot()+ + scale_colour_brewer(palette = "Set1")+ + labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("DEGs selected at a ",pval*100,"% cut-off (from all chromosomes)"))+ + facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+ + theme(axis.title.x=element_blank(), + axis.text.x=element_blank(), + axis.ticks.x=element_blank(), + axis.title.y = element_text(size = fontsize_yaxislabels), + axis.text.y = element_text(size = fontsize_yaxisticks), + plot.title = element_text(size = fontsize_title), + legend.text = element_text(size = fontsize_legendlabels), + legend.title = element_text(size = fontsize_legendtitle), + strip.text = element_text(size = fontsize_facet_labels))+ + scale_alpha(guide = 'none')+ + guides(fill=guide_legend(override.aes = list(alpha=unique_alphas))) + }else{ + fig.plot <- ggplot(df, + aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+ + geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+ + theme_cowplot()+ + scale_colour_brewer(palette = "Set1")+ + labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("DEGs selected at a ",pval*100,"% cut-off (from sex chromosomes)"))+ + facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+ + theme(axis.title.x=element_blank(), + axis.text.x=element_blank(), + axis.ticks.x=element_blank(), + axis.title.y = element_text(size = fontsize_yaxislabels), + axis.text.y = element_text(size = fontsize_yaxisticks), + plot.title = element_text(size = fontsize_title), + legend.text = element_text(size = fontsize_legendlabels), + legend.title = element_text(size = fontsize_legendtitle), + strip.text = element_text(size = fontsize_facet_labels))+ + scale_alpha(guide = 'none')+ + guides(fill=guide_legend(override.aes = list(alpha=unique_alphas))) + } } - }else{ - if(sexDEGs == FALSE){ - fig.plot <- ggplot(df, - aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+ - geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+ - theme_cowplot()+ - scale_colour_brewer(palette = "Set1")+ - labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("DEGs selected at a ",pval*100,"% cut-off (from all chromosomes)"))+ - facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+ - theme(axis.title.x=element_blank(), - axis.text.x=element_blank(), - axis.ticks.x=element_blank(), - axis.title.y = element_text(size = fontsize_yaxislabels), - axis.text.y = element_text(size = fontsize_yaxisticks), - plot.title = element_text(size = fontsize_title), - legend.text = element_text(size = fontsize_legendlabels), - legend.title = element_text(size = fontsize_legendtitle), - strip.text = element_text(size = fontsize_facet_labels))+ - scale_alpha(guide = 'none')+ - guides(fill=guide_legend(override.aes = list(alpha=unique_alphas))) - }else{ - fig.plot <- ggplot(df, - aes(x=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),y=val))+ - geom_boxplot(outlier.shape=NA,aes(fill=factor(var1,levels=c("Mean","Astro","Endo","Micro","Oligo")),alpha=alpha),width=5)+ - theme_cowplot()+ - scale_colour_brewer(palette = "Set1")+ - labs(y="Correlation", x="Type", fill="Cell Type",title=paste0("DEGs selected at a ",pval*100,"% cut-off (from sex chromosomes)"))+ - facet_wrap(factor(df$var2,levels=c("Random Permutations","Between Study","Within-study subsamples")), scales="fixed")+ - theme(axis.title.x=element_blank(), - axis.text.x=element_blank(), - axis.ticks.x=element_blank(), - axis.title.y = element_text(size = fontsize_yaxislabels), - axis.text.y = element_text(size = fontsize_yaxisticks), - plot.title = element_text(size = fontsize_title), - legend.text = element_text(size = fontsize_legendlabels), - legend.title = element_text(size = fontsize_legendtitle), - strip.text = element_text(size = fontsize_facet_labels))+ - scale_alpha(guide = 'none')+ - guides(fill=guide_legend(override.aes = list(alpha=unique_alphas))) + + # store output in list with p-value as key + output_plots[[as.character(pval)]] <- fig.plot + + # save the plot if output_path is specified + if (!is.null(output_path)) { + ggsave(paste0(output_path, "/correlation_boxplot_p", pval, ".png"), fig.plot) } } - return(fig.plot) + return(output_plots) } \ No newline at end of file diff --git a/R/plot_mean_correlation.r b/R/plot_mean_correlation.r index ed4eb3a..70994bd 100644 --- a/R/plot_mean_correlation.r +++ b/R/plot_mean_correlation.r @@ -6,54 +6,72 @@ #' @param dataset_name name of the dataset used to select significant DEGs from (specified as a string, name as in allStudies) #' @param allstudies a list containing all the datasets (most likely as SCE objects) #' @param celltypes a list containing the celltypes to compute mean correlation across -#' @param pvalue the cut-off p-value which will be used to select DEGs +#' @param pvals the cut-off p-value which will be used to select DEGs #' @param data_names names of the datasets as they appear in the correlation plot +#' @param output_path base path in which outputs will be stored #' @return mean correlation matrix plot_mean_correlation <- function(dataset_name, allstudies, celltypes, - pvalue, - data_names="placeholder"){ - - # validate function input params - validate_input_parameters_correlation(dataset_name=dataset_name, allstudies=allstudies, celltypes=celltypes, - pvalue=pvalue, data_names=data_names) - - # list for genes of each celltype at specified p-value - genes <- list() - allCorrs <- list() - i <- 0 - for(celltype in celltypes){ - # correlation for each celltype at specified p-value - corrOut <- plot_celltype_correlation(dataset_name, allstudies, celltype, pvalue) - i <- i+1 - # get correlation matrix for each celltype - allCorrs[[i]] <- corrOut[[1]] - # get all present genes for current celltype - genes[[i]] <- corrOut[[4]] - } - # total number of unique genes across celltypes - genes <- unlist(genes) - totNumGenes <- length(unique(genes)) - # average correlations actoss celltypes, for specified p-value - meanCorr <- Reduce("+",allCorrs)/length(allCorrs) - - # rename columns and rows - if(data_names!="placeholder"&&is.vector(data_names)){ - rownames(meanCorr) <- colnames(meanCorr) <- data_names - } + pvals, + data_names="placeholder", + output_path=getwd()){ + + # outputs + output_list <- list() + + # loop over each p-value + for(pvalue in pvals){ + # validate function input params + validate_input_parameters_correlation(dataset_name=dataset_name, allstudies=allstudies, celltypes=celltypes, + pvalue=pvalue, data_names=data_names, output_path=output_path) + + # list for genes of each celltype at specified p-value + genes <- list() + allCorrs <- list() + i <- 0 + for(celltype in celltypes){ + # correlation for each celltype at specified p-value + corrOut <- plot_celltype_correlation(dataset_name, allstudies, celltype, pvalue) + i <- i+1 + # get correlation matrix for each celltype + allCorrs[[i]] <- corrOut[[1]] + # get all present genes for current celltype + genes[[i]] <- corrOut[[4]] + } + # total number of unique genes across celltypes + genes <- unlist(genes) + totNumGenes <- length(unique(genes)) + # average correlations actoss celltypes, for specified p-value + meanCorr <- Reduce("+",allCorrs)/length(allCorrs) - # plot correlation matrix - corr_plot.plot <- ggcorrplot(round(meanCorr,3), - hc.order = F,insig="pch",pch=5,pch.col = "grey", - pch.cex=9, - title=paste0("Total ", totNumGenes," DEGs"), - colors = c("#FC4E07", "white", "#00AFBB"), - outline.color = "white", lab = TRUE, lab_size=3.5, - sig.level=0.05) + theme(plot.title = element_text(hjust = 0.7)) # add/remove type="upper" in ggcorrplot (after hc.order) to get upper triangular/full matrix + # rename columns and rows + if(data_names!="placeholder"&&is.vector(data_names)){ + rownames(meanCorr) <- colnames(meanCorr) <- data_names + } + + # plot correlation matrix + corr_plot.plot <- ggcorrplot(round(meanCorr,3), + hc.order = F,insig="pch",pch=5,pch.col = "grey", + pch.cex=9, + title=paste0("Total ", totNumGenes," DEGs"), + colors = c("#FC4E07", "white", "#00AFBB"), + outline.color = "white", lab = TRUE, lab_size=3.5, + sig.level=0.05) + theme(plot.title = element_text(hjust = 0.7)) # add/remove type="upper" in ggcorrplot (after hc.order) to get upper triangular/full matrix + + # store output in list with p-value as key + output_list[[as.character(pvalue)]] <- list(corr_plot = corr_plot.plot, meanCorr = meanCorr) + + # save the plot + if (!is.null(output_path)) { + ggsave(paste0(output_path, "/mean_correlation_p", pvalue, ".png"), corr_plot.plot) + #write.csv(meanCorr, paste0(output_path, "/mean_correlation_matrix_p", pvalue, ".csv")) + } + + } # output plot and final matrix - return(list(corr_plot.plot,meanCorr)) + return(output_list) } diff --git a/R/validate_input_parameters_correlation.r b/R/validate_input_parameters_correlation.r index b0fe35a..5487b2a 100644 --- a/R/validate_input_parameters_correlation.r +++ b/R/validate_input_parameters_correlation.r @@ -17,6 +17,7 @@ #' @param fontsize_legendlabels font size for legend labels in plot #' @param fontsize_legendtitle font size for legend title in plot #' @param fontsize_facet_labels font size for facet labels +#' @param output_path base path in which outputs will be stored #' Checks all correlation analysis parameters are specified correctly @@ -36,7 +37,8 @@ validate_input_parameters_correlation <- function(dataset_name="placeholder", fontsize_title="placeholder", fontsize_legendlabels="placeholder", fontsize_legendtitle="placeholder", - fontsize_facet_labels="placeholder"){ + fontsize_facet_labels="placeholder", + output_path="placeholder"){ # test each parameter to check if it works if(dataset_name!="placeholder"){ @@ -146,5 +148,14 @@ validate_input_parameters_correlation <- function(dataset_name="placeholder", } } } - + if(output_path!="placeholder"){ + if(output_path!=getwd()){ + if(!is.character(output_path)){ + stop("Error: output_path should be a string specifying the base path where output will be stored.") + } + if(!dir.exists(output_path)){ + stop("Error: the specified output_path directory does not exist.") + } + } + } } \ No newline at end of file diff --git a/man/correlation_boxplots.Rd b/man/correlation_boxplots.Rd index e94ba96..8eb7b18 100644 --- a/man/correlation_boxplots.Rd +++ b/man/correlation_boxplots.Rd @@ -7,7 +7,7 @@ correlation_boxplots( corrMats, numRealDatasets, - pval, + pvals, alphaval = 0.25, numPerms = 5, numSubsets = 5, @@ -17,7 +17,8 @@ correlation_boxplots( fontsize_title = 14, fontsize_legendlabels = 9, fontsize_legendtitle = 9, - fontsize_facet_labels = 9 + fontsize_facet_labels = 9, + output_path = getwd() ) } \arguments{ @@ -25,7 +26,7 @@ correlation_boxplots( \item{numRealDatasets}{total number of \emph{real} datasets (most likely the number of studies, but sometimes a study may be split e.g. into 2 brain regions, so in this case it would be the number of studies plus 1)} -\item{pval}{the cut-off p-value which was used to select DEGs} +\item{pvals}{the cut-off p-value which was used to select DEGs} \item{alphaval}{(alpha) transparency of the non-mean boxplots} @@ -46,6 +47,8 @@ correlation_boxplots( \item{fontsize_legendtitle}{font size for legend title in plot} \item{fontsize_facet_labels}{font size for facet labels} + +\item{output_path}{base path in which outputs will be stored} } \value{ box plots for correlation matrices at a certain p-value cut-off, sorted by celltype and then type of correlation diff --git a/man/plot_mean_correlation.Rd b/man/plot_mean_correlation.Rd index 5dd850f..35e0166 100644 --- a/man/plot_mean_correlation.Rd +++ b/man/plot_mean_correlation.Rd @@ -8,8 +8,9 @@ plot_mean_correlation( dataset_name, allstudies, celltypes, - pvalue, - data_names = "placeholder" + pvals, + data_names = "placeholder", + output_path = getwd() ) } \arguments{ @@ -19,9 +20,11 @@ plot_mean_correlation( \item{celltypes}{a list containing the celltypes to compute mean correlation across} -\item{pvalue}{the cut-off p-value which will be used to select DEGs} +\item{pvals}{the cut-off p-value which will be used to select DEGs} \item{data_names}{names of the datasets as they appear in the correlation plot} + +\item{output_path}{base path in which outputs will be stored} } \value{ mean correlation matrix diff --git a/man/validate_input_parameters_correlation.Rd b/man/validate_input_parameters_correlation.Rd index 49aed7c..469c7cf 100644 --- a/man/validate_input_parameters_correlation.Rd +++ b/man/validate_input_parameters_correlation.Rd @@ -21,7 +21,8 @@ validate_input_parameters_correlation( fontsize_title = "placeholder", fontsize_legendlabels = "placeholder", fontsize_legendtitle = "placeholder", - fontsize_facet_labels = "placeholder" + fontsize_facet_labels = "placeholder", + output_path = "placeholder" ) } \arguments{ @@ -57,7 +58,9 @@ validate_input_parameters_correlation( \item{fontsize_legendtitle}{font size for legend title in plot} -\item{fontsize_facet_labels}{font size for facet labels +\item{fontsize_facet_labels}{font size for facet labels} + +\item{output_path}{base path in which outputs will be stored Checks all correlation analysis parameters are specified correctly} } \description{