Skip to content

Commit

Permalink
Merge pull request #320 from ncborcherding/dev
Browse files Browse the repository at this point in the history
  • Loading branch information
ncborcherding authored Feb 16, 2024
2 parents 3f25230 + 1e0d8ca commit e51695a
Show file tree
Hide file tree
Showing 27 changed files with 10,595 additions and 1,613 deletions.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Description: scRepertoire is a toolkit for processing and analyzing single-cell
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
biocViews: Software, ImmunoOncology, SingleCell, Classification, Annotation, Sequencing
Depends:
ggplot2,
Expand Down Expand Up @@ -62,4 +62,3 @@ LinkingTo:
Rcpp
URL: https://www.borch.dev/uploads/screpertoire/
BugReports: https://github.com/ncborcherding/scRepertoire/issues

3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ export(percentGenes)
export(percentKmer)
export(percentVJ)
export(positionalEntropy)
export(positionalProperty)
export(subsetClones)
export(vizGenes)
import(dplyr)
Expand All @@ -51,6 +52,7 @@ importFrom(dplyr,bind_rows)
importFrom(dplyr,count)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,mutate_at)
importFrom(dplyr,sample_n)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
Expand Down Expand Up @@ -98,6 +100,7 @@ importFrom(stats,mad)
importFrom(stats,na.omit)
importFrom(stats,optim)
importFrom(stats,pgamma)
importFrom(stats,qt)
importFrom(stats,quantile)
importFrom(stats,sd)
importFrom(stats,setNames)
Expand Down
4 changes: 3 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
* Added ```percentVJ()```
* Added ```percentKmer()```
* Added ```exportClones()```
* Added ```positionalEntropy()```
* Added ```positionalProperty()```
* Changed compareClonotypes to ```clonalCompare()```
* Changed clonotypeSizeDistribution to ```clonalSizeDistribution()```
* Changed scatterClonotypes to ```clonalScatter()```
Expand Down Expand Up @@ -38,14 +40,14 @@
* ```clonalDiversity()``` no longer automatically orders samples.
* Remove **order** parameter from ```clonalQuant()```, ```clonalLength()```, and ```clonalAbundance()```
* **x.axis** parameter in ```clonalDiversity()``` separated from **group.by** parameter
* Filtering chains will not eliminate non-matching chains.

## DEPRECATED AND DEFUNCT

* Deprecate stripBarcodes()
* Deprecate expression2List() (now only an internal function).
* Deprecate checkContigs()


# scRepertoire VERSION 1.11.0

* Rebasing for the purposes of bioconductor version
Expand Down
7 changes: 5 additions & 2 deletions R/combineExpression.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,14 @@ combineExpression <- function(input.data,
stop("Adjust the cloneSize parameter - there are groupings < 1")
}
cloneSize <- c(None = 0, cloneSize)

cloneCall <- .theCall(input.data, cloneCall)
if (chain != "both") {
input.data[[i]] <- .off.the.chain(input.data[[i]], chain, cloneCall)
for(i in seq_along(input.data)) {
input.data[[i]] <- .off.the.chain(input.data[[i]], chain, cloneCall)
}
}
input.data <- .checkList(input.data)
cloneCall <- .theCall(input.data, cloneCall)

#Getting Summaries of clones from combineTCR() or combineBCR()
Con.df <- NULL
Expand Down
6 changes: 3 additions & 3 deletions R/exportClones.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ exportClones <- function(input.data,

.TCRmatchExport<- function(input.data) {

input.data <- .data.wrangle(input.data, group.by, "CTgene", "TRB")
input.data <- .data.wrangle(input.data, NULL, "CTgene", "TRB")

for(i in seq_along(input.data)) {
input.data[[i]] <- .off.the.chain(input.data[[i]], "TRB", "CTaa")
input.data[[i]] <- .off.the.chain(input.data[[i]], "TRB", "CTnt")
input.data[[i]] <- .off.the.chain(input.data[[i]], "TRB", "CTaa", check = FALSE)
input.data[[i]] <- .off.the.chain(input.data[[i]], "TRB", "CTnt", check = FALSE)
}

input.data <- bind_rows(input.data, .id = "group")
Expand Down
5 changes: 5 additions & 0 deletions R/global.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,10 @@
utils::globalVariables ("group")
utils::globalVariables ("chain2_aa")
utils::globalVariables ("dotSize")
utils::globalVariables ("ci_lower")
utils::globalVariables ("ci_upper")
utils::globalVariables ("mat_melt")
utils::globalVariables ("position")
utils::globalVariables ("se")
invisible ()
}
47 changes: 12 additions & 35 deletions R/percentAA.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}.
#' @import ggplot2
#' @importFrom reshape2 melt
#' @importFrom dplyr mutate_at %>%
#' @export
#' @concept Summarize_Repertoire
#' @return ggplot of stacked bar graphs of amino acid proportions
Expand All @@ -37,29 +38,17 @@ percentAA <- function(input.data,
input.data <- .groupList(input.data, group.by)
}

res.list <- list()
for (i in seq_along(input.data)) {
strings <- input.data[[i]][,"CTaa"]
strings <- do.call(c,str_split(strings, ";"))
strings <- strings[strings != "NA"]
strings <- strings[nchar(strings) < aa.length]
strings <- na.omit(strings)
strings <- .padded_strings(strings, aa.length)
strings <- do.call(rbind, strings)

#Summarizing the % of each position
aa.output <- apply(strings, 2, function(x) {
summary <- as.data.frame(prop.table(table(x, useNA = "always")))
})

#Forming a matrix of % across each position and formatting
res <- suppressWarnings(Reduce(function(...) merge(..., all = TRUE, by="x"), aa.output))
colnames(res) <- c("AA", paste0("pos.", seq_len(aa.length)))
res[seq_len(20),][is.na(res[seq_len(20),])] <- 0
melt.res <- suppressMessages(melt(res))
melt.res$group <- names(input.data)[i]
res.list[[i]] <- melt.res
}
#Getting AA Counts
aa.count.list <- .aa.counter(input.data, "CTaa", aa.length)

#Calculating proportion and melting data
lapply(seq_along(aa.count.list), function(x) {
aa.count.list[[x]] <- aa.count.list[[x]] %>% mutate_if(is.numeric, list(~ ./sum(.)))
melt.res <- suppressMessages(melt(aa.count.list[[x]]))
melt.res$group <- names(input.data)[x]
melt.res
}) -> res.list

mat_melt <- do.call(rbind, res.list)
plot <- ggplot(mat_melt, aes(x=as.factor(variable), y = value, fill=AA)) +
geom_bar(stat = "identity", position="fill", lwd= 0.25, color = "black") +
Expand All @@ -78,15 +67,3 @@ percentAA <- function(input.data,
return(plot)
}

# Split each sequence into single characters and right-pad it with NA so
# that every result holds at least `max_length` elements.
#
# @param strings Character vector of sequences to split.
# @param max_length Target length. Sequences shorter than this are padded
#   with NA; sequences of this length or longer are split but left unpadded.
# @return A list with one character vector per element of `strings`.
.padded_strings <- function(strings, max_length) {
  # Return the list directly: ending the function on an assignment to a
  # throwaway local made the result invisible to the caller.
  lapply(strings, function(str) {
    str_len <- nchar(str)
    residues <- strsplit(str, split = "")[[1]]
    shortfall <- max_length - str_len
    # Guard keeps rep() from receiving a negative count for long sequences.
    if (shortfall > 0) {
      residues <- c(residues, rep(NA_character_, shortfall))
    }
    residues
  })
}
66 changes: 22 additions & 44 deletions R/positionalEntropy.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
#' @param aa.length The maximum length of the CDR3 amino acid sequence.
#' @param method The method to calculate the entropy/diversity -
#' "shannon", "inv.simpson", "norm.entropy".
#' @param n.boots number of bootstraps to down sample in order to
#' get mean diversity.
#' @param exportTable Returns the data frame used for forming the graph.
#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}.
#' @import ggplot2
Expand All @@ -36,8 +34,7 @@ positionalEntropy <- function(input.data,
chain = "TRB",
group.by = NULL,
aa.length = 20,
method = "shannon",
n.boots = 20,
method = "norm.entropy",
exportTable = FALSE,
palette = "inferno") {

Expand All @@ -55,52 +52,33 @@ positionalEntropy <- function(input.data,
input.data <- .groupList(input.data, group.by)
}

#Selecting Diversit Function
#Selecting Diversity Function
diversityFunc <- switch(method,
"norm.entropy" = .shannon,
"norm.entropy" = .normentropy,
"inv.simpson" = .invsimpson,
"shannon" = .normentropy,
"shannon" = .shannon,
stop("Invalid method provided"))

min <- .short.check(input.data, cloneCall)
aa.count.list <- .aa.counter(input.data, "CTaa", aa.length)

lapply(input.data, function(x) {
lapply(seq_len(n.boots), function(y) {
strings <- x[,cloneCall]
strings <- do.call(c,str_split(strings, ";"))
strings <- strings[strings != "NA"]
strings <- na.omit(strings)
strings <- strings[nchar(strings) < aa.length]
strings <- strings[sample(seq_len(length(strings)), min)]
strings <- .padded_strings(strings, aa.length)
strings <- do.call(rbind, strings)
aa.output <- apply(strings, 2, function(z) {
summary <- as.data.frame(table(z, useNA = "always"))
})
res <- suppressWarnings(Reduce(function(...) merge(..., all = TRUE, by="z"), aa.output))
colnames(res) <- c("AA", paste0("pos.", seq_len(aa.length)))
res[seq_len(20),][is.na(res[seq_len(20),])] <- 0
diversity <- sapply(res[,2:ncol(res)], diversityFunc)
diversity[is.nan(diversity)] <- 0
diversity
}) -> diversity.calculations
diversity.calculations <- do.call(rbind, diversity.calculations)
diversity.means <- colMeans(diversity.calculations)
diversity.means
}) -> positional.diversity

mat <- do.call(rbind, positional.diversity)
mat_melt <- suppressMessages(melt(mat))
lapply(aa.count.list, function(x){
diversity <- sapply(x[,2:ncol(x)], diversityFunc)
diversity[is.nan(diversity)] <- 0
diversity
}) -> group.results

mat <- do.call(rbind, group.results)
mat_melt <- suppressMessages(melt(mat))

plot <- ggplot(mat_melt, aes(x=Var2, y = value, group= Var1, color = Var1)) +
geom_line(stat = "identity") +
geom_point() +
scale_color_manual(name = "Groups",
values = rev(.colorizer(palette,nrow(mat)))) +
xlab("Amino Acid Residues") +
ylab("Relative Diversity") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
plot <- ggplot(mat_melt, aes(x=Var2, y = value, group= Var1, color = Var1)) +
geom_line(stat = "identity") +
geom_point() +
scale_color_manual(name = "Groups",
values = rev(.colorizer(palette,nrow(mat)))) +
xlab("Amino Acid Residues") +
ylab("Relative Diversity") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
if (exportTable == TRUE) {
return(mat_melt)
}
Expand Down
Loading

0 comments on commit e51695a

Please sign in to comment.