diff --git a/DESCRIPTION b/DESCRIPTION
index 75eaac66..019209ad 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -9,7 +9,7 @@ Description: scRepertoire is a toolkit for processing and analyzing single-cell
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
biocViews: Software, ImmunoOncology, SingleCell, Classification, Annotation, Sequencing
Depends:
ggplot2,
@@ -62,4 +62,3 @@ LinkingTo:
Rcpp
URL: https://www.borch.dev/uploads/screpertoire/
BugReports: https://github.com/ncborcherding/scRepertoire/issues
-
diff --git a/NAMESPACE b/NAMESPACE
index 3c139e00..c1c0a76e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -32,6 +32,7 @@ export(percentGenes)
export(percentKmer)
export(percentVJ)
export(positionalEntropy)
+export(positionalProperty)
export(subsetClones)
export(vizGenes)
import(dplyr)
@@ -51,6 +52,7 @@ importFrom(dplyr,bind_rows)
importFrom(dplyr,count)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
+importFrom(dplyr,mutate_at)
importFrom(dplyr,sample_n)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
@@ -98,6 +100,7 @@ importFrom(stats,mad)
importFrom(stats,na.omit)
importFrom(stats,optim)
importFrom(stats,pgamma)
+importFrom(stats,qt)
importFrom(stats,quantile)
importFrom(stats,sd)
importFrom(stats,setNames)
diff --git a/NEWS.md b/NEWS.md
index 7a285832..207ec27b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -8,6 +8,8 @@
* Added ```percentVJ()```
* Added ```percentKmer()```
* Added ```exportClones()```
+* Added ```positionalEntropy()```
+* Added ```positionalProperty()```
* Changed compareClonotypes to ```clonalCompare()```
* Changed clonotypeSizeDistribution to ```clonalSizeDistribution()```
* Changed scatterClonotypes to ```clonalScatter()```
@@ -38,6 +40,7 @@
* ```clonalDiversity()``` no longer automatically orders samples.
* Remove **order** parameter from ```clonalQuant()```, ```clonalLength()```, and ```clonalAbundance()```
* **x.axis** parameter in ```clonalDiversity()``` separated from **group.by** parameter
+* filtering chains will not eliminate none matching chains.
## DEPRECATED AND DEFUNCT
@@ -45,7 +48,6 @@
* Deprecate expression2List() (now only an internal function).
* Deprecate checkContigs()
-
# scRepertoire VERSION 1.11.0
* Rebasing for the purposes of bioconductor version
diff --git a/R/combineExpression.R b/R/combineExpression.R
index 4d268147..2aace59c 100644
--- a/R/combineExpression.R
+++ b/R/combineExpression.R
@@ -69,11 +69,14 @@ combineExpression <- function(input.data,
stop("Adjust the cloneSize parameter - there are groupings < 1")
}
cloneSize <- c(None = 0, cloneSize)
+
+ cloneCall <- .theCall(input.data, cloneCall)
if (chain != "both") {
- input.data[[i]] <- .off.the.chain(input.data[[i]], chain, cloneCall)
+ for(i in seq_along(input.data)) {
+ input.data[[i]] <- .off.the.chain(input.data[[i]], chain, cloneCall)
+ }
}
input.data <- .checkList(input.data)
- cloneCall <- .theCall(input.data, cloneCall)
#Getting Summaries of clones from combineTCR() or combineBCR()
Con.df <- NULL
diff --git a/R/exportClones.R b/R/exportClones.R
index 0b4ba356..1de69e87 100644
--- a/R/exportClones.R
+++ b/R/exportClones.R
@@ -60,11 +60,11 @@ exportClones <- function(input.data,
.TCRmatchExport<- function(input.data) {
- input.data <- .data.wrangle(input.data, group.by, "CTgene", "TRB")
+ input.data <- .data.wrangle(input.data, NULL, "CTgene", "TRB")
for(i in seq_along(input.data)) {
- input.data[[i]] <- .off.the.chain(input.data[[i]], "TRB", "CTaa")
- input.data[[i]] <- .off.the.chain(input.data[[i]], "TRB", "CTnt")
+ input.data[[i]] <- .off.the.chain(input.data[[i]], "TRB", "CTaa", check = FALSE)
+ input.data[[i]] <- .off.the.chain(input.data[[i]], "TRB", "CTnt", check = FALSE)
}
input.data <- bind_rows(input.data, .id = "group")
diff --git a/R/global.R b/R/global.R
index 198815ff..f4bc7bd2 100644
--- a/R/global.R
+++ b/R/global.R
@@ -43,5 +43,10 @@
utils::globalVariables ("group")
utils::globalVariables ("chain2_aa")
utils::globalVariables ("dotSize")
+ utils::globalVariables ("ci_lower")
+ utils::globalVariables ("ci_upper")
+ utils::globalVariables ("mat_melt")
+ utils::globalVariables ("position")
+ utils::globalVariables ("se")
invisible ()
}
diff --git a/R/percentAA.R b/R/percentAA.R
index 7b897290..5c50e85d 100644
--- a/R/percentAA.R
+++ b/R/percentAA.R
@@ -21,6 +21,7 @@
#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}.
#' @import ggplot2
#' @importFrom reshape2 melt
+#' @importFrom dplyr mutate_at %>%
#' @export
#' @concept Summarize_Repertoire
#' @return ggplot of stacked bar graphs of amino acid proportions
@@ -37,29 +38,17 @@ percentAA <- function(input.data,
input.data <- .groupList(input.data, group.by)
}
- res.list <- list()
- for (i in seq_along(input.data)) {
- strings <- input.data[[i]][,"CTaa"]
- strings <- do.call(c,str_split(strings, ";"))
- strings <- strings[strings != "NA"]
- strings <- strings[nchar(strings) < aa.length]
- strings <- na.omit(strings)
- strings <- .padded_strings(strings, aa.length)
- strings <- do.call(rbind, strings)
-
- #Summarizing the % of each position
- aa.output <- apply(strings, 2, function(x) {
- summary <- as.data.frame(prop.table(table(x, useNA = "always")))
- })
-
- #Forming a matrix of % across each position and formatting
- res <- suppressWarnings(Reduce(function(...) merge(..., all = TRUE, by="x"), aa.output))
- colnames(res) <- c("AA", paste0("pos.", seq_len(aa.length)))
- res[seq_len(20),][is.na(res[seq_len(20),])] <- 0
- melt.res <- suppressMessages(melt(res))
- melt.res$group <- names(input.data)[i]
- res.list[[i]] <- melt.res
- }
+ #Getting AA Counts
+ aa.count.list <- .aa.counter(input.data, "CTaa", aa.length)
+
+ #Calculating proportion and melting data
+ lapply(seq_along(aa.count.list), function(x) {
+ aa.count.list[[x]] <- aa.count.list[[x]] %>% mutate_if(is.numeric, list(~ ./sum(.)))
+ melt.res <- suppressMessages(melt(aa.count.list[[x]]))
+ melt.res$group <- names(input.data)[x]
+ melt.res
+ }) -> res.list
+
mat_melt <- do.call(rbind, res.list)
plot <- ggplot(mat_melt, aes(x=as.factor(variable), y = value, fill=AA)) +
geom_bar(stat = "identity", position="fill", lwd= 0.25, color = "black") +
@@ -78,15 +67,3 @@ percentAA <- function(input.data,
return(plot)
}
-.padded_strings <- function(strings, max_length) {
-
- x <- lapply(strings, function(str) {
- str_len <- nchar(str)
- str <- strsplit(str, split = "")[[1]]
- if (str_len < max_length) {
- c(str, rep(NA, max_length - str_len))
- } else {
- str
- }
- })
- }
diff --git a/R/positionalEntropy.R b/R/positionalEntropy.R
index 1cd18a58..5560bec7 100644
--- a/R/positionalEntropy.R
+++ b/R/positionalEntropy.R
@@ -23,8 +23,6 @@
#' @param aa.length The maximum length of the CDR3 amino acid sequence.
#' @param method The method to calculate the entropy/diversity -
#' "shannon", "inv.simpson", "norm.entropy".
-#' @param n.boots number of bootstraps to down sample in order to
-#' get mean diversity.
#' @param exportTable Returns the data frame used for forming the graph.
#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}.
#' @import ggplot2
@@ -36,8 +34,7 @@ positionalEntropy <- function(input.data,
chain = "TRB",
group.by = NULL,
aa.length = 20,
- method = "shannon",
- n.boots = 20,
+ method = "norm.entropy",
exportTable = FALSE,
palette = "inferno") {
@@ -55,52 +52,33 @@ positionalEntropy <- function(input.data,
input.data <- .groupList(input.data, group.by)
}
- #Selecting Diversit Function
+ #Selecting Diversity Function
diversityFunc <- switch(method,
- "norm.entropy" = .shannon,
+ "norm.entropy" = .normentropy,
"inv.simpson" = .invsimpson,
- "shannon" = .normentropy,
+ "shannon" = .shannon,
stop("Invalid method provided"))
- min <- .short.check(input.data, cloneCall)
+ aa.count.list <- .aa.counter(input.data, "CTaa", aa.length)
- lapply(input.data, function(x) {
- lapply(seq_len(n.boots), function(y) {
- strings <- x[,cloneCall]
- strings <- do.call(c,str_split(strings, ";"))
- strings <- strings[strings != "NA"]
- strings <- na.omit(strings)
- strings <- strings[nchar(strings) < aa.length]
- strings <- strings[sample(seq_len(length(strings)), min)]
- strings <- .padded_strings(strings, aa.length)
- strings <- do.call(rbind, strings)
- aa.output <- apply(strings, 2, function(z) {
- summary <- as.data.frame(table(z, useNA = "always"))
- })
- res <- suppressWarnings(Reduce(function(...) merge(..., all = TRUE, by="z"), aa.output))
- colnames(res) <- c("AA", paste0("pos.", seq_len(aa.length)))
- res[seq_len(20),][is.na(res[seq_len(20),])] <- 0
- diversity <- sapply(res[,2:ncol(res)], diversityFunc)
- diversity[is.nan(diversity)] <- 0
- diversity
- }) -> diversity.calculations
- diversity.calculations <- do.call(rbind, diversity.calculations)
- diversity.means <- colMeans(diversity.calculations)
- diversity.means
- }) -> positional.diversity
-
- mat <- do.call(rbind, positional.diversity)
- mat_melt <- suppressMessages(melt(mat))
+ lapply(aa.count.list, function(x){
+ diversity <- sapply(x[,2:ncol(x)], diversityFunc)
+ diversity[is.nan(diversity)] <- 0
+ diversity
+ }) -> group.results
+
+ mat <- do.call(rbind, group.results)
+ mat_melt <- suppressMessages(melt(mat))
- plot <- ggplot(mat_melt, aes(x=Var2, y = value, group= Var1, color = Var1)) +
- geom_line(stat = "identity") +
- geom_point() +
- scale_color_manual(name = "Groups",
- values = rev(.colorizer(palette,nrow(mat)))) +
- xlab("Amino Acid Residues") +
- ylab("Relative Diversity") +
- theme_classic() +
- theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
+ plot <- ggplot(mat_melt, aes(x=Var2, y = value, group= Var1, color = Var1)) +
+ geom_line(stat = "identity") +
+ geom_point() +
+ scale_color_manual(name = "Groups",
+ values = rev(.colorizer(palette,nrow(mat)))) +
+ xlab("Amino Acid Residues") +
+ ylab("Relative Diversity") +
+ theme_classic() +
+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
if (exportTable == TRUE) {
return(mat_melt)
}
diff --git a/R/positionalProperty.R b/R/positionalProperty.R
new file mode 100644
index 00000000..d03f0cbe
--- /dev/null
+++ b/R/positionalProperty.R
@@ -0,0 +1,279 @@
+#' Examining the mean property of amino acids by position
+#'
+#' This function calculates the mean selected property for
+#' amino acids along the residues of the CDR3 amino acid sequence.
+#' The ribbon surrounding the individual line represents the 95%
+#' confidence interval.
+#'
+#' @details
+#' More information for the individual methods can be found at the following citations:
+#'
+#' \strong{Atchley:} \href{https://pubmed.ncbi.nlm.nih.gov/15851683/}{citation}
+#'
+#' \strong{Kidera:} \href{https://link.springer.com/article/10.1007/BF01025492}{citation}
+#'
+#' \strong{stScales:} \href{https://pubmed.ncbi.nlm.nih.gov/19373543/}{citation}
+#'
+#' \strong{tScales:} \href{https://www.sciencedirect.com/science/article/pii/S0022286006006314?casa_token=uDj97DwXDDEAAAAA:VZfahldPRwU1WObySJlohudtMSDwF7nJSUzcEGwPhvkY13ALLKhs08Cf0_FyyfYZjxJlj-fVf0SM}{citation}
+#'
+#' \strong{VHSE:} \href{https://pubmed.ncbi.nlm.nih.gov/15895431/}{citation}
+#'
+#'
+#' @examples
+#' #Making combined contig data
+#' combined <- combineTCR(contig_list,
+#' samples = c("P17B", "P17L", "P18B", "P18L",
+#' "P19B","P19L", "P20B", "P20L"))
+#' positionalProperty(combined,
+#' chain = "TRB",
+#' method = "Atchley",
+#' aa.length = 20)
+
+#' @param input.data The product of \code{\link{combineTCR}},
+#' \code{\link{combineBCR}}, or \code{\link{combineExpression}}.
+#' @param chain "TRA", "TRB", "TRG", "TRG", "IGH", "IGL".
+#' @param group.by The variable to use for grouping.
+#' @param aa.length The maximum length of the CDR3 amino acid sequence.
+#' @param method The method to calculate the property - "Atchley", "Kidera",
+#' "stScales", "tScales", or "VHSE".
+#' @param exportTable Returns the data frame used for forming the graph.
+#' @param palette Colors to use in visualization - input any \link[grDevices]{hcl.pals}.
+#' @import ggplot2
+#' @importFrom stringr str_split
+#' @importFrom stats qt
+#' @export
+#' @concept Summarize_Repertoire
+#' @return ggplot of line graph of diversity by position
+
+positionalProperty <- function(input.data,
+ chain = "TRB",
+ group.by = NULL,
+ aa.length = 20,
+ method = "Atchley",
+ exportTable = FALSE,
+ palette = "inferno") {
+ options( dplyr.summarise.inform = FALSE )
+ if(method %!in% c("Atchley", "Kidera", "stScales", "tScales", "VHSE")) {
+ stop("Please select a compatible method.")
+ }
+ sco <- is_seurat_object(input.data) | is_se_object(input.data)
+ input.data <- .data.wrangle(input.data,
+ group.by,
+ .theCall(input.data, "CTaa", check.df = FALSE),
+ chain)
+ cloneCall <- .theCall(input.data, "CTaa")
+
+ if(!is.null(group.by) & !sco) {
+ input.data <- .groupList(input.data, group.by)
+ }
+
+ #Selecting Diversit Function
+ propertyFunc <- switch(method,
+ "Atchley" = .af.ref,
+ "Kidera" = .kf.ref,
+ "stScales" = .stscales.ref,
+ "tScales" = .tscales.ref,
+ "VHSE" = .vhse.ref,
+ stop("Invalid method provided"))
+
+ #Getting AA Counts
+ aa.count.list <- .aa.counter(input.data, cloneCall, aa.length)
+
+ #Calculating properties and melting data
+ lapply(seq_along(aa.count.list), function(x) {
+ lapply(seq_len(nrow(aa.count.list[[x]]))[-1], function(y) {
+ pos <- aa.count.list[[x]][1:20,y]
+ names(pos) <- aa.count.list[[x]][1:20,1]
+ pos <- pos[pos > 0]
+ lapply(seq_len(length(pos)), function(t) {
+ char <- names(pos[t])
+ results <- rep(propertyFunc[char,], pos[t])
+ results
+ }) -> output.values
+ output.values <- unlist(output.values)
+ df <- data.frame(group = names(output.values),
+ value = unlist(output.values))
+
+ if(nrow(df) == 0) { #For only NA positions
+ summary <- data.frame(mean = rep(0, dim(propertyFunc)[2]),
+ ci_lower = rep(0, dim(propertyFunc)[2]),
+ ci_upper = rep(0, dim(propertyFunc)[2]))
+
+ } else {
+ summary <- df %>%
+ group_by(group) %>%
+ summarise(mean = mean(value),
+ sd = sd(value), # Standard deviation
+ n = n(), # Number of observations per group
+ se = sd / sqrt(n), # Standard error of the mean
+ ci_lower = mean - qt(0.975, n-1) * se,
+ ci_upper = mean + qt(0.975, n-1) * se) %>%
+ as.data.frame()
+
+ summary <- summary[,c("mean", "ci_lower", "ci_upper")]
+ }
+ summary$property <- colnames(propertyFunc)
+ summary
+ })-> output.values
+ int.mat <- bind_rows(output.values, .id = "position")
+ int.mat
+ }) -> property.calculations
+ names(property.calculations) <- names(input.data)
+ mat <- bind_rows(property.calculations, .id = "group")
+ mat$position <- paste0("pos", mat$position)
+ mat$position <- factor(mat$position, levels = str_sort(unique(mat$position), numeric = TRUE))
+
+ plot <- ggplot(mat, aes(x = position,
+ y = mean,
+ group = group,
+ color = group)) +
+ geom_ribbon(aes(ymin = ci_lower, ymax = ci_upper, fill = group), alpha = 0.5, lwd = 0) +
+ geom_line(stat = "identity", alpha = 0.5) +
+ geom_point() +
+ scale_color_manual(name = "Group",
+ values = .colorizer(palette,length(unique(mat$group)))) +
+ scale_fill_manual(values = .colorizer(palette,length(unique(mat$group)))) +
+ xlab("Amino Acid Residues") +
+ ylab("Mean Values") +
+ facet_grid(property~.) +
+ guides(fill = "none",
+ color = guide_legend(override.aes=list(fill=NA))) +
+ theme_classic() +
+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
+ if (exportTable == TRUE) {
+ return(mat_melt)
+ }
+ return(plot)
+
+}
+
+###############################
+#Amino Acid Property Matrices
+###############################
+
+.af.ref <- matrix(c(
+ -0.591, -1.302, -0.733, 1.570, -0.146, # A (Alanine)
+ 1.538, -0.055, 1.502, 0.440, 2.897, # R (Arginine)
+ 0.945, 0.828, 1.299, -0.169, 0.933, # N (Asparagine)
+ 1.050, 0.302, -1.768, 0.276, 1.068, # D (Aspartic Acid)
+ -1.343, 0.465, -0.862, -1.020, -0.255, # C (Cysteine)
+ 0.931, -0.179, -3.005, 0.941, 0.360, # Q (Glutamine)
+ 1.357, -1.453, 1.477, 0.114, -0.384, # E (Glutamic Acid)
+ -0.384, 1.652, 1.330, 1.045, 2.065, # G (Glycine)
+ -0.510, 0.292, -0.203, -1.378, -0.276, # H (Histidine)
+ -1.006, -0.590, 1.891, -0.397, 0.412, # I (Isoleucine)
+ -1.006, -0.590, 1.891, -0.397, 0.412, # L (Leucine)
+ 0.960, -0.181, 1.932, -0.041, 1.697, # K (Lysine)
+ -1.343, 0.465, -0.862, -1.020, -0.255, # M (Methionine)
+ -1.006, -0.590, 1.891, -0.397, 0.412, # F (Phenylalanine)
+ 0.189, 0.001, -1.756, 0.767, -0.542, # P (Proline)
+ -0.228, 1.399, -4.760, 0.670, -2.647, # S (Serine)
+ -0.228, 1.399, -4.760, 0.670, -2.647, # T (Threonine)
+ -1.006, -0.590, 1.891, -0.397, 0.412, # W (Tryptophan)
+ -1.343, 0.465, -0.862, -1.020, -0.255, # Y (Tyrosine)
+ -1.006, -0.590, 1.891, -0.397, 0.412 # V (Valine)
+), nrow = 20, ncol = 5, byrow = TRUE)
+rownames(.af.ref) <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
+colnames(.af.ref) <- paste0("AF",1:5)
+
+
+.kf.ref <- matrix(c(
+ -1.56, -1.67, -0.97, -0.27, -0.93, -0.78, -0.20, -2.65, 0.19, 0.82, # A (Alanine)
+ -2.52, 0.27, -0.85, -0.71, 1.11, 1.15, 1.07, 0.86, -0.49, 0.95, # R (Arginine)
+ -1.01, 0.53, 0.06, 0.13, -1.75, 0.16, 0.42, -0.73, 0.53, 0.81, # N (Asparagine)
+ -0.51, 0.07, 1.50, 0.13, 0.32, 0.42, -1.04, -0.26, 0.76, 0.22, # D (Aspartic Acid)
+ 0.24, -2.32, 0.60, -0.14, 1.27, 1.15, 0.21, 1.03, -0.84, -0.45, # C (Cysteine)
+ -0.22, -0.04, 0.53, -0.11, -1.18, 0.45, 0.84, -0.71, 0.82, -0.62, # Q (Glutamine)
+ -0.76, 0.18, 0.06, -0.42, 1.25, 0.83, 0.51, 0.44, 0.65, -0.20, # E (Glutamic Acid)
+ 0.81, -0.67, 2.80, 0.60, 0.88, 0.69, -0.29, 0.81, -0.92, 0.32, # G (Glycine)
+ -0.47, 1.94, 0.45, -1.61, 1.49, -0.38, 1.14, 0.74, -0.72, 1.59, # H (Histidine)
+ 1.97, -1.73, -0.16, 0.70, -0.83, 0.69, -0.07, -0.61, 0.81, -0.03, # I (Isoleucine)
+ 1.41, -0.23, -0.25, 1.53, -1.50, 0.27, 0.70, -0.10, 0.45, -0.02, # L (Leucine)
+ -1.27, 1.32, 0.57, 1.62, -0.30, 0.49, 0.84, -0.60, 0.71, 0.46, # K (Lysine)
+ 1.23, -0.08, -1.11, 0.16, -1.10, 0.84, 0.83, 1.23, -0.32, 0.51, # M (Methionine)
+ 1.46, -1.96, -0.23, 0.53, -0.60, 0.75, 0.20, 0.26, -0.16, 0.84, # F (Phenylalanine)
+ 0.59, 0.91, -0.01, -0.28, 0.22, -0.26, 0.19, 0.36, -0.33, 0.38, # P (Proline)
+ -0.34, 1.15, 0.25, -1.39, 0.67, -0.76, -0.40, 0.51, -0.27, 1.56, # S (Serine)
+ 0.71, 0.24, -0.96, 0.67, 1.27, -1.14, 0.54, 0.43, -0.37, 0.19, # T (Threonine)
+ 0.85, -0.71, 2.25, 0.65, -1.09, 0.53, -0.53, 0.16, 0.93, 0.60, # W (Tryptophan)
+ 0.02, 0.73, -0.16, 1.08, 1.59, 0.56, -0.82, 0.29, 0.25, 0.06, # Y (Tyrosine)
+ 1.08, -1.81, 0.08, 0.47, -1.04, 0.06, -0.46, -0.40, 0.50, -0.05 # V (Valine)
+), nrow = 20, ncol = 10, byrow = TRUE)
+rownames(.kf.ref) <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
+colnames(.kf.ref) <- paste0("KF",1:10)
+
+.vhse.ref <- matrix(c(
+ 0.15, -1.11, -1.35, -0.92, 0.02, -0.91, 0.36, -0.48, # A (Alanine)
+ -1.47, 1.45, 1.24, 1.27, 1.55, 1.47, 1.30, 0.83, # C (Cysteine)
+ -0.99, 0.00, -0.37, 0.69, -0.55, 0.85, 0.73, -0.80, # N (Asparagine)
+ -1.15, 0.67, -0.41, -0.01, -2.68, 1.31, 0.03, 0.56, # D (Aspartic Acid)
+ 0.18, -1.67, -0.46, -0.21, 0.00, 1.20, -1.61, -0.19, # C (Cysteine)
+ -0.96, 0.12, 0.18, 0.16, 0.09, 0.42, -0.20, -0.41, # Q (Glutamine)
+ -1.18, 0.40, 0.10, 0.36, -2.16, -0.17, 0.91, 0.02, # E (Glutamic Acid)
+ -0.20, -1.53, -2.63, 2.28, -0.53, -1.18, 2.01, -1.34, # G (Glycine)
+ -0.43, -0.25, 0.37, 0.19, 0.51, 1.28, 0.93, 0.65, # H (Histidine)
+ 1.27, -0.14, 0.30, -1.80, 0.30, -1.61, -0.16, -0.13, # I (Isoleucine)
+ 1.36, 0.07, 0.36, -0.80, 0.22, -1.37, 0.08, -0.62, # L (Leucine)
+ -1.17, 0.70, 0.70, 0.80, 1.64, 0.67, 1.63 , 0.13, # K (Lysine)
+ 1.01, -0.53, 0.43, 0.00, 0.23, 0.10, -0.86, -0.68, #M (Methionine)
+ 1.52, 0.61, 0.96, -0.16, 0.25, 0.28, -1.33, -0.20, # F (Phenylalanine)
+ 0.22, -0.17, -0.50, 0.05, -0.01, -1.34, -0.19, 3.56, # P (Proline)
+ -0.67, -0.86, -1.07, -0.41, -0.32, 0.27, -0.64, 0.11, # S (Serine)
+ -0.34, -0.51, -0.55, -1.06, -0.06, -0.01, -0.79, 0.39, # T (Threonine)
+ 1.50, 2.06, 1.79, 0.75, 0.75, -0.13, -1.01, -0.85, # W (Tryptophan)
+ 0.61, 1.60, 1.17, 0.73, 0.53, 0.25, -0.96, -0.52, #Y (Tyrosine)
+ 0.76, -0.92, -0.17, -1.91, 0.22, -1.40, -0.24, -0.03 #V (Valine)
+), nrow = 20, ncol = 8, byrow = TRUE)
+rownames(.vhse.ref) <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
+colnames(.vhse.ref) <- paste0("VHSE",1:8)
+
+.stscales.ref <- matrix(c(
+ -1.552, -0.791, -0.627, 0.237, -0.461, -2.229, 0.283, 1.221,
+ -0.059, 0.731, -0.013, -0.096, -0.253, 0.300, 1.256, 0.854,
+ -0.888, -0.057, -0.651, -0.214, 0.917, 0.164, -0.140, -0.166,
+ -0.907, -0.054, -0.781, -0.248, 1.120, 0.101, -0.245, -0.075,
+ -1.276, -0.401, 0.134, 0.859, -0.196, -0.720, 0.639, -0.857,
+ -0.662, 0.228, -0.193, -0.105, 0.418, 0.474, 0.172, 0.408,
+ -0.629, -0.390, -0.380, -0.366, 0.635, 0.514, 0.175, 0.367,
+ -1.844, -0.018, -0.184, 0.573, -0.728, -3.317, 0.166, 2.522,
+ -0.225, 0.361, 0.079, -1.037, 0.568, 0.273, 1.208, -0.001,
+ -0.785, -1.010, -0.349, -0.097, -0.402, 1.091, -0.139, -0.764,
+ -0.826, -0.379, 0.038, -0.059, -0.625, 1.025, -0.229, -0.129,
+ -0.504, 0.245, 0.297, -0.065, -0.387, 1.011, 0.525, 0.553,
+ -0.693, 0.498, 0.658, 0.457, -0.231, 1.064, 0.248, -0.778,
+ -0.019, 0.024, 1.080, -0.220, -0.937, 0.570, -0.357, 0.278,
+ -1.049, -0.407, -0.067, -0.066, -0.813, -0.890, 0.021, -0.894,
+ -1.343, -0.311, -0.917, -0.049, 0.549, -1.533, 0.166, 0.280,
+ -1.061, -0.928, -0.911, -0.063, 0.538, -0.775, -0.147, -0.717,
+ 0.853, 0.039, 0.260,-1.163, 0.160, -0.202, 1.010, 0.195,
+ 0.308, 0.569, 1.100, -0.464, -0.144, -0.354, -1.099, 0.162,
+ -1.133, -0.893, -0.325, 0.303, -0.561, -0.175, -0.020, -0.311
+), nrow = 20, ncol = 8, byrow = TRUE)
+rownames(.stscales.ref) <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
+colnames(.stscales.ref) <- paste0("stScales",1:8)
+
+.tscales.ref <- matrix(c(
+ -9.11, -1.63, 0.63, 1.04, 2.26,
+ 0.23, 3.89, -1.16, -0.39, -0.06,
+ -4.62, 0.66, 1.16, -0.22, 0.93,
+ -4.65, 0.75, 1.39, -0.40, 1.05,
+ -7.35, -0.86, -0.33, 0.80, 0.98,
+ -3.00, 1.72, 0.28, -0.39, 0.33,
+ -3.03, 1.82, 0.51, -0.58, 0.43,
+ -10.61, -1.21, -0.12, 0.75, 3.25,
+ -1.01, -1.31, 0.01, -1.81, -0.21,
+ -4.25, -0.28, -0.15, 1.40, -0.21,
+ -4.38, 0.28, -0.49, 1.45, 0.02,
+ -2.59, 2.34, -1.69, 0.41, -0.21,
+ -4.08, 0.98, -2.34, 1.64, -0.79,
+ 0.49, -0.94, -0.63, -1.27, -0.44,
+ -5.11, -3.54, -0.53, -0.36, -0.29,
+ -7.44, -0.65, 0.68, -0.17, 1.58,
+ -5.97, -0.62, 1.11, 0.31, 0.95,
+ 5.73, -2.67, -0.07, -1.96, -0.54,
+ 2.08, -0.47, 0.07, -1.67, -0.35,
+ -5.87, -0.94, 0.28, 1.10, 0.48
+), nrow = 20, ncol = 5, byrow = TRUE)
+rownames(.tscales.ref) <- c("A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
+colnames(.tscales.ref) <- paste0("tScales",1:5)
+
diff --git a/R/utils.R b/R/utils.R
index fc30d913..77e671c1 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -9,7 +9,7 @@ is_seurat_or_se_object <- function(obj) {
#Use to shuffle between chains Qile: the NA handling here *might* be related to the unnamed combineTCR bugs from the new rcpp con.df construction
#' @keywords internal
#' @author Ye-Lin Son Nick Borcherding
-.off.the.chain <- function(dat, chain, cloneCall) {
+.off.the.chain <- function(dat, chain, cloneCall, check = TRUE) {
chain1 <- toupper(chain) #to just make it easier
if (chain1 %in% c("TRA", "TRG", "IGH")) {
x <- 1
@@ -18,12 +18,61 @@ is_seurat_or_se_object <- function(obj) {
} else {
warning("It looks like ", chain, " does not match the available options for `chain = `")
}
+
+ if(check){
+ #Adding a chain check to prevent issues with TRA + TRD/IGH data
+ chain.check<- substr(str_split(dat[,"CTgene"], "_", simplify = TRUE)[,x], 1,3)
+ chain.check[chain.check == "NA"] <- NA
+ chain.check[chain.check == "NA;NA"] <- NA
+ chain.check[chain.check == "Non"] <- NA
+ any.alt.chains <- which(!is.na(chain.check) & chain.check != chain)
+ if(length(any.alt.chains) > 0) {
+ dat <- dat[-any.alt.chains,]
+ }
+ }
+
dat[,cloneCall] <- str_split(dat[,cloneCall], "_", simplify = TRUE)[,x]
dat[,cloneCall][dat[,cloneCall] == "NA"] <- NA
dat[,cloneCall][dat[,cloneCall] == "NA;NA"] <- NA
+
return(dat)
}
+.padded_strings <- function(strings, max_length) {
+
+ x <- lapply(strings, function(str) {
+ str_len <- nchar(str)
+ str <- strsplit(str, split = "")[[1]]
+ if (str_len < max_length) {
+ c(str, rep(NA, max_length - str_len))
+ } else {
+ str
+ }
+ })
+}
+
+
+#' @importFrom stringr str_split
+.aa.counter <- function(input.data, cloneCall, aa.length) {
+ lapply(input.data, function(x) {
+ strings <- x[,cloneCall]
+ strings <- do.call(c,str_split(strings, ";"))
+ strings <- strings[strings != "NA"]
+ strings <- na.omit(strings)
+ strings <- strings[nchar(strings) < aa.length]
+ strings <- .padded_strings(strings, aa.length)
+ strings <- do.call(rbind, strings)
+ aa.output <- apply(strings, 2, function(z) {
+ summary <- as.data.frame(table(z, useNA = "always"))
+ })
+ res <- suppressWarnings(Reduce(function(...) merge(..., all = TRUE, by="z"), aa.output))
+ colnames(res) <- c("AA", paste0("pos.", seq_len(aa.length)))
+ res[seq_len(20),][is.na(res[seq_len(20),])] <- 0
+ res
+ }) -> res.list
+ return(res.list)
+}
+
#Pulling a color palette for visualizations
#' @importFrom grDevices hcl.colors
#' @keywords internal
diff --git a/man/positionalEntropy.Rd b/man/positionalEntropy.Rd
index 601ac8b3..f8c2b4d0 100644
--- a/man/positionalEntropy.Rd
+++ b/man/positionalEntropy.Rd
@@ -9,8 +9,7 @@ positionalEntropy(
chain = "TRB",
group.by = NULL,
aa.length = 20,
- method = "shannon",
- n.boots = 20,
+ method = "norm.entropy",
exportTable = FALSE,
palette = "inferno"
)
@@ -28,9 +27,6 @@ positionalEntropy(
\item{method}{The method to calculate the entropy/diversity -
"shannon", "inv.simpson", "norm.entropy".}
-\item{n.boots}{number of bootstraps to down sample in order to
-get mean diversity.}
-
\item{exportTable}{Returns the data frame used for forming the graph.}
\item{palette}{Colors to use in visualization - input any \link[grDevices]{hcl.pals}.}
diff --git a/man/positionalProperty.Rd b/man/positionalProperty.Rd
new file mode 100644
index 00000000..d27c3fc1
--- /dev/null
+++ b/man/positionalProperty.Rd
@@ -0,0 +1,66 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/positionalProperty.R
+\name{positionalProperty}
+\alias{positionalProperty}
+\title{Examining the mean property of amino acids by position}
+\usage{
+positionalProperty(
+ input.data,
+ chain = "TRB",
+ group.by = NULL,
+ aa.length = 20,
+ method = "Atchley",
+ exportTable = FALSE,
+ palette = "inferno"
+)
+}
+\arguments{
+\item{input.data}{The product of \code{\link{combineTCR}},
+\code{\link{combineBCR}}, or \code{\link{combineExpression}}.}
+
+\item{chain}{"TRA", "TRB", "TRG", "TRG", "IGH", "IGL".}
+
+\item{group.by}{The variable to use for grouping.}
+
+\item{aa.length}{The maximum length of the CDR3 amino acid sequence.}
+
+\item{method}{The method to calculate the property - "Atchley", "Kidera",
+"stScales", "tScales", or "VHSE".}
+
+\item{exportTable}{Returns the data frame used for forming the graph.}
+
+\item{palette}{Colors to use in visualization - input any \link[grDevices]{hcl.pals}.}
+}
+\value{
+ggplot of line graph of diversity by position
+}
+\description{
+This function calculates the mean selected property for
+amino acids along the residues of the CDR3 amino acid sequence.
+The ribbon surrounding the individual line represents the 95%
+confidence interval.
+}
+\details{
+More information for the individual methods can be found at the following citations:
+
+\strong{Atchley:} \href{https://pubmed.ncbi.nlm.nih.gov/15851683/}{citation}
+
+\strong{Kidera:} \href{https://link.springer.com/article/10.1007/BF01025492}{citation}
+
+\strong{stScales:} \href{https://pubmed.ncbi.nlm.nih.gov/19373543/}{citation}
+
+\strong{tScales:} \href{https://www.sciencedirect.com/science/article/pii/S0022286006006314?casa_token=uDj97DwXDDEAAAAA:VZfahldPRwU1WObySJlohudtMSDwF7nJSUzcEGwPhvkY13ALLKhs08Cf0_FyyfYZjxJlj-fVf0SM}{citation}
+
+\strong{VHSE:} \href{https://pubmed.ncbi.nlm.nih.gov/15895431/}{citation}
+}
+\examples{
+#Making combined contig data
+combined <- combineTCR(contig_list,
+ samples = c("P17B", "P17L", "P18B", "P18L",
+ "P19B","P19L", "P20B", "P20L"))
+positionalProperty(combined,
+ chain = "TRB",
+ method = "Atchley",
+ aa.length = 20)
+}
+\concept{Summarize_Repertoire}
diff --git a/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg b/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg
deleted file mode 100644
index af181bdd..00000000
--- a/tests/testthat/_snaps/StartractDiversity/startracdiversity-plot.new.svg
+++ /dev/null
@@ -1,344 +0,0 @@
-
-
diff --git a/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg b/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg
deleted file mode 100644
index 003cf42d..00000000
--- a/tests/testthat/_snaps/clonalOverlay/clonaloverlay-plot.new.svg
+++ /dev/null
@@ -1,681 +0,0 @@
-
-
diff --git a/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg b/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg
deleted file mode 100644
index 33091eda..00000000
--- a/tests/testthat/_snaps/clonalRarefaction/clonalclonalrarefaction-h2-p3-plot.new.svg
+++ /dev/null
@@ -1,60 +0,0 @@
-
-
diff --git a/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg b/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg
deleted file mode 100644
index cb2025f6..00000000
--- a/tests/testthat/_snaps/clonalSizeDistribution/clonalsizedistribution-plot.new.svg
+++ /dev/null
@@ -1,94 +0,0 @@
-
-
diff --git a/tests/testthat/_snaps/positionalEntropy/positionalentropy-tra-plot.svg b/tests/testthat/_snaps/positionalEntropy/positionalentropy-tra-plot.svg
index 604773bc..d97e6eb0 100644
--- a/tests/testthat/_snaps/positionalEntropy/positionalentropy-tra-plot.svg
+++ b/tests/testthat/_snaps/positionalEntropy/positionalentropy-tra-plot.svg
@@ -27,14 +27,14 @@
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
@@ -43,149 +43,149 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
@@ -199,13 +199,13 @@
0.00
-0.25
-0.50
-0.75
+0.25
+0.50
+0.75
-
-
-
+
+
+
diff --git a/tests/testthat/_snaps/positionalEntropy/positionalentropy-trb-plot.svg b/tests/testthat/_snaps/positionalEntropy/positionalentropy-trb-plot.svg
index 5fdf6428..58b9cdd1 100644
--- a/tests/testthat/_snaps/positionalEntropy/positionalentropy-trb-plot.svg
+++ b/tests/testthat/_snaps/positionalEntropy/positionalentropy-trb-plot.svg
@@ -27,14 +27,14 @@
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
@@ -43,150 +43,150 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -199,13 +199,13 @@
0.00
-0.25
-0.50
-0.75
+0.25
+0.50
+0.75
-
-
-
+
+
+
diff --git a/tests/testthat/_snaps/positionalProperty/positionalentropy-kidera-plot.svg b/tests/testthat/_snaps/positionalProperty/positionalentropy-kidera-plot.svg
new file mode 100644
index 00000000..e0507c41
--- /dev/null
+++ b/tests/testthat/_snaps/positionalProperty/positionalentropy-kidera-plot.svg
@@ -0,0 +1,2331 @@
+
+
diff --git a/tests/testthat/_snaps/positionalProperty/positionalentropy-stscales-plot.svg b/tests/testthat/_snaps/positionalProperty/positionalentropy-stscales-plot.svg
new file mode 100644
index 00000000..a05b5f11
--- /dev/null
+++ b/tests/testthat/_snaps/positionalProperty/positionalentropy-stscales-plot.svg
@@ -0,0 +1,1885 @@
+
+
diff --git a/tests/testthat/_snaps/positionalProperty/positionalentropy-tra-plot.svg b/tests/testthat/_snaps/positionalProperty/positionalentropy-tra-plot.svg
new file mode 100644
index 00000000..9af8e772
--- /dev/null
+++ b/tests/testthat/_snaps/positionalProperty/positionalentropy-tra-plot.svg
@@ -0,0 +1,1211 @@
+
+
diff --git a/tests/testthat/_snaps/positionalProperty/positionalentropy-trb-plot.svg b/tests/testthat/_snaps/positionalProperty/positionalentropy-trb-plot.svg
new file mode 100644
index 00000000..2e21a4a0
--- /dev/null
+++ b/tests/testthat/_snaps/positionalProperty/positionalentropy-trb-plot.svg
@@ -0,0 +1,1216 @@
+
+
diff --git a/tests/testthat/_snaps/positionalProperty/positionalentropy-tscales-plot.svg b/tests/testthat/_snaps/positionalProperty/positionalentropy-tscales-plot.svg
new file mode 100644
index 00000000..19458895
--- /dev/null
+++ b/tests/testthat/_snaps/positionalProperty/positionalentropy-tscales-plot.svg
@@ -0,0 +1,1216 @@
+
+
diff --git a/tests/testthat/_snaps/positionalProperty/positionalentropy-vhse-plot.svg b/tests/testthat/_snaps/positionalProperty/positionalentropy-vhse-plot.svg
new file mode 100644
index 00000000..089e5fbb
--- /dev/null
+++ b/tests/testthat/_snaps/positionalProperty/positionalentropy-vhse-plot.svg
@@ -0,0 +1,1869 @@
+
+
diff --git a/tests/testthat/test-positionalProperty.R b/tests/testthat/test-positionalProperty.R
new file mode 100644
index 00000000..3dd4a5f5
--- /dev/null
+++ b/tests/testthat/test-positionalProperty.R
@@ -0,0 +1,51 @@
+# test script for positionalProperty.R - testcases are NOT comprehensive!
+
+test_that("positionalProperty works", {
+
+ set.seed(42)
+ expect_doppelganger(
+ "positionalEntropy_TRB_plot",
+ positionalProperty(getCombined(),
+ chain = "TRB",
+ aa.length = 20)
+ )
+
+ expect_doppelganger(
+ "positionalEntropy_TRA_plot",
+ positionalProperty(getCombined(),
+ chain = "TRA",
+ aa.length = 20)
+ )
+
+ expect_doppelganger(
+ "positionalEntropy_Kidera_plot",
+ positionalProperty(getCombined(),
+ chain = "TRB",
+ method = "Kidera",
+ aa.length = 20)
+ )
+
+ expect_doppelganger(
+ "positionalEntropy_stScales_plot",
+ positionalProperty(getCombined(),
+ chain = "TRB",
+ method = "stScales",
+ aa.length = 20)
+ )
+
+ expect_doppelganger(
+ "positionalEntropy_tScales_plot",
+ positionalProperty(getCombined(),
+ chain = "TRB",
+ method = "tScales",
+ aa.length = 20)
+ )
+
+ expect_doppelganger(
+ "positionalEntropy_VHSE_plot",
+ positionalProperty(getCombined(),
+ chain = "TRB",
+ method = "VHSE",
+ aa.length = 20)
+ )
+})
\ No newline at end of file
diff --git a/vignettes/articles/Installation.Rmd b/vignettes/articles/Installation.Rmd
index 67e76322..cbbf9904 100644
--- a/vignettes/articles/Installation.Rmd
+++ b/vignettes/articles/Installation.Rmd
@@ -55,4 +55,11 @@ remotes::install_github(repo = "ncborcherding/scRepertoire", ref = "dev")
## Bioconductor
-Coming soon
\ No newline at end of file
+The current version of scRepertoire is also available in the development version of Bioconductor. Important to note, the version is listed as 1.99.0 on [Bioconductor](https://bioconductor.org/packages/3.19/bioc/html/scRepertoire.html) per their version guidelines.
+
+```{r}
+BiocManager::install(version='devel')
+
+BiocManager::install("scRepertoire")
+```
+
diff --git a/vignettes/articles/Repertoire_Summary.Rmd b/vignettes/articles/Repertoire_Summary.Rmd
index f8c51fdd..91d724c0 100644
--- a/vignettes/articles/Repertoire_Summary.Rmd
+++ b/vignettes/articles/Repertoire_Summary.Rmd
@@ -44,6 +44,51 @@ combined.TCR <- combineTCR(contig_list,
"P19B","P19L", "P20B", "P20L"))
```
+## percentAA
+
+Quantify the proportion of amino acids along the cdr3 sequence with `percentAA()`. By default, the function will pad the sequences with NAs up to the maximum of **aa.length**. Sequences longer than **aa.length** will be removed before visualization **(default aa.length = 20)**.
+
+```{r, message = FALSE, tidy = FALSE}
+percentAA(combined.TCR,
+ chain = "TRB",
+ aa.length = 20)
+```
+
+## positionalEntropy
+
+We can also quantify the level of entropy/diversity across amino acid residues along the cdr3 sequence. `positionalEntropy()` combines the quantification by residue of ``percentAA()`` with the diversity calls in ```clonalDiversity()```.
+
+**method**
+
+* "shannon" - Shannon Diversity
+* "inv.simpson" - Inverse Simpson Diversity
+* "norm.entropy" - Normalized Entropy
+
+```{r tidy = FALSE}
+positionalEntropy(combined.TCR,
+ chain = "TRB",
+ aa.length = 20)
+```
+## positionalProperty
+
+Like ```positionalEntropy()```, we can also examine a series of amino acid properties along the cdr3 sequences using ```positionalProperty()```. Important differences from the above function for ```positionalProperty()``` is dropping NA values as they would void the mean calculation. ```positionalProperty()``` also display a ribbon with the 95% confidence interval surrounding the mean value for the selected properties.
+
+**method**
+
+* "Atchley" - Atchley Factors
+* "Kidera" - Kidera Factors
+* "stScales" - stScales Vectors
+* "tScales" - tScales Vectors
+* "VHSE" - Vectors of Hydrophobic, Steric, and Electronic properties
+
+```{r tidy = FALSE}
+positionalProperty(combined.TCR[c(1,2)],
+ chain = "TRB",
+ aa.length = 20,
+ method = "Atchley") +
+ scale_color_manual(values = hcl.colors(5, "inferno")[c(2,4)])
+```
+
## vizGenes
A visualization of the relative usage of genes of the TCR or BCR, using `vizGenes()`. There is some functional crossover between `vizGenes()` and two functions below called `percentGenes()` and `percentVJ()`. But `vizGenes()` is more adaptable to allow for comparisons across chains, scaling, etc.
@@ -107,33 +152,6 @@ vizGenes(combined.TCR[c(1,2)],
scale = FALSE)
```
-## percentAA
-
-Quantify the proportion of amino acids along the cdr3 sequence with `percentAA()`. By default, the function will pad the sequences with NAs up to the maximum of **aa.length**. Sequences longer than **aa.length** will be removed before visualization **(default aa.length = 20)**.
-
-```{r, message = FALSE, tidy = FALSE}
-percentAA(combined.TCR,
- chain = "TRB",
- aa.length = 20)
-```
-
-## positionalEntropy
-
-We can also quantify the level of entropy/diversity across amino acid residues along the cdr3 sequence. `positionalEntropy()` combines the quantification by residue of ``percentAA()`` with the diversity calls in ```clonalDiversity()```. Importantly, because diversity can be affected by size of sample, similar to ```clonalDiversity()```, ```positionalEntropy()``` will also downsample/bootstrap the calculation. Positions without variance will have a value reported as 0 for the purposes of comparison.
-
-**method**
-
-* "shannon" - Shannon Diversity
-* "inv.simpson" - Inverse Simpson Diversity
-* "norm.entropy" - Normalized Entropy
-
-```{r}
-positionalEntropy(combined.TCR,
- chain = "TRB",
- aa.length = 20)
-```
-
-
## percentGenes
Quantify the proportion of V or J gene usage with `percentGenes()`. Like `percentAA()`, we select the chain of interest and then indicate the gene of interest with the **gene** parameter. Two major limitations of `percentGenes()` are, 1) the function quantifies only V or J genes, and 2) the quantification of the genes are limited to all the V or J genes seen across the samples, not all possible V or J genes.