Skip to content

Commit

Permalink
Export and document incidence_matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
mahendra-mariadassou committed May 30, 2019
1 parent 74d8079 commit 7885c2b
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 67 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(ggformat)
export(ggpdrare)
export(ggrare)
export(import_frogs)
export(incidence_matrix)
export(phylodiv)
export(plot_clust)
export(plot_composition)
Expand All @@ -15,12 +16,14 @@ export(rarecurve2)
export(sample_copy_number)
export(scale_sample_counts)
export(top_taxa)
import(ape)
import(dplyr)
import(ggplot2)
import(phyloseq)
import(reshape)
import(scales)
import(tidyr)
import(vegan)
importFrom(biomformat,biom_data)
importFrom(biomformat,read_biom)
importFrom(biomformat,sample_metadata)
Expand Down
112 changes: 56 additions & 56 deletions R/edgePCA.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
require(vegan)
require(scales)
require(ape)
require(phyloseq)


#' Compute incidence matrix of a tree
#'
#' @title incidenceMatrix
#' @title incidence_matrix
#' @param phy Required. A \code{phylo} class object
#' @return An incidence matrix M of size nedges(tree) x ntaxa(tree) where
#' M[i,j] is set to 1 if taxa derives from edge i and 0 otherwise.
#' @note incidenceMatrix assumes that the tree is rooted. If not, it roots it
#' it at an arbitrary taxa (taxa 1).
incidenceMatrix <- function(phy) {
if (!is.rooted(phy)) {
warning("Tree is not rooted, incidence matrix may be meaningless")
#' @note incidence_matrix assumes that the tree is rooted. If not, it roots it
#' it at an arbitrary taxa (taxa 1).
#'
#' @export
#'
#' @examples
#' data(food)
#' incidence_matrix(phy_tree(food))
incidence_matrix <- function(phy) {
if (!ape::is.rooted(phy)) {
warning("Tree is not rooted, incidence matrix may be meaningless")
}
## Construct incidence matrix of the tree (taxa x edge matrix)
## All taxa descending from an edge are set to 1, all others to -1
Expand All @@ -25,13 +25,13 @@ incidenceMatrix <- function(phy) {
ncol = nedges,
dimnames = list(phy$tip.label, phy$edge[, 2]))
## Incidence of internal edges
phy.part <- prop.part(phy) ## clade composition indexed by (shifted) node number
phy.part <- ape::prop.part(phy) ## clade composition indexed by (shifted) node number
for (i in 2:length(phy.part)) { ## first clade corresponds to root node
edge <- which(phy$edge[, 2] == i + ntaxa) ## node numbers are shifted by ntaxa
edge <- which(phy$edge[, 2] == i + ntaxa) ## node numbers are shifted by ntaxa
incidence[phy.part[[i]] , edge] <- 1
}
## Incidence of pendant edges
## pendant.edges[i] is the edge leading to tip i.
## pendant.edges[i] is the edge leading to tip i.
pendant.edges <- match(seq_len(ntaxa), phy$edge[ , 2])
for (i in seq_len(ntaxa)) {
incidence[i, pendant.edges[i]] <- 1
Expand Down Expand Up @@ -61,7 +61,7 @@ incidenceMatrix <- function(phy) {
#' - \item{scores}: The score of the samples
#' - \item{values}: A date frame with components eigenvalues, relative eigenvalues and
#' cumulative eigenvalues.
#' @note scores and extract_eigenvalues methods are associated with 'edgepca'.
#' @note scores and extract_eigenvalues methods are associated with 'edgepca'.
#' @references Shen, H. and Huang, J. Z. (2008). Sparse principal component
#' analysis via regularized low rank matrix approximation. _Journal
#' of Multivariate Analysis_ *99*, 1015-1034.
Expand All @@ -79,14 +79,14 @@ edgePCA <- function(physeq,
x <- as(otu_table(physeq), "matrix")
if (taxa_are_rows(physeq)) { x <- t(x) }
phy <- phy_tree(physeq)

## Scale counts to frequencies
x <- x/rowSums(x)

## Construct incidence matrix of the tree and transform it to contrasts
incidence <- incidenceMatrix(phy)
incidence <- incidence_matrix(phy)
incidence <- 2 * (incidence - 0.5)

## Order community table according to edge order and create
## mass difference matrix
x <- x[ , rownames(incidence), drop = FALSE]
Expand All @@ -101,7 +101,7 @@ edgePCA <- function(physeq,
number.edges <- rep(number.edges[1], number.components)
}


## Compute number.component first axis from the mass difference matrix
method <- match.arg(method)
results <- switch(method,
Expand Down Expand Up @@ -154,7 +154,7 @@ pca <- function(x, number.components = NULL, center = TRUE, scale. = FALSE) {
x <- scale(x, center = center, scale = scale.)
cen <- attr(x, "scaled:center")
sc <- attr(x, "scaled:scale")
if (any(sc == 0))
if (any(sc == 0))
stop("cannot rescale a constant/zero column to unit variance")
## Check for missing values
if (any(is.na(x))) {
Expand Down Expand Up @@ -190,13 +190,13 @@ pca <- function(x, number.components = NULL, center = TRUE, scale. = FALSE) {


#' Perform a regularized principal component analysis on a given data matrix using
#' Singular Value Decomposition and regularization.
#' Singular Value Decomposition and regularization.
#'
#' @title regularizedPca
#' @param x Required. Numeric matrix.
#' @param number.components Optional. Integer. Number of components used for denoising.
#' Defaults to 10 and is automatically reduced to max(dim(x)) - 1
#' if 10 is too high.
#' if 10 is too high.
#' @param center Optional. Logical. Should x be shifted to be (column-wise) 0-centered?
#' Defaults to TRUE. Alternatively, a vector of length equal to the
#' number of columns in x. The value is passed on to scale.
Expand Down Expand Up @@ -231,7 +231,7 @@ regularizedPca <- function(x, number.components = 10, center = TRUE, scale. = FA
x <- scale(x, center = center, scale = scale.)
cen <- attr(x, "scaled:center")
sc <- attr(x, "scaled:scale")
if (any(sc == 0))
if (any(sc == 0))
stop("cannot rescale a constant/zero column to unit variance")
## Check for missing values
if (any(is.na(x))) {
Expand Down Expand Up @@ -275,13 +275,13 @@ regularizedPca <- function(x, number.components = 10, center = TRUE, scale. = FA


#' Perform a sparse principal component analysis on a given data matrix using
#' NIPALS algorithm.
#' NIPALS algorithm.
#'
#' @title sparsePca
#' @param x Required. Numeric matrix.
#' @param number.components Optional. Integer. Number of components used for denoising.
#' Defaults to 10 and is automatically reduced to max(dim(x)) - 1
#' if 10 is too high.
#' if 10 is too high.
#' @param center Optional. Logical. Should x be shifted to be (column-wise) 0-centered?
#' Defaults to TRUE. Alternatively, a vector of length equal to the
#' number of columns in x. The value is passed on to scale.
Expand All @@ -292,9 +292,9 @@ regularizedPca <- function(x, number.components = 10, center = TRUE, scale. = FA
#' the number of non-zero loadings for each component. By default, all
#' loadings can be different from 0 (no sparsity).
#' @param iter.max Optional. Integer. The maximum number of iterations used in NIPALS for each
#' each component. Defaults to 500.
#' each component. Defaults to 500.
#' @param tol Optional. A (small) positive numeric used to check convergence of loadings
#' for each component. Defaults to 1e-09.
#' for each component. Defaults to 1e-09.
#' @return A list with components
#' - \item{number.components}: The number of components used
#' - \item{loadings}: The matrix of variable loadings
Expand All @@ -306,7 +306,7 @@ regularizedPca <- function(x, number.components = 10, center = TRUE, scale. = FA
#' - \item{center, scale}: centering and scaling vectors, used, if any.
#' Otherwise, FALSE.
#' @seealso \code{\link{pca}}, \code{\link{edgePCA}}, \code{\link{regularizedPca}},
sparsePca <- function(x, number.components = 10,
sparsePca <- function(x, number.components = 10,
number.variables = rep(ncol(x), number.components),
center = TRUE, scale. = FALSE,
iter.max = 500, tol = 1e-09) {
Expand All @@ -326,7 +326,7 @@ sparsePca <- function(x, number.components = 10,
x <- scale(x, center = center, scale = scale.)
cen <- attr(x, "scaled:center")
sc <- attr(x, "scaled:scale")
if (any(sc == 0))
if (any(sc == 0))
stop("cannot rescale a constant/zero column to unit variance")
## Check for missing values
if (any(is.na(x))) {
Expand Down Expand Up @@ -354,7 +354,7 @@ sparsePca <- function(x, number.components = 10,
mat.v <- matrix(NA, nrow = p, ncol = number.components)
x.new <- x
for (h in 1:number.components) {
## Initialization
## Initialization
x.svd <- svd(x.new, nu = 1, nv = 1)
u.new <- x.svd$u[ , 1]
v.new <- x.svd$d[1] * x.svd$v[ , 1]
Expand Down Expand Up @@ -413,7 +413,7 @@ sparsePca <- function(x, number.components = 10,
## Extract eigenvectors (loadings), eigenvalues and scores
loadings <- mat.v
scores <- mat.u
eigenvalues <- -diff(c(0, vect.d))
eigenvalues <- -diff(c(0, vect.d))
## Add sensible names to loadings and scores
dimnames(loadings) <- list(variable.names,
paste("Axis", 1:ncol(loadings), sep = "."))
Expand All @@ -435,13 +435,13 @@ sparsePca <- function(x, number.components = 10,
}

#' Perform a sparse principal component analysis on a given data matrix using
#' iterative algorithm from Shen and Huang.
#' iterative algorithm from Shen and Huang.
#'
#' @title sparsePca
#' @param x Required. Numeric matrix.
#' @param number.components Optional. Integer. Number of components used for denoising.
#' Defaults to 10 and is automatically reduced to max(dim(x)) - 1
#' if 10 is too high.
#' if 10 is too high.
#' @param center Optional. Logical. Should x be shifted to be (column-wise) 0-centered?
#' Defaults to TRUE. Alternatively, a vector of length equal to the
#' number of columns in x. The value is passed on to scale.
Expand All @@ -451,7 +451,7 @@ sparsePca <- function(x, number.components = 10,
#' @param lambda Required. Regularization parameter.
#' @param iter.max Optional. Integer. The maximum number of iterations used in iterative step
#' for each
#' each component. Defaults to 500.
#' each component. Defaults to 500.
#' @param tol Optional. A (small) positive numeric used to check convergence of loadings
#' for each component. Defaults to 1e-09.
#' @references Shen, H. and Huang, J. Z. (2008). Sparse principal component
Expand All @@ -468,7 +468,7 @@ sparsePca <- function(x, number.components = 10,
#' - \item{center, scale}: centering and scaling vectors, used, if any.
#' Otherwise, FALSE.
#' @seealso \code{\link{pca}}, \code{\link{edgePCA}}, \code{\link{regularizedPca}},
sparsePca.lambda <- function(x, number.components = 10,
sparsePca.lambda <- function(x, number.components = 10,
lambda,
center = TRUE, scale. = FALSE,
iter.max = 500, tol = 1e-09) {
Expand All @@ -488,7 +488,7 @@ sparsePca.lambda <- function(x, number.components = 10,
x <- scale(x, center = center, scale = scale.)
cen <- attr(x, "scaled:center")
sc <- attr(x, "scaled:scale")
if (any(sc == 0))
if (any(sc == 0))
stop("cannot rescale a constant/zero column to unit variance")
## Check for missing values
if (any(is.na(x))) {
Expand Down Expand Up @@ -516,7 +516,7 @@ sparsePca.lambda <- function(x, number.components = 10,
mat.v <- matrix(NA, nrow = p, ncol = number.components)
x.new <- x
for (h in 1:number.components) {
## Initialization
## Initialization
x.svd <- svd(x.new, nu = 1, nv = 1)
## u and v solutions of unpenalized approximation problem
## u %*% t(v) is the best rank one approximation of x
Expand All @@ -534,7 +534,7 @@ sparsePca.lambda <- function(x, number.components = 10,
v.new <- ifelse(abs(z) > lambda, sign(z) * (abs(z) - lambda), 0)
## Update and normalize u
u.new <- as.vector(x.new %*% v.new)
u.new <- u.new / sqrt(sum(u.new^2))
u.new <- u.new / sqrt(sum(u.new^2))
## Check convergence
if (sum( (u.new - u.old)^2 ) < tol) {
u.conv <- TRUE
Expand All @@ -548,9 +548,9 @@ sparsePca.lambda <- function(x, number.components = 10,
convergence[h] <- (v.conv && u.conv)
## Norm v (?) and update mat.u, mat.v
## v.new <- v.new / sqrt(sum(v.new^2))
mat.v[ , h] <- v.new
mat.v[ , h] <- v.new
mat.u[ , h] <- u.new
## Deflate x.new with rank one approximation
## Deflate x.new with rank one approximation
x.new <- x.new - tcrossprod(u.new, v.new)
## Compute percentage of variance explained
## for x reconstructed with first h components
Expand All @@ -560,7 +560,7 @@ sparsePca.lambda <- function(x, number.components = 10,
## Extract eigenvectors (loadings), eigenvalues and scores
loadings <- mat.v
scores <- mat.u
eigenvalues <- diff(c(0, vect.d, 1))
eigenvalues <- diff(c(0, vect.d, 1))
## Add sensible names to loadings and scores
dimnames(loadings) <- list(variable.names,
paste("Axis", 1:ncol(loadings), sep = "."))
Expand All @@ -572,7 +572,7 @@ sparsePca.lambda <- function(x, number.components = 10,
Cumul_eig = cumsum(eigenvalues) / sum(eigenvalues))
## Construct results and return it
result <- list(number.components = number.components,
lambda = lambda,
lambda = lambda,
scores = scores,
loadings = loadings,
values = values,
Expand Down Expand Up @@ -614,14 +614,14 @@ scores.pca <- function(pca, choices,
#' Extract supporting taxa from an edge pca
#'
#' @title support_taxa
#' @param physeq Required. A \code{\link{phyloseq-class}} class instance.
#' @param physeq Required. A \code{\link{phyloseq-class}} class instance.
#' @param epca Required. An \code{\link{edgepca}} class object
#' @param choices Optional. Ordination axis for which support taxa are extracted.
#' Defaults to all axis.
#' Defaults to all axis.
#' @param threshold Optional. Numeric. Threshold that a taxa loading
#' must pass to be extracted. Defaults to 0.
#' must pass to be extracted. Defaults to 0.
#' @param number Optional. Integer. Number of taxa to extract from each axis.
#' Defaults to all taxa.
#' Defaults to all taxa.
#' @note If number and threshold are specified, extracts at most \code{number} taxa whose loadings
#' exceed threshold for each axis.
#' @return A character vector ot taxa names.
Expand Down Expand Up @@ -654,18 +654,18 @@ support_taxa <- function(physeq, epca, choices = seq(epca$number.components),


#' Function for plotting a tree with edge fattened and colored according to
#' the loadings of an edgePCA results.
#' the loadings of an edgePCA results.
#'
#' @title plotLoadings
#' @param physeq Required. A \code{\link{phyloseq-class}} class instance.
#' @param physeq Required. A \code{\link{phyloseq-class}} class instance.
#' @param epca Required. An \code{\link{edgepca}} class object
#' @param choices Required. Ordination axes.
#' @param width.lim Optional. Numeric. Minimum and maximal edge after fattening.
#' Defaults to c(0.1, 4). Set to c(0, x) for true linear scaling.
#' @param fatten.edges.by Required. Aesthetics that will be used for fattening.
#' Subset of 'size' and 'alpha'.
#' @param fatten.tips Optional. Logical. Should tips be fattened like pendant edges.
#' Defaults to FALSE.
#' Defaults to FALSE.
#' @param color Optional. Color vector of length 2 used to distinguish positive
#' loading edges (color[1]) and negative loading edges (color[2]).
#' Defaults to c("#EF8A62", "#67A9CF")
Expand All @@ -676,12 +676,12 @@ support_taxa <- function(physeq, epca, choices = seq(epca$number.components),
#' Defaults to NULL. If NULL, nothing happens. Use "white", "transparent",
#' or par("bg") to remove them from plot.
#' @param ... Additional arguments passed on to plot.phylo
#'
#'
#' @return Nothing, this function is used for its side effect of plotting a tree
#' @seealso \code{\link{edgePCA}}
plotLoadings <- function(physeq, epca, choices, fatten.by = c("size"), fatten.tips = FALSE,
plotLoadings <- function(physeq, epca, choices, fatten.by = c("size"), fatten.tips = FALSE,
width.lim = c(0.1, 4), color = c("#EF8A62", "#67A9CF"),
color.tip.by = NULL, missing.color = NULL,
color.tip.by = NULL, missing.color = NULL,
...) {
## Check fatten.by
if (any( !fatten.by %in% c("size", "alpha"))) {
Expand All @@ -697,7 +697,7 @@ plotLoadings <- function(physeq, epca, choices, fatten.by = c("size"), fatten.ti
tip.loadings <- x[pendant.edges, , drop = FALSE]
## Get edge.color
edge.color <- ifelse(x > 0, color[1], color[2])
## Get tip.color
## Get tip.color
tip.color <- edge.color[pendant.edges, , drop = FALSE]
## Get fattening factor
fattening.factor <- rescale(abs(x), to = width.lim)
Expand Down Expand Up @@ -788,15 +788,15 @@ extract_eigenvalue.pca = function(ordination) ordination$values$Eigenvalues
#' @title plot_edgepca
#' @param physeq Required. A \code{\link{phyloseq-class}} object
#' @param epca Required. A \code{\link{edgepca}} class object
#' @param axes Optional. Ordination axes. Defaults to c(1, 2).
#' @param axes Optional. Ordination axes. Defaults to c(1, 2).
#' @param type Required. Unambiguous abbreviation of 'variables' or 'samples'.
#' For compatibility with vegan and phyloseq, 'variables' can be replaced
#' by 'species' or 'taxa' and 'samples' can be replaced by 'sites'.
#' @param args.loadings Optional. Additional passed on to plotLoadings. Defaults to NULL
#' @param widths Optional. A vector of 2 values for the widths of columns. Defaults to c(1, 1)
#' @param heights Optional. A vector of 2 values for the heights of rows. Defaults to c(1, 1)
#' @param ... Optional. Additional argument passed on to plot_ordination
#' @return Nothing. The function is called for its side effect.
#' @return Nothing. The function is called for its side effect.
#' @seealso \code{\link{pca}}, \code{\link{edgePCA}}, \code{\link{regularizedPca}}, \code{\link{sparsePca}},
plot_edgepca <- function(physeq, epca, axes = c(1, 2),
widths = c(1, 1), heights = c(1, 1),
Expand Down
2 changes: 1 addition & 1 deletion R/phyloseq-extended.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
#' @docType package
#' @name phyloseq.extended
#'
#' @import dplyr tidyr phyloseq ggplot2 scales
#' @import dplyr tidyr phyloseq ggplot2 scales ape vegan
NULL
Loading

0 comments on commit 7885c2b

Please sign in to comment.