Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add differential expression #50

Merged
merged 4 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,16 +44,20 @@ Collate:
'class-SampleMetadata.R'
'class-CollectionWithMetadata.R'
'class-VariableMetadata.R'
'class-Comparator.R'
'class-ComputeResult.R'
'class-CorrelationResult.R'
'class-CountDataCollection.R'
'class-Megastudy.R'
'class-Range.R'
'class-Statistic.R'
'data.R'
'method-correlation.R'
'method-differentialExpression.R'
'methods-Bin.R'
'methods-CollectionWithMetadata.R'
'methods-Collections.R'
'methods-Comparator.R'
'methods-VariableMetadata.R'
'methods-ComputeResult.R'
'methods-Megastudy.R'
Expand Down
13 changes: 13 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@ export(BinList)
export(Collection)
export(CollectionWithMetadata)
export(Collections)
export(Comparator)
export(ComputeResult)
export(CorrelationResult)
export(CountDataCollection)
export(DataShape)
export(DataType)
export(DifferentialExpressionResult)
export(Megastudy)
export(PlotReference)
export(Range)
Expand All @@ -40,6 +43,7 @@ export(cut_interval)
export(cut_number)
export(cut_width)
export(data_frame)
export(differentialExpression)
export(findAllColNames)
export(findAncestorIdColumns)
export(findColNamesByPredicate)
Expand Down Expand Up @@ -73,6 +77,7 @@ export(getDataFromSource)
export(getDataTable)
export(getDiscretizedBins)
export(getEntityId)
export(getGroupLabels)
export(getHasStudyDependentVocabulary)
export(getIdColumns)
export(getMetadataVariableNames)
Expand Down Expand Up @@ -118,10 +123,13 @@ exportClasses(Bin)
exportClasses(BinList)
exportClasses(Collection)
exportClasses(CollectionWithMetadata)
exportClasses(Comparator)
exportClasses(ComputeResult)
exportClasses(CorrelationResult)
exportClasses(CountDataCollection)
exportClasses(DataShape)
exportClasses(DataType)
exportClasses(DifferentialExpressionResult)
exportClasses(Megastudy)
exportClasses(PlotReference)
exportClasses(Range)
Expand All @@ -136,6 +144,7 @@ exportClasses(VariableMetadataList)
exportClasses(VariableSpec)
exportClasses(VariableSpecList)
exportMethods(as.numeric)
exportMethods(differentialExpression)
exportMethods(findAllColNames)
exportMethods(findColNamesByPredicate)
exportMethods(findColNamesFromPlotRef)
Expand All @@ -155,6 +164,7 @@ exportMethods(getColName)
exportMethods(getDTWithImputedZeroes)
exportMethods(getDataTable)
exportMethods(getEntityId)
exportMethods(getGroupLabels)
exportMethods(getHasStudyDependentVocabulary)
exportMethods(getStudyIdColumnName)
exportMethods(getVariableSpec)
Expand All @@ -167,6 +177,7 @@ exportMethods(whichValuesInBinList)
exportMethods(writeData)
exportMethods(writeMeta)
exportMethods(writeStatistics)
import(DESeq2)
import(data.table)
importFrom(Hmisc,rcorr)
importFrom(S4Vectors,SimpleList)
Expand All @@ -175,6 +186,8 @@ importFrom(SpiecEasi,sparcc)
importFrom(SpiecEasi,sparccboot)
importFrom(digest,digest)
importFrom(microbenchmark,microbenchmark)
importFrom(purrr,discard)
importFrom(purrr,map)
importFrom(purrr,map_lgl)
importFrom(purrr,none)
importFrom(stringi,stri_detect_regex)
79 changes: 79 additions & 0 deletions R/class-Comparator.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

# Validity function for the Comparator class.
#
# Every problem found is collected, so a single validation pass reports all
# of them instead of stopping at the first.
#
# @param object An object with `variable`, `groupA` and `groupB` slots.
# @return `TRUE` when no problems are found, otherwise a character vector
#   holding one message per problem.
check_comparator <- function(object) {
  variable <- object@variable
  groupA <- object@groupA
  groupB <- object@groupB

  errors <- character()

  # Check that the variable has a reasonable variable spec. The length guard
  # keeps a zero-length id from breaking the `if` condition itself.
  variableId <- variable@variableSpec@variableId
  if (length(variableId) != 1L || is.na(variableId)) {
    errors <- c(errors, "Comparator variable needs a variable id")
  }

  # Check that groups exist. Both groups are inspected; testing groupA twice
  # would let an empty groupB through.
  if (length(groupA) == 0L || length(groupB) == 0L) {
    errors <- c(errors, "Both groupA and groupB must be defined")
  }

  if (identical(variable@dataShape@value, "CONTINUOUS")) {
    ## Checks for continuous variables

    # TRUE when every bin in `group` has a non-empty value in `slotName`.
    # An empty group yields TRUE, because all() of nothing is TRUE.
    allBinsHave <- function(group, slotName) {
      hasValue <- lapply(group, function(bin) {
        length(methods::slot(bin, slotName)) > 0L
      })
      all(unlist(hasValue))
    }

    # Err if variable is continuous but either group is missing a binStart
    # or binEnd. Messages are emitted in a fixed order: groupA before groupB,
    # binStart before binEnd.
    for (groupName in c("groupA", "groupB")) {
      group <- if (identical(groupName, "groupA")) groupA else groupB
      for (slotName in c("binStart", "binEnd")) {
        if (!allBinsHave(group, slotName)) {
          msg <- paste0("All ", groupName, " bins must have a ", slotName)
          errors <- c(errors, msg)
        }
      }
    }
  } else {
    ## Checks for non-continuous variables

    # Ensure no values are duplicated between group A and group B
    groupAValues <- getGroupLabels(object, "groupA")
    groupBValues <- getGroupLabels(object, "groupB")

    if (length(intersect(groupAValues, groupBValues)) > 0L) {
      errors <- c(errors, "groupA and groupB cannot share members")
    }
  }

  if (length(errors) == 0L) TRUE else errors
}

#' Comparator
#'
#' Represents the variable used to compare samples between two groups.
#' The values of that variable decide which group a sample is placed in.
#' Objects are validated by `check_comparator`.
#'
#' @slot variable A VariableMetadata
#' @slot groupA BinList
#' @slot groupB BinList
#' @name Comparator-class
#' @rdname Comparator-class
#' @include class-VariableMetadata.R
#' @export
Comparator <- setClass(
  "Comparator",
  representation = representation(
    variable = "VariableMetadata",
    groupA = "BinList",
    groupB = "BinList"
  ),
  prototype = prototype(
    variable = new("VariableMetadata"),
    groupA = new("BinList"),
    groupB = new("BinList")
  ),
  validity = check_comparator
)
46 changes: 46 additions & 0 deletions R/class-CountDataCollection.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Validity function for the CountDataCollection class.
#
# Every column of `object@data` that is not named in `recordIdColumn` or
# `ancestorIdColumns` is treated as count data. Those columns must be numeric,
# whole-valued and non-negative.
#
# @param object An object with `data`, `recordIdColumn` and
#   `ancestorIdColumns` slots.
# @return `TRUE` when no problems are found, otherwise a character vector
#   holding one message per problem.
check_count_data_collection <- function(object) {
  errors <- character()
  df <- object@data
  all_id_cols <- c(object@recordIdColumn, object@ancestorIdColumns)

  # Select the count columns once. data.table needs `with = FALSE` to treat
  # the logical vector as a column selector; for a plain data.frame,
  # `drop = FALSE` keeps a single count column from collapsing to a vector.
  is_count_col <- !(names(df) %in% all_id_cols)
  if (inherits(df, "data.table")) {
    count_data <- df[, is_count_col, with = FALSE]
  } else {
    count_data <- df[, is_count_col, drop = FALSE]
  }

  all_numeric <- all(unlist(lapply(count_data, is.numeric)))
  if (!all_numeric) {
    # round() and `< 0` are not meaningful (round() errors outright) on
    # non-numeric columns, so report this problem and skip the value checks.
    errors <- c(errors, "All columns except the ID columns must be numeric.")
    return(errors)
  }

  if (!identical(count_data, round(count_data))) {
    errors <- c(errors, "Count data must be integer numbers.")
  }

  # Only the count columns are inspected: comparing character ID columns
  # against 0 would be a string comparison and could flag valid IDs.
  if (any(count_data < 0, na.rm = TRUE)) {
    errors <- c(errors, "Count data cannot contain negative values.")
  }

  if (length(errors) == 0L) TRUE else errors
}

#' Count Data
#'
#' A class for working with count data, such as microbial or genetic assays.
#' It extends CollectionWithMetadata and is validated by
#' `check_count_data_collection`.
#'
#' @slot data A data.frame of integer abundance counts with genes (species, etc.) as columns and samples as rows
#' @slot sampleMetadata A data.frame of metadata about the samples with samples as rows and metadata variables as columns
#' @slot recordIdColumn The name of the column containing IDs for the samples. All other columns will be treated as abundance values.
#' @slot ancestorIdColumns A character vector of column names representing parent entities of the recordIdColumn.
#' @slot imputeZero A logical indicating whether NA/null values should be replaced with zeros.
#' @name CountDataCollection-class
#' @rdname CountDataCollection-class
#' @include class-CollectionWithMetadata.R
#' @export
CountDataCollection <- setClass(
  "CountDataCollection",
  contains = "CollectionWithMetadata",
  validity = check_count_data_collection
)
Loading
Loading