Skip to content

Commit

Permalink
change function structure
Browse files Browse the repository at this point in the history
  • Loading branch information
alanmmobbs93 committed Dec 18, 2024
1 parent 9e42bdd commit c2f666f
Showing 1 changed file with 91 additions and 44 deletions.
135 changes: 91 additions & 44 deletions modules/local/validatemodel/resources/usr/bin/validate_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ if (is.null(opt$yml) || is.null(opt$samplesheet) ) {
}

### Collect parameters (easier for dev and testing)
path_yml <- opt$yml # "/workspace/differentialabundance/modules/local/validate_model/input_test/contrasts.yml" #
path_samplesheet <- opt$samplesheet # "/workspace/differentialabundance/results/SRP254919.samplesheet.csv" #
path_yml <- opt$yml # "/workspace/differentialabundance/modules/local/validatemodel/tests/contrasts.yml" #
path_samplesheet <- opt$samplesheet # "/workspace/differentialabundance/results/SRP254919.samplesheet_witherrors.csv" #
sample_column <- opt$sample_id_col # "sample" #

# LOAD FILES --------------------------------------------------------
Expand Down Expand Up @@ -71,58 +71,99 @@ tryCatch({
## * var_list: collects all factors levels for each variable.
## * contrast_list: collects info for each contrast

### Initialize empty lists
var <- list()
contrasts_list <- list()
process_models <- function(models) {
# Initialize lists to store the output
contrasts_list <- list()
var <- list()

## Iterate through models (formula)
for (FORMULA in models$models) {

## Iterate through contrasts for each model (formula)
for (CONTRAST in FORMULA$contrasts) {
### Get column name: first comparison's element
name <- CONTRAST$comparison[1]
# Ensure models$models is not null or empty
if (is.null(models$models) || length(models$models) == 0) {
stop("models$models is null or empty. Please provide valid input.")
}

### Get factors (rest of components)
variables <- CONTRAST$comparison[2:length(CONTRAST$comparison)]
# Temporary storage for gathering contrasts per variable
temp_var <- list()
blocking_vars <- c()

### Populate contrasts_list for later model validation
contrasts_list[[ CONTRAST$id ]] <- list( ## Adds the ID as main identifier
"formula" = FORMULA$formula,
"variable" = name,
"contrast" = variables,
"blocking_factors" = CONTRAST$blocking_factors ## This list can be later extended for "make_contrast" option is required
)
# Iterate through models (formula)
for (FORMULA in models$models) {

### Start populating the variables_list
#### If the column is already present in the list, add more components to it
if (name %in% names(var) ) {
variables <- unique( c(var[[ name ]], variables )) ## Combine previous levels with the new ones without duplicating them
var[[ name ]] <- variables ## assign the values to the list
#cat("Duplicated variable: ", name, ". Adding more factors to it.\n", sep = "")
#### Default to new element
} else {
#cat("Detected variable:", name, "\n")
var[[ length(var) + 1 ]] <- variables ## Asign levels to a new entry
names(var)[length(var) ] <- name ## Specify the variable name
# Ensure FORMULA$contrasts is not null or empty
if (is.null(FORMULA$contrasts) || length(FORMULA$contrasts) == 0) {
stop("FORMULA$contrasts is null or empty. Unable to proceed with this model.")
}

## Get blocking factors
blocking <- c()
if ( !is.null(CONTRAST$blocking_factors) ) { ## Check if they were defined in the yml
#cat("Blocking factors detected for variable", name, "\n")
blocking <- CONTRAST$blocking_factors
# Iterate through contrasts for each model (formula)
for (CONTRAST in FORMULA$contrasts) {

# Validate CONTRAST$id
if (is.null(CONTRAST$id)) {
stop("Missing CONTRAST$id detected. Unable to proceed.")
}

if (CONTRAST$id %in% names(contrasts_list)) {
stop(paste("Duplicate CONTRAST$id detected:", CONTRAST$id, ". Unable to proceed."))
}

# Get column name: first comparison's element
name <- CONTRAST$comparison[1]

# Check if name is valid
if (is.null(name) || name == "") {
stop("Invalid name detected in CONTRAST$comparison. Unable to proceed.")
}

# Get factors (rest of components)
variables <- CONTRAST$comparison[2:length(CONTRAST$comparison)]

# Populate contrasts_list for later model validation
contrasts_list[[CONTRAST$id]] <- list(
"formula" = FORMULA$formula,
"variable" = name,
"contrast" = variables,
"blocking_factors" = CONTRAST$blocking_factors
)

# Gather contrasts per variable into temporary storage
if (name %in% names(temp_var)) {
temp_var[[name]] <- unique(c(temp_var[[name]], variables))
} else {
temp_var[[name]] <- variables
}

# Gather blocking factors
if (!is.null(CONTRAST$blocking_factors)) {
blocking_vars <- unique(c(blocking_vars, CONTRAST$blocking_factors))
}
}
}

## Add blocking variables
if ( "blocking_factors" %in% names(var) ) { ## Check if the category exists from previous iterations
var[[ "blocking_factors" ]] <- unique( c( var[[ "blocking_factors" ]], blocking ))
# Consolidate gathered contrasts into `var`
for (name in names(temp_var)) {
if (name %in% names(var)) {
var[[name]] <- unique(c(var[[name]], temp_var[[name]]))
} else {
var[[ "blocking_factors" ]] <- unique( blocking )
var[[name]] <- temp_var[[name]]
}
}

# Add blocking variables to `var`
var[[ "blocking_factors" ]] <- blocking_vars

# Return the populated lists
return(list(
contrasts_list = contrasts_list,
var = var
))
}

## Process models
models_lists <- process_models(models)
### Extract models contrasts
contrasts_list <- models_lists[["contrasts_list"]]
### Extract variables and levels
var <- models_lists[["var"]]

## Print explicit messages
for (INDEX in 1:length(var)) {
cat("\033[32mDetected '", names(var)[INDEX], "' variable with ", paste(var[[INDEX]], collapse = " "), " levels.\033[0m \n", sep = "")
Expand Down Expand Up @@ -221,7 +262,7 @@ validate_model <- function(sample_column, variables, samplesheet) { # sample_col
}

## Check that the column data does not contain undesidered characters
if ( sum( stringr::str_detect(samplesheet[[ VARIABLE ]], pattern = undesired_chars )) > 0 ) {
if ( sum( stringr::str_detect( samplesheet[[ VARIABLE ]], pattern = regex(undesired_chars) ), na.rm = TRUE) > 0 ) {
errors <- c(errors,
paste0("Column ", VARIABLE, " contains undesired characters\n")
)
Expand Down Expand Up @@ -273,7 +314,7 @@ validate_model <- function(sample_column, variables, samplesheet) { # sample_col

## Report ERRORS and stop
if (length(errors) > 0 ) {
stop(cat("Some errors where found while validating the samplesheet and models definitions:\n", paste(errors, collapse = "\n"), "\n", sep = ""))
stop(cat("\033[1;31mSome errors where found while validating the samplesheet and models definitions:\n", paste(errors, collapse = "\n"), "\033[0m\n", sep = ""))
}

## Generate validated phenotypic table
Expand Down Expand Up @@ -350,7 +391,6 @@ check_model_contrasts <- function(contrasts_list, colData) {

# Build the design matrix
design_matrix <- model.matrix(updated_formula, data = colData)
print(design_matrix)

# Check the rank of the design matrix
rank <- qr(design_matrix)$rank
Expand All @@ -365,6 +405,13 @@ check_model_contrasts <- function(contrasts_list, colData) {
names(design_list)[ length(design_list) ] <- model_name
}

for (DESIGN in names(design_list)) {
cat("Design ", DESIGN,
if( design_list[[ DESIGN ]]$full_rank ) { " is " } else { " is not "},
"full ranked\n", sep = ""
)
}

return(design_list)
}

Expand Down

0 comments on commit c2f666f

Please sign in to comment.