Skip to content

Commit

Permalink
Further work on tasks.
Browse files Browse the repository at this point in the history
  • Loading branch information
alexzwanenburg committed Oct 30, 2024
1 parent 5f2bdca commit 8693001
Show file tree
Hide file tree
Showing 6 changed files with 407 additions and 99 deletions.
26 changes: 13 additions & 13 deletions R/DataPreProcessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -293,19 +293,19 @@ determine_preprocessing_parameters <- function(



# Retrieve the feature info list that belongs to a run.
#
# run: passed as-is to .get_preprocessing_iteration_identifiers, whose result
#   is read through its `data` and `run` elements.
#
# Returns whatever get_feature_info_from_backend yields for those identifiers.
.get_feature_info_list <- function(run) {
  # Look up the identifiers of the pre-processing iteration for this run.
  iteration_ids <- .get_preprocessing_iteration_identifiers(run = run)

  # Query the backend using the data and run identifiers found above.
  backend_feature_info <- get_feature_info_from_backend(
    data_id = iteration_ids$data,
    run_id = iteration_ids$run
  )

  return(backend_feature_info)
}
# .get_feature_info_list <- function(run) {
#
# # Find pre-processing control element for the current run
# pre_proc_id_list <- .get_preprocessing_iteration_identifiers(run = run)
#
# # Load feature info list from backend
# feature_info_list <- get_feature_info_from_backend(
# data_id = pre_proc_id_list$data,
# run_id = pre_proc_id_list$run
# )
#
# return(feature_info_list)
# }



Expand Down
2 changes: 2 additions & 0 deletions R/FamiliarS4Generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,5 @@ setGeneric(".file_exists", function(object, ...) standardGeneric(".file_exists")
# S4 generic .perform_task: formals are `object`, `data` and `...`.
setGeneric(
  ".perform_task",
  function(object, data, ...) standardGeneric(".perform_task")
)

# S4 generic .get_task_descriptor: formals are `object` and `...`.
setGeneric(
  ".get_task_descriptor",
  function(object, ...) standardGeneric(".get_task_descriptor")
)

# S4 generic .get_feature_info_list: formals are `object` and `...`.
setGeneric(
  ".get_feature_info_list",
  function(object, ...) standardGeneric(".get_feature_info_list")
)
50 changes: 22 additions & 28 deletions R/TaskFeatureInfo.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,10 @@ setMethod(
if (is.null(file_paths)) return(object)

# Generate file name of pre-processing file
file_name <- paste0(object@project_id, "_generic_feature_info.RDS")

# Add file path and normalise according to the OS
object@file <- normalizePath(
file.path(file_paths$process_data_dir, file_name),
mustWork = FALSE
object@file <- get_object_file_name(
object_type = "genericFeatureInfo",
project_id = object@project_id,
dir_path = file_paths$process_data_dir
)

return(object)
Expand Down Expand Up @@ -58,25 +56,23 @@ setMethod(
function(
object,
data,
settings = NULL,
outcome_info = NULL,
...
) {
# This method is called when "data" is expected to be available somewhere in
# the backend.

if (is.null(project_info)) {
..error_reached_unreachable_code("project_info is required for retrieving data from the backend.")
}
if (is.null(settings)) {
..error_reached_unreachable_code("settings is required for retrieving data from the backend.")

if (is.null(outcome_info)) {
..error_reached_unreachable_code("outcome_info is required.")
}

# Create a dataObject.
data <- methods::new(
"dataObject",
data = get_data_from_backend(),
preprocessing_level = "none",
outcome_type = settings$data$outcome_type
outcome_type = outcome_info@outcome_type,
outcome_info = outcome_info
)

# Pass to .perform_task for dataObject.
Expand Down Expand Up @@ -141,15 +137,13 @@ setMethod(
function(object, file_paths = NULL) {
if (is.null(file_paths)) return(object)

# Generate file name of pre-processing file.
file_name <- paste0(
object@project_id, "_", object@data_id, "_", object@run_id, "_feature_info.RDS"
)

# Add file path and normalise according to the OS
object@file <- normalizePath(
file.path(file_paths$process_data_dir, file_name),
mustWork = FALSE
# Generate file name of pre-processing file
object@file <- get_object_file_name(
object_type = "featureInfo",
project_id = object@project_id,
data_id = object@data_id,
run_id = object@run_id,
dir_path = file_paths$process_data_dir
)

return(object)
Expand Down Expand Up @@ -179,8 +173,8 @@ setMethod(
function(
object,
data,
settings = NULL,
project_info = NULL,
outcome_info = NULL,
...
) {
# This method is called when "data" is expected to be available somewhere in
Expand All @@ -189,8 +183,8 @@ setMethod(
if (is.null(project_info)) {
..error_reached_unreachable_code("project_info is required for retrieving data from the backend.")
}
if (is.null(settings)) {
..error_reached_unreachable_code("settings is required for retrieving data from the backend.")
if (is.null(outcome_info)) {
..error_reached_unreachable_code("outcome_info is required.")
}

# Find the run list.
Expand All @@ -212,14 +206,14 @@ setMethod(
"dataObject",
data = get_data_from_backend(sample_identifiers = sample_identifiers),
preprocessing_level = "none",
outcome_type = settings$data$outcome_type
outcome_type = outcome_info@outcome_type,
outcome_info = outcome_info
)

# Pass to method that dispatches with dataObject for further processing.
return(.perform_task(
object = object,
data = data,
settings = settings,
...
))
}
Expand Down
44 changes: 3 additions & 41 deletions R/TaskMain.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ setMethod(
)



.generate_trainer_tasks <- function(
file_paths,
project_id
Expand Down Expand Up @@ -48,7 +49,7 @@ setMethod(
# Add tasks related to data processing for learners.
task_list <- c(
task_list,
.generate_learner_tasks(
.generate_learner_data_preprocessing_tasks(
file_paths = file_paths,
project_id = project_id
)
Expand All @@ -58,46 +59,7 @@ setMethod(
}


# Generate the list of tasks related to variable importance (vimp).
#
# file_paths: forwarded to .generate_vimp_data_preprocessing_tasks.
# project_id: forwarded to .generate_vimp_data_preprocessing_tasks.
#
# Returns NULL when no vimp-related tasks were generated. Otherwise returns the
# generated tasks followed by the tasks produced by
# .generate_vimp_data_preprocessing_tasks.
#
# NOTE(review): the loop body below does not add anything to task_list yet, so
# the function currently always returns NULL.
# NOTE(review): data_ids, run_ids and vimp_methods are not defined inside this
# function and are not arguments; they have to be resolved from an enclosing
# environment -- confirm where they are supposed to come from.
.generate_vimp_tasks <- function(
    file_paths,
    project_id
) {

  task_list <- list()

  # Check if vimp should be computed separately or is computed during
  # hyperparameter optimisation.

  for (data_id in data_ids) {
    for (run_id in run_ids) {
      for (vimp_method in vimp_methods) {

        # Check if the variable importance method requires any computation.
        # For example, signature_only, none and random do not require
        # computation.

        # Set up variable importance computation task.

        # Set up variable importance hyperparameter task.

      }
    }
  }

  # Check if any vimp-related tasks are required. length() is the R function
  # for the number of list elements; len() does not exist in R and made this
  # guard fail with "could not find function".
  if (length(task_list) == 0L) return(NULL)

  # Add tasks related to data processing for vimp methods.
  task_list <- c(
    task_list,
    .generate_vimp_data_preprocessing_tasks(
      file_paths = file_paths,
      project_id = project_id
    )
  )

  return(task_list)
}




Expand Down
Loading

0 comments on commit 8693001

Please sign in to comment.