From c0013e4da8a8108547715cfae58660a1ff9939a0 Mon Sep 17 00:00:00 2001 From: Alex Zwanenburg Date: Wed, 8 Jan 2025 17:46:16 +0100 Subject: [PATCH] Reworked outdated functions and methods. --- DESCRIPTION | 5 +- R/FamiliarS4Generics.R | 2 +- R/FeatureInfo.R | 56 ---- R/ParseData.R | 14 +- R/TestFeatureInfo.R | 59 ++++ R/TestFunctions.R | 16 +- R/TestTrain.R | 217 ++++++++++++++ R/{TrainS4Methods.R => TestTrainNovelty.R} | 275 ++---------------- R/TestVimp.R | 167 +++++++++++ R/VimpMain.R | 193 ------------ .../test-0_preprocessing_clustering.R | 58 +--- tests/testthat/test-0_vimp_rfsrc_S4.R | 24 +- tests/testthat/test-batch_normalisation.R | 51 +--- tests/testthat/test-general_normalisation.R | 42 +-- tests/testthat/test-imputation.R | 70 +---- tests/testthat/test-transformation.R | 49 +--- tests/testthat/test-vimp_concordance_S4.R | 8 +- tests/testthat/test-vimp_corelearn_S4.R | 18 +- tests/testthat/test-vimp_correlation_S4.R | 4 +- .../test-vimp_mutual_information_S4.R | 24 +- tests/testthat/test-vimp_ranger_S4.R | 24 +- tests/testthat/test-vimp_regression_S4.R | 8 +- tests/testthat/test-vimp_table.R | 10 +- 23 files changed, 592 insertions(+), 802 deletions(-) create mode 100644 R/TestFeatureInfo.R create mode 100644 R/TestTrain.R rename R/{TrainS4Methods.R => TestTrainNovelty.R} (55%) create mode 100644 R/TestVimp.R diff --git a/DESCRIPTION b/DESCRIPTION index a164ad7f..7f6832b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -204,8 +204,11 @@ Collate: 'TaskVimp.R' 'TaskVimpHyperparameters.R' 'TestDataCreators.R' + 'TestFeatureInfo.R' 'TestFunctions.R' - 'TrainS4Methods.R' + 'TestTrain.R' + 'TestTrainNovelty.R' + 'TestVimp.R' 'TrimUtilities.R' 'Utilities.R' 'UtilitiesS4.R' diff --git a/R/FamiliarS4Generics.R b/R/FamiliarS4Generics.R index 358eb5ac..74c66b3c 100644 --- a/R/FamiliarS4Generics.R +++ b/R/FamiliarS4Generics.R @@ -234,7 +234,7 @@ setGeneric(".vimp", function(object, ...) standardGeneric(".vimp")) setGeneric("promote_vimp_method", function(object, ...) standardGeneric("promote_vimp_method")) -setGeneric("prepare_vimp_object", function(data, ...) standardGeneric("prepare_vimp_object")) +setGeneric("test_create_vimp_method", function(data, ...) standardGeneric("test_create_vimp_method")) diff --git a/R/FeatureInfo.R b/R/FeatureInfo.R index 0a313d2f..8044f1cc 100644 --- a/R/FeatureInfo.R +++ b/R/FeatureInfo.R @@ -2,62 +2,6 @@ #' @include FamiliarS4Classes.R NULL -create_feature_info <- function( - data, - signature = NULL, - ... -) { - # This creates a list of featureInfo objects, with processing, based on data. - # This code is primarily used within unit tests. - - # Reconstitute settings from the data. - settings <- extract_settings_from_data(data = data) - - # Update some missing settings that can be fixed within this method. - settings$data$train_cohorts <- unique(data@data[[get_id_columns(single_column = "batch")]]) - - # Parse the remaining settings that are important. - settings <- do.call( - .parse_general_settings, - args = c( - list( - "settings" = settings, - "data" = data@data - ), - list(...) - ) - ) - - # Create a list of featureInfo objects. - feature_info_list <- .get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = data@outcome_type - ) - - # Extract the generic data. - feature_info_list <- feature_info_list[["generic"]] - - # Add signature feature info - feature_info_list <- add_signature_info( - feature_info_list = feature_info_list, - signature = signature - ) - - # Perform some pre-processing (i.e. remove singular features) - feature_info_list <- .determine_preprocessing_parameters( - cl = NULL, - data = data, - feature_info_list = feature_info_list, - settings = settings, - verbose = FALSE - ) - - return(feature_info_list) -} - - .get_feature_info_data <- function( data, diff --git a/R/ParseData.R b/R/ParseData.R index 913a8cf2..3195f3cf 100644 --- a/R/ParseData.R +++ b/R/ParseData.R @@ -804,17 +804,19 @@ data_column_info = create_data_column_info(settings = settings) ) + # Add generic feature information. + generic_feature_info_task <- new("familiarTaskGenericFeatureInfo") + generic_feature_info <- .perform_task( + object = generic_feature_info_task, + data = data + ) + # Set up vimp object and promote to concordance. vimp_object <- promote_vimp_method(methods::new( "familiarVimpMethod", outcome_type = data@outcome_type, outcome_info = data@outcome_info, - feature_info = .get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = settings$data$outcome_type - )[["generic"]], + feature_info = generic_feature_info, vimp_method = "concordance" )) diff --git a/R/TestFeatureInfo.R b/R/TestFeatureInfo.R new file mode 100644 index 00000000..ae230692 --- /dev/null +++ b/R/TestFeatureInfo.R @@ -0,0 +1,59 @@ +test_create_generic_info <- function( + data +) { + # Setup feature info task. + feature_info_task <- methods::new( + "familiarTaskGenericFeatureInfo" + ) + + # Feature information objects are created from the bypass dataset. + feature_info <- .perform_task( + object = feature_info_task, + data = data + ) + + return(feature_info) +} + + + +test_create_feature_info <- function( + data, + signature = NULL, + ... +) { + # This creates a list of featureInfo objects, with processing, based on data. + # This code is primarily used within unit tests. + + # Reconstitute settings from the data. + settings <- extract_settings_from_data(data = data) + + # Update some missing settings that can be fixed within this method. + settings$data$train_cohorts <- unique(data@data[[get_id_columns(single_column = "batch")]]) + + # Parse the remaining settings that are important. + settings <- do.call( + .parse_general_settings, + args = c( + list( + "settings" = settings, + "data" = data@data + ), + list(...) + ) + ) + + # Setup feature info task. + feature_info_task <- methods::new( + "familiarTaskFeatureInfo" + ) + + # Feature information objects are created from the bypass dataset. + feature_info <- .perform_task( + object = feature_info_task, + data = data, + settings = settings + ) + + return(feature_info) +} diff --git a/R/TestFunctions.R b/R/TestFunctions.R index 04875879..a6168672 100644 --- a/R/TestFunctions.R +++ b/R/TestFunctions.R @@ -2139,7 +2139,7 @@ test_all_vimp_methods <- function( # Full dataset ----------------------------------------------------------- # Process dataset. - vimp_object <- prepare_vimp_object( + vimp_object <- test_create_vimp_method( data = full_data, vimp_method = vimp_method, vimp_method_parameter_list = hyperparameters, @@ -2250,7 +2250,7 @@ test_all_vimp_methods <- function( # One-feature dataset ---------------------------------------------------- # Process dataset. - vimp_object <- prepare_vimp_object( + vimp_object <- test_create_vimp_method( data = one_feature_data, vimp_method = vimp_method, vimp_method_parameter_list = hyperparameters, @@ -2317,7 +2317,7 @@ test_all_vimp_methods <- function( no_censoring_data <- test_create_good_data_without_censoring(outcome_type) # Process dataset. - vimp_object <- prepare_vimp_object( + vimp_object <- test_create_vimp_method( data = no_censoring_data, vimp_method = vimp_method, vimp_method_parameter_list = hyperparameters, @@ -2354,7 +2354,7 @@ test_all_vimp_methods <- function( one_censored_data <- test_create_good_data_one_censored(outcome_type) # Process dataset. - vimp_object <- prepare_vimp_object( + vimp_object <- test_create_vimp_method( data = one_censored_data, vimp_method = vimp_method, vimp_method_parameter_list = hyperparameters, @@ -2390,7 +2390,7 @@ test_all_vimp_methods <- function( few_censored_data <- test_create_good_data_few_censored(outcome_type) # Process dataset. - vimp_object <- prepare_vimp_object( + vimp_object <- test_create_vimp_method( data = few_censored_data, vimp_method = vimp_method, vimp_method_parameter_list = hyperparameters, @@ -2426,7 +2426,7 @@ test_all_vimp_methods <- function( # Fully prospective dataset ---------------------------------------------- # Set up the vimp object. - vimp_object <- prepare_vimp_object( + vimp_object <- test_create_vimp_method( data = full_data, vimp_method = vimp_method, vimp_method_parameter_list = hyperparameters, @@ -2553,7 +2553,7 @@ test_all_vimp_methods_parallel <- function( "1" = full_data, "2" = full_data ), - prepare_vimp_object, + test_create_vimp_method, vimp_method = vimp_method, vimp_method_parameter_list = hyperparameters, outcome_type = outcome_type, @@ -5253,7 +5253,7 @@ test_not_deprecated <- function(x, deprecation_string = c("deprec", "replac")) { } # Create feature info list. - feature_info_list <- create_feature_info( + feature_info_list <- test_create_feature_info( data = data, vimp_method = vimp_method, learner = learner, diff --git a/R/TestTrain.R b/R/TestTrain.R new file mode 100644 index 00000000..d6338fe0 --- /dev/null +++ b/R/TestTrain.R @@ -0,0 +1,217 @@ +#' @include FamiliarS4Generics.R +#' @include FamiliarS4Classes.R +NULL + + + +# test_train (generic) --------------------------------------------------------- +setGeneric( + "test_train", + function(data, ...) standardGeneric("test_train") +) + + + +# test_train (data.table) ------------------------------------------------------ +setMethod( + "test_train", + signature(data = "data.table"), + function( + data, + data_bypass = NULL, + learner, + hyperparameter_list = list(), + create_bootstrap = FALSE, + ... + ) { + if (!is.null(data_bypass)) { + # Convert data_bypass to dataObject. + data_bypass <- do.call( + as_data_object, + args = c( + list("data" = data_bypass), + list(...) + ) + ) + } + + # Convert data to dataObject. + data <- do.call( + as_data_object, + args = c( + list("data" = data), + list(...) + ) + ) + + return(do.call( + test_train, + args = c( + list( + "data" = data, + "data_bypass" = data_bypass, + "learner" = learner, + "hyperparameter_list" = hyperparameter_list, + "create_bootstrap" = create_bootstrap + ), + list(...) + ) + )) + } +) + + + +# test_train (dataObject) ------------------------------------------------------ +setMethod( + "test_train", + signature(data = "dataObject"), + function( + data, + data_bypass = NULL, + learner, + hyperparameter_list = list(), + create_bootstrap = FALSE, + create_novelty_detector = FALSE, + create_naive = FALSE, + cl = NULL, + trim_model = FALSE, + ... + ) { + # The bypass data allows for bypassing important aspects of the + # pre-processing pipeline, e.g. the preprocessing checks. This enables + # testing of very rare cases where preprocessing may run fine, but the + # subsample does not allow for training. + if (is.null(data_bypass)) data_bypass <- data + + # Prepare settings --------------------------------------------------------- + + # Reconstitute settings from the data. + settings <- extract_settings_from_data(data_bypass) + + # Update some missing settings that can be fixed within this method. + settings$data$train_cohorts <- unique(data_bypass@data[[get_id_columns(single_column = "batch")]]) + + # Parse the remaining settings that are important. Remove outcome_type from + # ... This prevents an error caused by multiple matching arguments. + dots <- list(...) + dots$parallel <- NULL + dots$vimp_method <- NULL + + # Determine if a naive model should be forced. + vimp_method <- ifelse(create_naive, "no_features", "none") + + settings <- do.call( + .parse_general_settings, + args = c( + list( + "settings" = settings, + "data" = data_bypass@data, + "parallel" = FALSE, + "vimp_method" = vimp_method, + "learner" = learner + ), + dots + ) + ) + + # Push settings to the backend as some functions require this. + .assign_settings_to_global(settings = settings) + + + # Prepare hyperparameters -------------------------------------------------- + + # Get default hyperparameters. + param_list <- .get_preset_hyperparameters( + data = data, + learner = learner, + names_only = FALSE + ) + + # Update with user-provided settings. + param_list <- .update_hyperparameters( + parameter_list = param_list, + user_list = hyperparameter_list + ) + + # Determine which hyperparameters still need to be specified. + unset_parameters <- sapply( + param_list, + function(hyperparameter_entry) hyperparameter_entry$randomise + ) + + # Raise an error if any hyperparameters were not set. + if (any(unset_parameters)) { + ..error(paste0( + "The following hyperparameters need to be specified: ", + paste_s(names(unset_parameters)[unset_parameters]) + )) + } + + # Obtain the final list of hyperparameters. + param_list <- lapply( + param_list, + function(hyperparameter_entry) hyperparameter_entry$init_config + ) + + + # Create feature information list ------------------------------------------ + + feature_info_task <- methods::new( + "familiarTaskFeatureInfo" + ) + + # Feature information objects are created from the bypass dataset. + feature_info <- .perform_task( + object = feature_info_task, + data = data_bypass, + settings = settings + ) + + + # Create learner ----------------------------------------------------------- + + train_task <- methods::new( + "familiarTaskTrain", + vimp_method = vimp_method, + learner = learner + ) + + if (create_bootstrap) { + data <- select_data_from_samples( + data = data, + samples = fam_sample( + x = data@data, + replace = TRUE, + seed = 19L + ) + ) + } + + # Learners are trained using the actual data. + object <- .perform_task( + object = train_task, + data = data, + settings = settings, + feature_info_list = feature_info, + hyperparameters = param_list, + novelty_detector = ifelse(create_novelty_detector, "isolation_forest", "none"), + detector_parameters = NULL, + trim_model = trim_model + ) + + return(object) + } +) + + + +get_placeholder_run_table <- function() { + return(data.table::data.table( + "run_id" = 1L, + "data_id" = 1L, + "can_pre_process" = TRUE, + "perturbation" = "main", + "perturb_level" = 1L + )) +} diff --git a/R/TrainS4Methods.R b/R/TestTrainNovelty.R similarity index 55% rename from R/TrainS4Methods.R rename to R/TestTrainNovelty.R index adeeda3b..c0355535 100644 --- a/R/TrainS4Methods.R +++ b/R/TestTrainNovelty.R @@ -1,210 +1,3 @@ -#' @include FamiliarS4Generics.R -#' @include FamiliarS4Classes.R -NULL - - - -# test_train (generic) --------------------------------------------------------- -setGeneric( - "test_train", - function(data, ...) standardGeneric("test_train") -) - - - -# test_train (data.table) ------------------------------------------------------ -setMethod( - "test_train", - signature(data = "data.table"), - function( - data, - data_bypass = NULL, - learner, - hyperparameter_list = list(), - create_bootstrap = FALSE, - ... - ) { - if (!is.null(data_bypass)) { - # Convert data_bypass to dataObject. - data_bypass <- do.call( - as_data_object, - args = c( - list("data" = data_bypass), - list(...) - ) - ) - } - - # Convert data to dataObject. - data <- do.call( - as_data_object, - args = c( - list("data" = data), - list(...) - ) - ) - - return(do.call( - test_train, - args = c( - list( - "data" = data, - "data_bypass" = data_bypass, - "learner" = learner, - "hyperparameter_list" = hyperparameter_list, - "create_bootstrap" = create_bootstrap - ), - list(...) - ) - )) - } -) - - - -# test_train (dataObject) ------------------------------------------------------ -setMethod( - "test_train", - signature(data = "dataObject"), - function( - data, - data_bypass = NULL, - learner, - hyperparameter_list = list(), - create_bootstrap = FALSE, - create_novelty_detector = FALSE, - create_naive = FALSE, - cl = NULL, - trim_model = FALSE, - ... - ) { - # The bypass data allows for bypassing important aspects of the - # pre-processing pipeline, e.g. the preprocessing checks. This enables - # testing of very rare cases where preprocessing may run fine, but the - # subsample does not allow for training. - if (is.null(data_bypass)) data_bypass <- data - - # Prepare settings --------------------------------------------------------- - - # Reconstitute settings from the data. - settings <- extract_settings_from_data(data_bypass) - - # Update some missing settings that can be fixed within this method. - settings$data$train_cohorts <- unique(data_bypass@data[[get_id_columns(single_column = "batch")]]) - - # Parse the remaining settings that are important. Remove outcome_type from - # ... This prevents an error caused by multiple matching arguments. - dots <- list(...) - dots$parallel <- NULL - dots$vimp_method <- NULL - - # Determine if a naive model should be forced. - vimp_method <- ifelse(create_naive, "no_features", "none") - - settings <- do.call( - .parse_general_settings, - args = c( - list( - "settings" = settings, - "data" = data_bypass@data, - "parallel" = FALSE, - "vimp_method" = vimp_method, - "learner" = learner - ), - dots - ) - ) - - # Push settings to the backend as some functions require this. - .assign_settings_to_global(settings = settings) - - # Prepare hyperparameters -------------------------------------------------- - - # Get default hyperparameters. - param_list <- .get_preset_hyperparameters( - data = data, - learner = learner, - names_only = FALSE - ) - - # Update with user-provided settings. - param_list <- .update_hyperparameters( - parameter_list = param_list, - user_list = hyperparameter_list - ) - - # Determine which hyperparameters still need to be specified. - unset_parameters <- sapply( - param_list, - function(hyperparameter_entry) hyperparameter_entry$randomise - ) - - # Raise an error if any hyperparameters were not set. - if (any(unset_parameters)) { - ..error(paste0( - "The following hyperparameters need to be specified: ", - paste_s(names(unset_parameters)[unset_parameters]) - )) - } - - # Obtain the final list of hyperparameters. - param_list <- lapply( - param_list, - function(hyperparameter_entry) hyperparameter_entry$init_config - ) - - - # Create feature information list ------------------------------------------ - - feature_info_task <- methods::new( - "familiarTaskFeatureInfo" - ) - - # Feature information objects are created from the bypass dataset. - feature_info <- .perform_task( - object = feature_info_task, - data = data_bypass, - settings = settings - ) - - - # Create learner ----------------------------------------------------------- - - train_task <- methods::new( - "familiarTaskTrain", - vimp_method = vimp_method, - learner = learner - ) - - if (create_bootstrap) { - data <- select_data_from_samples( - data = data, - samples = fam_sample( - x = data@data, - replace = TRUE, - seed = 19L - ) - ) - } - - # Learners are trained using the actual data. - object <- .perform_task( - object = train_task, - data = data, - settings = settings, - feature_info_list = feature_info, - hyperparameters = param_list, - novelty_detector = ifelse(create_novelty_detector, "isolation_forest", "none"), - detector_parameters = NULL, - trim_model = trim_model - ) - - return(object) - } -) - - - # test_train_novelty_detector (generic) ---------------------------------------- setGeneric("test_train_novelty_detector", function(data, ...) standardGeneric("test_train_novelty_detector")) @@ -232,7 +25,7 @@ setMethod( ) ) } - + # Convert data to dataObject. data <- do.call( as_data_object, @@ -278,22 +71,22 @@ setMethod( # testing of very rare cases where preprocessing may run fine, but the # subsample does not allow for training. if (is.null(data_bypass)) data_bypass <- data - + # Prepare settings --------------------------------------------------------- - + # Reconstitute settings from the data. settings <- extract_settings_from_data(data_bypass) - + # Update some missing settings that can be fixed within this method. settings$data$train_cohorts <- unique(data_bypass@data[[get_id_columns(single_column = "batch")]]) - + # Parse the remaining settings that are important. Remove outcome_type from # ... This prevents an error caused by multiple matching arguments. dots <- list(...) dots$parallel <- NULL dots$vimp_method <- NULL dots$hyperparameter <- NULL - + # Create setting_hyperparam so that it can be parsed correctly. if (!detector %in% names(hyperparameter_list) && length(hyperparameter_list) > 0L) { setting_hyperparam <- list() @@ -301,7 +94,7 @@ setMethod( } else { setting_hyperparam <- hyperparameter_list } - + settings <- do.call( .parse_general_settings, args = c( @@ -317,12 +110,12 @@ setMethod( dots ) ) - + # Push settings to the backend. .assign_settings_to_global(settings = settings) - + # Prepare featureInfo objects ---------------------------------------------- - + # Create a list of featureInfo objects. feature_info_list <- .get_feature_info_data( data = data_bypass@data, @@ -330,10 +123,10 @@ setMethod( project_id = character(), outcome_type = settings$data$outcome_type ) - + # Extract the generic data. feature_info_list <- feature_info_list[["generic"]] - + # Perform some pre-processing (i.e. remove singular features) feature_info_list <- .determine_preprocessing_parameters( cl = cl, @@ -342,47 +135,47 @@ setMethod( settings = settings, verbose = FALSE ) - + # Remove invariant features from the data data <- filter_features( data = data, available_features = get_available_features(feature_info_list = feature_info_list) ) - + # Find features that are required for processing the data. required_features <- get_required_features( x = data, feature_info_list = feature_info_list ) - + # Find important features, i.e. those that constitute the signature either # individually or as part of a cluster. model_features <- get_model_features( x = data, feature_info_list = feature_info_list ) - + # Prepare hyperparameters -------------------------------------------------- - + # Get default hyperparameters. param_list <- .get_preset_hyperparameters( data = data, detector = detector, names_only = FALSE ) - + # Update with user-provided settings. param_list <- .update_hyperparameters( parameter_list = param_list, user_list = settings$mb$detector_parameters[[detector]] ) - + # Determine which hyperparameters still need to be specified. unset_parameters <- sapply( param_list, function(hyperparameter_entry) hyperparameter_entry$randomise ) - + # Raise an error if any hyperparameters were not set. if (any(unset_parameters)) { ..error(paste0( @@ -390,15 +183,15 @@ setMethod( paste_s(names(unset_parameters)[unset_parameters]) )) } - + # Obtain the final list of hyperparameters. param_list <- lapply( param_list, function(hyperparameter_entry) hyperparameter_entry$init_config ) - + # Prepare model and data --------------------------------------------------- - + # Create familiar model object <- methods::new( "familiarNoveltyDetector", @@ -410,17 +203,17 @@ setMethod( run_table = get_placeholder_run_table(), project_id = 0L ) - + # Add package version/ object <- add_package_version(object = object) - + # Process data. data <- process_input_data( object = object, data = data, stop_at = "clustering" ) - + # Create bootstraps. if (create_bootstrap) { data <- select_data_from_samples( @@ -431,7 +224,7 @@ setMethod( ) ) } - + # Train model. object <- .train( object = object, @@ -440,19 +233,7 @@ setMethod( trim_model = trim_model, timeout = Inf ) - + return(object) } ) - - - -get_placeholder_run_table <- function() { - return(data.table::data.table( - "run_id" = 1L, - "data_id" = 1L, - "can_pre_process" = TRUE, - "perturbation" = "main", - "perturb_level" = 1L - )) -} diff --git a/R/TestVimp.R b/R/TestVimp.R new file mode 100644 index 00000000..8cf0e3cb --- /dev/null +++ b/R/TestVimp.R @@ -0,0 +1,167 @@ +# test_create_vimp_method (data.table) ----------------------------------------- +setMethod( + "test_create_vimp_method", + signature(data = "data.table"), + function( + data, + vimp_method, + vimp_method_parameter_list = list(), + ... + ) { + # Convert data to dataObject. + data <- do.call( + as_data_object, + args = c( + list("data" = data), + list(...) + ) + ) + + return(do.call( + test_create_vimp_method, + args = c( + list( + "data" = data, + "vimp_method" = vimp_method, + "vimp_method_parameter_list" = vimp_method_parameter_list + ), + list(...) + ) + )) + } +) + + + +# test_create_vimp_method (dataObject) ----------------------------------------- +setMethod( + "test_create_vimp_method", + signature(data = "dataObject"), + function( + data, + data_bypass = NULL, + vimp_method, + vimp_method_parameter_list = list(), + ... + ) { + # The bypass data allows for bypassing important aspects of the + # pre-processing pipeline, e.g. the preprocessing checks. This enables + # testing of very rare cases where preprocessing may run fine, but the + # subsample does not allow for training. + if (is.null(data_bypass)) data_bypass <- data + + # Prepare setting ---------------------------------------------------------- + + # Reconstitute settings from the data. + settings <- extract_settings_from_data(data) + + # Update some missing settings that can be fixed within this method. + settings$data$train_cohorts <- unique(data@data[[get_id_columns(single_column = "batch")]]) + + # Parse the remaining settings that are important. Remove outcome_type from + # ... This prevents an error caused by multiple matching arguments. + dots <- list(...) + dots$parallel <- NULL + dots$vimp_method <- NULL + + if (!is.null(dots$signature)) settings$data$signature <- dots$signature + + settings <- do.call( + .parse_general_settings, + args = c( + list( + "settings" = settings, + "data" = data_bypass@data, + "parallel" = FALSE, + "vimp_method" = vimp_method, + "learner" = "glm" + ), + dots + ) + ) + + # Push settings to the backend. + .assign_settings_to_global(settings = settings) + + + # Prepare hyperparameters -------------------------------------------------- + + # Get default hyperparameters. + param_list <- .get_preset_hyperparameters( + data = data, + vimp_method = vimp_method, + names_only = FALSE + ) + + # Update with user-provided settings. + param_list <- .update_hyperparameters( + parameter_list = param_list, + user_list = vimp_method_parameter_list + ) + + # Determine which hyperparameters still need to be specified. + unset_parameters <- sapply( + param_list, + function(hyperparameter_entry) hyperparameter_entry$randomise + ) + + # Mark sign-size as set, as it is not used for variable importance. + if ("sign_size" %in% names(unset_parameters)) { + unset_parameters["sign_size"] <- FALSE + } + + # Raise an error if any hyperparameters were not set. + if (any(unset_parameters)) { + ..error(paste0( + "The following hyperparameters need to be specified: ", + paste_s(names(unset_parameters)[unset_parameters]) + )) + } + + # Obtain the final list of hyperparameters. + param_list <- lapply( + param_list, + function(hyperparameter_entry) hyperparameter_entry$init_config + ) + + + # Create feature information list ------------------------------------------ + + feature_info_task <- methods::new( + "familiarTaskFeatureInfo" + ) + + # Feature information objects are created from the bypass dataset. + feature_info <- .perform_task( + object = feature_info_task, + data = data_bypass, + settings = settings + ) + + + # Prepare vimp object ------------------------------------------------------ + + # Get required features. + required_features <- get_required_features( + x = data, + feature_info_list = feature_info + ) + + # Create a familiar variable importance method. + object <- methods::new( + "familiarVimpMethod", + outcome_type = data@outcome_type, + vimp_method = vimp_method, + hyperparameters = param_list, + outcome_info = data@outcome_info, + feature_info = feature_info[required_features], + required_features = required_features + ) + + # Promote object to correct subclass. + object <- promote_vimp_method(object) + + # Return in list. + return(object) + } +) diff --git a/R/VimpMain.R b/R/VimpMain.R index af488958..f3eb5eb1 100644 --- a/R/VimpMain.R +++ b/R/VimpMain.R @@ -259,199 +259,6 @@ setMethod( -# prepare_vimp_object (data.table) --------------------------------------------- -setMethod( - "prepare_vimp_object", - signature(data = "data.table"), - function( - data, - vimp_method, - vimp_method_parameter_list = list(), - ... - ) { - # Convert data to dataObject. - data <- do.call( - as_data_object, - args = c( - list("data" = data), - list(...) - ) - ) - - return(do.call( - prepare_vimp_object, - args = c( - list( - "data" = data, - "vimp_method" = vimp_method, - "vimp_method_parameter_list" = vimp_method_parameter_list - ), - list(...) - ) - )) - } -) - - - -# prepare_vimp_object (dataObject) --------------------------------------------- -setMethod( - "prepare_vimp_object", - signature(data = "dataObject"), - function( - data, - vimp_method, - vimp_method_parameter_list = list(), - ... - ) { - # This method is used within unit tests, but not by the main - # familiar workflow. - - # Prepare setting ---------------------------------------------------------- - - # Reconstitute settings from the data. - settings <- extract_settings_from_data(data) - - # Update some missing settings that can be fixed within this method. - settings$data$train_cohorts <- unique(data@data[[get_id_columns(single_column = "batch")]]) - - # Parse the remaining settings that are important. Remove outcome_type from - # ... This prevents an error caused by multiple matching arguments. - dots <- list(...) - dots$parallel <- NULL - dots$vimp_method <- NULL - dots$vimp_method_parameter <- NULL - dots$learner <- NULL - - if (!is.null(dots$signature)) settings$data$signature <- dots$signature - - # Create setting_hyperparam so that it can be parsed correctly. - if ( - !vimp_method %in% names(vimp_method_parameter_list) && - length(vimp_method_parameter_list) > 0L - ) { - setting_hyperparam <- list() - setting_hyperparam[[vimp_method]] <- vimp_method_parameter_list - - } else { - setting_hyperparam <- vimp_method_parameter_list - } - - settings <- do.call( - .parse_general_settings, - args = c( - list( - "settings" = settings, - "data" = data@data, - "parallel" = FALSE, - "vimp_method" = vimp_method, - "learner" = "glm", - "vimp_method_parameter" = setting_hyperparam - ), - dots - ) - ) - - # Push settings to the backend. - .assign_settings_to_global(settings = settings) - - # Prepare featureInfo objects ---------------------------------------------- - - # Create a list of featureInfo objects. - feature_info_list <- .get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = settings$data$outcome_type - ) - - # Extract the generic data. - feature_info_list <- feature_info_list[["generic"]] - - # Add signature info. - feature_info_list <- add_signature_info( - feature_info_list = feature_info_list, - signature = settings$data$signature - ) - - # Perform some pre-processing (i.e. remove singular features) - feature_info_list <- .determine_preprocessing_parameters( - cl = NULL, - data = data, - feature_info_list = feature_info_list, - settings = settings, - verbose = FALSE - ) - - # Prepare hyperparameters -------------------------------------------------- - - # Get default hyperparameters. - param_list <- .get_preset_hyperparameters( - data = data, - vimp_method = vimp_method, - names_only = FALSE - ) - - # Update with user-provided settings. - param_list <- .update_hyperparameters( - parameter_list = param_list, - user_list = settings$vimp$param[[vimp_method]] - ) - - # Determine which hyperparameters still need to be specified. - unset_parameters <- sapply( - param_list, - function(hyperparameter_entry) hyperparameter_entry$randomise - ) - - # Mark sign-size as set, as it is not used for variable importance. - if ("sign_size" %in% names(unset_parameters)) { - unset_parameters["sign_size"] <- FALSE - } - - # Raise an error if any hyperparameters were not set. - if (any(unset_parameters)) { - ..error(paste0( - "The following hyperparameters need to be specified: ", - paste_s(names(unset_parameters)[unset_parameters]) - )) - } - - # Obtain the final list of hyperparameters. - param_list <- lapply( - param_list, - function(hyperparameter_entry) hyperparameter_entry$init_config - ) - - # Prepare vimp object ------------------------------------------------------ - - # Get required features. - required_features <- get_required_features( - x = data, - feature_info_list = feature_info_list - ) - - # Create a familiar variable importance method. - object <- methods::new( - "familiarVimpMethod", - outcome_type = settings$data$outcome_type, - vimp_method = vimp_method, - hyperparameters = param_list, - outcome_info = data@outcome_info, - feature_info = feature_info_list[required_features], - required_features = required_features - ) - - # Promote object to correct subclass. - object <- promote_vimp_method(object) - - # Return in list. - return(object) - } -) - - - # add_package_version ---------------------------------------------------------- setMethod( "add_package_version", diff --git a/tests/testthat/test-0_preprocessing_clustering.R b/tests/testthat/test-0_preprocessing_clustering.R index 42097c34..208c6207 100644 --- a/tests/testthat/test-0_preprocessing_clustering.R +++ b/tests/testthat/test-0_preprocessing_clustering.R @@ -117,12 +117,7 @@ while (TRUE) { cluster_size = cluster_size) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = parameters$outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data) # Create cluster skeletons feature_info_list <- familiar:::create_cluster_parameter_skeleton( @@ -331,12 +326,7 @@ while (TRUE) { features = "feature_1") # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = parameters$outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data) # Create cluster skeletons feature_info_list <- familiar:::create_cluster_parameter_skeleton( @@ -419,12 +409,7 @@ while (TRUE) { features = c("feature_1", "feature_2")) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = parameters$outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data) # Create cluster skeletons feature_info_list <- familiar:::create_cluster_parameter_skeleton( @@ -515,13 +500,8 @@ while (TRUE) { features = c("feature_1_A", "feature_1_B")) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = parameters$outcome_type - )[[1]] - + feature_info_list <- test_create_generic_info(data = data) + # Create cluster skeletons feature_info_list <- familiar:::create_cluster_parameter_skeleton( feature_info_list, @@ -594,12 +574,7 @@ while (TRUE) { cluster_size = cluster_size) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = parameters$outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data) # Set signature features feature_info_list <- familiar:::add_signature_info( @@ -710,12 +685,7 @@ while (TRUE) { cluster_size = cluster_size) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = parameters$outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data) # Create cluster skeletons feature_info_list <- familiar:::create_cluster_parameter_skeleton( @@ -819,12 +789,7 @@ while (TRUE) { cluster_size = cluster_size) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = parameters$outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data) # Create cluster skeletons feature_info_list <- familiar:::create_cluster_parameter_skeleton(feature_info_list, @@ -896,12 +861,7 @@ while (TRUE) { cluster_size = cluster_size) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data@data, - file_paths = NULL, - project_id = character(), - outcome_type = parameters$outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data) # Create cluster skeletons feature_info_list <- familiar:::create_cluster_parameter_skeleton( diff --git a/tests/testthat/test-0_vimp_rfsrc_S4.R b/tests/testthat/test-0_vimp_rfsrc_S4.R index 0a9ae330..7324d290 100644 --- a/tests/testthat/test-0_vimp_rfsrc_S4.R +++ b/tests/testthat/test-0_vimp_rfsrc_S4.R @@ -101,7 +101,7 @@ familiar:::test_all_vimp_methods_parallel( data <- familiar:::test_create_good_data("continuous") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_minimum_depth", vimp_method_parameter_list = list( @@ -135,7 +135,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_permutation", vimp_method_parameter_list = list( @@ -168,7 +168,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_holdout", vimp_method_parameter_list = list( @@ -207,7 +207,7 @@ testthat::test_that( data <- familiar:::test_create_good_data("binomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_minimum_depth", vimp_method_parameter_list = list( @@ -240,7 +240,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_permutation", vimp_method_parameter_list = list( @@ -274,7 +274,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_holdout", vimp_method_parameter_list = list( @@ -311,7 +311,7 @@ testthat::test_that( data <- familiar:::test_create_good_data("multinomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_minimum_depth", vimp_method_parameter_list = list( @@ -344,7 +344,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_permutation", vimp_method_parameter_list = list( @@ -377,7 +377,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_holdout", vimp_method_parameter_list = list( @@ -414,7 +414,7 @@ testthat::test_that( data <- familiar:::test_create_good_data("survival") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_minimum_depth", vimp_method_parameter_list = list( @@ -447,7 +447,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_permutation", vimp_method_parameter_list = list( @@ -480,7 +480,7 @@ testthat::test_that( # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_rfsrc_holdout", vimp_method_parameter_list = list( diff --git a/tests/testthat/test-batch_normalisation.R b/tests/testthat/test-batch_normalisation.R index 0461f3a3..beb26405 100644 --- a/tests/testthat/test-batch_normalisation.R +++ b/tests/testthat/test-batch_normalisation.R @@ -23,12 +23,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { batch_ids <- unique(data_copy@data[[familiar:::get_id_columns("batch")]]) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Combat requires global standardisation if (batch_normalisation_method %in% familiar:::.get_available_batch_normalisation_methods("combat")) { @@ -256,12 +251,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { batch_ids <- unique(data_copy@data[[familiar:::get_id_columns("batch")]]) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Combat requires global standardisation if (batch_normalisation_method %in% @@ -432,12 +422,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { batch_ids <- unique(data_copy@data[[familiar:::get_id_columns("batch")]]) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Combat requires global standardisation if (batch_normalisation_method %in% familiar:::.get_available_batch_normalisation_methods("combat")) { @@ -626,12 +611,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { batch_ids <- unique(data_copy@data[[familiar:::get_id_columns("batch")]]) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Combat requires global standardisation if (batch_normalisation_method %in% familiar:::.get_available_batch_normalisation_methods("combat")) { @@ -777,12 +757,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { batch_ids <- unique(data_copy@data[[familiar:::get_id_columns("batch")]]) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Combat requires global standardisation if (batch_normalisation_method %in% @@ -970,13 +945,8 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { batch_ids <- unique(data_copy@data[[familiar:::get_id_columns("batch")]]) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] - + feature_info_list <- test_create_generic_info(data = data_copy) + # Combat requires global standardisation if (batch_normalisation_method %in% familiar:::.get_available_batch_normalisation_methods("combat")) { # Create normalisation skeletons. @@ -1122,12 +1092,7 @@ for (batch_normalisation_method in familiar:::.get_available_batch_normalisation batch_ids <- unique(data_copy@data[[familiar:::get_id_columns("batch")]]) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Combat requires global standardisation if (batch_normalisation_method %in% diff --git a/tests/testthat/test-general_normalisation.R b/tests/testthat/test-general_normalisation.R index 2e98b006..31e08391 100644 --- a/tests/testthat/test-general_normalisation.R +++ b/tests/testthat/test-general_normalisation.R @@ -289,12 +289,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_normalisation_parameter_skeleton( @@ -416,12 +411,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_normalisation_parameter_skeleton( @@ -490,12 +480,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_normalisation_parameter_skeleton( @@ -573,12 +558,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_normalisation_parameter_skeleton( @@ -629,12 +609,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_normalisation_parameter_skeleton( @@ -712,12 +687,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_normalisation_parameter_skeleton( diff --git a/tests/testthat/test-imputation.R b/tests/testthat/test-imputation.R index ed5b8e55..9c8cf459 100644 --- a/tests/testthat/test-imputation.R +++ b/tests/testthat/test-imputation.R @@ -24,12 +24,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -89,12 +84,7 @@ for (n_numeric_features in c(3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -154,12 +144,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -216,12 +201,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -277,12 +257,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -353,12 +328,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -414,12 +384,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -474,12 +439,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -539,12 +499,7 @@ for (n_numeric_features in c(1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list <- familiar:::create_imputation_parameter_skeleton( @@ -600,12 +555,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Create imputation skeleton. feature_info_list_1 <- familiar:::create_imputation_parameter_skeleton( diff --git a/tests/testthat/test-transformation.R b/tests/testthat/test-transformation.R index a8448f21..512e2e98 100644 --- a/tests/testthat/test-transformation.R +++ b/tests/testthat/test-transformation.R @@ -19,12 +19,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_transformation_parameter_skeleton( @@ -135,12 +130,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_transformation_parameter_skeleton( @@ -200,12 +190,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_transformation_parameter_skeleton( @@ -272,12 +257,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_transformation_parameter_skeleton( @@ -338,12 +318,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_transformation_parameter_skeleton( @@ -410,12 +385,7 @@ for (n_numeric_features in c(4, 3, 2, 1, 0)) { data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons. feature_info_list <- familiar:::create_transformation_parameter_skeleton( @@ -471,12 +441,7 @@ testthat::test_that( data_copy <- data.table::copy(data) # Create a list of featureInfo objects. - feature_info_list <- familiar:::.get_feature_info_data( - data = data_copy@data, - file_paths = NULL, - project_id = character(), - outcome_type = outcome_type - )[[1]] + feature_info_list <- test_create_generic_info(data = data_copy) # Add skeletons for features 1 and 2. feature_info_list <- familiar:::create_transformation_parameter_skeleton( diff --git a/tests/testthat/test-vimp_concordance_S4.R b/tests/testthat/test-vimp_concordance_S4.R index 732b3937..8a1acf02 100644 --- a/tests/testthat/test-vimp_concordance_S4.R +++ b/tests/testthat/test-vimp_concordance_S4.R @@ -20,7 +20,7 @@ familiar:::test_all_vimp_methods_parallel( data <- familiar:::test_create_good_data("continuous") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "concordance", vimp_method_parameter_list = NULL, @@ -48,7 +48,7 @@ testthat::test_that(paste0("The concordance method correctly ranks continuous da data <- familiar:::test_create_good_data("binomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "concordance", vimp_method_parameter_list = NULL, @@ -75,7 +75,7 @@ testthat::test_that(paste0("The concordance method correctly ranks binomial data data <- familiar:::test_create_good_data("multinomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "concordance", vimp_method_parameter_list = NULL, @@ -102,7 +102,7 @@ testthat::test_that(paste0("The concordance method correctly ranks multinomial o data <- familiar:::test_create_good_data("survival") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "concordance", vimp_method_parameter_list = NULL, diff --git a/tests/testthat/test-vimp_corelearn_S4.R b/tests/testthat/test-vimp_corelearn_S4.R index aa8576d1..eac1dd8c 100644 --- a/tests/testthat/test-vimp_corelearn_S4.R +++ b/tests/testthat/test-vimp_corelearn_S4.R @@ -54,7 +54,7 @@ familiar:::test_all_vimp_methods_parallel( data <- familiar:::test_create_good_data("continuous") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "relieff_exp_rank", vimp_method_parameter_list = NULL, @@ -83,7 +83,7 @@ testthat::test_that(paste0( data <- familiar:::test_create_good_data("binomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "relieff_exp_rank", vimp_method_parameter_list = NULL, @@ -108,7 +108,7 @@ testthat::test_that(paste0( }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "gini", vimp_method_parameter_list = NULL, @@ -132,7 +132,7 @@ testthat::test_that(paste0("The Gini method correctly ranks binomial data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "gain_ratio", vimp_method_parameter_list = NULL, @@ -157,7 +157,7 @@ testthat::test_that(paste0("The gain ratio method correctly ranks binomial data. }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mdl", vimp_method_parameter_list = NULL, @@ -185,7 +185,7 @@ testthat::test_that(paste0("The MDL method correctly ranks binomial data."), { data <- familiar:::test_create_good_data("multinomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "relieff_exp_rank", vimp_method_parameter_list = NULL, @@ -210,7 +210,7 @@ testthat::test_that(paste0( }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "gini", vimp_method_parameter_list = NULL, @@ -234,7 +234,7 @@ testthat::test_that(paste0("The Gini method correctly ranks multinomial outcome }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "gain_ratio", vimp_method_parameter_list = NULL, @@ -258,7 +258,7 @@ testthat::test_that(paste0("The gain ratio method correctly ranks multinomial ou }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mdl", vimp_method_parameter_list = NULL, diff --git a/tests/testthat/test-vimp_correlation_S4.R b/tests/testthat/test-vimp_correlation_S4.R index 4698889d..78c74496 100644 --- a/tests/testthat/test-vimp_correlation_S4.R +++ b/tests/testthat/test-vimp_correlation_S4.R @@ -22,7 +22,7 @@ familiar:::test_all_vimp_methods_parallel( data <- familiar:::test_create_good_data("continuous") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "spearman", vimp_method_parameter_list = NULL, @@ -44,7 +44,7 @@ testthat::test_that(paste0("Spearman correlation correctly ranks continuous data data <- familiar:::test_create_good_data("survival") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "spearman", vimp_method_parameter_list = NULL, diff --git a/tests/testthat/test-vimp_mutual_information_S4.R b/tests/testthat/test-vimp_mutual_information_S4.R index e6b99f5d..ed68f2f7 100644 --- a/tests/testthat/test-vimp_mutual_information_S4.R +++ b/tests/testthat/test-vimp_mutual_information_S4.R @@ -31,7 +31,7 @@ familiar:::test_all_vimp_methods_parallel( data <- familiar:::test_create_good_data("continuous") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mim", vimp_method_parameter_list = NULL, @@ -55,7 +55,7 @@ testthat::test_that(paste0("MIM correctly ranks continuous data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mifs", vimp_method_parameter_list = NULL, @@ -79,7 +79,7 @@ testthat::test_that(paste0("MIFS correctly ranks continuous data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mrmr", vimp_method_parameter_list = NULL, @@ -106,7 +106,7 @@ testthat::test_that(paste0("MRMR correctly ranks continuous data."), { data <- familiar:::test_create_good_data("binomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mim", vimp_method_parameter_list = NULL, @@ -130,7 +130,7 @@ testthat::test_that(paste0("MIM correctly ranks binomial data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mifs", vimp_method_parameter_list = NULL, @@ -153,7 +153,7 @@ testthat::test_that(paste0("MIFS correctly ranks binomial data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mrmr", vimp_method_parameter_list = NULL, @@ -182,7 +182,7 @@ testthat::test_that(paste0("MRMR correctly ranks binomial data."), { data <- familiar:::test_create_good_data("multinomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mim", vimp_method_parameter_list = NULL, @@ -206,7 +206,7 @@ testthat::test_that(paste0("MIM correctly ranks multinomial outcome data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mifs", vimp_method_parameter_list = NULL, @@ -230,7 +230,7 @@ testthat::test_that(paste0("MIFS correctly ranks multinomial outcome data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mrmr", vimp_method_parameter_list = NULL, @@ -258,7 +258,7 @@ testthat::test_that(paste0("MRMR correctly ranks multinomial outcome data."), { data <- familiar:::test_create_good_data("survival") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mim", vimp_method_parameter_list = NULL, @@ -282,7 +282,7 @@ testthat::test_that(paste0("MIM correctly ranks survival outcome data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mifs", vimp_method_parameter_list = NULL, @@ -305,7 +305,7 @@ testthat::test_that(paste0("MIFS correctly ranks survival outcome data."), { }) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "mrmr", vimp_method_parameter_list = NULL, diff --git a/tests/testthat/test-vimp_ranger_S4.R b/tests/testthat/test-vimp_ranger_S4.R index c5bc772e..5a7054a1 100644 --- a/tests/testthat/test-vimp_ranger_S4.R +++ b/tests/testthat/test-vimp_ranger_S4.R @@ -91,7 +91,7 @@ familiar:::test_all_vimp_methods_parallel( data <- familiar:::test_create_good_data("continuous") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_impurity", vimp_method_parameter_list = list( @@ -124,7 +124,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_permutation", vimp_method_parameter_list = list( @@ -158,7 +158,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_holdout_permutation", vimp_method_parameter_list = list( @@ -194,7 +194,7 @@ testthat::test_that( data <- familiar:::test_create_good_data("binomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_impurity", vimp_method_parameter_list = list( @@ -227,7 +227,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_permutation", vimp_method_parameter_list = list( @@ -259,7 +259,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_holdout_permutation", vimp_method_parameter_list = list( @@ -295,7 +295,7 @@ testthat::test_that( data <- familiar:::test_create_good_data("multinomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_impurity", vimp_method_parameter_list = list( @@ -327,7 +327,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_permutation", vimp_method_parameter_list = list( @@ -361,7 +361,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_holdout_permutation", vimp_method_parameter_list = list( @@ -400,7 +400,7 @@ testthat::test_that( data <- familiar:::test_create_good_data("survival") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_impurity", vimp_method_parameter_list = list( @@ -433,7 +433,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_permutation", vimp_method_parameter_list = list( @@ -466,7 +466,7 @@ testthat::test_that( ) # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "random_forest_ranger_holdout_permutation", vimp_method_parameter_list = list( diff --git a/tests/testthat/test-vimp_regression_S4.R b/tests/testthat/test-vimp_regression_S4.R index c633c188..bfab943a 100644 --- a/tests/testthat/test-vimp_regression_S4.R +++ b/tests/testthat/test-vimp_regression_S4.R @@ -43,7 +43,7 @@ familiar:::test_all_vimp_methods_parallel( data <- familiar:::test_create_good_data("continuous") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "multivariate_regression", vimp_method_parameter_list = NULL, @@ -74,7 +74,7 @@ testthat::test_that(paste0("Multivariate regression correctly ranks continuous d data <- familiar:::test_create_good_data("binomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "multivariate_regression", vimp_method_parameter_list = NULL, @@ -105,7 +105,7 @@ testthat::test_that(paste0("Multivariate regression correctly ranks binomial dat data <- familiar:::test_create_good_data("multinomial") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "multivariate_regression", vimp_method_parameter_list = NULL, @@ -136,7 +136,7 @@ testthat::test_that(paste0("Multivariate regression correctly ranks multinomial data <- familiar:::test_create_good_data("survival") # Process dataset. -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "multivariate_regression", vimp_method_parameter_list = NULL, diff --git a/tests/testthat/test-vimp_table.R b/tests/testthat/test-vimp_table.R index 0fee5fc8..3c7d4134 100644 --- a/tests/testthat/test-vimp_table.R +++ b/tests/testthat/test-vimp_table.R @@ -10,7 +10,7 @@ data <- familiar:::test_create_synthetic_correlated_data( n_numeric = 2, cluster_size = c(1, 1, 2, 3)) -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "pearson", outcome_type = "continuous", @@ -158,7 +158,7 @@ data_2 <- familiar:::test_create_synthetic_correlated_data( n_numeric = 2, cluster_size = c(1, 1, 4, 2)) -vimp_object_2 <- familiar:::prepare_vimp_object( +vimp_object_2 <- familiar:::test_create_vimp_method( data = data_2, vimp_method = "pearson", outcome_type = "continuous", @@ -237,7 +237,7 @@ data.table::setnames( old = c("feature_1", "feature_2", "feature_3", "feature_4"), new = c("feature_A", "feature_B", "feature_C", "feature_D")) -vimp_object_3 <- familiar:::prepare_vimp_object( +vimp_object_3 <- familiar:::test_create_vimp_method( data = data_3, vimp_method = "pearson", outcome_type = "continuous", @@ -309,7 +309,7 @@ data <- familiar:::test_create_synthetic_correlated_data( n_numeric = 2, cluster_size = c(1, 1, 1, 1)) -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "pearson", outcome_type = "continuous", @@ -348,7 +348,7 @@ testthat::test_that("A signature feature does not appear in the variable importa # Test with all signature features --------------------------------------------- -vimp_object <- familiar:::prepare_vimp_object( +vimp_object <- familiar:::test_create_vimp_method( data = data, vimp_method = "pearson", outcome_type = "continuous",