Skip to content

Commit

Permalink
WIP on data tasks.
Browse files Browse the repository at this point in the history
  • Loading branch information
alexzwanenburg committed Jan 2, 2025
1 parent fb4c880 commit 2729584
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 4 deletions.
2 changes: 1 addition & 1 deletion R/DataObject.R
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ setMethod(
keep_novelty = FALSE,
...
) {

browser()
# Check whether model data- and run-ids should be used.
if (data@defer_to_model_data_and_run_id) {
data@data_id <- object@data_id
Expand Down
1 change: 1 addition & 0 deletions R/FamiliarDataComputation.R
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ setMethod(
verbose = FALSE,
...
) {
browser()
## Compute distance between features ---------------------------------------
feature_similarity <- NULL
if (any(c("model_vimp", "feature_similarity", "univariate_analysis",
Expand Down
18 changes: 18 additions & 0 deletions R/FamiliarS4Classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
#' @slot calibration_info Calibration information, e.g. baseline survival in the
#' development cohort.
#' @slot km_info Data concerning stratification into risk groups.
#' @slot data_id Internal identifier for the dataset used to train the model.
#' @slot run_id Internal identifier for the specific subset of the dataset
#' used to train the model.
#' @slot run_table Run table for the data used to train the model. Used
#' internally.
#' @slot settings A copy of the evaluation configuration parameters used at
Expand Down Expand Up @@ -90,6 +93,10 @@ setClass("familiarModel",
model_features = "ANY",
# Features that are required for novelty detection.
novelty_features = "ANY",
# data_id for the data used to train the model.
data_id = "integer",
# run_id for the data used to train the model.
run_id = "integer",
# Run table for the current model
run_table = "ANY",
# Information required to assess model calibrations (e.g. baseline survival)
Expand Down Expand Up @@ -133,6 +140,8 @@ setClass("familiarModel",
novelty_features = NULL,
calibration_info = NULL,
km_info = NULL,
data_id = NA_integer_,
run_id = NA_integer_,
run_table = NULL,
settings = NULL,
is_trimmed = FALSE,
Expand Down Expand Up @@ -172,6 +181,9 @@ setClass("familiarModel",
#' models in the ensemble,
#' @slot novelty_features The combined set of features that is used to train all
#' novelty detectors in the ensemble.
#' @slot data_id Internal identifier for the dataset used to train the ensemble.
#' @slot run_id Internal identifier for the specific subset of the dataset
#' used to train the ensemble.
#' @slot run_table Run table for the data used to train the ensemble. Used
#' internally.
#' @slot calibration_info Calibration information, e.g. baseline survival in the
Expand Down Expand Up @@ -214,6 +226,10 @@ setClass("familiarEnsemble",
model_features = "ANY",
# Features that are required for novelty detection.
novelty_features = "ANY",
# data_id for the data used to train the ensemble.
data_id = "integer",
# run_id for the data used to train the ensemble.
run_id = "integer",
# Set of run tables for the current ensemble. This is only required for
# processing internal data.
run_table = "ANY",
Expand Down Expand Up @@ -246,6 +262,8 @@ setClass("familiarEnsemble",
required_features = NULL,
model_features = NULL,
novelty_features = NULL,
data_id = NA_integer_,
run_id = NA_integer_,
run_table = NULL,
calibration_info = NULL,
model_dir_path = NA_character_,
Expand Down
17 changes: 15 additions & 2 deletions R/TaskEvaluate.R
Original file line number Diff line number Diff line change
Expand Up @@ -239,17 +239,30 @@ setMethod(
..error_reached_unreachable_code("outcome_info is required.")
}

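# Set up a delayed data object: no data are attached yet (data = NULL) and
# loading is deferred (delay_loading = TRUE).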
data <- methods::new(
"dataObject",
"delayedDataObject",
data = NULL,
preprocessing_level = "none",
outcome_type = outcome_info@outcome_type,
outcome_info = outcome_info,
validation = object@validation,
delay_loading = TRUE,
aggregate_on_load = FALSE
)

# Set the data_id and run_id for the data itself.
if (object@force_ensemble_detail_level) {
data@data_id <- object@ensemble_data_id
data@run_id <- object@ensemble_run_id

} else {
data@data_id <- object@data_id
data@run_id <- object@run_id
}

# Determine whether model data and run ids should be used for predictions.
data@defer_to_model_data_and_run_id <- object@get_predictions_at_model_level

# Pass to method that dispatches with dataObject for further processing.
return(.perform_task(
object = object,
Expand Down
4 changes: 3 additions & 1 deletion R/TaskLearn.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ setMethod(
rank_threshold = vimp_rank_threshold
)

# Create the raw model object for training..
# Create the raw model object for training.
model_object <- methods::new(
"familiarModel",
outcome_type = data@outcome_type,
Expand All @@ -264,6 +264,8 @@ setMethod(
learner = object@learner,
feature_info = feature_info_list,
outcome_info = data@outcome_info,
data_id = object@data_id,
run_id = object@run_id,
run_table = .get_current_run_table(object = object),
settings = settings$eval,
project_id = object@project_id
Expand Down
5 changes: 5 additions & 0 deletions man/familiarEnsemble-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions man/familiarModel-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions tests/testthat/test-task_based_workflow.R
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ data <- familiar:::test_create_small_good_data("binomial")
results <- familiar::summon_familiar(
data = data,
experimental_design = "bs(fs,3)+bs(mb, 3)",
evaluation_elements = "auc_data",
vimp_method = "mim",
learner = "glm_logistic",
evaluate_top_level_only = FALSE,
Expand Down

0 comments on commit 2729584

Please sign in to comment.