diff --git a/R/ExperimentSetup.R b/R/ExperimentSetup.R
index 5721cfc0..ec43dce5 100644
--- a/R/ExperimentSetup.R
+++ b/R/ExperimentSetup.R
@@ -189,6 +189,12 @@ extract_experimental_setup <- function(
   # Suppress NOTES due to non-standard evaluation in data.table
   main_data_id <- NULL
 
+  ...get_n_samples <- function(x, type) {
+    if (is_empty(x[[type]])) return(0L)
+
+    return(nrow(x[[type]]))
+  }
+
   # Add perturbation level.
   section_table[, "perturbation_level" := 1L]
   for (data_id in section_table$main_data_id) {
@@ -201,16 +207,18 @@ extract_experimental_setup <- function(
     section_table[main_data_id == data_id, "n_runs" := length(iteration_list[[as.character(data_id)]]$run)]
   }
 
-  # Set the (max) number of available validation instances.
+  # Determine the number of instances available for development and validation.
   for (data_id in section_table$main_data_id) {
-    section_table[main_data_id == data_id, "max_validation_instances" := max(sapply(
-      iteration_list[[as.character(data_id)]]$run,
-      function(x) {
-        if (is_empty(x$valid_samples)) return(0L)
-
-        return(nrow(x$valid_samples))
-      }
-    ))]
+
+    n_run_training_samples <- sapply(iteration_list[[as.character(data_id)]]$run, ...get_n_samples, type = "train_samples")
+    n_run_validation_samples <- sapply(iteration_list[[as.character(data_id)]]$run, ...get_n_samples, type = "valid_samples")
+
+    section_table[main_data_id == data_id, ":="(
+      "min_training_instances" = min(n_run_training_samples),
+      "max_training_instances" = max(n_run_training_samples),
+      "min_validation_instances" = min(n_run_validation_samples),
+      "max_validation_instances" = max(n_run_validation_samples)
+    )]
   }
 
   return(section_table)
diff --git a/R/Familiar.R b/R/Familiar.R
index 2f724fea..ca615923 100644
--- a/R/Familiar.R
+++ b/R/Familiar.R
@@ -414,7 +414,7 @@ summon_familiar <- function(
 
   # Select and sort unique tasks.
   tasks <- .sort_tasks(tasks)
-browser()
+
   # Pre-processing -------------------------------------------------------------
   .run_preprocessing(
     cl = cl,
diff --git a/R/FamiliarS4Classes.R b/R/FamiliarS4Classes.R
index e26460cb..a744c6d7 100644
--- a/R/FamiliarS4Classes.R
+++ b/R/FamiliarS4Classes.R
@@ -642,20 +642,16 @@ setClass("familiarCollection",
 #' @slot outcome_info Outcome information object, which contains additional
 #'   information concerning the outcome, such as class levels.
 #' @slot data_column_info Object containing column information.
-#' @slot delay_loading logical. Allows delayed loading data, which enables data
-#'   parsing downstream without additional workflow complexity or memory
-#'   utilisation.
-#' @slot perturb_level numeric. This is the perturbation level for data which
-#'   has not been loaded. Used for data retrieval by interacting with the run
-#'   table of the accompanying model.
-#' @slot load_validation logical. This determines which internal data set will
-#'   be loaded. If TRUE, the validation data will be loaded, whereas FALSE loads
-#'   the development data.
-#' @slot aggregate_on_load logical. Determines whether data is aggregated after
-#'   loading.
-#' @slot sample_set_on_load NULL or vector of sample identifiers to be loaded.
-#'
-setClass("dataObject",
+#' @slot data_id Data identifier for dataset. Set using internal routines if the
+#'   `dataObject` was created from a `delayedDataObject`.
+#' @slot run_id Run identifier for dataset. Set using internal routines if the
+#'   `dataObject` was created from a `delayedDataObject`.
+#' @slot validation Identifies if validation or development samples were loaded.
+#'   Set using internal routines if the `dataObject` was created from a
+#'   `delayedDataObject`.
+#' @slot sample_seed Seed used for creating a bootstrap of the data.
+setClass(
+  "dataObject",
   slots = list(
     # Data
     data = "ANY",
@@ -667,29 +663,81 @@ setClass("dataObject",
     outcome_info = "ANY",
     # Info related to the columns in the dataset.
     data_column_info = "ANY",
-    # Flag for delayed loading. This can only be meaningfully set using internal
-    # data.
-    delay_loading = "logical",
-    # Perturbation level for data which has not been loaded. Used for data
-    # retrieval in combination with the run table of the accompanying model.
-    perturb_level = "numeric",
-    # Determines which data should be loaded.
-    load_validation = "logical",
-    # Flag for aggregation after loading and pre-processing
-    aggregate_on_load = "logical",
-    # Samples to be loaded
-    sample_set_on_load = "ANY"
+    # Data id
+    data_id = "integer",
+    # Run id
+    run_id = "integer",
+    # Validation marker.
+    validation = "logical",
+    # Sample seed
+    sample_seed = "integer"
   ),
   prototype = list(
     data = NULL,
     preprocessing_level = "none",
     outcome_type = NA_character_,
     outcome_info = NULL,
-    delay_loading = FALSE,
-    perturb_level = NA_integer_,
-    load_validation = TRUE,
-    aggregate_on_load = FALSE,
-    sample_set_on_load = NULL
+    data_column_info = NULL,
+    data_id = NA_integer_,
+    run_id = NA_integer_,
+    validation = NA,
+    sample_seed = NA_integer_
   )
 )
+
+
+
+# delayedDataObject object -----------------------------------------------------
+
+#' Data object with delayed loading
+#'
+#' The delayed loading object provides an interface to the backend data. This
+#' data object is typically used within the evaluation pipeline to load data
+#' when needed.
+#'
+#' @slot data NULL or data table containing the data. If present (not `NULL`),
+#'   data is considered loaded.
+#' @slot preprocessing_level character indicating the level of pre-processing
+#'   already conducted. `"none"` by default.
+#' @slot outcome_type character, determines the outcome type.
+#' @slot outcome_info Outcome information object, which contains additional
+#'   information concerning the outcome, such as class levels.
+#' @slot data_column_info Object containing column information.
+#' @slot data_id integer. Defines the data_id of the dataset that should be
+#'   loaded.
+#' @slot run_id integer. Defines the run_id of the dataset that should be loaded.
+#'   Together with data_id and validation, run_id allows for looking up the
+#'   sample set.
+#' @slot validation logical. This determines which internal data set will be
+#'   loaded. If TRUE, the validation data will be loaded, whereas FALSE loads
+#'   the development data.
+#' @slot aggregate_on_load logical. Determines whether data is aggregated after
+#'   loading.
+#' @slot sample_set_on_load NULL or vector of sample identifiers to be loaded.
+#'   Overrides any `sample_seed` that may have been provided.
+#' @slot defer_to_model_data_and_run_id logical. Determines whether the provided
+#'   data_id and run_id should be used (`FALSE`), or data_id and run_id of a
+#'   model (`TRUE`).
+setClass(
+  "delayedDataObject",
+  contains = "dataObject",
+  slots = list(
+    # Determines if validation or development data should be loaded.
+    validation = "logical",
+    # Flag for aggregation after loading and pre-processing
+    aggregate_on_load = "logical",
+    # Samples to be loaded.
+    sample_set_on_load = "ANY",
+    # Flag for deferring loading of data depending on data_id and run_id of
+    # models. Used to ensure that development data and internal validation data
+    # are correctly handled. Overrides any data_id and run_id that may have been
+    # provided.
+    defer_to_model_data_and_run_id = "logical"
+  ),
+  prototype = list(
+    aggregate_on_load = NA,
+    sample_set_on_load = NULL,
+    defer_to_model_data_and_run_id = NA
+  )
+)
 
diff --git a/R/TaskEvaluate.R b/R/TaskEvaluate.R
index b0c13f48..b03b231a 100644
--- a/R/TaskEvaluate.R
+++ b/R/TaskEvaluate.R
@@ -132,6 +132,8 @@ setClass(
     "validation" = "logical",
     "ensemble_data_id" = "integer",
     "ensemble_run_id" = "integer",
+    "get_predictions_at_model_level" = "logical",
+    "force_ensemble_detail_level" = "logical",
     "vimp_method" = "character",
     "learner" = "character",
     "data_set_name" = "character",
@@ -139,10 +141,23 @@
   ),
   prototype = methods::prototype(
     validation = NA,
-    # Whereas data_id describes where the data comes from, the ensemble_data_id
-    # describes where ensembles are formed.
+    # Whereas data_id describes where the overall data comes from, the
+    # ensemble_data_id describes where ensembles are formed.
     ensemble_data_id = NA_integer_,
     ensemble_run_id = NA_integer_,
+    # This parameter determines which data is used for generating predictions.
+    # If FALSE, data will be obtained using data_id and run_id of the task. If
+    # TRUE, data will be obtained using the data_id and run_id associated with
+    # each model. In practice, for internal runs this will be set to TRUE except
+    # for external validation data, whereas for external tasks (e.g. on existing
+    # models) this will be FALSE, and the provided data will be used directly.
+    get_predictions_at_model_level = FALSE,
+    # If individual models do not have sufficient data to perform hybrid
+    # analysis (each model is used to compute part of the bootstraps when
+    # computing confidence intervals) for an evaluation step, that evaluation
+    # step may fail, even though over all models, sufficient data are present.
+    # In that case, we need to force an ensemble detail level.
+    force_ensemble_detail_level = FALSE,
     vimp_method = NA_character_,
     learner = NA_character_,
     data_set_name = NA_character_,
@@ -200,6 +215,54 @@ setMethod(
 
 
 
+# .perform_task (evaluation task, NULL) --------------------------------------------
+setMethod(
+  ".perform_task",
+  signature(
+    object = "familiarTaskEvaluate",
+    data = "NULL"
+  ),
+  function(
+    object,
+    data,
+    experiment_data = NULL,
+    outcome_info = NULL,
+    ...
+  ) {
+    # This method is called when "data" is expected to be available somewhere in
+    # the backend.
+
+    if (is.null(experiment_data)) {
+      ..error_reached_unreachable_code("experiment_data is required for retrieving data from the backend.")
+    }
+    if (is.null(outcome_info)) {
+      ..error_reached_unreachable_code("outcome_info is required.")
+    }
+
+    data <- methods::new(
+      "delayedDataObject",
+      data = NULL,
+      preprocessing_level = "none",
+      outcome_type = outcome_info@outcome_type,
+      outcome_info = outcome_info,
+      validation = object@validation,
+      aggregate_on_load = FALSE,
+      sample_set_on_load = NULL
+    )
+
+    # Pass to method that dispatches with dataObject for further processing.
+    return(.perform_task(
+      object = object,
+      data = data,
+      experiment_data = experiment_data,
+      ...
+    ))
+  }
+)
+
+
+
+
 # .perform_task (evaluation task, dataObject) ----------------------------------
 setMethod(
   ".perform_task",
@@ -237,7 +300,41 @@ setMethod(
 
     # Check which detail level should be provided based on the number of
     # available instances for each model.
-    browser()
+    fam_ensemble <- methods::new(
+      "familiarEnsemble",
+      model_list = as.list(object@model_files),
+      learner = object@learner,
+      vimp_method = object@vimp_method
+    )
+
+    # Add package version.
+    fam_ensemble <- add_package_version(object = fam_ensemble)
+
+    # Load models and prevent auto-detaching.
+    fam_ensemble <- load_models(
+      object = fam_ensemble,
+      suppress_auto_detach = TRUE
+    )
+
+    # Create a run table
+    fam_ensemble@run_table <- list(
+      "run_table" = lapply(
+        fam_ensemble@model_list,
+        function(fam_model) fam_model@run_table
+      ),
+      "ensemble_data_id" = object@ensemble_data_id,
+      "ensemble_run_id" = object@ensemble_run_id
+    )
+
+    # Complete the ensemble using information provided by the model
+    fam_ensemble <- complete_familiar_ensemble(object = fam_ensemble)
+
+    # Set evaluation level.
+    if (object@force_ensemble_detail_level) {
+      detail_level <- "ensemble"
+    } else {
+      detail_level <- settings$eval$detail_level
+    }
 
     # Compute evaluation data.
     evaluation_data <- extract_data(
@@ -344,20 +441,56 @@ setMethod(
 
 
     # evaluation tasks -----------------------------------------------------------
+    data_file_names <- NULL
+
+    n_min_model_instances <- Inf
+    evaluate_external_validation <- evaluate_internal_validation <- evaluate_development <- FALSE
+
+    # External validation: the top level has associated validation data.
+    if (experiment_data@experiment_setup[main_data_id == 1L]$max_validation_instances > 0L) {
+      evaluate_external_validation <- TRUE
+      n_min_model_instances <- min(
+        c(experiment_data@experiment_setup[main_data_id == 1L]$max_validation_instances),
+        n_min_model_instances
+      )
+    }
+
+    # Internal validation: the lowest model ensembling level has associated
+    # validation data.
+    if (experiment_data@experiment_setup[main_data_id == internal_validation_data_id]$max_validation_instances > 0L) {
+      evaluate_internal_validation <- TRUE
+      n_min_model_instances <- min(
+        c(experiment_data@experiment_setup[main_data_id == internal_validation_data_id]$max_validation_instances),
+        n_min_model_instances
+      )
+    }
+
+    # Development data: the lowest model ensembling level has associated
+    # development data. NOTE: this should always be the case.
+    if (experiment_data@experiment_setup[main_data_id == internal_validation_data_id]$max_training_instances > 0L) {
+      evaluate_development <- TRUE
+      n_min_model_instances <- min(
+        c(experiment_data@experiment_setup[main_data_id == internal_validation_data_id]$max_training_instances),
+        n_min_model_instances
+      )
+    }
+
+    # Check model instances and determine if we need to force ensemble detail
+    # level for evaluation.
+    force_ensemble_detail_level <- n_min_model_instances < 10L
+
     # Use collection tasks to set up the evaluation tasks, including for internal
     # validation.
     evaluate_task_list <- list()
     ii <- 1L
+
     for (jj in seq_along(collect_task_list)) {
-      data_file_names <- NULL
-
       for (learner in learners) {
         for (vimp_method in vimp_methods) {
 
           ## external validation -------------------------------------------------
 
-          # External validation: the top level has associated validation data.
-          if (experiment_data@experiment_setup[main_data_id == 1L]$max_validation_instances > 0L) {
+          if (evaluate_external_validation) {
             # Initialise task.
             evaluate_task <- methods::new(
               "familiarTaskEvaluate",
@@ -366,6 +499,8 @@ setMethod(
               validation = TRUE,
               ensemble_data_id = collect_task_list[[jj]]@data_id,
               ensemble_run_id = collect_task_list[[jj]]@run_id,
+              get_predictions_at_model_level = FALSE,
+              force_ensemble_detail_level = force_ensemble_detail_level,
               learner = learner,
               vimp_method = vimp_method,
               data_set_name = "external_validation",
@@ -387,8 +522,6 @@ setMethod(
 
 
           # internal validation --------------------------------------------------
-          # Internal validation: the ensembling level has associated validation
-          # data.
           if (experiment_data@experiment_setup[main_data_id == internal_validation_data_id]$max_validation_instances > 0L) {
             # Initialise task.
             evaluate_task <- methods::new(
@@ -398,6 +531,8 @@ setMethod(
               validation = TRUE,
               ensemble_data_id = collect_task_list[[jj]]@data_id,
               ensemble_run_id = collect_task_list[[jj]]@run_id,
+              get_predictions_at_model_level = TRUE,
+              force_ensemble_detail_level = force_ensemble_detail_level,
               learner = learner,
               vimp_method = vimp_method,
               data_set_name = "internal_validation",
@@ -418,31 +553,39 @@ setMethod(
           }
 
           # development ----------------------------------------------------------
-          # Development at the ensembling level.
-          evaluate_task <- methods::new(
-            "familiarTaskEvaluate",
-            data_id = collect_task_list[[jj]]@data_id,
-            run_id = collect_task_list[[jj]]@run_id,
-            validation = FALSE,
-            ensemble_data_id = collect_task_list[[jj]]@data_id,
-            ensemble_run_id = collect_task_list[[jj]]@run_id,
-            learner = learner,
-            vimp_method = vimp_method,
-            data_set_name = "development",
-            project_id = experiment_data@project_id
-          )
-
-          # Set file name.
-          evaluate_task <-.set_file_name(
-            object = evaluate_task,
-            file_paths = file_paths
-          )
-          # Make task and associated file names available.
-          data_file_names <- c(data_file_names, evaluate_task@file)
-          evaluate_task_list[[ii]] <- evaluate_task
-
-          ii <- ii + 1L
+          if (evaluate_development) {
+            # Initialise task.
+            evaluate_task <- methods::new(
+              "familiarTaskEvaluate",
+              data_id = collect_task_list[[jj]]@data_id,
+              run_id = collect_task_list[[jj]]@run_id,
+              validation = FALSE,
+              ensemble_data_id = collect_task_list[[jj]]@data_id,
+              ensemble_run_id = collect_task_list[[jj]]@run_id,
+              get_predictions_at_model_level = TRUE,
+              force_ensemble_detail_level = force_ensemble_detail_level,
+              learner = learner,
+              vimp_method = vimp_method,
+              data_set_name = "development",
+              project_id = experiment_data@project_id
+            )
+
+            # Set file name.
+            evaluate_task <- .set_file_name(
+              object = evaluate_task,
+              file_paths = file_paths
+            )
+
+            # Make task and associated file names available.
+            data_file_names <- c(data_file_names, evaluate_task@file)
+            evaluate_task_list[[ii]] <- evaluate_task
+
+            ii <- ii + 1L
+
+          } else {
+            ..error_reached_unreachable_code("development data are always present.")
+          }
         }
       }
 
 
@@ -475,7 +618,6 @@ setMethod(
     # Iterate over evaluation tasks and add corresponding models based on ensemble
     # data id and run id.
     for (ii in seq_along(evaluate_task_list)) {
-
       # Select run tables where the ensemble data and run identifiers appear.
       selected_run_tables <- run_tables[sapply(
         run_tables,
diff --git a/man/dataObject-class.Rd b/man/dataObject-class.Rd
index be446f2f..b80e63c1 100644
--- a/man/dataObject-class.Rd
+++ b/man/dataObject-class.Rd
@@ -25,21 +25,16 @@ information concerning the outcome, such as class levels.}
 
 \item{\code{data_column_info}}{Object containing column information.}
 
-\item{\code{delay_loading}}{logical. Allows delayed loading data, which enables data
-parsing downstream without additional workflow complexity or memory
-utilisation.}
+\item{\code{data_id}}{Data identifier for dataset. Set using internal routines if the
+\code{dataObject} was created from a \code{delayedDataObject}.}
 
-\item{\code{perturb_level}}{numeric. This is the perturbation level for data which
-has not been loaded. Used for data retrieval by interacting with the run
-table of the accompanying model.}
+\item{\code{run_id}}{Run identifier for dataset. Set using internal routines if the
+\code{dataObject} was created from a \code{delayedDataObject}.}
 
-\item{\code{load_validation}}{logical. This determines which internal data set will
-be loaded. If TRUE, the validation data will be loaded, whereas FALSE loads
-the development data.}
+\item{\code{validation}}{Identifies if validation or development samples were loaded.
+Set using internal routines if the \code{dataObject} was created from a
+\code{delayedDataObject}.}
 
-\item{\code{aggregate_on_load}}{logical. Determines whether data is aggregated after
-loading.}
-
-\item{\code{sample_set_on_load}}{NULL or vector of sample identifiers to be loaded.}
+\item{\code{sample_seed}}{Seed used for creating a bootstrap of the data.}
 }}
 
diff --git a/man/delayedDataObject-class.Rd b/man/delayedDataObject-class.Rd
new file mode 100644
index 00000000..988685f7
--- /dev/null
+++ b/man/delayedDataObject-class.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/FamiliarS4Classes.R
+\docType{class}
+\name{delayedDataObject-class}
+\alias{delayedDataObject-class}
+\title{Data object with delayed loading}
+\description{
+The delayed loading object provides an interface to the backend data. This
+data object is typically used within the evaluation pipeline to load data
+when needed.
+}
+\section{Slots}{
+
+\describe{
+\item{\code{data}}{NULL or data table containing the data. If present (not \code{NULL}),
+data is considered loaded.}
+
+\item{\code{preprocessing_level}}{character indicating the level of pre-processing
+already conducted. \code{"none"} by default.}
+
+\item{\code{outcome_type}}{character, determines the outcome type.}
+
+\item{\code{outcome_info}}{Outcome information object, which contains additional
+information concerning the outcome, such as class levels.}
+
+\item{\code{data_column_info}}{Object containing column information.}
+
+\item{\code{data_id}}{integer. Defines the data_id of the dataset that should be
+loaded.}
+
+\item{\code{run_id}}{integer. Defines the run_id of the dataset that should be loaded.
+Together with data_id and validation, run_id allows for looking up the
+sample set.}
+
+\item{\code{validation}}{logical. This determines which internal data set will be
+loaded. If TRUE, the validation data will be loaded, whereas FALSE loads
+the development data.}
+
+\item{\code{aggregate_on_load}}{logical. Determines whether data is aggregated after
+loading.}
+
+\item{\code{sample_set_on_load}}{NULL or vector of sample identifiers to be loaded.
+Overrides any \code{sample_seed} that may have been provided.}
+
+\item{\code{defer_to_model_data_and_run_id}}{logical. Determines whether the provided
+data_id and run_id should be used (\code{FALSE}), or data_id and run_id of a
+model (\code{TRUE}).}
+}}
+
diff --git a/tests/testthat/test-task_based_workflow.R b/tests/testthat/test-task_based_workflow.R
index ec05bfd5..b75dfbeb 100644
--- a/tests/testthat/test-task_based_workflow.R
+++ b/tests/testthat/test-task_based_workflow.R
@@ -173,9 +173,11 @@ testthat::test_that("all models are present", {
 
 # Including evaluation ---------------------------------------------------------
 
+data <- familiar:::test_create_small_good_data("binomial")
+
 results <- familiar::summon_familiar(
   data = data,
-  experimental_design = "bs(fs,3)+bs(mb,3)",
+  experimental_design = "bs(fs,3)+bs(mb, 3)",
   vimp_method = "mim",
   learner = "glm_logistic",
   evaluate_top_level_only = FALSE,