diff --git a/DESCRIPTION b/DESCRIPTION
index 7d7bfe60..4c8608c5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -199,6 +199,7 @@ Collate:
     'RankStabilityAggregation.R'
     'SocketServer.R'
     'StringUtilities.R'
+    'TaskEvaluate.R'
     'TaskFeatureInfo.R'
     'TaskLearn.R'
     'TaskLearnerHyperparameters.R'
diff --git a/NEWS.md b/NEWS.md
index 123128a5..359eb367 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,8 +2,12 @@
 
 ## Breaking changes
 
-- Naming and on-disk location of variable importance tables and models has
-  changed. These are no longer nested to limit path lengths.
+- Naming and on-disk location of variable importance tables, models, evaluated
+  datasets, and collections have changed. These are no longer nested, which
+  reduces path lengths and avoids issues caused by overly long paths,
+  particularly on Windows.
+
+- Ensembles are no longer explicitly stored, but are formed at run time.
 
 ## Major changes
 
@@ -37,6 +41,13 @@
 
 - `parallel_feature_selection` was renamed to `parallel_vimp`.
 
+- It is now possible to build models without explicitly defining a variable
+  importance (feature selection) step. For example,
+  `experimental_design = mb + ev` is now valid and results in training a
+  single model on the development dataset, with subsequent evaluation on an
+  external dataset. This is realised using variable importance data obtained
+  during hyperparameter optimisation.
+
 ## Minor changes
 
 - The `iteration_seed` configuration parameter was added to provide a fixed seed
@@ -73,7 +84,7 @@
   lead to too few samples to allow for assessment. This affected
   Leave-One-Out-Cross-Validation (LOOCV) schemes in particular.
 
-## Bug fixes
+## Fixes
 
 - Fixed errors when creating feature or similarity plots caused by sample or
   feature names matching internal column names.
@@ -83,6 +94,9 @@
 - Variable importance methods and outcome information objects were missing a
   familiar version attribute, which has now been added to ensure future
   compatibility.
+
+- Some vignettes referred to `experiment_design` where `experimental_design` was
+  intended.
 
 # Version 1.5.0 (Whole Whale)
 
diff --git a/R/ExperimentSetup.R b/R/ExperimentSetup.R
index abff3a07..5721cfc0 100644
--- a/R/ExperimentSetup.R
+++ b/R/ExperimentSetup.R
@@ -201,6 +201,18 @@ extract_experimental_setup <- function(
     section_table[main_data_id == data_id, "n_runs" := length(iteration_list[[as.character(data_id)]]$run)]
   }
 
+  # Set the (max) number of available validation instances.
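+  # A value of zero means that no validation samples exist at that level; this
+  # is used downstream to decide whether validation-based evaluation tasks can
+  # be created.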
+  for (data_id in section_table$main_data_id) {
+    section_table[main_data_id == data_id, "max_validation_instances" := max(sapply(
+      iteration_list[[as.character(data_id)]]$run,
+      function(x) {
+        if (is_empty(x$valid_samples)) return(0L)
+
+        return(nrow(x$valid_samples))
+      }
+    ))]
+  }
+
   return(section_table)
 }
 
diff --git a/R/Familiar.R b/R/Familiar.R
index 0835dbed..e338d23d 100644
--- a/R/Familiar.R
+++ b/R/Familiar.R
@@ -407,6 +407,7 @@ summon_familiar <- function(
       optimisation_determine_vimp = settings$hpo$hpo_determine_vimp,
       vimp_methods = settings$vimp$vimp_methods,
       learners = settings$mb$learners,
+      pool_only = settings$eval$pool_only,
       file_paths = file_paths
     )
   }
diff --git a/R/TaskEvaluate.R b/R/TaskEvaluate.R
index e6a43d60..50107d92 100644
--- a/R/TaskEvaluate.R
+++ b/R/TaskEvaluate.R
@@ -16,6 +16,124 @@ setClass(
 )
 
 
+# .set_file_name (collection task) ---------------------------------------------
+setMethod(
+  ".set_file_name",
+  signature(object = "familiarTaskCollect"),
+  function(object, file_paths = NULL) {
+    if (is.null(file_paths)) return(object)
+
+    name <- NULL
+    if (object@data_id == 1L && object@run_id == 1L) {
+      name <- "pooled"
+    }
+
+    # Generate the file name of the collection.
+    object@file <- get_object_file_name(
+      object_type = "familiarCollection",
+      data_id = object@data_id,
+      run_id = object@run_id,
+      name = name,
+      project_id = object@project_id,
+      dir_path = file_paths$fam_coll_dir
+    )
+
+    return(object)
+  }
+)
+
+
+
+# .get_task_descriptor (collection task) ---------------------------------------
+setMethod(
+  ".get_task_descriptor",
+  signature(object = "familiarTaskCollect"),
+  function(object, ...) {
+    return(paste0(
+      object@task_name, "_",
+      object@data_id, "_",
+      object@run_id
+    ))
+  }
+)
+
+
+
+
+
+# familiarTaskEvaluate ---------------------------------------------------------
+setClass(
+  "familiarTaskEvaluate",
+  contains = "familiarTask",
+  slots = list(
+    "validation" = "logical",
+    "ensemble_data_id" = "integer",
+    "ensemble_run_id" = "integer",
+    "vimp_method" = "character",
+    "learner" = "character",
+    "data_set_name" = "character"
+  ),
+  prototype = methods::prototype(
+    validation = NA,
+    # Whereas data_id describes where the data comes from, ensemble_data_id
+    # describes where ensembles are formed.
+    ensemble_data_id = NA_integer_,
+    ensemble_run_id = NA_integer_,
+    vimp_method = NA_character_,
+    learner = NA_character_,
+    data_set_name = NA_character_,
+    task_name = "evaluate"
+  )
+)
+
+
+
+# .set_file_name (evaluation task) ----------------------------------------------
+setMethod(
+  ".set_file_name",
+  signature(object = "familiarTaskEvaluate"),
+  function(object, file_paths = NULL) {
+    if (is.null(file_paths)) return(object)
+
+    # Generate the file name of the evaluated data set.
+    object@file <- get_object_file_name(
+      object_type = "familiarData",
+      data_id = object@data_id,
+      run_id = object@run_id,
+      learner = object@learner,
+      vimp_method = object@vimp_method,
+      ensemble_data_id = object@ensemble_data_id,
+      ensemble_run_id = object@ensemble_run_id,
+      name = object@data_set_name,
+      project_id = object@project_id,
+      dir_path = file_paths$fam_data_dir
+    )
+
+    return(object)
+  }
+)
+
+
+
+# .get_task_descriptor (evaluation task) ---------------------------------------
+setMethod(
+  ".get_task_descriptor",
+  signature(object = "familiarTaskEvaluate"),
+  function(object, ...) {
+    return(paste0(
+      object@task_name, "_",
+      object@data_id, "_",
+      object@run_id, "_",
+      object@vimp_method, "_",
+      object@learner, "_",
+      object@ensemble_data_id, "_",
+      object@ensemble_run_id, "_",
+      object@data_set_name
+    ))
+  }
+)
 
 
 .generate_evaluation_tasks <- function(
   experiment_data,
@@ -42,50 +160,182 @@ setClass(
     )
   )
 
+  # Find the data_id related to ensembling of models.
+  train_data_id <- experiment_data@experiment_setup[train == TRUE, ]$main_data_id[1L]
+  if (is_empty(train_data_id)) return(NULL)
+
+  # Determine which parts of the experiment can be used for internal validation.
+  run_table <- .get_run_table_from_experiment_setup(
+    data_id = train_data_id,
+    experiment_setup = experiment_data@experiment_setup
+  )
+
+  # Internal validation can exist at the last level where pre-processing for
+  # training is allowed.
+  internal_validation_data_id <- tail(run_table[main_data_id <= train_data_id & can_pre_process == TRUE], n = 1L)$main_data_id[1L]
+
   if (!pool_only) {
     # Determine the collections at the last experimental level that can
     # pre-process and is part of the model-building branch.
+    collection_run_ids <- seq_len(run_table[main_data_id == internal_validation_data_id]$n_runs[1L])
 
-    # Find the data_id related to training.
-    train_data_id <- experiment_data@experiment_setup[train == TRUE, ]$main_data_id[1L]
-    if (is_empty(train_data_id)) return(NULL)
-
-    # Determine which parts of the experimental setup are used by training.
-    run_table <- .get_run_table_from_experiment_setup(
-      data_id = train_data_id,
-      experiment_setup = experiment_data@experiment_setup
-    )
-
-    # Find the data_id and run_ids for preprocessing.
-    collection_data_id <- tail(run_table[main_data_id <= train_data_id & can_pre_process == TRUE], n = 1L)$main_data_id[1L]
-    collection_run_ids <- seq_len(run_table[main_data_id == collection_data_id]$n_runs[1L])
-
+    ii <- 2L
     for (run_id in collection_run_ids) {
-      collect_task_list <- c(
-        collect_task_list,
-        methods::new(
-          "familiarTaskCollect",
-          data_id = collection_data_id,
-          run_id = run_id,
-          project_id = experiment_data@project_id
-        )
+      collect_task_list[[ii]] <- methods::new(
+        "familiarTaskCollect",
+        data_id = internal_validation_data_id,
+        run_id = run_id,
+        project_id = experiment_data@project_id
       )
+
+      ii <- ii + 1L
     }
   }
 
   # evaluation tasks -----------------------------------------------------------
 
-  # Use collection tasks to set up the evaluation tasks, including for internal validation.
+  # Use collection tasks to set up the evaluation tasks, including for internal
+  # validation.
+  evaluate_task_list <- list()
+  ii <- 1L
+  for (jj in seq_along(collect_task_list)) {
+    data_file_names <- NULL
+
+    for (learner in learners) {
+      for (vimp_method in vimp_methods) {
+
+        ## external validation -------------------------------------------------
+
+        # External validation: the top level has associated validation data.
+        if (experiment_data@experiment_setup[main_data_id == 1L]$max_validation_instances > 0L) {
+          # Initialise the task.
+          evaluate_task <- methods::new(
+            "familiarTaskEvaluate",
+            data_id = 1L,
+            run_id = 1L,
+            validation = TRUE,
+            ensemble_data_id = collect_task_list[[jj]]@data_id,
+            ensemble_run_id = collect_task_list[[jj]]@run_id,
+            learner = learner,
+            vimp_method = vimp_method,
+            data_set_name = "external_validation",
+            project_id = experiment_data@project_id
+          )
+
+          # Set the file name.
+          evaluate_task <- .set_file_name(
+            object = evaluate_task,
+            file_paths = file_paths
+          )
+
+          # Make the task and associated file names available.
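+          # These file names are attached to the parent collection task below.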
+          data_file_names <- c(data_file_names, evaluate_task@file)
+          evaluate_task_list[[ii]] <- evaluate_task
+
+          ii <- ii + 1L
+        }
+
+        ## internal validation -------------------------------------------------
+
+        # Internal validation: the ensembling level has associated validation
+        # data.
+        if (experiment_data@experiment_setup[main_data_id == internal_validation_data_id]$max_validation_instances > 0L) {
+          # Initialise the task.
+          evaluate_task <- methods::new(
+            "familiarTaskEvaluate",
+            data_id = collect_task_list[[jj]]@data_id,
+            run_id = collect_task_list[[jj]]@run_id,
+            validation = TRUE,
+            ensemble_data_id = collect_task_list[[jj]]@data_id,
+            ensemble_run_id = collect_task_list[[jj]]@run_id,
+            learner = learner,
+            vimp_method = vimp_method,
+            data_set_name = "internal_validation",
+            project_id = experiment_data@project_id
+          )
+
+          # Set the file name.
+          evaluate_task <- .set_file_name(
+            object = evaluate_task,
+            file_paths = file_paths
+          )
+
+          # Make the task and associated file names available.
+          data_file_names <- c(data_file_names, evaluate_task@file)
+          evaluate_task_list[[ii]] <- evaluate_task
+
+          ii <- ii + 1L
+        }
+
+        ## development ----------------------------------------------------------
+
+        # Development data at the ensembling level.
+        evaluate_task <- methods::new(
+          "familiarTaskEvaluate",
+          data_id = collect_task_list[[jj]]@data_id,
+          run_id = collect_task_list[[jj]]@run_id,
+          validation = FALSE,
+          ensemble_data_id = collect_task_list[[jj]]@data_id,
+          ensemble_run_id = collect_task_list[[jj]]@run_id,
+          learner = learner,
+          vimp_method = vimp_method,
+          data_set_name = "development",
+          project_id = experiment_data@project_id
+        )
+
+        # Set the file name.
+        evaluate_task <- .set_file_name(
+          object = evaluate_task,
+          file_paths = file_paths
+        )
+
+        # Make the task and associated file names available.
+        data_file_names <- c(data_file_names, evaluate_task@file)
+        evaluate_task_list[[ii]] <- evaluate_task
+
+        ii <- ii + 1L
+      }
+    }
+
+    # Update the collection task: attach the file paths of the underlying data
+    # files and set the collection's own file name.
+    collect_task_list[[jj]]@data_file <- data_file_names
+    collect_task_list[[jj]] <- .set_file_name(
+      object = collect_task_list[[jj]],
+      file_paths = file_paths
+    )
+  }
+
   # ensembles ------------------------------------------------------------------
+
+  # Obtain run tables related to models.
+  run_tables <- .collect_run_tables(iteration_list = experiment_data@iteration_list)
+  run_tables <- run_tables[sapply(
+    run_tables,
+    function(x, data_id) {
+      return(tail(x, n = 1L)$data_id == data_id)
+    },
+    data_id = train_data_id
+  )]
+
+  # Iterate over evaluation tasks and add corresponding models based on the
+  # ensemble data id and run id.
+  for (ii in seq_along(evaluate_task_list)) {
+    ensemble_data_id <- evaluate_task_list[[ii]]@ensemble_data_id
+    ensemble_run_id <- evaluate_task_list[[ii]]@ensemble_run_id
+  }
+
   # train and variable importance tasks ----------------------------------------
 
-  task_list <- c(
-    task_list,
-    .generate_learner_tasks(
-      experiment_data = experiment_data,
-      vimp_methods = vimp_methods,
-      file_paths = file_paths,
-      skip_existing = skip_existing,
-      ...
-    )
+  task_list <- .generate_trainer_tasks(
+    experiment_data = experiment_data,
+    vimp_methods = vimp_methods,
+    file_paths = file_paths,
+    skip_existing = skip_existing,
+    ...
   )
 }
 
diff --git a/R/TaskLearn.R b/R/TaskLearn.R
index 06aafc2b..970e1880 100644
--- a/R/TaskLearn.R
+++ b/R/TaskLearn.R
@@ -35,7 +35,7 @@ setMethod(
   function(object, file_paths = NULL) {
     if (is.null(file_paths)) return(object)
 
-    # Generate file name of variable importance table
+    # Generate file name of the model.
     object@file <- get_object_file_name(
       object_type = "familiarModel",
       data_id = object@data_id,
diff --git a/R/TaskMain.R b/R/TaskMain.R
index 4b6f4197..5755faad 100644
--- a/R/TaskMain.R
+++ b/R/TaskMain.R
@@ -229,7 +229,6 @@ setMethod(
 
 
 
-
 .generate_learner_data_preprocessing_tasks <- function(
   experiment_data,
   file_paths
@@ -300,15 +299,6 @@ setMethod(
 
 
 
-.generate_evaluation_tasks <- function(
-  file_paths,
-  project_id
-) {
-
-}
-
-
-
 .sort_tasks <- function(task_list) {
   # Select unique tasks.
   duplicate_tasks <- duplicated(sapply(task_list, FUN = .get_task_descriptor))
@@ -324,7 +314,6 @@ setMethod(
     "vimp" = task_list[task_class == "familiarTaskVimp"],
     "hyperparameters_learner" = task_list[task_class == "familiarTaskLearnerHyperparameters"],
     "train" = task_list[task_class == "familiarTaskTrain"],
-    "ensemble" = task_list[task_class == "familiarTaskEnsemble"],
     "evaluate" = task_list[task_class == "familiarTaskEvaluate"],
     "collect" = task_list[task_class == "familiarTaskCollect"]
   )
diff --git a/R/Utilities.R b/R/Utilities.R
index 18ddf733..5f52c06f 100644
--- a/R/Utilities.R
+++ b/R/Utilities.R
@@ -757,11 +757,10 @@ get_object_file_name <- function(
     run_id,
     learner = NULL,
     vimp_method = NULL,
-    pool_data_id = NULL,
-    pool_run_id = NULL,
+    ensemble_data_id = NULL,
+    ensemble_run_id = NULL,
+    name = NULL,
     object_type,
-    is_ensemble = NULL,
-    is_validation = NULL,
     with_extension = TRUE,
     dir_path = NULL
 ) {
@@ -782,29 +781,13 @@ get_object_file_name <- function(
       "_model"
     )
 
-  } else if (object_type == "familiarEnsemble") {
-    # For familiarEnsemble objects
-
-    if (is.null(learner) || is.null(vimp_method) || is.null(project_id) || is.null(is_ensemble)) {
-      ..error_reached_unreachable_code("missing arguments")
-    }
-
-    output_str <- paste0(
-      project_id, "_",
-      learner, "_",
-      vimp_method, "_",
-      data_id, "_",
-      run_id, "_",
-      ifelse(is_ensemble, "ensemble", "pool")
-    )
-
   } else if (object_type == "familiarData") {
     # For familiarData objects
 
     if (
       is.null(learner) || is.null(vimp_method) || is.null(project_id) ||
-      is.null(is_ensemble) || is.null(is_validation) || is.null(pool_data_id) ||
-      is.null(pool_run_id)
+      is.null(data_id) || is.null(run_id) ||
+      is.null(ensemble_data_id) || is.null(ensemble_run_id) || is.null(name)
     ) {
       ..error_reached_unreachable_code("missing arguments")
     }
@@ -814,13 +797,36 @@ get_object_file_name <- function(
       learner, "_",
       vimp_method, "_",
       data_id, "_",
-      run_id, "_",
-      ifelse(is_ensemble, "ensemble", "pool"), "_",
-      pool_data_id, "_",
-      pool_run_id, "_",
-      ifelse(is_validation, "validation", "development"), "_data"
+      run_id, "_ensemble_",
+      ensemble_data_id, "_",
+      ensemble_run_id, "_",
+      name, "_data"
     )
 
+  } else if (object_type == "familiarCollection") {
+    # For familiarCollection objects
+
+    if (
+      is.null(project_id) || ((is.null(data_id) || is.null(run_id)) && is.null(name))
+    ) {
+      ..error_reached_unreachable_code("missing arguments")
+    }
+
+    if (!is.null(name)) {
+      output_str <- paste0(
+        project_id, "_",
+        name,
+        "_collection"
+      )
+
+    } else {
+      output_str <- paste0(
+        project_id, "_",
+        data_id, "_",
+        run_id,
+        "_collection"
+      )
+    }
+
   } else if (object_type == "vimpTable") {
     if (is.null(vimp_method) || is.null(project_id)) {
diff --git a/tests/testthat/test-task_based_workflow.R b/tests/testthat/test-task_based_workflow.R
index 5cd118f4..ec05bfd5 100644
--- a/tests/testthat/test-task_based_workflow.R
+++ b/tests/testthat/test-task_based_workflow.R
@@ -169,3 +169,22 @@ testthat::test_that("all models are present", {
   testthat::expect_true(all(sapply(model, familiar:::model_is_trained)))
   testthat::expect_false(any(sapply(model, function(x) (is.null(x@vimp_table)))))
 })
+
+
+# Including evaluation ---------------------------------------------------------
+
+results <- familiar::summon_familiar(
+  data = data,
+  experimental_design = "bs(fs,3)+bs(mb,3)",
+  vimp_method = "mim",
+  learner = "glm_logistic",
+  evaluate_top_level_only = FALSE,
+  outcome_type = "binomial",
+  outcome_column = "outcome",
+  batch_id_column = "batch_id",
+  sample_id_column = "sample_id",
+  series_id_column = "series_id",
+  class_levels = c("red", "green"),
+  verbose = verbose,
+  parallel = FALSE
+)
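As a usage sketch of the change described in NEWS.md: with `experimental_design = "mb+ev"`, a single model is trained on the development dataset and evaluated on an external dataset, and no explicit `vimp_method` needs to be set, since variable importance is derived from data gathered during hyperparameter optimisation. The call below mirrors the test above; how the external validation cohort is designated (for example, through batch identifiers) is not shown in this diff and is assumed to follow the standard configuration.

# Sketch: model building with external validation, without an explicit
# variable importance (feature selection) step.
results <- familiar::summon_familiar(
  data = data,
  experimental_design = "mb+ev",
  learner = "glm_logistic",
  outcome_type = "binomial",
  outcome_column = "outcome",
  batch_id_column = "batch_id",
  sample_id_column = "sample_id",
  series_id_column = "series_id",
  class_levels = c("red", "green"),
  parallel = FALSE
)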