From 6c5c27abbe64c06b4c6eb80c632f1a6771264d1c Mon Sep 17 00:00:00 2001
From: Alex Zwanenburg <alexander.zwanenburg@nct-dresden.de>
Date: Tue, 29 Oct 2024 14:46:38 +0100
Subject: [PATCH] Check whether hybrid detail level can be used based on sample
 sizes.

---
 R/Evaluation.R | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/R/Evaluation.R b/R/Evaluation.R
index 7887f109..9a1dc90c 100644
--- a/R/Evaluation.R
+++ b/R/Evaluation.R
@@ -112,6 +112,7 @@ run_evaluation <- function(
       project_list = project_list,
       only_pooling = only_pooling
     ),
+    fill = TRUE,
     use.names = TRUE
   )
 
@@ -244,7 +245,14 @@ run_evaluation <- function(
       "pool_data_id", "pool_run_id", "pool_perturb_level", "is_validation"
     ))
   ])
-    
+  
+  # Specify detail_level.
+  detail_level <- settings$eval$detail_level
+  
+  # If there are too few samples for hybrid analysis -- computing data from
+  # bootstraps of data for single models -- force detail_level to ensemble.
+  if (any(data_sets$n_hybrid_samples <= 10L)) detail_level <- "ensemble"
+   
   if (!is_empty(new_data_table)) {
     
     # Select unique entries.
@@ -291,6 +299,7 @@ run_evaluation <- function(
       MoreArgs = list(
         "cl" = cl_inner,
         "dir_path" = file_paths$fam_data_dir,
+        "detail_level" = detail_level,
         "message_indent" = message_indent + 1L,
         "verbose" = verbose
       )
@@ -361,6 +370,18 @@ run_evaluation <- function(
   # Suppress NOTES due to non-standard evaluation in data.table
   perturb_level <- data_id <- run_id <- has_validation <- NULL
   pool_perturb_level <- data_perturb_level <- can_pre_process <- NULL
+  model_run_id <- is_validation <- NULL
+  
+  # Helper function for determining how many samples are seen by a single model
+  # during evaluation. Some evaluation steps may fail (gracefully) if they see 
+  # too few samples.
+  ..get_n_hybrid_samples <- function(run_id, is_validation, run_list) {
+    if (is_validation) {
+      return(nrow(run_list[[as.character(run_id)]]$valid_samples))
+    } else {
+      return(nrow(run_list[[as.character(run_id)]]$train_samples))
+    }
+  }
   
   # Create empty ensemble run list
   ensemble_run_list <- list()
@@ -561,7 +582,22 @@ run_evaluation <- function(
       )
     }
   }
-
+  
+  # Set the number of samples evaluated during hybrid evaluations.
+  for (data_set in data_sets) {
+    if (nrow(data_set) == 0L) next
+    
+    data_set[
+      ,
+      "n_hybrid_samples" := ..get_n_hybrid_samples(
+        run_id = model_run_id,
+        is_validation = is_validation,
+        run_list = run_list
+      ),
+      by = 1L:nrow(data_set)
+    ]
+  }
+  
   return(data_sets)
 }
 
@@ -616,6 +652,7 @@ run_evaluation <- function(
     cl = NULL,
     pool_data_table,
     dir_path,
+    detail_level = waiver(),
     message_indent = 0L,
     verbose = TRUE
 ) {
@@ -659,7 +696,7 @@ run_evaluation <- function(
     time_max = settings$eval$time_max,
     evaluation_times = settings$eval$eval_times,
     sample_limit = settings$eval$sample_limit,
-    detail_level = settings$eval$detail_level,
+    detail_level = detail_level,
     estimation_type = settings$eval$estimation_type,
     aggregate_results = settings$eval$aggregate_results,
     aggregation_method = settings$eval$aggregation,