[TEST] iterating over groups of 4 subjects in pipeline_execution_test
bclenet committed Jan 29, 2024
1 parent cce2dfe commit 9a10149
Showing 2 changed files with 25 additions and 16 deletions.
1 change: 1 addition & 0 deletions narps_open/utils/configuration/testing_config.toml
@@ -22,4 +22,5 @@ neurovault_naming = true # true if results files are saved using the neurovault
[testing]

[testing.pipelines]
nb_subjects_per_group = 4 # Compute first level analyses by subgroups of N subjects, to avoid running out of disk space and memory
correlation_thresholds = [0.30, 0.70, 0.79, 0.85, 0.93] # Correlation between reproduced hypotheses files and results, respectively for [20, 40, 60, 80, 108] subjects.
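
For context, a minimal sketch (not part of this diff) of how the new value is read: the accessor pattern is the one used in tests/conftest.py below, while the import path is an assumption based on where testing_config.toml lives.

from narps_open.utils.configuration import Configuration  # assumed import path

# Read the subgroup size added above; with this configuration it yields 4
nb_subjects_subgroup = Configuration()['testing']['pipelines']['nb_subjects_per_group']
print(nb_subjects_subgroup)
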
40 changes: 24 additions & 16 deletions tests/conftest.py
@@ -43,9 +43,13 @@ def test_pipeline_execution(
TODO : how to keep intermediate files of the low level for the next numbers of subjects ?
- keep intermediate levels : boolean in PipelineRunner
"""
# A list of numbers of subjects to iterate over
nb_subjects_subgroup = Configuration()['testing']['pipelines']['nb_subjects_per_group']
nb_subjects_list = [s for s in range(nb_subjects_subgroup, nb_subjects, nb_subjects_subgroup)]
nb_subjects_list.append(nb_subjects)

# Initialize the pipeline
runner = PipelineRunner(team_id)
runner.nb_subjects = nb_subjects
runner.pipeline.directories.dataset_dir = Configuration()['directories']['dataset']
runner.pipeline.directories.results_dir = Configuration()['directories']['reproduced_results']
runner.pipeline.directories.set_output_dir_with_team_id(team_id)
@@ -55,23 +59,27 @@
# TODO : this is a workaround
for _ in range(Configuration()['runner']['nb_trials']):

# Get missing subjects
missing_subjects = set()
for file in runner.get_missing_first_level_outputs():
subject_id = get_subject_id(file)
if subject_id is not None:
missing_subjects.add(subject_id)
for nb_subjects in nb_subjects_list:

# Leave if no missing subjects
if not missing_subjects:
break
runner.nb_subjects = nb_subjects

# Get missing subjects
missing_subjects = set()
for file in runner.get_missing_first_level_outputs():
subject_id = get_subject_id(file)
if subject_id is not None:
missing_subjects.add(subject_id)

# Leave if no missing subjects
if not missing_subjects:
break

# Start pipeline
runner.subjects = missing_subjects
try: # This prevents errors in the workflow from making the test fail
runner.start(True, False)
except(RuntimeError) as err:
print('RuntimeError: ', err)
# Start pipeline
runner.subjects = missing_subjects
try: # This prevents errors in the workflow from making the test fail
runner.start(True, False)
except(RuntimeError) as err:
print('RuntimeError: ', err)

# Check missing files for the last time
missing_files = runner.get_missing_first_level_outputs()
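
As a quick sanity check on the grouping logic (a standalone sketch, not part of the commit), the list built at the top of test_pipeline_execution grows in steps of nb_subjects_per_group and always ends with the full subject count:

def subject_subgroups(nb_subjects, nb_subjects_subgroup=4):
    # Same computation as in the diff above, wrapped in a hypothetical helper
    nb_subjects_list = [s for s in range(nb_subjects_subgroup, nb_subjects, nb_subjects_subgroup)]
    nb_subjects_list.append(nb_subjects)
    return nb_subjects_list

print(subject_subgroups(108))  # [4, 8, ..., 104, 108]
print(subject_subgroups(10))   # [4, 8, 10]
print(subject_subgroups(4))    # [4] -- the range is empty, only the final append remains

This matches the intent stated in the configuration comment: first level analyses are run on a few subjects at a time rather than on the whole cohort at once, to limit disk and memory usage.
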
