From 379074ab1555e0152729098c4f1e23ba85fd40f0 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Thu, 12 Oct 2023 16:47:43 -0500
Subject: [PATCH 01/13] Start working on nnunet stage

---
 .../project/stages/extract_nnunet.py          | 187 ++++++++++++++++++
 .../project/stages/generate_report.py         |   7 +-
 2 files changed, 189 insertions(+), 5 deletions(-)
 create mode 100644 mlcubes/data_preparation/project/stages/extract_nnunet.py

diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py
new file mode 100644
index 00000000..cd551629
--- /dev/null
+++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py
@@ -0,0 +1,187 @@
+from typing import Union, List, Tuple
+from tqdm import tqdm
+import pandas as pd
+import os
+from os.path import realpath, dirname, join
+import shutil
+import traceback
+
+from .row_stage import RowStage
+from .PrepareDataset import Preparator, FINAL_FOLDER
+from .utils import update_row_with_dict, get_id_tp, MockTqdm
+
+MODALITY_MAPPING = {
+    "t1c": "t1c",
+    "t1ce": "t1c",
+    "t1": "t1n",
+    "t1n": "t1n",
+    "t2": "t2w",
+    "t2w": "t2w",
+    "t2f": "t2f",
+    "flair": "t2f"
+}
+
+class Extract(RowStage):
+    def __init__(
+        self,
+        data_csv: str,
+        out_path: str,
+        subpath: str,
+        prev_stage_path: str,
+        prev_subpath: str,
+        # pbar: tqdm,
+        func_name: str,
+        status_code: int,
+    ):
+        self.data_csv = data_csv
+        self.out_path = out_path
+        self.subpath = subpath
+        self.data_subpath = FINAL_FOLDER
+        self.prev_path = prev_stage_path
+        self.prev_subpath = prev_subpath
+        os.makedirs(self.out_path, exist_ok=True)
+        self.prep = Preparator(data_csv, out_path, "BraTSPipeline")
+        self.func_name = func_name
+        self.func = getattr(self.prep, func_name)
+        self.pbar = tqdm()
+        self.failed = False
+        self.exception = None
+        self.status_code = status_code
+
+    def get_name(self) -> str:
+        return self.func_name.replace("_", " ").capitalize()
+
+    def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool:
+        """Determine if case at given index needs to be converted to NIfTI
+
+        Args:
+            index (Union[str, int]): Case index, as used by the report dataframe
+            report (pd.DataFrame): Report Dataframe for providing additional context
+
+        Returns:
+            bool: Wether this stage could be executed for the given case
+        """
+        prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath)
+        return all([os.path.exists(path) for path in prev_paths])
+
+    def execute(
+        self, index: Union[str, int], report: pd.DataFrame
+    ) -> Tuple[pd.DataFrame, bool]:
+        """Runs the pretrained nnUNet models for tumor segmentation
+
+        Args:
+            index (Union[str, int]): case index, as used by the report
+            report (pd.DataFrame): DataFrame containing the current state of the preparation flow
+
+        Returns:
+            pd.DataFrame: Updated report dataframe
+        """
+        self.__prepare_exec()
+        self.__copy_case(index)
+        self.__process_case(index)
+        report, success = self.__update_state(index, report)
+        self.prep.write()
+
+        return report, success
+
+    def __prepare_exec(self):
+
+        # Reset the file contents for errors
+        open(self.prep.stderr_log, "w").close()
+
+        # Update the out dataframes to current state
+        self.prep.read()
+
+    def __get_paths(self, index: Union[str, int], path: str, subpath: str):
+        id, tp = get_id_tp(index)
+        data_path = os.path.join(path, self.data_subpath, id, tp)
+        out_path = os.path.join(path, subpath, id, tp)
+        return data_path, out_path
+
+    def __copy_case(self, index: Union[str, int]):
+        prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath)
+        copy_paths = self.__get_paths(index, self.out_path, self.prev_subpath)
+        for prev, copy in zip(prev_paths, copy_paths):
+            shutil.copytree(prev, copy, dirs_exist_ok=True)
+
+    def __get_models(self):
+        rel_models_path = "../models/nnUNet_trained_models/nnUNet/3d_fullres"
+        models_path = realpath(join(dirname(__file__), rel_models_path))
+        return os.listdir(models_path)
+
+    def __get_mod_order(self, model):
+        rel_orders_path = "../models/nnUNet_modality_order"
+        order_path = realpath(join(dirname(__file__), rel_orders_path, model, "order"))
+        with open(order_path, "r") as f:
+            order_str = f.readline()
+        # drop the leading 'order' and '=' tokens from the split line
+        modalities = order_str.split()[2:]
+        modalities = [MODALITY_MAPPING[mod] for mod in modalities]
+        return modalities
+        
+
+    def __prepare_case(self, path, id, tp, order):
+        pass
+    
+
+    def __process_case(self, index: Union[str, int]):
+        id, tp = get_id_tp(index)
+        # TODO: identify all the nnunet models
+        models = self.__get_models()
+        for model in models:
+            # TODO: get the required order for modalities
+            order = self.__get_mod_order(model)
+            # TODO: create a temporary folder with the renamed modalities
+            tmp_data_path = self.__prepare_case(self.out_path, id, tp, order)
+            # TODO: run model with specified inputs and outputs
+            run_model(model, tmp_data_path, tmp_out_path)
+            # get final .nii.gz file
+            finalize_pred(tmp_out_path)
+            #cleanup
+            cleanup_tmp_paths(tmp_data_path, tmp_out_path)
+
+
+    def __update_state(
+        self, index: Union[str, int], report: pd.DataFrame
+    ) -> Tuple[pd.DataFrame, bool]:
+        if self.failed:
+            del_paths = self.__get_paths(index, self.out_path, self.subpath)
+            report, success = self.__report_failure(index, report)
+        else:
+            del_paths = self.__get_paths(index, self.prev_path, self.prev_subpath)
+            report, success = self.__report_success(index, report)
+
+        for del_path in del_paths:
+            shutil.rmtree(del_path, ignore_errors=True)
+
+        return report, success
+
+    def __report_success(
+        self, index: Union[str, int], report: pd.DataFrame
+    ) -> Tuple[pd.DataFrame, bool]:
+        data_path, labels_path = self.__get_paths(index, self.out_path, self.subpath)
+        report_data = {
+            "status": self.status_code,
+            "status_name": f"{self.func_name.upper()}_FINISHED",
+            "comment": "",
+            "data_path": data_path,
+            "labels_path": labels_path,
+        }
+        update_row_with_dict(report, report_data, index)
+        return report, True
+
+    def __report_failure(
+        self, index: Union[str, int], report: pd.DataFrame
+    ) -> Tuple[pd.DataFrame, bool]:
+        prev_data_path, prev_labels_path = self.__get_paths(index, self.prev_path, self.prev_subpath)
+        msg = f"{str(self.exception)}: {self.traceback}"
+
+        report_data = {
+            "status": -self.status_code,
+            "status_name": f"{self.func_name.upper()}_FAILED",
+            "comment": msg,
+            "data_path": prev_data_path,
+            "labels_path": prev_labels_path,
+        }
+        update_row_with_dict(report, report_data, index)
+        return report, False
\ No newline at end of file
diff --git a/mlcubes/data_preparation/project/stages/generate_report.py b/mlcubes/data_preparation/project/stages/generate_report.py
index e666edaf..48d3e12c 100644
--- a/mlcubes/data_preparation/project/stages/generate_report.py
+++ b/mlcubes/data_preparation/project/stages/generate_report.py
@@ -94,12 +94,9 @@ def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]:
                     if report.loc[index]["input_hash"] == input_hash:
                         continue
 
-                    shutil.rmtree(out_tp_path, ignore_errors=True)
-                    shutil.copytree(in_tp_path, out_tp_path)
                     report = report.drop(index)
-                else:
-                    # New case not identified by the report. Add it
-                    shutil.copytree(in_tp_path, out_tp_path)
+                shutil.rmtree(out_tp_path, ignore_errors=True)
+                shutil.copytree(in_tp_path, out_tp_path)
 
                 data = {
                     "status": self.status_code,

From 957ca68effdfa7b0b3b8a3198625dab3b98d9d32 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Fri, 13 Oct 2023 15:34:07 -0500
Subject: [PATCH 02/13] Implement extract nnunet

---
 Dockerfile                                    | 10 +++
 mlcubes/data_preparation/mlcube/mlcube.yaml   |  2 +-
 mlcubes/data_preparation/project/prepare.py   |  5 +-
 .../project/stages/extract_nnunet.py          | 74 ++++++++++++++-----
 4 files changed, 68 insertions(+), 23 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 128d2fad..a64a3cdf 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -83,6 +83,16 @@ RUN cp -R /Front-End/bin/install/appdir/usr/bin/data_prep_models /project/stages
 # Hotfix: install more recent version of GaNDLF for metrics generation
 RUN pip install git+https://github.com/mlcommons/GaNDLF@616b37bafad8f89d5c816a88f44fa30470601311
 
+RUN pip install torch torchvision
+
+RUN pip install git+https://github.com/MIC-DKFZ/nnUNet.git@nnunetv1
+
+RUN mkdir /nnUNet_raw_data_base && mkdir /nnUNet_preprocessed
+
+ENV nnUNet_raw_data_base="/nnUNet_raw_data_base"
+ENV nnUNet_preprocessed="/nnUNet_preprocessed"
+ENV RESULTS_FOLDER="/project/models/nnUNet_trained_models"
+
 COPY ./mlcubes/data_preparation/project /project
 
 ENTRYPOINT ["python", "/project/mlcube.py"]
diff --git a/mlcubes/data_preparation/mlcube/mlcube.yaml b/mlcubes/data_preparation/mlcube/mlcube.yaml
index 518a20d6..44386677 100644
--- a/mlcubes/data_preparation/mlcube/mlcube.yaml
+++ b/mlcubes/data_preparation/mlcube/mlcube.yaml
@@ -8,7 +8,7 @@ platform:
 
 docker:
   # Image name
-  image: mlcommons/rano-data-prep:latest
+  image: mlcommons/rano-data-prep:nnunet
   # Docker build context relative to $MLCUBE_ROOT. Default is `build`.
   build_context: "../project"
   # Docker file name within docker build context, default is `Dockerfile`.
diff --git a/mlcubes/data_preparation/project/prepare.py b/mlcubes/data_preparation/project/prepare.py
index 3003cfb5..f5ab4fde 100644
--- a/mlcubes/data_preparation/project/prepare.py
+++ b/mlcubes/data_preparation/project/prepare.py
@@ -7,6 +7,7 @@
 from stages.get_csv import AddToCSV
 from stages.nifti_transform import NIfTITransform
 from stages.extract import Extract
+from stages.extract_nnunet import ExtractNnUNet
 from stages.manual import ManualStage
 from stages.comparison import SegmentationComparisonStage
 from stages.confirm import ConfirmStage
@@ -104,14 +105,12 @@ def init_pipeline(args):
         "extract_brain",
         3,
     )
-    tumor_extract_proc = Extract(
+    tumor_extract_proc = ExtractNnUNet(
         out_data_csv,
         tumor_data_out,
         TUMOR_MASK_FOLDER,
         brain_data_out,
         INTERIM_FOLDER,
-        # loop,
-        "extract_tumor",
         4,
     )
     manual_proc = ManualStage(out_data_csv, tumor_data_out, tumor_data_out, backup_out)
diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py
index cd551629..8420d82f 100644
--- a/mlcubes/data_preparation/project/stages/extract_nnunet.py
+++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py
@@ -5,6 +5,8 @@
 from os.path import realpath, dirname, join
 import shutil
 import traceback
+import time
+import subprocess
 
 from .row_stage import RowStage
 from .PrepareDataset import Preparator, FINAL_FOLDER
@@ -21,7 +23,7 @@
     "flair": "t2f"
 }
 
-class Extract(RowStage):
+class ExtractNnUNet(RowStage):
     def __init__(
         self,
         data_csv: str,
@@ -29,8 +31,6 @@ def __init__(
         subpath: str,
         prev_stage_path: str,
         prev_subpath: str,
-        # pbar: tqdm,
-        func_name: str,
         status_code: int,
     ):
         self.data_csv = data_csv
@@ -41,15 +41,13 @@ def __init__(
         self.prev_subpath = prev_subpath
         os.makedirs(self.out_path, exist_ok=True)
         self.prep = Preparator(data_csv, out_path, "BraTSPipeline")
-        self.func_name = func_name
-        self.func = getattr(self.prep, func_name)
         self.pbar = tqdm()
         self.failed = False
         self.exception = None
         self.status_code = status_code
 
     def get_name(self) -> str:
-        return self.func_name.replace("_", " ").capitalize()
+        return "nnUNet Tumor Extraction"
 
     def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool:
         """Determine if case at given index needs to be converted to NIfTI
@@ -121,24 +119,62 @@ def __get_mod_order(self, model):
         
 
     def __prepare_case(self, path, id, tp, order):
-        pass
+        tmp_subject = f"{id}-{tp}"
+        tmp_path = os.path.join(path, "tmp-data")
+        tmp_subject_path = os.path.join(tmp_path, tmp_subject)
+        tmp_out_path = os.path.join(path, "tmp-out")
+        shutil.rmtree(tmp_path, ignore_errors=True)
+        shutil.rmtree(tmp_out_path, ignore_errors=True)
+        os.makedirs(tmp_subject_path)
+        os.makedirs(tmp_out_path)
+        in_modalities_path = os.path.join(path, "DataForFeTS", id, tp)
+        for modality_file in os.listdir(in_modalities_path):
+            if not modality_file.endswith(".nii.gz"):
+                continue
+            modality = modality_file[:-7].split("_")[-1]
+            norm_mod = MODALITY_MAPPING[modality]
+            mod_idx = order.index(norm_mod)
+            mod_idx = str(mod_idx).zfill(4)
+
+            out_modality_file = f"{tmp_subject}_{mod_idx}.nii.gz"
+            in_file = os.path.join(in_modalities_path, modality_file)
+            out_file = os.path.join(tmp_subject_path, out_modality_file)
+            shutil.copyfile(in_file, out_file)
+            print(out_file)
+
+        return tmp_subject_path, tmp_out_path
     
+    def __run_model(self, model, data_path, out_path):
+        # models are named Task<ID>_..., where <ID> is always 3 numbers
+        task_id = model[4:7]
+        cmd = f"nnUNet_predict -i {data_path} -o {out_path} -t {task_id} -f all"
+        print(cmd)
+        print(os.listdir(data_path))
+        start = time.time()
+        subprocess.call(cmd, shell=True)
+        end = time.time()
+        total_time = (end - start)
+        print(f"Total time elapsed is {total_time} seconds")
+
+    def __finalize_pred(self, tmp_out_path, out_path, id, tp, model_idx):
+        # We assume there's only one file in out_path
+        pred = os.listdir(tmp_out_path)[0]
+        pred_filepath = os.path.join(tmp_out_path, pred)
+        out_pred_path = os.path.join(out_path, "DataForQC", id, tp, "TumorMasksForQC")
+        out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{model_idx}.nii.gz")
+        shutil.move(pred_filepath, out_pred_filepath)
 
     def __process_case(self, index: Union[str, int]):
         id, tp = get_id_tp(index)
-        # TODO: identify all the nnunet models
         models = self.__get_models()
-        for model in models:
-            # TODO: get the required order for modalities
+        for i, model in enumerate(models):
             order = self.__get_mod_order(model)
-            # TODO: create a temporary folder with the renamed modalities
-            tmp_data_path = self.__prepare_case(self.out_path, id, tp, order)
-            # TODO: run model with specified inputs and outputs
-            run_model(model, tmp_data_path, tmp_out_path)
-            # get final .nii.gz file
-            finalize_pred(tmp_out_path)
+            tmp_data_path, tmp_out_path = self.__prepare_case(self.out_path, id, tp, order)
+            self.__run_model(model, tmp_data_path, tmp_out_path)
+            self.__finalize_pred(tmp_out_path, self.out_path, id, tp, i)
             #cleanup
-            cleanup_tmp_paths(tmp_data_path, tmp_out_path)
+            shutil.rmtree(tmp_data_path, ignore_errors=True)
+            shutil.rmtree(tmp_out_path, ignore_errors=True)
 
 
     def __update_state(
@@ -162,7 +198,7 @@ def __report_success(
         data_path, labels_path = self.__get_paths(index, self.out_path, self.subpath)
         report_data = {
             "status": self.status_code,
-            "status_name": f"{self.func_name.upper()}_FINISHED",
+            "status_name": "TUMOR_EXTRACT_FINISHED",
             "comment": "",
             "data_path": data_path,
             "labels_path": labels_path,
@@ -178,7 +214,7 @@ def __report_failure(
 
         report_data = {
             "status": -self.status_code,
-            "status_name": f"{self.func_name.upper()}_FAILED",
+            "status_name": "TUMOR_EXTRACT_FAILED",
             "comment": msg,
             "data_path": prev_data_path,
             "labels_path": prev_labels_path,

From c428ff06e31512d8d1b3d7ab4227add315e2f638 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Mon, 16 Oct 2023 10:44:02 -0500
Subject: [PATCH 03/13] Handle failure scenarios

---
 .../data_preparation/project/stages/extract_nnunet.py    | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py
index 8420d82f..97245821 100644
--- a/mlcubes/data_preparation/project/stages/extract_nnunet.py
+++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py
@@ -170,8 +170,13 @@ def __process_case(self, index: Union[str, int]):
         for i, model in enumerate(models):
             order = self.__get_mod_order(model)
             tmp_data_path, tmp_out_path = self.__prepare_case(self.out_path, id, tp, order)
-            self.__run_model(model, tmp_data_path, tmp_out_path)
-            self.__finalize_pred(tmp_out_path, self.out_path, id, tp, i)
+            try:
+                self.__run_model(model, tmp_data_path, tmp_out_path)
+                self.__finalize_pred(tmp_out_path, self.out_path, id, tp, i)
+            except Exception as e:
+                self.exception = e
+                self.failed = True
+                return
             #cleanup
             shutil.rmtree(tmp_data_path, ignore_errors=True)
             shutil.rmtree(tmp_out_path, ignore_errors=True)

From bf9b086ae2da62c7e1dea59fe4034d14617f0f33 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Wed, 18 Oct 2023 10:53:33 -0500
Subject: [PATCH 04/13] Modularize tumor extraction

---
 src/applications/PrepareDataset.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py
index a79da2dd..a11d8456 100644
--- a/src/applications/PrepareDataset.py
+++ b/src/applications/PrepareDataset.py
@@ -118,7 +118,7 @@ def _get_relevant_dicom_tags(filename: str) -> dict:
     return output_dict
 
 
-def _save_screenshot(
+def save_screenshot(
     input_images: dict, output_filename: str = None, input_mask: str = None
 ) -> None:
     """
@@ -447,7 +447,13 @@ def _run_tumor_segmentation_using_gandlf(
                 tumor_masks_to_return.append(renamed_path)
                 images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8))
 
+    fused_masks_to_return = generate_tumor_segmentation_fused_images(images_for_fusion, mask_output_dir, subject_id)
+    return tumor_masks_to_return + fused_masks_to_return
+
+    
+def generate_tumor_segmentation_fused_images(images_for_fusion, mask_output_dir, subject_id):
     tumor_class_list = [0, 1, 2, 3, 4]
+    fused_masks_to_return = []
 
     if len(images_for_fusion) > 1:
         for fusion_type in ["staple", "simple", "voting"]:
@@ -457,9 +463,9 @@ def _run_tumor_segmentation_using_gandlf(
                 f"{subject_id}_tumorMask_fused-{fusion_type}.nii.gz",
             )
             sitk.WriteImage(fused_mask, fused_mask_file)
-            tumor_masks_to_return.append(fused_mask_file)
+            fused_masks_to_return.append(fused_mask_file)
 
-    return tumor_masks_to_return
+    return fused_masks_to_return
 
 
 class Preparator:
@@ -685,7 +691,7 @@ def convert_to_dicom(self, idx: int, row: pd.Series, pbar: tqdm):
             f"{subject_id_timepoint}_summary_coregistration.png",
         )
         # save the screenshot
-        _save_screenshot(outputs_reoriented, screenshot_path)
+        save_screenshot(outputs_reoriented, screenshot_path)
 
         if os.path.exists(screenshot_path):
             shutil.copyfile(
@@ -743,7 +749,7 @@ def extract_brain(self, row: pd.Series, pbar: tqdm):
             sitk.WriteImage(masked_image, file_to_save)
 
         # save the screenshot
-        _save_screenshot(
+        save_screenshot(
             input_for_tumor_models,
             posixpath.join(
                 interimOutputDir_actual,
@@ -783,7 +789,7 @@ def extract_tumor(self, row: pd.Series, pbar: tqdm):
         for tumor_mask in tumor_masks_for_qc:
             tumor_mask_id = os.path.basename(tumor_mask).replace(".nii.gz", "")
             # save the screenshot
-            _save_screenshot(
+            save_screenshot(
                 input_for_tumor_models,
                 posixpath.join(interimOutputDir_actual, f"{tumor_mask_id}_summary.png"),
                 tumor_mask,

From b7bdd49947359d53716c43404ce9c996302b158a Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Wed, 18 Oct 2023 10:54:10 -0500
Subject: [PATCH 05/13] Add tumor fusing and screenshots

---
 .../project/stages/extract_nnunet.py          | 72 ++++++++++++++++---
 1 file changed, 61 insertions(+), 11 deletions(-)

diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py
index 97245821..92956c0d 100644
--- a/mlcubes/data_preparation/project/stages/extract_nnunet.py
+++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py
@@ -4,12 +4,14 @@
 import os
 from os.path import realpath, dirname, join
 import shutil
-import traceback
 import time
+import SimpleITK as sitk
 import subprocess
+import traceback
+from LabelFusion.wrapper import fuse_images
 
 from .row_stage import RowStage
-from .PrepareDataset import Preparator, FINAL_FOLDER
+from .PrepareDataset import Preparator, FINAL_FOLDER, generate_tumor_segmentation_fused_images, save_screenshot
 from .utils import update_row_with_dict, get_id_tp, MockTqdm
 
 MODALITY_MAPPING = {
@@ -23,6 +25,17 @@
     "flair": "t2f"
 }
 
+MODALITY_VARIANTS = {
+    "t1c": "T1GD",
+    "t1ce": "T1GD",
+    "t1": "T1",
+    "t1n": "T1",
+    "t2": "T2",
+    "t2w": "T2",
+    "t2f": "FLAIR",
+    "flair": "FLAIR"
+}
+
 class ExtractNnUNet(RowStage):
     def __init__(
         self,
@@ -116,7 +129,6 @@ def __get_mod_order(self, model):
         modalities = order_str.split()[2:]
         modalities = [MODALITY_MAPPING[mod] for mod in modalities]
         return modalities
-        
 
     def __prepare_case(self, path, id, tp, order):
         tmp_subject = f"{id}-{tp}"
@@ -128,6 +140,7 @@ def __prepare_case(self, path, id, tp, order):
         os.makedirs(tmp_subject_path)
         os.makedirs(tmp_out_path)
         in_modalities_path = os.path.join(path, "DataForFeTS", id, tp)
+        input_modalities = {}
         for modality_file in os.listdir(in_modalities_path):
             if not modality_file.endswith(".nii.gz"):
                 continue
@@ -139,10 +152,10 @@ def __prepare_case(self, path, id, tp, order):
             out_modality_file = f"{tmp_subject}_{mod_idx}.nii.gz"
             in_file = os.path.join(in_modalities_path, modality_file)
             out_file = os.path.join(tmp_subject_path, out_modality_file)
+            input_modalities[MODALITY_VARIANTS[modality]] = in_file
             shutil.copyfile(in_file, out_file)
-            print(out_file)
 
-        return tmp_subject_path, tmp_out_path
+        return tmp_subject_path, tmp_out_path, input_modalities
     
     def __run_model(self, model, data_path, out_path):
         # models are named Task<ID>_..., where <ID> is always 3 numbers
@@ -156,32 +169,69 @@ def __run_model(self, model, data_path, out_path):
         total_time = (end - start)
         print(f"Total time elapsed is {total_time} seconds")
 
-    def __finalize_pred(self, tmp_out_path, out_path, id, tp, model_idx):
+    def __finalize_pred(self, tmp_out_path, out_pred_filepath):
         # We assume there's only one file in out_path
-        pred = os.listdir(tmp_out_path)[0]
+        pred = None
+        for file in os.listdir(tmp_out_path):
+            if file.endswith(".nii.gz"):
+                pred = file
+
+        if pred is None:
+            raise RuntimeError("No tumor segmentation was found")
+
         pred_filepath = os.path.join(tmp_out_path, pred)
-        out_pred_path = os.path.join(out_path, "DataForQC", id, tp, "TumorMasksForQC")
-        out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{model_idx}.nii.gz")
         shutil.move(pred_filepath, out_pred_filepath)
+        return out_pred_filepath
 
     def __process_case(self, index: Union[str, int]):
         id, tp = get_id_tp(index)
+        subject_id = f"{id}_{tp}"
         models = self.__get_models()
+        outputs = []
+        images_for_fusion = []
+        out_path = os.path.join(self.out_path, "DataForQC", id, tp) 
+        out_pred_path = os.path.join(out_path, "TumorMasksForQC")
+        os.makedirs(out_pred_path, exist_ok=True)
         for i, model in enumerate(models):
             order = self.__get_mod_order(model)
-            tmp_data_path, tmp_out_path = self.__prepare_case(self.out_path, id, tp, order)
+            tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case(self.out_path, id, tp, order)
+            out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz")
+            if os.path.exists(out_pred_filepath):
+                print("Model output detected, skipping model")
+                continue
             try:
                 self.__run_model(model, tmp_data_path, tmp_out_path)
-                self.__finalize_pred(tmp_out_path, self.out_path, id, tp, i)
+                output = self.__finalize_pred(tmp_out_path, out_pred_filepath)
+                outputs.append(output)
+                images_for_fusion.append(sitk.ReadImage(output, sitk.sitkUInt8))
             except Exception as e:
                 self.exception = e
                 self.failed = True
+                self.traceback = traceback.format_exc()
                 return
+
             #cleanup
             shutil.rmtree(tmp_data_path, ignore_errors=True)
             shutil.rmtree(tmp_out_path, ignore_errors=True)
 
 
+        fused_outputs = generate_tumor_segmentation_fused_images(images_for_fusion, out_pred_path, subject_id)
+        outputs += fused_outputs
+
+        for output in outputs:
+            # save the screenshot
+            tumor_mask_id = os.path.basename(output).replace(".nii.gz", "")
+            save_screenshot(
+                input_modalities,
+                os.path.join(
+                    out_path,
+                    f"{tumor_mask_id}_summary.png",
+                ),
+                output,
+            )
+
+
+
     def __update_state(
         self, index: Union[str, int], report: pd.DataFrame
     ) -> Tuple[pd.DataFrame, bool]:

From 600d4d8f33b6b63838466c555aca229dcbf2088c Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Wed, 18 Oct 2023 11:05:18 -0500
Subject: [PATCH 06/13] ignore models folder

---
 mlcubes/.gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlcubes/.gitignore b/mlcubes/.gitignore
index ac044f44..370c46a5 100644
--- a/mlcubes/.gitignore
+++ b/mlcubes/.gitignore
@@ -6,4 +6,5 @@
 *.png
 */mlcube/workspace/*
 !requirements.txt
-!*/mlcube/workspace/parameters.yaml
\ No newline at end of file
+!*/mlcube/workspace/parameters.yaml
+mlcubes/data_preparation/project/models
\ No newline at end of file

From 0f26cff91c6d1db4c4980a874e657164f4766eb3 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Wed, 18 Oct 2023 11:16:57 -0500
Subject: [PATCH 07/13] fix git ignore models

---
 mlcubes/.gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlcubes/.gitignore b/mlcubes/.gitignore
index 370c46a5..be8d1082 100644
--- a/mlcubes/.gitignore
+++ b/mlcubes/.gitignore
@@ -7,4 +7,4 @@
 */mlcube/workspace/*
 !requirements.txt
 !*/mlcube/workspace/parameters.yaml
-mlcubes/data_preparation/project/models
\ No newline at end of file
+models
\ No newline at end of file

From ed3ffbacb1ca9da8c785cb7dc25e14e1ff1df802 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Thu, 19 Oct 2023 10:45:06 -0500
Subject: [PATCH 08/13] Don't skip nnunet models

---
 mlcubes/data_preparation/project/stages/extract_nnunet.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py
index 92956c0d..cf67a5a0 100644
--- a/mlcubes/data_preparation/project/stages/extract_nnunet.py
+++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py
@@ -196,9 +196,6 @@ def __process_case(self, index: Union[str, int]):
             order = self.__get_mod_order(model)
             tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case(self.out_path, id, tp, order)
             out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz")
-            if os.path.exists(out_pred_filepath):
-                print("Model output detected, skipping model")
-                continue
             try:
                 self.__run_model(model, tmp_data_path, tmp_out_path)
                 output = self.__finalize_pred(tmp_out_path, out_pred_filepath)

From c1cdc89aed43a1fbc17dd113cc79c207688c2c2e Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Wed, 15 Nov 2023 12:12:36 -0500
Subject: [PATCH 09/13] Pass stages to medperf through parameters

---
 .../mlcube/workspace/parameters.yaml          | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml
index 63af4621..ac8e03e4 100644
--- a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml
+++ b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml
@@ -1,2 +1,23 @@
 seed: 2784
-train_percent: 0.8
\ No newline at end of file
+train_percent: 0.8
+medperf_report_stages:
+- "IDENTIFIED"
+- "VALIDATED"
+- "MISSING_MODALITIES"
+- "EXTRA_MODALITIES"
+- "VALIDATION"
+- "CONVERTED_TO_NIfTI"
+- "NIfTI_CONVERSION_FAILED"
+- "BRAIN_EXTRACT_FINISHED"
+- "BRAIN_EXTRACT_FAILED"
+- "TUMOR_EXTRACT_FINISHED"
+- "TUMOR_EXTRACT_FAILED"
+- "MANUAL_REVIEW_COMPLETED"
+- "MANUAL_REVIEW_REQUIRED"
+- "MULTIPLE_ANNOTATIONS_ERROR"
+- "COMPARISON_COMPLETE"
+- "EXACT_MATCH_IDENTIFIED"
+- "ANNOTATION_COMPARISON_FAILED"
+- "ANNOTATION_CONFIRMED"
+- "UNHANDLED_ERROR"
+- "DONE"
\ No newline at end of file

From e837c7ecf4675cbdf93432e72921237b56576ff9 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Wed, 15 Nov 2023 12:16:57 -0500
Subject: [PATCH 10/13] Revert "Pass stages to medperf through parameters"

This reverts commit c1cdc89aed43a1fbc17dd113cc79c207688c2c2e.
---
 .../mlcube/workspace/parameters.yaml          | 23 +------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml
index ac8e03e4..63af4621 100644
--- a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml
+++ b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml
@@ -1,23 +1,2 @@
 seed: 2784
-train_percent: 0.8
-medperf_report_stages:
-- "IDENTIFIED"
-- "VALIDATED"
-- "MISSING_MODALITIES"
-- "EXTRA_MODALITIES"
-- "VALIDATION"
-- "CONVERTED_TO_NIfTI"
-- "NIfTI_CONVERSION_FAILED"
-- "BRAIN_EXTRACT_FINISHED"
-- "BRAIN_EXTRACT_FAILED"
-- "TUMOR_EXTRACT_FINISHED"
-- "TUMOR_EXTRACT_FAILED"
-- "MANUAL_REVIEW_COMPLETED"
-- "MANUAL_REVIEW_REQUIRED"
-- "MULTIPLE_ANNOTATIONS_ERROR"
-- "COMPARISON_COMPLETE"
-- "EXACT_MATCH_IDENTIFIED"
-- "ANNOTATION_COMPARISON_FAILED"
-- "ANNOTATION_CONFIRMED"
-- "UNHANDLED_ERROR"
-- "DONE"
\ No newline at end of file
+train_percent: 0.8
\ No newline at end of file

From 7a6366bd951cb298857fbfb2639144c5548b7bb1 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Mon, 11 Dec 2023 12:14:04 -0500
Subject: [PATCH 11/13] Import from extract for nnUNet

---
 .../project/stages/extract.py                 |   4 +-
 .../project/stages/extract_nnunet.py          | 148 ++++--------------
 2 files changed, 35 insertions(+), 117 deletions(-)

diff --git a/mlcubes/data_preparation/project/stages/extract.py b/mlcubes/data_preparation/project/stages/extract.py
index a5f3a8a8..1aa53ee5 100644
--- a/mlcubes/data_preparation/project/stages/extract.py
+++ b/mlcubes/data_preparation/project/stages/extract.py
@@ -74,7 +74,7 @@ def execute(
         """
         self.__prepare_exec()
         self.__copy_case(index)
-        self.__process_case(index)
+        self._process_case(index)
         report, success = self.__update_state(index, report)
         self.prep.write()
 
@@ -99,7 +99,7 @@ def __copy_case(self, index: Union[str, int]):
         for prev, copy in zip(prev_paths, copy_paths):
             shutil.copytree(prev, copy, dirs_exist_ok=True)
 
-    def __process_case(self, index: Union[str, int]):
+    def _process_case(self, index: Union[str, int]):
         id, tp = get_id_tp(index)
         df = self.prep.subjects_df
         row = df[(df["SubjectID"] == id) & (df["Timepoint"] == tp)].iloc[0]
diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py
index cf67a5a0..12361b61 100644
--- a/mlcubes/data_preparation/project/stages/extract_nnunet.py
+++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py
@@ -10,8 +10,13 @@
 import traceback
 from LabelFusion.wrapper import fuse_images
 
-from .row_stage import RowStage
-from .PrepareDataset import Preparator, FINAL_FOLDER, generate_tumor_segmentation_fused_images, save_screenshot
+from .extract import Extract
+from .PrepareDataset import (
+    Preparator,
+    FINAL_FOLDER,
+    generate_tumor_segmentation_fused_images,
+    save_screenshot,
+)
 from .utils import update_row_with_dict, get_id_tp, MockTqdm
 
 MODALITY_MAPPING = {
@@ -22,7 +27,7 @@
     "t2": "t2w",
     "t2w": "t2w",
     "t2f": "t2f",
-    "flair": "t2f"
+    "flair": "t2f",
 }
 
 MODALITY_VARIANTS = {
@@ -33,10 +38,11 @@
     "t2": "T2",
     "t2w": "T2",
     "t2f": "FLAIR",
-    "flair": "FLAIR"
+    "flair": "FLAIR",
 }
 
-class ExtractNnUNet(RowStage):
+
+class ExtractNnUNet(Extract):
     def __init__(
         self,
         data_csv: str,
@@ -45,6 +51,7 @@ def __init__(
         prev_stage_path: str,
         prev_subpath: str,
         status_code: int,
+        extra_labels_path=None,  # None -> a fresh empty list per instance
     ):
         self.data_csv = data_csv
         self.out_path = out_path
@@ -57,63 +64,16 @@ def __init__(
         self.pbar = tqdm()
         self.failed = False
         self.exception = None
-        self.status_code = status_code
+        self.__status_code = status_code
+        self.extra_labels_path = [] if extra_labels_path is None else extra_labels_path
 
-    def get_name(self) -> str:
+    @property
+    def name(self) -> str:
         return "nnUNet Tumor Extraction"
 
-    def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool:
-        """Determine if case at given index needs to be converted to NIfTI
-
-        Args:
-            index (Union[str, int]): Case index, as used by the report dataframe
-            report (pd.DataFrame): Report Dataframe for providing additional context
-
-        Returns:
-            bool: Wether this stage could be executed for the given case
-        """
-        prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath)
-        return all([os.path.exists(path) for path in prev_paths])
-
-    def execute(
-        self, index: Union[str, int], report: pd.DataFrame
-    ) -> Tuple[pd.DataFrame, bool]:
-        """Runs the pretrained nnUNet models for tumor segmentation
-
-        Args:
-            index (Union[str, int]): case index, as used by the report
-            report (pd.DataFrame): DataFrame containing the current state of the preparation flow
-
-        Returns:
-            pd.DataFrame: Updated report dataframe
-        """
-        self.__prepare_exec()
-        self.__copy_case(index)
-        self.__process_case(index)
-        report, success = self.__update_state(index, report)
-        self.prep.write()
-
-        return report, success
-
-    def __prepare_exec(self):
-
-        # Reset the file contents for errors
-        open(self.prep.stderr_log, "w").close()
-
-        # Update the out dataframes to current state
-        self.prep.read()
-
-    def __get_paths(self, index: Union[str, int], path: str, subpath: str):
-        id, tp = get_id_tp(index)
-        data_path = os.path.join(path, self.data_subpath, id, tp)
-        out_path = os.path.join(path, subpath, id, tp)
-        return data_path, out_path
-
-    def __copy_case(self, index: Union[str, int]):
-        prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath)
-        copy_paths = self.__get_paths(index, self.out_path, self.prev_subpath)
-        for prev, copy in zip(prev_paths, copy_paths):
-            shutil.copytree(prev, copy, dirs_exist_ok=True)
+    @property
+    def status_code(self) -> int:
+        return self.__status_code  # the int passed to __init__ as status_code
 
     def __get_models(self):
         rel_models_path = "../models/nnUNet_trained_models/nnUNet/3d_fullres"
@@ -156,7 +116,7 @@ def __prepare_case(self, path, id, tp, order):
             shutil.copyfile(in_file, out_file)
 
         return tmp_subject_path, tmp_out_path, input_modalities
-    
+
     def __run_model(self, model, data_path, out_path):
         # models are named Task<ID>_..., where <ID> is always 3 numbers
         task_id = model[4:7]
@@ -166,7 +126,7 @@ def __run_model(self, model, data_path, out_path):
         start = time.time()
         subprocess.call(cmd, shell=True)
         end = time.time()
-        total_time = (end - start)
+        total_time = end - start
         print(f"Total time elapsed is {total_time} seconds")
 
     def __finalize_pred(self, tmp_out_path, out_pred_filepath):
@@ -183,19 +143,23 @@ def __finalize_pred(self, tmp_out_path, out_pred_filepath):
         shutil.move(pred_filepath, out_pred_filepath)
         return out_pred_filepath
 
-    def __process_case(self, index: Union[str, int]):
+    def _process_case(self, index: Union[str, int]):
         id, tp = get_id_tp(index)
         subject_id = f"{id}_{tp}"
         models = self.__get_models()
         outputs = []
         images_for_fusion = []
-        out_path = os.path.join(self.out_path, "DataForQC", id, tp) 
+        out_path = os.path.join(self.out_path, "DataForQC", id, tp)
         out_pred_path = os.path.join(out_path, "TumorMasksForQC")
         os.makedirs(out_pred_path, exist_ok=True)
         for i, model in enumerate(models):
             order = self.__get_mod_order(model)
-            tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case(self.out_path, id, tp, order)
-            out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz")
+            tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case(
+                self.out_path, id, tp, order
+            )
+            out_pred_filepath = os.path.join(
+                out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz"
+            )
             try:
                 self.__run_model(model, tmp_data_path, tmp_out_path)
                 output = self.__finalize_pred(tmp_out_path, out_pred_filepath)
@@ -207,12 +171,13 @@ def __process_case(self, index: Union[str, int]):
                 self.traceback = traceback.format_exc()
                 return
 
-            #cleanup
+            # cleanup
             shutil.rmtree(tmp_data_path, ignore_errors=True)
             shutil.rmtree(tmp_out_path, ignore_errors=True)
 
-
-        fused_outputs = generate_tumor_segmentation_fused_images(images_for_fusion, out_pred_path, subject_id)
+        fused_outputs = generate_tumor_segmentation_fused_images(
+            images_for_fusion, out_pred_path, subject_id
+        )
         outputs += fused_outputs
 
         for output in outputs:
@@ -226,50 +191,3 @@ def __process_case(self, index: Union[str, int]):
                 ),
                 output,
             )
-
-
-
-    def __update_state(
-        self, index: Union[str, int], report: pd.DataFrame
-    ) -> Tuple[pd.DataFrame, bool]:
-        if self.failed:
-            del_paths = self.__get_paths(index, self.out_path, self.subpath)
-            report, success = self.__report_failure(index, report)
-        else:
-            del_paths = self.__get_paths(index, self.prev_path, self.prev_subpath)
-            report, success = self.__report_success(index, report)
-
-        for del_path in del_paths:
-            shutil.rmtree(del_path, ignore_errors=True)
-
-        return report, success
-
-    def __report_success(
-        self, index: Union[str, int], report: pd.DataFrame
-    ) -> Tuple[pd.DataFrame, bool]:
-        data_path, labels_path = self.__get_paths(index, self.out_path, self.subpath)
-        report_data = {
-            "status": self.status_code,
-            "status_name": "TUMOR_EXTRACT_FINISHED",
-            "comment": "",
-            "data_path": data_path,
-            "labels_path": labels_path,
-        }
-        update_row_with_dict(report, report_data, index)
-        return report, True
-
-    def __report_failure(
-        self, index: Union[str, int], report: pd.DataFrame
-    ) -> Tuple[pd.DataFrame, bool]:
-        prev_data_path, prev_labels_path = self.__get_paths(index, self.prev_path, self.prev_subpath)
-        msg = f"{str(self.exception)}: {self.traceback}"
-
-        report_data = {
-            "status": -self.status_code,
-            "status_name": "TUMOR_EXTRACT_FAILED",
-            "comment": msg,
-            "data_path": prev_data_path,
-            "labels_path": prev_labels_path,
-        }
-        update_row_with_dict(report, report_data, index)
-        return report, False
\ No newline at end of file

From 08a903278c018f4389c920a197265927234d9a30 Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Mon, 11 Dec 2023 15:17:24 -0500
Subject: [PATCH 12/13] Sync generate_report with parent

---
 mlcubes/data_preparation/project/stages/generate_report.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/mlcubes/data_preparation/project/stages/generate_report.py b/mlcubes/data_preparation/project/stages/generate_report.py
index 085e5c1e..fa1c0342 100644
--- a/mlcubes/data_preparation/project/stages/generate_report.py
+++ b/mlcubes/data_preparation/project/stages/generate_report.py
@@ -88,9 +88,12 @@ def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]:
                     if report.loc[index]["input_hash"] == input_hash:
                         continue
 
+                    shutil.rmtree(out_tp_path, ignore_errors=True)
+                    shutil.copytree(in_tp_path, out_tp_path)
                     report = report.drop(index)
-                shutil.rmtree(out_tp_path, ignore_errors=True)
-                shutil.copytree(in_tp_path, out_tp_path)
+                else:
+                    # New case not identified by the report. Add it
+                    shutil.copytree(in_tp_path, out_tp_path)
 
                 data = {
                     "status": self.status_code,

From c97597dd782818bc58b57a536ed25b4871ba403d Mon Sep 17 00:00:00 2001
From: Alejandro Aristizabal <alejandro.aristizabal24@gmail.com>
Date: Mon, 18 Dec 2023 10:24:30 -0500
Subject: [PATCH 13/13] Pass models through tarball. Fix skip issues

---
 mlcubes/data_preparation/mlcube/mlcube.yaml   |  1 +
 mlcubes/data_preparation/project/mlcube.py    |  3 +-
 mlcubes/data_preparation/project/prepare.py   | 29 +++++++------------
 .../project/stages/generate_report.py         | 18 ++++++------
 .../project/stages/nifti_transform.py         |  5 ++--
 .../project/stages/pipeline.py                |  5 ++--
 6 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/mlcubes/data_preparation/mlcube/mlcube.yaml b/mlcubes/data_preparation/mlcube/mlcube.yaml
index f924e2f6..4a2ec422 100644
--- a/mlcubes/data_preparation/mlcube/mlcube.yaml
+++ b/mlcubes/data_preparation/mlcube/mlcube.yaml
@@ -21,6 +21,7 @@ tasks:
         data_path: input_data,
         labels_path: input_labels,
         parameters_file: parameters.yaml,
+        models: additional_files/models,
       }
       outputs: {
         output_path: data/,
diff --git a/mlcubes/data_preparation/project/mlcube.py b/mlcubes/data_preparation/project/mlcube.py
index be71c415..749b7f72 100644
--- a/mlcubes/data_preparation/project/mlcube.py
+++ b/mlcubes/data_preparation/project/mlcube.py
@@ -23,12 +23,13 @@ def prepare(
     data_path: str = typer.Option(..., "--data_path"),
     labels_path: str = typer.Option(..., "--labels_path"),
     parameters_file: str = typer.Option(..., "--parameters_file"),
+    models_path: str = typer.Option(..., "--models"),
     output_path: str = typer.Option(..., "--output_path"),
     output_labels_path: str = typer.Option(..., "--output_labels_path"),
     report_file: str = typer.Option(..., "--report_file"),
     metadata_path: str = typer.Option(..., "--metadata_path"),
 ):
-    cmd = f"python3 project/prepare.py --data_path={data_path} --labels_path={labels_path} --data_out={output_path} --labels_out={output_labels_path} --report={report_file} --parameters={parameters_file} --metadata_path={metadata_path}"
+    cmd = f"python3 project/prepare.py --data_path={data_path} --labels_path={labels_path} --models_path={models_path} --data_out={output_path} --labels_out={output_labels_path} --report={report_file} --parameters={parameters_file} --metadata_path={metadata_path}"
     exec_python(cmd)
 
 
diff --git a/mlcubes/data_preparation/project/prepare.py b/mlcubes/data_preparation/project/prepare.py
index a494c817..03427d87 100644
--- a/mlcubes/data_preparation/project/prepare.py
+++ b/mlcubes/data_preparation/project/prepare.py
@@ -2,6 +2,7 @@
 import argparse
 import pandas as pd
 import yaml
+import shutil
 from stages.generate_report import GenerateReport
 from stages.get_csv import AddToCSV
 from stages.nifti_transform import NIfTITransform
@@ -14,6 +15,8 @@
 from stages.pipeline import Pipeline
 from stages.constants import INTERIM_FOLDER, FINAL_FOLDER, TUMOR_MASK_FOLDER
 
+MODELS_PATH = "/project/models"
+
 
 def find_csv_filenames(path_to_dir, suffix=".csv"):
     filenames = os.listdir(path_to_dir)
@@ -28,6 +31,9 @@ def setup_argparser():
     parser.add_argument(
         "--labels_path", dest="labels", type=str, help="path containing labels"
     )
+    parser.add_argument(
+        "--models_path", dest="models", type=str, help="path to the nnunet models"
+    )
     parser.add_argument(
         "--data_out", dest="data_out", type=str, help="path to store prepared data"
     )
@@ -79,7 +85,7 @@ def init_pipeline(args):
     loop = None
     report_gen = GenerateReport(out_data_csv, args.data, out_raw, args.labels, args.labels_out, args.data_out, 8, brain_data_out, 3, tumor_data_out, 5)
     csv_proc = AddToCSV(out_raw, out_data_csv, valid_data_out, out_raw)
-    nifti_proc = NIfTITransform(out_data_csv, nifti_data_out, valid_data_out, args.metadata_path)
+    nifti_proc = NIfTITransform(out_data_csv, nifti_data_out, valid_data_out, args.metadata_path, args.data_out)
     brain_extract_proc = Extract(
         out_data_csv,
         brain_data_out,
@@ -141,24 +147,9 @@ def init_report(args) -> pd.DataFrame:
 def main():
     args = setup_argparser()
 
-    # Check if the input data is already prepared
-    # If so, just copy the contents and skip all processing
-    # TODO: this means we won't have a report. What would be the best way
-    # to handle this?
-    # TODO: Re-enable this when it is implemented correctly and we see the need for it
-    # # 1. If there is a csv file in the input folder
-    # # always reuse it for the prepared dataset
-    # csvs = find_csv_filenames(args.data_out)
-    # if len(csvs) == 1:
-    #     # One csv was found. Assume this is the desired csv
-    #     # move it to the expected location
-    #     # TODO: How to deal with inconsistent paths because of MLCube functionality?
-    #     csv_path = os.path.join(args.data_out, csvs[0])
-    #     os.rename(csv_path, out_data_csv)
-    #     # can we assume the paths inside data.csv to be relative to the csv?
-    #     # TODO: Create some logic to turn the csv paths into the expected paths for the MLCube
-    #     # update_csv_paths(out_data_csv)
-
+    # Copy the models to the expected location, unless something already exists there
+    if not os.path.exists(MODELS_PATH):
+        shutil.copytree(args.models, MODELS_PATH)
 
     report = init_report(args)
     pipeline = init_pipeline(args)
diff --git a/mlcubes/data_preparation/project/stages/generate_report.py b/mlcubes/data_preparation/project/stages/generate_report.py
index e5e253ab..57c8882f 100644
--- a/mlcubes/data_preparation/project/stages/generate_report.py
+++ b/mlcubes/data_preparation/project/stages/generate_report.py
@@ -345,6 +345,15 @@ def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]:
                 # Keep track of the cases that were found on the input folder
                 observed_cases.add(index)
 
+                has_semiprepared, in_tp_path = has_semiprepared_folder_structure(in_tp_path, recursive=True)
+                if has_semiprepared:
+                    tumor_seg = get_tumor_segmentation(subject, timepoint, in_tp_path)
+                    if tumor_seg is not None:
+                        report = self._proceed_to_comparison(subject, timepoint, in_tp_path, report)
+                    else:
+                        report = self._proceed_to_tumor_extraction(subject, timepoint, in_tp_path, report)
+                    continue
+
                 if index in report.index:
                     # Case has already been identified, see if input hash is different
                     # if so, override the contents and restart the state for that case
@@ -373,15 +382,6 @@ def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]:
                     # Move files around so it has the expected structure
                     to_expected_folder_structure(out_tp_path, contents_path)
 
-                has_semiprepared, in_tp_path = has_semiprepared_folder_structure(in_tp_path, recursive=True)
-                if has_semiprepared:
-                    tumor_seg = get_tumor_segmentation(subject, timepoint, in_tp_path)
-                    if tumor_seg is not None:
-                        report = self._proceed_to_comparison(subject, timepoint, in_tp_path, report)
-                    else:
-                        report = self._proceed_to_tumor_extraction(subject, timepoint, in_tp_path, report)
-                    continue
-
                 if input_is_prepared:
                     data["status_name"] = "DONE"
                     data["status_code"] = self.done_status_code
diff --git a/mlcubes/data_preparation/project/stages/nifti_transform.py b/mlcubes/data_preparation/project/stages/nifti_transform.py
index f59048f6..07ecc712 100644
--- a/mlcubes/data_preparation/project/stages/nifti_transform.py
+++ b/mlcubes/data_preparation/project/stages/nifti_transform.py
@@ -11,10 +11,11 @@
 
 class NIfTITransform(RowStage):
     def __init__(
-        self, data_csv: str, out_path: str, prev_stage_path: str, metadata_path: str
+        self, data_csv: str, out_path: str, prev_stage_path: str, metadata_path: str, data_out: str,
     ):
         self.data_csv = data_csv
         self.out_path = out_path
+        self.data_out = data_out
         self.prev_stage_path = prev_stage_path
         self.metadata_path = metadata_path
         os.makedirs(self.out_path, exist_ok=True)
@@ -85,7 +86,7 @@ def __process_case(self, index: Union[str, int]):
 
     def __update_prev_stage_state(self, index: Union[str, int], report: pd.DataFrame):
         prev_data_path = report.loc[index]["data_path"]
-        prev_data_path = unnormalize_path(prev_data_path, "mlcube_io3")
+        prev_data_path = unnormalize_path(prev_data_path, self.data_out)
         shutil.rmtree(prev_data_path)
 
     def __undo_current_stage_changes(self, index: Union[str, int]):
diff --git a/mlcubes/data_preparation/project/stages/pipeline.py b/mlcubes/data_preparation/project/stages/pipeline.py
index 1ca4560e..fb8f517b 100644
--- a/mlcubes/data_preparation/project/stages/pipeline.py
+++ b/mlcubes/data_preparation/project/stages/pipeline.py
@@ -19,8 +19,9 @@ def normalize_report_paths(report: DataFrame) -> DataFrame:
     Returns:
         DataFrame: report with transformed paths
     """
-    report["data_path"] = report["data_path"].str.split("mlcube_io3").str[-1]
-    report["labels_path"] = report["labels_path"].str.split("mlcube_io3").str[-1]
+    pattern = r"mlcube_io\d+"  # raw string so "\d" reaches the regex engine as a digit class
+    report["data_path"] = report["data_path"].str.split(pattern).str[-1]
+    report["labels_path"] = report["labels_path"].str.split(pattern).str[-1]
     return report