From 379074ab1555e0152729098c4f1e23ba85fd40f0 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Thu, 12 Oct 2023 16:47:43 -0500 Subject: [PATCH 01/13] Start working on nnunet stage --- .../project/stages/extract_nnunet.py | 187 ++++++++++++++++++ .../project/stages/generate_report.py | 7 +- 2 files changed, 189 insertions(+), 5 deletions(-) create mode 100644 mlcubes/data_preparation/project/stages/extract_nnunet.py diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py new file mode 100644 index 00000000..cd551629 --- /dev/null +++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py @@ -0,0 +1,187 @@ +from typing import Union, List, Tuple +from tqdm import tqdm +import pandas as pd +import os +from os.path import realpath, dirname, join +import shutil +import traceback + +from .row_stage import RowStage +from .PrepareDataset import Preparator, FINAL_FOLDER +from .utils import update_row_with_dict, get_id_tp, MockTqdm + +MODALITY_MAPPING = { + "t1c": "t1c", + "t1ce": "t1c", + "t1": "t1n", + "t1n": "t1n", + "t2": "t2w", + "t2w": "t2w", + "t2f": "t2f", + "flair": "t2f" +} + +class Extract(RowStage): + def __init__( + self, + data_csv: str, + out_path: str, + subpath: str, + prev_stage_path: str, + prev_subpath: str, + # pbar: tqdm, + func_name: str, + status_code: int, + ): + self.data_csv = data_csv + self.out_path = out_path + self.subpath = subpath + self.data_subpath = FINAL_FOLDER + self.prev_path = prev_stage_path + self.prev_subpath = prev_subpath + os.makedirs(self.out_path, exist_ok=True) + self.prep = Preparator(data_csv, out_path, "BraTSPipeline") + self.func_name = func_name + self.func = getattr(self.prep, func_name) + self.pbar = tqdm() + self.failed = False + self.exception = None + self.status_code = status_code + + def get_name(self) -> str: + return self.func_name.replace("_", " ").capitalize() + + def could_run(self, index: Union[str, int], report: 
pd.DataFrame) -> bool: + """Determine if case at given index needs to be converted to NIfTI + + Args: + index (Union[str, int]): Case index, as used by the report dataframe + report (pd.DataFrame): Report Dataframe for providing additional context + + Returns: + bool: Wether this stage could be executed for the given case + """ + prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath) + return all([os.path.exists(path) for path in prev_paths]) + + def execute( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + """Runs the pretrained nnUNet models for tumor segmentation + + Args: + index (Union[str, int]): case index, as used by the report + report (pd.DataFrame): DataFrame containing the current state of the preparation flow + + Returns: + pd.DataFrame: Updated report dataframe + """ + self.__prepare_exec() + self.__copy_case(index) + self.__process_case(index) + report, success = self.__update_state(index, report) + self.prep.write() + + return report, success + + def __prepare_exec(self): + + # Reset the file contents for errors + open(self.prep.stderr_log, "w").close() + + # Update the out dataframes to current state + self.prep.read() + + def __get_paths(self, index: Union[str, int], path: str, subpath: str): + id, tp = get_id_tp(index) + data_path = os.path.join(path, self.data_subpath, id, tp) + out_path = os.path.join(path, subpath, id, tp) + return data_path, out_path + + def __copy_case(self, index: Union[str, int]): + prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath) + copy_paths = self.__get_paths(index, self.out_path, self.prev_subpath) + for prev, copy in zip(prev_paths, copy_paths): + shutil.copytree(prev, copy, dirs_exist_ok=True) + + def __get_models(self): + rel_models_path = "../models/nnUNet_trained_models/nnUNet/3d_fullres" + models_path = realpath(join(dirname(__file__), rel_models_path)) + return os.listdir(models_path) + + def __get_mod_order(self, model): + 
rel_orders_path = "../models/nnUNet_modality_order" + order_path = realpath(join(dirname(__file__), rel_orders_path, model, "order")) + with open(order_path, "r") as f: + order_str = f.readline() + # remove 'order = ' from the splitted list + modalities = order_str.split()[2:] + modalities = [MODALITY_MAPPING[mod] for mod in modalities] + return modalities + + + def __prepare_case(self, path, id, tp, order): + pass + + + def __process_case(self, index: Union[str, int]): + id, tp = get_id_tp(index) + # TODO: identify all the nnunet models + models = self.__get_models() + for model in models: + # TODO: get the required order for modalities + order = self.__get_mod_order(model) + # TODO: create a temporary folder with the renamed modalities + tmp_data_path = self.__prepare_case(self.out_path, id, tp, order) + # TODO: run model with specified inputs and outputs + run_model(model, tmp_data_path, tmp_out_path) + # get final .nii.gz file + finalize_pred(tmp_out_path) + #cleanup + cleanup_tmp_paths(tmp_data_path, tmp_out_path) + + + def __update_state( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + if self.failed: + del_paths = self.__get_paths(index, self.out_path, self.subpath) + report, success = self.__report_failure(index, report) + else: + del_paths = self.__get_paths(index, self.prev_path, self.prev_subpath) + report, success = self.__report_success(index, report) + + for del_path in del_paths: + shutil.rmtree(del_path, ignore_errors=True) + + return report, success + + def __report_success( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + data_path, labels_path = self.__get_paths(index, self.out_path, self.subpath) + report_data = { + "status": self.status_code, + "status_name": f"{self.func_name.upper()}_FINISHED", + "comment": "", + "data_path": data_path, + "labels_path": labels_path, + } + update_row_with_dict(report, report_data, index) + return report, True + + def 
__report_failure( + self, index: Union[str, int], report: pd.DataFrame + ) -> Tuple[pd.DataFrame, bool]: + prev_data_path, prev_labels_path = self.__get_paths(index, self.prev_path, self.prev_subpath) + msg = f"{str(self.exception)}: {self.traceback}" + + report_data = { + "status": -self.status_code, + "status_name": f"{self.func_name.upper()}_FAILED", + "comment": msg, + "data_path": prev_data_path, + "labels_path": prev_labels_path, + } + update_row_with_dict(report, report_data, index) + return report, False \ No newline at end of file diff --git a/mlcubes/data_preparation/project/stages/generate_report.py b/mlcubes/data_preparation/project/stages/generate_report.py index e666edaf..48d3e12c 100644 --- a/mlcubes/data_preparation/project/stages/generate_report.py +++ b/mlcubes/data_preparation/project/stages/generate_report.py @@ -94,12 +94,9 @@ def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]: if report.loc[index]["input_hash"] == input_hash: continue - shutil.rmtree(out_tp_path, ignore_errors=True) - shutil.copytree(in_tp_path, out_tp_path) report = report.drop(index) - else: - # New case not identified by the report. 
Add it - shutil.copytree(in_tp_path, out_tp_path) + shutil.rmtree(out_tp_path, ignore_errors=True) + shutil.copytree(in_tp_path, out_tp_path) data = { "status": self.status_code, From 957ca68effdfa7b0b3b8a3198625dab3b98d9d32 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Fri, 13 Oct 2023 15:34:07 -0500 Subject: [PATCH 02/13] Implement extract nnunet --- Dockerfile | 10 +++ mlcubes/data_preparation/mlcube/mlcube.yaml | 2 +- mlcubes/data_preparation/project/prepare.py | 5 +- .../project/stages/extract_nnunet.py | 74 ++++++++++++++----- 4 files changed, 68 insertions(+), 23 deletions(-) diff --git a/Dockerfile b/Dockerfile index 128d2fad..a64a3cdf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -83,6 +83,16 @@ RUN cp -R /Front-End/bin/install/appdir/usr/bin/data_prep_models /project/stages # Hotfix: install more recent version of GaNDLF for metrics generation RUN pip install git+https://github.com/mlcommons/GaNDLF@616b37bafad8f89d5c816a88f44fa30470601311 +RUN pip install torch torchvision + +RUN pip install git+https://github.com/MIC-DKFZ/nnUNet.git@nnunetv1 + +RUN mkdir /nnUNet_raw_data_base && mkdir /nnUNet_preprocessed + +ENV nnUNet_raw_data_base="/nnUNet_raw_data_base" +ENV nnUNet_preprocessed="/nnUNet_preprocessed" +ENV RESULTS_FOLDER="/project/models/nnUNet_trained_models" + COPY ./mlcubes/data_preparation/project /project ENTRYPOINT ["python", "/project/mlcube.py"] diff --git a/mlcubes/data_preparation/mlcube/mlcube.yaml b/mlcubes/data_preparation/mlcube/mlcube.yaml index 518a20d6..44386677 100644 --- a/mlcubes/data_preparation/mlcube/mlcube.yaml +++ b/mlcubes/data_preparation/mlcube/mlcube.yaml @@ -8,7 +8,7 @@ platform: docker: # Image name - image: mlcommons/rano-data-prep:latest + image: mlcommons/rano-data-prep:nnunet # Docker build context relative to $MLCUBE_ROOT. Default is `build`. build_context: "../project" # Docker file name within docker build context, default is `Dockerfile`. 
diff --git a/mlcubes/data_preparation/project/prepare.py b/mlcubes/data_preparation/project/prepare.py index 3003cfb5..f5ab4fde 100644 --- a/mlcubes/data_preparation/project/prepare.py +++ b/mlcubes/data_preparation/project/prepare.py @@ -7,6 +7,7 @@ from stages.get_csv import AddToCSV from stages.nifti_transform import NIfTITransform from stages.extract import Extract +from stages.extract_nnunet import ExtractNnUNet from stages.manual import ManualStage from stages.comparison import SegmentationComparisonStage from stages.confirm import ConfirmStage @@ -104,14 +105,12 @@ def init_pipeline(args): "extract_brain", 3, ) - tumor_extract_proc = Extract( + tumor_extract_proc = ExtractNnUNet( out_data_csv, tumor_data_out, TUMOR_MASK_FOLDER, brain_data_out, INTERIM_FOLDER, - # loop, - "extract_tumor", 4, ) manual_proc = ManualStage(out_data_csv, tumor_data_out, tumor_data_out, backup_out) diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py index cd551629..8420d82f 100644 --- a/mlcubes/data_preparation/project/stages/extract_nnunet.py +++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py @@ -5,6 +5,8 @@ from os.path import realpath, dirname, join import shutil import traceback +import time +import subprocess from .row_stage import RowStage from .PrepareDataset import Preparator, FINAL_FOLDER @@ -21,7 +23,7 @@ "flair": "t2f" } -class Extract(RowStage): +class ExtractNnUNet(RowStage): def __init__( self, data_csv: str, @@ -29,8 +31,6 @@ def __init__( subpath: str, prev_stage_path: str, prev_subpath: str, - # pbar: tqdm, - func_name: str, status_code: int, ): self.data_csv = data_csv @@ -41,15 +41,13 @@ def __init__( self.prev_subpath = prev_subpath os.makedirs(self.out_path, exist_ok=True) self.prep = Preparator(data_csv, out_path, "BraTSPipeline") - self.func_name = func_name - self.func = getattr(self.prep, func_name) self.pbar = tqdm() self.failed = False self.exception = None 
self.status_code = status_code def get_name(self) -> str: - return self.func_name.replace("_", " ").capitalize() + return "nnUNet Tumor Extraction" def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool: """Determine if case at given index needs to be converted to NIfTI @@ -121,24 +119,62 @@ def __get_mod_order(self, model): def __prepare_case(self, path, id, tp, order): - pass + tmp_subject = f"{id}-{tp}" + tmp_path = os.path.join(path, "tmp-data") + tmp_subject_path = os.path.join(tmp_path, tmp_subject) + tmp_out_path = os.path.join(path, "tmp-out") + shutil.rmtree(tmp_path, ignore_errors=True) + shutil.rmtree(tmp_out_path, ignore_errors=True) + os.makedirs(tmp_subject_path) + os.makedirs(tmp_out_path) + in_modalities_path = os.path.join(path, "DataForFeTS", id, tp) + for modality_file in os.listdir(in_modalities_path): + if not modality_file.endswith(".nii.gz"): + continue + modality = modality_file[:-7].split("_")[-1] + norm_mod = MODALITY_MAPPING[modality] + mod_idx = order.index(norm_mod) + mod_idx = str(mod_idx).zfill(4) + + out_modality_file = f"{tmp_subject}_{mod_idx}.nii.gz" + in_file = os.path.join(in_modalities_path, modality_file) + out_file = os.path.join(tmp_subject_path, out_modality_file) + shutil.copyfile(in_file, out_file) + print(out_file) + + return tmp_subject_path, tmp_out_path + def __run_model(self, model, data_path, out_path): + # models are named Task_..., where is always 3 numbers + task_id = model[4:7] + cmd = f"nnUNet_predict -i {data_path} -o {out_path} -t {task_id} -f all" + print(cmd) + print(os.listdir(data_path)) + start = time.time() + subprocess.call(cmd, shell=True) + end = time.time() + total_time = (end - start) + print(f"Total time elapsed is {total_time} seconds") + + def __finalize_pred(self, tmp_out_path, out_path, id, tp, model_idx): + # We assume there's only one file in out_path + pred = os.listdir(tmp_out_path)[0] + pred_filepath = os.path.join(tmp_out_path, pred) + out_pred_path = 
os.path.join(out_path, "DataForQC", id, tp, "TumorMasksForQC") + out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{model_idx}.nii.gz") + shutil.move(pred_filepath, out_pred_filepath) def __process_case(self, index: Union[str, int]): id, tp = get_id_tp(index) - # TODO: identify all the nnunet models models = self.__get_models() - for model in models: - # TODO: get the required order for modalities + for i, model in enumerate(models): order = self.__get_mod_order(model) - # TODO: create a temporary folder with the renamed modalities - tmp_data_path = self.__prepare_case(self.out_path, id, tp, order) - # TODO: run model with specified inputs and outputs - run_model(model, tmp_data_path, tmp_out_path) - # get final .nii.gz file - finalize_pred(tmp_out_path) + tmp_data_path, tmp_out_path = self.__prepare_case(self.out_path, id, tp, order) + self.__run_model(model, tmp_data_path, tmp_out_path) + self.__finalize_pred(tmp_out_path, self.out_path, id, tp, i) #cleanup - cleanup_tmp_paths(tmp_data_path, tmp_out_path) + shutil.rmtree(tmp_data_path, ignore_errors=True) + shutil.rmtree(tmp_out_path, ignore_errors=True) def __update_state( @@ -162,7 +198,7 @@ def __report_success( data_path, labels_path = self.__get_paths(index, self.out_path, self.subpath) report_data = { "status": self.status_code, - "status_name": f"{self.func_name.upper()}_FINISHED", + "status_name": "TUMOR_EXTRACT_FINISHED", "comment": "", "data_path": data_path, "labels_path": labels_path, @@ -178,7 +214,7 @@ def __report_failure( report_data = { "status": -self.status_code, - "status_name": f"{self.func_name.upper()}_FAILED", + "status_name": "TUMOR_EXTRACT_FAILED", "comment": msg, "data_path": prev_data_path, "labels_path": prev_labels_path, From c428ff06e31512d8d1b3d7ab4227add315e2f638 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Mon, 16 Oct 2023 10:44:02 -0500 Subject: [PATCH 03/13] Handle failure scenarios --- 
.../data_preparation/project/stages/extract_nnunet.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py index 8420d82f..97245821 100644 --- a/mlcubes/data_preparation/project/stages/extract_nnunet.py +++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py @@ -170,8 +170,13 @@ def __process_case(self, index: Union[str, int]): for i, model in enumerate(models): order = self.__get_mod_order(model) tmp_data_path, tmp_out_path = self.__prepare_case(self.out_path, id, tp, order) - self.__run_model(model, tmp_data_path, tmp_out_path) - self.__finalize_pred(tmp_out_path, self.out_path, id, tp, i) + try: + self.__run_model(model, tmp_data_path, tmp_out_path) + self.__finalize_pred(tmp_out_path, self.out_path, id, tp, i) + except Exception as e: + self.exception = e + self.failed = True + return #cleanup shutil.rmtree(tmp_data_path, ignore_errors=True) shutil.rmtree(tmp_out_path, ignore_errors=True) From bf9b086ae2da62c7e1dea59fe4034d14617f0f33 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Wed, 18 Oct 2023 10:53:33 -0500 Subject: [PATCH 04/13] Modularize tumor extraction --- src/applications/PrepareDataset.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/applications/PrepareDataset.py b/src/applications/PrepareDataset.py index a79da2dd..a11d8456 100644 --- a/src/applications/PrepareDataset.py +++ b/src/applications/PrepareDataset.py @@ -118,7 +118,7 @@ def _get_relevant_dicom_tags(filename: str) -> dict: return output_dict -def _save_screenshot( +def save_screenshot( input_images: dict, output_filename: str = None, input_mask: str = None ) -> None: """ @@ -447,7 +447,13 @@ def _run_tumor_segmentation_using_gandlf( tumor_masks_to_return.append(renamed_path) images_for_fusion.append(sitk.ReadImage(file_path, sitk.sitkUInt8)) + fused_masks_to_return = 
generate_tumor_segmentation_fused_images(images_for_fusion, mask_output_dir, subject_id) + return tumor_masks_to_return + fused_masks_to_return + + +def generate_tumor_segmentation_fused_images(images_for_fusion, mask_output_dir, subject_id): tumor_class_list = [0, 1, 2, 3, 4] + fused_masks_to_return = [] if len(images_for_fusion) > 1: for fusion_type in ["staple", "simple", "voting"]: @@ -457,9 +463,9 @@ def _run_tumor_segmentation_using_gandlf( f"{subject_id}_tumorMask_fused-{fusion_type}.nii.gz", ) sitk.WriteImage(fused_mask, fused_mask_file) - tumor_masks_to_return.append(fused_mask_file) + fused_masks_to_return.append(fused_mask_file) - return tumor_masks_to_return + return fused_masks_to_return class Preparator: @@ -685,7 +691,7 @@ def convert_to_dicom(self, idx: int, row: pd.Series, pbar: tqdm): f"{subject_id_timepoint}_summary_coregistration.png", ) # save the screenshot - _save_screenshot(outputs_reoriented, screenshot_path) + save_screenshot(outputs_reoriented, screenshot_path) if os.path.exists(screenshot_path): shutil.copyfile( @@ -743,7 +749,7 @@ def extract_brain(self, row: pd.Series, pbar: tqdm): sitk.WriteImage(masked_image, file_to_save) # save the screenshot - _save_screenshot( + save_screenshot( input_for_tumor_models, posixpath.join( interimOutputDir_actual, @@ -783,7 +789,7 @@ def extract_tumor(self, row: pd.Series, pbar: tqdm): for tumor_mask in tumor_masks_for_qc: tumor_mask_id = os.path.basename(tumor_mask).replace(".nii.gz", "") # save the screenshot - _save_screenshot( + save_screenshot( input_for_tumor_models, posixpath.join(interimOutputDir_actual, f"{tumor_mask_id}_summary.png"), tumor_mask, From b7bdd49947359d53716c43404ce9c996302b158a Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Wed, 18 Oct 2023 10:54:10 -0500 Subject: [PATCH 05/13] Add tumor fusing and screenshots --- .../project/stages/extract_nnunet.py | 72 ++++++++++++++++--- 1 file changed, 61 insertions(+), 11 deletions(-) diff --git 
a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py index 97245821..92956c0d 100644 --- a/mlcubes/data_preparation/project/stages/extract_nnunet.py +++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py @@ -4,12 +4,14 @@ import os from os.path import realpath, dirname, join import shutil -import traceback import time +import SimpleITK as sitk import subprocess +import traceback +from LabelFusion.wrapper import fuse_images from .row_stage import RowStage -from .PrepareDataset import Preparator, FINAL_FOLDER +from .PrepareDataset import Preparator, FINAL_FOLDER, generate_tumor_segmentation_fused_images, save_screenshot from .utils import update_row_with_dict, get_id_tp, MockTqdm MODALITY_MAPPING = { @@ -23,6 +25,17 @@ "flair": "t2f" } +MODALITY_VARIANTS = { + "t1c": "T1GD", + "t1ce": "T1GD", + "t1": "T1", + "t1n": "T1", + "t2": "T2", + "t2w": "T2", + "t2f": "FLAIR", + "flair": "FLAIR" +} + class ExtractNnUNet(RowStage): def __init__( self, @@ -116,7 +129,6 @@ def __get_mod_order(self, model): modalities = order_str.split()[2:] modalities = [MODALITY_MAPPING[mod] for mod in modalities] return modalities - def __prepare_case(self, path, id, tp, order): tmp_subject = f"{id}-{tp}" @@ -128,6 +140,7 @@ def __prepare_case(self, path, id, tp, order): os.makedirs(tmp_subject_path) os.makedirs(tmp_out_path) in_modalities_path = os.path.join(path, "DataForFeTS", id, tp) + input_modalities = {} for modality_file in os.listdir(in_modalities_path): if not modality_file.endswith(".nii.gz"): continue @@ -139,10 +152,10 @@ def __prepare_case(self, path, id, tp, order): out_modality_file = f"{tmp_subject}_{mod_idx}.nii.gz" in_file = os.path.join(in_modalities_path, modality_file) out_file = os.path.join(tmp_subject_path, out_modality_file) + input_modalities[MODALITY_VARIANTS[modality]] = in_file shutil.copyfile(in_file, out_file) - print(out_file) - return tmp_subject_path, tmp_out_path + return 
tmp_subject_path, tmp_out_path, input_modalities def __run_model(self, model, data_path, out_path): # models are named Task_..., where is always 3 numbers @@ -156,32 +169,69 @@ def __run_model(self, model, data_path, out_path): total_time = (end - start) print(f"Total time elapsed is {total_time} seconds") - def __finalize_pred(self, tmp_out_path, out_path, id, tp, model_idx): + def __finalize_pred(self, tmp_out_path, out_pred_filepath): # We assume there's only one file in out_path - pred = os.listdir(tmp_out_path)[0] + pred = None + for file in os.listdir(tmp_out_path): + if file.endswith(".nii.gz"): + pred = file + + if pred is None: + raise RuntimeError("No tumor segmentation was found") + pred_filepath = os.path.join(tmp_out_path, pred) - out_pred_path = os.path.join(out_path, "DataForQC", id, tp, "TumorMasksForQC") - out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{model_idx}.nii.gz") shutil.move(pred_filepath, out_pred_filepath) + return out_pred_filepath def __process_case(self, index: Union[str, int]): id, tp = get_id_tp(index) + subject_id = f"{id}_{tp}" models = self.__get_models() + outputs = [] + images_for_fusion = [] + out_path = os.path.join(self.out_path, "DataForQC", id, tp) + out_pred_path = os.path.join(out_path, "TumorMasksForQC") + os.makedirs(out_pred_path, exist_ok=True) for i, model in enumerate(models): order = self.__get_mod_order(model) - tmp_data_path, tmp_out_path = self.__prepare_case(self.out_path, id, tp, order) + tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case(self.out_path, id, tp, order) + out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz") + if os.path.exists(out_pred_filepath): + print("Model output detected, skipping model") + continue try: self.__run_model(model, tmp_data_path, tmp_out_path) - self.__finalize_pred(tmp_out_path, self.out_path, id, tp, i) + output = self.__finalize_pred(tmp_out_path, out_pred_filepath) + outputs.append(output) 
+ images_for_fusion.append(sitk.ReadImage(output, sitk.sitkUInt8)) except Exception as e: self.exception = e self.failed = True + self.traceback = traceback.format_exc() return + #cleanup shutil.rmtree(tmp_data_path, ignore_errors=True) shutil.rmtree(tmp_out_path, ignore_errors=True) + fused_outputs = generate_tumor_segmentation_fused_images(images_for_fusion, out_pred_path, subject_id) + outputs += fused_outputs + + for output in outputs: + # save the screenshot + tumor_mask_id = os.path.basename(output).replace(".nii.gz", "") + save_screenshot( + input_modalities, + os.path.join( + out_path, + f"{tumor_mask_id}_summary.png", + ), + output, + ) + + + def __update_state( self, index: Union[str, int], report: pd.DataFrame ) -> Tuple[pd.DataFrame, bool]: From 600d4d8f33b6b63838466c555aca229dcbf2088c Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Wed, 18 Oct 2023 11:05:18 -0500 Subject: [PATCH 06/13] ignore models folder --- mlcubes/.gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlcubes/.gitignore b/mlcubes/.gitignore index ac044f44..370c46a5 100644 --- a/mlcubes/.gitignore +++ b/mlcubes/.gitignore @@ -6,4 +6,5 @@ *.png */mlcube/workspace/* !requirements.txt -!*/mlcube/workspace/parameters.yaml \ No newline at end of file +!*/mlcube/workspace/parameters.yaml +mlcubes/data_preparation/project/models \ No newline at end of file From 0f26cff91c6d1db4c4980a874e657164f4766eb3 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Wed, 18 Oct 2023 11:16:57 -0500 Subject: [PATCH 07/13] fix git ignore models --- mlcubes/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlcubes/.gitignore b/mlcubes/.gitignore index 370c46a5..be8d1082 100644 --- a/mlcubes/.gitignore +++ b/mlcubes/.gitignore @@ -7,4 +7,4 @@ */mlcube/workspace/* !requirements.txt !*/mlcube/workspace/parameters.yaml -mlcubes/data_preparation/project/models \ No newline at end of file +models \ No newline at end of file From 
ed3ffbacb1ca9da8c785cb7dc25e14e1ff1df802 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Thu, 19 Oct 2023 10:45:06 -0500 Subject: [PATCH 08/13] Don't skip nnunet models --- mlcubes/data_preparation/project/stages/extract_nnunet.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py index 92956c0d..cf67a5a0 100644 --- a/mlcubes/data_preparation/project/stages/extract_nnunet.py +++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py @@ -196,9 +196,6 @@ def __process_case(self, index: Union[str, int]): order = self.__get_mod_order(model) tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case(self.out_path, id, tp, order) out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz") - if os.path.exists(out_pred_filepath): - print("Model output detected, skipping model") - continue try: self.__run_model(model, tmp_data_path, tmp_out_path) output = self.__finalize_pred(tmp_out_path, out_pred_filepath) From c1cdc89aed43a1fbc17dd113cc79c207688c2c2e Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Wed, 15 Nov 2023 12:12:36 -0500 Subject: [PATCH 09/13] Pass stages to medperf through parameters --- .../mlcube/workspace/parameters.yaml | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml index 63af4621..ac8e03e4 100644 --- a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml +++ b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml @@ -1,2 +1,23 @@ seed: 2784 -train_percent: 0.8 \ No newline at end of file +train_percent: 0.8 +medperf_report_stages: +- "IDENTIFIED" +- "VALIDATED" +- "MISSING_MODALITIES" +- "EXTRA_MODALITIES" +- "VALIDATION" +- "CONVERTED_TO_NIfTI" +- "NIfTI_CONVERSION_FAILED" +- "BRAIN_EXTRACT_FINISHED" +- 
"BRAIN_EXTRACT_FAILED" +- "TUMOR_EXTRACT_FINISHED" +- "TUMOR_EXTRACT_FAILED" +- "MANUAL_REVIEW_COMPLETED" +- "MANUAL_REVIEW_REQUIRED" +- "MULTIPLE_ANNOTATIONS_ERROR" +- "COMPARISON_COMPLETE" +- "EXACT_MATCH_IDENTIFIED" +- "ANNOTATION_COMPARISON_FAILED" +- "ANNOTATION_CONFIRMED" +- "UNHANDLED_ERROR" +- "DONE" \ No newline at end of file From e837c7ecf4675cbdf93432e72921237b56576ff9 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Wed, 15 Nov 2023 12:16:57 -0500 Subject: [PATCH 10/13] Revert "Pass stages to medperf through parameters" This reverts commit c1cdc89aed43a1fbc17dd113cc79c207688c2c2e. --- .../mlcube/workspace/parameters.yaml | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml index ac8e03e4..63af4621 100644 --- a/mlcubes/data_preparation/mlcube/workspace/parameters.yaml +++ b/mlcubes/data_preparation/mlcube/workspace/parameters.yaml @@ -1,23 +1,2 @@ seed: 2784 -train_percent: 0.8 -medperf_report_stages: -- "IDENTIFIED" -- "VALIDATED" -- "MISSING_MODALITIES" -- "EXTRA_MODALITIES" -- "VALIDATION" -- "CONVERTED_TO_NIfTI" -- "NIfTI_CONVERSION_FAILED" -- "BRAIN_EXTRACT_FINISHED" -- "BRAIN_EXTRACT_FAILED" -- "TUMOR_EXTRACT_FINISHED" -- "TUMOR_EXTRACT_FAILED" -- "MANUAL_REVIEW_COMPLETED" -- "MANUAL_REVIEW_REQUIRED" -- "MULTIPLE_ANNOTATIONS_ERROR" -- "COMPARISON_COMPLETE" -- "EXACT_MATCH_IDENTIFIED" -- "ANNOTATION_COMPARISON_FAILED" -- "ANNOTATION_CONFIRMED" -- "UNHANDLED_ERROR" -- "DONE" \ No newline at end of file +train_percent: 0.8 \ No newline at end of file From 7a6366bd951cb298857fbfb2639144c5548b7bb1 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Mon, 11 Dec 2023 12:14:04 -0500 Subject: [PATCH 11/13] Import from extract for nnUNet --- .../project/stages/extract.py | 4 +- .../project/stages/extract_nnunet.py | 148 ++++-------------- 2 files changed, 35 insertions(+), 117 deletions(-) 
diff --git a/mlcubes/data_preparation/project/stages/extract.py b/mlcubes/data_preparation/project/stages/extract.py index a5f3a8a8..1aa53ee5 100644 --- a/mlcubes/data_preparation/project/stages/extract.py +++ b/mlcubes/data_preparation/project/stages/extract.py @@ -74,7 +74,7 @@ def execute( """ self.__prepare_exec() self.__copy_case(index) - self.__process_case(index) + self._process_case(index) report, success = self.__update_state(index, report) self.prep.write() @@ -99,7 +99,7 @@ def __copy_case(self, index: Union[str, int]): for prev, copy in zip(prev_paths, copy_paths): shutil.copytree(prev, copy, dirs_exist_ok=True) - def __process_case(self, index: Union[str, int]): + def _process_case(self, index: Union[str, int]): id, tp = get_id_tp(index) df = self.prep.subjects_df row = df[(df["SubjectID"] == id) & (df["Timepoint"] == tp)].iloc[0] diff --git a/mlcubes/data_preparation/project/stages/extract_nnunet.py b/mlcubes/data_preparation/project/stages/extract_nnunet.py index cf67a5a0..12361b61 100644 --- a/mlcubes/data_preparation/project/stages/extract_nnunet.py +++ b/mlcubes/data_preparation/project/stages/extract_nnunet.py @@ -10,8 +10,13 @@ import traceback from LabelFusion.wrapper import fuse_images -from .row_stage import RowStage -from .PrepareDataset import Preparator, FINAL_FOLDER, generate_tumor_segmentation_fused_images, save_screenshot +from .extract import Extract +from .PrepareDataset import ( + Preparator, + FINAL_FOLDER, + generate_tumor_segmentation_fused_images, + save_screenshot, +) from .utils import update_row_with_dict, get_id_tp, MockTqdm MODALITY_MAPPING = { @@ -22,7 +27,7 @@ "t2": "t2w", "t2w": "t2w", "t2f": "t2f", - "flair": "t2f" + "flair": "t2f", } MODALITY_VARIANTS = { @@ -33,10 +38,11 @@ "t2": "T2", "t2w": "T2", "t2f": "FLAIR", - "flair": "FLAIR" + "flair": "FLAIR", } -class ExtractNnUNet(RowStage): + +class ExtractNnUNet(Extract): def __init__( self, data_csv: str, @@ -45,6 +51,7 @@ def __init__( prev_stage_path: str, prev_subpath: 
str, status_code: int, + extra_labels_path=[], ): self.data_csv = data_csv self.out_path = out_path @@ -57,63 +64,16 @@ def __init__( self.pbar = tqdm() self.failed = False self.exception = None - self.status_code = status_code + self.__status_code = status_code + self.extra_labels_path = extra_labels_path - def get_name(self) -> str: + @property + def name(self) -> str: return "nnUNet Tumor Extraction" - def could_run(self, index: Union[str, int], report: pd.DataFrame) -> bool: - """Determine if case at given index needs to be converted to NIfTI - - Args: - index (Union[str, int]): Case index, as used by the report dataframe - report (pd.DataFrame): Report Dataframe for providing additional context - - Returns: - bool: Wether this stage could be executed for the given case - """ - prev_paths = self.__get_paths(index, self.prev_path, self.prev_subpath) - return all([os.path.exists(path) for path in prev_paths]) - - def execute( - self, index: Union[str, int], report: pd.DataFrame - ) -> Tuple[pd.DataFrame, bool]: - """Runs the pretrained nnUNet models for tumor segmentation - - Args: - index (Union[str, int]): case index, as used by the report - report (pd.DataFrame): DataFrame containing the current state of the preparation flow - - Returns: - pd.DataFrame: Updated report dataframe - """ - self.__prepare_exec() - self.__copy_case(index) - self.__process_case(index) - report, success = self.__update_state(index, report) - self.prep.write() - - return report, success - - def __prepare_exec(self): - - # Reset the file contents for errors - open(self.prep.stderr_log, "w").close() - - # Update the out dataframes to current state - self.prep.read() - - def __get_paths(self, index: Union[str, int], path: str, subpath: str): - id, tp = get_id_tp(index) - data_path = os.path.join(path, self.data_subpath, id, tp) - out_path = os.path.join(path, subpath, id, tp) - return data_path, out_path - - def __copy_case(self, index: Union[str, int]): - prev_paths = 
self.__get_paths(index, self.prev_path, self.prev_subpath) - copy_paths = self.__get_paths(index, self.out_path, self.prev_subpath) - for prev, copy in zip(prev_paths, copy_paths): - shutil.copytree(prev, copy, dirs_exist_ok=True) + @property + def status_code(self) -> str: + return self.__status_code def __get_models(self): rel_models_path = "../models/nnUNet_trained_models/nnUNet/3d_fullres" @@ -156,7 +116,7 @@ def __prepare_case(self, path, id, tp, order): shutil.copyfile(in_file, out_file) return tmp_subject_path, tmp_out_path, input_modalities - + def __run_model(self, model, data_path, out_path): # models are named Task_..., where is always 3 numbers task_id = model[4:7] @@ -166,7 +126,7 @@ def __run_model(self, model, data_path, out_path): start = time.time() subprocess.call(cmd, shell=True) end = time.time() - total_time = (end - start) + total_time = end - start print(f"Total time elapsed is {total_time} seconds") def __finalize_pred(self, tmp_out_path, out_pred_filepath): @@ -183,19 +143,23 @@ def __finalize_pred(self, tmp_out_path, out_pred_filepath): shutil.move(pred_filepath, out_pred_filepath) return out_pred_filepath - def __process_case(self, index: Union[str, int]): + def _process_case(self, index: Union[str, int]): id, tp = get_id_tp(index) subject_id = f"{id}_{tp}" models = self.__get_models() outputs = [] images_for_fusion = [] - out_path = os.path.join(self.out_path, "DataForQC", id, tp) + out_path = os.path.join(self.out_path, "DataForQC", id, tp) out_pred_path = os.path.join(out_path, "TumorMasksForQC") os.makedirs(out_pred_path, exist_ok=True) for i, model in enumerate(models): order = self.__get_mod_order(model) - tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case(self.out_path, id, tp, order) - out_pred_filepath = os.path.join(out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz") + tmp_data_path, tmp_out_path, input_modalities = self.__prepare_case( + self.out_path, id, tp, order + ) + out_pred_filepath = os.path.join( + 
out_pred_path, f"{id}_{tp}_tumorMask_model_{i}.nii.gz" + ) try: self.__run_model(model, tmp_data_path, tmp_out_path) output = self.__finalize_pred(tmp_out_path, out_pred_filepath) @@ -207,12 +171,13 @@ def __process_case(self, index: Union[str, int]): self.traceback = traceback.format_exc() return - #cleanup + # cleanup shutil.rmtree(tmp_data_path, ignore_errors=True) shutil.rmtree(tmp_out_path, ignore_errors=True) - - fused_outputs = generate_tumor_segmentation_fused_images(images_for_fusion, out_pred_path, subject_id) + fused_outputs = generate_tumor_segmentation_fused_images( + images_for_fusion, out_pred_path, subject_id + ) outputs += fused_outputs for output in outputs: @@ -226,50 +191,3 @@ def __process_case(self, index: Union[str, int]): ), output, ) - - - - def __update_state( - self, index: Union[str, int], report: pd.DataFrame - ) -> Tuple[pd.DataFrame, bool]: - if self.failed: - del_paths = self.__get_paths(index, self.out_path, self.subpath) - report, success = self.__report_failure(index, report) - else: - del_paths = self.__get_paths(index, self.prev_path, self.prev_subpath) - report, success = self.__report_success(index, report) - - for del_path in del_paths: - shutil.rmtree(del_path, ignore_errors=True) - - return report, success - - def __report_success( - self, index: Union[str, int], report: pd.DataFrame - ) -> Tuple[pd.DataFrame, bool]: - data_path, labels_path = self.__get_paths(index, self.out_path, self.subpath) - report_data = { - "status": self.status_code, - "status_name": "TUMOR_EXTRACT_FINISHED", - "comment": "", - "data_path": data_path, - "labels_path": labels_path, - } - update_row_with_dict(report, report_data, index) - return report, True - - def __report_failure( - self, index: Union[str, int], report: pd.DataFrame - ) -> Tuple[pd.DataFrame, bool]: - prev_data_path, prev_labels_path = self.__get_paths(index, self.prev_path, self.prev_subpath) - msg = f"{str(self.exception)}: {self.traceback}" - - report_data = { - "status": 
-self.status_code, - "status_name": "TUMOR_EXTRACT_FAILED", - "comment": msg, - "data_path": prev_data_path, - "labels_path": prev_labels_path, - } - update_row_with_dict(report, report_data, index) - return report, False \ No newline at end of file From 08a903278c018f4389c920a197265927234d9a30 Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Mon, 11 Dec 2023 15:17:24 -0500 Subject: [PATCH 12/13] Sync generate_report with parent --- mlcubes/data_preparation/project/stages/generate_report.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mlcubes/data_preparation/project/stages/generate_report.py b/mlcubes/data_preparation/project/stages/generate_report.py index 085e5c1e..fa1c0342 100644 --- a/mlcubes/data_preparation/project/stages/generate_report.py +++ b/mlcubes/data_preparation/project/stages/generate_report.py @@ -88,9 +88,12 @@ def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]: if report.loc[index]["input_hash"] == input_hash: continue + shutil.rmtree(out_tp_path, ignore_errors=True) + shutil.copytree(in_tp_path, out_tp_path) report = report.drop(index) - shutil.rmtree(out_tp_path, ignore_errors=True) - shutil.copytree(in_tp_path, out_tp_path) + else: + # New case not identified by the report. Add it + shutil.copytree(in_tp_path, out_tp_path) data = { "status": self.status_code, From c97597dd782818bc58b57a536ed25b4871ba403d Mon Sep 17 00:00:00 2001 From: Alejandro Aristizabal Date: Mon, 18 Dec 2023 10:24:30 -0500 Subject: [PATCH 13/13] Pass models through tarball. 
Fix skip issues --- mlcubes/data_preparation/mlcube/mlcube.yaml | 1 + mlcubes/data_preparation/project/mlcube.py | 3 +- mlcubes/data_preparation/project/prepare.py | 29 +++++++------------ .../project/stages/generate_report.py | 18 ++++++------ .../project/stages/nifti_transform.py | 5 ++-- .../project/stages/pipeline.py | 5 ++-- 6 files changed, 28 insertions(+), 33 deletions(-) diff --git a/mlcubes/data_preparation/mlcube/mlcube.yaml b/mlcubes/data_preparation/mlcube/mlcube.yaml index f924e2f6..4a2ec422 100644 --- a/mlcubes/data_preparation/mlcube/mlcube.yaml +++ b/mlcubes/data_preparation/mlcube/mlcube.yaml @@ -21,6 +21,7 @@ tasks: data_path: input_data, labels_path: input_labels, parameters_file: parameters.yaml, + models: additional_files/models, } outputs: { output_path: data/, diff --git a/mlcubes/data_preparation/project/mlcube.py b/mlcubes/data_preparation/project/mlcube.py index be71c415..749b7f72 100644 --- a/mlcubes/data_preparation/project/mlcube.py +++ b/mlcubes/data_preparation/project/mlcube.py @@ -23,12 +23,13 @@ def prepare( data_path: str = typer.Option(..., "--data_path"), labels_path: str = typer.Option(..., "--labels_path"), parameters_file: str = typer.Option(..., "--parameters_file"), + models_path: str = typer.Option(..., "--models"), output_path: str = typer.Option(..., "--output_path"), output_labels_path: str = typer.Option(..., "--output_labels_path"), report_file: str = typer.Option(..., "--report_file"), metadata_path: str = typer.Option(..., "--metadata_path"), ): - cmd = f"python3 project/prepare.py --data_path={data_path} --labels_path={labels_path} --data_out={output_path} --labels_out={output_labels_path} --report={report_file} --parameters={parameters_file} --metadata_path={metadata_path}" + cmd = f"python3 project/prepare.py --data_path={data_path} --labels_path={labels_path} --models_path={models_path} --data_out={output_path} --labels_out={output_labels_path} --report={report_file} --parameters={parameters_file} 
--metadata_path={metadata_path}" exec_python(cmd) diff --git a/mlcubes/data_preparation/project/prepare.py b/mlcubes/data_preparation/project/prepare.py index a494c817..03427d87 100644 --- a/mlcubes/data_preparation/project/prepare.py +++ b/mlcubes/data_preparation/project/prepare.py @@ -2,6 +2,7 @@ import argparse import pandas as pd import yaml +import shutil from stages.generate_report import GenerateReport from stages.get_csv import AddToCSV from stages.nifti_transform import NIfTITransform @@ -14,6 +15,8 @@ from stages.pipeline import Pipeline from stages.constants import INTERIM_FOLDER, FINAL_FOLDER, TUMOR_MASK_FOLDER +MODELS_PATH = "/project/models" + def find_csv_filenames(path_to_dir, suffix=".csv"): filenames = os.listdir(path_to_dir) @@ -28,6 +31,9 @@ def setup_argparser(): parser.add_argument( "--labels_path", dest="labels", type=str, help="path containing labels" ) + parser.add_argument( + "--models_path", dest="models", type=str, help="path to the nnunet models" + ) parser.add_argument( "--data_out", dest="data_out", type=str, help="path to store prepared data" ) @@ -79,7 +85,7 @@ def init_pipeline(args): loop = None report_gen = GenerateReport(out_data_csv, args.data, out_raw, args.labels, args.labels_out, args.data_out, 8, brain_data_out, 3, tumor_data_out, 5) csv_proc = AddToCSV(out_raw, out_data_csv, valid_data_out, out_raw) - nifti_proc = NIfTITransform(out_data_csv, nifti_data_out, valid_data_out, args.metadata_path) + nifti_proc = NIfTITransform(out_data_csv, nifti_data_out, valid_data_out, args.metadata_path, args.data_out) brain_extract_proc = Extract( out_data_csv, brain_data_out, @@ -141,24 +147,9 @@ def init_report(args) -> pd.DataFrame: def main(): args = setup_argparser() - # Check if the input data is already prepared - # If so, just copy the contents and skip all processing - # TODO: this means we won't have a report. What would be the best way - # to handle this? 
- # TODO: Re-enable this when it is implemented correctly and we see the need for it - # # 1. If there is a csv file in the input folder - # # always reuse it for the prepared dataset - # csvs = find_csv_filenames(args.data_out) - # if len(csvs) == 1: - # # One csv was found. Assume this is the desired csv - # # move it to the expected location - # # TODO: How to deal with inconsistent paths because of MLCube functionality? - # csv_path = os.path.join(args.data_out, csvs[0]) - # os.rename(csv_path, out_data_csv) - # # can we assume the paths inside data.csv to be relative to the csv? - # # TODO: Create some logic to turn the csv paths into the expected paths for the MLCube - # # update_csv_paths(out_data_csv) - + # Move models to the expected location + if not os.path.exists(MODELS_PATH): + shutil.copytree(args.models, MODELS_PATH) report = init_report(args) pipeline = init_pipeline(args) diff --git a/mlcubes/data_preparation/project/stages/generate_report.py b/mlcubes/data_preparation/project/stages/generate_report.py index e5e253ab..57c8882f 100644 --- a/mlcubes/data_preparation/project/stages/generate_report.py +++ b/mlcubes/data_preparation/project/stages/generate_report.py @@ -345,6 +345,15 @@ def execute(self, report: pd.DataFrame) -> Tuple[pd.DataFrame, bool]: # Keep track of the cases that were found on the input folder observed_cases.add(index) + has_semiprepared, in_tp_path = has_semiprepared_folder_structure(in_tp_path, recursive=True) + if has_semiprepared: + tumor_seg = get_tumor_segmentation(subject, timepoint, in_tp_path) + if tumor_seg is not None: + report = self._proceed_to_comparison(subject, timepoint, in_tp_path, report) + else: + report = self._proceed_to_tumor_extraction(subject, timepoint, in_tp_path, report) + continue + if index in report.index: # Case has already been identified, see if input hash is different # if so, override the contents and restart the state for that case @@ -373,15 +382,6 @@ def execute(self, report: pd.DataFrame) -> 
Tuple[pd.DataFrame, bool]: # Move files around so it has the expected structure to_expected_folder_structure(out_tp_path, contents_path) - has_semiprepared, in_tp_path = has_semiprepared_folder_structure(in_tp_path, recursive=True) - if has_semiprepared: - tumor_seg = get_tumor_segmentation(subject, timepoint, in_tp_path) - if tumor_seg is not None: - report = self._proceed_to_comparison(subject, timepoint, in_tp_path, report) - else: - report = self._proceed_to_tumor_extraction(subject, timepoint, in_tp_path, report) - continue - if input_is_prepared: data["status_name"] = "DONE" data["status_code"] = self.done_status_code diff --git a/mlcubes/data_preparation/project/stages/nifti_transform.py b/mlcubes/data_preparation/project/stages/nifti_transform.py index f59048f6..07ecc712 100644 --- a/mlcubes/data_preparation/project/stages/nifti_transform.py +++ b/mlcubes/data_preparation/project/stages/nifti_transform.py @@ -11,10 +11,11 @@ class NIfTITransform(RowStage): def __init__( - self, data_csv: str, out_path: str, prev_stage_path: str, metadata_path: str + self, data_csv: str, out_path: str, prev_stage_path: str, metadata_path: str, data_out: str, ): self.data_csv = data_csv self.out_path = out_path + self.data_out = data_out self.prev_stage_path = prev_stage_path self.metadata_path = metadata_path os.makedirs(self.out_path, exist_ok=True) @@ -85,7 +86,7 @@ def __process_case(self, index: Union[str, int]): def __update_prev_stage_state(self, index: Union[str, int], report: pd.DataFrame): prev_data_path = report.loc[index]["data_path"] - prev_data_path = unnormalize_path(prev_data_path, "mlcube_io3") + prev_data_path = unnormalize_path(prev_data_path, self.data_out) shutil.rmtree(prev_data_path) def __undo_current_stage_changes(self, index: Union[str, int]): diff --git a/mlcubes/data_preparation/project/stages/pipeline.py b/mlcubes/data_preparation/project/stages/pipeline.py index 1ca4560e..fb8f517b 100644 --- a/mlcubes/data_preparation/project/stages/pipeline.py 
+++ b/mlcubes/data_preparation/project/stages/pipeline.py @@ -19,8 +19,9 @@ def normalize_report_paths(report: DataFrame) -> DataFrame: Returns: DataFrame: report with transformed paths """ - report["data_path"] = report["data_path"].str.split("mlcube_io3").str[-1] - report["labels_path"] = report["labels_path"].str.split("mlcube_io3").str[-1] + pattern = r"mlcube_io\d+" + report["data_path"] = report["data_path"].str.split(pattern).str[-1] + report["labels_path"] = report["labels_path"].str.split(pattern).str[-1] return report