From 08415ba3fdb99c8b04ac8c37a4f17a8440c98e06 Mon Sep 17 00:00:00 2001 From: Dan Levitas Date: Fri, 9 Feb 2024 22:00:11 +0000 Subject: [PATCH] [enh]: keep chugging along --- handler/ezBIDS_core/ezBIDS_core.py | 682 ++++++++++++++--------------- handler/preprocess.sh | 1 + 2 files changed, 326 insertions(+), 357 deletions(-) diff --git a/handler/ezBIDS_core/ezBIDS_core.py b/handler/ezBIDS_core/ezBIDS_core.py index 1f68be28..01a66452 100755 --- a/handler/ezBIDS_core/ezBIDS_core.py +++ b/handler/ezBIDS_core/ezBIDS_core.py @@ -868,375 +868,374 @@ def generate_dataset_list(uploaded_files_list, exclude_data): for img_file in img_list: corresponding_json = [ x for x in corresponding_files_list if x.endswith('.json') and img_file.split('.nii.gz')[0] in x - ] # should be length of 1, but may be empty (not exist) + ] # should be length of 1, but may be empty (i.e. no metadata json file) if len(corresponding_json): - - json_data = open(json_file) - json_data = json.load(json_data, strict=False) + json_path = corresponding_json[0] + json_data = open(corresponding_json[0]) + json_data = json.load(json_data, strict=False) + else: + json_path = img_file.split('.nii.gz')[0] + '.json' + json_data = { + 'ConversionSoftware': 'ezBIDS', + 'ConversionSoftwareVersion': '1.0.0' + } + if not os.path.exists(json_path): + with open(json_path, "w") as fp: + json.dump(json_data, fp, indent=3) + corresponding_files_list = corresponding_files_list + [json_path] + json_data = open(json_path) + json_data = json.load(json_data, strict=False) + + # Find ImageModality + if "Modality" in json_data: + modality = json_data["Modality"] + else: + # assume MR (Is this a proper assumption to make? Probably most likely scenario) + modality = "MR" + + # Phase encoding direction info + if "PhaseEncodingDirection" in json_data: + pe_direction = json_data["PhaseEncodingDirection"] + else: + pe_direction = None - # Make sure each JSON has a corresponding NIfTI file - corresponding_nifti = None try: - if "Modality" in json_data.keys(): - if json_data["Modality"] in ["MR", "PT"]: - corresponding_nifti = [x for x in nifti_list if json_file[:-4] in x and x.endswith(".nii.gz")][0] - elif json_data["Modality"] == "MEG": - corresponding_nifti = [ - x for x in nifti_list if json_file[:-4] in x - and x.endswith(tuple(MEG_extensions)) - ][0] - else: - pass - else: - if json_file.endswith("_blood.json"): - corresponding_nifti = [x for x in nifti_list if json_file[:-4] in x and x.endswith("blood.tsv")][0] - elif "ConversionSoftware" in json_data.keys(): - if isinstance(json_data["ConversionSoftware"], str): - if json_data["ConversionSoftware"] == "pypet2bids": - json_data["Modality"] = "PT" - corresponding_nifti = [ - x for x in nifti_list if json_file[:-4] in x and x.endswith(".nii.gz") - ][0] - elif isinstance(json_data["ConversionSoftware"], list): - if "pypet2bids" in json_data["ConversionSoftware"]: - json_data["Modality"] = "PT" - corresponding_nifti = [ - x for x in nifti_list if json_file[:-4] in x and x.endswith(".nii.gz") - ][0] - else: - pass + ornt = nib.aff2axcodes(nib.load(img_file).affine) + ornt = "".join(ornt) except: - pass - - if corresponding_nifti: - # Phase encoding direction info - if "PhaseEncodingDirection" in json_data: - pe_direction = json_data["PhaseEncodingDirection"] - else: - pe_direction = None + ornt = None - try: - ornt = nib.aff2axcodes(nib.load(corresponding_nifti).affine) - ornt = "".join(ornt) - except: - ornt = None + if pe_direction is not None and ornt is not None and json_data["Modality"] != "MEG": + proper_pe_direction = correct_pe(pe_direction, ornt) + ped = determine_direction(proper_pe_direction, ornt) + else: + ped = "" - if pe_direction is not None and ornt is not None and json_data["Modality"] != "MEG": - proper_pe_direction = correct_pe(pe_direction, ornt) - ped = determine_direction(proper_pe_direction, ornt) - else: - ped = "" + # JSON (and bval/bvec) file(s) associated with specific json file + corresponding_file_paths = [ + x for x in corresponding_files_list if img_file.split('.nii.gz')[0] in x and not x.endswith('.nii.gz') + ] - # NIfTI (and bval/bvec) file(s) associated with specific json file - nifti_paths_for_json = [x for x in nifti_list if json_file[:-4] in x and ".json" not in x] + # Find nifti file size + filesize = os.stat(img_file).st_size - # Find nifti file size - filesize = os.stat(nifti_paths_for_json[0]).st_size + # Find StudyID from json + if "StudyID" in json_data: + study_id = json_data["StudyID"] + else: + study_id = img_file.split('/')[1] # uppermost folder in file path - # Find StudyID from json - if "StudyID" in json_data: - study_id = json_data["StudyID"] - else: - study_id = "" + # Find subject_id from json, since some files contain neither PatientID nor PatientName + if "PatientID" in json_data: + patient_id = json_data["PatientID"] + else: + patient_id = "n/a" - # Find subject_id from json, since some files contain neither PatientID nor PatientName - if "PatientID" in json_data: - patient_id = json_data["PatientID"] - else: - patient_id = "n/a" + if "PatientName" in json_data: + patient_name = json_data["PatientName"] + else: + patient_name = "n/a" - if "PatientName" in json_data: - patient_name = json_data["PatientName"] - else: - patient_name = "n/a" + # Find PatientBirthDate + if "PatientBirthDate" in json_data: + patient_birth_date = json_data["PatientBirthDate"].replace("-", "") + else: + patient_birth_date = "00000000" - # Find PatientBirthDate - if "PatientBirthDate" in json_data: - patient_birth_date = json_data["PatientBirthDate"].replace("-", "") - else: - patient_birth_date = "00000000" + # Assume patient_species is homo sapiens + patient_species = "homo sapiens" - # Assume patient_species is homo sapiens - patient_species = "homo sapiens" + # Find PatientSex + patient_sex = "n/a" + if "PatientSex" in json_data: + patient_sex = json_data["PatientSex"] - # Find PatientSex - patient_sex = "n/a" - if "PatientSex" in json_data: - if json_data["PatientSex"] in ["M", "F"]: - patient_sex = json_data["PatientSex"] + # Find PatientAge + if "PatientAge" in json_data: + patient_age = json_data["PatientAge"] + else: + patient_age = "n/a" - # Find PatientAge - if "PatientAge" in json_data: - patient_age = json_data["PatientAge"] - else: - patient_age = "n/a" + # Patient handedness + patient_handedness = "n/a" - # Patient handedness - patient_handedness = "n/a" + """ + Metadata may contain PatientBirthDate and/or PatientAge. Check either + to see if one truly provides accurate age information. + """ + age = "n/a" + if "PatientAge" in json_data: + patient_age = json_data["PatientAge"] + if (isinstance(patient_age, int) or isinstance(patient_age, float)): + age = patient_age + + if age == "n/a" and "PatientBirthDate" in json_data: + patient_birth_date = json_data["PatientBirthDate"] # ISO 8601 "YYYY-MM-DD" + try: + age = int(today_date.split("-")[0]) - int(patient_birth_date.split("-")[0]) + - ((int(today_date.split("-")[1]), int(today_date.split("-")[2])) + < (int(patient_birth_date.split("-")[2]), int(patient_birth_date.split("-")[2]))) + except: + pass - """ - Metadata may contain PatientBirthDate and/or PatientAge. Check either - to see if one truly provides accurate age information. - """ - age = "n/a" - if "PatientAge" in json_data: - patient_age = json_data["PatientAge"] - if (isinstance(patient_age, int) or isinstance(patient_age, float)): - age = patient_age - - if age == "n/a" and "PatientBirthDate" in json_data: - patient_birth_date = json_data["PatientBirthDate"] # ISO 8601 "YYYY-MM-DD" - try: - age = int(today_date.split("-")[0]) - int(patient_birth_date.split("-")[0]) - - ((int(today_date.split("-")[1]), int(today_date.split("-")[2])) - < (int(patient_birth_date.split("-")[2]), int(patient_birth_date.split("-")[2]))) - except: - pass + # Find AcquisitionDateTime + if "AcquisitionDateTime" in json_data: + acquisition_date_time = json_data["AcquisitionDateTime"] + else: + acquisition_date_time = "0000-00-00T00:00:00.000000" - # Find AcquisitionDateTime - if "AcquisitionDateTime" in json_data: - acquisition_date_time = json_data["AcquisitionDateTime"] - else: - acquisition_date_time = "0000-00-00T00:00:00.000000" + # Find AcquisitionDate + if "AcquisitionDate" in json_data: + acquisition_date = json_data["AcquisitionDate"] + else: + acquisition_date = "0000-00-00" - # Find AcquisitionDate - if "AcquisitionDate" in json_data: - acquisition_date = json_data["AcquisitionDate"] - else: - acquisition_date = "0000-00-00" + # Find AcquisitionTime + if "AcquisitionTime" in json_data: + acquisition_time = json_data["AcquisitionTime"] + else: + acquisition_time = acquisition_time = "00:00:00.000000" - # Find AcquisitionTime - if "AcquisitionTime" in json_data: - acquisition_time = json_data["AcquisitionTime"] - else: - acquisition_time = acquisition_time = "00:00:00.000000" + # Find TimeZero + if "TimeZero" in json_data and json_data.get("ScanStart", None) == 0: + acquisition_time = json_data["TimeZero"] - # Find TimeZero - if "TimeZero" in json_data and json_data.get("ScanStart", None) == 0: - acquisition_time = json_data["TimeZero"] + # Find Manufacturer metadata or make placehodler + if "Manufacturer" not in json_data: + manufacturer = "n/a" + json_data["Manufacturer"] = manufacturer + else: + manufacturer = json_data["Manufacturer"] - # Find Manufacturer metadata or make placehodler - if "Manufacturer" not in json_data: - manufacturer = "n/a" - json_data["Manufacturer"] = manufacturer - else: - manufacturer = json_data["Manufacturer"] + # Find RepetitionTime + if "RepetitionTime" in json_data: + repetition_time = json_data["RepetitionTime"] + else: + repetition_time = 0 - # Find RepetitionTime - if "RepetitionTime" in json_data: - repetition_time = json_data["RepetitionTime"] - else: - repetition_time = 0 + # Find EchoNumber + if "EchoNumber" in json_data: + echo_number = json_data["EchoNumber"] + else: + echo_number = None - # Find EchoNumber - if "EchoNumber" in json_data: - echo_number = json_data["EchoNumber"] - else: - echo_number = None + # Find EchoTime + if "EchoTime" in json_data: + echo_time = json_data["EchoTime"] * 1000 + else: + echo_time = 0 - # Find EchoTime - if "EchoTime" in json_data: - echo_time = json_data["EchoTime"] * 1000 - else: - echo_time = 0 + # Get the nibabel nifti image info + if img_file.endswith('.nii.gz'): + image = nib.load(img_file) + ndim = image.ndim - # Get the nibabel nifti image info - if json_file.endswith("_blood.json"): - image = "n/a" + # Is image valid (i.e. does image have an acceptable dtype) + # TODO - Can this be moved to function above? + if image.get_data_dtype() in [" 9: - sub_info_list_id = str(sub_info_list_id) - else: - sub_info_list_id = "0" + str(sub_info_list_id) - sub_info_list[-1]["Subject"] = sub_info_list_id + """ + Select subject (and session, if applicable) IDs to display. + """ + if patient_id == "n/a" and patient_name == "n/a" and patient_birth_date == "00000000": + # Completely anonymized data, assume folder is the subject ID + folder = [x for x in img_file.split("/") if not x.endswith('.nii.gz')][-1] + else: + folder = "n/a" + + sub_info = { + "PatientID": patient_id, + "PatientName": patient_name, + "PatientBirthDate": patient_birth_date, + "Folder": folder, + "Subject": sub_info_list_id, + } + sub_info_list.append(sub_info) - subject = sub_info["Subject"] + if len(sub_info_list) == 1: + pass + else: + if sub_info != sub_info_list[len(sub_info_list) - 2]: + sub_info_list_id = int(sub_info_list_id) + 1 + if sub_info_list_id > 9: + sub_info_list_id = str(sub_info_list_id) + else: + sub_info_list_id = "0" + str(sub_info_list_id) + sub_info_list[-1]["Subject"] = sub_info_list_id - # Check if subject (and session) ID(s) explicitly specified - sub_search_term = "sub-" - ses_search_term = "ses-" + subject = sub_info["Subject"] - for value in [json_file, patient_id, patient_name]: - if sub_search_term in value.lower(): - subject = re.split('[^a-zA-Z0-9]', value.lower().split(f"{sub_search_term}")[-1])[0] - break + # Check if subject (and session) ID(s) explicitly specified + sub_search_term = "sub-" + ses_search_term = "ses-" - session = "" - for value in [json_file, patient_id, patient_name]: - if ses_search_term in value.lower(): - session = re.split('[^a-zA-Z0-9]', value.lower().split(f"{ses_search_term}")[-1])[0] - break + for value in [img_file, patient_id, patient_name]: + if sub_search_term in value.lower(): + subject = re.split('[^a-zA-Z0-9]', value.lower().split(f"{sub_search_term}")[-1])[0] + break - # Remove non-alphanumeric characters from subject (and session) ID(s) - subject = re.sub("[^A-Za-z0-9]+", "", subject) - session = re.sub("[^A-Za-z0-9]+", "", session) + session = "" + for value in [img_file, patient_id, patient_name]: + if ses_search_term in value.lower(): + session = re.split('[^a-zA-Z0-9]', value.lower().split(f"{ses_search_term}")[-1])[0] + break - # Find NIfTI path - if json_file.endswith("blood.json"): - nifti_path = json_file.split(".json")[0] + ".tsv" - else: - if json_data["Modality"] == "MEG": - nifti_path = [x for x in nifti_paths_for_json if x.endswith(tuple(MEG_extensions))][0] - else: - nifti_path = [x for x in nifti_paths_for_json if x.endswith(".nii.gz")][0] + # Remove non-alphanumeric characters from subject (and session) ID(s) + subject = re.sub("[^A-Za-z0-9]+", "", subject) + session = re.sub("[^A-Za-z0-9]+", "", session) + + # # Find NIfTI path + # if json_file.endswith("blood.json"): + # nifti_path = json_file.split(".json")[0] + ".tsv" + # else: + # if json_data["Modality"] == "MEG": + # nifti_path = [x for x in corresponding_file_paths if x.endswith(tuple(MEG_extensions))][0] + # else: + # nifti_path = [x for x in corresponding_file_paths if x.endswith(".nii.gz")][0] + + # TODO - do we really need to do this now, since we get path above? + # # Find JSON path + # if img_file.endswith("blood.json"): + # json_file = img_file.split(".json")[0] + ".tsv" + # else: + # if json_data["Modality"] == "MEG": + # json_file = [x for x in corresponding_file_paths if x.endswith(tuple(MEG_extensions))][0] + # else: + # json_file = [x for x in corresponding_file_paths if x.endswith(".nii.gz")][0] - """ - Organize all from individual SeriesNumber in dictionary - """ - acquisition_info_directory = { - "StudyID": study_id, - "PatientID": patient_id, - "PatientName": patient_name, - "PatientBirthDate": patient_birth_date, - "PatientSpecies": patient_species, - "PatientSex": patient_sex, - "PatientAge": age, - "PatientHandedness": patient_handedness, - "subject": subject, - "session": session, - "SeriesNumber": series_number, - "ModifiedSeriesNumber": mod_series_number, - "AcquisitionDateTime": acquisition_date_time, - "AcquisitionDate": acquisition_date, - "AcquisitionTime": acquisition_time, - "SeriesDescription": series_description, - "ProtocolName": protocol_name, - "descriptor": descriptor, - "Modality": modality, - "ImageType": image_type, - "RepetitionTime": repetition_time, - "EchoNumber": echo_number, - "EchoTime": echo_time, - "datatype": "", - "suffix": "", - "subject_idx": 0, - "session_idx": 0, - "series_idx": 0, - "direction": ped, - "exclude": False, - "filesize": filesize, - "NumVolumes": volume_count, - "orientation": ornt, - "error": None, - "IntendedFor": None, - "B0FieldIdentifier": None, - "B0FieldSource": None, - "section_id": 1, - "message": None, - "type": data_type, - "nifti_path": nifti_path, - "nibabel_image": image, - "ndim": ndim, - "valid_image": valid_image, - "json_path": json_file, - "file_directory": "/".join([x for x in json_file.split("/") if ".json" not in x]), - "paths": paths, - "headers": "", - "finalized_match": False, - "sidecar": json_data - } - dataset_list.append(acquisition_info_directory) + """ + Organize all from individual SeriesNumber in dictionary + """ + sequence_info_directory = { + "StudyID": study_id, + "PatientID": patient_id, + "PatientName": patient_name, + "PatientBirthDate": patient_birth_date, + "PatientSpecies": patient_species, + "PatientSex": patient_sex, + "PatientAge": age, + "PatientHandedness": patient_handedness, + "subject": subject, + "session": session, + "SeriesNumber": series_number, + "ModifiedSeriesNumber": mod_series_number, + "AcquisitionDateTime": acquisition_date_time, + "AcquisitionDate": acquisition_date, + "AcquisitionTime": acquisition_time, + "SeriesDescription": series_description, + "ProtocolName": protocol_name, + "descriptor": descriptor, + "Modality": modality, + "ImageType": image_type, + "RepetitionTime": repetition_time, + "EchoNumber": echo_number, + "EchoTime": echo_time, + "datatype": "", + "suffix": "", + "subject_idx": 0, + "session_idx": 0, + "series_idx": 0, + "direction": ped, + "exclude": False, + "filesize": filesize, + "NumVolumes": volume_count, + "orientation": ornt, + "error": None, + "IntendedFor": None, + "B0FieldIdentifier": None, + "B0FieldSource": None, + "section_id": 1, + "message": None, + "type": data_type, + "nifti_path": img_file, + "nibabel_image": image, + "ndim": ndim, + "valid_image": valid_image, + "json_path": json_path, + "file_directory": "/".join([x for x in img_file.split("/") if not x.endswith('.nii.gz')]), + 'uploaded_config_file': config, + "paths": paths, + "headers": "", + "finalized_match": False, + "sidecar": json_data + } + print(sequence_info_directory) + dataset_list.append(sequence_info_directory) # Sort dataset_list of dictionaries dataset_list = sorted(dataset_list, key=itemgetter("AcquisitionDate", @@ -1591,7 +1590,7 @@ def template_configuration(dataset_list_unique_series, subjects_information, con information. config_file : string - Path to the ezBIDS configuration file, if config == True. Otherwise, set as empty string. + Path to the ezBIDS configuration file, if config is True. Otherwise, set as empty string. Returns ------- @@ -2599,37 +2598,6 @@ def entity_labels_identification(dataset_list_unique_series, lookup_dic): series_entities["task"] = "" # If BIDS naming convention isn't detected, do a more thorough check for certain entities labels - - # # Specific code for Nigerian dataset only (revert these changes once dataset is converted) - # if "ABDN" in unique_dic["json_path"]: - # sd = re.sub("[^A-Za-z0-9]+", "", sd).lower() - # if sd[-1] == "c": - # series_entities["ceagent"] = "gadolinium" - # if any(x in sd for x in ["axial", "ax", "axi"]): - # series_entities["acquisition"] = "axial" - # if any(x in sd for x in ["cor", "coronal"]): - # series_entities["acquisition"] = "coronal" - # if any(x in sd for x in ["sag", "sagittal"]): - # series_entities["acquisition"] = "sagittal" - - # if ("t1" in sd or "t1w" in sd) and "gre" not in sd: - # unique_dic["datatype"] = "anat" - # unique_dic["suffix"] = "T1w" - # unique_dic["type"] = "anat/T1w" - # if ("t2" in sd or "t2w" in sd) and "gre" not in sd: - # if "flair" in sd: - # unique_dic["datatype"] = "anat" - # unique_dic["suffix"] = "FLAIR" - # unique_dic["type"] = "anat/FLAIR" - # else: - # unique_dic["datatype"] = "anat" - # unique_dic["suffix"] = "T2w" - # unique_dic["type"] = "anat/T2w" - # if "oblcor" in sd: - # unique_dic["type"] = "exclude" - # if "array" in sd: - # unique_dic["type"] = "exclude" - # task func_rest_keys = ["rest", "rsfmri", "fcmri"] if any(x in re.sub("[^A-Za-z0-9]+", "", sd).lower() for x in func_rest_keys) and not series_entities["task"]: @@ -2650,7 +2618,7 @@ def entity_labels_identification(dataset_list_unique_series, lookup_dic): or series_entities["subject"] == "emptyroom"): # for MEG data series_entities["task"] = "noise" - # dir (required for fmap/epi and highly recommended for dwi/dwi) + # dir (short for direction; required for fmap/epi and highly recommended for dwi/dwi) if any(x in unique_dic["type"] for x in ["fmap/epi", "dwi/dwi"]): series_entities["direction"] = unique_dic["direction"] diff --git a/handler/preprocess.sh b/handler/preprocess.sh index d961690b..0b78b732 100755 --- a/handler/preprocess.sh +++ b/handler/preprocess.sh @@ -204,6 +204,7 @@ else echo "searching for products in $root" (cd $root && find . -maxdepth 9 -type f \( -name "*.nii.gz" \) > list) + # TODO: MOVE THIS INTO find_img_data.py # find MEG data files MEG_extensions=("*.ds" "*.fif" "*.sqd" "*.con" "*.raw" "*.ave" "*.mrk" "*.kdf" "*.mhd" "*.trg" "*.chn" "*.dat") for ext in ${MEG_extensions[*]}