Showing 15 changed files with 461 additions and 1,509 deletions.
This file was deleted.
@@ -1,4 +1,8 @@
 {
-    "python.linting.flake8Enabled": true,
-    "python.linting.enabled": true
+    "[python]": {
+        "editor.rulers": [99]
+    },
+    "flake8.args": [
+        "--max-line-length=100"
+    ],
 }
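This replaces the deprecated python.linting.* settings with configuration for the standalone flake8 VS Code extension, which reads flake8.args, and adds a 99-character editor ruler for Python files.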
@@ -0,0 +1,18 @@
#!/bin/bash

# Flywheel project name
project="bbl/MEBOLD"

# List any subjects you want to download here
subjects="ID1 ID2"

# Include a path to your flywheel API token here
token=$(</cbica/home/salot/tokens/flywheel.txt)
fw login "$token"

# Navigate to the folder to which you want to download the data
cd /cbica/projects/mebold/sourcedata || exit

for subject in $subjects; do
    fw download --yes --zip "fw://${project}/${subject}"
done
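With the values above, the loop issues one download command per placeholder subject:

fw download --yes --zip "fw://bbl/MEBOLD/ID1"
fw download --yes --zip "fw://bbl/MEBOLD/ID2"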
@@ -0,0 +1,13 @@
"""Expand dicom zip files in order to run heudiconv."""

import os
import zipfile
from glob import glob

if __name__ == "__main__":
    zip_files = sorted(glob("/cbica/projects/mebold/sourcedata/*_*/*/*/*.dicom.zip"))
    for zip_file in zip_files:
        # Extract each archive into the directory that contains it.
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall(os.path.dirname(zip_file))

        # Delete the archive once its contents have been extracted.
        os.remove(zip_file)
@@ -0,0 +1,16 @@
#!/bin/bash
# Loop over subjects and run heudiconv on each.
# Make sure to activate the conda environment with heudiconv installed before running this.

declare -a subjects=("ID1" "ID2")
for sub in "${subjects[@]}"
do
    echo "$sub"
    heudiconv \
        -f reproin \
        -o /cbica/projects/mebold/dset \
        -d "/cbica/projects/mebold/sourcedata/{subject}_{session}/*/*/*/*.dcm" \
        -s "$sub" \
        -ss 1 \
        --bids
done
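For placeholder subject ID1 with session 1 (-ss 1), the -d template expands to the DICOM search path below, and heudiconv writes BIDS output under /cbica/projects/mebold/dset:

/cbica/projects/mebold/sourcedata/ID1_1/*/*/*/*.dcm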
@@ -0,0 +1,145 @@
"""Fix BIDS files after heudiconv conversion.

This script should deal with steps 1-7 below:

1. Deal with duplicates.
2. Rename multi-echo magnitude BOLD files to part-mag_bold.
3. Rename phase files to part-phase_bold.
4. Split out noRF noise scans from multi-echo BOLD scans.
   - Also copy the JSON.
5. Copy the first echo of each multi-echo field map without the echo entity.
6. Update filenames in the scans.tsv files.
7. Remove events files.
"""

import os
import shutil
from glob import glob

import nibabel as nb
import pandas as pd

# Number of EPI noise scans to split out of the BOLD scans.
N_NOISE_VOLS = 3

# Volume counts for complete runs.
# Runs with any other length are treated as partial and left untouched.
FULL_RUN_LENGTHS = (240, 204, 200)
if __name__ == "__main__":
    dset_dir = "/cbica/projects/mebold/dset/"
    subject_dirs = sorted(glob(os.path.join(dset_dir, "sub-*")))
    for subject_dir in subject_dirs:
        sub_id = os.path.basename(subject_dir)
        session_dirs = sorted(glob(os.path.join(subject_dir, "ses-*")))
        for session_dir in session_dirs:
            ses_id = os.path.basename(session_dir)
            anat_dir = os.path.join(session_dir, "anat")
            fmap_dir = os.path.join(session_dir, "fmap")
            func_dir = os.path.join(session_dir, "func")

            # Remove empty events files created by heudiconv
            events_files = sorted(glob(os.path.join(func_dir, "*_events.tsv")))
            for events_file in events_files:
                os.remove(events_file)

            # Load scans file
            scans_file = os.path.join(session_dir, f"{sub_id}_{ses_id}_scans.tsv")
            assert os.path.isfile(scans_file), f"Scans file DNE: {scans_file}"
            scans_df = pd.read_table(scans_file)

            # Heudiconv's reproin heuristic currently (as of v1.2.0) names magnitude and phase
            # files as _bold and _phase, respectively.
            # The better way to do it is to call them part-mag_bold and part-phase_bold.
            mag_files = sorted(glob(os.path.join(func_dir, "*echo-*_bold.*")))
            for mag_file in mag_files:
                if "part-" in mag_file:
                    print(f"Skipping {mag_file}")
                    continue

                new_mag_file = mag_file.replace("_bold.", "_part-mag_bold.")
                os.rename(mag_file, new_mag_file)

                mag_filename = os.path.join("func", os.path.basename(mag_file))
                new_mag_filename = os.path.join("func", os.path.basename(new_mag_file))

                # Replace the filename in the scans.tsv file
                scans_df = scans_df.replace({"filename": {mag_filename: new_mag_filename}})

            # Rename phase files from _phase to _part-phase_bold.
            phase_files = sorted(glob(os.path.join(func_dir, "*_phase.*")))
            for phase_file in phase_files:
                new_phase_file = phase_file.replace("_phase.", "_part-phase_bold.")
                os.rename(phase_file, new_phase_file)

                phase_filename = os.path.join("func", os.path.basename(phase_file))
                new_phase_filename = os.path.join("func", os.path.basename(new_phase_file))

                # Replace the filename in the scans.tsv file
                scans_df = scans_df.replace({"filename": {phase_filename: new_phase_filename}})

            # Split out noise scans from all multi-echo BOLD files.
            # There is no metadata to distinguish noise scans from BOLD scans,
            # so we need to hardcode the number of noise scans to split out.
            # In order to handle partial scans where the last N volumes aren't noise scans,
            # we also need to hardcode valid scan lengths.
            me_bolds = sorted(glob(os.path.join(func_dir, "*acq-MBME*_bold.nii.gz")))
            for me_bold in me_bolds:
                noise_scan = me_bold.replace("_bold.nii.gz", "_noRF.nii.gz")
                if os.path.isfile(noise_scan):
                    print(f"File exists: {os.path.basename(noise_scan)}")
                    continue

                img = nb.load(me_bold)
                n_vols = img.shape[-1]
                if n_vols not in FULL_RUN_LENGTHS:
                    print(f"File is a partial scan: {os.path.basename(me_bold)}")
                    continue

                noise_img = img.slicer[..., -N_NOISE_VOLS:]
                bold_img = img.slicer[..., :-N_NOISE_VOLS]

                # Overwrite the BOLD scan
                os.remove(me_bold)
                bold_img.to_filename(me_bold)
                noise_img.to_filename(noise_scan)

                # Copy the JSON as well
                shutil.copyfile(
                    me_bold.replace(".nii.gz", ".json"),
                    noise_scan.replace(".nii.gz", ".json"),
                )

                # Add noise scans to scans DataFrame
                i_row = len(scans_df.index)
                me_bold_fname = os.path.join("func", os.path.basename(me_bold))
                noise_fname = os.path.join("func", os.path.basename(noise_scan))
                scans_df.loc[i_row] = scans_df.loc[scans_df["filename"] == me_bold_fname].iloc[0]
                scans_df.loc[i_row, "filename"] = noise_fname
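            # Worked example for the split above: a full 204-volume MBME run
            # becomes a 201-volume _bold.nii.gz (volumes 0-200) and a
            # 3-volume _noRF.nii.gz (volumes 201-203).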

            # In this protocol, we have multi-echo field maps.
            # In practice, multi-echo field maps aren't useful, so we just grab the first echo's
            # data and label it as a single-echo field map.
            # Copy first echo's sbref of multi-echo field maps without echo entity.
            me_fmaps = sorted(glob(os.path.join(fmap_dir, "*_acq-ME*_echo-1_sbref.*")))
            for me_fmap in me_fmaps:
                out_fmap = me_fmap.replace("_echo-1_", "_").replace("_sbref", "_epi")
                if os.path.isfile(out_fmap):
                    print(f"File exists: {os.path.basename(out_fmap)}")
                    continue

                me_fmap_fname = os.path.join("fmap", os.path.basename(me_fmap))
                out_fmap_fname = os.path.join("fmap", os.path.basename(out_fmap))
                shutil.copyfile(me_fmap, out_fmap)
                if me_fmap.endswith(".nii.gz"):
                    i_row = len(scans_df.index)
                    scans_df.loc[i_row] = scans_df.loc[
                        scans_df["filename"] == me_fmap_fname
                    ].iloc[0]
                    scans_df.loc[i_row, "filename"] = out_fmap_fname

            # Save out the modified scans.tsv file.
            scans_df = scans_df.sort_values(by=["acq_time", "filename"])
            os.remove(scans_file)
            scans_df.to_csv(scans_file, sep="\t", na_rep="n/a", index=False)
@@ -0,0 +1,15 @@
#!/bin/bash
# Reface T1w images using afni_refacer_run.
module load afni/2022_05_03

t1w_files=$(find /cbica/projects/mebold/dset/sub-*/ses-*/anat/*T1w.nii.gz)
for t1w_file in $t1w_files
do
    echo "$t1w_file"
    # -prefix matches -input, so each T1w is refaced in place.
    @afni_refacer_run \
        -input "${t1w_file}" \
        -mode_reface \
        -no_images \
        -overwrite \
        -prefix "${t1w_file}"
done
@@ -0,0 +1,55 @@
"""Anonymize acquisition datetimes for a dataset.

Works for both longitudinal and cross-sectional studies. The time of day
is preserved, but the first scan is set to January 1st, 1800. In a
longitudinal study, each session is anonymized relative to the first
session, so that the time between sessions is preserved.

Overwrites scans.tsv files in the dataset. Only run this *after* data
collection is complete for the study, especially if it's longitudinal.
"""

import os
from glob import glob

import pandas as pd
from dateutil import parser

if __name__ == "__main__":
    dset_dir = "/cbica/projects/mebold/dset"

    bl_dt = parser.parse("1800-01-01")

    subject_dirs = sorted(glob(os.path.join(dset_dir, "sub-*")))
    for subject_dir in subject_dirs:
        sub_id = os.path.basename(subject_dir)
        print(f"Processing {sub_id}")

        scans_files = sorted(glob(os.path.join(subject_dir, "ses-*/*_scans.tsv")))

        for i_ses, scans_file in enumerate(scans_files):
            ses_dir = os.path.dirname(scans_file)
            ses_name = os.path.basename(ses_dir)
            print(f"\t{ses_name}")

            df = pd.read_table(scans_file)
            if i_ses == 0:
                # Anonymize in terms of first scan for subject.
                first_scan = df["acq_time"].min()
                first_dt = parser.parse(first_scan.split("T")[0])
                diff = first_dt - bl_dt

            acq_times = df["acq_time"].apply(parser.parse)
            acq_times = (acq_times - diff).astype(str)
            df["acq_time"] = acq_times
            df["acq_time"] = df["acq_time"].str.replace(" ", "T")

            os.remove(scans_file)
            df.to_csv(
                scans_file,
                sep="\t",
                lineterminator="\n",
                na_rep="n/a",
                index=False,
            )
@@ -0,0 +1,28 @@
"""Remove unneeded fields from bottom-level JSON files."""

import json
import os
from glob import glob

if __name__ == "__main__":
    dset_dir = "/cbica/projects/mebold/dset/"
    drop_keys = [
        "AcquisitionTime",
        "CogAtlasID",
        "InstitutionAddress",
        "TaskName",
        "ImageComments",
    ]

    json_files = sorted(glob(os.path.join(dset_dir, "sub-*/ses-*/*/*.json")))
    for json_file in json_files:
        with open(json_file, "r") as fo:
            json_data = json.load(fo)

        for drop_key in drop_keys:
            if drop_key in json_data.keys():
                json_data.pop(drop_key)

        os.remove(json_file)
        with open(json_file, "w") as fo:
            json.dump(json_data, fo, indent=4, sort_keys=True)
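For instance, a hypothetical sidecar containing {"AcquisitionTime": "14:05:00", "EchoTime": 0.0142, "TaskName": "rest"} would be rewritten (with sorted keys and 4-space indentation) as {"EchoTime": 0.0142}.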