Skip to content

Commit

Permalink
include environment arg; use json keys; update scans.tsv
Browse files Browse the repository at this point in the history
  • Loading branch information
jungheejung committed Mar 25, 2022
1 parent e53be56 commit f035a3c
Showing 1 changed file with 34 additions and 49 deletions.
83 changes: 34 additions & 49 deletions bids/remove_bids_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,74 +12,59 @@
# Remove duplicate BIDS files ("__dup-*") and scrub every reference to them:
#   1) drop the dup filename from fieldmap sidecars' "IntendedFor" lists,
#   2) delete the dup file itself,
#   3) drop the dup row from the session's *_scans.tsv,
#   4) log fieldmap/dup mismatches to <main_dir>/log/dup_flaglist.txt.
#
# Usage: python remove_bids_files.py <glob-pattern-under-bids_dir> <bids_dir>
#
# example file dir
# /dartfs-hpc/rc/lab/C/CANlab/labdata/data/spacetop/dartmouth/sub-0085/ses-01/func/sub-0085_ses-01_task-alignvideo_acq-mb8_run-01_bold__dup-01.nii.gz
# {bids_dir}/{sub}/{ses}/func
import os, time, sys, glob
from os.path import join
import json
from pathlib import Path
import pandas as pd


def load_json(fpath):
    """Read a JSON sidecar and return its contents as a dict."""
    with open(fpath) as fp:
        return json.load(fp)


def save_json(fpath, contents):
    """Overwrite the JSON sidecar at *fpath* with *contents*."""
    with open(fpath, 'w') as fp:
        json.dump(contents, fp, indent=4)


fpattern = sys.argv[1]  # glob pattern relative to bids_dir, e.g. 'sub-*/ses-*/func/*_bold__dup*.nii.gz'
bids_dir = sys.argv[2]  # reason for this: script lives in the preprocessing repo; the BIDS repo is a totally different directory, not parent or child.

# log directory sits next to the repo: two levels above the current working directory
current_dir = os.getcwd()
main_dir = Path(current_dir).parents[1]
save_dir = join(main_dir, 'log')

# 1. get list of dup names _____________________________________________________
# TODO: remove all 4 types of files - see if we can remove and datalad save here:
# 1) bold__dup-01.json  2) bold__dup-01.nii.gz
# 3) sbref__dup-01.json 4) sbref__dup-01.nii.gz
dup_glob = glob.glob(join(bids_dir, fpattern))
flaglist = []
flaglist.append(
    "This file keeps track of dup file and fieldmap mismatches.\nTwo erroneous cases:\n"
    " 1) IntendedFor field does not exist from the get go [IntendedFor X] \n"
    " 2) IntendedFor field exists. However, Duplicate file does not exist within this key [IntendedFor O; Dup X]")

for ind, dup_fpath in enumerate(dup_glob):
    dup_fname = os.path.basename(dup_fpath)
    # session dir is the parent of the func dir holding the dup; fmap is its sibling
    ses_dir = Path(os.path.dirname(dup_fpath)).parents[0]
    fmap_dir = os.path.join(ses_dir, 'fmap')
    # TODO: make it flexible to handle DWI and T1 .jsons.
    # currently we only feed in epi images and update epi .jsons.
    # dictionary:  - DWI: acq-96dirX6b0Xmb   - BOLD: acq-mb8

    # 2. open fieldmap .json sidecars with corresponding dup files _____________
    # NOTE: the string must be globbed — iterating it directly would walk characters.
    for fmap_ind, fmap_fname in enumerate(glob.glob(join(fmap_dir, '*.json'))):
        sidecar = load_json(fmap_fname)
        # 2-1. check if "IntendedFor" field exists within json
        if 'IntendedFor' in sidecar:
            copy_list = sidecar['IntendedFor']
            print(copy_list)
            # 2-2. if dup_fname appears in "IntendedFor", pop it and save
            dup_index = [i for i, s in enumerate(copy_list) if dup_fname in s]
            if dup_index:
                copy_list.pop(dup_index[0])
                sidecar['IntendedFor'] = copy_list
                print(f"removed {dup_fname} from list")
                save_json(fmap_fname, sidecar)
            else:
                flag_msg1 = f"Intended For field exists - dup filename does not exist : {dup_fname}"
                flaglist.append(flag_msg1)
        else:
            flag_msg2 = f"IntendedFor field does not exist - {fmap_fname}"
            print(flag_msg2)
            flaglist.append(flag_msg2)

    # 3. remove the duplicate file _____________________________________________
    os.remove(dup_fpath)

    # 4. update scans.tsv by dropping the dup_fname row ________________________
    tsv_fname = glob.glob(join(ses_dir, '*_scans.tsv'))[0]
    df = pd.read_csv(tsv_fname, sep='\t')
    # regex=False: dup_fname contains '.' which would otherwise match any char
    drop_df = df[~df['filename'].str.contains(dup_fname, regex=False)]
    # keep tab separator and no index column so the file stays valid BIDS TSV
    drop_df.to_csv(tsv_fname, sep='\t', index=False)

# 5. save filenames with missing IntendedFor fields or missing dup filenames ___
os.makedirs(save_dir, exist_ok=True)
txt_filename = os.path.join(save_dir, 'dup_flaglist.txt')
with open(txt_filename, 'w') as f:
    f.write(json.dumps(flaglist))

0 comments on commit f035a3c

Please sign in to comment.