Skip to content

Commit

Permalink
Merge pull request #30 from alexpron/fix-wildcards
Browse files Browse the repository at this point in the history
[BF]: escape special characters in solr search
quentinduche authored Nov 22, 2024
2 parents f05750d + 364b005 commit e49f4c3
Showing 1 changed file with 87 additions and 40 deletions.
127 changes: 87 additions & 40 deletions shanoir2bids.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@

import os
from os.path import join as opj, splitext as ops, exists as ope, dirname as opd
import re
from glob import glob
import sys
from pathlib import Path
@@ -179,9 +180,9 @@ def read_json_config_file(json_file):


def generate_bids_heuristic_file(
shanoir2bids_dict,
path_heuristic_file,
output_type='("dicom","nii.gz")',
shanoir2bids_dict,
path_heuristic_file,
output_type='("dicom","nii.gz")',
) -> None:
"""Generate heudiconv heuristic.py file from shanoir2bids mapping dict
Parameters
@@ -427,31 +428,55 @@ def is_mapping_bids(self):
for i, far in enumerate(list_find_and_replace):
if i == 0:
normalised_subject = subject
normalised_subject = normalised_subject.replace(far["find"], far["replace"])
normalised_subject = normalised_subject.replace(
far["find"], far["replace"]
)
normalised_subjects.append(normalised_subject)
else:
normalised_subjects = subjects

sessions = list(set([d['bidsSession'] for d in self.shanoir2bids_dict if 'bidsSession' in d]))
extension = '.nii.gz'
sessions = list(
set(
[d["bidsSession"] for d in self.shanoir2bids_dict if "bidsSession" in d]
)
)
extension = ".nii.gz"

if not sessions:
paths = (
"/" + "sub-" + subject + '/' +
map["bidsDir"] + '/' +
"sub-" + subject + '_' +
map["bidsName"] + extension
"/"
+ "sub-"
+ subject
+ "/"
+ map["bidsDir"]
+ "/"
+ "sub-"
+ subject
+ "_"
+ map["bidsName"]
+ extension
for subject in normalised_subjects
for map in self.shanoir2bids_dict
)
else:
paths = (
"/" + "sub-" + subject + '/' +
"ses-" + map['bidsSession'] + '/' +
map["bidsDir"] + '/' +
"sub-" + subject + '_' + "ses-" + map['bidsSession'] + '_' +
map["bidsName"] + extension

"/"
+ "sub-"
+ subject
+ "/"
+ "ses-"
+ map["bidsSession"]
+ "/"
+ map["bidsDir"]
+ "/"
+ "sub-"
+ subject
+ "_"
+ "ses-"
+ map["bidsSession"]
+ "_"
+ map["bidsName"]
+ extension
for subject in normalised_subjects
for map in self.shanoir2bids_dict
)
@@ -493,7 +518,6 @@ def download_subject(self, subject_to_search):
for far in self.list_fars:
bids_subject_id.replace(far[K_FIND], far[K_REPLACE])


bids_seq_session = None

# Loop on each sequence defined in the dictionary
@@ -523,23 +547,44 @@ def download_subject(self, subject_to_search):
"[" + str(seq + 1) + "/" + str(self.n_seq) + "]",
)

request_terms = [
self.shanoir_study_id,
shanoir_seq_name,
subject_to_search,
self.shanoir_session_id,
self.date_from,
self.date_to,
]

def escape_solr_special_characters(s):
# List of Solr special characters
special_characters = r'\+\-\!\(\)\{\}\[\]\^"~\?:\\'
# remove \* from special characters to be able to use wildcards in solr
# Add more if needed
escape_pattern = re.compile(r'([{}])'.format(special_characters))
return escape_pattern.sub(r'\\\1', s)

escaped_request_terms = {s: escape_solr_special_characters(s) for s in request_terms}

# Initialize the parser
search_txt = (
"studyName:"
+ self.shanoir_study_id.replace(" ", "?")
+ " AND datasetName:"
+ shanoir_seq_name.replace(" ", "?")
+ " AND subjectName:"
+ subject_to_search.replace(" ", "?")
+ " AND examinationComment:"
+ self.shanoir_session_id.replace(" ", "*")
+ " AND examinationDate:["
+ self.date_from
+ " TO "
+ self.date_to
+ "]"
"studyName:"
+ escaped_request_terms[self.shanoir_study_id].replace(" ", "?")
+ " AND datasetName:"
+ escaped_request_terms[shanoir_seq_name].replace(" ", "?")
+ " AND subjectName:"
+ escaped_request_terms[subject_to_search].replace(" ", "?")
+ " AND examinationComment:"
+ escaped_request_terms[self.shanoir_session_id].replace(" ", "*")
+ " AND examinationDate:["
+ self.date_from
+ " TO "
+ self.date_to
+ "]"
)

print(search_txt)

args = self.parser.parse_args(
[
"-u",
@@ -647,19 +692,19 @@ def download_subject(self, subject_to_search):

# Launch DICOM to BIDS conversion using heudiconv + heuristic file + dcm2niix options
with tempfile.NamedTemporaryFile(
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".py"
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".py"
) as heuristic_file:
# Generate Heudiconv heuristic file from configuration.json mapping
generate_bids_heuristic_file(
bids_mapping, heuristic_file.name, output_type=self.output_file_type
)
with tempfile.NamedTemporaryFile(
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".json"
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".json"
) as dcm2niix_config_file:
self.export_dcm2niix_config_options(dcm2niix_config_file.name)
workflow_params = {
"files": glob(opj(tmp_dicom, "*", "*.dcm"), recursive=True),
"outdir": opj(self.dl_dir, self.shanoir_study_id).replace(' ', ''),
"outdir": opj(self.dl_dir, self.shanoir_study_id).replace(" ", ""),
"subjs": [bids_subject_id],
"converter": "dcm2niix",
"heuristic": heuristic_file.name,
@@ -678,12 +723,13 @@ def download_subject(self, subject_to_search):
try:
workflow(**workflow_params)
except AssertionError:
error = (f" \n >> WARNING : No DICOM file available for conversion for subject {subject_to_search} \n "
f"If some datasets are to be downloaded check log file and your configuration file syntax \n ")
error = (
f" \n >> WARNING : No DICOM file available for conversion for subject {subject_to_search} \n "
f"If some datasets are to be downloaded check log file and your configuration file syntax \n "
)
print(error)
fp.write(error)
finally:

if not self.debug_mode:
shutil.rmtree(tmp_archive, ignore_errors=True)
shutil.rmtree(tmp_dicom, ignore_errors=True)
@@ -708,9 +754,9 @@ def download(self):
dur_min = int((time() - t_start_subject) // 60)
dur_sec = int((time() - t_start_subject) % 60)
end_msg = (
"Downloaded dataset for subject "
+ subject_to_search
+ " in {}m{}s".format(dur_min, dur_sec)
"Downloaded dataset for subject "
+ subject_to_search
+ " in {}m{}s".format(dur_min, dur_sec)
)
banner_msg(end_msg)
else:
@@ -784,7 +830,8 @@ def main():
)
else:
if not stb.is_mapping_bids()[0]:
print(f" WARNING !: Provided BIDS keys {stb.is_mapping_bids()[1]} are not BIDS compliant check syntax in provided configuration file {args.config_file}"
print(
f" WARNING !: Provided BIDS keys {stb.is_mapping_bids()[1]} are not BIDS compliant check syntax in provided configuration file {args.config_file}"
)
stb.download()

0 comments on commit e49f4c3

Please sign in to comment.