[BF]: escape special characters in solr search #30

Merged (1 commit, Nov 22, 2024)

Changes from all commits
127 changes: 87 additions & 40 deletions shanoir2bids.py
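The functional change in this PR is the new escape_solr_special_characters helper: every free-text query term is escaped before the Solr search_txt string is built, so study, dataset, or subject names containing Lucene/Solr special characters (+, -, (, ), :, ...) no longer break the search. Most of the remaining hunks are Black-style reformatting. Below is a minimal, self-contained sketch of the escaping behavior taken from the diff; the example input is hypothetical and not part of the PR. A usage sketch of the full query construction follows after the diff.

import re

def escape_solr_special_characters(s):
    # Characters with special meaning in Solr/Lucene query syntax;
    # '*' is intentionally left out so wildcards still work.
    special_characters = r'\+\-\!\(\)\{\}\[\]\^"~\?:\\'
    escape_pattern = re.compile(r'([{}])'.format(special_characters))
    return escape_pattern.sub(r'\\\1', s)

# Hypothetical input, for illustration only:
print(escape_solr_special_characters("pilot-study (v2)"))  # -> pilot\-study \(v2\)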
@@ -8,6 +8,7 @@

import os
from os.path import join as opj, splitext as ops, exists as ope, dirname as opd
import re
from glob import glob
import sys
from pathlib import Path
@@ -179,9 +180,9 @@ def read_json_config_file(json_file):


def generate_bids_heuristic_file(
shanoir2bids_dict,
path_heuristic_file,
output_type='("dicom","nii.gz")',
shanoir2bids_dict,
path_heuristic_file,
output_type='("dicom","nii.gz")',
) -> None:
"""Generate heudiconv heuristic.py file from shanoir2bids mapping dict
Parameters
@@ -427,31 +428,55 @@ def is_mapping_bids(self):
for i, far in enumerate(list_find_and_replace):
if i == 0:
normalised_subject = subject
normalised_subject = normalised_subject.replace(far["find"], far["replace"])
normalised_subject = normalised_subject.replace(
far["find"], far["replace"]
)
normalised_subjects.append(normalised_subject)
else:
normalised_subjects = subjects

sessions = list(set([d['bidsSession'] for d in self.shanoir2bids_dict if 'bidsSession' in d]))
extension = '.nii.gz'
sessions = list(
set(
[d["bidsSession"] for d in self.shanoir2bids_dict if "bidsSession" in d]
)
)
extension = ".nii.gz"

if not sessions:
paths = (
"/" + "sub-" + subject + '/' +
map["bidsDir"] + '/' +
"sub-" + subject + '_' +
map["bidsName"] + extension
"/"
+ "sub-"
+ subject
+ "/"
+ map["bidsDir"]
+ "/"
+ "sub-"
+ subject
+ "_"
+ map["bidsName"]
+ extension
for subject in normalised_subjects
for map in self.shanoir2bids_dict
)
else:
paths = (
"/" + "sub-" + subject + '/' +
"ses-" + map['bidsSession'] + '/' +
map["bidsDir"] + '/' +
"sub-" + subject + '_' + "ses-" + map['bidsSession'] + '_' +
map["bidsName"] + extension

"/"
+ "sub-"
+ subject
+ "/"
+ "ses-"
+ map["bidsSession"]
+ "/"
+ map["bidsDir"]
+ "/"
+ "sub-"
+ subject
+ "_"
+ "ses-"
+ map["bidsSession"]
+ "_"
+ map["bidsName"]
+ extension
for subject in normalised_subjects
for map in self.shanoir2bids_dict
)
@@ -493,7 +518,6 @@ def download_subject(self, subject_to_search):
for far in self.list_fars:
bids_subject_id.replace(far[K_FIND], far[K_REPLACE])


bids_seq_session = None

# Loop on each sequence defined in the dictionary
@@ -523,23 +547,44 @@ def download_subject(self, subject_to_search):
"[" + str(seq + 1) + "/" + str(self.n_seq) + "]",
)

request_terms = [
self.shanoir_study_id,
shanoir_seq_name,
subject_to_search,
self.shanoir_session_id,
self.date_from,
self.date_to,
]

def escape_solr_special_characters(s):
# List of Solr special characters
special_characters = r'\+\-\!\(\)\{\}\[\]\^"~\?:\\'
# remove \* from special characters to be able to use wildcards in solr
# Add more if needed
escape_pattern = re.compile(r'([{}])'.format(special_characters))
return escape_pattern.sub(r'\\\1', s)

escaped_request_terms = {s: escape_solr_special_characters(s) for s in request_terms}

# Initialize the parser
search_txt = (
"studyName:"
+ self.shanoir_study_id.replace(" ", "?")
+ " AND datasetName:"
+ shanoir_seq_name.replace(" ", "?")
+ " AND subjectName:"
+ subject_to_search.replace(" ", "?")
+ " AND examinationComment:"
+ self.shanoir_session_id.replace(" ", "*")
+ " AND examinationDate:["
+ self.date_from
+ " TO "
+ self.date_to
+ "]"
"studyName:"
+ escaped_request_terms[self.shanoir_study_id].replace(" ", "?")
+ " AND datasetName:"
+ escaped_request_terms[shanoir_seq_name].replace(" ", "?")
+ " AND subjectName:"
+ escaped_request_terms[subject_to_search].replace(" ", "?")
+ " AND examinationComment:"
+ escaped_request_terms[self.shanoir_session_id].replace(" ", "*")
+ " AND examinationDate:["
+ self.date_from
+ " TO "
+ self.date_to
+ "]"
)

print(search_txt)

args = self.parser.parse_args(
[
"-u",
@@ -647,19 +692,19 @@ def download_subject(self, subject_to_search):

# Launch DICOM to BIDS conversion using heudiconv + heuristic file + dcm2niix options
with tempfile.NamedTemporaryFile(
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".py"
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".py"
) as heuristic_file:
# Generate Heudiconv heuristic file from configuration.json mapping
generate_bids_heuristic_file(
bids_mapping, heuristic_file.name, output_type=self.output_file_type
)
with tempfile.NamedTemporaryFile(
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".json"
mode="r+", encoding="utf-8", dir=self.dl_dir, suffix=".json"
) as dcm2niix_config_file:
self.export_dcm2niix_config_options(dcm2niix_config_file.name)
workflow_params = {
"files": glob(opj(tmp_dicom, "*", "*.dcm"), recursive=True),
"outdir": opj(self.dl_dir, self.shanoir_study_id).replace(' ', ''),
"outdir": opj(self.dl_dir, self.shanoir_study_id).replace(" ", ""),
"subjs": [bids_subject_id],
"converter": "dcm2niix",
"heuristic": heuristic_file.name,
@@ -678,12 +723,13 @@
try:
workflow(**workflow_params)
except AssertionError:
error = (f" \n >> WARNING : No DICOM file available for conversion for subject {subject_to_search} \n "
f"If some datasets are to be downloaded check log file and your configuration file syntax \n ")
error = (
f" \n >> WARNING : No DICOM file available for conversion for subject {subject_to_search} \n "
f"If some datasets are to be downloaded check log file and your configuration file syntax \n "
)
print(error)
fp.write(error)
finally:

if not self.debug_mode:
shutil.rmtree(tmp_archive, ignore_errors=True)
shutil.rmtree(tmp_dicom, ignore_errors=True)
Expand All @@ -708,9 +754,9 @@ def download(self):
dur_min = int((time() - t_start_subject) // 60)
dur_sec = int((time() - t_start_subject) % 60)
end_msg = (
"Downloaded dataset for subject "
+ subject_to_search
+ " in {}m{}s".format(dur_min, dur_sec)
"Downloaded dataset for subject "
+ subject_to_search
+ " in {}m{}s".format(dur_min, dur_sec)
)
banner_msg(end_msg)
else:
@@ -784,7 +830,8 @@ def main():
)
else:
if not stb.is_mapping_bids()[0]:
print(f" WARNING !: Provided BIDS keys {stb.is_mapping_bids()[1]} are not BIDS compliant check syntax in provided configuration file {args.config_file}"
print(
f" WARNING !: Provided BIDS keys {stb.is_mapping_bids()[1]} are not BIDS compliant check syntax in provided configuration file {args.config_file}"
)
stb.download()

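For completeness, a rough usage sketch of how the escaped terms feed into the Solr query assembled in download_subject. It reuses the helper sketched above; all values here are hypothetical placeholders, while the real code reads them from instance attributes such as self.shanoir_study_id and self.date_from.

# Hypothetical placeholder values, for illustration only.
study_id, seq_name, subject, session_id = "My Study", "t1 mprage (3D)", "sub-01", "*"
date_from, date_to = "2020-01-01", "2024-11-22"

# Escape every free-text term; dates are concatenated as-is, as in the merged code.
escaped = {s: escape_solr_special_characters(s) for s in (study_id, seq_name, subject, session_id)}

search_txt = (
    "studyName:" + escaped[study_id].replace(" ", "?")
    + " AND datasetName:" + escaped[seq_name].replace(" ", "?")
    + " AND subjectName:" + escaped[subject].replace(" ", "?")
    + " AND examinationComment:" + escaped[session_id].replace(" ", "*")
    + " AND examinationDate:[" + date_from + " TO " + date_to + "]"
)
print(search_txt)
# studyName:My?Study AND datasetName:t1?mprage?\(3D\) AND subjectName:sub\-01
# AND examinationComment:* AND examinationDate:[2020-01-01 TO 2024-11-22]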