Skip to content

Commit

Permalink
check for new filenames before merge/subset
Browse files Browse the repository at this point in the history
python logic written in separate library
- looks for lines in datatype sheet which duplicate a previous datatype (new)
- ignore normal sample files (which are ignored by importer)
perform check from subset-impact-data.sh and merge.py and exit on noticing new filename patterns.
  • Loading branch information
sheridancbio committed Jul 19, 2021
1 parent 551a275 commit 2e47458
Show file tree
Hide file tree
Showing 3 changed files with 567 additions and 0 deletions.
13 changes: 13 additions & 0 deletions import-scripts/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import shutil
import re
import csv
import updated_filename_for_datatype_test

# ------------------------------------------------------------------------------
# globals
Expand Down Expand Up @@ -814,6 +815,15 @@ def organize_files(studies, file_types, merge_clinical):
else:
file_types[SUPP_DATA].append(study_file)

def exit_if_unsupported_filenames_detected(study_paths):
    """Abort the merge if any input study directory uses unsupported filenames.

    Every path in study_paths is tested with
    updated_filename_for_datatype_test.directory_uses_updated_filenames; the
    loop does not stop at the first hit, so each offending directory gets its
    own message. If at least one directory was flagged, the process exits
    with status 2 after all paths have been examined.
    """
    flagged_paths = []
    for path in study_paths:
        if not updated_filename_for_datatype_test.directory_uses_updated_filenames(path):
            continue
        print("unsupported filenames present in study directory '%s' - unable to merge" % path)
        flagged_paths.append(path)
    if flagged_paths:
        sys.exit(2)

def usage():
    """Print the merge.py command-line synopsis to OUTPUT_FILE."""
    usage_text = 'merge.py --subset [/path/to/subset] --output-directory [/path/to/output] --study-id [study id] --cancer-type [cancer type] --merge-clinical [true/false] --exclude-supplemental-data [true/false] --excluded-samples [/path/to/exclude_list] <path/to/study path/to/study ...>'
    print >> OUTPUT_FILE, usage_text

Expand Down Expand Up @@ -919,6 +929,9 @@ def main():
else:
exclude_supp_data = True

# test input study paths for new filenames
exit_if_unsupported_filenames_detected(args)

# get all the filenames
organize_files(args, file_types, merge_clinical)

Expand Down
20 changes: 20 additions & 0 deletions import-scripts/subset-impact-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,26 @@ if [ -z $PORTAL_SCRIPTS_DIRECTORY ]; then
fi
echo -e "\tPORTAL_SCRIPTS_DIRECTORY="$PORTAL_SCRIPTS_DIRECTORY

# check for presence of new filename patterns (not yet supported) and fail if present
#
# The input directory is handed to python as a command-line argument (sys.argv[1])
# rather than being interpolated into the python source, so quotes or backslashes
# in the path cannot break or alter the snippet. The python process exits non-zero
# when directory_uses_updated_filenames() returns a truthy value (or when the
# snippet itself fails, e.g. the module cannot be imported); either case stops the subset.
# The check is run once; a second identical invocation would only repeat the same scan.

PYTHON_FILENAME_CHECK_CALL='import sys
import updated_filename_for_datatype_test
sys.exit(updated_filename_for_datatype_test.directory_uses_updated_filenames(sys.argv[1]))
'
if ! python -c "$PYTHON_FILENAME_CHECK_CALL" "$INPUT_DIRECTORY" ; then
    echo "unsupported filenames present - skipping subset" >&2
    exit 1
fi

# status flags
GEN_SUBSET_LIST_FAILURE=0
MERGE_SCRIPT_FAILURE=0
Expand Down
Loading

0 comments on commit 2e47458

Please sign in to comment.