Skip to content

Commit

Permalink
check for new filenames before merge/subset
Browse files Browse the repository at this point in the history
python logic written in separate library
- looks for any filenames present in the current datatype sheet (embedded in script)
- excludes any filenames which were in use before the introduction of updated filenames
- ignores normal sample files (which are ignored by importer)
- determines cancer study id for proper handling of <CANCER_STUDY> placeholder
- matches filenames in directory against wildcard patterns (up to a single asterisk)
The check is performed from subset-impact-data.sh and merge.py, which exit on noticing new filename patterns.
  • Loading branch information
sheridancbio committed Jul 22, 2021
1 parent 2ae12e9 commit 9b8cf01
Show file tree
Hide file tree
Showing 3 changed files with 552 additions and 1 deletion.
14 changes: 13 additions & 1 deletion import-scripts/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import shutil
import re
import csv
import study_directory_uses_updated_filenames

# ------------------------------------------------------------------------------
# globals
Expand Down Expand Up @@ -103,7 +104,6 @@
'HYBRIDIZATION REF',
]


# only files fitting patterns placed in these two lists will be merged
NORMAL_MERGE_PATTERNS = [MUTATION_META_PATTERN,
FUSION_META_PATTERN,
Expand Down Expand Up @@ -814,6 +814,15 @@ def organize_files(studies, file_types, merge_clinical):
else:
file_types[SUPP_DATA].append(study_file)

def exit_if_unsupported_filenames_detected(study_paths):
    """Abort the merge when any input study directory is flagged.

    Every path in study_paths is passed to
    study_directory_uses_updated_filenames.study_directory_uses_updated_filenames();
    a message is printed for each path that call flags. All paths are
    examined before exiting, so every flagged directory is reported.
    If at least one path was flagged, the process exits with status 2.
    """
    flagged_paths = []
    for candidate_path in study_paths:
        if not study_directory_uses_updated_filenames.study_directory_uses_updated_filenames(candidate_path):
            continue
        print("unsupported filenames present in study directory '%s' - unable to merge" % candidate_path)
        flagged_paths.append(candidate_path)
    if flagged_paths:
        sys.exit(2)

def usage():
    """Write the command-line usage summary for merge.py to OUTPUT_FILE."""
    usage_text = 'merge.py --subset [/path/to/subset] --output-directory [/path/to/output] --study-id [study id] --cancer-type [cancer type] --merge-clinical [true/false] --exclude-supplemental-data [true/false] --excluded-samples [/path/to/exclude_list] <path/to/study path/to/study ...>'
    print >> OUTPUT_FILE, usage_text

Expand Down Expand Up @@ -919,6 +928,9 @@ def main():
else:
exclude_supp_data = True

# test input study paths for new filenames
exit_if_unsupported_filenames_detected(args)

# get all the filenames
organize_files(args, file_types, merge_clinical)

Expand Down
Loading

0 comments on commit 9b8cf01

Please sign in to comment.