From 446de5cd3516708b9ea728765aa5c5c6b861d680 Mon Sep 17 00:00:00 2001 From: Chelsea-Na <109613735+Chelsea-Na@users.noreply.github.com> Date: Fri, 12 Jan 2024 19:47:18 -0800 Subject: [PATCH] [GEN-1018] Update __main__.py and validate.py (#543) * Update __main__.py Updating the -h advice for filetype, oncotree code, and other minor updates. * Updating validate.py and __main__.py * lint * Update tests --------- Co-authored-by: Thomas Yu --- README.md | 2 +- genie/__main__.py | 63 +++++++++++++++++++++++------------------- genie/validate.py | 7 ++++- tests/test_validate.py | 6 ++-- 4 files changed, 46 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 1002e492..555bb925 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ These are instructions on how you would develop and test the pipeline locally. If you are having trouble with the above, try installing via `pipenv` - 1. Specify a python version that is supported by this repo: + 1. Specify a python version that is supported by this repo: ```pipenv --python ``` 1. [pipenv install from requirements file](https://docs.pipenv.org/en/latest/advanced.html#importing-from-requirements-txt) diff --git a/genie/__main__.py b/genie/__main__.py index 3e5ad0ec..f9d21d17 100644 --- a/genie/__main__.py +++ b/genie/__main__.py @@ -44,49 +44,35 @@ def build_parser(): subparsers = parser.add_subparsers( title="commands", - description="The following commands are available:", - help='For additional help: "genie -h"', + description="The following commands are available: ", + help='For additional help use: "genie -h"', ) parser_validate = subparsers.add_parser( - "validate", help="Validates GENIE file formats" + "validate", help="Validates GENIE file formats. " ) parser_validate.add_argument( "filepath", type=str, nargs="+", - help="File(s) that you are validating." - "If you validation your clinical files and you have both sample and " - "patient files, you must provide both", + help="File(s) that you are validating. " + "If you have separate clinical sample and patient files, " + "you must provide both files when validating.", ) parser_validate.add_argument("center", type=str, help="Contributing Centers") - parser_validate.add_argument( - "--format_registry_packages", - type=str, - nargs="+", - default=["genie_registry"], - help="Python package name(s) to get valid file formats from (default: %(default)s).", - ) - - parser_validate.add_argument( - "--oncotree_link", type=str, help="Link to oncotree code" - ) - validate_group = parser_validate.add_mutually_exclusive_group() validate_group.add_argument( "--filetype", type=str, - help="By default, the validator uses the filename to match " + help="Use the --filetype {FILETYPE} parameter to ignore filename validation. " + "By default, the validator uses the filename to match " "the file format. If your filename is incorrectly named, " - "it will be invalid. If you know the file format you are " - "validating, you can ignore the filename validation and skip " - "to file content validation. " - "Note, the filetypes with SP at " - "the end are for special sponsored projects.", + "it will be invalid. " + "Options: [maf, vcf, clinical, assayinfo, bed, cna, sv, seg, mutationsInCis]", ) validate_group.add_argument( @@ -98,18 +84,39 @@ def build_parser(): "to this directory.", ) + parser_validate.add_argument( + "--oncotree_link", + type=str, + help="Specify an oncotree url when validating your clinical " + "file " + "(e.g: https://oncotree.info/api/tumorTypes/tree?version=oncotree_2021_11_02). " + "By default the oncotree version used will be specified in this entity: " + "syn13890902", + ) + + parser_validate.add_argument( + "--nosymbol-check", + action="store_true", + help="Ignores specific post-processing validation criteria related to HUGO symbols " + "in the structural variant and cna files.", + ) + # TODO: remove this default when private genie project is ready parser_validate.add_argument( "--project_id", type=str, default="syn3380222", - help="Synapse Project ID where data is stored. (default: %(default)s).", + help="FOR DEVELOPER USE ONLY: Synapse Project ID where data is stored. " + "(default: %(default)s).", ) parser_validate.add_argument( - "--nosymbol-check", - action="store_true", - help="Do not check hugo symbols of fusion and cna file", + "--format_registry_packages", + type=str, + nargs="+", + default=["genie_registry"], + help="FOR DEVELOPER USE ONLY: Python package name(s) to get valid file formats " + "from (default: %(default)s).", ) parser_validate.set_defaults(func=validate._perform_validate) diff --git a/genie/validate.py b/genie/validate.py index 5bf0f17a..af6de306 100644 --- a/genie/validate.py +++ b/genie/validate.py @@ -95,8 +95,13 @@ def validate_single_file(self, **kwargs): valid: Boolean value of validation status """ if self.file_type not in self._format_registry: + allowed_filetypes = list(self._format_registry.keys()) + error_message = ( + f"Your filename is incorrect! Please change your filename before you run the validator or specify --filetype if you are running the validator locally. " + f"If specifying filetype, options are: [{', '.join(allowed_filetypes)}]\n" + ) valid_result_cls = example_filetype_format.ValidationResults( - errors="Your filename is incorrect! Please change your filename before you run the validator or specify --filetype if you are running the validator locally", + errors=error_message, warnings="", ) else: diff --git a/tests/test_validate.py b/tests/test_validate.py index 4e16cfee..85b9610b 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -162,7 +162,8 @@ def test_filetype_validate_single_file(syn): "----------------ERRORS----------------\n" "Your filename is incorrect! Please change your " "filename before you run the validator or specify " - "--filetype if you are running the validator locally" + "--filetype if you are running the validator locally. " + "If specifying filetype, options are: [wrong]\n" ) with patch.object(FileFormat, "validateFilename", side_effect=AssertionError): @@ -185,7 +186,8 @@ def test_wrongfiletype_validate_single_file(syn): "----------------ERRORS----------------\n" "Your filename is incorrect! Please change your " "filename before you run the validator or specify " - "--filetype if you are running the validator locally" + "--filetype if you are running the validator locally. " + "If specifying filetype, options are: [wrong]\n" ) with patch.object(