From a716cc066fdfdab6963b5fb8d52e1ff3c6fcf87a Mon Sep 17 00:00:00 2001 From: Chelesa-Na Date: Thu, 28 Dec 2023 22:35:41 -0800 Subject: [PATCH] Updating validate.py and __main__.py --- genie/__main__.py | 59 ++++++++++++++++++++++++----------------------- genie/validate.py | 7 +++++- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/genie/__main__.py b/genie/__main__.py index 3ddac7b6..62cc1c31 100644 --- a/genie/__main__.py +++ b/genie/__main__.py @@ -44,51 +44,34 @@ def build_parser(): subparsers = parser.add_subparsers( title="commands", - description="The following commands are available:", - help='For additional help: "genie -h"', + description="The following commands are available: ", + help='For additional help use: "genie -h"', ) parser_validate = subparsers.add_parser( - "validate", help="Validates GENIE file formats" + "validate", help="Validates GENIE file formats. " ) parser_validate.add_argument( "filepath", type=str, nargs="+", - help="File(s) that you are validating." - "When validating clinical files and you have separate sample and " - "patient files, you must provide both", + help="File(s) that you are validating. " + "If you have separate clinical sample and patient files, " + "you must provide both files when validating.", ) parser_validate.add_argument("center", type=str, help="Contributing Centers") - parser_validate.add_argument( - "--format_registry_packages", - type=str, - nargs="+", - default=["genie_registry"], - help="FOR DEVELOPER USE ONLY: Python package name(s) to get valid file formats" - "from (default: %(default)s).", - ) - - parser_validate.add_argument( - "--oncotree_link", type=str, help="Specify an oncotree url when validating your clinical" - "file with a different oncotree code version" - "(e.g: https://oncotree.info/api/tumorTypes/tree?version=oncotree_2021_11_02)" - "By default the oncotree version used will be specified in this" - "entity: syn13890902", - ) - validate_group = parser_validate.add_mutually_exclusive_group() validate_group.add_argument( "--filetype", type=str, - help="Use the --filetype FILETYPE parameter to ignore the file naming validation." + help="Use the --filetype {FILETYPE} parameter to ignore filename validation. " "By default, the validator uses the filename to match " "the file format. If your filename is incorrectly named, " - "it will be invalid." + "it will be invalid. " "Options: [maf, vcf, clinical, assayinfo, bed, cna, sv, seg, mutationsInCis]", ) @@ -101,19 +84,37 @@ def build_parser(): "to this directory.", ) + parser_validate.add_argument( + "--oncotree_link", type=str, help="Specify an oncotree url when validating your clinical " + "file " + "(e.g: https://oncotree.info/api/tumorTypes/tree?version=oncotree_2021_11_02). " + "By default the oncotree version used will be specified in this entity: " + "syn13890902", + ) + + parser_validate.add_argument( + "--nosymbol-check", + action="store_true", + help="Ignores specific post-processing validation criteria related to HUGO symbols " + "in the structural variant and cna files.", + ) + # TODO: remove this default when private genie project is ready parser_validate.add_argument( "--project_id", type=str, default="syn3380222", - help="FOR DEVELOPER USE ONLY: Synapse Project ID where data is stored." + help="FOR DEVELOPER USE ONLY: Synapse Project ID where data is stored. " "(default: %(default)s).", ) parser_validate.add_argument( - "--nosymbol-check", - action="store_true", - help="Do not check hugo symbols of structural variant and cna file", + "--format_registry_packages", + type=str, + nargs="+", + default=["genie_registry"], + help="FOR DEVELOPER USE ONLY: Python package name(s) to get valid file formats " + "from (default: %(default)s).", ) parser_validate.set_defaults(func=validate._perform_validate) diff --git a/genie/validate.py b/genie/validate.py index 5bf0f17a..c4b0928c 100644 --- a/genie/validate.py +++ b/genie/validate.py @@ -95,8 +95,13 @@ def validate_single_file(self, **kwargs): valid: Boolean value of validation status """ if self.file_type not in self._format_registry: + allowed_filetypes = list(self._format_registry.keys()) + error_message = ( + f"Your filename is incorrect! Please change your filename before you run the validator or specify --filetype if you are running the validator locally. " + f"If specifying filetype, options are: [{', '.join(allowed_filetypes)}]" + ) valid_result_cls = example_filetype_format.ValidationResults( - errors="Your filename is incorrect! Please change your filename before you run the validator or specify --filetype if you are running the validator locally", + errors=error_message, warnings="", ) else: