From 1f9859806a33915715dd368a11b8bfb21f983b94 Mon Sep 17 00:00:00 2001 From: Graeme Watt Date: Sat, 6 Jun 2020 21:40:30 +0100 Subject: [PATCH] Check that each value of cmenergies keyword is numeric (closes #18) * Move _get_major_version to Validator for SubmissionFileValidator use. * Add check that each value of cmenergies are numeric or a range. * Add test for check that cmenergies values are numeric or a range. * Modify valid_submission.yaml for example of cmenergies as a range. * Correct duplicate test_valid_submission_yaml_v1 function name. * Add Python 3.7 and 3.8 to versions checked in Travis CI. * Explain in README.rst how to pass 'data' for SubmissionFileValidator. --- .travis.yml | 2 ++ README.rst | 14 +++++++++++ hepdata_validator/__init__.py | 8 +++++++ hepdata_validator/data_file_validator.py | 8 ------- .../submission_file_validator.py | 23 +++++++++++++++++++ testsuite/test_data/invalid_cmenergies.yaml | 10 ++++++++ testsuite/test_data/valid_submission.yaml | 2 +- testsuite/test_submission_validator.py | 19 +++++++++++++-- 8 files changed, 75 insertions(+), 11 deletions(-) create mode 100644 testsuite/test_data/invalid_cmenergies.yaml diff --git a/.travis.yml b/.travis.yml index 5a50fb6..a9e0d10 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,8 @@ language: python python: - "2.7" - "3.6" + - "3.7" + - "3.8" cache: - pip diff --git a/README.rst b/README.rst index 8948a63..7757204 100644 --- a/README.rst +++ b/README.rst @@ -116,6 +116,20 @@ for the error message lookup map. data_file_validator.print_errors('data.yaml') +For the analogous case of the ``SubmissionFileValidator``: + +.. 
def _get_major_version(self):
    """
    Return the major component of the validator's schema version.

    :return: integer parsed from the text of ``self.schema_version`` that precedes the first '.'
    """
    # Everything before the first dot is the major version, e.g. '1' in '1.0.0'.
    major_text, _, _ = self.schema_version.partition('.')
    return int(major_text)
def check_cmenergies(data_item):
    """
    Check that 'cmenergies' values are numeric unless a range like 1.7-4.7.

    Every value of the 'cmenergies' keyword must be convertible to a float.
    The single exception is a range written as "low-high" (two unsigned
    decimal numbers, each with any number of decimal digits), which is
    accepted only when it is the sole value of the keyword.

    :param data_item: YAML document from submission.yaml; its 'keywords' entry
                      is iterated and each keyword's 'name' and 'values' are read
    :return: None; raise ValidationError if a value is neither numeric nor a lone range
    """
    for keyword in data_item['keywords']:
        if keyword['name'] != 'cmenergies':
            continue
        cmenergies = keyword['values']
        for cmenergy in cmenergies:
            try:
                float(cmenergy)
                continue
            except (TypeError, ValueError):
                # TypeError: the value is not a string or number at all
                # (e.g. None or a nested list); report it as invalid below
                # instead of letting the TypeError escape.
                pass
            try:
                # '\d*' (not '\d?') so more than one decimal digit is allowed,
                # e.g. '1.75-4.75'.
                is_range = re.match(r'^\d+\.?\d*-\d+\.?\d*$', cmenergy) is not None
            except TypeError:
                is_range = False
            # A range is only meaningful on its own, not mixed with other values.
            if not is_range or len(cmenergies) > 1:
                raise ValidationError("Invalid value (in GeV) for cmenergies: %s" % cmenergy,
                                      instance=data_item)
def test_invalid_cmenergies_submission_yaml_v1(validator_v1, data_path):
    """
    Checks that the SubmissionFileValidator V1 reports a submission document
    with a non-numeric cmenergies value (``7000 GeV``) as invalid
    """

    invalid_path = os.path.join(data_path, 'invalid_cmenergies.yaml')

    with open(invalid_path, 'r') as handle:
        # load_all is lazy, so validation has to happen while the file is open
        documents = yaml.load_all(handle, Loader=Loader)
        outcome = validator_v1.validate(file_path=invalid_path, data=documents)
        validator_v1.print_errors(invalid_path)

    assert outcome is False