Skip to content

Commit

Permalink
custom_schema_support: allows for custom data schemas to be loaded
Browse files Browse the repository at this point in the history
* NEW Supports loading different types of data schema definitions and validating data against them
* NEW Add type to additional resources (optional)

Signed-off-by: Eamonn Maguire <[email protected]>
  • Loading branch information
eamonnmag committed Oct 12, 2016
1 parent 65a42c8 commit 464862e
Show file tree
Hide file tree
Showing 16 changed files with 515 additions and 119 deletions.
8 changes: 6 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
.. image:: https://img.shields.io/github/license/HEPData/hepdata-validator.svg
:target: https://github.com/HEPData/hepdata-validator/blob/master/LICENSE

.. image:: https://img.shields.io/github/release/hepdata/hepdata-validator.svg?maxAge=2592000
:target: https://github.com/HEPData/hepdata-validator/releases

.. image:: https://img.shields.io/github/issues/hepdata/hepdata-validator.svg?maxAge=2592000
:target: https://github.com/HEPData/hepdata-validator/issues

.. image:: https://readthedocs.org/projects/hepdata-validator/badge/?version=latest
:target: http://hepdata-validator.readthedocs.io/

Expand All @@ -27,8 +33,6 @@ HEPData is built upon Invenio 3 and is open source and free to use!
* Documentation: http://hepdata-validator.readthedocs.io/




Installation
------------

Expand Down
84 changes: 33 additions & 51 deletions hepdata_validator/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,28 @@
import json
from jsonschema import validate, ValidationError
import yaml
from yaml.scanner import ScannerError
from yaml.parser import ParserError
# -*- coding: utf-8 -*-
#
# This file is part of HEPData.
# Copyright (C) 2016 CERN.
#
# HEPData is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# HEPData is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HEPData; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

import abc

from .version import __version__

Expand All @@ -15,54 +35,24 @@ class Validator(object):
which validates schema files created with the
JSONschema syntax http://json-schema.org/
"""
__metaclass__ = abc.ABCMeta

messages = {}
schema_file = ''
default_schema_file = ''

def __init__(self):
def __init__(self, *args, **kwargs):
self.messages = {}

self.schemas = kwargs.get('schemas', {})

@abc.abstractmethod
def validate(self, **kwargs):
"""
Validates a file.
:param file_path: path to file to be loaded.
:param data: pre loaded YAML object (optional).
:return: true if valid, false otherwise
"""
schema = json.load(open(self.schema_file, 'r'))

data = kwargs.pop("data", None)
file_path = kwargs.pop("file_path", None)

if data is None:

try:
try:
data = yaml.load(open(file_path, 'r'), Loader=yaml.CLoader)
except ScannerError as se:
self.add_validation_message(ValidationMessage(file=file_path, message=str(se)))
return False
except: #pragma: no cover
try: #pragma: no cover
data = yaml.load(open(file_path, 'r')) #pragma: no cover
except ScannerError as se: #pragma: no cover
self.add_validation_message(ValidationMessage(file=file_path, message=str(se))) #pragma: no cover
return False #pragma: no cover

try:
validate(data, schema)

except ValidationError as ve:
self.add_validation_message(
ValidationMessage(file=file_path,
message="{} in {}".format(ve.message, ve.instance)))
return False
except ParserError as pe:
self.add_validation_message(
ValidationMessage(file=file_path,
message=pe.__str__()))
return False

return True

def has_errors(self, file_name):
"""
Expand Down Expand Up @@ -130,12 +120,4 @@ def __init__(self, file='', level='error', message=''):
self.message = message

def __unicode__(self):
return self.level + ' - ' + self.message


class UnsupportedDataSchemaException(Exception):
    """
    Represents an error on the request of a custom data schema which does not exist.

    :param message: human readable description of the problem. It is stored on
        ``self.message`` and also handed to ``Exception`` so that ``args`` and
        ``str(exc)`` carry the text.
    """
    def __init__(self, message=''):
        # Without this call a keyword-only construction (message='...') leaves
        # ``args`` empty and ``str(exc)`` blank in logs and tracebacks.
        super(UnsupportedDataSchemaException, self).__init__(message)
        self.message = message

    def __unicode__(self):
        return self.message
return self.level + ' - ' + self.message
106 changes: 104 additions & 2 deletions hepdata_validator/data_file_validator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,35 @@
# -*- coding: utf-8 -*-
#
# This file is part of HEPData.
# Copyright (C) 2016 CERN.
#
# HEPData is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# HEPData is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HEPData; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

import json

import os
from hepdata_validator import Validator
import yaml
from yaml.scanner import ScannerError

from hepdata_validator import Validator, ValidationMessage
from jsonschema import validate as json_validate, ValidationError

__author__ = 'eamonnmaguire'

Expand All @@ -9,4 +39,76 @@ class DataFileValidator(Validator):
Validates a data file in YAML/JSON format
"""
base_path = os.path.dirname(__file__)
schema_file = base_path + '/schemas/data_schema.json'
default_schema_file = base_path + '/schemas/data_schema.json'

custom_data_schemas = {}

def load_custom_schema(self, type, schema_file_path=None):
    """
    Loads a custom schema, or will use a stored version for the given type if available.

    Loaded schemas are stored in ``self.custom_data_schemas`` keyed by ``type``;
    an entry that is already stored is returned as-is, even when a
    ``schema_file_path`` is supplied.

    :param type: name of the custom data type, e.g. histfactory
    :param schema_file_path: optional path to the JSON schema file. When it is
        not given, ``<base_path>/schemas/<type>_schema.json`` is used.
    :return: the schema as parsed by ``json.load``
    :raises UnsupportedDataSchemaException: if anything goes wrong while looking
        up, opening or parsing the schema.
    """
    try:
        if type in self.custom_data_schemas:
            return self.custom_data_schemas[type]

        if schema_file_path:
            _schema_file = schema_file_path
        else:
            _schema_file = os.path.join(self.base_path, 'schemas', "{0}_schema.json".format(type))

        # Context manager so the file handle is released even if parsing fails.
        with open(_schema_file, 'r') as schema_stream:
            custom_data_schema = json.load(schema_stream)

        self.custom_data_schemas[type] = custom_data_schema

        return custom_data_schema
    except Exception:
        # Deliberately broad: a missing file, unreadable JSON or an unusable
        # type key are all reported to the caller as an unsupported data type.
        raise UnsupportedDataSchemaException(
            message="There is no schema defined for the '{0}' data type.".format(type))

def validate(self, file_path):
    """
    Validates every YAML document found in a data file.

    A document that is a mapping with a ``type`` key is validated against the
    custom schema returned by ``load_custom_schema`` for that type; every
    other document is validated against the default data schema. Empty
    documents (``None``) are skipped. Each schema violation is recorded via
    ``add_validation_message`` and validation continues with the next document.

    :param file_path: path to the YAML/JSON data file to validate.
    :return: True if no errors are recorded for ``file_path``, False otherwise
        (including when the file cannot be scanned as YAML).
    :raises UnsupportedDataSchemaException: propagated from
        ``load_custom_schema`` when a document names an unknown type.
    """
    with open(self.default_schema_file, 'r') as schema_stream:
        default_data_schema = json.load(schema_stream)

    # Prefer the libyaml-backed loader and fall back to the pure Python one
    # when PyYAML was built without it. The loader is always passed explicitly
    # because newer PyYAML releases reject a call without ``Loader``.
    yaml_loader = getattr(yaml, 'CLoader', yaml.Loader)

    try:
        # ``load_all`` is lazy, so the file must stay open while iterating.
        with open(file_path, 'r') as data_stream:
            # NOTE(review): Loader/CLoader are not the "safe" loaders; if data
            # files can come from untrusted sources consider a safe loader.
            for data_item in yaml.load_all(data_stream, Loader=yaml_loader):
                if data_item is None:
                    continue
                try:
                    # Only mappings can carry a schema type. Without the
                    # isinstance check a plain string document containing
                    # "type" passes the ``in`` test and then fails on indexing.
                    if isinstance(data_item, dict) and 'type' in data_item:
                        custom_schema = self.load_custom_schema(data_item['type'])
                        json_validate(data_item, custom_schema)
                    else:
                        json_validate(data_item, default_data_schema)

                except ValidationError as ve:
                    self.add_validation_message(
                        ValidationMessage(file=file_path,
                                          message=ve.message + ' in ' + str(ve.instance)))
    except ScannerError as se:
        # NOTE(review): only scanner errors are handled here; other YAML
        # errors (e.g. parser errors) still propagate to the caller.
        self.add_validation_message(
            ValidationMessage(file=file_path,
                              message='There was a problem parsing the file. '
                                      'This can be because you forgot spaces '
                                      'after colons in your YAML file for instance.\n{0}'.format(se.__repr__()))
        )
        # Report the failure explicitly instead of falling through with None.
        return False

    return not self.has_errors(file_path)


class UnsupportedDataSchemaException(Exception):
    """
    Represents an error on the request of a custom data schema which does not exist.

    :param message: human readable description of the problem. It is stored on
        ``self.message`` and also handed to ``Exception`` so that ``args`` and
        ``str(exc)`` carry the text.
    """
    def __init__(self, message=''):
        # Without this call a keyword-only construction (message='...') leaves
        # ``args`` empty and ``str(exc)`` blank in logs and tracebacks.
        super(UnsupportedDataSchemaException, self).__init__(message)
        self.message = message

    def __unicode__(self):
        return self.message
38 changes: 37 additions & 1 deletion hepdata_validator/schemas/additional_info_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"id": "http://hepdata.org/submission/schema/additional_info",
"type": "object",
"title": "This section is optional for the provision of information about the overall submission.",
"description": "An explanation about the puropose of this instance described by this schema.",
"description": "An explanation about the purpose of this instance described by this schema.",
"name": "/",
"properties": {
"record_ids": {
Expand Down Expand Up @@ -128,6 +128,42 @@
}
]
},
"associated_records": {
"id": "http://jsonschema.net/associated_records",
"type": "array",
"description": "Links to other HEPData Submissions or INSPIRE Records that relate to this submission.",
"items": [
{
"id": "http://jsonschema.net/associated_records/output",
"type": "object",
"properties": {
"type": {
"id": "http://jsonschema.net/associated_records/output/type",
"type": "string"
},
"identifier": {
"id": "http://jsonschema.net/associated_records/output/identifier",
"type": [
"string",
"number"
]
},
"description": {
"id": "http://jsonschema.net/associated_records/output/description",
"type": "string"
},
"url": {
"id": "http://jsonschema.net/associated_records/output/url",
"type": "string"
}
},
"required": [
"identifier",
"type"
]
}
]
},
"comment": {
"id": "http://hepdata.org/submission/schema/additional_info/comment",
"type": "string",
Expand Down
30 changes: 24 additions & 6 deletions hepdata_validator/schemas/data_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
"id": "http://hepdata.org/submission/schema/data",
"type": "object",
"properties": {
"type": {
"id": "http://jsonschema.net/data_file",
"type": "string"
},
"independent_variables": {
"id": "http://hepdata.org/submission/schema/data/independent_variables",
"type": "array",
Expand Down Expand Up @@ -37,7 +41,10 @@
"properties": {
"value": {
"id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value",
"type": ["string","number"]
"type": [
"string",
"number"
]
},
"low": {
"id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value",
Expand Down Expand Up @@ -105,7 +112,6 @@
"number"
]
},

"units": {
"id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/qualifiers/qualifier/name",
"type": "string"
Expand All @@ -127,7 +133,10 @@
"properties": {
"value": {
"id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/value",
"type": ["string","number"]
"type": [
"string",
"number"
]
},
"errors": {
"id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors",
Expand All @@ -138,19 +147,28 @@
"properties": {
"symerror": {
"id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors/error/symerror",
"type": ["number", "string"]
"type": [
"number",
"string"
]
},
"asymerror": {
"id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors/error/asymerror",
"type": "object",
"properties": {
"minus": {
"id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors/error/asymerror/minus",
"type": ["number", "string"]
"type": [
"number",
"string"
]
},
"plus": {
"id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors/error/asymerror/plus",
"type": ["number", "string"]
"type": [
"number",
"string"
]
}
}
},
Expand Down
Loading

0 comments on commit 464862e

Please sign in to comment.