From 464862ed5e4f58fdafe4e01a210faf3a01dc399a Mon Sep 17 00:00:00 2001
From: Eamonn Maguire <eamonnmag@gmail.com>
Date: Wed, 12 Oct 2016 17:05:43 +0100
Subject: [PATCH] custom_schema_support: allows for custom data schemas to be
 loaded

* NEW Supports different types of data schema definitions to be loaded and validated against
* NEW Add type to additional resources (optional)

Signed-off-by: Eamonn Maguire <eamonnmag@gmail.com>
---
 README.rst                                    |   8 +-
 hepdata_validator/__init__.py                 |  84 ++++++--------
 hepdata_validator/data_file_validator.py      | 106 +++++++++++++++++-
 .../schemas/additional_info_schema.json       |  38 ++++++-
 hepdata_validator/schemas/data_schema.json    |  30 ++++-
 .../schemas/submission_schema.json            |  60 +++++++++-
 .../submission_file_validator.py              |   6 +-
 hepdata_validator/version.py                  |   2 +-
 testsuite/__init__.py                         |   1 -
 testsuite/test_data/custom_data_schema.json   |  84 ++++++++++++++
 testsuite/test_data/invalid_data.yaml         |  29 -----
 testsuite/test_data/invalid_data_file.yaml    |   8 ++
 .../test_data/invalid_syntax_submission.yaml  |  14 +++
 testsuite/test_data/valid_file_custom.yaml    |   8 ++
 ...lid_submission_with_associated_record.yaml |  66 +++++++++++
 testsuite/validation_test.py                  |  90 ++++++++++++---
 16 files changed, 515 insertions(+), 119 deletions(-)
 create mode 100644 testsuite/test_data/custom_data_schema.json
 delete mode 100644 testsuite/test_data/invalid_data.yaml
 create mode 100644 testsuite/test_data/invalid_data_file.yaml
 create mode 100644 testsuite/test_data/invalid_syntax_submission.yaml
 create mode 100644 testsuite/test_data/valid_file_custom.yaml
 create mode 100644 testsuite/test_data/valid_submission_with_associated_record.yaml

diff --git a/README.rst b/README.rst
index 7f824e4..88ceb82 100644
--- a/README.rst
+++ b/README.rst
@@ -11,6 +11,12 @@
 .. image:: https://img.shields.io/github/license/HEPData/hepdata-validator.svg
     :target: https://github.com/HEPData/hepdata-validator/blob/master/LICENSE
 
+.. image:: https://img.shields.io/github/release/hepdata/hepdata-validator.svg?maxAge=2592000
+    :target: https://github.com/HEPData/hepdata-validator/releases
+
+.. image:: https://img.shields.io/github/issues/hepdata/hepdata-validator.svg?maxAge=2592000
+    :target: https://github.com/HEPData/hepdata-validator/issues
+
 .. image:: https://readthedocs.org/projects/hepdata-validator/badge/?version=latest
     :target: http://hepdata-validator.readthedocs.io/
 
@@ -27,8 +33,6 @@ HEPData is built upon Invenio 3 and is open source and free to use!
 * Documentation: http://hepdata-validator.readthedocs.io/
 
 
-
-
 Installation
 ------------
 
diff --git a/hepdata_validator/__init__.py b/hepdata_validator/__init__.py
index 0794dc0..40ece4e 100644
--- a/hepdata_validator/__init__.py
+++ b/hepdata_validator/__init__.py
@@ -1,8 +1,28 @@
-import json
-from jsonschema import validate, ValidationError
-import yaml
-from yaml.scanner import ScannerError
-from yaml.parser import ParserError
+# -*- coding: utf-8 -*-
+#
+# This file is part of HEPData.
+# Copyright (C) 2016 CERN.
+#
+# HEPData is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# HEPData is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HEPData; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307, USA.
+#
+# In applying this license, CERN does not
+# waive the privileges and immunities granted to it by virtue of its status
+# as an Intergovernmental Organization or submit itself to any jurisdiction.
+
+import abc
 
 from .version import __version__
 
@@ -15,12 +35,17 @@ class Validator(object):
     which validates schema files created with the
     JSONschema syntax http://json-schema.org/
     """
+    __metaclass__ = abc.ABCMeta
+
     messages = {}
-    schema_file = ''
+    default_schema_file = ''
 
-    def __init__(self):
+    def __init__(self, *args, **kwargs):
         self.messages = {}
 
+        self.schemas = kwargs.get('schemas', {})
+
+    @abc.abstractmethod
     def validate(self, **kwargs):
         """
         Validates a file.
@@ -28,41 +53,6 @@ def validate(self, **kwargs):
         :param data: pre loaded YAML object (optional).
         :return: true if valid, false otherwise
         """
-        schema = json.load(open(self.schema_file, 'r'))
-
-        data = kwargs.pop("data", None)
-        file_path = kwargs.pop("file_path", None)
-
-        if data is None:
-
-            try:
-                try:
-                    data = yaml.load(open(file_path, 'r'), Loader=yaml.CLoader)
-                except ScannerError as se:
-                    self.add_validation_message(ValidationMessage(file=file_path, message=str(se)))
-                    return False
-            except: #pragma: no cover
-                try: #pragma: no cover
-                    data = yaml.load(open(file_path, 'r')) #pragma: no cover
-                except ScannerError as se: #pragma: no cover
-                    self.add_validation_message(ValidationMessage(file=file_path, message=str(se))) #pragma: no cover
-                    return False #pragma: no cover
-
-        try:
-            validate(data, schema)
-
-        except ValidationError as ve:
-            self.add_validation_message(
-                ValidationMessage(file=file_path,
-                                  message="{} in {}".format(ve.message, ve.instance)))
-            return False
-        except ParserError as pe:
-            self.add_validation_message(
-                ValidationMessage(file=file_path,
-                                  message=pe.__str__()))
-            return False
-
-        return True
 
     def has_errors(self, file_name):
         """
@@ -130,12 +120,4 @@ def __init__(self, file='', level='error', message=''):
         self.message = message
 
     def __unicode__(self):
-        return self.level + ' - ' + self.message
-
-
-class UnsupportedDataSchemaException(Exception):
-    def __init__(self, message=''):
-        self.message = message
-
-    def __unicode__(self):
-        return self.message
\ No newline at end of file
+        return self.level + ' - ' + self.message
\ No newline at end of file
diff --git a/hepdata_validator/data_file_validator.py b/hepdata_validator/data_file_validator.py
index 647343f..282958e 100644
--- a/hepdata_validator/data_file_validator.py
+++ b/hepdata_validator/data_file_validator.py
@@ -1,5 +1,35 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of HEPData.
+# Copyright (C) 2016 CERN.
+#
+# HEPData is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# HEPData is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with HEPData; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307, USA.
+#
+# In applying this license, CERN does not
+# waive the privileges and immunities granted to it by virtue of its status
+# as an Intergovernmental Organization or submit itself to any jurisdiction.
+
+import json
+
 import os
-from hepdata_validator import Validator
+import yaml
+from yaml.scanner import ScannerError
+
+from hepdata_validator import Validator, ValidationMessage
+from jsonschema import validate as json_validate, ValidationError
 
 __author__ = 'eamonnmaguire'
 
@@ -9,4 +39,76 @@ class DataFileValidator(Validator):
     Validates the Data file YAML/JSON file
     """
     base_path = os.path.dirname(__file__)
-    schema_file = base_path + '/schemas/data_schema.json'
+    default_schema_file = base_path + '/schemas/data_schema.json'
+
+    custom_data_schemas = {}
+
+    def load_custom_schema(self, type, schema_file_path=None):
+        """
+        Loads the schema for a custom data type, or uses the stored version if available.
+        :param type: e.g. histfactory
+        :param schema_file_path: optional path; default is <base_path>/schemas/<type>_schema.json
+        :return: the parsed schema (a stored copy for the type wins over schema_file_path)
+        :raises UnsupportedDataSchemaException: if the schema cannot be loaded
+        """
+        try:
+            if type in self.custom_data_schemas:
+                return self.custom_data_schemas[type]
+
+            _schema_file = schema_file_path or os.path.join(
+                self.base_path, 'schemas', "{0}_schema.json".format(type))
+            # 'with' closes the file handle even when the JSON fails to parse.
+            with open(_schema_file, 'r') as schema_fp:
+                custom_data_schema = json.load(schema_fp)
+            self.custom_data_schemas[type] = custom_data_schema
+            return custom_data_schema
+        except Exception:
+            raise UnsupportedDataSchemaException(
+                message="There is no schema defined for the '{0}' data type.".format(type))
+
+    def validate(self, file_path):
+        """
+        Validates each YAML document in file_path, using the custom schema named by a
+        document's 'type' key when present and the default data schema otherwise.
+        :return: True if no errors are recorded for file_path, False otherwise
+        """
+        try:
+            default_data_schema = json.load(open(self.default_schema_file, 'r'))
+            # NOTE(review): CLoader is not a safe loader; confirm inputs are trusted.
+            try:
+                data = yaml.load_all(open(file_path, 'r'), Loader=yaml.CLoader)
+            except: #pragma: no cover
+                data = yaml.load_all(open(file_path, 'r')) #pragma: no cover
+            # load_all is lazy, so YAML syntax errors surface during this loop.
+            for data_item in data:
+                if data_item is None:
+                    continue
+                try:
+                    if 'type' in data_item:
+                        custom_schema = self.load_custom_schema(data_item['type'])
+                        json_validate(data_item, custom_schema)
+                    else:
+                        json_validate(data_item, default_data_schema)
+                except ValidationError as ve:
+                    self.add_validation_message(
+                        ValidationMessage(file=file_path,
+                                          message=ve.message + ' in ' + str(ve.instance)))
+            return not self.has_errors(file_path)
+        except ScannerError as se:
+            self.add_validation_message(
+                ValidationMessage(file=file_path,
+                                  message='There was a problem parsing the file. '
+                                          'This can be because you forgot spaces '
+                                          'after colons in your YAML file for instance.\n{0}'.format(se.__repr__())))
+            return False
+
+
+class UnsupportedDataSchemaException(Exception):
+    """Raised when a custom data schema is requested for a type with no schema file."""
+    def __init__(self, message=''):
+        # Pass the text to Exception so str(exc), exc.args and tracebacks show it.
+        super(UnsupportedDataSchemaException, self).__init__(message)
+        self.message = message
+
+    def __unicode__(self):
+        return self.message
diff --git a/hepdata_validator/schemas/additional_info_schema.json b/hepdata_validator/schemas/additional_info_schema.json
index 87435bb..dda7b22 100644
--- a/hepdata_validator/schemas/additional_info_schema.json
+++ b/hepdata_validator/schemas/additional_info_schema.json
@@ -3,7 +3,7 @@
   "id": "http://hepdata.org/submission/schema/additional_info",
   "type": "object",
   "title": "This section is optional for the provision of information about the overall submission.",
-  "description": "An explanation about the puropose of this instance described by this schema.",
+  "description": "An explanation about the purpose of this instance described by this schema.",
   "name": "/",
   "properties": {
     "record_ids": {
@@ -128,6 +128,42 @@
         }
       ]
     },
+    "associated_records": {
+      "id": "http://jsonschema.net/associated_records",
+      "type": "array",
+      "description": "Links to other HEPData Submissions or INSPIRE Records that relate to this submission.",
+      "items": [
+        {
+          "id": "http://jsonschema.net/associated_records/output",
+          "type": "object",
+          "properties": {
+            "type": {
+              "id": "http://jsonschema.net/associated_records/output/type",
+              "type": "string"
+            },
+            "identifier": {
+              "id": "http://jsonschema.net/associated_records/output/identifier",
+              "type": [
+                "string",
+                "number"
+              ]
+            },
+            "description": {
+              "id": "http://jsonschema.net/associated_records/output/description",
+              "type": "string"
+            },
+            "url": {
+              "id": "http://jsonschema.net/associated_records/output/url",
+              "type": "string"
+            }
+          },
+          "required": [
+            "identifier",
+            "type"
+          ]
+        }
+      ]
+    },
     "comment": {
       "id": "http://hepdata.org/submission/schema/additional_info/comment",
       "type": "string",
diff --git a/hepdata_validator/schemas/data_schema.json b/hepdata_validator/schemas/data_schema.json
index ed4254d..9ba80aa 100644
--- a/hepdata_validator/schemas/data_schema.json
+++ b/hepdata_validator/schemas/data_schema.json
@@ -3,6 +3,10 @@
   "id": "http://hepdata.org/submission/schema/data",
   "type": "object",
   "properties": {
+    "type": {
+      "id": "http://jsonschema.net/data_file",
+      "type": "string"
+    },
     "independent_variables": {
       "id": "http://hepdata.org/submission/schema/data/independent_variables",
       "type": "array",
@@ -37,7 +41,10 @@
               "properties": {
                 "value": {
                   "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value",
-                  "type": ["string","number"]
+                  "type": [
+                    "string",
+                    "number"
+                  ]
                 },
                 "low": {
                   "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value",
@@ -105,7 +112,6 @@
                     "number"
                   ]
                 },
-
                 "units": {
                   "id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/qualifiers/qualifier/name",
                   "type": "string"
@@ -127,7 +133,10 @@
               "properties": {
                 "value": {
                   "id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/value",
-                  "type": ["string","number"]
+                  "type": [
+                    "string",
+                    "number"
+                  ]
                 },
                 "errors": {
                   "id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors",
@@ -138,7 +147,10 @@
                     "properties": {
                       "symerror": {
                         "id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors/error/symerror",
-                        "type": ["number", "string"]
+                        "type": [
+                          "number",
+                          "string"
+                        ]
                       },
                       "asymerror": {
                         "id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors/error/asymerror",
@@ -146,11 +158,17 @@
                         "properties": {
                           "minus": {
                             "id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors/error/asymerror/minus",
-                            "type": ["number", "string"]
+                            "type": [
+                              "number",
+                              "string"
+                            ]
                           },
                           "plus": {
                             "id": "http://hepdata.org/submission/schema/data/dependent_variables/axis/values/point/value/errors/error/asymerror/plus",
-                            "type": ["number", "string"]
+                            "type": [
+                              "number",
+                              "string"
+                            ]
                           }
                         }
                       },
diff --git a/hepdata_validator/schemas/submission_schema.json b/hepdata_validator/schemas/submission_schema.json
index 534e574..c752947 100644
--- a/hepdata_validator/schemas/submission_schema.json
+++ b/hepdata_validator/schemas/submission_schema.json
@@ -50,6 +50,14 @@
       "id": "http://jsonschema.net/data_file",
       "type": "string"
     },
+    "group": {
+      "id": "http://jsonschema.net/group",
+      "description": "It is possible to group data records together using a string or number. This can be used to conceptually link data tables together (e.g. multiple records can correspond to the same overall plot)",
+      "type": [
+        "string",
+        "number"
+      ]
+    },
     "license": {
       "id": "http://jsonschema.net/data_file/license",
       "type": "object",
@@ -88,6 +96,10 @@
               "id": "http://jsonschema.net/additional_resources/resource/description",
               "type": "string"
             },
+            "type": {
+              "id": "http://jsonschema.net/additional_resources/resource/type",
+              "type": "string"
+            },
             "license": {
               "id": "http://jsonschema.net/additional_resources/resource/license",
               "type": "object",
@@ -116,12 +128,48 @@
           ]
         }
       ]
+    },
+    "associated_records": {
+      "id": "http://jsonschema.net/associated_records",
+      "type": "array",
+      "description": "Links to other HEPData Submissions or INSPIRE Records that relate to this submission.",
+      "items": [
+        {
+          "id": "http://jsonschema.net/associated_records/output",
+          "type": "object",
+          "properties": {
+            "type": {
+              "id": "http://jsonschema.net/associated_records/output/type",
+              "type": "string"
+            },
+            "identifier": {
+              "id": "http://jsonschema.net/associated_records/output/identifier",
+              "type": [
+                "string",
+                "number"
+              ]
+            },
+            "description": {
+              "id": "http://jsonschema.net/associated_records/output/description",
+              "type": "string"
+            },
+            "url": {
+              "id": "http://jsonschema.net/associated_records/output/url",
+              "type": "string"
+            }
+          },
+          "required": [
+            "identifier",
+            "type"
+          ]
+        }
+      ]
     }
   },
-    "required": [
-      "name",
-      "description",
-      "keywords",
-      "data_file"
-    ]
+  "required": [
+    "name",
+    "description",
+    "keywords",
+    "data_file"
+  ]
 }
\ No newline at end of file
diff --git a/hepdata_validator/submission_file_validator.py b/hepdata_validator/submission_file_validator.py
index b3b55df..7643577 100644
--- a/hepdata_validator/submission_file_validator.py
+++ b/hepdata_validator/submission_file_validator.py
@@ -13,7 +13,7 @@ class SubmissionFileValidator(Validator):
     Validates the Submission file YAML/JSON file
     """
     base_path = os.path.dirname(__file__)
-    schema_file = base_path + '/schemas/submission_schema.json'
+    default_schema_file = base_path + '/schemas/submission_schema.json'
     additonal_info_schema = base_path + '/schemas/additional_info_schema.json'
 
     def validate(self, **kwargs):
@@ -25,7 +25,7 @@ def validate(self, **kwargs):
         """
         try:
             submission_file_schema = json.load(
-                    open(self.schema_file, 'r'))
+                    open(self.default_schema_file, 'r'))
 
             additional_file_section_schema = json.load(
                     open(self.additonal_info_schema, 'r'))
@@ -58,6 +58,7 @@ def validate(self, **kwargs):
                     self.add_validation_message(
                             ValidationMessage(file=file_path,
                                               message=ve.message + ' in ' + str(ve.instance)))
+
             if self.has_errors(file_path):
                 return False
             else:
@@ -70,3 +71,4 @@ def validate(self, **kwargs):
                                               'after colons in your YAML file for instance.  '
                                               'Diagnostic information follows.\n' + str(se))
             )
+            return False
diff --git a/hepdata_validator/version.py b/hepdata_validator/version.py
index a6e820b..9f6500e 100644
--- a/hepdata_validator/version.py
+++ b/hepdata_validator/version.py
@@ -27,4 +27,4 @@
 
 from __future__ import absolute_import, print_function
 
-__version__ = "0.1.11"
+__version__ = "0.1.13"
diff --git a/testsuite/__init__.py b/testsuite/__init__.py
index cc4078f..e69de29 100644
--- a/testsuite/__init__.py
+++ b/testsuite/__init__.py
@@ -1 +0,0 @@
-__author__ = 'eamonnmaguire'
diff --git a/testsuite/test_data/custom_data_schema.json b/testsuite/test_data/custom_data_schema.json
new file mode 100644
index 0000000..f8dfc4b
--- /dev/null
+++ b/testsuite/test_data/custom_data_schema.json
@@ -0,0 +1,84 @@
+{
+  "$schema": "http://json-schema.org/draft-04/schema#",
+  "id": "http://hepdata.org/bad-submission/schema/data",
+  "type": "object",
+  "properties": {
+    "type": {
+      "id": "http://jsonschema.net/data_file",
+      "type": "string"
+    },
+    "some_variables": {
+      "id": "http://hepdata.org/submission/schema/data/independent_variables",
+      "type": "array",
+      "items": {
+        "id": "http://hepdata.org/submission/schema/data/independent_variables/0",
+        "type": "object",
+        "properties": {
+          "header": {
+            "id": "http://hepdata.org/submission/schema/data/independent_variables/0/header",
+            "type": "object",
+            "properties": {
+              "name": {
+                "id": "http://hepdata.org/submission/schema/data/independent_variables/0/header/name",
+                "type": "string"
+              },
+              "units": {
+                "id": "http://hepdata.org/submission/schema/data/independent_variables/0/header/units",
+                "type": "string"
+              }
+            },
+            "additionalProperties": false,
+            "required": [
+              "name"
+            ]
+          },
+          "values": {
+            "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values",
+            "type": "array",
+            "items": {
+              "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1",
+              "type": "object",
+              "properties": {
+                "value": {
+                  "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value",
+                  "type": [
+                    "string",
+                    "number"
+                  ]
+                },
+                "low": {
+                  "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value",
+                  "type": "number"
+                },
+                "high": {
+                  "id": "http://hepdata.org/submission/schema/data/independent_variables/0/values/1/value",
+                  "type": "number"
+                }
+              },
+              "additionalProperties": false
+            }
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "header",
+          "values"
+        ]
+      },
+      "required": [
+        "0"
+      ]
+    },
+    "name": {
+      "id": "http://hepdata.org/submission/schema/data/name",
+      "type": "string",
+      "title": "Name.",
+      "description": "Used to reference a data record when everything is in the same file.",
+      "name": "name"
+    }
+  },
+  "additionalProperties": false,
+  "required": [
+    "some_variables"
+  ]
+}
\ No newline at end of file
diff --git a/testsuite/test_data/invalid_data.yaml b/testsuite/test_data/invalid_data.yaml
deleted file mode 100644
index 5c123c2..0000000
--- a/testsuite/test_data/invalid_data.yaml
+++ /dev/null
@@ -1,29 +0,0 @@
----
-independent_variables:
-  - header: {name: SQRT(S), units: GEV}
-    values:
-      - value: 7000
-      - value: 8000
-      - value: 9000
-dependent_variables:
-  - header: {name: SIG(fiducial), units: FB}
-    qualifiers:
-      - {name: RE, value: P P --> Z0 < LEPTON+ LEPTON- > Z0 < LEPTON+ LEPTON- > X}
-    values:
-      - value: 25.4
-        errors:
-          - {asymerror: {plus: 3.3, minus: -3.0}, label: stat}
-          - {asymerror: {plus: 1, minus: -1.2}, label: sys}
-          - {asymerror: {plus: 1, minus: -1}, label: 'sys,lumi'}
-  
-      - value: 29.8
-        errors:
-          - {asymerror: {plus: 3.8, minus: -3.5}, label: stat}
-          - {asymerror: {plus: 1.7, minus: -1.5}, label: sys}
-          - {symerror: 1.2, label: 'sys,lumi'}
-  
-      - value: 12.7 
-        errors:
-          - {asymerror:{plus: 3.1, minus: -2.9}, label: stat}
-          - {symerror: 1.7, label: sys}
-          - {symerror: 0.5, label: 'sys,lumi'}
diff --git a/testsuite/test_data/invalid_data_file.yaml b/testsuite/test_data/invalid_data_file.yaml
new file mode 100644
index 0000000..26d9f19
--- /dev/null
+++ b/testsuite/test_data/invalid_data_file.yaml
@@ -0,0 +1,8 @@
+---
+type: "different"
+some_variables:
+  - header:{name: SQRT(S), units: GEV}
+    values:
+      - value: 7000
+
+---
\ No newline at end of file
diff --git a/testsuite/test_data/invalid_syntax_submission.yaml b/testsuite/test_data/invalid_syntax_submission.yaml
new file mode 100644
index 0000000..ad10a98
--- /dev/null
+++ b/testsuite/test_data/invalid_syntax_submission.yaml
@@ -0,0 +1,14 @@
+---
+name: "Table 3"
+location: Page 17 of preprint
+description: The measured fiducial cross sections.  The first systematic uncertainty is the combined systematic uncertainty excluding luminosity, the second is the luminosity
+keywords: # used for searching, possibly multiple values for each keyword
+  - { name: reactions, value: [P P --> Z0 Z0 X]}
+  - { name: observables, value: [SIG]}
+  - { name: energies, value: [7000]}
+data_file:'12321'
+additional_resources:
+  - location: "http:github.com/HEPData/hepdata"
+    description: "Full source code for creating this data"
+  - location: "http:github.com/HEPData/hepdata"
+    description: "Full source code for creating this data"
\ No newline at end of file
diff --git a/testsuite/test_data/valid_file_custom.yaml b/testsuite/test_data/valid_file_custom.yaml
new file mode 100644
index 0000000..ded4e43
--- /dev/null
+++ b/testsuite/test_data/valid_file_custom.yaml
@@ -0,0 +1,8 @@
+---
+type: "different"
+some_variables:
+  - header: {name: SQRT(S), units: GEV}
+    values:
+      - value: 7000
+
+---
\ No newline at end of file
diff --git a/testsuite/test_data/valid_submission_with_associated_record.yaml b/testsuite/test_data/valid_submission_with_associated_record.yaml
new file mode 100644
index 0000000..a12f44d
--- /dev/null
+++ b/testsuite/test_data/valid_submission_with_associated_record.yaml
@@ -0,0 +1,66 @@
+---
+additional_resources:
+  - location: "http://github.com/HEPData/hepdata"
+    description: "Full source code for creating this data"
+  - location: "http://github.com/HEPData/hepdata"
+    description: "Full source code for creating this data"
+associated_records:
+  - type: "HEPData"
+    identifier: "ins1226111"
+  - type: "INSPIRE"
+    identifier: "1221113"
+dateupdated: '01/02/2016 10:38:36'
+preprintyear: '2015'
+publicationyear: '2016'
+dateupdated: '01/02/2016 10:38:36'
+comment: | # Information that applies to all data tables.
+  CERN-LHC.  Measurements of the cross section  for ZZ production using the 4l and 2l2nu decay channels in proton-proton collisions at a centre-of-mass energy of 7 TeV with 4.6 fb^-1 of data collected in 2011.  The final states used are 4 electrons, 4 muons, 2 electrons and 2 muons, 2 electrons and missing transverse momentum, and 2 muons and missing transverse momentum (MET).
+
+  The cross section values reported in the tables should be multiplied by a factor of 1.0141 to take into account the updated value of the integrated luminosity for the ATLAS 2011 data taking period.  The uncertainty on the global normalisation ("Lumi") remains at 1.8%.  See Eur.Phys.J. C73 (2013) 2518 for more details.
+
+  The 4l channel fiducial region is defined as:
+  - 4e, 4mu or 2e2mu
+  - Ambiguities in pairing are resolved by choosing the combination that results in the smaller value of the sum |mll - mZ| for the two pairs, where mll is the mass of the dilepton system.
+  - ptLepton > 7 GeV (at least one with ptLepton > 20 (25) GeV for muons (electrons))
+  - |etaLepton| < 3.16
+  - At least one lepton pair is required to have invariant mass between 66 and 116 GeV. If the second pair also satisfies this, the event is ZZ, otherwise if the second pair satisfies mll > 20 GeV it is ZZ*.
+  - min(DeltaR(l,l)) > 0.2.
+
+  The 2l2nu channel fiducial region is defined as:
+  - 2e+MET or 2mu+MET
+  - ptLepton > 20 GeV
+  - |etaLepton| < 2.5
+  - mll must be between 76 and 106 GeV
+  - -MET*cos(phi_METZ)>75 GeV, where phi_METZ is the angle between the Z and the MET
+  - |MET - pTZ| / pTZ < 0.4, where pTZ is the transverse momentum of the dilepton system
+  - No events with a jet for which ptJet > 25 GeV and |etaJet| < 4.5
+  - No events with a third lepton for which ptLepton > 10 GeV
+  - min(DeltaR(l,l)) > 0.3
+
+
+---
+name: "Table 1"
+location: Page 17 of preprint
+description: The measured fiducial cross sections.  The first systematic uncertainty is the combined systematic uncertainty excluding luminosity, the second is the luminosity
+keywords: # used for searching, possibly multiple values for each keyword
+  - { name: reactions, values: [P P --> Z0 Z0 X]}
+  - { name: observables, values: [SIG]}
+  - { name: energies, values: [7000]}
+data_file: data1.yaml
+type: "different"
+additional_resources:
+  - location: "http://github.com/HEPData/hepdata"
+    description: "Full source code for creating this data"
+  - location: "http://github.com/HEPData/hepdata"
+    description: "Full source code for creating this data"
+
+---
+# This is Table 2.
+name: "Table 2"
+location: Page 20 of preprint
+description: The measured total cross sections.  The first systematic uncertainty is the combined systematic uncertainty excluding luminosity, the second is the luminosity
+keywords: # used for searching, possibly multiple values for each keyword
+  - { name: reactions, values: [P P --> Z0 Z0 X]}
+  - { name: observables, values: [SIG]}
+  - { name: energies, values: [7000]}
+data_file: data2.yaml
\ No newline at end of file
diff --git a/testsuite/validation_test.py b/testsuite/validation_test.py
index 31e498b..29c5481 100644
--- a/testsuite/validation_test.py
+++ b/testsuite/validation_test.py
@@ -3,11 +3,9 @@
 
 import yaml
 
-from hepdata_validator.data_file_validator import DataFileValidator
+from hepdata_validator.data_file_validator import DataFileValidator, UnsupportedDataSchemaException
 from hepdata_validator.submission_file_validator import SubmissionFileValidator
 
-__author__ = 'eamonnmaguire'
-
 
 class SubmissionFileValidationTest(unittest.TestCase):
     validator = None
@@ -17,8 +15,10 @@ def setUp(self):
 
         self.valid_license_file = 'test_data/valid_submission_license.yaml'
         self.valid_file = 'test_data/valid_submission.yaml'
+        self.valid_file_with_associated_records = 'test_data/valid_submission_with_associated_record.yaml'
         self.valid_empty_file = 'test_data/valid_submission_empty.yaml'
         self.invalid_file = 'test_data/invalid_submission.yaml'
+        self.invalid_syntax_file = 'test_data/invalid_syntax_submission.yaml'
 
     def test_valid_submission_yaml(self):
         print '___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission___'
@@ -31,7 +31,40 @@ def test_valid_submission_yaml(self):
         self.validator.validate(file_path=valid_sub_yaml, data=sub_yaml_obj)
         self.validator.print_errors(valid_sub_yaml)
 
-        print 'Valid\n'
+    def test_no_file_path_supplied(self):
+        """validate() is expected to raise LookupError when file_path is None."""
+        self.validator = SubmissionFileValidator()
+        # assertRaises fails if nothing is raised; try/except passed vacuously.
+        with self.assertRaises(LookupError):
+            self.validator.validate(file_path=None)
+
+    def test_invalid_syntax(self):
+        """The invalid-syntax submission fails with one parsing-problem message."""
+        self.validator = SubmissionFileValidator()
+        bad_yaml = os.path.join(self.base_dir, self.invalid_syntax_file)
+        self.assertFalse(self.validator.validate(file_path=bad_yaml))
+
+        self.assertTrue(self.validator.has_errors(bad_yaml))
+        self.assertEqual(len(self.validator.get_messages(bad_yaml)), 1)
+        self.validator.print_errors(bad_yaml)
+        for message in self.validator.get_messages(bad_yaml):
+            print message.message
+            self.assertEqual(message.message.index("There was a problem parsing the file."), 0)
+
+        self.assertEqual(len(self.validator.get_messages()), 1)
+        self.validator.clear_messages()
+        self.assertEqual(len(self.validator.get_messages()), 0)
+
+    def test_valid_submission_yaml_with_associated_records(self):
+        """The associated-records fixture validates and reports no errors."""
+        print '___SUBMISSION_FILE_VALIDATION: Testing valid yaml submission with associated records___'
+
+        self.validator = SubmissionFileValidator()
+        fixture_path = os.path.join(self.base_dir, self.valid_file_with_associated_records)
+
+        self.assertTrue(self.validator.validate(file_path=fixture_path))
+        self.assertFalse(self.validator.has_errors(fixture_path))
+        self.validator.print_errors(fixture_path)
 
     def test_valid_submission_yaml_with_empty_section(self):
         print '___SUBMISSION_FILE_VALIDATION: Testing valid yaml ' \
@@ -44,7 +77,6 @@ def test_valid_submission_yaml_with_empty_section(self):
         self.assertEqual(self.validator.validate(file_path=valid_sub_yaml), True)
         self.validator.print_errors(valid_sub_yaml)
 
-
     def test_valid_submission_yaml_with_license(self):
         print '___SUBMISSION_FILE_VALIDATION: ' \
               'Testing valid yaml submission with license___'
@@ -103,11 +135,15 @@ def setUp(self):
             'test_data/valid_data_with_error.yaml'
         )
 
-        self.invalid_data_yaml = os.path.join(
+        self.invalid_syntax_data_file = os.path.join(
             self.base_dir,
-            'test_data/invalid_data.yaml'
+            'test_data/invalid_data_file.yaml'
         )
 
+        self.valid_custom_file = os.path.join(
+            self.base_dir,
+            'test_data/valid_file_custom.yaml')
+
     def test_valid_yaml_file(self):
         print '___DATA_VALIDATION: Testing valid yaml submission___'
         is_valid = self.validator.validate(file_path=self.valid_file_yaml)
@@ -120,15 +156,12 @@ def test_invalid_yaml_file(self):
                          False)
 
         self.validator.print_errors(self.invalid_file_yaml)
-        print 'Invalid\n'
-
 
     def test_valid_file_with_percent_errors(self):
         print '___DATA_VALIDATION: Testing valid yaml percent error ___'
         self.assertEqual(self.validator.validate(file_path=self.valid_file_error_percent_yaml),
                          False)
         self.validator.print_errors(self.valid_file_error_percent_yaml)
-        print 'Invalid\n'
 
     def test_valid_json_file(self):
         print '___DATA_VALIDATION: Testing valid json submission___'
@@ -137,21 +170,42 @@ def test_valid_json_file(self):
         self.assertEqual(is_valid, True)
 
         self.validator.print_errors(self.valid_file_json)
-        print 'VALID\n'
 
     def test_invalid_json_file(self):
         print '___DATA_VALIDATION: Testing invalid json submission___'
         self.assertEqual(self.validator.validate(file_path=self.invalid_file_json),
                          False)
         self.validator.print_errors(self.invalid_file_json)
-        print 'Invalid\n'
 
-    def test_invalid_data_file(self):
-        print '___DATA_VALIDATION: Testing invalid data file___'
-        self.assertEqual(self.validator.validate(file_path=self.invalid_data_yaml),
-                         False)
-        self.validator.print_errors(self.invalid_data_yaml)
-        print 'Invalid\n'
+    def test_load_data_with_custom_data_type(self):
+        """Register a schema under 'different', then validate the custom fixture."""
+        schema_file = os.path.join(self.base_dir, 'test_data/custom_data_schema.json')
+        self.validator = DataFileValidator()
+        self.validator.load_custom_schema('different', schema_file)
+
+        self.assertIn('different', self.validator.custom_data_schemas)
+        self.assertTrue(self.validator.validate(file_path=self.valid_custom_file))
+
+    def test_load_invalid_custom_schema(self):
+        # With the registry emptied and no path given, loading is expected to raise.
+        self.validator.custom_data_schemas = {}
+        print('Loading invalid schema')
+        with self.assertRaises(UnsupportedDataSchemaException) as raised:
+            self.validator.load_custom_schema('different')
+        self.assertEqual(raised.exception.message, "There is no schema defined for the 'different' data type.")
+        self.assertEqual(raised.exception.message, raised.exception.__unicode__())
+
+    def test_load_invalid_data_file(self):
+        """The invalid data fixture fails with one parsing-problem message."""
+        print('Loading invalid data file')
+        bad_file = self.invalid_syntax_data_file
+
+        self.assertFalse(self.validator.validate(file_path=bad_file))
+        self.assertTrue(self.validator.has_errors(bad_file))
+        self.assertEqual(len(self.validator.get_messages(bad_file)), 1)
+        self.validator.print_errors(bad_file)
+        for message in self.validator.get_messages(bad_file):
+            self.assertEqual(message.message.index("There was a problem parsing the file."), 0)
 
 
 if __name__ == '__main__':