#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################

"""Schema validation for BEAT JSON I/O"""
import os

import jsonschema
import pkg_resources
import simplejson as json
import six

from beat.backend.python.utils import error_on_duplicate_key_hook


def maybe_load_json(s):
    """Maybe loads the JSON from a string, a filename or a file-like object


  Parameters:

    s (object, str, file): The input to process. If it is a string naming an
      existing filesystem path, that file is read and its contents are parsed
      as JSON. Any other string is parsed as JSON text directly. If it is an
      object with a ``read`` attribute, whatever ``read()`` returns is parsed
      as JSON. Anything else is returned untouched.


  Returns:

    object: The decoded JSON structure, or ``s`` itself if it is neither a
    string nor a file-like object.


  Raises:

    json.JSONDecodeError: If the text to be parsed is not valid JSON.

    RuntimeError: Presumably raised by ``error_on_duplicate_key_hook`` when an
      object defines the same key twice (to be confirmed against
      :py:mod:`beat.backend.python.utils`).

  """

    def _parse(text):
        # contents read from binary streams arrive as bytes: JSON text is
        # UTF-8 encoded, so decode explicitly instead of relying on defaults
        if isinstance(text, six.binary_type):
            text = text.decode("utf-8")
        return json.loads(text, object_pairs_hook=error_on_duplicate_key_hook)

    # if it is a string
    if isinstance(s, six.string_types):
        # if it is a valid path
        if os.path.exists(s):
            # read raw bytes so decoding does not depend on the locale, and
            # parse the contents directly: text read from a file must never
            # be re-interpreted as yet another filename
            with open(s, "rb") as f:
                return _parse(f.read())
        return _parse(s)

    # if it is a 'file-like' object
    if hasattr(s, "read"):
        return _parse(s.read())

    return s


def load_schema(schema_name, version=1):
    """Returns a JSON validator for the schema given the relative name


  Parameters:

    schema_name (str): the name of the schema to load. This value corresponds
      to the filename inside our schema directory (where this file is located)
      and should *exclude* the extension ``.json``.

    version (int): the version of the schema to use.


  Returns:

    jsonschema.Draft4Validator: An instance of a JSON schema draft-4 validator.


  Raises:

    jsonschema.SchemaError: If there is an error on the schema.

    json.JSONDecodeError: If the schema file does not contain valid JSON (the
      offending contents are printed before the exception is re-raised).

  """

    # pkg_resources resource names are always "/"-separated, whatever the
    # platform, so the name is assembled by hand rather than via os.path.join
    fname = pkg_resources.resource_filename(
        __name__, "%s/%d.json" % (schema_name, version)
    )

    # schema files are read as bytes and decoded explicitly as UTF-8 so the
    # result does not depend on the interpreter's default codec
    with open(fname, "rb") as f:
        data = f.read().decode("utf-8")

    try:
        schema = json.loads(data)
    except json.JSONDecodeError:
        print("Invalid json:\n {}".format(data))
        raise

    # relative ``$ref`` entries are resolved against the directory holding
    # the schema file
    basedir = os.path.realpath(os.path.dirname(fname))
    resolver = jsonschema.RefResolver("file://" + basedir + "/", schema)

    # now we load it
    return jsonschema.Draft4Validator(schema, resolver=resolver)


def validate(schema_name, data):
    """Validates the input data using the schema

  This function handles schema versioning in the context of BEAT transparently
  by first peeking the schema version required by the JSON data and then
  validating the JSON data against the proper schema version for the respective
  type.


  Example:

    .. code-block:: python

       cleaned_data, error_list = validate('toolchain', '/to/my/file.json')
       if error_list:
           print("\\n".join(error_list))


  Parameters:

    schema_name (str): The relative path to the schema to use for validation.
      For example, to validate a toolchain, use ``'toolchain'``.

    data (object, str, file): The piece of data to validate. The input can be a
      valid python object that represents a JSON structure, a file, from which
      the JSON contents will be read out or a string.

      If ``data`` is a string and represents a valid filesystem path, the
      relevant file will be opened and read as with
      :py:func:`json.load`. Otherwise, it will be considered to be a
      string containing a valid JSON structure that will be loaded as with
      :py:func:`json.loads`.

      JSON decoding problems are not raised: they are reported through the
      returned list of errors, together with the unmodified input.


  Returns:

    A tuple with two elements: the cleaned JSON data structure, after
    processing and a list of errors found by ``jsonschema``. If no errors
    occur, then returns an empty list for the second element of the tuple.

  Raises:

    jsonschema.SchemaError: If there is an error on the schema.

  """

    try:
        data = maybe_load_json(data)
    except json.JSONDecodeError as e:
        return data, ["invalid JSON code: %s" % str(e)]
    except RuntimeError as e:
        return data, ["Invalid JSON: %s" % str(e)]

    # handles the schema version: data formats store it under a "#"-prefixed
    # key, every other type uses the plain key
    if schema_name != "dataformat":
        version_key = "schema_version"
    else:
        version_key = "#schema_version"

    # the JSON root may legitimately decode to something that is not a
    # mapping (list, number, null, ...); in that case there is no version to
    # peek, so fall back to version 1 and let the validator report the
    # mismatch instead of crashing with an AttributeError
    getter = getattr(data, "get", None)
    version = getter(version_key, 1) if callable(getter) else 1

    validator = load_schema(schema_name, version)

    def encode_error(error, indent=""):
        # renders one error as "/path/in/data: message (rule: /path/in/schema)"
        # followed by its sub-errors (``error.context``), one per line and
        # indented two extra spaces per nesting level
        abspath = "/".join([""] + ([str(k) for k in error.absolute_path] or [""]))
        schpath = "/".join([""] + ([str(k) for k in error.schema_path] or [""]))
        retval = indent + "%s: %s (rule: %s)" % (abspath, error.message, schpath)
        for another_error in error.context:
            retval += "\n" + encode_error(another_error, indent + "  ")
        return retval

    # errors are reported ordered by their location in the data
    errorlist = [
        encode_error(k)
        for k in sorted(validator.iter_errors(data), key=lambda e: e.path)
    ]

    return data, errorlist