#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################

"""Schema validation for BEAT JSON I/O"""


import os
import pkg_resources

import six
import simplejson as json
import jsonschema

from beat.backend.python.utils import error_on_duplicate_key_hook


def maybe_load_json(s):
    """Maybe loads the JSON from a string, a filename or a file-like object

    Parameters:

      s (object, str, file): The input to interpret. A string naming an
        existing filesystem path is opened and its contents are parsed as
        JSON; any other string is parsed as a JSON document itself. An object
        exposing a ``read`` method has its contents read and parsed. Anything
        else is returned untouched.


    Returns:

      object: The decoded JSON structure, or ``s`` itself when it is neither
      a string nor a file-like object.


    Raises:

      json.JSONDecodeError: If the text to parse is not valid JSON. Any
        exception raised by ``error_on_duplicate_key_hook`` propagates as
        well.

    """

    def _parse(text):
        return json.loads(text, object_pairs_hook=error_on_duplicate_key_hook)

    # a string is either a path to a JSON file or the JSON document itself
    if isinstance(s, six.string_types):
        if os.path.exists(s):
            with open(s, "rt") as f:
                # parse what was read directly: the contents of a file must
                # never be re-interpreted as a possible path
                return _parse(f.read())
        return _parse(s)

    # a 'file-like' object
    if hasattr(s, "read"):
        contents = s.read()
        # binary streams hand back bytes, which would otherwise fall through
        # every branch and be returned undecoded
        if isinstance(contents, bytes):
            contents = contents.decode("utf-8")
        return _parse(contents)

    # already a python object: nothing to load
    return s


def load_schema(schema_name, version=1):
    """Returns a JSON validator for the schema given the relative name


    Parameters:

      schema_name (str): the name of the schema to load. This value
        corresponds to a directory inside our schema directory (where this
        file is located); the file ``<version>.json`` inside it is loaded.

      version (int): the version of the schema to use.


    Returns:

      jsonschema.Draft4Validator: An instance of a JSON schema draft-4
      validator.


    Raises:

      json.JSONDecodeError: If the schema file does not contain valid JSON.
        The offending contents are printed before the error is re-raised.

      jsonschema.SchemaError: If there is an error on the schema.

    """

    fname = pkg_resources.resource_filename(
        __name__, os.path.join(schema_name, "%d.json" % version)
    )

    with open(fname, "rb") as f:
        data = f.read().decode("utf-8")

    try:
        schema = json.loads(data)
    except json.JSONDecodeError:
        # the placeholder is named, so the value must be passed by keyword;
        # otherwise format() raises KeyError and hides the decoding error
        print("Invalid json:\n {data}".format(data=data))
        raise

    # relative ``$ref`` entries are resolved against the schema's directory
    basedir = os.path.realpath(os.path.dirname(fname))
    resolver = jsonschema.RefResolver("file://" + basedir + "/", schema)

    # now we load it
    return jsonschema.Draft4Validator(schema, resolver=resolver)


def validate(schema_name, data):
    """Validates the input data using the schema

    This function handles schema versioning in the context of BEAT
    transparently by first peeking the schema version required by the JSON
    data and then validating the JSON data against the proper schema version
    for the respective type.


    Example:

      .. code-block:: python

         try:
             cleaned_data, error_list = validate('toolchain', '/to/my/file.json')
         except json.JSONDecodeError as e:
             print(e)


    Parameters:

      schema_name (str): The relative path to the schema to use for
        validation. For example, to validate a toolchain, use
        ``'toolchain'``.

      data (object, str, file): The piece of data to validate. The input can
        be a valid python object that represents a JSON structure, a file,
        from which the JSON contents will be read out or a string.

        If ``data`` is a string and represents a valid filesystem path, the
        relevant file will be opened and read as with :py:func:`json.load`.
        Otherwise, it will be considered to be string containing a valid JSON
        structure that will be loaded as with :py:func:`json.loads`.

        Note that if the file is opened and read internally using
        :py:func:`json.load`, exceptions may be thrown by that subsystem,
        concerning the file structure. Consult the manual page for
        :py:mod:`simplejson` for details.


    Returns:

      A tuple with two elements: the cleaned JSON data structure, after
      processing and a list of errors found by ``jsonschema``. If no errors
      occur, then returns an empty list for the second element of the tuple.
      If the input cannot be decoded, the first element is the input as it
      was given and the list holds a single message describing the failure.


    Raises:

      jsonschema.SchemaError: If there is an error on the schema.

    """

    try:
        data = maybe_load_json(data)
    except json.JSONDecodeError as e:
        return data, ["invalid JSON code: %s" % str(e)]
    except RuntimeError as e:
        return data, ["Invalid JSON: %s" % str(e)]

    # handles the schema version: data formats use a '#'-prefixed key
    if schema_name != "dataformat":
        version_key = "schema_version"
    else:
        version_key = "#schema_version"

    # the top-level value may not be a JSON object (e.g. a list or a number);
    # fall back to the default version and let the schema report the problem
    # instead of failing on a missing ``get`` attribute
    if hasattr(data, "get"):
        version = data.get(version_key, 1)
    else:
        version = 1

    validator = load_schema(schema_name, version)

    def encode_error(error, indent=""):
        # one line per error, sub-errors (``context``) indented underneath
        abspath = "/".join([""] + ([str(k) for k in error.absolute_path] or [""]))
        schpath = "/".join([""] + ([str(k) for k in error.schema_path] or [""]))
        retval = indent + "%s: %s (rule: %s)" % (abspath, error.message, schpath)
        for another_error in error.context:
            retval += "\n" + encode_error(another_error, indent + "  ")
        return retval

    errorlist = [
        encode_error(k)
        for k in sorted(validator.iter_errors(data), key=lambda e: e.path)
    ]

    return data, errorlist