algorithm.py 19.3 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

Samuel GAIST's avatar
Samuel GAIST committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################

André Anjos's avatar
André Anjos committed
36

37
38
39
40
41
42
"""
=========
algorithm
=========

Validation for algorithms
43
44
45
46

Forward importing from :py:mod:`beat.backend.python.algorithm`
:py:class:`beat.backend.python.algorithm.Storage`
:py:class:`beat.backend.python.algorithm.Runner`
47
"""
48
import os
Samuel GAIST's avatar
Samuel GAIST committed
49

André Anjos's avatar
André Anjos committed
50
import numpy
51
import pkg_resources
Samuel GAIST's avatar
Samuel GAIST committed
52
import simplejson as json
Samuel GAIST's avatar
Samuel GAIST committed
53
54
55
56
57
import six

from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
from beat.backend.python.algorithm import Runner  # noqa
from beat.backend.python.algorithm import Storage
André Anjos's avatar
André Anjos committed
58
59
60
61

from . import dataformat
from . import library
from . import prototypes
Samuel GAIST's avatar
Samuel GAIST committed
62
from . import schema
André Anjos's avatar
André Anjos committed
63
64


65
def load_algorithm_prototype(prefix):
    """Loads the bundled algorithm prototype and binds it to a data format

    The prototype declaration is read from the ``prototypes/algorithm.json``
    package resource. Its ``in_data`` input and ``out_data`` output are then
    pointed at a reference data format (``integer`` or ``integers``) found
    under ``<prefix>/dataformats``, using the highest numbered version
    available.


    Parameters:

      prefix (str): Path to the prefix that is searched for the reference
        data formats.


    Returns:

      dict: The prototype declaration, with the input and output types set.


    Raises:

      RuntimeError: If none of the reference data formats can be found.

    """

    prototype_data = pkg_resources.resource_string(
        __name__, "prototypes/algorithm.json"
    )
    algorithm_data = json.loads(prototype_data)
    ref_dataformats = ["integer", "integers"]
    dataformat = None

    # NOTE(review): the ``break`` below only leaves the directory walk, so a
    # later candidate overrides an earlier one when both exist. This ordering
    # is kept as it was - confirm it is the intended precedence.
    for ref_dataformat in ref_dataformats:
        for root, dirs, _ in os.walk(os.path.join(prefix, "dataformats")):
            if ref_dataformat not in dirs:
                continue

            # versions are compared as integers: a plain string sort would
            # rank "10.json" before "2.json"
            versions = []
            for entry in os.listdir(os.path.join(root, ref_dataformat)):
                stem = entry.split(".")[0]
                if stem.isdigit():
                    versions.append(int(stem))

            if not versions:
                continue  # nothing usable in this directory, keep searching

            dataformat = "{}/{}/{}".format(
                os.path.basename(root), ref_dataformat, max(versions)
            )
            break

    if dataformat is None:
        raise RuntimeError(
            "Reference data formats [{}] not found".format(",".join(ref_dataformats))
        )

    algorithm_data["groups"][0]["inputs"]["in_data"]["type"] = dataformat
    algorithm_data["groups"][0]["outputs"]["out_data"]["type"] = dataformat
    return algorithm_data


95
class Algorithm(BackendAlgorithm):
Philip ABBET's avatar
Philip ABBET committed
96
    """Algorithms represent runnable components within the platform.
André Anjos's avatar
André Anjos committed
97

Philip ABBET's avatar
Philip ABBET committed
98
99
    This class can only parse the meta-parameters of the algorithm (i.e., input
    and output declaration, grouping, synchronization details, parameters and
100
101
    splittability). The actual algorithm is not directly treated by this class.
    It can, however, provide you with a loader for actually running the
André Anjos's avatar
André Anjos committed
102
    algorithmic code (see :py:meth:`.runner`).
André Anjos's avatar
André Anjos committed
103
104


Philip ABBET's avatar
Philip ABBET committed
105
    Parameters:
André Anjos's avatar
André Anjos committed
106

Philip ABBET's avatar
Philip ABBET committed
107
      prefix (str): Establishes the prefix of your installation.
André Anjos's avatar
André Anjos committed
108

André Anjos's avatar
André Anjos committed
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
      data (:py:class:`object`, Optional): The piece of data representing the
        algorithm. It must validate against the schema defined for algorithms.
        If a string is passed, it is supposed to be a valid path to an
        algorithm in the designated prefix area. If a tuple is passed (or a
        list), then we consider that the first element represents the algorithm
        declaration, while the second, the code for the algorithm (either in
        its source format or as a binary blob). If ``None`` is passed, loads
        our default prototype for algorithms (source code will be in Python).

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up algorithm loading times as dataformats
        that are already loaded may be re-used.

      library_cache (:py:class:`dict`, Optional): A dictionary mapping library
        names to loaded libraries. This parameter is optional and, if passed,
        may greatly speed-up library loading times as libraries that are
        already loaded may be re-used.
André Anjos's avatar
André Anjos committed
127
128


Philip ABBET's avatar
Philip ABBET committed
129
    Attributes:
André Anjos's avatar
André Anjos committed
130

Philip ABBET's avatar
Philip ABBET committed
131
      name (str): The algorithm name
André Anjos's avatar
André Anjos committed
132

Philip ABBET's avatar
Philip ABBET committed
133
134
      description (str): The short description string, loaded from the JSON
        file if one was set.
André Anjos's avatar
André Anjos committed
135

Philip ABBET's avatar
Philip ABBET committed
136
      documentation (str): The full-length docstring for this object.
André Anjos's avatar
André Anjos committed
137

Philip ABBET's avatar
Philip ABBET committed
138
139
      storage (object): A simple object that provides information about file
        paths for this algorithm
André Anjos's avatar
André Anjos committed
140

141
142
      dataformats (dict): A dictionary containing all pre-loaded dataformats
        used by this algorithm. Data format objects will be of type
Philip ABBET's avatar
Philip ABBET committed
143
        :py:class:`beat.core.dataformat.DataFormat`.
André Anjos's avatar
André Anjos committed
144

145
146
      libraries (dict): A mapping object defining other libraries this
        algorithm needs to load so it can work properly.
André Anjos's avatar
André Anjos committed
147

Philip ABBET's avatar
Philip ABBET committed
148
149
      uses (dict): A mapping object defining the required library import name
        (keys) and the full-names (values).
André Anjos's avatar
André Anjos committed
150

151
152
      parameters (dict): A dictionary containing all pre-defined parameters
        that this algorithm accepts.
André Anjos's avatar
André Anjos committed
153

Philip ABBET's avatar
Philip ABBET committed
154
155
      splittable (bool): A boolean value that indicates if this algorithm is
        automatically parallelizable by our backend.
André Anjos's avatar
André Anjos committed
156

Philip ABBET's avatar
Philip ABBET committed
157
      input_map (dict): A dictionary where the key is the input name and the
158
159
        value, its type. All input names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
160

Philip ABBET's avatar
Philip ABBET committed
161
      output_map (dict): A dictionary where the key is the output name and the
162
163
        value, its type. All output names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
164

165
166
167
168
      results (dict): If this algorithm is actually an analyzer (i.e., there
        are no formal outputs, but results that must be saved by the platform),
        then this dictionary contains the names and data types of those
        elements.
André Anjos's avatar
André Anjos committed
169

Philip ABBET's avatar
Philip ABBET committed
170
171
      groups (list): A list containing dictionaries with inputs and outputs
        belonging to the same synchronization group.
André Anjos's avatar
André Anjos committed
172

Philip ABBET's avatar
Philip ABBET committed
173
174
      errors (list): A list containing errors found while loading this
        algorithm.
André Anjos's avatar
André Anjos committed
175

Philip ABBET's avatar
Philip ABBET committed
176
177
      data (dict): The original data for this algorithm, as loaded by our JSON
        decoder.
André Anjos's avatar
André Anjos committed
178

Philip ABBET's avatar
Philip ABBET committed
179
180
      code (str): The code that is associated with this algorithm, loaded as a
        text (or binary) file.
André Anjos's avatar
André Anjos committed
181

Philip ABBET's avatar
Philip ABBET committed
182
    """
André Anjos's avatar
André Anjos committed
183

184
185
    dataformat_klass = dataformat.DataFormat

Philip ABBET's avatar
Philip ABBET committed
186
187
    def __init__(self, prefix, data, dataformat_cache=None, library_cache=None):
        """Builds the algorithm by delegating to the backend implementation

        All arguments are forwarded unchanged to the constructor of
        :py:class:`beat.backend.python.algorithm.Algorithm`.
        """
        super(Algorithm, self).__init__(prefix, data, dataformat_cache, library_cache)
André Anjos's avatar
André Anjos committed
188

Philip ABBET's avatar
Philip ABBET committed
189
190
    def _load(self, data, dataformat_cache, library_cache):
        """Parses and validates the algorithm declaration and its code

        ``data`` may be ``None`` (the bundled prototype is used), a string
        (the name of an algorithm stored under the prefix), a
        ``(declaration, code)`` pair, or anything else accepted by
        ``schema.validate``. Problems are accumulated in ``self.errors`` and
        loading stops at the first stage that reports any.
        """

        # reset everything this method is responsible for
        self.errors = []
        self.data = None
        self.code = None
        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        self.libraries = {}  # preloaded libraries

        declaration, source = data, None
        if isinstance(declaration, (tuple, list)):
            # user has passed the declaration and the code separately
            declaration, source = declaration

        if isinstance(declaration, six.string_types):
            # user has passed a name: resolve it against the prefix
            self._name = declaration
            self.storage = Storage(self.prefix, self._name)
            if not self.storage.json.exists():
                self.errors.append(
                    "Algorithm declaration file not found: %s" % declaration
                )
                return

            declaration = self.storage.json.path  # JSON is read by the validator

        # At this point, `declaration' is either ``None`` or something the
        # schema validator can handle
        if declaration is None:
            prototype = load_algorithm_prototype(self.prefix)
            self.data, self.errors = schema.validate("algorithm", prototype)
            assert not self.errors, "\n  * %s" % "\n  *".join(self.errors)  # nosec
        else:
            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate("algorithm", declaration)

        if self.errors:
            return  # don't proceed with the rest of validation

        if self.storage is not None:  # loading from the disk, check code
            if self.storage.code.exists():
                source = self.storage.code.load()
            elif self.data["language"] != "cxx":
                self.errors.append(
                    "Algorithm code not found: %s" % self.storage.code.path
                )
                return

        # NOTE(review): a "cxx" algorithm without a code file reaches this
        # point with no code, so it receives the Python prototype and has its
        # language reset to "python" - confirm this is intended.
        if source is not None:
            # just assign it - notice that in this case, no language is set
            self.code = source
        else:
            self.code = prototypes.binary_load("algorithm.py")
            self.data["language"] = "python"

        if self.errors:
            return  # don't proceed with the rest of validation

        # if no errors so far, make sense out of the declaration data
        self.groups = self.data["groups"]

        # now we check for consistence
        self._check_endpoint_uniqueness()

        # create maps for easy access to data
        self.input_map = {
            name: entry["type"]
            for group in self.groups
            for name, entry in group["inputs"].items()
        }
        self.output_map = {
            name: entry["type"]
            for group in self.groups
            for name, entry in group.get("outputs", {}).items()
        }
        self.loop_map = {
            name: entry["type"]
            for group in self.groups
            for name, entry in group.get("loop", {}).items()
        }

        self._validate_required_dataformats(dataformat_cache)
        self._convert_parameter_types()

        # finally, the libraries
        self._validate_required_libraries(library_cache)
        self._check_language_consistence()
André Anjos's avatar
André Anjos committed
281

Philip ABBET's avatar
Philip ABBET committed
282
    def _check_endpoint_uniqueness(self):
Samuel GAIST's avatar
Samuel GAIST committed
283
        """Checks for name clashes accross input/output groups"""
André Anjos's avatar
André Anjos committed
284

Philip ABBET's avatar
Philip ABBET committed
285
        all_input_names = []
286
287
        for group in self.groups:
            all_input_names.extend(group["inputs"].keys())
Philip ABBET's avatar
Philip ABBET committed
288
        if len(set(all_input_names)) != len(all_input_names):
289
290
291
292
            self.errors.append(
                "repeated input name in algorithm `%s' "
                "declaration: %s" % (self.name, ", ".join(all_input_names))
            )
André Anjos's avatar
André Anjos committed
293

Philip ABBET's avatar
Philip ABBET committed
294
295
296
        # all outputs must have unique names
        all_output_names = []
        for group in self.groups:
297
298
299
            if "outputs" not in group:
                continue
            all_output_names.extend(group["outputs"].keys())
Philip ABBET's avatar
Philip ABBET committed
300
        if len(set(all_output_names)) != len(all_output_names):
301
302
303
304
            self.errors.append(
                "repeated output name in algorithm `%s' "
                "declaration: %s" % (self.name, ", ".join(all_output_names))
            )
André Anjos's avatar
André Anjos committed
305

306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
    def _validate_format(self, type_name, group_name, entry_name, dataformat):
        if dataformat.errors:
            self.errors.append(
                "found error validating data format `%s' "
                "for %s `%s' on algorithm `%s': %s"
                % (
                    type_name,
                    group_name,
                    entry_name,
                    self.name,
                    "\n".join(dataformat.errors),
                )
            )

    def _validate_dataformats(self, group, group_name, dataformat_cache):
        for name, entry in group[group_name].items():
            type_name = entry["type"]
            thisformat = self._update_dataformat_cache(type_name, dataformat_cache)
            self._validate_format(type_name, group_name, name, thisformat)

Philip ABBET's avatar
Philip ABBET committed
326
    def _validate_required_dataformats(self, dataformat_cache):
Samuel GAIST's avatar
Samuel GAIST committed
327
        """Makes sure we can load all requested formats"""
André Anjos's avatar
André Anjos committed
328

Philip ABBET's avatar
Philip ABBET committed
329
        for group in self.groups:
André Anjos's avatar
André Anjos committed
330

331
332
            for name, input_ in group["inputs"].items():
                self._validate_dataformats(group, "inputs", dataformat_cache)
André Anjos's avatar
André Anjos committed
333

334
            if "outputs" in group:
335
                self._validate_dataformats(group, "outputs", dataformat_cache)
André Anjos's avatar
André Anjos committed
336

337
            if "loop" in group:
338
                self._validate_dataformats(group, "loop", dataformat_cache)
André Anjos's avatar
André Anjos committed
339

Philip ABBET's avatar
Philip ABBET committed
340
        if self.results:
André Anjos's avatar
André Anjos committed
341

Philip ABBET's avatar
Philip ABBET committed
342
            for name, result in self.results.items():
343
344
345
346
347
348
349
350
                result_type = result["type"]
                # results can only contain base types and plots therefore, only
                # process plots
                if result_type.find("/") != -1:
                    thisformat = self._update_dataformat_cache(
                        result_type, dataformat_cache
                    )
                    self._validate_format(result_type, "result", name, thisformat)
André Anjos's avatar
André Anjos committed
351

Philip ABBET's avatar
Philip ABBET committed
352
    def _convert_parameter_types(self):
353
354
        """Converts types to numpy equivalents, checks defaults, ranges and
        choices
Philip ABBET's avatar
Philip ABBET committed
355
        """
André Anjos's avatar
André Anjos committed
356

Philip ABBET's avatar
Philip ABBET committed
357
358
359
360
        def _try_convert(name, tp, value, desc):
            try:
                return tp.type(value)
            except Exception as e:
361
362
363
364
                self.errors.append(
                    "%s for parameter `%s' cannot be cast to type "
                    "`%s': %s" % (desc, name, tp.name, e)
                )
André Anjos's avatar
André Anjos committed
365

366
367
        if self.parameters is None:
            return
André Anjos's avatar
André Anjos committed
368

Philip ABBET's avatar
Philip ABBET committed
369
        for name, parameter in self.parameters.items():
370
371
            if parameter["type"] == "string":
                parameter["type"] = numpy.dtype("str")
Philip ABBET's avatar
Philip ABBET committed
372
            else:
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
                parameter["type"] = numpy.dtype(parameter["type"])

            if "range" in parameter:
                parameter["range"][0] = _try_convert(
                    name, parameter["type"], parameter["range"][0], "start of range"
                )
                parameter["range"][1] = _try_convert(
                    name, parameter["type"], parameter["range"][1], "end of range"
                )
                if parameter["range"][0] >= parameter["range"][1]:
                    self.errors.append(
                        "range for parameter `%s' has a start greater "
                        "then the end value (%r >= %r)"
                        % (name, parameter["range"][0], parameter["range"][1])
                    )

            if "choice" in parameter:
                for i, choice in enumerate(parameter["choice"]):
                    parameter["choice"][i] = _try_convert(
                        name,
                        parameter["type"],
                        parameter["choice"][i],
                        "choice[%d]" % i,
                    )

            if "default" in parameter:
                parameter["default"] = _try_convert(
                    name, parameter["type"], parameter["default"], "default"
                )

                if "range" in parameter:  # check range
                    if (
                        parameter["default"] < parameter["range"][0]
                        or parameter["default"] > parameter["range"][1]
                    ):
                        self.errors.append(
                            "default for parameter `%s' (%r) is not "
                            "within parameter range [%r, %r]"
                            % (
                                name,
                                parameter["default"],
                                parameter["range"][0],
                                parameter["range"][1],
                            )
                        )

                if "choice" in parameter:  # check choices
                    if parameter["default"] not in parameter["choice"]:
                        self.errors.append(
                            "default for parameter `%s' (%r) is not "
                            "a valid choice `[%s]'"
                            % (
                                name,
                                parameter["default"],
                                ", ".join(["%r" % k for k in parameter["choice"]]),
                            )
                        )
André Anjos's avatar
André Anjos committed
430

Philip ABBET's avatar
Philip ABBET committed
431
    def _validate_required_libraries(self, library_cache):
André Anjos's avatar
André Anjos committed
432

Philip ABBET's avatar
Philip ABBET committed
433
        # all used libraries must be loadable; cannot use self as a library
André Anjos's avatar
André Anjos committed
434

Philip ABBET's avatar
Philip ABBET committed
435
        if self.uses:
André Anjos's avatar
André Anjos committed
436

Philip ABBET's avatar
Philip ABBET committed
437
            for name, value in self.uses.items():
André Anjos's avatar
André Anjos committed
438

439
440
441
                self.libraries[value] = library_cache.setdefault(
                    value, library.Library(self.prefix, value, library_cache)
                )
André Anjos's avatar
André Anjos committed
442

Philip ABBET's avatar
Philip ABBET committed
443
                if not self.libraries[value].valid:
444
445
446
447
                    self.errors.append(
                        "referred library `%s' (%s) is not valid"
                        % (self.libraries[value].name, name)
                    )
André Anjos's avatar
André Anjos committed
448

Philip ABBET's avatar
Philip ABBET committed
449
    def _check_language_consistence(self):
André Anjos's avatar
André Anjos committed
450

Philip ABBET's avatar
Philip ABBET committed
451
        # all used libraries must be programmed with the same language
452
453
        if self.language == "unknown":
            return  # bail out on unknown language
André Anjos's avatar
André Anjos committed
454

Philip ABBET's avatar
Philip ABBET committed
455
        if self.uses:
André Anjos's avatar
André Anjos committed
456

457
            for name, library_name in self.uses.items():
André Anjos's avatar
André Anjos committed
458

459
460
                if library_name not in self.libraries:
                    continue  # invalid
André Anjos's avatar
André Anjos committed
461

462
463
464
465
466
467
                if self.libraries[library_name].data is None:
                    self.errors.append(
                        "language for used library `%s' cannot be "
                        "inferred as the library was not properly loaded"
                        % (library_name,)
                    )
Philip ABBET's avatar
Philip ABBET committed
468
                    continue
André Anjos's avatar
André Anjos committed
469

470
471
472
473
474
475
476
477
478
479
                if self.libraries[library_name].language != self.language:
                    self.errors.append(
                        "language for used library `%s' (`%s') "
                        "differs from current language for this algorithm (`%s')"
                        % (
                            library_name,
                            self.libraries[library_name].language,
                            self.language,
                        )
                    )