algorithm.py 19.8 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.core module of the BEAT platform.             #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

28
29
30
31
32
33
"""
=========
algorithm
=========

Validation for algorithms
34
35
36
37

Forward importing from :py:mod:`beat.backend.python.algorithm`
:py:class:`beat.backend.python.algorithm.Storage`
:py:class:`beat.backend.python.algorithm.Runner`
38
"""
André Anjos's avatar
André Anjos committed
39
40


41
import os
André Anjos's avatar
André Anjos committed
42
import six
43
import json
André Anjos's avatar
André Anjos committed
44
import numpy
45
import pkg_resources
André Anjos's avatar
André Anjos committed
46
47
48
49
50
51

from . import dataformat
from . import library
from . import schema
from . import prototypes

52
from beat.backend.python.algorithm import Storage
53
from beat.backend.python.algorithm import Runner  # noqa
54
from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
André Anjos's avatar
André Anjos committed
55
56


57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def load_algorithm_prototype(prefix):
    """Builds the default algorithm declaration bundled with this package.

    The JSON prototype stored as ``prototypes/algorithm.json`` is loaded and
    the ``type`` of its ``in_data`` input and ``out_data`` output (both in the
    first group) is set to the highest numbered version of the reference data
    format (``integers``) found below ``prefix``.


    Parameters:

      prefix (str): Root of the directory tree that is searched for a
        directory named after the reference data format.


    Returns:

      dict: The prototype declaration, with the reference data format filled
      in as ``<parent directory>/integers/<version>``.


    Raises:

      AssertionError: If no directory named ``integers`` containing at least
        one numbered version file can be found below ``prefix``.

    """
    algorithm_data = json.loads(
        pkg_resources.resource_string(__name__, "prototypes/algorithm.json")
    )
    ref_dataformat = "integers"
    dataformat_name = None

    # os.walk() takes ``topdown`` as its second positional argument, so only
    # the root is passed here.
    # NOTE(review): the whole prefix is searched, not only its data format
    # area - confirm no other asset type can own an ``integers`` directory.
    for root, dirs, _ in os.walk(prefix):
        if ref_dataformat not in dirs:
            continue

        # versions are compared as integers: a plain string sort would rank
        # "9" above "10" and could pick up unrelated files
        versions = []
        for entry in os.listdir(os.path.join(root, ref_dataformat)):
            stem = entry.split(".")[0]
            if stem.isdigit():
                versions.append(int(stem))

        if not versions:
            continue  # nothing usable in here, keep looking

        dataformat_name = "{}/{}/{}".format(
            os.path.basename(root), ref_dataformat, max(versions)
        )
        break

    # raised explicitly (instead of using ``assert``) so the check survives
    # ``python -O`` while callers still see the same exception type
    if dataformat_name is None:
        raise AssertionError("Reference data format %s not found" % ref_dataformat)

    algorithm_data["groups"][0]["inputs"]["in_data"]["type"] = dataformat_name
    algorithm_data["groups"][0]["outputs"]["out_data"]["type"] = dataformat_name
    return algorithm_data


80
class Algorithm(BackendAlgorithm):
Philip ABBET's avatar
Philip ABBET committed
81
    """Algorithms represent runnable components within the platform.
André Anjos's avatar
André Anjos committed
82

Philip ABBET's avatar
Philip ABBET committed
83
84
    This class can only parse the meta-parameters of the algorithm (i.e., input
    and output declaration, grouping, synchronization details, parameters and
85
86
    splittability). The actual algorithm is not directly treated by this class.
    It can, however, provide you with a loader for actually running the
André Anjos's avatar
André Anjos committed
87
    algorithmic code (see :py:meth:`.runner`).
André Anjos's avatar
André Anjos committed
88
89


Philip ABBET's avatar
Philip ABBET committed
90
    Parameters:
André Anjos's avatar
André Anjos committed
91

Philip ABBET's avatar
Philip ABBET committed
92
      prefix (str): Establishes the prefix of your installation.
André Anjos's avatar
André Anjos committed
93

André Anjos's avatar
André Anjos committed
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
      data (:py:class:`object`, Optional): The piece of data representing the
        algorithm. It must validate against the schema defined for algorithms.
        If a string is passed, it is supposed to be a valid path to an
        algorithm in the designated prefix area. If a tuple is passed (or a
        list), then we consider that the first element represents the algorithm
        declaration, while the second, the code for the algorithm (either in
        its source format or as a binary blob). If ``None`` is passed, loads
        our default prototype for algorithms (source code will be in Python).

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up algorithm loading times as dataformats
        that are already loaded may be re-used.

      library_cache (:py:class:`dict`, Optional): A dictionary mapping library
        names to loaded libraries. This parameter is optional and, if passed,
        may greatly speed-up library loading times as libraries that are
        already loaded may be re-used.
André Anjos's avatar
André Anjos committed
112
113


Philip ABBET's avatar
Philip ABBET committed
114
    Attributes:
André Anjos's avatar
André Anjos committed
115

Philip ABBET's avatar
Philip ABBET committed
116
      name (str): The algorithm name
André Anjos's avatar
André Anjos committed
117

Philip ABBET's avatar
Philip ABBET committed
118
119
      description (str): The short description string, loaded from the JSON
        file if one was set.
André Anjos's avatar
André Anjos committed
120

Philip ABBET's avatar
Philip ABBET committed
121
      documentation (str): The full-length docstring for this object.
André Anjos's avatar
André Anjos committed
122

Philip ABBET's avatar
Philip ABBET committed
123
124
      storage (object): A simple object that provides information about file
        paths for this algorithm
André Anjos's avatar
André Anjos committed
125

126
127
      dataformats (dict): A dictionary containing all pre-loaded dataformats
        used by this algorithm. Data format objects will be of type
Philip ABBET's avatar
Philip ABBET committed
128
        :py:class:`beat.core.dataformat.DataFormat`.
André Anjos's avatar
André Anjos committed
129

130
131
      libraries (dict): A mapping object defining other libraries this
        algorithm needs to load so it can work properly.
André Anjos's avatar
André Anjos committed
132

Philip ABBET's avatar
Philip ABBET committed
133
134
      uses (dict): A mapping object defining the required library import name
        (keys) and the full-names (values).
André Anjos's avatar
André Anjos committed
135

136
137
      parameters (dict): A dictionary containing all pre-defined parameters
        that this algorithm accepts.
André Anjos's avatar
André Anjos committed
138

Philip ABBET's avatar
Philip ABBET committed
139
140
      splittable (bool): A boolean value that indicates if this algorithm is
        automatically parallelizable by our backend.
André Anjos's avatar
André Anjos committed
141

Philip ABBET's avatar
Philip ABBET committed
142
      input_map (dict): A dictionary where the key is the input name and the
143
144
        value, its type. All input names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
145

Philip ABBET's avatar
Philip ABBET committed
146
      output_map (dict): A dictionary where the key is the output name and the
147
148
        value, its type. All output names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
149

150
151
152
153
      results (dict): If this algorithm is actually an analyzer (i.e., there
        are no formal outputs, but results that must be saved by the platform),
        then this dictionary contains the names and data types of those
        elements.
André Anjos's avatar
André Anjos committed
154

Philip ABBET's avatar
Philip ABBET committed
155
156
      groups (list): A list containing dictionaries with inputs and outputs
        belonging to the same synchronization group.
André Anjos's avatar
André Anjos committed
157

Philip ABBET's avatar
Philip ABBET committed
158
159
      errors (list): A list containing errors found while loading this
        algorithm.
André Anjos's avatar
André Anjos committed
160

Philip ABBET's avatar
Philip ABBET committed
161
162
      data (dict): The original data for this algorithm, as loaded by our JSON
        decoder.
André Anjos's avatar
André Anjos committed
163

Philip ABBET's avatar
Philip ABBET committed
164
165
      code (str): The code that is associated with this algorithm, loaded as a
        text (or binary) file.
André Anjos's avatar
André Anjos committed
166

Philip ABBET's avatar
Philip ABBET committed
167
    """
André Anjos's avatar
André Anjos committed
168

Philip ABBET's avatar
Philip ABBET committed
169
170
    def __init__(self, prefix, data, dataformat_cache=None, library_cache=None):
        """Forwards all arguments, unchanged, to the backend constructor."""
        backend = super(Algorithm, self)
        backend.__init__(prefix, data, dataformat_cache, library_cache)
André Anjos's avatar
André Anjos committed
171

Philip ABBET's avatar
Philip ABBET committed
172
173
    def _load(self, data, dataformat_cache, library_cache):
        """Loads the algorithm declaration and code, then validates them

        Problems are accumulated in ``self.errors``. Loading stops early when
        the declaration file is missing, when the declaration does not pass
        schema validation, or when the code file of a non-``cxx`` algorithm is
        missing.


        Parameters:

          data: ``None`` (use the bundled prototype declaration and code), a
            string (name used to locate declaration and code through
            ``Storage``), a tuple or list holding ``(declaration, code)``, or
            any other value accepted by ``schema.validate``.

          dataformat_cache: Passed on to the data format validation.

          library_cache: Passed on to the library validation.

        """

        # start from a clean slate
        self.errors = []
        self.data = None
        self.code = None

        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        self.libraries = {}  # preloaded libraries

        code = None

        # ``None`` needs no preparation: declaration and code both fall back
        # to the bundled prototypes further down
        if isinstance(data, (tuple, list)):  # user has passed individual info
            data, code = data  # break down into two components

        if isinstance(data, six.string_types):  # user has passed a file pointer
            self._name = data
            self.storage = Storage(self.prefix, self._name)

            if not self.storage.json.exists():
                self.errors.append("Algorithm declaration file not found: %s" % data)
                return

            data = self.storage.json.path  # loads data from JSON declaration

        # At this point, `data' can be a dictionary, a path or ``None``
        use_prototype = data is None
        declaration = load_algorithm_prototype(self.prefix) if use_prototype else data

        # this runs basic validation, including JSON loading if required
        self.data, self.errors = schema.validate("algorithm", declaration)

        if use_prototype:  # our own prototype must always be valid
            assert not self.errors, "\n  * %s" % "\n  *".join(self.errors)  # nosec

        if self.errors:
            return  # don't proceed with the rest of validation

        if self.storage is not None:  # loading from the disk, check code
            if self.storage.code.exists():
                code = self.storage.code.load()
            elif self.data["language"] != "cxx":
                self.errors.append(
                    "Algorithm code not found: %s" % self.storage.code.path
                )
                return
            # NOTE(review): a "cxx" algorithm without a code file falls
            # through with ``code`` still ``None``, so the branch below loads
            # the Python prototype and sets the language to "python" -
            # confirm this is intended

        # At this point, `code' can be a string (or a binary blob) or ``None``
        if code is None:  # loads the default code for an algorithm
            self.code = prototypes.binary_load("algorithm.py")
            self.data["language"] = "python"
        else:  # just assign it - notice that in this case, no language is set
            self.code = code

        if self.errors:
            return  # don't proceed with the rest of validation

        # if no errors so far, make sense out of the declaration data
        self.groups = self.data["groups"]

        # now we check for consistence
        self._check_endpoint_uniqueness()

        # create maps for easy access to data
        self.input_map = {
            name: spec["type"]
            for group in self.groups
            for name, spec in group["inputs"].items()
        }
        self.output_map = {
            name: spec["type"]
            for group in self.groups
            for name, spec in group.get("outputs", {}).items()
        }
        self.loop_map = {
            name: spec["type"]
            for group in self.groups
            for name, spec in group.get("loop", {}).items()
        }

        self._validate_required_dataformats(dataformat_cache)
        self._convert_parameter_types()

        # finally, the libraries
        self._validate_required_libraries(library_cache)
        self._check_language_consistence()
André Anjos's avatar
André Anjos committed
264

Philip ABBET's avatar
Philip ABBET committed
265
266
267
    def _check_endpoint_uniqueness(self):
        """Checks for name clashes across input/output groups

        One message is appended to ``self.errors`` if an input name shows up
        more than once over all groups, and another one if an output name
        does. Groups that declare no ``outputs`` are ignored for the output
        check.
        """

        # all inputs must have unique names
        input_names = [
            name for group in self.groups for name in group["inputs"].keys()
        ]
        if len(input_names) != len(set(input_names)):
            self.errors.append(
                "repeated input name in algorithm `%s' "
                "declaration: %s" % (self.name, ", ".join(input_names))
            )

        # all outputs must have unique names
        output_names = [
            name
            for group in self.groups
            if "outputs" in group
            for name in group["outputs"].keys()
        ]
        if len(output_names) != len(set(output_names)):
            self.errors.append(
                "repeated output name in algorithm `%s' "
                "declaration: %s" % (self.name, ", ".join(output_names))
            )
André Anjos's avatar
André Anjos committed
289

Philip ABBET's avatar
Philip ABBET committed
290
291
292
    def _validate_required_dataformats(self, dataformat_cache):
        """Makes sure we can load all requested formats

        Every data format named by an input, an output or a result (results
        only when their type contains a ``/``) is taken from
        ``dataformat_cache`` when available, or loaded and then stored in the
        cache when one was given. Each format is kept in ``self.dataformats``
        and handled only once; errors reported by a format are appended to
        ``self.errors``.
        """

        input_message = (
            "found error validating data format `%s' "
            "for input `%s' on algorithm `%s': %s"
        )
        output_message = (
            "found error validating data format `%s' "
            "for output `%s' on algorithm `%s': %s"
        )
        result_message = (
            "found error validating data format `%s' "
            "for result `%s' on algorithm `%s': %s"
        )

        def _require(type_name, endpoint_name, message):
            """Loads (or reuses) one data format and records its errors"""

            if type_name in self.dataformats:
                return  # already taken care of

            if dataformat_cache and type_name in dataformat_cache:  # reuse
                loaded = dataformat_cache[type_name]
            else:  # load it
                loaded = dataformat.DataFormat(self.prefix, type_name)
                if dataformat_cache is not None:  # update it
                    dataformat_cache[type_name] = loaded

            self.dataformats[type_name] = loaded

            if loaded.errors:
                self.errors.append(
                    message
                    % (type_name, endpoint_name, self.name, "\n".join(loaded.errors))
                )

        for group in self.groups:

            for name, spec in group["inputs"].items():
                _require(spec["type"], name, input_message)

            if "outputs" in group:
                for name, spec in group["outputs"].items():
                    _require(spec["type"], name, output_message)

        if self.results:

            for name, spec in self.results.items():
                # only types containing a "/" are loaded as data formats
                if "/" in spec["type"]:
                    _require(spec["type"], name, result_message)
André Anjos's avatar
André Anjos committed
373

Philip ABBET's avatar
Philip ABBET committed
374
    def _convert_parameter_types(self):
        """Converts types to numpy equivalents, checks defaults, ranges and
        choices

        For every declared parameter, the ``type`` entry is replaced by a
        :py:class:`numpy.dtype` (``"string"`` maps to ``numpy.dtype("str")``)
        and the ``range``, ``choice`` and ``default`` values are cast to that
        type, in place. A value that cannot be cast is replaced by ``None``
        and reported in ``self.errors``; checks that depend on such a value
        are skipped so that only the casting error is reported.

        Nothing is done when the algorithm declares no parameters.
        """

        def _try_convert(name, tp, value, desc):
            """Casts ``value`` to ``tp``, returns ``None`` (and records an
            error) if that fails"""
            try:
                return tp.type(value)
            except Exception as e:
                self.errors.append(
                    "%s for parameter `%s' cannot be cast to type "
                    "`%s': %s" % (desc, name, tp.name, e)
                )
                return None

        if self.parameters is None:
            return

        for name, parameter in self.parameters.items():
            if parameter["type"] == "string":
                parameter["type"] = numpy.dtype("str")
            else:
                parameter["type"] = numpy.dtype(parameter["type"])

            # becomes True only when both range limits were cast successfully
            range_usable = False

            if "range" in parameter:
                parameter["range"][0] = _try_convert(
                    name, parameter["type"], parameter["range"][0], "start of range"
                )
                parameter["range"][1] = _try_convert(
                    name, parameter["type"], parameter["range"][1], "end of range"
                )
                range_usable = (
                    parameter["range"][0] is not None
                    and parameter["range"][1] is not None
                )

                # comparing against a failed (``None``) limit would raise a
                # TypeError on Python 3 and hide the recorded casting error
                if range_usable and parameter["range"][0] >= parameter["range"][1]:
                    self.errors.append(
                        "range for parameter `%s' has a start greater "
                        "then the end value (%r >= %r)"
                        % (name, parameter["range"][0], parameter["range"][1])
                    )

            if "choice" in parameter:
                for i, choice in enumerate(parameter["choice"]):
                    parameter["choice"][i] = _try_convert(
                        name, parameter["type"], choice, "choice[%d]" % i,
                    )

            if "default" in parameter:
                parameter["default"] = _try_convert(
                    name, parameter["type"], parameter["default"], "default"
                )

                if parameter["default"] is None:
                    continue  # casting error already recorded, nothing to check

                if range_usable:  # check range
                    if (
                        parameter["default"] < parameter["range"][0]
                        or parameter["default"] > parameter["range"][1]
                    ):
                        self.errors.append(
                            "default for parameter `%s' (%r) is not "
                            "within parameter range [%r, %r]"
                            % (
                                name,
                                parameter["default"],
                                parameter["range"][0],
                                parameter["range"][1],
                            )
                        )

                if "choice" in parameter:  # check choices
                    if parameter["default"] not in parameter["choice"]:
                        self.errors.append(
                            "default for parameter `%s' (%r) is not "
                            "a valid choice `[%s]'"
                            % (
                                name,
                                parameter["default"],
                                ", ".join(["%r" % k for k in parameter["choice"]]),
                            )
                        )
André Anjos's avatar
André Anjos committed
452

Philip ABBET's avatar
Philip ABBET committed
453
    def _validate_required_libraries(self, library_cache):
        """Makes sure all libraries this algorithm uses can be loaded

        Each library listed in ``self.uses`` is taken from ``library_cache``
        when it is already there; otherwise it is loaded and, if a cache was
        given, stored in it. The library is kept in ``self.libraries`` and an
        error is appended to ``self.errors`` when it is not valid.


        Parameters:

          library_cache (dict): Mapping of library names to loaded libraries.
            ``None`` is accepted and means that no cache is used.

        """

        # all used libraries must be loadable; cannot use self as a library

        if not self.uses:
            return

        for name, value in self.uses.items():

            if library_cache is not None and value in library_cache:  # reuse
                used = library_cache[value]
            else:  # load it
                # built only on a cache miss: handing a fresh Library to
                # ``dict.setdefault`` would load it even when it is never used
                used = library.Library(self.prefix, value, library_cache)
                if library_cache is not None:  # update it
                    used = library_cache.setdefault(value, used)

            self.libraries[value] = used

            if not used.valid:
                self.errors.append(
                    "referred library `%s' (%s) is not valid" % (used.name, name)
                )
André Anjos's avatar
André Anjos committed
470

Philip ABBET's avatar
Philip ABBET committed
471
    def _check_language_consistence(self):
        """Checks that used libraries share the language of this algorithm

        Nothing is checked when the language of the algorithm is ``unknown``
        or when no library is used. Libraries missing from ``self.libraries``
        are skipped; a library without loaded data, or one written in another
        language, adds a message to ``self.errors``.
        """

        # all used libraries must be programmed with the same language
        if self.language == "unknown":
            return  # bail out on unknown language

        if not self.uses:
            return

        for library_name in self.uses.values():

            if library_name not in self.libraries:
                continue  # invalid

            used = self.libraries[library_name]

            if used.data is None:
                self.errors.append(
                    "language for used library `%s' cannot be "
                    "inferred as the library was not properly loaded"
                    % (library_name,)
                )
                continue

            if used.language != self.language:
                self.errors.append(
                    "language for used library `%s' (`%s') "
                    "differs from current language for this algorithm (`%s')"
                    % (library_name, used.language, self.language)
                )