algorithm.py 20 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.core module of the BEAT platform.             #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

28
29
30
31
32
33
"""
=========
algorithm
=========

Validation for algorithms
34
35
36
37

Forward importing from :py:mod:`beat.backend.python.algorithm`
:py:class:`beat.backend.python.algorithm.Storage`
:py:class:`beat.backend.python.algorithm.Runner`
38
"""
André Anjos's avatar
André Anjos committed
39
40


41
import os
André Anjos's avatar
André Anjos committed
42
import six
43
import json
André Anjos's avatar
André Anjos committed
44
import numpy
45
import pkg_resources
André Anjos's avatar
André Anjos committed
46
47
48
49
50
51

from . import dataformat
from . import library
from . import schema
from . import prototypes

52
from beat.backend.python.algorithm import Storage
53
from beat.backend.python.algorithm import Runner  # noqa
54
from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
André Anjos's avatar
André Anjos committed
55
56


57
58
59
60
def load_algorithm_prototype(prefix):
    """Builds the declaration of the default (prototype) algorithm

    The JSON template shipped with this package
    (``prototypes/algorithm.json``) is loaded and the type of the ``in_data``
    input and of the ``out_data`` output of its first group is replaced by a
    reference data format found under ``<prefix>/dataformats``.


    Parameters:

      prefix (str): Path to the prefix in which the reference data formats
        (``integer`` or ``integers``) are searched.


    Returns:

      dict: The prototype declaration with input and output types set.


    Raises:

      RuntimeError: If none of the reference data formats can be found.

    """

    def _latest_version(path):
        # Entries are expected to be named ``<number>.<extension>``. They are
        # compared as numbers so that version 10 is newer than version 2
        # (a plain string sort would put "10.json" before "2.json").
        versions = []
        for entry in os.listdir(path):
            stem = entry.split(".")[0]
            try:
                versions.append((int(stem), stem))
            except ValueError:
                continue  # not a version file (e.g. hidden files)
        return max(versions)[1] if versions else None

    algorithm_data = json.loads(
        pkg_resources.resource_string(__name__, "prototypes/algorithm.json")
    )
    ref_dataformats = ["integer", "integers"]
    dataformat = None

    # NOTE(review): ``break`` only ends the directory walk for the current
    # reference name, so a later name in ``ref_dataformats`` that is also
    # present overrides an earlier match. Kept as in the original; confirm
    # whether "first match wins" was intended.
    for ref_dataformat in ref_dataformats:
        for root, dirs, _ in os.walk(os.path.join(prefix, "dataformats")):
            if ref_dataformat not in dirs:
                continue

            version = _latest_version(os.path.join(root, ref_dataformat))
            if version is None:
                continue  # directory without any version, keep looking

            # ``root`` is the directory holding the format, its base name is
            # used as the first component of the data format name
            dataformat = "{}/{}/{}".format(
                os.path.basename(root), ref_dataformat, version
            )
            break

    if dataformat is None:
        raise RuntimeError(
            "Reference data formats [{}] not found".format(",".join(ref_dataformats))
        )

    algorithm_data["groups"][0]["inputs"]["in_data"]["type"] = dataformat
    algorithm_data["groups"][0]["outputs"]["out_data"]["type"] = dataformat
    return algorithm_data


85
class Algorithm(BackendAlgorithm):
Philip ABBET's avatar
Philip ABBET committed
86
    """Algorithms represent runnable components within the platform.
André Anjos's avatar
André Anjos committed
87

Philip ABBET's avatar
Philip ABBET committed
88
89
    This class can only parse the meta-parameters of the algorithm (i.e., input
    and output declaration, grouping, synchronization details, parameters and
90
91
    splittability). The actual algorithm is not directly treated by this class.
    It can, however, provide you with a loader for actually running the
André Anjos's avatar
André Anjos committed
92
    algorithmic code (see :py:meth:`.runner`).
André Anjos's avatar
André Anjos committed
93
94


Philip ABBET's avatar
Philip ABBET committed
95
    Parameters:
André Anjos's avatar
André Anjos committed
96

Philip ABBET's avatar
Philip ABBET committed
97
      prefix (str): Establishes the prefix of your installation.
André Anjos's avatar
André Anjos committed
98

André Anjos's avatar
André Anjos committed
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
      data (:py:class:`object`, Optional): The piece of data representing the
        algorithm. It must validate against the schema defined for algorithms.
        If a string is passed, it is supposed to be a valid path to an
        algorithm in the designated prefix area. If a tuple is passed (or a
        list), then we consider that the first element represents the algorithm
        declaration, while the second, the code for the algorithm (either in
        its source format or as a binary blob). If ``None`` is passed, loads
        our default prototype for algorithms (source code will be in Python).

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up algorithm loading times as dataformats
        that are already loaded may be re-used.

      library_cache (:py:class:`dict`, Optional): A dictionary mapping library
        names to loaded libraries. This parameter is optional and, if passed,
        may greatly speed-up library loading times as libraries that are
        already loaded may be re-used.
André Anjos's avatar
André Anjos committed
117
118


Philip ABBET's avatar
Philip ABBET committed
119
    Attributes:
André Anjos's avatar
André Anjos committed
120

Philip ABBET's avatar
Philip ABBET committed
121
      name (str): The algorithm name
André Anjos's avatar
André Anjos committed
122

Philip ABBET's avatar
Philip ABBET committed
123
124
      description (str): The short description string, loaded from the JSON
        file if one was set.
André Anjos's avatar
André Anjos committed
125

Philip ABBET's avatar
Philip ABBET committed
126
      documentation (str): The full-length docstring for this object.
André Anjos's avatar
André Anjos committed
127

Philip ABBET's avatar
Philip ABBET committed
128
129
      storage (object): A simple object that provides information about file
        paths for this algorithm
André Anjos's avatar
André Anjos committed
130

131
132
      dataformats (dict): A dictionary containing all pre-loaded dataformats
        used by this algorithm. Data format objects will be of type
Philip ABBET's avatar
Philip ABBET committed
133
        :py:class:`beat.core.dataformat.DataFormat`.
André Anjos's avatar
André Anjos committed
134

135
136
      libraries (dict): A mapping object defining other libraries this
        algorithm needs to load so it can work properly.
André Anjos's avatar
André Anjos committed
137

Philip ABBET's avatar
Philip ABBET committed
138
139
      uses (dict): A mapping object defining the required library import name
        (keys) and the full-names (values).
André Anjos's avatar
André Anjos committed
140

141
142
      parameters (dict): A dictionary containing all pre-defined parameters
        that this algorithm accepts.
André Anjos's avatar
André Anjos committed
143

Philip ABBET's avatar
Philip ABBET committed
144
145
      splittable (bool): A boolean value that indicates if this algorithm is
        automatically parallelizable by our backend.
André Anjos's avatar
André Anjos committed
146

Philip ABBET's avatar
Philip ABBET committed
147
      input_map (dict): A dictionary where the key is the input name and the
148
149
        value, its type. All input names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
150

Philip ABBET's avatar
Philip ABBET committed
151
      output_map (dict): A dictionary where the key is the output name and the
152
153
        value, its type. All output names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
154

155
156
157
158
      results (dict): If this algorithm is actually an analyzer (i.e., there
        are no formal outputs, but results that must be saved by the platform),
        then this dictionary contains the names and data types of those
        elements.
André Anjos's avatar
André Anjos committed
159

Philip ABBET's avatar
Philip ABBET committed
160
161
      groups (list): A list containing dictionaries with inputs and outputs
        belonging to the same synchronization group.
André Anjos's avatar
André Anjos committed
162

Philip ABBET's avatar
Philip ABBET committed
163
164
      errors (list): A list containing errors found while loading this
        algorithm.
André Anjos's avatar
André Anjos committed
165

Philip ABBET's avatar
Philip ABBET committed
166
167
      data (dict): The original data for this algorithm, as loaded by our JSON
        decoder.
André Anjos's avatar
André Anjos committed
168

Philip ABBET's avatar
Philip ABBET committed
169
170
      code (str): The code that is associated with this algorithm, loaded as a
        text (or binary) file.
André Anjos's avatar
André Anjos committed
171

Philip ABBET's avatar
Philip ABBET committed
172
    """
André Anjos's avatar
André Anjos committed
173

Philip ABBET's avatar
Philip ABBET committed
174
175
    def __init__(self, prefix, data, dataformat_cache=None, library_cache=None):
        """Initializes the algorithm by delegating to the parent class

        All arguments are forwarded, unchanged and in the same order, to the
        constructor of the parent class.
        """
        super(Algorithm, self).__init__(prefix, data, dataformat_cache, library_cache)
André Anjos's avatar
André Anjos committed
176

Philip ABBET's avatar
Philip ABBET committed
177
178
    def _load(self, data, dataformat_cache, library_cache):
        """Loads the algorithm

        Resets the state of this object, validates the declaration against
        the ``algorithm`` schema, loads the associated code and runs the
        consistency checks. Problems are accumulated in ``self.errors``;
        loading stops as soon as the declaration or the code cannot be
        obtained.


        Parameters:

          data (object): ``None`` to load the default prototype, a string
            naming an algorithm stored in the prefix, a declaration accepted
            by ``schema.validate``, or a two-element tuple/list holding the
            declaration and the code.

          dataformat_cache (dict): Forwarded to the data format validation.

          library_cache (dict): Forwarded to the library validation.

        """

        self.errors = []
        self.data = None
        self.code = None

        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        self.libraries = {}  # preloaded libraries

        code = None

        if isinstance(data, (tuple, list)):  # user has passed individual info
            data, code = data  # break down into two components

        if isinstance(data, six.string_types):  # user has passed a file pointer
            self._name = data
            self.storage = Storage(self.prefix, self._name)
            if not self.storage.json.exists():
                self.errors.append("Algorithm declaration file not found: %s" % data)
                return

            data = self.storage.json.path  # loads data from JSON declaration

        # At this point, `data' can be a dictionary or ``None``
        if data is None:  # loads the default declaration for an algorithm
            algorithm_data = load_algorithm_prototype(self.prefix)
            self.data, self.errors = schema.validate("algorithm", algorithm_data)
            # the prototype shipped with the package must always validate
            assert not self.errors, "\n  * %s" % "\n  * ".join(self.errors)  # nosec
        else:  # just assign it
            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate("algorithm", data)

        if self.errors:
            return  # don't proceed with the rest of validation

        if self.storage is not None:  # loading from the disk, check code
            if self.storage.code.exists():
                code = self.storage.code.load()
            elif self.data["language"] != "cxx":
                self.errors.append(
                    "Algorithm code not found: %s" % self.storage.code.path
                )
                return
            # NOTE(review): a "cxx" algorithm without a code file on disk
            # falls through with ``code`` unset and therefore receives the
            # Python prototype code and language below - confirm this is
            # intended.

        # At this point, `code' can be a string (or a binary blob) or ``None``
        if code is None:  # loads the default code for an algorithm
            self.code = prototypes.binary_load("algorithm.py")
            self.data["language"] = "python"
        else:  # just assign it - notice that in this case, no language is set
            self.code = code

        # if no errors so far, make sense out of the declaration data
        self.groups = self.data["groups"]

        # now we check for consistence
        self._check_endpoint_uniqueness()

        # create maps for easy access to data
        self.input_map = {
            k: v["type"] for g in self.groups for k, v in g["inputs"].items()
        }
        self.output_map = {
            k: v["type"] for g in self.groups for k, v in g.get("outputs", {}).items()
        }
        self.loop_map = {
            k: v["type"] for g in self.groups for k, v in g.get("loop", {}).items()
        }

        self._validate_required_dataformats(dataformat_cache)
        self._convert_parameter_types()

        # finally, the libraries
        self._validate_required_libraries(library_cache)
        self._check_language_consistence()
André Anjos's avatar
André Anjos committed
269

Philip ABBET's avatar
Philip ABBET committed
270
271
272
    def _check_endpoint_uniqueness(self):
        """Checks for name clashes accross input/output groups
        """
André Anjos's avatar
André Anjos committed
273

Philip ABBET's avatar
Philip ABBET committed
274
        all_input_names = []
275
276
        for group in self.groups:
            all_input_names.extend(group["inputs"].keys())
Philip ABBET's avatar
Philip ABBET committed
277
        if len(set(all_input_names)) != len(all_input_names):
278
279
280
281
            self.errors.append(
                "repeated input name in algorithm `%s' "
                "declaration: %s" % (self.name, ", ".join(all_input_names))
            )
André Anjos's avatar
André Anjos committed
282

Philip ABBET's avatar
Philip ABBET committed
283
284
285
        # all outputs must have unique names
        all_output_names = []
        for group in self.groups:
286
287
288
            if "outputs" not in group:
                continue
            all_output_names.extend(group["outputs"].keys())
Philip ABBET's avatar
Philip ABBET committed
289
        if len(set(all_output_names)) != len(all_output_names):
290
291
292
293
            self.errors.append(
                "repeated output name in algorithm `%s' "
                "declaration: %s" % (self.name, ", ".join(all_output_names))
            )
André Anjos's avatar
André Anjos committed
294

Philip ABBET's avatar
Philip ABBET committed
295
296
297
    def _validate_required_dataformats(self, dataformat_cache):
        """Makes sure we can load all requested formats
        """
André Anjos's avatar
André Anjos committed
298

Philip ABBET's avatar
Philip ABBET committed
299
        for group in self.groups:
André Anjos's avatar
André Anjos committed
300

301
302
303
            for name, input in group["inputs"].items():
                if input["type"] in self.dataformats:
                    continue
André Anjos's avatar
André Anjos committed
304

305
306
                if dataformat_cache and input["type"] in dataformat_cache:  # reuse
                    thisformat = dataformat_cache[input["type"]]
307
                else:  # load it
308
                    thisformat = dataformat.DataFormat(self.prefix, input["type"])
309
                    if dataformat_cache is not None:  # update it
310
                        dataformat_cache[input["type"]] = thisformat
André Anjos's avatar
André Anjos committed
311

312
                self.dataformats[input["type"]] = thisformat
André Anjos's avatar
André Anjos committed
313

Philip ABBET's avatar
Philip ABBET committed
314
                if thisformat.errors:
315
316
317
318
319
                    self.errors.append(
                        "found error validating data format `%s' "
                        "for input `%s' on algorithm `%s': %s"
                        % (input["type"], name, self.name, "\n".join(thisformat.errors))
                    )
André Anjos's avatar
André Anjos committed
320

321
322
            if "outputs" not in group:
                continue
André Anjos's avatar
André Anjos committed
323

324
325
326
            for name, output in group["outputs"].items():
                if output["type"] in self.dataformats:
                    continue
André Anjos's avatar
André Anjos committed
327

328
329
                if dataformat_cache and output["type"] in dataformat_cache:  # reuse
                    thisformat = dataformat_cache[output["type"]]
330
                else:  # load it
331
                    thisformat = dataformat.DataFormat(self.prefix, output["type"])
332
                    if dataformat_cache is not None:  # update it
333
                        dataformat_cache[output["type"]] = thisformat
André Anjos's avatar
André Anjos committed
334

335
                self.dataformats[output["type"]] = thisformat
André Anjos's avatar
André Anjos committed
336

Philip ABBET's avatar
Philip ABBET committed
337
                if thisformat.errors:
338
339
340
341
342
343
344
345
346
347
                    self.errors.append(
                        "found error validating data format `%s' "
                        "for output `%s' on algorithm `%s': %s"
                        % (
                            output["type"],
                            name,
                            self.name,
                            "\n".join(thisformat.errors),
                        )
                    )
André Anjos's avatar
André Anjos committed
348

Philip ABBET's avatar
Philip ABBET committed
349
        if self.results:
André Anjos's avatar
André Anjos committed
350

Philip ABBET's avatar
Philip ABBET committed
351
            for name, result in self.results.items():
André Anjos's avatar
André Anjos committed
352

353
                if result["type"].find("/") != -1:
André Anjos's avatar
André Anjos committed
354

355
356
                    if result["type"] in self.dataformats:
                        continue
André Anjos's avatar
André Anjos committed
357

358
359
                    if dataformat_cache and result["type"] in dataformat_cache:  # reuse
                        thisformat = dataformat_cache[result["type"]]
Philip ABBET's avatar
Philip ABBET committed
360
                    else:
361
                        thisformat = dataformat.DataFormat(self.prefix, result["type"])
362
                        if dataformat_cache is not None:  # update it
363
                            dataformat_cache[result["type"]] = thisformat
André Anjos's avatar
André Anjos committed
364

365
                    self.dataformats[result["type"]] = thisformat
André Anjos's avatar
André Anjos committed
366

Philip ABBET's avatar
Philip ABBET committed
367
                    if thisformat.errors:
368
369
370
371
372
373
374
375
376
377
                        self.errors.append(
                            "found error validating data format `%s' "
                            "for result `%s' on algorithm `%s': %s"
                            % (
                                result["type"],
                                name,
                                self.name,
                                "\n".join(thisformat.errors),
                            )
                        )
André Anjos's avatar
André Anjos committed
378

Philip ABBET's avatar
Philip ABBET committed
379
    def _convert_parameter_types(self):
380
381
        """Converts types to numpy equivalents, checks defaults, ranges and
        choices
Philip ABBET's avatar
Philip ABBET committed
382
        """
André Anjos's avatar
André Anjos committed
383

Philip ABBET's avatar
Philip ABBET committed
384
385
386
387
        def _try_convert(name, tp, value, desc):
            try:
                return tp.type(value)
            except Exception as e:
388
389
390
391
                self.errors.append(
                    "%s for parameter `%s' cannot be cast to type "
                    "`%s': %s" % (desc, name, tp.name, e)
                )
André Anjos's avatar
André Anjos committed
392

393
394
        if self.parameters is None:
            return
André Anjos's avatar
André Anjos committed
395

Philip ABBET's avatar
Philip ABBET committed
396
        for name, parameter in self.parameters.items():
397
398
            if parameter["type"] == "string":
                parameter["type"] = numpy.dtype("str")
Philip ABBET's avatar
Philip ABBET committed
399
            else:
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
                parameter["type"] = numpy.dtype(parameter["type"])

            if "range" in parameter:
                parameter["range"][0] = _try_convert(
                    name, parameter["type"], parameter["range"][0], "start of range"
                )
                parameter["range"][1] = _try_convert(
                    name, parameter["type"], parameter["range"][1], "end of range"
                )
                if parameter["range"][0] >= parameter["range"][1]:
                    self.errors.append(
                        "range for parameter `%s' has a start greater "
                        "then the end value (%r >= %r)"
                        % (name, parameter["range"][0], parameter["range"][1])
                    )

            if "choice" in parameter:
                for i, choice in enumerate(parameter["choice"]):
                    parameter["choice"][i] = _try_convert(
                        name,
                        parameter["type"],
                        parameter["choice"][i],
                        "choice[%d]" % i,
                    )

            if "default" in parameter:
                parameter["default"] = _try_convert(
                    name, parameter["type"], parameter["default"], "default"
                )

                if "range" in parameter:  # check range
                    if (
                        parameter["default"] < parameter["range"][0]
                        or parameter["default"] > parameter["range"][1]
                    ):
                        self.errors.append(
                            "default for parameter `%s' (%r) is not "
                            "within parameter range [%r, %r]"
                            % (
                                name,
                                parameter["default"],
                                parameter["range"][0],
                                parameter["range"][1],
                            )
                        )

                if "choice" in parameter:  # check choices
                    if parameter["default"] not in parameter["choice"]:
                        self.errors.append(
                            "default for parameter `%s' (%r) is not "
                            "a valid choice `[%s]'"
                            % (
                                name,
                                parameter["default"],
                                ", ".join(["%r" % k for k in parameter["choice"]]),
                            )
                        )
André Anjos's avatar
André Anjos committed
457

Philip ABBET's avatar
Philip ABBET committed
458
    def _validate_required_libraries(self, library_cache):
André Anjos's avatar
André Anjos committed
459

Philip ABBET's avatar
Philip ABBET committed
460
        # all used libraries must be loadable; cannot use self as a library
André Anjos's avatar
André Anjos committed
461

Philip ABBET's avatar
Philip ABBET committed
462
        if self.uses:
André Anjos's avatar
André Anjos committed
463

Philip ABBET's avatar
Philip ABBET committed
464
            for name, value in self.uses.items():
André Anjos's avatar
André Anjos committed
465

466
467
468
                self.libraries[value] = library_cache.setdefault(
                    value, library.Library(self.prefix, value, library_cache)
                )
André Anjos's avatar
André Anjos committed
469

Philip ABBET's avatar
Philip ABBET committed
470
                if not self.libraries[value].valid:
471
472
473
474
                    self.errors.append(
                        "referred library `%s' (%s) is not valid"
                        % (self.libraries[value].name, name)
                    )
André Anjos's avatar
André Anjos committed
475

Philip ABBET's avatar
Philip ABBET committed
476
    def _check_language_consistence(self):
André Anjos's avatar
André Anjos committed
477

Philip ABBET's avatar
Philip ABBET committed
478
        # all used libraries must be programmed with the same language
479
480
        if self.language == "unknown":
            return  # bail out on unknown language
André Anjos's avatar
André Anjos committed
481

Philip ABBET's avatar
Philip ABBET committed
482
        if self.uses:
André Anjos's avatar
André Anjos committed
483

484
            for name, library_name in self.uses.items():
André Anjos's avatar
André Anjos committed
485

486
487
                if library_name not in self.libraries:
                    continue  # invalid
André Anjos's avatar
André Anjos committed
488

489
490
491
492
493
494
                if self.libraries[library_name].data is None:
                    self.errors.append(
                        "language for used library `%s' cannot be "
                        "inferred as the library was not properly loaded"
                        % (library_name,)
                    )
Philip ABBET's avatar
Philip ABBET committed
495
                    continue
André Anjos's avatar
André Anjos committed
496

497
498
499
500
501
502
503
504
505
506
                if self.libraries[library_name].language != self.language:
                    self.errors.append(
                        "language for used library `%s' (`%s') "
                        "differs from current language for this algorithm (`%s')"
                        % (
                            library_name,
                            self.libraries[library_name].language,
                            self.language,
                        )
                    )