algorithm.py 17.3 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.core module of the BEAT platform.             #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

28
29
30
31
32
33
"""
=========
algorithm
=========

Validation for algorithms
34
35
36
37

Forward importing from :py:mod:`beat.backend.python.algorithm`
:py:class:`beat.backend.python.algorithm.Storage`
:py:class:`beat.backend.python.algorithm.Runner`
38
"""
André Anjos's avatar
André Anjos committed
39
40
41
42
43
44
45
46
47
48
49


import six
import numpy

from . import dataformat
from . import library
from . import schema
from . import prototypes
from . import utils

50
51
52
from beat.backend.python.algorithm import Storage
from beat.backend.python.algorithm import Runner
from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
André Anjos's avatar
André Anjos committed
53
54


55
class Algorithm(BackendAlgorithm):
Philip ABBET's avatar
Philip ABBET committed
56
    """Algorithms represent runnable components within the platform.
André Anjos's avatar
André Anjos committed
57

Philip ABBET's avatar
Philip ABBET committed
58
59
    This class can only parse the meta-parameters of the algorithm (i.e., input
    and output declaration, grouping, synchronization details, parameters and
60
61
    splittability). The actual algorithm is not directly treated by this class.
    It can, however, provide you with a loader for actually running the
André Anjos's avatar
André Anjos committed
62
    algorithmic code (see :py:meth:`.runner`).
André Anjos's avatar
André Anjos committed
63
64


Philip ABBET's avatar
Philip ABBET committed
65
    Parameters:
André Anjos's avatar
André Anjos committed
66

Philip ABBET's avatar
Philip ABBET committed
67
      prefix (str): Establishes the prefix of your installation.
André Anjos's avatar
André Anjos committed
68

André Anjos's avatar
André Anjos committed
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
      data (:py:class:`object`, Optional): The piece of data representing the
        algorithm. It must validate against the schema defined for algorithms.
        If a string is passed, it is supposed to be a valid path to an
        algorithm in the designated prefix area. If a tuple is passed (or a
        list), then we consider that the first element represents the algorithm
        declaration, while the second, the code for the algorithm (either in
        its source format or as a binary blob). If ``None`` is passed, loads
        our default prototype for algorithms (source code will be in Python).

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up algorithm loading times as dataformats
        that are already loaded may be re-used.

      library_cache (:py:class:`dict`, Optional): A dictionary mapping library
        names to loaded libraries. This parameter is optional and, if passed,
        may greatly speed-up library loading times as libraries that are
        already loaded may be re-used.
André Anjos's avatar
André Anjos committed
87
88


Philip ABBET's avatar
Philip ABBET committed
89
    Attributes:
André Anjos's avatar
André Anjos committed
90

Philip ABBET's avatar
Philip ABBET committed
91
      name (str): The algorithm name
André Anjos's avatar
André Anjos committed
92

Philip ABBET's avatar
Philip ABBET committed
93
94
      description (str): The short description string, loaded from the JSON
        file if one was set.
André Anjos's avatar
André Anjos committed
95

Philip ABBET's avatar
Philip ABBET committed
96
      documentation (str): The full-length docstring for this object.
André Anjos's avatar
André Anjos committed
97

Philip ABBET's avatar
Philip ABBET committed
98
99
      storage (object): A simple object that provides information about file
        paths for this algorithm
André Anjos's avatar
André Anjos committed
100

101
102
      dataformats (dict): A dictionary containing all pre-loaded dataformats
        used by this algorithm. Data format objects will be of type
Philip ABBET's avatar
Philip ABBET committed
103
        :py:class:`beat.core.dataformat.DataFormat`.
André Anjos's avatar
André Anjos committed
104

105
106
      libraries (dict): A mapping object defining other libraries this
        algorithm needs to load so it can work properly.
André Anjos's avatar
André Anjos committed
107

Philip ABBET's avatar
Philip ABBET committed
108
109
      uses (dict): A mapping object defining the required library import name
        (keys) and the full-names (values).
André Anjos's avatar
André Anjos committed
110

111
112
      parameters (dict): A dictionary containing all pre-defined parameters
        that this algorithm accepts.
André Anjos's avatar
André Anjos committed
113

Philip ABBET's avatar
Philip ABBET committed
114
115
      splittable (bool): A boolean value that indicates if this algorithm is
        automatically parallelizable by our backend.
André Anjos's avatar
André Anjos committed
116

Philip ABBET's avatar
Philip ABBET committed
117
      input_map (dict): A dictionary where the key is the input name and the
118
119
        value, its type. All input names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
120

Philip ABBET's avatar
Philip ABBET committed
121
      output_map (dict): A dictionary where the key is the output name and the
122
123
        value, its type. All output names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
124

125
126
127
128
      results (dict): If this algorithm is actually an analyzer (i.e., there
        are no formal outputs, but results that must be saved by the platform),
        then this dictionary contains the names and data types of those
        elements.
André Anjos's avatar
André Anjos committed
129

Philip ABBET's avatar
Philip ABBET committed
130
131
      groups (list): A list of dictionaries, each one holding the inputs and
        outputs belonging to the same synchronization group.
André Anjos's avatar
André Anjos committed
132

Philip ABBET's avatar
Philip ABBET committed
133
134
      errors (list): A list containing errors found while loading this
        algorithm.
André Anjos's avatar
André Anjos committed
135

Philip ABBET's avatar
Philip ABBET committed
136
137
      data (dict): The original data for this algorithm, as loaded by our JSON
        decoder.
André Anjos's avatar
André Anjos committed
138

Philip ABBET's avatar
Philip ABBET committed
139
140
      code (str): The code that is associated with this algorithm, loaded as a
        text (or binary) file.
André Anjos's avatar
André Anjos committed
141

Philip ABBET's avatar
Philip ABBET committed
142
    """
André Anjos's avatar
André Anjos committed
143

Philip ABBET's avatar
Philip ABBET committed
144
145
    def __init__(self, prefix, data, dataformat_cache=None, library_cache=None):
        """Initializes the algorithm through the backend implementation

        All arguments are forwarded, positionally and unchanged, to the
        initializer of the parent class. See the class documentation for the
        meaning of each parameter.
        """
        super(Algorithm, self).__init__(prefix, data, dataformat_cache, library_cache)
André Anjos's avatar
André Anjos committed
146
147


Philip ABBET's avatar
Philip ABBET committed
148
149
    def _load(self, data, dataformat_cache, library_cache):
        """Loads and validates the algorithm declaration and its code

        Resets all loading state on ``self`` (``errors``, ``data``, ``code``,
        ``storage``, ``dataformats``, ``libraries``), then resolves the
        declaration, the code and finally runs every consistency check. Any
        problem found is reported through ``self.errors``; loading stops at
        the first stage that reports errors.

        Parameters:

          data: ``None`` (use the default prototype), a string (name of an
            algorithm to be looked up through ``Storage``), a two-element
            tuple or list (``(declaration, code)``), or any other object
            accepted by ``schema.validate``.

          dataformat_cache: Forwarded to
            :py:meth:`_validate_required_dataformats`.

          library_cache: Forwarded to :py:meth:`_validate_required_libraries`.
        """

        # start from a clean slate
        self.errors = []
        self.data = None
        self.code = None
        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        self.libraries = {}  # preloaded libraries

        code = None

        # a (declaration, code) pair was handed in directly: split it
        if isinstance(data, (tuple, list)):
            data, code = data

        # a name was handed in: locate the declaration on disk
        if isinstance(data, six.string_types):
            self._name = data
            self.storage = Storage(self.prefix, self._name)

            if not self.storage.json.exists():
                self.errors.append('Algorithm declaration file not found: %s' % data)
                return

            data = self.storage.json.path  # loads data from JSON declaration

        # from here on, ``data`` is either ``None`` or something to validate
        if data is not None:
            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate('algorithm', data)
        else:
            # nothing given: fall back to the default algorithm declaration
            self.data, self.errors = prototypes.load('algorithm')
            assert not self.errors, "\n  * %s" % "\n  *".join(self.errors)

        if self.errors:
            return  # don't proceed with the rest of validation

        # when loading from disk, the code must be fetched from there as well
        if self.storage is not None:
            if self.storage.code.exists():
                code = self.storage.code.load()
            elif self.data['language'] != 'cxx':
                self.errors.append('Algorithm code not found: %s' % \
                        self.storage.code.path)
                return

        # ``code`` is now a string (or a binary blob) or ``None``
        if code is not None:
            # just assign it - notice that in this case, no language is set
            self.code = code
        else:
            # NOTE(review): a 'cxx' algorithm loaded from disk without a code
            # file also lands here, so its language is overwritten with
            # 'python' - confirm this is intended.
            self.code = prototypes.binary_load('algorithm.py')
            self.data['language'] = 'python'

        if self.errors:
            return  # don't proceed with the rest of validation

        # no errors so far: make sense out of the declaration data
        self.groups = self.data['groups']

        # endpoint names must not clash
        self._check_endpoint_uniqueness()

        # name -> type maps for easy access to the endpoints
        self.input_map = {
            key: value['type']
            for group in self.groups
            for key, value in group['inputs'].items()
        }
        self.output_map = {
            key: value['type']
            for group in self.groups
            for key, value in group.get('outputs', {}).items()
        }
        self.loop_map = {
            key: value['type']
            for group in self.groups
            for key, value in group.get('loop', {}).items()
        }

        self._validate_required_dataformats(dataformat_cache)
        self._convert_parameter_types()

        # finally, the libraries
        self._validate_required_libraries(library_cache)
        self._check_language_consistence()
André Anjos's avatar
André Anjos committed
235
236


Philip ABBET's avatar
Philip ABBET committed
237
238
239
    def _check_endpoint_uniqueness(self):
        """Checks for name clashes accross input/output groups
        """
André Anjos's avatar
André Anjos committed
240

Philip ABBET's avatar
Philip ABBET committed
241
242
243
244
245
        all_input_names = []
        for group in self.groups: all_input_names.extend(group['inputs'].keys())
        if len(set(all_input_names)) != len(all_input_names):
            self.errors.append("repeated input name in algorithm `%s' " \
                    "declaration: %s" % (self.name, ', '.join(all_input_names)))
André Anjos's avatar
André Anjos committed
246

Philip ABBET's avatar
Philip ABBET committed
247
248
249
250
251
252
253
254
        # all outputs must have unique names
        all_output_names = []
        for group in self.groups:
            if 'outputs' not in group: continue
            all_output_names.extend(group['outputs'].keys())
        if len(set(all_output_names)) != len(all_output_names):
            self.errors.append("repeated output name in algorithm `%s' " \
                    "declaration: %s" % (self.name, ', '.join(all_output_names)))
André Anjos's avatar
André Anjos committed
255
256


Philip ABBET's avatar
Philip ABBET committed
257
258
259
    def _validate_required_dataformats(self, dataformat_cache):
        """Makes sure we can load all requested formats
        """
André Anjos's avatar
André Anjos committed
260

Philip ABBET's avatar
Philip ABBET committed
261
        for group in self.groups:
André Anjos's avatar
André Anjos committed
262

Philip ABBET's avatar
Philip ABBET committed
263
264
            for name, input in group['inputs'].items():
                if input['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
265

266
                if dataformat_cache and input['type'] in dataformat_cache:  # reuse
Philip ABBET's avatar
Philip ABBET committed
267
                    thisformat = dataformat_cache[input['type']]
268
                else:  # load it
Philip ABBET's avatar
Philip ABBET committed
269
                    thisformat = dataformat.DataFormat(self.prefix, input['type'])
270
                    if dataformat_cache is not None:  # update it
Philip ABBET's avatar
Philip ABBET committed
271
                        dataformat_cache[input['type']] = thisformat
André Anjos's avatar
André Anjos committed
272

Philip ABBET's avatar
Philip ABBET committed
273
                self.dataformats[input['type']] = thisformat
André Anjos's avatar
André Anjos committed
274

Philip ABBET's avatar
Philip ABBET committed
275
276
277
278
279
                if thisformat.errors:
                    self.errors.append("found error validating data format `%s' " \
                            "for input `%s' on algorithm `%s': %s" % \
                            (input['type'], name, self.name,
                                '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
280

Philip ABBET's avatar
Philip ABBET committed
281
            if 'outputs' not in group: continue
André Anjos's avatar
André Anjos committed
282

Philip ABBET's avatar
Philip ABBET committed
283
284
            for name, output in group['outputs'].items():
                if output['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
285

286
                if dataformat_cache and output['type'] in dataformat_cache:  # reuse
Philip ABBET's avatar
Philip ABBET committed
287
                    thisformat = dataformat_cache[output['type']]
288
                else:  # load it
Philip ABBET's avatar
Philip ABBET committed
289
                    thisformat = dataformat.DataFormat(self.prefix, output['type'])
290
                    if dataformat_cache is not None:  # update it
Philip ABBET's avatar
Philip ABBET committed
291
                        dataformat_cache[output['type']] = thisformat
André Anjos's avatar
André Anjos committed
292

Philip ABBET's avatar
Philip ABBET committed
293
                self.dataformats[output['type']] = thisformat
André Anjos's avatar
André Anjos committed
294

Philip ABBET's avatar
Philip ABBET committed
295
296
297
298
299
                if thisformat.errors:
                    self.errors.append("found error validating data format `%s' " \
                            "for output `%s' on algorithm `%s': %s" % \
                            (output['type'], name, self.name,
                                '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
300

Philip ABBET's avatar
Philip ABBET committed
301
        if self.results:
André Anjos's avatar
André Anjos committed
302

Philip ABBET's avatar
Philip ABBET committed
303
            for name, result in self.results.items():
André Anjos's avatar
André Anjos committed
304

Philip ABBET's avatar
Philip ABBET committed
305
                if result['type'].find('/') != -1:
André Anjos's avatar
André Anjos committed
306

Philip ABBET's avatar
Philip ABBET committed
307
                    if result['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
308

309
                    if dataformat_cache and result['type'] in dataformat_cache:  # reuse
Philip ABBET's avatar
Philip ABBET committed
310
311
312
                        thisformat = dataformat_cache[result['type']]
                    else:
                        thisformat = dataformat.DataFormat(self.prefix, result['type'])
313
                        if dataformat_cache is not None:  # update it
Philip ABBET's avatar
Philip ABBET committed
314
                            dataformat_cache[result['type']] = thisformat
André Anjos's avatar
André Anjos committed
315

Philip ABBET's avatar
Philip ABBET committed
316
                    self.dataformats[result['type']] = thisformat
André Anjos's avatar
André Anjos committed
317

Philip ABBET's avatar
Philip ABBET committed
318
319
320
321
322
                    if thisformat.errors:
                        self.errors.append("found error validating data format `%s' " \
                                "for result `%s' on algorithm `%s': %s" % \
                                (result['type'], name, self.name,
                                    '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
323
324


Philip ABBET's avatar
Philip ABBET committed
325
    def _convert_parameter_types(self):
326
327
        """Converts types to numpy equivalents, checks defaults, ranges and
        choices
Philip ABBET's avatar
Philip ABBET committed
328
        """
André Anjos's avatar
André Anjos committed
329

Philip ABBET's avatar
Philip ABBET committed
330
331
332
333
334
335
        def _try_convert(name, tp, value, desc):
            try:
                return tp.type(value)
            except Exception as e:
                self.errors.append("%s for parameter `%s' cannot be cast to type " \
                        "`%s': %s" % (desc, name, tp.name, e))
André Anjos's avatar
André Anjos committed
336

Philip ABBET's avatar
Philip ABBET committed
337
        if self.parameters is None: return
André Anjos's avatar
André Anjos committed
338

Philip ABBET's avatar
Philip ABBET committed
339
340
341
342
343
        for name, parameter in self.parameters.items():
            if parameter['type'] == 'string':
                parameter['type'] = numpy.dtype('str')
            else:
                parameter['type'] = numpy.dtype(parameter['type'])
André Anjos's avatar
André Anjos committed
344

Philip ABBET's avatar
Philip ABBET committed
345
346
347
348
349
350
351
352
353
            if 'range' in parameter:
                parameter['range'][0] = _try_convert(name, parameter['type'],
                    parameter['range'][0], 'start of range')
                parameter['range'][1] = _try_convert(name, parameter['type'],
                    parameter['range'][1], 'end of range')
                if parameter['range'][0] >= parameter['range'][1]:
                    self.errors.append("range for parameter `%s' has a start greater " \
                            "then the end value (%r >= %r)" % \
                            (name, parameter['range'][0], parameter['range'][1]))
André Anjos's avatar
André Anjos committed
354

Philip ABBET's avatar
Philip ABBET committed
355
356
357
358
            if 'choice' in parameter:
                for i, choice in enumerate(parameter['choice']):
                    parameter['choice'][i] = _try_convert(name, parameter['type'],
                        parameter['choice'][i], 'choice[%d]' % i)
André Anjos's avatar
André Anjos committed
359

Philip ABBET's avatar
Philip ABBET committed
360
361
362
            if 'default' in parameter:
                parameter['default'] = _try_convert(name, parameter['type'],
                    parameter['default'], 'default')
André Anjos's avatar
André Anjos committed
363

364
                if 'range' in parameter:  # check range
Philip ABBET's avatar
Philip ABBET committed
365
366
367
368
369
                    if parameter['default'] < parameter['range'][0] or \
                            parameter['default'] > parameter['range'][1]:
                        self.errors.append("default for parameter `%s' (%r) is not " \
                          "within parameter range [%r, %r]" % (name, parameter['default'],
                              parameter['range'][0], parameter['range'][1]))
André Anjos's avatar
André Anjos committed
370

371
                if 'choice' in parameter:  # check choices
Philip ABBET's avatar
Philip ABBET committed
372
373
374
375
                    if parameter['default'] not in parameter['choice']:
                        self.errors.append("default for parameter `%s' (%r) is not " \
                          "a valid choice `[%s]'" % (name, parameter['default'],
                              ', '.join(['%r' % k for k in parameter['choice']])))
André Anjos's avatar
André Anjos committed
376
377


Philip ABBET's avatar
Philip ABBET committed
378
    def _validate_required_libraries(self, library_cache):
André Anjos's avatar
André Anjos committed
379

Philip ABBET's avatar
Philip ABBET committed
380
        # all used libraries must be loadable; cannot use self as a library
André Anjos's avatar
André Anjos committed
381

Philip ABBET's avatar
Philip ABBET committed
382
        if self.uses:
André Anjos's avatar
André Anjos committed
383

Philip ABBET's avatar
Philip ABBET committed
384
            for name, value in self.uses.items():
André Anjos's avatar
André Anjos committed
385

Philip ABBET's avatar
Philip ABBET committed
386
387
                self.libraries[value] = library_cache.setdefault(value,
                        library.Library(self.prefix, value, library_cache))
André Anjos's avatar
André Anjos committed
388

Philip ABBET's avatar
Philip ABBET committed
389
390
391
                if not self.libraries[value].valid:
                    self.errors.append("referred library `%s' (%s) is not valid" % \
                            (self.libraries[value].name, name))
André Anjos's avatar
André Anjos committed
392
393


Philip ABBET's avatar
Philip ABBET committed
394
    def _check_language_consistence(self):
André Anjos's avatar
André Anjos committed
395

Philip ABBET's avatar
Philip ABBET committed
396
        # all used libraries must be programmed with the same language
397
        if self.language == 'unknown': return  # bail out on unknown language
André Anjos's avatar
André Anjos committed
398

Philip ABBET's avatar
Philip ABBET committed
399
        if self.uses:
André Anjos's avatar
André Anjos committed
400

Philip ABBET's avatar
Philip ABBET committed
401
            for name, library in self.uses.items():
André Anjos's avatar
André Anjos committed
402

403
                if library not in self.libraries: continue  # invalid
André Anjos's avatar
André Anjos committed
404

Philip ABBET's avatar
Philip ABBET committed
405
406
407
408
409
                if self.libraries[library].data is None:
                    self.errors.append("language for used library `%s' cannot be " \
                            "inferred as the library was not properly loaded" % \
                            (library,))
                    continue
André Anjos's avatar
André Anjos committed
410

Philip ABBET's avatar
Philip ABBET committed
411
412
413
414
                if self.libraries[library].language != self.language:
                    self.errors.append("language for used library `%s' (`%s') " \
                            "differs from current language for this algorithm (`%s')" % \
                            (library, self.libraries[library].language, self.language))