algorithm.py 17.1 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.core module of the BEAT platform.             #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

28
29
30
31
32
33
34
"""
=========
algorithm
=========

Validation for algorithms
"""
André Anjos's avatar
André Anjos committed
35
36
37
38
39
40
41
42
43
44
45


import six
import numpy

from . import dataformat
from . import library
from . import schema
from . import prototypes
from . import utils

46
47
48
from beat.backend.python.algorithm import Storage
from beat.backend.python.algorithm import Runner
from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
André Anjos's avatar
André Anjos committed
49
50


51
class Algorithm(BackendAlgorithm):
Philip ABBET's avatar
Philip ABBET committed
52
    """Algorithms represent runnable components within the platform.
André Anjos's avatar
André Anjos committed
53

Philip ABBET's avatar
Philip ABBET committed
54
55
    This class can only parse the meta-parameters of the algorithm (i.e., input
    and output declaration, grouping, synchronization details, parameters and
56
57
    splittability). The actual algorithm is not directly treated by this class.
    It can, however, provide you with a loader for actually running the
André Anjos's avatar
André Anjos committed
58
    algorithmic code (see :py:meth:`.runner`).
André Anjos's avatar
André Anjos committed
59
60


Philip ABBET's avatar
Philip ABBET committed
61
    Parameters:
André Anjos's avatar
André Anjos committed
62

Philip ABBET's avatar
Philip ABBET committed
63
      prefix (str): Establishes the prefix of your installation.
André Anjos's avatar
André Anjos committed
64

André Anjos's avatar
André Anjos committed
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
      data (:py:class:`object`, Optional): The piece of data representing the
        algorithm. It must validate against the schema defined for algorithms.
        If a string is passed, it is supposed to be a valid path to an
        algorithm in the designated prefix area. If a tuple is passed (or a
        list), then we consider that the first element represents the algorithm
        declaration, while the second, the code for the algorithm (either in
        its source format or as a binary blob). If ``None`` is passed, loads
        our default prototype for algorithms (source code will be in Python).

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up algorithm loading times as dataformats
        that are already loaded may be re-used.

      library_cache (:py:class:`dict`, Optional): A dictionary mapping library
        names to loaded libraries. This parameter is optional and, if passed,
        may greatly speed-up library loading times as libraries that are
        already loaded may be re-used.
André Anjos's avatar
André Anjos committed
83
84


Philip ABBET's avatar
Philip ABBET committed
85
    Attributes:
André Anjos's avatar
André Anjos committed
86

Philip ABBET's avatar
Philip ABBET committed
87
      name (str): The algorithm name
André Anjos's avatar
André Anjos committed
88

Philip ABBET's avatar
Philip ABBET committed
89
90
      description (str): The short description string, loaded from the JSON
        file if one was set.
André Anjos's avatar
André Anjos committed
91

Philip ABBET's avatar
Philip ABBET committed
92
      documentation (str): The full-length docstring for this object.
André Anjos's avatar
André Anjos committed
93

Philip ABBET's avatar
Philip ABBET committed
94
95
      storage (object): A simple object that provides information about file
        paths for this algorithm
André Anjos's avatar
André Anjos committed
96

97
98
      dataformats (dict): A dictionary containing all pre-loaded dataformats
        used by this algorithm. Data format objects will be of type
Philip ABBET's avatar
Philip ABBET committed
99
        :py:class:`beat.core.dataformat.DataFormat`.
André Anjos's avatar
André Anjos committed
100

101
102
      libraries (dict): A mapping object defining other libraries this
        algorithm needs to load so it can work properly.
André Anjos's avatar
André Anjos committed
103

Philip ABBET's avatar
Philip ABBET committed
104
105
      uses (dict): A mapping object defining the required library import name
        (keys) and the full-names (values).
André Anjos's avatar
André Anjos committed
106

107
108
      parameters (dict): A dictionary containing all pre-defined parameters
        that this algorithm accepts.
André Anjos's avatar
André Anjos committed
109

Philip ABBET's avatar
Philip ABBET committed
110
111
      splittable (bool): A boolean value that indicates if this algorithm is
        automatically parallelizable by our backend.
André Anjos's avatar
André Anjos committed
112

Philip ABBET's avatar
Philip ABBET committed
113
      input_map (dict): A dictionary where the key is the input name and the
114
115
        value, its type. All input names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
116

Philip ABBET's avatar
Philip ABBET committed
117
      output_map (dict): A dictionary where the key is the output name and the
118
119
        value, its type. All output names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
120

121
122
123
124
      results (dict): If this algorithm is actually an analyzer (i.e., there
        are no formal outputs, but results that must be saved by the platform),
        then this dictionary contains the names and data types of those
        elements.
André Anjos's avatar
André Anjos committed
125

Philip ABBET's avatar
Philip ABBET committed
126
127
      groups (list): A list containing dictionaries with inputs and outputs
        belonging to the same synchronization group.
André Anjos's avatar
André Anjos committed
128

Philip ABBET's avatar
Philip ABBET committed
129
130
      errors (list): A list containing errors found while loading this
        algorithm.
André Anjos's avatar
André Anjos committed
131

Philip ABBET's avatar
Philip ABBET committed
132
133
      data (dict): The original data for this algorithm, as loaded by our JSON
        decoder.
André Anjos's avatar
André Anjos committed
134

Philip ABBET's avatar
Philip ABBET committed
135
136
      code (str): The code that is associated with this algorithm, loaded as a
        text (or binary) file.
André Anjos's avatar
André Anjos committed
137

Philip ABBET's avatar
Philip ABBET committed
138
    """
André Anjos's avatar
André Anjos committed
139

Philip ABBET's avatar
Philip ABBET committed
140
141
    def __init__(self, prefix, data, dataformat_cache=None, library_cache=None):
        """Initializes the algorithm through the backend base class.

        All four arguments are forwarded, unchanged and in the same order, to
        the parent constructor.
        """

        parent = super(Algorithm, self)
        parent.__init__(prefix, data, dataformat_cache, library_cache)
André Anjos's avatar
André Anjos committed
142
143


Philip ABBET's avatar
Philip ABBET committed
144
145
    def _load(self, data, dataformat_cache, library_cache):
        """Loads the algorithm declaration and its code, then validates both

        Problems are accumulated in ``self.errors``. Loading stops early
        (leaving the remaining attributes unset) as soon as the declaration
        cannot be found or validated, or the code cannot be found.


        Parameters:

          data (object): One of ``None`` (use the default prototype), a string
            (algorithm name, resolved through ``Storage`` with
            ``self.prefix``), a declaration accepted by ``schema.validate``,
            or a 2-element tuple/list ``(declaration, code)``.

          dataformat_cache (dict): Forwarded to
            ``_validate_required_dataformats``.

          library_cache (dict): Forwarded to ``_validate_required_libraries``.

        """

        self.errors = []
        self.data = None
        self.code = None

        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        self.libraries = {}  # preloaded libraries
        code = None

        if isinstance(data, (tuple, list)):  # user has passed individual info
            data, code = data  # break down into two components

        if isinstance(data, six.string_types):  # user has passed a file pointer
            self._name = data
            self.storage = Storage(self.prefix, self._name)
            if not self.storage.json.exists():
                self.errors.append('Algorithm declaration file not found: %s' % data)
                return

            data = self.storage.json.path  # loads data from JSON declaration

        # At this point, `data' can be a dictionary, a path or ``None``
        if data is None:  # loads the default declaration for an algorithm
            self.data, self.errors = prototypes.load('algorithm')
            assert not self.errors, "\n  * %s" % "\n  *".join(self.errors)
        else:
            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate('algorithm', data)

        if self.errors:
            return  # don't proceed with the rest of validation

        if self.storage is not None:  # loading from the disk, check code
            if self.storage.code.exists():
                code = self.storage.code.load()
            elif self.data['language'] != 'cxx':
                self.errors.append('Algorithm code not found: %s' % \
                        self.storage.code.path)
                return

        # At this point, `code' can be a string (or a binary blob) or ``None``
        if code is None:  # loads the default code for an algorithm
            # NOTE(review): a 'cxx' algorithm whose code file is missing on
            # disk also reaches this branch, so it gets the Python prototype
            # and its declared language is overwritten with 'python'. Confirm
            # whether that is intended before relying on ``language`` here.
            self.code = prototypes.binary_load('algorithm.py')
            self.data['language'] = 'python'
        else:  # just assign it - notice that in this case, no language is set
            self.code = code

        # no errors so far: make sense out of the declaration data
        self.groups = self.data['groups']

        # now we check for consistence
        self._check_endpoint_uniqueness()

        # create maps for easy access to data
        self.input_map = {
            key: value['type']
            for group in self.groups
            for key, value in group['inputs'].items()
        }
        self.output_map = {
            key: value['type']
            for group in self.groups
            for key, value in group.get('outputs', {}).items()
        }

        self._validate_required_dataformats(dataformat_cache)
        self._convert_parameter_types()

        # finally, the libraries
        self._validate_required_libraries(library_cache)
        self._check_language_consistence()
André Anjos's avatar
André Anjos committed
229
230


Philip ABBET's avatar
Philip ABBET committed
231
232
233
    def _check_endpoint_uniqueness(self):
        """Checks for name clashes across input/output groups

        Input names and output names are gathered over every group and checked
        independently; one error is recorded per kind if any name repeats.
        """

        # all inputs must have unique names
        input_names = [key for grp in self.groups for key in grp['inputs']]
        if len(input_names) != len(set(input_names)):
            self.errors.append("repeated input name in algorithm `%s' "
                    "declaration: %s" % (self.name, ', '.join(input_names)))

        # all outputs must have unique names (groups may declare no outputs)
        output_names = [key for grp in self.groups if 'outputs' in grp
                for key in grp['outputs']]
        if len(output_names) != len(set(output_names)):
            self.errors.append("repeated output name in algorithm `%s' "
                    "declaration: %s" % (self.name, ', '.join(output_names)))
André Anjos's avatar
André Anjos committed
249
250


Philip ABBET's avatar
Philip ABBET committed
251
252
253
    def _validate_required_dataformats(self, dataformat_cache):
        """Makes sure we can load all requested formats

        Walks the inputs and outputs of every group, then the results (only
        those whose type contains a ``/``), and loads each referenced data
        format once. Problems are appended to ``self.errors``.


        Parameters:

          dataformat_cache (dict): Mapping of data format names to loaded data
            formats. Formats found there are re-used; newly loaded ones are
            added to it. May be ``None``, in which case nothing is cached.

        """

        for group in self.groups:

            for name, endpoint in group['inputs'].items():
                self._load_required_dataformat(endpoint['type'], 'input', name,
                        dataformat_cache)

            for name, endpoint in group.get('outputs', {}).items():
                self._load_required_dataformat(endpoint['type'], 'output', name,
                        dataformat_cache)

        if self.results:

            for name, result in self.results.items():
                # only types containing a slash are looked up as data formats
                if '/' in result['type']:
                    self._load_required_dataformat(result['type'], 'result',
                            name, dataformat_cache)


    def _load_required_dataformat(self, typename, role, name, dataformat_cache):
        """Loads one data format into ``self.dataformats`` and reports errors

        Does nothing if ``typename`` was already loaded for this algorithm.


        Parameters:

          typename (str): Name of the data format to load.

          role (str): Kind of endpoint using it (``input``, ``output`` or
            ``result``); only used in the error message.

          name (str): Name of the endpoint using it; only used in the error
            message.

          dataformat_cache (dict): See ``_validate_required_dataformats``.

        """

        if typename in self.dataformats:
            return

        if dataformat_cache and typename in dataformat_cache:  # reuse
            thisformat = dataformat_cache[typename]
        else:  # load it
            thisformat = dataformat.DataFormat(self.prefix, typename)
            if dataformat_cache is not None:  # update it
                dataformat_cache[typename] = thisformat

        self.dataformats[typename] = thisformat

        if thisformat.errors:
            self.errors.append("found error validating data format `%s' " \
                    "for %s `%s' on algorithm `%s': %s" % \
                    (typename, role, name, self.name,
                        '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
317
318


Philip ABBET's avatar
Philip ABBET committed
319
    def _convert_parameter_types(self):
        """Converts types to numpy equivalents, checks defaults, ranges and
        choices

        Every parameter declaration is modified in place: its ``type`` becomes
        a :py:class:`numpy.dtype` and its ``range``, ``choice`` and ``default``
        values are cast to that type. A value that cannot be cast is replaced
        by ``None`` and an error is appended to ``self.errors``; ordering
        checks involving such a value are skipped, since comparing ``None``
        raises ``TypeError`` on Python 3.
        """

        def _try_convert(name, tp, value, desc):
            """Casts ``value`` to ``tp``; records an error and returns
            ``None`` on failure"""
            try:
                return tp.type(value)
            except Exception as e:  # any failure of the cast is reported
                self.errors.append("%s for parameter `%s' cannot be cast to type " \
                        "`%s': %s" % (desc, name, tp.name, e))
                return None

        if self.parameters is None: return

        for name, parameter in self.parameters.items():
            if parameter['type'] == 'string':
                parameter['type'] = numpy.dtype('str')
            else:
                parameter['type'] = numpy.dtype(parameter['type'])
            tp = parameter['type']

            # True only if a range is declared and both bounds were converted
            range_usable = False

            if 'range' in parameter:
                bounds = parameter['range']
                bounds[0] = _try_convert(name, tp, bounds[0], 'start of range')
                bounds[1] = _try_convert(name, tp, bounds[1], 'end of range')
                range_usable = bounds[0] is not None and bounds[1] is not None
                if range_usable and bounds[0] >= bounds[1]:
                    self.errors.append("range for parameter `%s' has a start greater " \
                            "then the end value (%r >= %r)" % \
                            (name, bounds[0], bounds[1]))

            if 'choice' in parameter:
                choices = parameter['choice']
                for i, choice in enumerate(choices):
                    choices[i] = _try_convert(name, tp, choice, 'choice[%d]' % i)

            if 'default' in parameter:
                parameter['default'] = _try_convert(name, tp,
                    parameter['default'], 'default')
                default = parameter['default']

                # check range, only when all three values are comparable
                if range_usable and default is not None:
                    if default < parameter['range'][0] or \
                            default > parameter['range'][1]:
                        self.errors.append("default for parameter `%s' (%r) is not " \
                          "within parameter range [%r, %r]" % (name, default,
                              parameter['range'][0], parameter['range'][1]))

                if 'choice' in parameter:  # check choices
                    if default not in parameter['choice']:
                        self.errors.append("default for parameter `%s' (%r) is not " \
                          "a valid choice `[%s]'" % (name, default,
                              ', '.join(['%r' % k for k in parameter['choice']])))
André Anjos's avatar
André Anjos committed
370
371


Philip ABBET's avatar
Philip ABBET committed
372
    def _validate_required_libraries(self, library_cache):
        """Loads every library listed in ``self.uses`` and checks it is valid

        Loaded libraries are stored in ``self.libraries``, keyed by the value
        found in ``self.uses``. An error is appended to ``self.errors`` for
        each library whose ``valid`` attribute is false.


        Parameters:

          library_cache (dict): Mapping of library names to loaded libraries.
            Libraries found there are re-used without being loaded again;
            newly loaded ones are added to it. ``None`` is treated as an empty
            cache local to this call.

        """

        if not self.uses:
            return

        if library_cache is None:
            library_cache = {}

        for name, value in self.uses.items():

            # only build the library when it is not cached: passing the
            # constructor call straight to ``setdefault`` would evaluate it on
            # every iteration, even on a cache hit
            if value not in library_cache:
                loaded = library.Library(self.prefix, value, library_cache)
                # keep an entry the constructor may have registered itself
                library_cache.setdefault(value, loaded)

            self.libraries[value] = library_cache[value]

            if not self.libraries[value].valid:
                self.errors.append("referred library `%s' (%s) is not valid" % \
                        (self.libraries[value].name, name))
André Anjos's avatar
André Anjos committed
386
387


Philip ABBET's avatar
Philip ABBET committed
388
    def _check_language_consistence(self):
        """Checks that used libraries share the language of this algorithm

        Nothing is checked when the language of this algorithm is ``unknown``
        or when it uses no library. Libraries listed in ``self.uses`` but
        absent from ``self.libraries`` are ignored. Problems are appended to
        ``self.errors``.
        """

        if self.language == 'unknown':
            return  # bail out on unknown language

        if not self.uses:
            return

        for libname in self.uses.values():

            if libname not in self.libraries:
                continue  # invalid

            used = self.libraries[libname]

            if used.data is None:
                self.errors.append("language for used library `%s' cannot be "
                        "inferred as the library was not properly loaded" %
                        (libname,))
            elif used.language != self.language:
                self.errors.append("language for used library `%s' (`%s') "
                        "differs from current language for this algorithm (`%s')" %
                        (libname, used.language, self.language))