algorithm.py 17.1 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.core module of the BEAT platform.             #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################


29
30
31
32
33
34
35
"""
=========
algorithm
=========

Validation for algorithms
"""
André Anjos's avatar
André Anjos committed
36
37
38
39
40
41
42
43
44
45
46
47
48
49

import os
import sys

import six
import numpy
import simplejson

from . import dataformat
from . import library
from . import schema
from . import prototypes
from . import utils

50
51
52
from beat.backend.python.algorithm import Storage
from beat.backend.python.algorithm import Runner
from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
André Anjos's avatar
André Anjos committed
53
54
55



56
class Algorithm(BackendAlgorithm):
Philip ABBET's avatar
Philip ABBET committed
57
    """Algorithms represent runnable components within the platform.
André Anjos's avatar
André Anjos committed
58

Philip ABBET's avatar
Philip ABBET committed
59
60
61
62
    This class can only parse the meta-parameters of the algorithm (i.e., input
    and output declaration, grouping, synchronization details, parameters and
    splittability). The actual algorithm is not directly treated by this class -
    it can, however, provide you with a loader for actually running the
André Anjos's avatar
André Anjos committed
63
    algorithmic code (see :py:meth:`.runner`).
André Anjos's avatar
André Anjos committed
64
65


Philip ABBET's avatar
Philip ABBET committed
66
    Parameters:
André Anjos's avatar
André Anjos committed
67

Philip ABBET's avatar
Philip ABBET committed
68
      prefix (str): Establishes the prefix of your installation.
André Anjos's avatar
André Anjos committed
69

André Anjos's avatar
André Anjos committed
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
      data (:py:class:`object`, Optional): The piece of data representing the
        algorithm. It must validate against the schema defined for algorithms.
        If a string is passed, it is supposed to be a valid path to an
        algorithm in the designated prefix area. If a tuple is passed (or a
        list), then we consider that the first element represents the algorithm
        declaration, while the second, the code for the algorithm (either in
        its source format or as a binary blob). If ``None`` is passed, loads
        our default prototype for algorithms (source code will be in Python).

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up algorithm loading times as dataformats
        that are already loaded may be re-used.

      library_cache (:py:class:`dict`, Optional): A dictionary mapping library
        names to loaded libraries. This parameter is optional and, if passed,
        may greatly speed-up library loading times as libraries that are
        already loaded may be re-used.
André Anjos's avatar
André Anjos committed
88
89


Philip ABBET's avatar
Philip ABBET committed
90
    Attributes:
André Anjos's avatar
André Anjos committed
91

Philip ABBET's avatar
Philip ABBET committed
92
      name (str): The algorithm name
André Anjos's avatar
André Anjos committed
93

Philip ABBET's avatar
Philip ABBET committed
94
95
      description (str): The short description string, loaded from the JSON
        file if one was set.
André Anjos's avatar
André Anjos committed
96

Philip ABBET's avatar
Philip ABBET committed
97
      documentation (str): The full-length docstring for this object.
André Anjos's avatar
André Anjos committed
98

Philip ABBET's avatar
Philip ABBET committed
99
100
      storage (object): A simple object that provides information about file
        paths for this algorithm
André Anjos's avatar
André Anjos committed
101

Philip ABBET's avatar
Philip ABBET committed
102
103
104
      dataformats (dict): A dictionary containing all pre-loaded dataformats used
        by this algorithm. Data format objects will be of type
        :py:class:`beat.core.dataformat.DataFormat`.
André Anjos's avatar
André Anjos committed
105

Philip ABBET's avatar
Philip ABBET committed
106
107
      libraries (dict): A mapping object defining other libraries this algorithm
        needs to load so it can work properly.
André Anjos's avatar
André Anjos committed
108

Philip ABBET's avatar
Philip ABBET committed
109
110
      uses (dict): A mapping object defining the required library import name
        (keys) and the full-names (values).
André Anjos's avatar
André Anjos committed
111

Philip ABBET's avatar
Philip ABBET committed
112
113
      parameters (dict): A dictionary containing all pre-defined parameters that
        this algorithm accepts.
André Anjos's avatar
André Anjos committed
114

Philip ABBET's avatar
Philip ABBET committed
115
116
      splittable (bool): A boolean value that indicates if this algorithm is
        automatically parallelizable by our backend.
André Anjos's avatar
André Anjos committed
117

Philip ABBET's avatar
Philip ABBET committed
118
119
120
      input_map (dict): A dictionary where the key is the input name and the
        value, its type. All input names (potentially from different groups) are
        comprised in this dictionary.
André Anjos's avatar
André Anjos committed
121

Philip ABBET's avatar
Philip ABBET committed
122
123
124
      output_map (dict): A dictionary where the key is the output name and the
        value, its type. All output names (potentially from different groups) are
        comprised in this dictionary.
André Anjos's avatar
André Anjos committed
125

Philip ABBET's avatar
Philip ABBET committed
126
127
128
      results (dict): If this algorithm is actually an analyzer (i.e., there are
        no formal outputs, but results that must be saved by the platform), then
        this dictionary contains the names and data types of those elements.
André Anjos's avatar
André Anjos committed
129

Philip ABBET's avatar
Philip ABBET committed
130
131
      groups (list): A list containing dictionaries with inputs and outputs
        belonging to the same synchronization group.
André Anjos's avatar
André Anjos committed
132

Philip ABBET's avatar
Philip ABBET committed
133
134
      errors (list): A list containing errors found while loading this
        algorithm.
André Anjos's avatar
André Anjos committed
135

Philip ABBET's avatar
Philip ABBET committed
136
137
      data (dict): The original data for this algorithm, as loaded by our JSON
        decoder.
André Anjos's avatar
André Anjos committed
138

Philip ABBET's avatar
Philip ABBET committed
139
140
      code (str): The code that is associated with this algorithm, loaded as a
        text (or binary) file.
André Anjos's avatar
André Anjos committed
141

Philip ABBET's avatar
Philip ABBET committed
142
    """
André Anjos's avatar
André Anjos committed
143

Philip ABBET's avatar
Philip ABBET committed
144
145
    def __init__(self, prefix, data, dataformat_cache=None, library_cache=None):
        # Construction is fully delegated to the backend base class; all
        # arguments are forwarded unchanged, in the same order.
        # NOTE(review): presumably the base constructor stores ``prefix`` and
        # then calls ``self._load(data, dataformat_cache, library_cache)`` -
        # confirm against beat.backend.python.algorithm.Algorithm.
        super(Algorithm, self).__init__(prefix, data, dataformat_cache, library_cache)
André Anjos's avatar
André Anjos committed
146
147


Philip ABBET's avatar
Philip ABBET committed
148
149
    def _load(self, data, dataformat_cache, library_cache):
        """Loads the algorithm declaration and its code, then validates both

        Every problem found is appended to ``self.errors``. Loading stops as
        soon as the declaration or the code cannot be obtained; the remaining
        consistency checks only run on a declaration that passed schema
        validation.

        Parameters:

          data (object): One of: ``None`` (the default prototype declaration
            and code are used), a string (the name of an algorithm that is
            looked up through ``Storage(self.prefix, name)``), a 2-element
            tuple or list ``(declaration, code)``, or a declaration that is
            handed directly to ``schema.validate``.

          dataformat_cache (dict): Forwarded to
            :py:meth:`_validate_required_dataformats`.

          library_cache (dict): Forwarded to
            :py:meth:`_validate_required_libraries`.

        """

        self.errors = []
        self.data = None
        self.code = None

        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        self.libraries = {}  # preloaded libraries
        code = None

        if isinstance(data, (tuple, list)):  # user has passed individual info
            data, code = data  # break down into two components

        if isinstance(data, six.string_types):  # user has passed a name
            self._name = data
            self.storage = Storage(self.prefix, self._name)
            if not self.storage.json.exists():
                self.errors.append('Algorithm declaration file not found: %s' % data)
                return

            data = self.storage.json.path  # loads data from JSON declaration

        # At this point, `data' can be a declaration, a path or ``None``
        if data is None:  # loads the default declaration for an algorithm
            self.data, self.errors = prototypes.load('algorithm')
            # a broken built-in prototype is a packaging problem, not a user
            # error, hence the assertion instead of a regular error entry
            assert not self.errors, "\n  * %s" % "\n  * ".join(self.errors)
        else:
            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate('algorithm', data)

        if self.errors:
            return  # don't proceed with the rest of validation

        if self.storage is not None:  # loading from the disk, check code
            if self.storage.code.exists():
                code = self.storage.code.load()
            elif self.data['language'] != 'cxx':
                self.errors.append('Algorithm code not found: %s' % \
                        self.storage.code.path)
                return
            # NOTE(review): a 'cxx' algorithm without a code file reaches the
            # branch below with ``code is None`` and is therefore given the
            # Python prototype code and relabelled as 'python' - confirm that
            # this is intended.

        # At this point, `code' can be a string (or a binary blob) or ``None``
        if code is None:  # loads the default code for an algorithm
            self.code = prototypes.binary_load('algorithm.py')
            self.data['language'] = 'python'
        else:  # just assign it - notice that in this case, no language is set
            self.code = code

        # no errors so far, make sense out of the declaration data
        self.groups = self.data['groups']

        # now we check for consistence
        self._check_endpoint_uniqueness()

        # create maps for easy access to data
        self.input_map = {
            name: endpoint['type']
            for group in self.groups
            for name, endpoint in group['inputs'].items()
        }
        self.output_map = {
            name: endpoint['type']
            for group in self.groups
            for name, endpoint in group.get('outputs', {}).items()
        }

        self._validate_required_dataformats(dataformat_cache)
        self._convert_parameter_types()

        # finally, the libraries
        self._validate_required_libraries(library_cache)
        self._check_language_consistence()
André Anjos's avatar
André Anjos committed
233
234


Philip ABBET's avatar
Philip ABBET committed
235
236
237
    def _check_endpoint_uniqueness(self):
        """Checks for name clashes accross input/output groups
        """
André Anjos's avatar
André Anjos committed
238

Philip ABBET's avatar
Philip ABBET committed
239
240
241
242
243
        all_input_names = []
        for group in self.groups: all_input_names.extend(group['inputs'].keys())
        if len(set(all_input_names)) != len(all_input_names):
            self.errors.append("repeated input name in algorithm `%s' " \
                    "declaration: %s" % (self.name, ', '.join(all_input_names)))
André Anjos's avatar
André Anjos committed
244

Philip ABBET's avatar
Philip ABBET committed
245
246
247
248
249
250
251
252
        # all outputs must have unique names
        all_output_names = []
        for group in self.groups:
            if 'outputs' not in group: continue
            all_output_names.extend(group['outputs'].keys())
        if len(set(all_output_names)) != len(all_output_names):
            self.errors.append("repeated output name in algorithm `%s' " \
                    "declaration: %s" % (self.name, ', '.join(all_output_names)))
André Anjos's avatar
André Anjos committed
253
254


Philip ABBET's avatar
Philip ABBET committed
255
256
257
    def _validate_required_dataformats(self, dataformat_cache):
        """Makes sure we can load all requested formats
        """
André Anjos's avatar
André Anjos committed
258

Philip ABBET's avatar
Philip ABBET committed
259
        for group in self.groups:
André Anjos's avatar
André Anjos committed
260

Philip ABBET's avatar
Philip ABBET committed
261
262
            for name, input in group['inputs'].items():
                if input['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
263

Philip ABBET's avatar
Philip ABBET committed
264
265
266
267
268
269
                if dataformat_cache and input['type'] in dataformat_cache: #reuse
                    thisformat = dataformat_cache[input['type']]
                else: #load it
                    thisformat = dataformat.DataFormat(self.prefix, input['type'])
                    if dataformat_cache is not None: #update it
                        dataformat_cache[input['type']] = thisformat
André Anjos's avatar
André Anjos committed
270

Philip ABBET's avatar
Philip ABBET committed
271
                self.dataformats[input['type']] = thisformat
André Anjos's avatar
André Anjos committed
272

Philip ABBET's avatar
Philip ABBET committed
273
274
275
276
277
                if thisformat.errors:
                    self.errors.append("found error validating data format `%s' " \
                            "for input `%s' on algorithm `%s': %s" % \
                            (input['type'], name, self.name,
                                '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
278

Philip ABBET's avatar
Philip ABBET committed
279
            if 'outputs' not in group: continue
André Anjos's avatar
André Anjos committed
280

Philip ABBET's avatar
Philip ABBET committed
281
282
            for name, output in group['outputs'].items():
                if output['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
283

Philip ABBET's avatar
Philip ABBET committed
284
285
286
287
288
289
                if dataformat_cache and output['type'] in dataformat_cache: #reuse
                    thisformat = dataformat_cache[output['type']]
                else: #load it
                    thisformat = dataformat.DataFormat(self.prefix, output['type'])
                    if dataformat_cache is not None: #update it
                        dataformat_cache[output['type']] = thisformat
André Anjos's avatar
André Anjos committed
290

Philip ABBET's avatar
Philip ABBET committed
291
                self.dataformats[output['type']] = thisformat
André Anjos's avatar
André Anjos committed
292

Philip ABBET's avatar
Philip ABBET committed
293
294
295
296
297
                if thisformat.errors:
                    self.errors.append("found error validating data format `%s' " \
                            "for output `%s' on algorithm `%s': %s" % \
                            (output['type'], name, self.name,
                                '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
298

Philip ABBET's avatar
Philip ABBET committed
299
        if self.results:
André Anjos's avatar
André Anjos committed
300

Philip ABBET's avatar
Philip ABBET committed
301
            for name, result in self.results.items():
André Anjos's avatar
André Anjos committed
302

Philip ABBET's avatar
Philip ABBET committed
303
                if result['type'].find('/') != -1:
André Anjos's avatar
André Anjos committed
304

Philip ABBET's avatar
Philip ABBET committed
305
                    if result['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
306

Philip ABBET's avatar
Philip ABBET committed
307
308
309
310
311
312
                    if dataformat_cache and result['type'] in dataformat_cache: #reuse
                        thisformat = dataformat_cache[result['type']]
                    else:
                        thisformat = dataformat.DataFormat(self.prefix, result['type'])
                        if dataformat_cache is not None: #update it
                            dataformat_cache[result['type']] = thisformat
André Anjos's avatar
André Anjos committed
313

Philip ABBET's avatar
Philip ABBET committed
314
                    self.dataformats[result['type']] = thisformat
André Anjos's avatar
André Anjos committed
315

Philip ABBET's avatar
Philip ABBET committed
316
317
318
319
320
                    if thisformat.errors:
                        self.errors.append("found error validating data format `%s' " \
                                "for result `%s' on algorithm `%s': %s" % \
                                (result['type'], name, self.name,
                                    '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
321
322


Philip ABBET's avatar
Philip ABBET committed
323
324
325
    def _convert_parameter_types(self):
        """Converts types to numpy equivalents, checks defaults, ranges and choices
        """
André Anjos's avatar
André Anjos committed
326

Philip ABBET's avatar
Philip ABBET committed
327
328
329
330
331
332
        def _try_convert(name, tp, value, desc):
            try:
                return tp.type(value)
            except Exception as e:
                self.errors.append("%s for parameter `%s' cannot be cast to type " \
                        "`%s': %s" % (desc, name, tp.name, e))
André Anjos's avatar
André Anjos committed
333

Philip ABBET's avatar
Philip ABBET committed
334
        if self.parameters is None: return
André Anjos's avatar
André Anjos committed
335

Philip ABBET's avatar
Philip ABBET committed
336
337
338
339
340
        for name, parameter in self.parameters.items():
            if parameter['type'] == 'string':
                parameter['type'] = numpy.dtype('str')
            else:
                parameter['type'] = numpy.dtype(parameter['type'])
André Anjos's avatar
André Anjos committed
341

Philip ABBET's avatar
Philip ABBET committed
342
343
344
345
346
347
348
349
350
            if 'range' in parameter:
                parameter['range'][0] = _try_convert(name, parameter['type'],
                    parameter['range'][0], 'start of range')
                parameter['range'][1] = _try_convert(name, parameter['type'],
                    parameter['range'][1], 'end of range')
                if parameter['range'][0] >= parameter['range'][1]:
                    self.errors.append("range for parameter `%s' has a start greater " \
                            "then the end value (%r >= %r)" % \
                            (name, parameter['range'][0], parameter['range'][1]))
André Anjos's avatar
André Anjos committed
351

Philip ABBET's avatar
Philip ABBET committed
352
353
354
355
            if 'choice' in parameter:
                for i, choice in enumerate(parameter['choice']):
                    parameter['choice'][i] = _try_convert(name, parameter['type'],
                        parameter['choice'][i], 'choice[%d]' % i)
André Anjos's avatar
André Anjos committed
356

Philip ABBET's avatar
Philip ABBET committed
357
358
359
            if 'default' in parameter:
                parameter['default'] = _try_convert(name, parameter['type'],
                    parameter['default'], 'default')
André Anjos's avatar
André Anjos committed
360

Philip ABBET's avatar
Philip ABBET committed
361
362
363
364
365
366
                if 'range' in parameter: #check range
                    if parameter['default'] < parameter['range'][0] or \
                            parameter['default'] > parameter['range'][1]:
                        self.errors.append("default for parameter `%s' (%r) is not " \
                          "within parameter range [%r, %r]" % (name, parameter['default'],
                              parameter['range'][0], parameter['range'][1]))
André Anjos's avatar
André Anjos committed
367

Philip ABBET's avatar
Philip ABBET committed
368
369
370
371
372
                if 'choice' in parameter: #check choices
                    if parameter['default'] not in parameter['choice']:
                        self.errors.append("default for parameter `%s' (%r) is not " \
                          "a valid choice `[%s]'" % (name, parameter['default'],
                              ', '.join(['%r' % k for k in parameter['choice']])))
André Anjos's avatar
André Anjos committed
373
374


Philip ABBET's avatar
Philip ABBET committed
375
    def _validate_required_libraries(self, library_cache):
André Anjos's avatar
André Anjos committed
376

Philip ABBET's avatar
Philip ABBET committed
377
        # all used libraries must be loadable; cannot use self as a library
André Anjos's avatar
André Anjos committed
378

Philip ABBET's avatar
Philip ABBET committed
379
        if self.uses:
André Anjos's avatar
André Anjos committed
380

Philip ABBET's avatar
Philip ABBET committed
381
            for name, value in self.uses.items():
André Anjos's avatar
André Anjos committed
382

Philip ABBET's avatar
Philip ABBET committed
383
384
                self.libraries[value] = library_cache.setdefault(value,
                        library.Library(self.prefix, value, library_cache))
André Anjos's avatar
André Anjos committed
385

Philip ABBET's avatar
Philip ABBET committed
386
387
388
                if not self.libraries[value].valid:
                    self.errors.append("referred library `%s' (%s) is not valid" % \
                            (self.libraries[value].name, name))
André Anjos's avatar
André Anjos committed
389
390


Philip ABBET's avatar
Philip ABBET committed
391
    def _check_language_consistence(self):
André Anjos's avatar
André Anjos committed
392

Philip ABBET's avatar
Philip ABBET committed
393
394
        # all used libraries must be programmed with the same language
        if self.language == 'unknown': return #bail out on unknown language
André Anjos's avatar
André Anjos committed
395

Philip ABBET's avatar
Philip ABBET committed
396
        if self.uses:
André Anjos's avatar
André Anjos committed
397

Philip ABBET's avatar
Philip ABBET committed
398
            for name, library in self.uses.items():
André Anjos's avatar
André Anjos committed
399

Philip ABBET's avatar
Philip ABBET committed
400
                if library not in self.libraries: continue #invalid
André Anjos's avatar
André Anjos committed
401

Philip ABBET's avatar
Philip ABBET committed
402
403
404
405
406
                if self.libraries[library].data is None:
                    self.errors.append("language for used library `%s' cannot be " \
                            "inferred as the library was not properly loaded" % \
                            (library,))
                    continue
André Anjos's avatar
André Anjos committed
407

Philip ABBET's avatar
Philip ABBET committed
408
409
410
411
                if self.libraries[library].language != self.language:
                    self.errors.append("language for used library `%s' (`%s') " \
                            "differs from current language for this algorithm (`%s')" % \
                            (library, self.libraries[library].language, self.language))