#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.core module of the BEAT platform.             #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################


"""Validation for algorithms"""

import os
import sys

import six
import numpy
import simplejson

from . import dataformat
from . import library
from . import schema
from . import prototypes
from . import utils

44
45
46
from beat.backend.python.algorithm import Storage
from beat.backend.python.algorithm import Runner
from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
André Anjos's avatar
André Anjos committed
47
48
49



50
class Algorithm(BackendAlgorithm):
Philip ABBET's avatar
Philip ABBET committed
51
    """Algorithms represent runnable components within the platform.

    This class can only parse the meta-parameters of the algorithm (i.e., input
    and output declaration, grouping, synchronization details, parameters and
    splittability). The actual algorithm is not directly treated by this class -
    it can, however, provide you with a loader for actually running the
    algorithmic code (see :py:meth:`Algorithm.runner`).


    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (object, optional): The piece of data representing the algorithm. It
        must validate against the schema defined for algorithms. If a string is
        passed, it is supposed to be a valid path to an algorithm in the
        designated prefix area. If a tuple is passed (or a list), then we
        consider that the first element represents the algorithm declaration,
        while the second, the code for the algorithm (either in its source format
        or as a binary blob). If ``None`` is passed, loads our default prototype
        for algorithms (source code will be in Python).

      dataformat_cache (dict, optional): A dictionary mapping dataformat names to
        loaded dataformats. This parameter is optional and, if passed, may
        greatly speed-up algorithm loading times as dataformats that are already
        loaded may be re-used.

      library_cache (dict, optional): A dictionary mapping library names to
        loaded libraries. This parameter is optional and, if passed, may greatly
        speed-up library loading times as libraries that are already loaded may
        be re-used.


    Attributes:

      name (str): The algorithm name

      description (str): The short description string, loaded from the JSON
        file if one was set.

      documentation (str): The full-length docstring for this object.

      storage (object): A simple object that provides information about file
        paths for this algorithm

      dataformats (dict): A dictionary containing all pre-loaded dataformats used
        by this algorithm. Data format objects will be of type
        :py:class:`beat.core.dataformat.DataFormat`.

      libraries (dict): A mapping object defining other libraries this algorithm
        needs to load so it can work properly.

      uses (dict): A mapping object defining the required library import name
        (keys) and the full-names (values).

      parameters (dict): A dictionary containing all pre-defined parameters that
        this algorithm accepts.

      splittable (bool): A boolean value that indicates if this algorithm is
        automatically parallelizable by our backend.

      input_map (dict): A dictionary where the key is the input name and the
        value, its type. All input names (potentially from different groups) are
        comprised in this dictionary.

      output_map (dict): A dictionary where the key is the output name and the
        value, its type. All output names (potentially from different groups) are
        comprised in this dictionary.

      results (dict): If this algorithm is actually an analyzer (i.e., there are
        no formal outputs, but results that must be saved by the platform), then
        this dictionary contains the names and data types of those elements.

      groups (list): A list containing dictionaries with inputs and outputs
        belonging to the same synchronization group.

      errors (list): A list containing errors found while loading this
        algorithm.

      data (dict): The original data for this algorithm, as loaded by our JSON
        decoder.

      code (str): The code that is associated with this algorithm, loaded as a
        text (or binary) file.

    """
André Anjos's avatar
André Anjos committed
137

Philip ABBET's avatar
Philip ABBET committed
138
139
    def __init__(self, prefix, data, dataformat_cache=None, library_cache=None):
        # Pure pass-through: construction is delegated to the backend base
        # class with all four arguments forwarded unchanged, positionally.
        # NOTE(review): presumably the base constructor is what ends up
        # invoking ``_load()`` (overridden in this class) -- confirm against
        # ``beat.backend.python.algorithm.Algorithm``.
        super(Algorithm, self).__init__(prefix, data, dataformat_cache, library_cache)
André Anjos's avatar
André Anjos committed
140
141


Philip ABBET's avatar
Philip ABBET committed
142
143
    def _load(self, data, dataformat_cache, library_cache):
        """Loads the algorithm declaration and code, then validates both

        ``data`` may be ``None`` (the default prototype is used), a string
        (the name of an algorithm stored under ``self.prefix``), a 2-element
        tuple or list ``(declaration, code)``, or anything else accepted by
        ``schema.validate``. Problems are accumulated in ``self.errors``; the
        method returns early as soon as the declaration or the code cannot be
        obtained.
        """

        # start from a clean slate
        self.errors = []
        self.data = None
        self.code = None
        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        self.libraries = {}  # preloaded libraries

        code = None
        if isinstance(data, (tuple, list)):
            # declaration and code were handed over separately
            data, code = data

        if isinstance(data, six.string_types):
            # a name was given: resolve it to files under the prefix
            self._name = data
            self.storage = Storage(self.prefix, self._name)
            if not self.storage.json.exists():
                self.errors.append('Algorithm declaration file not found: %s' % data)
                return
            data = self.storage.json.path  # loaded by the validation below

        # At this point, `data' is either ``None`` or something to validate
        if data is not None:
            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate('algorithm', data)
        else:
            # no declaration at all: fall back to the default prototype
            self.data, self.errors = prototypes.load('algorithm')
            assert not self.errors, "\n  * %s" % "\n  *".join(self.errors)

        if self.errors:
            return  # don't proceed with the rest of validation

        if self.storage is not None:  # loading from the disk, check code
            if self.storage.code.exists():
                code = self.storage.code.load()
            elif self.data['language'] != 'cxx':
                self.errors.append('Algorithm code not found: %s' %
                                   self.storage.code.path)
                return

        # At this point, `code' can be a string (or a binary blob) or ``None``
        if code is not None:
            # just keep it - notice that in this case, no language is set
            self.code = code
        else:
            # NOTE(review): a 'cxx' algorithm loaded from disk without a code
            # file also reaches this branch (``code`` is still ``None``), so
            # it receives the Python prototype and its declared language is
            # overwritten with 'python' -- confirm this is intended.
            self.code = prototypes.binary_load('algorithm.py')
            self.data['language'] = 'python'

        if self.errors:
            return  # don't proceed with the rest of validation

        # if no errors so far, make sense out of the declaration data
        self.groups = self.data['groups']

        # now we check for consistence
        self._check_endpoint_uniqueness()

        # create maps (endpoint name -> type) for easy access to data
        inputs = {}
        for group in self.groups:
            for key, spec in group['inputs'].items():
                inputs[key] = spec['type']
        self.input_map = inputs

        outputs = {}
        for group in self.groups:
            for key, spec in group.get('outputs', {}).items():
                outputs[key] = spec['type']
        self.output_map = outputs

        self._validate_required_dataformats(dataformat_cache)
        self._convert_parameter_types()

        # finally, the libraries
        self._validate_required_libraries(library_cache)
        self._check_language_consistence()
André Anjos's avatar
André Anjos committed
227
228


Philip ABBET's avatar
Philip ABBET committed
229
230
231
    def _check_endpoint_uniqueness(self):
        """Checks for name clashes accross input/output groups
        """
André Anjos's avatar
André Anjos committed
232

Philip ABBET's avatar
Philip ABBET committed
233
234
235
236
237
        all_input_names = []
        for group in self.groups: all_input_names.extend(group['inputs'].keys())
        if len(set(all_input_names)) != len(all_input_names):
            self.errors.append("repeated input name in algorithm `%s' " \
                    "declaration: %s" % (self.name, ', '.join(all_input_names)))
André Anjos's avatar
André Anjos committed
238

Philip ABBET's avatar
Philip ABBET committed
239
240
241
242
243
244
245
246
        # all outputs must have unique names
        all_output_names = []
        for group in self.groups:
            if 'outputs' not in group: continue
            all_output_names.extend(group['outputs'].keys())
        if len(set(all_output_names)) != len(all_output_names):
            self.errors.append("repeated output name in algorithm `%s' " \
                    "declaration: %s" % (self.name, ', '.join(all_output_names)))
André Anjos's avatar
André Anjos committed
247
248


Philip ABBET's avatar
Philip ABBET committed
249
250
251
    def _validate_required_dataformats(self, dataformat_cache):
        """Makes sure we can load all requested formats
        """
André Anjos's avatar
André Anjos committed
252

Philip ABBET's avatar
Philip ABBET committed
253
        for group in self.groups:
André Anjos's avatar
André Anjos committed
254

Philip ABBET's avatar
Philip ABBET committed
255
256
            for name, input in group['inputs'].items():
                if input['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
257

Philip ABBET's avatar
Philip ABBET committed
258
259
260
261
262
263
                if dataformat_cache and input['type'] in dataformat_cache: #reuse
                    thisformat = dataformat_cache[input['type']]
                else: #load it
                    thisformat = dataformat.DataFormat(self.prefix, input['type'])
                    if dataformat_cache is not None: #update it
                        dataformat_cache[input['type']] = thisformat
André Anjos's avatar
André Anjos committed
264

Philip ABBET's avatar
Philip ABBET committed
265
                self.dataformats[input['type']] = thisformat
André Anjos's avatar
André Anjos committed
266

Philip ABBET's avatar
Philip ABBET committed
267
268
269
270
271
                if thisformat.errors:
                    self.errors.append("found error validating data format `%s' " \
                            "for input `%s' on algorithm `%s': %s" % \
                            (input['type'], name, self.name,
                                '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
272

Philip ABBET's avatar
Philip ABBET committed
273
            if 'outputs' not in group: continue
André Anjos's avatar
André Anjos committed
274

Philip ABBET's avatar
Philip ABBET committed
275
276
            for name, output in group['outputs'].items():
                if output['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
277

Philip ABBET's avatar
Philip ABBET committed
278
279
280
281
282
283
                if dataformat_cache and output['type'] in dataformat_cache: #reuse
                    thisformat = dataformat_cache[output['type']]
                else: #load it
                    thisformat = dataformat.DataFormat(self.prefix, output['type'])
                    if dataformat_cache is not None: #update it
                        dataformat_cache[output['type']] = thisformat
André Anjos's avatar
André Anjos committed
284

Philip ABBET's avatar
Philip ABBET committed
285
                self.dataformats[output['type']] = thisformat
André Anjos's avatar
André Anjos committed
286

Philip ABBET's avatar
Philip ABBET committed
287
288
289
290
291
                if thisformat.errors:
                    self.errors.append("found error validating data format `%s' " \
                            "for output `%s' on algorithm `%s': %s" % \
                            (output['type'], name, self.name,
                                '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
292

Philip ABBET's avatar
Philip ABBET committed
293
        if self.results:
André Anjos's avatar
André Anjos committed
294

Philip ABBET's avatar
Philip ABBET committed
295
            for name, result in self.results.items():
André Anjos's avatar
André Anjos committed
296

Philip ABBET's avatar
Philip ABBET committed
297
                if result['type'].find('/') != -1:
André Anjos's avatar
André Anjos committed
298

Philip ABBET's avatar
Philip ABBET committed
299
                    if result['type'] in self.dataformats: continue
André Anjos's avatar
André Anjos committed
300

Philip ABBET's avatar
Philip ABBET committed
301
302
303
304
305
306
                    if dataformat_cache and result['type'] in dataformat_cache: #reuse
                        thisformat = dataformat_cache[result['type']]
                    else:
                        thisformat = dataformat.DataFormat(self.prefix, result['type'])
                        if dataformat_cache is not None: #update it
                            dataformat_cache[result['type']] = thisformat
André Anjos's avatar
André Anjos committed
307

Philip ABBET's avatar
Philip ABBET committed
308
                    self.dataformats[result['type']] = thisformat
André Anjos's avatar
André Anjos committed
309

Philip ABBET's avatar
Philip ABBET committed
310
311
312
313
314
                    if thisformat.errors:
                        self.errors.append("found error validating data format `%s' " \
                                "for result `%s' on algorithm `%s': %s" % \
                                (result['type'], name, self.name,
                                    '\n'.join(thisformat.errors)))
André Anjos's avatar
André Anjos committed
315
316


Philip ABBET's avatar
Philip ABBET committed
317
318
319
    def _convert_parameter_types(self):
        """Converts types to numpy equivalents, checks defaults, ranges and choices
        """
André Anjos's avatar
André Anjos committed
320

Philip ABBET's avatar
Philip ABBET committed
321
322
323
324
325
326
        def _try_convert(name, tp, value, desc):
            try:
                return tp.type(value)
            except Exception as e:
                self.errors.append("%s for parameter `%s' cannot be cast to type " \
                        "`%s': %s" % (desc, name, tp.name, e))
André Anjos's avatar
André Anjos committed
327

Philip ABBET's avatar
Philip ABBET committed
328
        if self.parameters is None: return
André Anjos's avatar
André Anjos committed
329

Philip ABBET's avatar
Philip ABBET committed
330
331
332
333
334
        for name, parameter in self.parameters.items():
            if parameter['type'] == 'string':
                parameter['type'] = numpy.dtype('str')
            else:
                parameter['type'] = numpy.dtype(parameter['type'])
André Anjos's avatar
André Anjos committed
335

Philip ABBET's avatar
Philip ABBET committed
336
337
338
339
340
341
342
343
344
            if 'range' in parameter:
                parameter['range'][0] = _try_convert(name, parameter['type'],
                    parameter['range'][0], 'start of range')
                parameter['range'][1] = _try_convert(name, parameter['type'],
                    parameter['range'][1], 'end of range')
                if parameter['range'][0] >= parameter['range'][1]:
                    self.errors.append("range for parameter `%s' has a start greater " \
                            "then the end value (%r >= %r)" % \
                            (name, parameter['range'][0], parameter['range'][1]))
André Anjos's avatar
André Anjos committed
345

Philip ABBET's avatar
Philip ABBET committed
346
347
348
349
            if 'choice' in parameter:
                for i, choice in enumerate(parameter['choice']):
                    parameter['choice'][i] = _try_convert(name, parameter['type'],
                        parameter['choice'][i], 'choice[%d]' % i)
André Anjos's avatar
André Anjos committed
350

Philip ABBET's avatar
Philip ABBET committed
351
352
353
            if 'default' in parameter:
                parameter['default'] = _try_convert(name, parameter['type'],
                    parameter['default'], 'default')
André Anjos's avatar
André Anjos committed
354

Philip ABBET's avatar
Philip ABBET committed
355
356
357
358
359
360
                if 'range' in parameter: #check range
                    if parameter['default'] < parameter['range'][0] or \
                            parameter['default'] > parameter['range'][1]:
                        self.errors.append("default for parameter `%s' (%r) is not " \
                          "within parameter range [%r, %r]" % (name, parameter['default'],
                              parameter['range'][0], parameter['range'][1]))
André Anjos's avatar
André Anjos committed
361

Philip ABBET's avatar
Philip ABBET committed
362
363
364
365
366
                if 'choice' in parameter: #check choices
                    if parameter['default'] not in parameter['choice']:
                        self.errors.append("default for parameter `%s' (%r) is not " \
                          "a valid choice `[%s]'" % (name, parameter['default'],
                              ', '.join(['%r' % k for k in parameter['choice']])))
André Anjos's avatar
André Anjos committed
367
368


Philip ABBET's avatar
Philip ABBET committed
369
    def _validate_required_libraries(self, library_cache):
André Anjos's avatar
André Anjos committed
370

Philip ABBET's avatar
Philip ABBET committed
371
        # all used libraries must be loadable; cannot use self as a library
André Anjos's avatar
André Anjos committed
372

Philip ABBET's avatar
Philip ABBET committed
373
        if self.uses:
André Anjos's avatar
André Anjos committed
374

Philip ABBET's avatar
Philip ABBET committed
375
            for name, value in self.uses.items():
André Anjos's avatar
André Anjos committed
376

Philip ABBET's avatar
Philip ABBET committed
377
378
                self.libraries[value] = library_cache.setdefault(value,
                        library.Library(self.prefix, value, library_cache))
André Anjos's avatar
André Anjos committed
379

Philip ABBET's avatar
Philip ABBET committed
380
381
382
                if not self.libraries[value].valid:
                    self.errors.append("referred library `%s' (%s) is not valid" % \
                            (self.libraries[value].name, name))
André Anjos's avatar
André Anjos committed
383
384


Philip ABBET's avatar
Philip ABBET committed
385
    def _check_language_consistence(self):
André Anjos's avatar
André Anjos committed
386

Philip ABBET's avatar
Philip ABBET committed
387
388
        # all used libraries must be programmed with the same language
        if self.language == 'unknown': return #bail out on unknown language
André Anjos's avatar
André Anjos committed
389

Philip ABBET's avatar
Philip ABBET committed
390
        if self.uses:
André Anjos's avatar
André Anjos committed
391

Philip ABBET's avatar
Philip ABBET committed
392
            for name, library in self.uses.items():
André Anjos's avatar
André Anjos committed
393

Philip ABBET's avatar
Philip ABBET committed
394
                if library not in self.libraries: continue #invalid
André Anjos's avatar
André Anjos committed
395

Philip ABBET's avatar
Philip ABBET committed
396
397
398
399
400
                if self.libraries[library].data is None:
                    self.errors.append("language for used library `%s' cannot be " \
                            "inferred as the library was not properly loaded" % \
                            (library,))
                    continue
André Anjos's avatar
André Anjos committed
401

Philip ABBET's avatar
Philip ABBET committed
402
403
404
405
                if self.libraries[library].language != self.language:
                    self.errors.append("language for used library `%s' (`%s') " \
                            "differs from current language for this algorithm (`%s')" % \
                            (library, self.libraries[library].language, self.language))