algorithm.py 19.3 KB
Newer Older
André Anjos's avatar
André Anjos committed
1 2 3
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
###################################################################################
#                                                                                 #
# Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/               #
# Contact: beat.support@idiap.ch                                                  #
#                                                                                 #
# Redistribution and use in source and binary forms, with or without              #
# modification, are permitted provided that the following conditions are met:     #
#                                                                                 #
# 1. Redistributions of source code must retain the above copyright notice, this  #
# list of conditions and the following disclaimer.                                #
#                                                                                 #
# 2. Redistributions in binary form must reproduce the above copyright notice,    #
# this list of conditions and the following disclaimer in the documentation       #
# and/or other materials provided with the distribution.                          #
#                                                                                 #
# 3. Neither the name of the copyright holder nor the names of its contributors   #
# may be used to endorse or promote products derived from this software without   #
# specific prior written permission.                                              #
#                                                                                 #
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND #
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED   #
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          #
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE    #
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL      #
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR      #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER      #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   #
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE   #
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.            #
#                                                                                 #
###################################################################################

André Anjos's avatar
André Anjos committed
36

37 38 39 40 41 42
"""
=========
algorithm
=========

Validation for algorithms
43 44 45 46

Forward importing from :py:mod:`beat.backend.python.algorithm`
:py:class:`beat.backend.python.algorithm.Storage`
:py:class:`beat.backend.python.algorithm.Runner`
47
"""
André Anjos's avatar
André Anjos committed
48 49


50
import os
André Anjos's avatar
André Anjos committed
51 52
import six
import numpy
53
import pkg_resources
Samuel GAIST's avatar
Samuel GAIST committed
54
import simplejson as json
André Anjos's avatar
André Anjos committed
55 56 57 58 59 60

from . import dataformat
from . import library
from . import schema
from . import prototypes

61
from beat.backend.python.algorithm import Storage
62
from beat.backend.python.algorithm import Runner  # noqa
63
from beat.backend.python.algorithm import Algorithm as BackendAlgorithm
André Anjos's avatar
André Anjos committed
64 65


66
def load_algorithm_prototype(prefix):
    """Builds the default algorithm declaration bound to a reference dataformat

    The packaged ``prototypes/algorithm.json`` template is loaded and the
    ``type`` of the ``in_data`` input and of the ``out_data`` output of its
    first group is set to a reference dataformat found by walking
    ``<prefix>/dataformats``.

    Directories named ``integer`` and ``integers`` are searched for, in that
    order. The search does not stop at the first name that is found: if both
    exist, the one listed last is retained. For the selected directory, the
    highest version available is used.


    Parameters:

      prefix (str): Path to the prefix containing the ``dataformats``
        directory to be searched.


    Returns:

      dict: The prototype declaration, with input and output types set to
      ``<author>/<name>/<version>`` of the reference dataformat.


    Raises:

      RuntimeError: If none of the reference dataformats can be found under
        the prefix.

    """

    def _latest_version(entries):
        """Returns the highest version stem among ``entries`` (or ``None``)"""

        if not entries:
            return None

        stems = [entry.split(".")[0] for entry in entries]
        numeric = [stem for stem in stems if stem.isdigit()]
        if numeric:
            # compare as integers so that version 10 ranks above version 2
            return max(numeric, key=int)

        # no numeric version around: keep the plain lexicographic choice
        return max(entries).split(".")[0]

    prototype_data = pkg_resources.resource_string(
        __name__, "prototypes/algorithm.json"
    )
    algorithm_data = json.loads(prototype_data)
    ref_dataformats = ["integer", "integers"]
    dataformat = None

    for ref_dataformat in ref_dataformats:
        for root, dirs, _ in os.walk(os.path.join(prefix, "dataformats")):
            if ref_dataformat not in dirs:
                continue

            version = _latest_version(os.listdir(os.path.join(root, ref_dataformat)))
            if version is None:
                continue  # empty directory, nothing usable in there

            dataformat = "{}/{}/{}".format(
                os.path.basename(root), ref_dataformat, version
            )
            break  # only leaves the directory walk for this reference name

    if dataformat is None:
        raise RuntimeError(
            "Reference data formats [{}] not found".format(",".join(ref_dataformats))
        )

    algorithm_data["groups"][0]["inputs"]["in_data"]["type"] = dataformat
    algorithm_data["groups"][0]["outputs"]["out_data"]["type"] = dataformat
    return algorithm_data


96
class Algorithm(BackendAlgorithm):
Philip ABBET's avatar
Philip ABBET committed
97
    """Algorithms represent runnable components within the platform.
André Anjos's avatar
André Anjos committed
98

Philip ABBET's avatar
Philip ABBET committed
99 100
    This class can only parse the meta-parameters of the algorithm (i.e., input
    and output declaration, grouping, synchronization details, parameters and
101 102
    splittability). The actual algorithm is not directly treated by this class.
    It can, however, provide you with a loader for actually running the
André Anjos's avatar
André Anjos committed
103
    algorithmic code (see :py:meth:`.runner`).
André Anjos's avatar
André Anjos committed
104 105


Philip ABBET's avatar
Philip ABBET committed
106
    Parameters:
André Anjos's avatar
André Anjos committed
107

Philip ABBET's avatar
Philip ABBET committed
108
      prefix (str): Establishes the prefix of your installation.
André Anjos's avatar
André Anjos committed
109

André Anjos's avatar
André Anjos committed
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
      data (:py:class:`object`, Optional): The piece of data representing the
        algorithm. It must validate against the schema defined for algorithms.
        If a string is passed, it is supposed to be a valid path to an
        algorithm in the designated prefix area. If a tuple is passed (or a
        list), then we consider that the first element represents the algorithm
        declaration, while the second, the code for the algorithm (either in
        its source format or as a binary blob). If ``None`` is passed, loads
        our default prototype for algorithms (source code will be in Python).

      dataformat_cache (:py:class:`dict`, Optional): A dictionary mapping
        dataformat names to loaded dataformats. This parameter is optional and,
        if passed, may greatly speed-up algorithm loading times as dataformats
        that are already loaded may be re-used.

      library_cache (:py:class:`dict`, Optional): A dictionary mapping library
        names to loaded libraries. This parameter is optional and, if passed,
        may greatly speed-up library loading times as libraries that are
        already loaded may be re-used.
André Anjos's avatar
André Anjos committed
128 129


Philip ABBET's avatar
Philip ABBET committed
130
    Attributes:
André Anjos's avatar
André Anjos committed
131

Philip ABBET's avatar
Philip ABBET committed
132
      name (str): The algorithm name
André Anjos's avatar
André Anjos committed
133

Philip ABBET's avatar
Philip ABBET committed
134 135
      description (str): The short description string, loaded from the JSON
        file if one was set.
André Anjos's avatar
André Anjos committed
136

Philip ABBET's avatar
Philip ABBET committed
137
      documentation (str): The full-length docstring for this object.
André Anjos's avatar
André Anjos committed
138

Philip ABBET's avatar
Philip ABBET committed
139 140
      storage (object): A simple object that provides information about file
        paths for this algorithm
André Anjos's avatar
André Anjos committed
141

142 143
      dataformats (dict): A dictionary containing all pre-loaded dataformats
        used by this algorithm. Data format objects will be of type
Philip ABBET's avatar
Philip ABBET committed
144
        :py:class:`beat.core.dataformat.DataFormat`.
André Anjos's avatar
André Anjos committed
145

146 147
      libraries (dict): A mapping object defining other libraries this
        algorithm needs to load so it can work properly.
André Anjos's avatar
André Anjos committed
148

Philip ABBET's avatar
Philip ABBET committed
149 150
      uses (dict): A mapping object defining the required library import name
        (keys) and the full-names (values).
André Anjos's avatar
André Anjos committed
151

152 153
      parameters (dict): A dictionary containing all pre-defined parameters
        that this algorithm accepts.
André Anjos's avatar
André Anjos committed
154

Philip ABBET's avatar
Philip ABBET committed
155 156
      splittable (bool): A boolean value that indicates if this algorithm is
        automatically parallelizable by our backend.
André Anjos's avatar
André Anjos committed
157

Philip ABBET's avatar
Philip ABBET committed
158
      input_map (dict): A dictionary where the key is the input name and the
159 160
        value, its type. All input names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
161

Philip ABBET's avatar
Philip ABBET committed
162
      output_map (dict): A dictionary where the key is the output name and the
163 164
        value, its type. All output names (potentially from different groups)
        are comprised in this dictionary.
André Anjos's avatar
André Anjos committed
165

166 167 168 169
      results (dict): If this algorithm is actually an analyzer (i.e., there
        are no formal outputs, but results that must be saved by the platform),
        then this dictionary contains the names and data types of those
        elements.
André Anjos's avatar
André Anjos committed
170

Philip ABBET's avatar
Philip ABBET committed
171 172
      groups (list): A list containing dictionaries with inputs and outputs
        belonging to the same synchronization group.
André Anjos's avatar
André Anjos committed
173

Philip ABBET's avatar
Philip ABBET committed
174 175
      errors (list): A list containing errors found while loading this
        algorithm.
André Anjos's avatar
André Anjos committed
176

Philip ABBET's avatar
Philip ABBET committed
177 178
      data (dict): The original data for this algorithm, as loaded by our JSON
        decoder.
André Anjos's avatar
André Anjos committed
179

Philip ABBET's avatar
Philip ABBET committed
180 181
      code (str): The code that is associated with this algorithm, loaded as a
        text (or binary) file.
André Anjos's avatar
André Anjos committed
182

Philip ABBET's avatar
Philip ABBET committed
183
    """
André Anjos's avatar
André Anjos committed
184

185 186
    dataformat_klass = dataformat.DataFormat

Philip ABBET's avatar
Philip ABBET committed
187 188
    def __init__(self, prefix, data, dataformat_cache=None, library_cache=None):
        """Forwards all arguments, unchanged, to the backend ``Algorithm``"""
        super(Algorithm, self).__init__(prefix, data, dataformat_cache, library_cache)
André Anjos's avatar
André Anjos committed
189

Philip ABBET's avatar
Philip ABBET committed
190 191
    def _load(self, data, dataformat_cache, library_cache):
        """Loads the algorithm declaration and code, then validates both

        Problems are accumulated in ``self.errors``. The method returns early
        if the declaration file or the code file cannot be found, or if the
        schema validation reports errors. Consistency checks executed after
        that point only append to ``self.errors``.


        Parameters:

          data: ``None`` (use the packaged prototype), a string (name of the
            algorithm, resolved through ``Storage`` under ``self.prefix``) or
            a tuple/list with two elements: the declaration and the code.

          dataformat_cache: Forwarded to
            :py:meth:`_validate_required_dataformats`.

          library_cache: Forwarded to
            :py:meth:`_validate_required_libraries`.

        """

        # resets the state, so that attributes exist even on early returns
        self.errors = []
        self.data = None
        self.code = None

        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        self.libraries = {}  # preloaded libraries
        code = None

        if data is None:  # loads prototype and validates it

            # NOTE(review): both assignments are no-ops, the variables already
            # hold ``None`` at this point
            data = None
            code = None

        elif isinstance(data, (tuple, list)):  # user has passed individual info

            data, code = data  # break down into two components

        # plain ``if`` (not ``elif``): the declaration extracted from a
        # tuple/list above is also checked for being a string
        if isinstance(data, six.string_types):  # user has passed a file pointer

            self._name = data
            self.storage = Storage(self.prefix, self._name)
            if not self.storage.json.exists():
                self.errors.append("Algorithm declaration file not found: %s" % data)
                return

            data = self.storage.json.path  # loads data from JSON declaration

        # At this point, `data' is either ``None``, the path to the JSON
        # declaration set just above, or the declaration given by the caller
        if data is None:  # loads the default declaration for an algorithm
            algorithm_data = load_algorithm_prototype(self.prefix)
            self.data, self.errors = schema.validate("algorithm", algorithm_data)
            # NOTE(review): ``assert`` statements are removed when Python runs
            # with ``-O``; in that case, errors are caught by the check below
            assert not self.errors, "\n  * %s" % "\n  *".join(self.errors)  # nosec
        else:  # just assign it
            # this runs basic validation, including JSON loading if required
            self.data, self.errors = schema.validate("algorithm", data)

        if self.errors:
            return  # don't proceed with the rest of validation

        if self.storage is not None:  # loading from the disk, check code
            if not self.storage.code.exists():
                # a missing code file is only tolerated for "cxx" algorithms
                if self.data["language"] != "cxx":
                    self.errors.append(
                        "Algorithm code not found: %s" % self.storage.code.path
                    )
                    return
            else:
                code = self.storage.code.load()

        # At this point, `code' can be a string (or a binary blob) or ``None``
        # NOTE(review): a "cxx" algorithm without a code file also gets here
        # with ``code`` set to ``None``, so it receives the Python prototype
        # and has its language overwritten with "python" - confirm this is
        # the intended behaviour
        if code is None:  # loads the default code for an algorithm
            self.code = prototypes.binary_load("algorithm.py")
            self.data["language"] = "python"

        else:  # just assign it - notice that in this case, no language is set
            self.code = code

        # NOTE(review): nothing between the previous check and this one
        # appends to ``self.errors`` without returning
        if self.errors:
            return  # don't proceed with the rest of validation

        # if no errors so far, make sense out of the declaration data
        self.groups = self.data["groups"]

        # now we check for consistency
        self._check_endpoint_uniqueness()

        # create maps for easy access to data: endpoint name -> type, merged
        # across all groups ("outputs" and "loop" are optional in a group)
        self.input_map = dict(
            [(k, v["type"]) for g in self.groups for k, v in g["inputs"].items()]
        )
        self.output_map = dict(
            [
                (k, v["type"])
                for g in self.groups
                for k, v in g.get("outputs", {}).items()
            ]
        )
        self.loop_map = dict(
            [(k, v["type"]) for g in self.groups for k, v in g.get("loop", {}).items()]
        )

        self._validate_required_dataformats(dataformat_cache)
        self._convert_parameter_types()

        # finally, the libraries
        self._validate_required_libraries(library_cache)
        self._check_language_consistence()
André Anjos's avatar
André Anjos committed
282

Philip ABBET's avatar
Philip ABBET committed
283 284 285
    def _check_endpoint_uniqueness(self):
        """Checks for name clashes accross input/output groups
        """
André Anjos's avatar
André Anjos committed
286

Philip ABBET's avatar
Philip ABBET committed
287
        all_input_names = []
288 289
        for group in self.groups:
            all_input_names.extend(group["inputs"].keys())
Philip ABBET's avatar
Philip ABBET committed
290
        if len(set(all_input_names)) != len(all_input_names):
291 292 293 294
            self.errors.append(
                "repeated input name in algorithm `%s' "
                "declaration: %s" % (self.name, ", ".join(all_input_names))
            )
André Anjos's avatar
André Anjos committed
295

Philip ABBET's avatar
Philip ABBET committed
296 297 298
        # all outputs must have unique names
        all_output_names = []
        for group in self.groups:
299 300 301
            if "outputs" not in group:
                continue
            all_output_names.extend(group["outputs"].keys())
Philip ABBET's avatar
Philip ABBET committed
302
        if len(set(all_output_names)) != len(all_output_names):
303 304 305 306
            self.errors.append(
                "repeated output name in algorithm `%s' "
                "declaration: %s" % (self.name, ", ".join(all_output_names))
            )
André Anjos's avatar
André Anjos committed
307

308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
    def _validate_format(self, type_name, group_name, entry_name, dataformat):
        if dataformat.errors:
            self.errors.append(
                "found error validating data format `%s' "
                "for %s `%s' on algorithm `%s': %s"
                % (
                    type_name,
                    group_name,
                    entry_name,
                    self.name,
                    "\n".join(dataformat.errors),
                )
            )

    def _validate_dataformats(self, group, group_name, dataformat_cache):
        for name, entry in group[group_name].items():
            type_name = entry["type"]
            thisformat = self._update_dataformat_cache(type_name, dataformat_cache)
            self._validate_format(type_name, group_name, name, thisformat)

Philip ABBET's avatar
Philip ABBET committed
328 329 330
    def _validate_required_dataformats(self, dataformat_cache):
        """Makes sure we can load all requested formats
        """
André Anjos's avatar
André Anjos committed
331

Philip ABBET's avatar
Philip ABBET committed
332
        for group in self.groups:
André Anjos's avatar
André Anjos committed
333

334 335
            for name, input_ in group["inputs"].items():
                self._validate_dataformats(group, "inputs", dataformat_cache)
André Anjos's avatar
André Anjos committed
336

337
            if "outputs" in group:
338
                self._validate_dataformats(group, "outputs", dataformat_cache)
André Anjos's avatar
André Anjos committed
339

340
            if "loop" in group:
341
                self._validate_dataformats(group, "loop", dataformat_cache)
André Anjos's avatar
André Anjos committed
342

Philip ABBET's avatar
Philip ABBET committed
343
        if self.results:
André Anjos's avatar
André Anjos committed
344

Philip ABBET's avatar
Philip ABBET committed
345
            for name, result in self.results.items():
346 347 348 349 350 351 352 353
                result_type = result["type"]
                # results can only contain base types and plots therefore, only
                # process plots
                if result_type.find("/") != -1:
                    thisformat = self._update_dataformat_cache(
                        result_type, dataformat_cache
                    )
                    self._validate_format(result_type, "result", name, thisformat)
André Anjos's avatar
André Anjos committed
354

Philip ABBET's avatar
Philip ABBET committed
355
    def _convert_parameter_types(self):
356 357
        """Converts types to numpy equivalents, checks defaults, ranges and
        choices
Philip ABBET's avatar
Philip ABBET committed
358
        """
André Anjos's avatar
André Anjos committed
359

Philip ABBET's avatar
Philip ABBET committed
360 361 362 363
        def _try_convert(name, tp, value, desc):
            try:
                return tp.type(value)
            except Exception as e:
364 365 366 367
                self.errors.append(
                    "%s for parameter `%s' cannot be cast to type "
                    "`%s': %s" % (desc, name, tp.name, e)
                )
André Anjos's avatar
André Anjos committed
368

369 370
        if self.parameters is None:
            return
André Anjos's avatar
André Anjos committed
371

Philip ABBET's avatar
Philip ABBET committed
372
        for name, parameter in self.parameters.items():
373 374
            if parameter["type"] == "string":
                parameter["type"] = numpy.dtype("str")
Philip ABBET's avatar
Philip ABBET committed
375
            else:
376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
                parameter["type"] = numpy.dtype(parameter["type"])

            if "range" in parameter:
                parameter["range"][0] = _try_convert(
                    name, parameter["type"], parameter["range"][0], "start of range"
                )
                parameter["range"][1] = _try_convert(
                    name, parameter["type"], parameter["range"][1], "end of range"
                )
                if parameter["range"][0] >= parameter["range"][1]:
                    self.errors.append(
                        "range for parameter `%s' has a start greater "
                        "then the end value (%r >= %r)"
                        % (name, parameter["range"][0], parameter["range"][1])
                    )

            if "choice" in parameter:
                for i, choice in enumerate(parameter["choice"]):
                    parameter["choice"][i] = _try_convert(
                        name,
                        parameter["type"],
                        parameter["choice"][i],
                        "choice[%d]" % i,
                    )

            if "default" in parameter:
                parameter["default"] = _try_convert(
                    name, parameter["type"], parameter["default"], "default"
                )

                if "range" in parameter:  # check range
                    if (
                        parameter["default"] < parameter["range"][0]
                        or parameter["default"] > parameter["range"][1]
                    ):
                        self.errors.append(
                            "default for parameter `%s' (%r) is not "
                            "within parameter range [%r, %r]"
                            % (
                                name,
                                parameter["default"],
                                parameter["range"][0],
                                parameter["range"][1],
                            )
                        )

                if "choice" in parameter:  # check choices
                    if parameter["default"] not in parameter["choice"]:
                        self.errors.append(
                            "default for parameter `%s' (%r) is not "
                            "a valid choice `[%s]'"
                            % (
                                name,
                                parameter["default"],
                                ", ".join(["%r" % k for k in parameter["choice"]]),
                            )
                        )
André Anjos's avatar
André Anjos committed
433

Philip ABBET's avatar
Philip ABBET committed
434
    def _validate_required_libraries(self, library_cache):
André Anjos's avatar
André Anjos committed
435

Philip ABBET's avatar
Philip ABBET committed
436
        # all used libraries must be loadable; cannot use self as a library
André Anjos's avatar
André Anjos committed
437

Philip ABBET's avatar
Philip ABBET committed
438
        if self.uses:
André Anjos's avatar
André Anjos committed
439

Philip ABBET's avatar
Philip ABBET committed
440
            for name, value in self.uses.items():
André Anjos's avatar
André Anjos committed
441

442 443 444
                self.libraries[value] = library_cache.setdefault(
                    value, library.Library(self.prefix, value, library_cache)
                )
André Anjos's avatar
André Anjos committed
445

Philip ABBET's avatar
Philip ABBET committed
446
                if not self.libraries[value].valid:
447 448 449 450
                    self.errors.append(
                        "referred library `%s' (%s) is not valid"
                        % (self.libraries[value].name, name)
                    )
André Anjos's avatar
André Anjos committed
451

Philip ABBET's avatar
Philip ABBET committed
452
    def _check_language_consistence(self):
André Anjos's avatar
André Anjos committed
453

Philip ABBET's avatar
Philip ABBET committed
454
        # all used libraries must be programmed with the same language
455 456
        if self.language == "unknown":
            return  # bail out on unknown language
André Anjos's avatar
André Anjos committed
457

Philip ABBET's avatar
Philip ABBET committed
458
        if self.uses:
André Anjos's avatar
André Anjos committed
459

460
            for name, library_name in self.uses.items():
André Anjos's avatar
André Anjos committed
461

462 463
                if library_name not in self.libraries:
                    continue  # invalid
André Anjos's avatar
André Anjos committed
464

465 466 467 468 469 470
                if self.libraries[library_name].data is None:
                    self.errors.append(
                        "language for used library `%s' cannot be "
                        "inferred as the library was not properly loaded"
                        % (library_name,)
                    )
Philip ABBET's avatar
Philip ABBET committed
471
                    continue
André Anjos's avatar
André Anjos committed
472

473 474 475 476 477 478 479 480 481 482
                if self.libraries[library_name].language != self.language:
                    self.errors.append(
                        "language for used library `%s' (`%s') "
                        "differs from current language for this algorithm (`%s')"
                        % (
                            library_name,
                            self.libraries[library_name].language,
                            self.language,
                        )
                    )