database.py 12.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

###############################################################################
#                                                                             #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/           #
# Contact: beat.support@idiap.ch                                              #
#                                                                             #
# This file is part of the beat.backend.python module of the BEAT platform.   #
#                                                                             #
# Commercial License Usage                                                    #
# Licensees holding valid commercial BEAT licenses may use this file in       #
# accordance with the terms contained in a written agreement between you      #
# and Idiap. For further information contact tto@idiap.ch                     #
#                                                                             #
# Alternatively, this file may be used under the terms of the GNU Affero      #
# Public License version 3 as published by the Free Software and appearing    #
# in the file LICENSE.AGPL included in the packaging of this file.            #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################


"""Validation of databases"""

import os
import sys

import six
import simplejson

from . import loader
Philip ABBET's avatar
Philip ABBET committed
38 39 40 41 42 43 44
from . import utils

from .dataformat import DataFormat



class Storage(utils.CodeStorage):
    """Resolves paths for databases

    The database name is split into its ``name`` and ``version`` components,
    which are stored on the instance together with the unsplit ``fullname``.
    The storage itself is rooted at ``<prefix>/databases/<name>/<version>``.


    Parameters:

      prefix (str): Establishes the prefix of your installation.

      name (str): The name of the database object in the format
        ``<name>/<version>``.


    Raises:

      RuntimeError: If ``name`` does not contain exactly one ``/``.

    """

    def __init__(self, prefix, name):

        # exactly one separator means exactly two components
        components = name.split('/')
        if len(components) != 2:
            raise RuntimeError("invalid database name: `%s'" % name)

        self.name, self.version = components
        self.fullname = name

        # views are coded in Python
        super(Storage, self).__init__(
            os.path.join(prefix, 'databases', name),
            'python',
        )
66 67 68 69



class View(object):
    '''A special loader class for database views, with specialized methods

    Attributes that are not found on this wrapper are looked up on the wrapped
    user view object (``self.obj``).


    Parameters:

      module (module): The preloaded module containing the database views as
        returned by :py:func:`beat.core.loader.load_module`.

      definition (dict): The declaration of the set served by this view. The
        key ``view`` names the view class to pick from ``module``; the
        optional keys ``outputs`` and ``parameters`` are used by
        :py:meth:`prepare_outputs` and :py:meth:`setup`.

      prefix (str, path): The prefix path for the current installation

      root_folder (str, path): The path pointing to the root folder of this
        database

      exc (class): The class to use as base exception when translating the
        exception from the user code. Read the documentation of
        :py:func:`run` for more details.

      *args: Constructor parameters for the database view. Normally, none.

      **kwargs: Constructor parameters for the database view. Normally, none.

    '''


    def __init__(self, module, definition, prefix, root_folder, exc=None,
            *args, **kwargs):

        try:
            class_ = getattr(module, definition['view'])
        except Exception:
            if exc is None:
                raise  # just re-raise the original exception

            # translate into the caller-supplied exception class while
            # keeping the original traceback
            _, value, tb = sys.exc_info()
            six.reraise(exc, exc(value), tb)

        self.obj = loader.run(class_, '__new__', exc, *args, **kwargs)
        self.ready = False
        self.prefix = prefix
        self.root_folder = root_folder
        self.definition = definition
        self.exc = exc or RuntimeError
        self.outputs = None


    def prepare_outputs(self):
        '''Prepares the outputs of the dataset

        Creates one output per entry of ``definition['outputs']`` (if any),
        each backed by its own in-memory data sink, and stores the resulting
        list in ``self.outputs``.
        '''

        from .outputs import Output, OutputList
        from .data import MemoryDataSink
        from .dataformat import DataFormat

        # create the stock outputs for this dataset, so data is dumped
        # on a in-memory sink
        self.outputs = OutputList()
        for out_name, out_format in self.definition.get('outputs', {}).items():
            data_sink = MemoryDataSink()
            data_sink.dataformat = DataFormat(self.prefix, out_format)
            data_sink.setup([])
            self.outputs.add(Output(out_name, data_sink, dataset_output=True))


    def setup(self, *args, **kwargs):
        '''Sets up the view

        The keyword arguments ``root_folder``, ``parameters`` and ``outputs``
        are filled in from this object when the caller does not provide them.
        If the caller provides ``outputs``, they replace ``self.outputs``.


        Returns:

          The (truthy) value returned by the ``setup`` method of the user
          view.


        Raises:

          self.exc: If the ``setup`` method of the user view returns a falsy
            value.
        '''

        kwargs.setdefault('root_folder', self.root_folder)
        kwargs.setdefault('parameters', self.definition.get('parameters', {}))

        if 'outputs' not in kwargs:
            kwargs['outputs'] = self.outputs
        else:
            self.outputs = kwargs['outputs']  # record outputs nevertheless

        self.ready = loader.run(self.obj, 'setup', self.exc, *args, **kwargs)

        if not self.ready:
            raise self.exc("unknown setup failure")

        return self.ready


    def input_group(self, name='default', exclude_outputs=()):
        '''A memory-source input group matching the outputs from the view


        Parameters:

          name (str): The name given to the returned input group

          exclude_outputs (sequence of str): Names of outputs for which no
            input must be created


        Returns:

          An ``InputGroup`` with one input per (non-excluded) output of this
          view. Each input reads from a memory data source attached to the
          data sink of the corresponding output.


        Raises:

          self.exc: If the view was not successfully set up beforehand.
        '''

        if not self.ready:
            raise self.exc("database view not yet setup")

        from .data import MemoryDataSource
        from .outputs import SynchronizationListener
        from .inputs import Input, InputGroup

        # Setup the inputs
        synchronization_listener = SynchronizationListener()
        input_group = InputGroup(name,
                synchronization_listener=synchronization_listener,
                restricted_access=False)

        for output in self.outputs:
            if output.name in exclude_outputs:
                continue

            # the data source calls back into this view (done/next)
            data_source = MemoryDataSource(self.done, next_callback=self.next)
            output.data_sink.data_sources.append(data_source)
            input_group.add(Input(output.name,
                output.data_sink.dataformat, data_source))

        return input_group


    def done(self, *args, **kwargs):
        '''Checks if the view is done

        Forwards all arguments to the ``done`` method of the user view and
        returns its result. Raises ``self.exc`` if the view was not
        successfully set up beforehand.
        '''

        if not self.ready:
            raise self.exc("database view not yet setup")

        return loader.run(self.obj, 'done', self.exc, *args, **kwargs)


    def next(self, *args, **kwargs):
        '''Runs through the next data chunk

        Forwards all arguments to the ``next`` method of the user view and
        returns its result. Raises ``self.exc`` if the view was not
        successfully set up beforehand.
        '''

        if not self.ready:
            raise self.exc("database view not yet setup")

        return loader.run(self.obj, 'next', self.exc, *args, **kwargs)


    def __getattr__(self, key):
        '''Returns an attribute of the view - only called at last resort'''

        # If ``obj`` itself is missing (e.g. the constructor failed before
        # setting it), looking up ``self.obj`` below would call this method
        # again and recurse without bound.
        if key == 'obj':
            raise AttributeError(key)

        return getattr(self.obj, key)
198 199 200 201



class Database(object):
    """Databases define the start point of the dataflow in an experiment.


    Parameters:

      prefix (str): Establishes the prefix of your installation.

      name (str): The fully qualified database name (e.g. ``db/1``)

      dataformat_cache (dict, optional): A dictionary mapping dataformat names
        to loaded dataformats. This parameter is optional and, if passed, may
        greatly speed-up database loading times as dataformats that are already
        loaded may be re-used. If you use this parameter, you must guarantee
        that the cache is refreshed as appropriate in case the underlying
        dataformats change.


    Attributes:

      name (str): The full, valid name of this database

      data (dict): The original data for this database, as loaded by our JSON
        decoder. Remains ``None`` if the declaration file was not found.

      errors (list): Messages describing problems found while loading. The
        database is ``valid`` only if this list is empty.

      dataformats (dict): The dataformats used by the outputs of all sets,
        keyed by dataformat name.

      storage (Storage): Resolves the paths of the database files.

    """

    def __init__(self, prefix, name, dataformat_cache=None):

        self._name = None
        self.prefix = prefix
        self.dataformats = {}  # preloaded dataformats
        self.storage = None

        self.errors = []
        self.data = None

        # if the user has not provided a cache, still use one for performance
        if dataformat_cache is None:
            dataformat_cache = {}

        self._load(name, dataformat_cache)


    def _load(self, data, dataformat_cache):
        """Loads the database

        Parameters:

          data (str): The fully qualified database name

          dataformat_cache (dict): Cache of already loaded dataformats, keyed
            by name. Dataformats loaded here are added to it.

        """

        self._name = data

        self.storage = Storage(self.prefix, self._name)
        json_path = self.storage.json.path
        if not self.storage.json.exists():
            self.errors.append('Database declaration file not found: %s' % json_path)
            return

        with open(json_path, 'rb') as f:
            self.data = simplejson.load(f)

        # collect the dataformat of every output of every set, loading each
        # distinct dataformat only once
        for protocol in self.data['protocols']:
            for _set in protocol['sets']:

                for value in _set['outputs'].values():

                    if value in self.dataformats:
                        continue

                    if value in dataformat_cache:
                        dataformat = dataformat_cache[value]
                    else:
                        dataformat = DataFormat(self.prefix, value)
                        dataformat_cache[value] = dataformat

                    self.dataformats[value] = dataformat


    @property
    def name(self):
        """Returns the name of this object
        """
        return self._name or '__unnamed_database__'


    @property
    def schema_version(self):
        """Returns the schema version (1 if the declaration has none)"""
        return self.data.get('schema_version', 1)


    @property
    def valid(self):
        """``True`` if no error was recorded while loading the database"""
        return not self.errors


    @property
    def protocols(self):
        """The declaration of all the protocols of the database, by name"""

        return {k['name']: k for k in self.data['protocols']}


    def protocol(self, name):
        """The declaration of a specific protocol in the database"""

        return self.protocols[name]


    @property
    def protocol_names(self):
        """Names of protocols declared for this database"""

        return [k['name'] for k in self.data['protocols']]


    def sets(self, protocol):
        """The declaration of all the sets of a database protocol, by name"""

        return {k['name']: k for k in self.protocol(protocol)['sets']}


    def set(self, protocol, name):
        """The declaration of a specific set in the database protocol"""

        return self.sets(protocol)[name]


    def set_names(self, protocol):
        """The names of sets in a given protocol for this database"""

        return [k['name'] for k in self.protocol(protocol)['sets']]


    def view(self, protocol, name, exc=None):
        """Returns the database view, given the protocol and the set name

        Parameters:

          protocol (str): The name of the protocol where to retrieve the view from

          name (str): The name of the set in the protocol where to retrieve the
            view from

          exc (class): If passed, must be a valid exception class that will be
            used to report errors in the read-out of this database's view.

        Returns:

          The database view, which will be constructed, but not setup. You
          **must** set it up before using methods ``done`` or ``next``.

        Raises:

          exc (or RuntimeError if ``exc`` is not given): If the database has
            no name or is not valid.

        """

        if not self._name:
            raise (exc or RuntimeError)("database has no name")

        if not self.valid:
            message = "cannot load view for set `%s' of protocol `%s' " \
                    "from invalid database (%s)" % (name, protocol, self.name)
            raise (exc or RuntimeError)(message)

        # loads the module only once through the lifetime of the database object
        try:
            if not hasattr(self, '_module'):
                self._module = loader.load_module(self.name.replace(os.sep, '_'),
                          self.storage.code.path, {})
        except Exception:
            if exc is None:
                raise  # just re-raise the user exception

            # translate into the caller-supplied exception class while
            # keeping the original traceback
            _, value, tb = sys.exc_info()
            six.reraise(exc, exc(value), tb)

        return View(self._module, self.set(protocol, name), self.prefix,
                self.data['root_folder'], exc)