Commit 24f787ef authored by Philip ABBET

Merge branch 'datasets' into 'master'

Refactoring and support of dataset providing in a container

See merge request !8
parents f0c66547 3a9bd3ae
...@@ -28,6 +28,18 @@ ...@@ -28,6 +28,18 @@
This package contains the source code for a python-based backend for the BEAT This package contains the source code for a python-based backend for the BEAT
platform. platform.
It contains the minimum amount of code needed to run an algorithm or serve
data from a dataset. It is designed to be installed in a container.
The ``beat.core`` package extends the functionalities of this one (for
instance, it adds thorough validation of each user contribution, whereas
``beat.backend.python`` assumes that an invalid contribution will never
reach the container).
For this reason (and to keep ``beat.backend.python`` as small as possible),
all the unit tests are located in ``beat.core``.
Installation Installation
------------ ------------
...@@ -39,48 +51,11 @@ Really easy, with ``zc.buildout``:: ...@@ -39,48 +51,11 @@ Really easy, with ``zc.buildout``::
These 2 commands should download and install all non-installed dependencies and These 2 commands should download and install all non-installed dependencies and
get you a fully operational test and development environment. get you a fully operational test and development environment.
.. note::
If you are on the Idiap filesystem, you may use
``/idiap/project/beat/environments/staging/usr/bin/python`` to bootstrap this
package instead. It contains the same setup deployed at the final BEAT
machinery.
Documentation Documentation
------------- -------------
To build the documentation, just do:: To build the documentation, just do::
$ ./bin/sphinx-apidoc --separate -d 2 --output=doc/api beat/backend/python $ ./bin/sphinx-apidoc --separate -d 2 --output=doc/api beat
$ ./bin/sphinx-build doc sphinx $ ./bin/sphinx-build doc sphinx
Testing
-------
After installation, it is possible to run our suite of unit tests. To do so,
use ``nose``::
$ ./bin/nosetests -sv
If you want to skip slow tests (at least those pulling stuff from our servers)
or executing lengthy operations, just do::
$ ./bin/nosetests -sv -a '!slow'
To measure the test coverage, do the following::
$ ./bin/nosetests -sv --with-coverage --cover-package=beat.backend.python
To produce an HTML test coverage report, at the directory `./htmlcov`, do the
following::
$ ./bin/nosetests -sv --with-coverage --cover-package=beat.backend.python --cover-html --cover-html-dir=htmlcov
Our documentation is also interspersed with test units. You can run them using
sphinx::
$ ./bin/sphinx -b doctest doc sphinx
...@@ -38,6 +38,34 @@ import simplejson ...@@ -38,6 +38,34 @@ import simplejson
from . import dataformat from . import dataformat
from . import library from . import library
from . import loader from . import loader
from . import utils
class Storage(utils.CodeStorage):
    """Path resolver for the files that make up an algorithm.

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      name (str): The name of the algorithm object in the format
        ``<user>/<name>/<version>``.

      language (str): Handed over, untouched, to the ``utils.CodeStorage``
        constructor. Defaults to ``None``.

    Raises:

      RuntimeError: If ``name`` does not contain exactly two ``/``
        separators.
    """

    def __init__(self, prefix, name, language=None):
        # guard clause: a valid name has exactly three "/"-separated parts
        separators = name.count('/')
        if separators != 2:
            raise RuntimeError("invalid algorithm name: `%s'" % name)

        components = name.split('/')
        self.username, self.name, self.version = components
        self.prefix = prefix
        self.fullname = name

        super(Storage, self).__init__(
            utils.hashed_or_simple(self.prefix, 'algorithms', name),
            language,
        )
class Runner(object): class Runner(object):
...@@ -160,6 +188,7 @@ class Runner(object): ...@@ -160,6 +188,7 @@ class Runner(object):
return getattr(self.obj, key) return getattr(self.obj, key)
class Algorithm(object): class Algorithm(object):
"""Algorithms represent runnable components within the platform. """Algorithms represent runnable components within the platform.
...@@ -222,6 +251,9 @@ class Algorithm(object): ...@@ -222,6 +251,9 @@ class Algorithm(object):
groups (dict): A list containing dictionaries with inputs and outputs groups (dict): A list containing dictionaries with inputs and outputs
belonging to the same synchronization group. belonging to the same synchronization group.
errors (list): A list containing errors found while loading this
algorithm.
data (dict): The original data for this algorithm, as loaded by our JSON data (dict): The original data for this algorithm, as loaded by our JSON
decoder. decoder.
...@@ -232,20 +264,34 @@ class Algorithm(object): ...@@ -232,20 +264,34 @@ class Algorithm(object):
def __init__(self, prefix, name, dataformat_cache=None, library_cache=None): def __init__(self, prefix, name, dataformat_cache=None, library_cache=None):
self._name = None
self.storage = None
self.prefix = prefix self.prefix = prefix
self.dataformats = {} self.dataformats = {}
self.libraries = {} self.libraries = {}
self.groups = [] self.groups = []
dataformat_cache = dataformat_cache if dataformat_cache is not None else {} dataformat_cache = dataformat_cache if dataformat_cache is not None else {}
library_cache = library_cache if library_cache is not None else {} library_cache = library_cache if library_cache is not None else {}
self.name = name self._load(name, dataformat_cache, library_cache)
json_path = os.path.join(prefix, 'algorithms', name + '.json')
with open(json_path, 'rb') as f: self.data = simplejson.load(f)
self.code_path = os.path.join(prefix, 'algorithms', name + '.py')
def _load(self, data, dataformat_cache, library_cache):
"""Loads the algorithm"""
self._name = data
self.storage = Storage(self.prefix, data)
json_path = self.storage.json.path
if not self.storage.exists():
self.errors.append('Algorithm declaration file not found: %s' % json_path)
return
with open(json_path, 'rb') as f:
self.data = simplejson.load(f)
self.code_path = self.storage.code.path
self.groups = self.data['groups'] self.groups = self.data['groups']
...@@ -375,6 +421,22 @@ class Algorithm(object): ...@@ -375,6 +421,22 @@ class Algorithm(object):
library.Library(self.prefix, value, library_cache)) library.Library(self.prefix, value, library_cache))
@property
def name(self):
"""Returns the name of this object
"""
return self._name or '__unnamed_algorithm__'
@name.setter
def name(self, value):
if self.data['language'] == 'unknown':
raise RuntimeError("algorithm has no programming language set")
self._name = value
self.storage = Storage(self.prefix, value, self.data['language'])
@property @property
def schema_version(self): def schema_version(self):
...@@ -382,6 +444,20 @@ class Algorithm(object): ...@@ -382,6 +444,20 @@ class Algorithm(object):
return self.data.get('schema_version', 1) return self.data.get('schema_version', 1)
@property
def language(self):
"""Returns the current language set for the executable code"""
return self.data['language']
@language.setter
def language(self, value):
"""Sets the current executable code programming language"""
if self.storage:
self.storage.language = value
self.data['language'] = value
def clean_parameter(self, parameter, value): def clean_parameter(self, parameter, value):
"""Checks if a given value against a declared parameter """Checks if a given value against a declared parameter
...@@ -410,8 +486,8 @@ class Algorithm(object): ...@@ -410,8 +486,8 @@ class Algorithm(object):
ValueError: If the parameter cannot be safe cast into the algorithm's ValueError: If the parameter cannot be safe cast into the algorithm's
type. Alternatively, a ``ValueError`` may also be raised if a range or type. Alternatively, a ``ValueError`` may also be raised if a range or
choice was specified and the value does not obbey those settings choice was specified and the value does not obey those settings
estipulated for the parameter stipulated for the parameter
""" """
...@@ -437,35 +513,72 @@ class Algorithm(object): ...@@ -437,35 +513,72 @@ class Algorithm(object):
return retval return retval
@property
def valid(self):
"""A boolean that indicates if this algorithm is valid or not"""
return not bool(self.errors)
@property @property
def uses(self): def uses(self):
return self.data.get('uses') return self.data.get('uses')
@uses.setter
def uses(self, value):
self.data['uses'] = value
return value
@property @property
def results(self): def results(self):
return self.data.get('results') return self.data.get('results')
@results.setter
def results(self, value):
self.data['results'] = value
return value
@property @property
def parameters(self): def parameters(self):
return self.data.get('parameters') return self.data.get('parameters')
@parameters.setter
def parameters(self, value):
self.data['parameters'] = value
return value
@property @property
def splittable(self): def splittable(self):
return self.data.get('splittable', False) return self.data.get('splittable', False)
@splittable.setter
def splittable(self, value):
self.data['splittable'] = value
return value
def uses_dict(self): def uses_dict(self):
"""Returns the usage dictionary for all dependent modules""" """Returns the usage dictionary for all dependent modules"""
if self.data['language'] == 'unknown':
raise RuntimeError("algorithm has no programming language set")
if not self._name:
raise RuntimeError("algorithm has no name")
retval = {} retval = {}
if self.uses is not None: if self.uses is not None:
for name, value in self.uses.items(): for name, value in self.uses.items():
retval[name] = dict( retval[name] = dict(
path=self.libraries[value].code_path, path=self.libraries[value].storage.code.path,
uses=self.libraries[value].uses_dict(), uses=self.libraries[value].uses_dict(),
) )
...@@ -489,11 +602,24 @@ class Algorithm(object): ...@@ -489,11 +602,24 @@ class Algorithm(object):
before using the ``process`` method. before using the ``process`` method.
""" """
if not self._name:
exc = exc or RuntimeError
raise exc("algorithm has no name")
if self.data['language'] == 'unknown':
exc = exc or RuntimeError
raise exc("algorithm has no programming language set")
if not self.valid:
message = "cannot load code for invalid algorithm (%s)" % (self.name,)
exc = exc or RuntimeError
raise exc(message)
# loads the module only once through the lifetime of the algorithm object # loads the module only once through the lifetime of the algorithm object
try: try:
self.__module = getattr(self, 'module', self.__module = getattr(self, 'module',
loader.load_module(self.name.replace(os.sep, '_'), loader.load_module(self.name.replace(os.sep, '_'),
self.code_path, self.uses_dict())) self.storage.code.path, self.uses_dict()))
except Exception as e: except Exception as e:
if exc is not None: if exc is not None:
type, value, traceback = sys.exc_info() type, value, traceback = sys.exc_info()
...@@ -504,6 +630,52 @@ class Algorithm(object): ...@@ -504,6 +630,52 @@ class Algorithm(object):
return Runner(self.__module, klass, self, exc) return Runner(self.__module, klass, self, exc)
@property
def description(self):
"""The short description for this object"""
return self.data.get('description', None)
@description.setter
def description(self, value):
"""Sets the short description for this object"""
self.data['description'] = value
@property
def documentation(self):
"""The full-length description for this object"""
if not self._name:
raise RuntimeError("algorithm has no name")
if self.storage.doc.exists():
return self.storage.doc.load()
return None
@documentation.setter
def documentation(self, value):
"""Sets the full-length description for this object"""
if not self._name:
raise RuntimeError("algorithm has no name")
if hasattr(value, 'read'):
self.storage.doc.save(value.read())
else:
self.storage.doc.save(value)
def hash(self):
"""Returns the hexadecimal hash for the current algorithm"""
if not self._name:
raise RuntimeError("algorithm has no name")
return self.storage.hash()
def result_dataformat(self): def result_dataformat(self):
"""Generates, on-the-fly, the dataformat for the result readout""" """Generates, on-the-fly, the dataformat for the result readout"""
......
This diff is collapsed.
This diff is collapsed.
...@@ -36,11 +36,43 @@ import six ...@@ -36,11 +36,43 @@ import six
import numpy import numpy
import simplejson import simplejson
from . import utils
from .baseformat import baseformat from .baseformat import baseformat
class Storage(utils.Storage):
    """Resolves paths for dataformats

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      name (str): The name of the dataformat object in the format
        ``<user>/<name>/<version>``.

    Raises:

      RuntimeError: If ``name`` does not contain exactly two ``/``
        separators.
    """

    def __init__(self, prefix, name):

        if name.count('/') != 2:
            raise RuntimeError("invalid dataformat name: `%s'" % name)

        self.username, self.name, self.version = name.split('/')
        self.fullname = name

        path = utils.hashed_or_simple(prefix, 'dataformats', name)
        super(Storage, self).__init__(path)

    def hash(self):
        """The 64-character hash of the dataformat declaration JSON"""

        # NOTE(review): '#description' is presumably the key the base class
        # leaves out of the hash -- confirm against ``utils.Storage.hash``.
        return super(Storage, self).hash('#description')
class DataFormat(object): class DataFormat(object):
"""Data formats define the chunks of data that circulate at data formats. """Data formats define the chunks of data that circulate between blocks.
Parameters: Parameters:
...@@ -67,6 +99,17 @@ class DataFormat(object): ...@@ -67,6 +99,17 @@ class DataFormat(object):
name (str): The full, valid name of this dataformat name (str): The full, valid name of this dataformat
description (str): The short description string, loaded from the JSON
file if one was set.
documentation (str): The full-length docstring for this object.
storage (object): A simple object that provides information about file
paths for this dataformat
errors (list of str): A list containing errors found while loading this
dataformat.
data (dict): The original data for this dataformat, as loaded by our JSON data (dict): The original data for this dataformat, as loaded by our JSON
decoder. decoder.
...@@ -83,23 +126,44 @@ class DataFormat(object): ...@@ -83,23 +126,44 @@ class DataFormat(object):
def __init__(self, prefix, data, parent=None, dataformat_cache=None): def __init__(self, prefix, data, parent=None, dataformat_cache=None):
self._name = None
self.storage = None
self.resolved = None self.resolved = None
self.prefix = prefix self.prefix = prefix
self.errors = []
self.data = None
self.resolved = None
self.referenced = {} self.referenced = {}
self.parent = parent self.parent = parent
# if the user has not provided a cache, still use one for performance # if the user has not provided a cache, still use one for performance
dataformat_cache = dataformat_cache if dataformat_cache is not None else {} dataformat_cache = dataformat_cache if dataformat_cache is not None else {}
try:
self._load(data, dataformat_cache)
finally:
if self._name is not None: #registers it into the cache, even if failed
dataformat_cache[self._name] = self
def _load(self, data, dataformat_cache):
"""Loads the dataformat"""
if isinstance(data, dict): if isinstance(data, dict):
self.name = 'analysis:result' self._name = 'analysis:result'
self.data = data self.data = data
else: else:
self.name = data self._name = data
json_path = os.path.join(prefix, 'dataformats', data + '.json') self.storage = Storage(self.prefix, data)
with open(json_path, 'rb') as f: self.data = simplejson.load(f) json_path = self.storage.json.path
if not self.storage.exists():
self.errors.append('Dataformat declaration file not found: %s' % json_path)
return
dataformat_cache[self.name] = self #registers itself into the cache with open(json_path, 'rb') as f:
self.data = simplejson.load(f)
dataformat_cache[self._name] = self #registers itself into the cache
self.resolved = copy.deepcopy(self.data) self.resolved = copy.deepcopy(self.data)
...@@ -152,7 +216,7 @@ class DataFormat(object): ...@@ -152,7 +216,7 @@ class DataFormat(object):
if '#extends' in self.resolved: if '#extends' in self.resolved:
ext = self.data['#extends'] ext = self.data['#extends']
self.referenced[ext] = maybe_load_format(self.name, ext, dataformat_cache) self.referenced[ext] = maybe_load_format(self._name, ext, dataformat_cache)
basetype = self.resolved['#extends'] basetype = self.resolved['#extends']
tmp = self.resolved tmp = self.resolved
self.resolved = basetype.resolved self.resolved = basetype.resolved
...@@ -160,6 +224,23 @@ class DataFormat(object): ...@@ -160,6 +224,23 @@ class DataFormat(object):
del self.resolved['#extends'] #avoids infinite recursion del self.resolved['#extends'] #avoids infinite recursion
@property
def name(self):
"""Returns the name of this object, either from the filename or composed
from the hierarchy it belongs.
"""
if self.parent and self._name is None:
return self.parent[0].name + '.' + self.parent[1] + '_type'
else:
return self._name or '__unnamed_dataformat__'
@name.setter
def name(self, value):
self._name = value
self.storage = Storage(self.prefix, value)
@property @property
def schema_version(self): def schema_version(self):
"""Returns the schema version""" """Returns the schema version"""
...@@ -245,6 +326,57 @@ class DataFormat(object): ...@@ -245,6 +326,57 @@ class DataFormat(object):
) )
@property
def valid(self):
return not bool(self.errors)
@property
def description(self):
"""The short description for this object"""
return self.data.get('#description', None)
@description.setter
def description(self, value):
"""Sets the short description for this object"""
self.data['#description'] = value
@property
def documentation(self):
"""The full-length description for this object"""
if not self._name:
raise RuntimeError("dataformat has no name")
if self.storage.doc.exists():
return self.storage.doc.load()
return None
@documentation.setter
def documentation(self, value):
"""Sets the full-length description for this object"""
if not self._name:
raise RuntimeError("dataformat has no name")
if hasattr(value, 'read'):
self.storage.doc.save(value.read())
else:
self.storage.doc.save(value)
def hash(self):
"""Returns the hexadecimal hash for its declaration"""
if not self._name:
raise RuntimeError("dataformat has no name")
return self.storage.hash()
def validate(self, data): def validate(self, data):
"""Validates a piece of data provided by the user """Validates a piece of data provided by the user
...@@ -292,7 +424,9 @@ class DataFormat(object): ...@@ -292,7 +424,9 @@ class DataFormat(object):
""" """
if other.extends: if other.extends:
if self.name == other.extends: return True if self.name == other.extends:
else: return self.isparent(other.referenced[other.extends]) return True
else:
return self.isparent(other.referenced[other.extends])
return False return False
This diff is collapsed.
...@@ -102,11 +102,11 @@ class Executor(object): ...@@ -102,11 +102,11 @@ class Executor(object):
for name, channel in self.data['inputs'].items(): for name, channel in self.data['inputs'].items():
group = self.input_list.group(channel) group = self.input_list.group(channel)