Commit 917cd6ea authored by Philip ABBET's avatar Philip ABBET
Browse files

Update to beat.backend.python 1.5.x

parent 27797406
Pipeline #15615 failed with stage
in 1 minute and 10 seconds
......@@ -403,85 +403,3 @@ class Algorithm(BackendAlgorithm):
self.errors.append("language for used library `%s' (`%s') " \
"differs from current language for this algorithm (`%s')" % \
(library, self.libraries[library].language, self.language))
def json_dumps(self, indent=4):
    """Dumps the JSON declaration of this object in a string

    Parameters:

      indent (int): The number of indentation spaces at every indentation level

    Returns:

      str: The JSON representation for this object
    """
    # NumpyJSONEncoder makes numpy scalars/arrays serializable
    encoder = utils.NumpyJSONEncoder
    return simplejson.dumps(self.data, cls=encoder, indent=indent)
def __str__(self):
    """The pretty-printed JSON declaration of this object"""
    return self.json_dumps()
def write(self, storage=None):
    """Writes contents to prefix location

    Parameters:

      storage (Storage, optional): If you pass a new storage, then this object
        will be written to that storage point rather than its default.
    """
    # an algorithm without a declared language cannot be persisted
    if self.data['language'] == 'unknown':
        raise RuntimeError("algorithm has no programming language set")

    target = storage
    if target is None:
        if not self._name:
            raise RuntimeError("algorithm has no name")
        target = self.storage  # fall back to this object's own storage

    target.save(str(self), self.code, self.description)
def export(self, prefix):
    """Recursively exports itself into another prefix

    Dataformats and associated libraries are also copied.

    Parameters:

      prefix (str): A path to a prefix that must be different from my own.

    Returns:

      None

    Raises:

      RuntimeError: If prefix and self.prefix point to the same directory.
    """
    # BUGFIX: the original messages said "dataformat" here, a copy-paste
    # from DataFormat.export -- this is an Algorithm method
    if not self._name:
        raise RuntimeError("algorithm has no name")

    if not self.valid:
        raise RuntimeError("algorithm is not valid")

    if os.path.samefile(prefix, self.prefix):
        raise RuntimeError("Cannot export algorithm to the same prefix (%s == " \
            "%s)" % (prefix, self.prefix))

    # export dependencies first so the copied algorithm is self-contained
    for k in self.libraries.values(): k.export(prefix)
    for k in self.dataformats.values(): k.export(prefix)

    self.write(Storage(prefix, self.name, self.language))
......@@ -28,12 +28,12 @@
from beat.backend.python.data import mixDataIndices
from beat.backend.python.data import getAllFilenames
from beat.backend.python.data import CachedFileLoader
from beat.backend.python.data import DataSource
from beat.backend.python.data import DataSink
from beat.backend.python.data import CachedDataSource
from beat.backend.python.data import DatabaseOutputDataSource
from beat.backend.python.data import RemoteDataSource
from beat.backend.python.data import DataSink
from beat.backend.python.data import CachedDataSink
from beat.backend.python.data import MemoryDataSource
from beat.backend.python.data import MemoryDataSink
from beat.backend.python.data import StdoutDataSink
from beat.backend.python.data import load_data_index
from beat.backend.python.data import foundSplitRanges
......@@ -3,7 +3,7 @@
###############################################################################
# #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/ #
# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# This file is part of the beat.core module of the BEAT platform. #
......@@ -25,30 +25,7 @@
# #
###############################################################################
class DoneCrashes:
    """Test view whose done() deliberately raises a NameError."""

    def setup(self, *args, **kwargs):
        """Always succeeds."""
        return True

    def next(self):
        """Always succeeds."""
        return True

    def done(self, last_data_index):
        """Crashes before reporting completion."""
        a = b  # 'b' is intentionally undefined: simulates a user-code crash
        return True
class NextCrashes:
    """Test view whose next() deliberately raises a NameError."""

    def setup(self, *args, **kwargs):
        """Always succeeds."""
        return True

    def done(self, last_data_index):
        """Never reports completion."""
        return False

    def next(self):
        """Crashes before producing data."""
        a = b  # 'b' is intentionally undefined: simulates a user-code crash
        return True
class SetupCrashes:
    """Test view whose setup() deliberately raises a NameError."""

    def done(self, last_data_index):
        """Always reports completion."""
        return True

    def next(self):
        """Always succeeds."""
        return True

    def setup(self, *args, **kwargs):
        """Crashes before finishing setup."""
        a = b  # 'b' is intentionally undefined: simulates a user-code crash
        return True
class SetupFails:
    """Test view whose setup() cleanly reports failure (no exception)."""

    def setup(self, *args, **kwargs):
        """Fails by returning False instead of raising."""
        return False

    def done(self, last_data_index):
        """Always reports completion."""
        return True

    def next(self):
        """Always succeeds."""
        return True
from beat.backend.python.data_loaders import DataLoaderList
from beat.backend.python.data_loaders import DataLoader
from beat.backend.python.data_loaders import DataView
......@@ -191,174 +191,3 @@ class Database(BackendDatabase):
"subdirectory or points to a python module, what is " \
"unsupported by this version" % (_set['view'],)
)
@property
def name(self):
    """Returns the name of this object"""
    # fall back to a placeholder for anonymous (unsaved) databases
    if self._name:
        return self._name
    return '__unnamed_database__'
@name.setter
def name(self, value):
    """Sets the name and re-targets the storage accordingly"""
    self._name = value
    # storage location is derived from the prefix and the (new) name,
    # so renaming must rebuild it
    self.storage = Storage(self.prefix, value)
def hash_output(self, protocol, set, output):
    """Creates a unique hash the represents the output from the dataset

    Parameters:

      protocol (str): The name of the protocol containing the set and output
        of interest

      set (str): The name of the set in the protocol containing the output of
        reference

      output (str): The name of the output in the set.

    Returns:

      str: The hexadecimal digest of the hash representing the output.

    Raises:

      KeyError: If the protocol, set or output don't reference an existing
        output for this database.
    """
    # validate names: self.set() raises KeyError on unknown protocol/set,
    # and the subscript raises KeyError on an unknown output
    declared = self.set(protocol, set)
    declared['outputs'][output]

    # digest covers the database declaration plus the full output address
    return hash.hashDatasetOutput(self.hash(), protocol, set, output)
@property
def description(self):
    """The short description for this object"""
    # absent key means no description was ever set
    return self.data.get('description')
@description.setter
def description(self, value):
    """Sets the short description for this object"""
    # stored directly in the in-memory JSON declaration
    self.data['description'] = value
@property
def documentation(self):
    """The full-length description for this object"""
    if not self._name:
        raise RuntimeError("database has no name")
    # the documentation lives in a separate file next to the declaration
    doc = self.storage.doc
    return doc.load() if doc.exists() else None
@documentation.setter
def documentation(self, value):
    """Sets the full-length description for this object"""
    if not self._name:
        raise RuntimeError("database has no name")
    # file-like objects are read out; plain strings are saved as-is
    contents = value.read() if hasattr(value, 'read') else value
    self.storage.doc.save(contents)
def hash(self):
    """Returns the hexadecimal hash for its declaration"""
    # hashing requires a storage, which requires a name
    if self._name:
        return self.storage.hash()
    raise RuntimeError("database has no name")
def json_dumps(self, indent=4):
    """Dumps the JSON declaration of this object in a string

    Parameters:

      indent (int): The number of indentation spaces at every indentation level

    Returns:

      str: The JSON representation for this object
    """
    # NumpyJSONEncoder makes numpy scalars/arrays serializable
    encoder = utils.NumpyJSONEncoder
    return simplejson.dumps(self.data, cls=encoder, indent=indent)
def __str__(self):
    """The pretty-printed JSON declaration of this object"""
    return self.json_dumps()
def write(self, storage=None):
    """Writes contents to prefix location

    Parameters:

      storage (Storage, optional): If you pass a new storage, then this object
        will be written to that storage point rather than its default.
    """
    target = storage
    if target is None:
        if not self._name:
            raise RuntimeError("database has no name")
        target = self.storage  # fall back to this object's own storage

    target.save(str(self), self.code, self.description)
def export(self, prefix):
    """Recursively exports itself into another prefix

    Dataformats associated are also exported recursively

    Parameters:

      prefix (str): A path to a prefix that must be different from my own.

    Returns:

      None

    Raises:

      RuntimeError: If prefix and self.prefix point to the same directory.
    """
    if not self._name:
        raise RuntimeError("database has no name")

    if not self.valid:
        raise RuntimeError("database is not valid")

    if os.path.samefile(prefix, self.prefix):
        raise RuntimeError("Cannot export database to the same prefix (%s == " \
            "%s)" % (prefix, self.prefix))

    # referenced dataformats must exist at the destination first
    for dataformat in self.dataformats.values():
        dataformat.export(prefix)

    self.write(Storage(prefix, self.name))
......@@ -223,80 +223,3 @@ class DataFormat(BackendDataFormat):
# in ``self.resolved``.
if self.errors:
self.errors = utils.uniq(self.errors)
def json_dumps(self, indent=4):
    """Dumps the JSON declaration of this object in a string

    Parameters:

      indent (int): The number of indentation spaces at every indentation level

    Returns:

      str: The JSON representation for this object
    """
    # NumpyJSONEncoder makes numpy scalars/arrays serializable
    encoder = utils.NumpyJSONEncoder
    return simplejson.dumps(self.data, cls=encoder, indent=indent)
def __str__(self):
    """The pretty-printed JSON declaration of this object"""
    return self.json_dumps()
def write(self, storage=None):
    """Writes contents to prefix location

    Parameters:

      storage (Storage, optional): If you pass a new storage, then this object
        will be written to that storage point rather than its default.
    """
    target = storage
    if target is None:
        if not self._name:
            raise RuntimeError("dataformat has no name")
        target = self.storage  # fall back to this object's own storage

    # dataformats carry no code: only declaration and description are saved
    target.save(str(self), self.description)
def export(self, prefix):
    """Recursively exports itself into another prefix

    Other required dataformats are also copied.

    Parameters:

      prefix (str): A path to a prefix that must be different from my own.

    Returns:

      None

    Raises:

      RuntimeError: If prefix and self.prefix point to the same directory.
    """
    if not self._name:
        raise RuntimeError("dataformat has no name")

    if not self.valid:
        raise RuntimeError("dataformat is not valid")

    if os.path.samefile(prefix, self.prefix):
        # BUGFIX: message previously read "Cannot dataformat object to the
        # same prefix" -- the verb "export" was missing
        raise RuntimeError("Cannot export dataformat to the same prefix (%s " \
            "== %s)" % (prefix, self.prefix))

    # referenced dataformats must exist at the destination first
    for k in self.referenced.values(): k.export(prefix)

    self.write(Storage(prefix, self.name))
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###############################################################################
# #
# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# This file is part of the beat.core module of the BEAT platform. #
# #
# Commercial License Usage #
# Licensees holding valid commercial BEAT licenses may use this file in #
# accordance with the terms contained in a written agreement between you #
# and Idiap. For further information contact tto@idiap.ch #
# #
# Alternatively, this file may be used under the terms of the GNU Affero #
# Public License version 3 as published by the Free Software and appearing #
# in the file LICENSE.AGPL included in the packaging of this file. #
# The BEAT platform is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
# or FITNESS FOR A PARTICULAR PURPOSE. #
# #
# You should have received a copy of the GNU Affero Public License along #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/. #
# #
###############################################################################
'''Execution utilities'''
import os
import sys
import glob
import errno
import tempfile
import subprocess
import logging
logger = logging.getLogger(__name__)
import simplejson
from . import schema
from . import database
from . import algorithm
from . import inputs
from . import outputs
from . import data
from . import stats
from .message_handler import MessageHandler
class DBExecutor(object):
    """Executor specialised in database views

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (dict, str): The piece of data representing the block to be executed.
        It must validate against the schema defined for execution blocks. If a
        string is passed, it is supposed to be a fully qualified absolute path
        to a JSON file containing the block execution information.

      dataformat_cache (dict, optional): A dictionary mapping dataformat names
        to loaded dataformats. This parameter is optional and, if passed, may
        greatly speed-up database loading times as dataformats that are already
        loaded may be re-used. If you use this parameter, you must guarantee
        that the cache is refreshed as appropriate in case the underlying
        dataformats change.

      database_cache (dict, optional): A dictionary mapping database names to
        loaded databases. This parameter is optional and, if passed, may
        greatly speed-up database loading times as databases that are already
        loaded may be re-used. If you use this parameter, you must guarantee
        that the cache is refreshed as appropriate in case the underlying
        databases change.


    Attributes:

      errors (list): A list containing errors found while loading this
        execution block.

      data (dict): The original data for this executor, as loaded by our JSON
        decoder.

      databases (dict): A dictionary in which keys are strings with database
        names and values are :py:class:`database.Database`, representing the
        databases required for running this block. The dictionary may be empty
        in case all inputs are taken from the file cache.

      views (dict): A dictionary in which the keys are tuples pointing to the
        ``(<database-name>, <protocol>, <set>)`` and the value is a setup view
        for that particular combination of details. The dictionary may be empty
        in case all inputs are taken from the file cache.

      input_list (beat.core.inputs.InputList): A list of inputs that will be
        served to the algorithm.

      data_sources (list): A list with all data-sources created by our
        execution loader.

    """
def __init__(self, prefix, data, dataformat_cache=None, database_cache=None):

    self.prefix = prefix

    # state (re)populated by _load()
    self.databases = {}
    self.views = {}
    self.input_list = None
    self.data_sources = []
    self.handler = None
    self.errors = []
    self.data = None

    # fall back to throw-away caches when the caller provided none
    if database_cache is None:
        database_cache = {}
    if dataformat_cache is None:
        dataformat_cache = {}
    self.dataformat_cache = dataformat_cache

    self._load(data, self.dataformat_cache, database_cache)
def _load(self, data, dataformat_cache, database_cache):
    """Loads the block execution information

    Parameters:

      data (dict, str): The execution block declaration, or a path to a JSON
        file containing it.

      dataformat_cache (dict): Cache of already-loaded dataformats, by name.

      database_cache (dict): Cache of already-loaded databases, by name.
    """

    # reset
    self.data = None
    self.errors = []
    self.databases = {}
    self.views = {}
    self.input_list = None
    self.data_sources = []

    if not isinstance(data, dict): #user has passed a file name
        if not os.path.exists(data):
            self.errors.append('File not found: %s' % data)
            return

    # this runs basic validation, including JSON loading if required
    self.data, self.errors = schema.validate('execution', data)
    if self.errors: return #don't proceed with the rest of validation

    # load databases
    for details in self.data['inputs'].values():
        if 'database' not in details:
            continue

        if details['database'] not in self.databases:
            if details['database'] in database_cache: #reuse
                db = database_cache[details['database']]
            else: #load it
                db = database.Database(self.prefix, details['database'],
                        dataformat_cache)
                database_cache[db.name] = db

            self.databases[details['database']] = db
            if not db.valid:
                self.errors += db.errors
                continue

        # BUGFIX: check the database referenced by THIS input; the previous
        # code tested the local variable `db`, which could be stale (left
        # over from a different input's database) when this input's database
        # had already been loaded in an earlier iteration
        if not self.databases[details['database']].valid:
            # do not add errors again
            continue

        # create and load the required views
        key = (details['database'], details['protocol'], details['set'])
        if key not in self.views:
            view = self.databases[details['database']].view(details['protocol'],
                    details['set'])

            if details['channel'] == self.data['channel']: #synchronized
                start_index, end_index = self.data.get('range', (None, None))
            else:
                start_index, end_index = (None, None)
            view.prepare_outputs()
            self.views[key] = (view, start_index, end_index)
def __enter__(self):
    """Prepares inputs and outputs for the processing task

    Raises:

      IOError: in case something cannot be properly setup
    """
    self._prepare_inputs()

    # The setup() of a database view may call isConnected() on an input
    # to set the index at the right location when parallelization is enabled.
    # This is why setup() should be called after initialized the inputs.
    for key, (view, start_index, end_index) in self.views.items():
        if start_index is None and end_index is None:
            ok = view.setup()
        else:
            ok = view.setup(force_start_index=start_index,
                    force_end_index=end_index)

        if not ok:
            raise RuntimeError("Could not setup database view `%s'" % key)

    return self
def __exit__(self, exc_type, exc_value, traceback):
    """Closes all sinks and disconnects inputs and outputs"""
    # drop references so data sources and inputs can be garbage-collected
    self.data_sources = []
    self.input_list = None
def _prepare_inputs(self):