Commit 1819d452 authored by Philip ABBET

Refactoring: No remote input / output anymore

parent 29e6ab5e
@@ -680,7 +680,7 @@ class RemoteDataSource(DataSource):
answer = self.socket.recv()
if answer == 'err':
-            self.read_duration += time.time() - _start
+            self.read_duration += time.time() - t1
kind = self.socket.recv()
message = self.socket.recv()
raise RemoteException(kind, message)
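For context, this hunk touches the error path of the reply protocol: the first frame received is either the payload or the literal 'err', in which case two more frames (kind, message) follow and a RemoteException is raised. A minimal, self-contained sketch of that convention, with a hypothetical FakeSocket standing in for the real ZeroMQ socket:

import time

class RemoteException(Exception):
    """Hypothetical stand-in for the RemoteException used above."""
    def __init__(self, kind, message):
        super(RemoteException, self).__init__('(%s) %s' % (kind, message))

class FakeSocket(object):
    """Toy replacement for the socket; frames are popped in order."""
    def __init__(self, frames):
        self.frames = list(frames)
    def recv(self):
        return self.frames.pop(0)

def read_reply(socket, stats):
    t1 = time.time()
    answer = socket.recv()
    if answer == 'err':
        stats['read_duration'] += time.time() - t1  # time is accounted even on error
        raise RemoteException(socket.recv(), socket.recv())
    stats['read_duration'] += time.time() - t1
    return answer

stats = dict(read_duration=0.0)
print(read_reply(FakeSocket(['some-data']), stats))           # normal reply
try:
    read_reply(FakeSocket(['err', 'system', 'boom']), stats)  # error reply
except RemoteException as e:
    print(e)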
@@ -710,36 +710,6 @@ class RemoteDataSource(DataSource):
#----------------------------------------------------------
-class LegacyDataSource(object):
-    """Interface of all the Data Sources
-
-    Data Sources are used to provide data to the inputs of an algorithm.
-    """
-
-    __metaclass__ = abc.ABCMeta
-
-    @abc.abstractmethod
-    def next(self, load=True):
-        """Retrieves the next block of data
-
-        Returns:
-            A tuple (*data*, *start_index*, *end_index*)
-        """
-        pass
-
-    @abc.abstractmethod
-    def hasMoreData(self):
-        """Indicates if there is more data to process on some of the inputs"""
-        pass
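The abstract interface deleted above defined the contract its consumers relied on: pull blocks with next() until hasMoreData() returns False. A hypothetical minimal implementation (not from the repository) illustrating that contract:

class ListDataSource(object):  # would have subclassed LegacyDataSource
    def __init__(self, blocks):
        self.blocks = list(blocks)  # [(data, start_index, end_index), ...]
    def next(self, load=True):
        if not self.blocks:
            return (None, None, None)
        return self.blocks.pop(0)
    def hasMoreData(self):
        return len(self.blocks) > 0

source = ListDataSource([('a', 0, 0), ('b', 1, 3)])
while source.hasMoreData():
    data, start, end = source.next()
    print(data, start, end)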
#----------------------------------------------------------
class DataSink(object):
"""Interface of all the Data Sinks
@@ -780,7 +750,7 @@ class StdoutDataSink(DataSink):
"""Data Sink that prints informations about the written data on stdout
-    Note: The written data is lost! Use ii for debugging purposes
+    Note: The written data is lost! Use this class for debugging purposes
"""
def __init__(self):
@@ -824,93 +794,6 @@ class StdoutDataSink(DataSink):
#----------------------------------------------------------
-class CachedLegacyDataSource(LegacyDataSource):
-    """Data Source that loads data from the Cache"""
-
-    def __init__(self):
-        self.cached_file = None
-        self.dataformat = None
-        self.next_data_index = 0
-
-    def setup(self, filename, prefix, force_start_index=None, force_end_index=None,
-              unpack=True):
-        """Configures the data source
-
-        Parameters:
-
-          filename (str): Name of the file to read the data from
-
-          prefix (str, path): Path to the prefix where the dataformats are stored.
-
-          force_start_index (int): The starting index (if not set or set to
-            ``None``, the default, reads data from the beginning of the file)
-
-          force_end_index (int): The end index (if not set or set to ``None``, the
-            default, reads the data until the end)
-
-          unpack (bool): Indicates if the data must be unpacked or not
-
-        Returns:
-
-          ``True``, if successful, or ``False`` otherwise.
-        """
-        self.cached_file = CachedDataSource()
-
-        if self.cached_file.setup(filename, prefix, start_index=force_start_index,
-                                  end_index=force_end_index, unpack=unpack):
-            self.dataformat = self.cached_file.dataformat
-            return True
-
-        return False
-
-    def close(self):
-        """Closes the data source"""
-        if self.cached_file is not None:
-            self.cached_file.close()
-            self.cached_file = None
-
-    def __del__(self):
-        """Makes sure the files are closed when the object is deleted"""
-        self.close()
-
-    def next(self):
-        """Retrieves the next block of data
-
-        Returns:
-            A tuple (data, start_index, end_index)
-        """
-        if self.next_data_index >= len(self.cached_file):
-            return (None, None, None)
-
-        result = self.cached_file[self.next_data_index]
-        self.next_data_index += 1
-
-        return result
-
-    def hasMoreData(self):
-        """Indicates if there is more data to process on some of the inputs"""
-        return (self.next_data_index < len(self.cached_file))
-
-    def statistics(self):
-        """Returns the statistics about the number of bytes read from the cache"""
-        return self.cached_file.statistics()
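The deleted wrapper only kept a cursor (next_data_index) over a CachedDataSource, which already supports len() and [index] as the code above shows. A toy sketch of the equivalent random-access loop, with an invented stand-in class for CachedDataSource:

class FakeCachedDataSource(object):
    """Toy stand-in for CachedDataSource's __len__/__getitem__ interface."""
    def __init__(self, blocks):
        self.blocks = blocks
    def __len__(self):
        return len(self.blocks)
    def __getitem__(self, index):
        return self.blocks[index]

cached_file = FakeCachedDataSource([('a', 0, 0), ('b', 1, 3)])
for i in range(len(cached_file)):       # replaces next()/hasMoreData()
    data, start, end = cached_file[i]
    print(data, start, end)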
#----------------------------------------------------------
class CachedDataSink(DataSink):
"""Data Sink that save data in the Cache
@@ -1093,92 +976,6 @@ class CachedDataSink(DataSink):
#----------------------------------------------------------
-class MemoryLegacyDataSource(LegacyDataSource):
-    """Interface of all the Data Sources
-
-    Data Sources are used to provide data to the inputs of an algorithm.
-    """
-
-    def __init__(self, done_callback, next_callback=None, index=None):
-        self.data = []
-        self._done_callback = done_callback
-        self._next_callback = next_callback
-        self._last_data_index = -1
-
-    def add(self, data, start_data_index, end_data_index):
-        self.data.append((data, start_data_index, end_data_index))
-        self._last_data_index = end_data_index
-
-    def next(self):
-        """Retrieves the next block of data
-
-        :return: A tuple (*data*, *start_index*, *end_index*)
-        """
-        if (len(self.data) == 0) and (self._next_callback is not None):
-            if not(self._done_callback(self._last_data_index)):
-                self._next_callback()
-
-        if len(self.data) == 0:
-            return (None, None, None)
-
-        return self.data.pop(0)
-
-    def hasMoreData(self):
-        if len(self.data) != 0:
-            return True
-
-        return not self._done_callback(self._last_data_index)
-
-    def statistics(self):
-        """Returns the statistics about the number of bytes read from the cache"""
-        return (0, 0)
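The deleted class implements a small pull protocol: when its buffer is empty, next() first asks done_callback(last_index) whether the producer has finished and, if not, calls next_callback() so the producer pushes one more block. A self-contained sketch of that handshake, with hypothetical Producer/Buffer names:

class Producer(object):
    def __init__(self, blocks):
        self.blocks = list(blocks)
        self.sink = None              # a MemoryLegacyDataSource-like object
    def done(self, last_data_index):
        return len(self.blocks) == 0
    def next(self):
        self.sink.add(*self.blocks.pop(0))

class Buffer(object):                 # mimics MemoryLegacyDataSource
    def __init__(self, done_callback, next_callback):
        self.data = []
        self._done = done_callback
        self._next = next_callback
        self._last = -1
    def add(self, data, start, end):
        self.data.append((data, start, end))
        self._last = end
    def next(self):
        if not self.data and not self._done(self._last):
            self._next()               # ask the producer for one more block
        return self.data.pop(0) if self.data else (None, None, None)

producer = Producer([('a', 0, 0), ('b', 1, 1)])
buf = Buffer(producer.done, producer.next)
producer.sink = buf
print(buf.next(), buf.next(), buf.next())   # third call yields (None, None, None)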
-#----------------------------------------------------------
-
-
-class MemoryDataSink(DataSink):
-    """Data Sink that directly transmits data to the associated
-    MemoryLegacyDataSource objects.
-    """
-
-    def __init__(self):
-        self.data_sources = None
-
-    def setup(self, data_sources):
-        """Configures the data sink
-
-        :param list data_sources: The MemoryLegacyDataSource objects to use
-        """
-        self.data_sources = data_sources
-
-    def write(self, data, start_data_index, end_data_index):
-        """Writes a block of data
-
-        Parameters:
-
-          data (beat.core.baseformat.baseformat): The block of data to write
-
-          start_data_index (int): Start index of the written data
-
-          end_data_index (int): End index of the written data
-        """
-        for data_source in self.data_sources:
-            data_source.add(data, start_data_index, end_data_index)
-
-    def isConnected(self):
-        return len(self.data_sources) > 0
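MemoryDataSink's write() above is a plain fan-out: each written block is replicated into every registered in-memory data source. A toy illustration of that behaviour (all names below are invented):

class FanOutSink(object):             # mimics MemoryDataSink.write()
    def __init__(self, data_sources):
        self.data_sources = data_sources
    def write(self, data, start, end):
        for source in self.data_sources:
            source.add(data, start, end)

class Recorder(object):
    def __init__(self):
        self.received = []
    def add(self, data, start, end):
        self.received.append((data, start, end))

a, b = Recorder(), Recorder()
sink = FanOutSink([a, b])
sink.write('block', 0, 4)
print(a.received == b.received == [('block', 0, 4)])  # True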
#----------------------------------------------------------
def load_data_index(cache_prefix, hash_path):
"""Loads a cached-data index if it exists. Returns empty otherwise.
......
@@ -100,7 +100,7 @@ class DataView(object):
input_data_indices.append( (current_start, self.data_index_end) )
self.infos[input_name] = dict(
-            cached_file = infos['cached_file'],
+            data_source = infos['data_source'],
data_indices = input_data_indices,
data = None,
start_index = -1,
@@ -132,7 +132,7 @@ class DataView(object):
for input_name, infos in self.infos.items():
if (indices[0] < infos['start_index']) or (infos['end_index'] < indices[0]):
(infos['data'], infos['start_index'], infos['end_index']) = \
-                    infos['cached_file'].getAtDataIndex(indices[0])
+                    infos['data_source'].getAtDataIndex(indices[0])
result[input_name] = infos['data']
@@ -189,10 +189,10 @@ class DataLoader(object):
self.data_index_end = -1 # Bigger index across all inputs
-    def add(self, input_name, cached_file):
+    def add(self, input_name, data_source):
self.infos[input_name] = dict(
-            cached_file  = cached_file,
-            data_indices = cached_file.data_indices(),
+            data_source  = data_source,
+            data_indices = data_source.data_indices(),
data = None,
start_index = -1,
end_index = -1,
@@ -247,7 +247,7 @@ class DataLoader(object):
for input_name, infos in self.infos.items():
if (indices[0] < infos['start_index']) or (infos['end_index'] < indices[0]):
(infos['data'], infos['start_index'], infos['end_index']) = \
-                    infos['cached_file'].getAtDataIndex(indices[0])
+                    infos['data_source'].getAtDataIndex(indices[0])
result[input_name] = infos['data']
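After the rename, a DataLoader is fed generic data sources rather than cache files; anything exposing data_indices() and getAtDataIndex(), as used above, fits. A hedged usage sketch with a toy source (the real candidates are CachedDataSource and the new remote/database-backed sources):

class FakeDataSource(object):
    """Toy source implementing the two methods the DataLoader relies on."""
    def __init__(self, blocks):
        self.blocks = blocks          # [(data, start_index, end_index), ...]
    def data_indices(self):
        return [(start, end) for (_, start, end) in self.blocks]
    def getAtDataIndex(self, index):
        for data, start, end in self.blocks:
            if start <= index <= end:
                return (data, start, end)
        return (None, None, None)

source = FakeDataSource([('a', 0, 1), ('b', 2, 3)])
infos = dict(data_source=source, data_indices=source.data_indices(),
             data=None, start_index=-1, end_index=-1)
(infos['data'], infos['start_index'], infos['end_index']) = \
    infos['data_source'].getAtDataIndex(2)
print(infos['data'], infos['start_index'], infos['end_index'])  # b 2 3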
......
@@ -101,21 +101,15 @@ class Executor(object):
if self.algorithm.type == Algorithm.LEGACY:
# Loads algorithm inputs
-        if self.data['proxy_mode']:
-            cache_access = AccessMode.REMOTE
-        else:
-            cache_access = AccessMode.LOCAL
(self.input_list, self.data_loaders, _) = create_inputs_from_configuration(
self.data, self.algorithm, self.prefix, cache_root,
-            cache_access=cache_access, db_access=AccessMode.REMOTE,
+            cache_access=AccessMode.LOCAL, db_access=AccessMode.REMOTE,
socket=self.socket
)
# Loads algorithm outputs
(self.output_list, _) = create_outputs_from_configuration(
-            self.data, self.algorithm, self.prefix, cache_root, self.input_list,
-            cache_access=cache_access, socket=self.socket
+            self.data, self.algorithm, self.prefix, cache_root, self.input_list
)
else:
@@ -126,8 +120,7 @@ class Executor(object):
# Loads algorithm outputs
(self.output_list, _) = create_outputs_from_configuration(
-            self.data, self.algorithm, self.prefix, cache_root, self.input_list,
-            cache_access=AccessMode.LOCAL
+            self.data, self.algorithm, self.prefix, cache_root, self.input_list
)
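The net effect on the executor is that the cache is always accessed locally and the removed proxy_mode flag no longer selects an access mode at run time. A toy before/after sketch of the deleted branch, with an invented stand-in for the AccessMode constants:

class AccessMode:            # stand-in for the real AccessMode constants
    NONE, LOCAL, REMOTE = range(3)

def pick_cache_access_old(data):
    # before: depended on the now-removed 'proxy_mode' flag
    return AccessMode.REMOTE if data['proxy_mode'] else AccessMode.LOCAL

def pick_cache_access_new(data):
    # after: the cache is always read and written locally
    return AccessMode.LOCAL

print(pick_cache_access_old({'proxy_mode': True}),
      pick_cache_access_new({}))      # REMOTE vs. LOCAL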
......
@@ -32,8 +32,6 @@ import errno
import logging
logger = logging.getLogger(__name__)
-from .data import MemoryLegacyDataSource
-from .data import CachedLegacyDataSource
from .data import CachedDataSource
from .data import CachedDataSink
from .data import getAllFilenames
@@ -41,21 +39,18 @@ from .data_loaders import DataLoaderList
from .data_loaders import DataLoader
from .inputs import InputList
from .inputs import Input
-from .inputs import RemoteInput
from .inputs import InputGroup
from .outputs import SynchronizationListener
from .outputs import OutputList
from .outputs import Output
-from .outputs import RemoteOutput
from .algorithm import Algorithm
#----------------------------------------------------------
-def convert_experiment_configuration_to_container(config, proxy_mode):
+def convert_experiment_configuration_to_container(config):
data = {
-        'proxy_mode': proxy_mode,
'algorithm': config['algorithm'],
'parameters': config['parameters'],
'channel': config['channel'],
@@ -103,7 +98,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
def _create_local_input(details):
-        data_source = CachedLegacyDataSource()
+        data_source = CachedDataSource()
data_sources.append(data_source)
filename = os.path.join(cache_root, details['path'] + '.data')
@@ -112,8 +107,8 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
status = data_source.setup(
filename=filename,
prefix=prefix,
-            force_start_index=start_index,
-            force_end_index=end_index,
+            start_index=start_index,
+            end_index=end_index,
unpack=True,
)
else:
@@ -134,9 +129,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
return input
-    def _create_data_loader(details):
-        filename = os.path.join(cache_root, details['path'] + '.data')
+    def _get_data_loader_for(details):
data_loader = data_loader_list[details['channel']]
if data_loader is None:
data_loader = DataLoader(details['channel'])
@@ -144,8 +137,16 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
logger.debug("Data loader created: group='%s'" % details['channel'])
-        cached_file = CachedDataSource()
-        result = cached_file.setup(
+        return data_loader
+
+    def _create_data_source(details):
+        data_loader = _get_data_loader_for(details)
+
+        filename = os.path.join(cache_root, details['path'] + '.data')
+
+        data_source = CachedDataSource()
+        result = data_source.setup(
filename=filename,
prefix=prefix,
start_index=start_index,
@@ -156,7 +157,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
if not result:
raise IOError("cannot load cache file `%s'" % details['path'])
-        data_loader.add(name, cached_file)
+        data_loader.add(name, data_source)
logger.debug("Input '%s' added to data loader: group='%s', dataformat='%s', filename='%s'" % \
(name, details['channel'], algorithm.input_map[name], filename))
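A condensed sketch of the new helper pair: _get_data_loader_for() keeps one DataLoader per channel, and _create_data_source() registers a freshly set-up source into it by input name. All class and function names below are toy stand-ins that mirror the hunk:

class DataLoaderStub(object):
    def __init__(self, channel):
        self.channel = channel
        self.sources = {}
    def add(self, name, data_source):
        self.sources[name] = data_source

loaders = {}
def get_data_loader_for(channel):     # mirrors _get_data_loader_for()
    if channel not in loaders:
        loaders[channel] = DataLoaderStub(channel)
    return loaders[channel]

def create_data_source(name, channel, source):  # mirrors _create_data_source()
    loader = get_data_loader_for(channel)
    if source is None:                # stands for a failed setup()
        raise IOError("cannot load cache file for `%s'" % name)
    loader.add(name, source)

create_data_source('image', 'train', object())
print(sorted(loaders['train'].sources))   # ['image']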
@@ -182,7 +183,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
if not views.has_key(channel):
view = db.view(details['protocol'], details['set'])
-            view.prepare_outputs()
-            print details
+            view.setup()
views[channel] = view
@@ -192,26 +193,34 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
else:
view = views[channel]
# Creation of the input
-            data_source = MemoryLegacyDataSource(view.done, next_callback=view.next)
-
-            output = view.outputs[details['output']]
-            output.data_sink.data_sources.append(data_source)
-
-            input = Input(name, algorithm.input_map[name], data_source)
+            data_loader = _get_data_loader_for(details)
+            data_loader.add(name, view.data_sources[details['output']])
-            logger.debug("Input '%s' created: group='%s', dataformat='%s', database-output='%s/%s/%s:%s'" % \
+            logger.debug("DatabaseOutputDataSource '%s' created: group='%s', dataformat='%s', database-output='%s/%s/%s:%s'" % \
(name, channel, algorithm.input_map[name], details['database'],
details['protocol'], details['set'], details['output']))
elif db_access == AccessMode.REMOTE:
if socket is None:
raise IOError("No socket provided for remote inputs")
raise IOError("No socket provided for remote data sources")
+            data_loader = _get_data_loader_for(details)
+
+            data_source = RemoteDataSource()
+            result = data_source.setup(
+                socket=socket,
+                input_name=name,
+                dataformat_name=algorithm.input_map[name],
+                prefix=prefix,
+                unpack=True
+            )
+
+            if not result:
+                raise IOError("cannot setup remote data source '%s'" % name)
-            input = RemoteInput(name, algorithm.dataformats[algorithm.input_map[name]],
-                                socket, unpack=unpack)
+            data_loader.add(name, data_source)
logger.debug("RemoteInput '%s' created: group='%s', dataformat='%s', connected to a database" % \
logger.debug("RemoteDataSource '%s' created: group='%s', dataformat='%s', connected to a database" % \
(name, details['channel'], algorithm.input_map[name]))
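The setup() call above is the whole client-side contract of the new RemoteDataSource. A sketch of the call shape using a stub class; only the parameter names come from the hunk, while the dataformat string and return behaviour are assumptions:

class RemoteDataSourceStub(object):
    def setup(self, socket, input_name, dataformat_name, prefix, unpack=True):
        # the real implementation negotiates with the server over `socket`
        self.input_name = input_name
        self.dataformat_name = dataformat_name
        return True                    # False would signal a setup failure

data_source = RemoteDataSourceStub()
if not data_source.setup(socket=object(), input_name='image',
                         dataformat_name='system/array_2d_uint8/1',  # hypothetical
                         prefix='/prefix', unpack=True):
    raise IOError("cannot setup remote data source 'image'")
print(data_source.input_name)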
@@ -224,21 +233,10 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
if details['channel'] == config['channel']: # synchronized
input = _create_local_input(details)
else:
-                    _create_data_loader(details)
+                    _create_data_source(details)
elif algorithm.type == Algorithm.AUTONOMOUS:
-                _create_data_loader(details)
-
-        elif cache_access == AccessMode.REMOTE:
-            if socket is None:
-                raise IOError("No socket provided for remote inputs")
-
-            input = RemoteInput(name, algorithm.dataformats[algorithm.input_map[name]],
-                                socket, unpack=unpack)
-
-            logger.debug("RemoteInput '%s' created: group='%s', dataformat='%s'" % \
-                         (name, details['channel'], algorithm.input_map[name]))
+                _create_data_source(details)
else:
continue
@@ -267,8 +265,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
#----------------------------------------------------------
-def create_outputs_from_configuration(config, algorithm, prefix, cache_root, input_list,
-                                      cache_access=AccessMode.NONE, socket=None):
+def create_outputs_from_configuration(config, algorithm, prefix, cache_root, input_list):
data_sinks = []
output_list = OutputList()
@@ -300,73 +297,56 @@ def create_outputs_from_configuration(config, algorithm, prefix, cache_root, input_list):
if input_group is not None:
synchronization_listener = input_group.synchronization_listener
-        if cache_access == AccessMode.LOCAL:
-            path = os.path.join(cache_root, details['path'] + '.data')
-            dirname = os.path.dirname(path)
-
-            # Make sure that the directory exists while taking care of race
-            # conditions. see: http://stackoverflow.com/questions/273192/check-if-a-directory-exists-and-create-it-if-necessary
-            try:
-                if (len(dirname) > 0):
-                    os.makedirs(dirname)
-            except OSError as exception:
-                if exception.errno != errno.EEXIST:
-                    raise
-
-            if start_index is None:
-                for k, v in config['inputs'].items():
-                    if v['channel'] == config['channel']:
-                        input_path = os.path.join(cache_root, v['path'] + '.data')
-                        break
-
-                (data_filenames, indices_filenames, data_checksum_filenames, indices_checksum_filenames) = \
-                    getAllFilenames(input_path)
-
-                end_indices = [ int(x.split('.')[-2]) for x in indices_filenames ]
-                end_indices.sort()
-
-                start_index = 0
-                end_index = end_indices[-1]
-
-            data_sink = CachedDataSink()
-            data_sinks.append(data_sink)
-
-            status = data_sink.setup(
-                filename=path,
-                dataformat=dataformat,
-                start_index=start_index,
-                end_index=end_index,
-                encoding='binary'
-            )
-
-            if not status:
-                raise IOError("Cannot create cache sink '%s'" % details['path'])
-
-            output_list.add(Output(name, data_sink,
-                                   synchronization_listener=synchronization_listener,
-                                   force_start_index=start_index)
-            )
-
-            if 'result' not in config:
-                logger.debug("Output '%s' created: group='%s', dataformat='%s', filename='%s'" % \
-                             (name, details['channel'], dataformat_name, path))
-            else:
-                logger.debug("Output '%s' created: dataformat='%s', filename='%s'" % \
-                             (name, dataformat_name, path))
-
-        elif cache_access == AccessMode.REMOTE:
-            if socket is None:
-                raise IOError("No socket provided for remote outputs")
-
-            output_list.add(RemoteOutput(name, dataformat, socket,
-                                         synchronization_listener=synchronization_listener,
-                                         force_start_index=start_index or 0)
-            )
-
-            logger.debug("RemoteOutput '%s' created: group='%s', dataformat='%s'" % \
-                         (name, details['channel'], dataformat_name))
+        path = os.path.join(cache_root, details['path'] + '.data')
+        dirname = os.path.dirname(path)
+
+        # Make sure that the directory exists while taking care of race
+        # conditions. see: http://stackoverflow.com/questions/273192/check-if-a-directory-exists-and-create-it-if-necessary
+        try:
+            if (len(dirname) > 0):
+                os.makedirs(dirname)
+        except OSError as exception:
+            if exception.errno != errno.EEXIST:
+                raise
+
+        if start_index is None:
+            for k, v in config['inputs'].items():
+                if v['channel'] == config['channel']:
+                    input_path = os.path.join(cache_root, v['path'] + '.data')
+                    break
+
+            (data_filenames, indices_filenames, data_checksum_filenames, indices_checksum_filenames) = \
+                getAllFilenames(input_path)
+
+            end_indices = [ int(x.split('.')[-2]) for x in indices_filenames ]
+            end_indices.sort()
+
+            start_index = 0
+            end_index = end_indices[-1]
+
+        data_sink = CachedDataSink()
+        data_sinks.append(data_sink)
+
+        status = data_sink.setup(
+            filename=path,
+            dataformat=dataformat,
+            start_index=start_index,
+            end_index=end_index,
+            encoding='binary'
+        )
+
+        if not status:
+            raise IOError("Cannot create cache sink '%s'" % details['path'])
+
+        output_list.add(Output(name, data_sink,
+                               synchronization_listener=synchronization_listener,
+                               force_start_index=start_index)
+        )
+
+        if 'result' not in config:
+            logger.debug("Output '%s' created: group='%s', dataformat='%s', filename='%s'" % \
+                         (name, details['channel'], dataformat_name, path))
+        else:
-        else:
-            continue
+            logger.debug("Output '%s' created: dataformat='%s', filename='%s'" % \
+                         (name, dataformat_name, path))
return (output_list, data_sinks)
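When start_index is None, the block above infers the output range from an input on the same channel: cached data is split across files whose names end in <start>.<end>.data (with matching .index files), so the overall end index is the largest second-to-last dot-separated field among the index filenames. A self-contained sketch of that computation with hypothetical filenames:

indices_filenames = [
    '/cache/ab/cd/ef/0123.0.499.index',     # hypothetical cache file names
    '/cache/ab/cd/ef/0123.500.999.index',
    '/cache/ab/cd/ef/0123.1000.1499.index',
]

# same extraction as the code above: int(x.split('.')[-2]) is the end index
end_indices = sorted(int(x.split('.')[-2]) for x in indices_filenames)
start_index, end_index = 0, end_indices[-1]
print(start_index, end_index)               # 0 1499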