Commit 4d28ad16 authored by Philip ABBET's avatar Philip ABBET
Browse files

Force 4-spaces indentation

parent 37e667d7
This diff is collapsed.
......@@ -50,315 +50,315 @@ from beat.backend.python.database import Database as BackendDatabase
class Database(BackendDatabase):
    """Databases define the start point of the dataflow in an experiment.

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (dict, str): The piece of data representing the database. It must
        validate against the schema defined for databases. If a string is
        passed, it is supposed to be a valid path to a database in the
        designated prefix area.

      dataformat_cache (dict, optional): A dictionary mapping dataformat names
        to loaded dataformats. This parameter is optional and, if passed, may
        greatly speed-up database loading times as dataformats that are
        already loaded may be re-used. If you use this parameter, you must
        guarantee that the cache is refreshed as appropriate in case the
        underlying dataformats change.

    Attributes:

      name (str): The full, valid name of this database

      description (str): The short description string, loaded from the JSON
        file if one was set.

      documentation (str): The full-length docstring for this object.

      storage (object): A simple object that provides information about file
        paths for this database

      errors (list): A list containing errors found while loading this
        database.

      data (dict): The original data for this database, as loaded by our JSON
        decoder.

    """

    def __init__(self, prefix, data, dataformat_cache=None):
        super(Database, self).__init__(prefix, data, dataformat_cache)

    def _load(self, data, dataformat_cache):
        """Loads the database from its JSON declaration and view code"""

        self._name = None
        self.storage = None
        self.dataformats = {}  # preloaded dataformats
        code = None

        if isinstance(data, (tuple, list)):  # user has passed individual info
            data, code = data  # break down into two components

        if isinstance(data, six.string_types):  # user has passed a file pointer
            self._name = data
            self.storage = Storage(self.prefix, self._name)
            data = self.storage.json.path
            if not self.storage.json.exists():
                self.errors.append('Database declaration file not found: %s' % data)
                return

        # this runs basic validation, including JSON loading if required
        self.data, self.errors = schema.validate('database', data)
        if self.errors:
            return  # don't proceed with the rest of validation

        if self.storage is not None:  # loading from the disk, check code
            if not self.storage.code.exists():
                self.errors.append('Database view code not found: %s' % \
                        self.storage.code.path)
                return
            else:
                code = self.storage.code.load()

        # At this point, `code' can be a string (or a binary blob) or ``None``
        if code is None:  # loads the default code for an algorithm
            self.code = prototypes.binary_load('view.py')
        else:  # just assign it - notice that in this case, no language is set
            self.code = code

        if self.errors:
            return  # don't proceed with the rest of validation

        self._validate_semantics(dataformat_cache)

    def _validate_semantics(self, dataformat_cache):
        """Validates all semantic aspects of the database"""

        # all protocol names must be unique
        protocol_names = [k['name'] for k in self.data['protocols']]
        if len(protocol_names) != len(set(protocol_names)):
            self.errors.append(
                "found different protocols with the same name: %s" % \
                (protocol_names,)
            )

        # all set names within a protocol must be unique
        for protocol in self.data['protocols']:
            set_names = [k['name'] for k in protocol['sets']]
            if len(set_names) != len(set(set_names)):
                self.errors.append(
                    "found different sets with the same name at protocol " \
                    "`%s': %s" % (protocol['name'], set_names),
                )

            # all outputs must have valid data types
            for _set in protocol['sets']:

                for key, value in _set['outputs'].items():

                    if value in self.dataformats:
                        continue

                    if value in dataformat_cache:  # re-use
                        dataformat = dataformat_cache[value]
                    else:
                        dataformat = DataFormat(self.prefix, value)
                        dataformat_cache[value] = dataformat

                    self.dataformats[value] = dataformat

                    if dataformat.errors:
                        self.errors.append("found error validating data format `%s' " \
                                "for output `%s' on set `%s' of protocol `%s': %s" % \
                                (value, key, _set['name'], protocol['name'],
                                 str(dataformat.errors))
                                )

                # all view names must be relative to the database root path
                if _set['view'].find('.') != -1 or _set['view'].find(os.sep) != -1:
                    self.errors.append("dataset views are required to sit inside the " \
                            "database root folder, but `%s' is either in a " \
                            "subdirectory or points to a python module, what is " \
                            "unsupported by this version" % (_set['view'],)
                            )

    @property
    def name(self):
        """Returns the name of this object"""
        return self._name or '__unnamed_database__'

    @name.setter
    def name(self, value):
        self._name = value
        self.storage = Storage(self.prefix, value)

    def hash_output(self, protocol, set, output):
        """Creates a unique hash the represents the output from the dataset

        Parameters:

          protocol (str): The name of the protocol containing the set and
            output of interest

          set (str): The name of the set in the protocol containing the
            output of reference

          output (str): The name of the output in the set.

        Returns:

          str: The hexadecimal digest of the hash representing the output.

        Raises:

          KeyError: If the protocol, set or output don't reference an
            existing output for this database.

        """

        # checks protocol, set and output names (raises KeyError if invalid)
        set_data = self.set(protocol, set)
        output_data = set_data['outputs'][output]

        # dumps the hash
        return hash.hashDatasetOutput(self.hash(), protocol, set, output)

    @property
    def description(self):
        """The short description for this object"""
        return self.data.get('description', None)

    @description.setter
    def description(self, value):
        """Sets the short description for this object"""
        self.data['description'] = value

    @property
    def documentation(self):
        """The full-length description for this object"""

        if not self._name:
            raise RuntimeError("database has no name")

        if self.storage.doc.exists():
            return self.storage.doc.load()
        return None

    @documentation.setter
    def documentation(self, value):
        """Sets the full-length description for this object"""

        if not self._name:
            raise RuntimeError("database has no name")

        if hasattr(value, 'read'):
            self.storage.doc.save(value.read())
        else:
            self.storage.doc.save(value)

    def hash(self):
        """Returns the hexadecimal hash for its declaration"""

        if not self._name:
            raise RuntimeError("database has no name")

        return self.storage.hash()

    def json_dumps(self, indent=4):
        """Dumps the JSON declaration of this object in a string

        Parameters:

          indent (int): The number of indentation spaces at every indentation
            level

        Returns:

          str: The JSON representation for this object

        """

        return simplejson.dumps(self.data, indent=indent,
                cls=utils.NumpyJSONEncoder)

    def __str__(self):
        return self.json_dumps()

    def write(self, storage=None):
        """Writes contents to prefix location

        Parameters:

          storage (Storage, optional): If you pass a new storage, then this
            object will be written to that storage point rather than its
            default.

        """

        if storage is None:
            if not self._name:
                raise RuntimeError("database has no name")
            storage = self.storage  # overwrite

        storage.save(str(self), self.code, self.description)

    def export(self, prefix):
        """Recursively exports itself into another prefix

        Dataformats associated are also exported recursively

        Parameters:

          prefix (str): A path to a prefix that must be different than my own.

        Returns:

          None

        Raises:

          RuntimeError: If prefix and self.prefix point to the same directory.

        """

        if not self._name:
            raise RuntimeError("database has no name")

        if not self.valid:
            raise RuntimeError("database is not valid")

        if os.path.samefile(prefix, self.prefix):
            raise RuntimeError("Cannot export database to the same prefix (%s == " \
                    "%s)" % (prefix, self.prefix))

        for k in self.dataformats.values():
            k.export(prefix)
        self.write(Storage(prefix, self.name))
......@@ -45,258 +45,258 @@ from beat.backend.python.dataformat import DataFormat as BackendDataFormat
class DataFormat(BackendDataFormat):
    """Data formats define the chunks of data that circulate between blocks.

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (object, optional): The piece of data representing the data
        format. It must validate against the schema defined for data formats.
        If a string is passed, it is supposed to be a valid path to a data
        format in the designated prefix area. If ``None`` is passed, loads
        our default prototype for data formats.

      parent (tuple, optional): The parent DataFormat for this format. If set
        to ``None``, this means this dataformat is the first one on the
        hierarchy tree. If set to a tuple, the contents are
        ``(format-instance, field-name)``, which indicates the originating
        object that is this object's parent and the name of the field on that
        object that points to this one.

      dataformat_cache (dict, optional): A dictionary mapping dataformat
        names to loaded dataformats. This parameter is optional and, if
        passed, may greatly speed-up data format loading times as dataformats
        that are already loaded may be re-used. If you use this parameter,
        you must guarantee that the cache is refreshed as appropriate in case
        the underlying dataformats change.

    Attributes:

      name (str): The full, valid name of this dataformat

      description (str): The short description string, loaded from the JSON
        file if one was set.

      documentation (str): The full-length docstring for this object.

      storage (object): A simple object that provides information about file
        paths for this dataformat

      errors (list of str): A list containing errors found while loading this
        dataformat.

      data (dict): The original data for this dataformat, as loaded by our
        JSON decoder.

      resolved (dict): A dictionary similar to :py:attr:`data`, but with
        references fully resolved.

      referenced (dict): A dictionary pointing to all loaded dataformats.

      parent (beat.core.dataformat.DataFormat): The pointer to the
        dataformat to which the current format is part of. It is useful for
        internal error reporting.

    """

    def __init__(self, prefix, data, parent=None, dataformat_cache=None):
        super(DataFormat, self).__init__(prefix, data, parent, dataformat_cache)