Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
beat
beat.core
Commits
4d28ad16
Commit
4d28ad16
authored
Oct 04, 2017
by
Philip ABBET
Browse files
Force 4-spaces indentation
parent
37e667d7
Changes
59
Expand all
Hide whitespace changes
Inline
Side-by-side
beat/core/algorithm.py
View file @
4d28ad16
This diff is collapsed.
Click to expand it.
beat/core/database.py
View file @
4d28ad16
...
...
@@ -50,315 +50,315 @@ from beat.backend.python.database import Database as BackendDatabase
class Database(BackendDatabase):
    """Databases define the start point of the dataflow in an experiment.

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (dict, str): The piece of data representing the database. It must
        validate against the schema defined for databases. If a string is
        passed, it is supposed to be a valid path to an database in the
        designated prefix area.

      dataformat_cache (dict, optional): A dictionary mapping dataformat names
        to loaded dataformats. This parameter is optional and, if passed, may
        greatly speed-up database loading times as dataformats that are already
        loaded may be re-used. If you use this parameter, you must guarantee
        that the cache is refreshed as appropriate in case the underlying
        dataformats change.


    Attributes:

      name (str): The full, valid name of this database

      description (str): The short description string, loaded from the JSON
        file if one was set.

      documentation (str): The full-length docstring for this object.

      storage (object): A simple object that provides information about file
        paths for this database

      errors (list): A list containing errors found while loading this
        database.

      data (dict): The original data for this database, as loaded by our JSON
        decoder.

    """
def
__init__
(
self
,
prefix
,
data
,
dataformat_cache
=
None
):
super
(
Database
,
self
).
__init__
(
prefix
,
data
,
dataformat_cache
)
def
__init__
(
self
,
prefix
,
data
,
dataformat_cache
=
None
):
super
(
Database
,
self
).
__init__
(
prefix
,
data
,
dataformat_cache
)
def
_load
(
self
,
data
,
dataformat_cache
):
"""Loads the database"""
def
_load
(
self
,
data
,
dataformat_cache
):
"""Loads the database"""
self
.
_name
=
None
self
.
storage
=
None
self
.
dataformats
=
{}
# preloaded dataformats
code
=
None
self
.
_name
=
None
self
.
storage
=
None
self
.
dataformats
=
{}
# preloaded dataformats
code
=
None
if
isinstance
(
data
,
(
tuple
,
list
)):
#user has passed individual info
if
isinstance
(
data
,
(
tuple
,
list
)):
#user has passed individual info
data
,
code
=
data
#break down into two components
data
,
code
=
data
#break down into two components
if
isinstance
(
data
,
six
.
string_types
):
#user has passed a file pointer
if
isinstance
(
data
,
six
.
string_types
):
#user has passed a file pointer
self
.
_name
=
data
self
.
storage
=
Storage
(
self
.
prefix
,
self
.
_name
)
data
=
self
.
storage
.
json
.
path
if
not
self
.
storage
.
json
.
exists
():
self
.
errors
.
append
(
'Database declaration file not found: %s'
%
data
)
return
self
.
_name
=
data
self
.
storage
=
Storage
(
self
.
prefix
,
self
.
_name
)
data
=
self
.
storage
.
json
.
path
if
not
self
.
storage
.
json
.
exists
():
self
.
errors
.
append
(
'Database declaration file not found: %s'
%
data
)
return
# this runs basic validation, including JSON loading if required
self
.
data
,
self
.
errors
=
schema
.
validate
(
'database'
,
data
)
if
self
.
errors
:
return
#don't proceed with the rest of validation
# this runs basic validation, including JSON loading if required
self
.
data
,
self
.
errors
=
schema
.
validate
(
'database'
,
data
)
if
self
.
errors
:
return
#don't proceed with the rest of validation
if
self
.
storage
is
not
None
:
#loading from the disk, check code
if
not
self
.
storage
.
code
.
exists
():
self
.
errors
.
append
(
'Database view code not found: %s'
%
\
self
.
storage
.
code
.
path
)
return
else
:
code
=
self
.
storage
.
code
.
load
()
if
self
.
storage
is
not
None
:
#loading from the disk, check code
if
not
self
.
storage
.
code
.
exists
():
self
.
errors
.
append
(
'Database view code not found: %s'
%
\
self
.
storage
.
code
.
path
)
return
else
:
code
=
self
.
storage
.
code
.
load
()
# At this point, `code' can be a string (or a binary blob) or ``None``
if
code
is
None
:
# loads the default code for an algorithm
self
.
code
=
prototypes
.
binary_load
(
'view.py'
)
# At this point, `code' can be a string (or a binary blob) or ``None``
if
code
is
None
:
# loads the default code for an algorithm
self
.
code
=
prototypes
.
binary_load
(
'view.py'
)
else
:
# just assign it - notice that in this case, no language is set
self
.
code
=
code
else
:
# just assign it - notice that in this case, no language is set
self
.
code
=
code
if
self
.
errors
:
return
#don't proceed with the rest of validation
if
self
.
errors
:
return
#don't proceed with the rest of validation
self
.
_validate_semantics
(
dataformat_cache
)
self
.
_validate_semantics
(
dataformat_cache
)
def
_validate_semantics
(
self
,
dataformat_cache
):
"""Validates all sematical aspects of the database"""
def
_validate_semantics
(
self
,
dataformat_cache
):
"""Validates all sematical aspects of the database"""
# all protocol names must be unique
protocol_names
=
[
k
[
'name'
]
for
k
in
self
.
data
[
'protocols'
]]
if
len
(
protocol_names
)
!=
len
(
set
(
protocol_names
)):
self
.
errors
.
append
(
"found different protocols with the same name: %s"
%
\
(
protocol_names
,)
)
# all protocol names must be unique
protocol_names
=
[
k
[
'name'
]
for
k
in
self
.
data
[
'protocols'
]]
if
len
(
protocol_names
)
!=
len
(
set
(
protocol_names
)):
self
.
errors
.
append
(
"found different protocols with the same name: %s"
%
\
(
protocol_names
,)
)
# all set names within a protocol must be unique
for
protocol
in
self
.
data
[
'protocols'
]:
set_names
=
[
k
[
'name'
]
for
k
in
protocol
[
'sets'
]]
if
len
(
set_names
)
!=
len
(
set
(
set_names
)):
self
.
errors
.
append
(
"found different sets with the same name at protocol "
\
"`%s': %s"
%
(
protocol
[
'name'
],
set_names
),
)
# all set names within a protocol must be unique
for
protocol
in
self
.
data
[
'protocols'
]:
set_names
=
[
k
[
'name'
]
for
k
in
protocol
[
'sets'
]]
if
len
(
set_names
)
!=
len
(
set
(
set_names
)):
self
.
errors
.
append
(
"found different sets with the same name at protocol "
\
"`%s': %s"
%
(
protocol
[
'name'
],
set_names
),
)
# all outputs must have valid data types
for
_set
in
protocol
[
'sets'
]:
# all outputs must have valid data types
for
_set
in
protocol
[
'sets'
]:
for
key
,
value
in
_set
[
'outputs'
].
items
():
for
key
,
value
in
_set
[
'outputs'
].
items
():
if
value
in
self
.
dataformats
:
continue
if
value
in
self
.
dataformats
:
continue
if
value
in
dataformat_cache
:
#re-use
dataformat
=
dataformat_cache
[
value
]
else
:
dataformat
=
DataFormat
(
self
.
prefix
,
value
)
dataformat_cache
[
value
]
=
dataformat
if
value
in
dataformat_cache
:
#re-use
dataformat
=
dataformat_cache
[
value
]
else
:
dataformat
=
DataFormat
(
self
.
prefix
,
value
)
dataformat_cache
[
value
]
=
dataformat
self
.
dataformats
[
value
]
=
dataformat
self
.
dataformats
[
value
]
=
dataformat
if
dataformat
.
errors
:
self
.
errors
.
append
(
"found error validating data format `%s' "
\
"for output `%s' on set `%s' of protocol `%s': %s"
%
\
(
value
,
key
,
_set
[
'name'
],
protocol
[
'name'
],
str
(
dataformat
.
errors
))
)
if
dataformat
.
errors
:
self
.
errors
.
append
(
"found error validating data format `%s' "
\
"for output `%s' on set `%s' of protocol `%s': %s"
%
\
(
value
,
key
,
_set
[
'name'
],
protocol
[
'name'
],
str
(
dataformat
.
errors
))
)
# all view names must be relative to the database root path
if
_set
[
'view'
].
find
(
'.'
)
!=
-
1
or
_set
[
'view'
].
find
(
os
.
sep
)
!=
-
1
:
self
.
errors
.
append
(
"dataset views are required to sit inside the "
\
"database root folder, but `%s' is either in a "
\
"subdirectory or points to a python module, what is "
\
"unsupported by this version"
%
(
_set
[
'view'
],)
)
# all view names must be relative to the database root path
if
_set
[
'view'
].
find
(
'.'
)
!=
-
1
or
_set
[
'view'
].
find
(
os
.
sep
)
!=
-
1
:
self
.
errors
.
append
(
"dataset views are required to sit inside the "
\
"database root folder, but `%s' is either in a "
\
"subdirectory or points to a python module, what is "
\
"unsupported by this version"
%
(
_set
[
'view'
],)
)
@
property
def
name
(
self
):
"""Returns the name of this object
"""
return
self
.
_name
or
'__unnamed_database__'
@
property
def
name
(
self
):
"""Returns the name of this object
"""
return
self
.
_name
or
'__unnamed_database__'
@
name
.
setter
def
name
(
self
,
value
):
self
.
_name
=
value
self
.
storage
=
Storage
(
self
.
prefix
,
value
)
@
name
.
setter
def
name
(
self
,
value
):
self
.
_name
=
value
self
.
storage
=
Storage
(
self
.
prefix
,
value
)
def
hash_output
(
self
,
protocol
,
set
,
output
):
"""Creates a unique hash the represents the output from the dataset
def
hash_output
(
self
,
protocol
,
set
,
output
):
"""Creates a unique hash the represents the output from the dataset
Parameters:
Parameters:
protocol (str): The name of the protocol containing the set and output
of interest
protocol (str): The name of the protocol containing the set and output
of interest
set (str): The name of the set in the protocol containing the output of
reference
set (str): The name of the set in the protocol containing the output of
reference
output (str): The name of the output in the set.
output (str): The name of the output in the set.
Returns:
Returns:
str: The hexadecimal digest of the hash representing the output.
str: The hexadecimal digest of the hash representing the output.
Raises:
Raises:
KeyError: If the protocol, set or output don't reference an existing
output for this database.
KeyError: If the protocol, set or output don't reference an existing
output for this database.
"""
"""
# checks protocol, set and output names
set_data
=
self
.
set
(
protocol
,
set
)
output_data
=
set_data
[
'outputs'
][
output
]
# checks protocol, set and output names
set_data
=
self
.
set
(
protocol
,
set
)
output_data
=
set_data
[
'outputs'
][
output
]
# dumps the hash
return
hash
.
hashDatasetOutput
(
self
.
hash
(),
protocol
,
set
,
output
)
# dumps the hash
return
hash
.
hashDatasetOutput
(
self
.
hash
(),
protocol
,
set
,
output
)
@
property
def
description
(
self
):
"""The short description for this object"""
return
self
.
data
.
get
(
'description'
,
None
)
@
property
def
description
(
self
):
"""The short description for this object"""
return
self
.
data
.
get
(
'description'
,
None
)
@
description
.
setter
def
description
(
self
,
value
):
"""Sets the short description for this object"""
self
.
data
[
'description'
]
=
value
@
description
.
setter
def
description
(
self
,
value
):
"""Sets the short description for this object"""
self
.
data
[
'description'
]
=
value
@
property
def
documentation
(
self
):
"""The full-length description for this object"""
@
property
def
documentation
(
self
):
"""The full-length description for this object"""
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
if
self
.
storage
.
doc
.
exists
():
return
self
.
storage
.
doc
.
load
()
return
None
if
self
.
storage
.
doc
.
exists
():
return
self
.
storage
.
doc
.
load
()
return
None
@
documentation
.
setter
def
documentation
(
self
,
value
):
"""Sets the full-length description for this object"""
@
documentation
.
setter
def
documentation
(
self
,
value
):
"""Sets the full-length description for this object"""
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
if
hasattr
(
value
,
'read'
):
self
.
storage
.
doc
.
save
(
value
.
read
())
else
:
self
.
storage
.
doc
.
save
(
value
)
if
hasattr
(
value
,
'read'
):
self
.
storage
.
doc
.
save
(
value
.
read
())
else
:
self
.
storage
.
doc
.
save
(
value
)
def
hash
(
self
):
"""Returns the hexadecimal hash for its declaration"""
def
hash
(
self
):
"""Returns the hexadecimal hash for its declaration"""
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
return
self
.
storage
.
hash
()
return
self
.
storage
.
hash
()
def
json_dumps
(
self
,
indent
=
4
):
"""Dumps the JSON declaration of this object in a string
def
json_dumps
(
self
,
indent
=
4
):
"""Dumps the JSON declaration of this object in a string
Parameters:
Parameters:
indent (int): The number of indentation spaces at every indentation level
indent (int): The number of indentation spaces at every indentation level
Returns:
Returns:
str: The JSON representation for this object
str: The JSON representation for this object
"""
"""
return
simplejson
.
dumps
(
self
.
data
,
indent
=
indent
,
cls
=
utils
.
NumpyJSONEncoder
)
return
simplejson
.
dumps
(
self
.
data
,
indent
=
indent
,
cls
=
utils
.
NumpyJSONEncoder
)
def
__str__
(
self
):
def
__str__
(
self
):
return
self
.
json_dumps
()
return
self
.
json_dumps
()
def
write
(
self
,
storage
=
None
):
"""Writes contents to prefix location
def
write
(
self
,
storage
=
None
):
"""Writes contents to prefix location
Parameters:
Parameters:
storage (Storage, optional): If you pass a new storage, then this object
will be written to that storage point rather than its default.
storage (Storage, optional): If you pass a new storage, then this object
will be written to that storage point rather than its default.
"""
"""
if
storage
is
None
:
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
storage
=
self
.
storage
#overwrite
if
storage
is
None
:
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
storage
=
self
.
storage
#overwrite
storage
.
save
(
str
(
self
),
self
.
code
,
self
.
description
)
storage
.
save
(
str
(
self
),
self
.
code
,
self
.
description
)
def
export
(
self
,
prefix
):
"""Recursively exports itself into another prefix
def
export
(
self
,
prefix
):
"""Recursively exports itself into another prefix
Dataformats associated are also exported recursively
Dataformats associated are also exported recursively
Parameters:
Parameters:
prefix (str): A path to a prefix that must different then my own.
prefix (str): A path to a prefix that must different then my own.
Returns:
Returns:
None
None
Raises:
Raises:
RuntimeError: If prefix and self.prefix point to the same directory.
RuntimeError: If prefix and self.prefix point to the same directory.
"""
"""
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
if
not
self
.
_name
:
raise
RuntimeError
(
"database has no name"
)
if
not
self
.
valid
:
raise
RuntimeError
(
"database is not valid"
)
if
not
self
.
valid
:
raise
RuntimeError
(
"database is not valid"
)
if
os
.
path
.
samefile
(
prefix
,
self
.
prefix
):
raise
RuntimeError
(
"Cannot export database to the same prefix (%s == "
\
"%s)"
%
(
prefix
,
self
.
prefix
))
if
os
.
path
.
samefile
(
prefix
,
self
.
prefix
):
raise
RuntimeError
(
"Cannot export database to the same prefix (%s == "
\
"%s)"
%
(
prefix
,
self
.
prefix
))
for
k
in
self
.
dataformats
.
values
():
k
.
export
(
prefix
)
self
.
write
(
Storage
(
prefix
,
self
.
name
))
for
k
in
self
.
dataformats
.
values
():
k
.
export
(
prefix
)
self
.
write
(
Storage
(
prefix
,
self
.
name
))
beat/core/dataformat.py
View file @
4d28ad16
...
...
@@ -45,258 +45,258 @@ from beat.backend.python.dataformat import DataFormat as BackendDataFormat
class DataFormat(BackendDataFormat):
    """Data formats define the chunks of data that circulate between blocks.

    Parameters:

      prefix (str): Establishes the prefix of your installation.

      data (object, optional): The piece of data representing the data format.
        It must validate against the schema defined for data formats. If a
        string is passed, it is supposed to be a valid path to an data format
        in the designated prefix area. If ``None`` is passed, loads our
        default prototype for data formats.

      parent (tuple, optional): The parent DataFormat for this format. If set
        to ``None``, this means this dataformat is the first one on the
        hierarchy tree. If set to a tuple, the contents are
        ``(format-instance, field-name)``, which indicates the originating
        object that is this object's parent and the name of the field on that
        object that points to this one.

      dataformat_cache (dict, optional): A dictionary mapping dataformat names
        to loaded dataformats. This parameter is optional and, if passed, may
        greatly speed-up data format loading times as dataformats that are
        already loaded may be re-used. If you use this parameter, you must
        guarantee that the cache is refreshed as appropriate in case the
        underlying dataformats change.


    Attributes:

      name (str): The full, valid name of this dataformat

      description (str): The short description string, loaded from the JSON
        file if one was set.

      documentation (str): The full-length docstring for this object.

      storage (object): A simple object that provides information about file
        paths for this dataformat

      errors (list of str): A list containing errors found while loading this
        dataformat.

      data (dict): The original data for this dataformat, as loaded by our
        JSON decoder.

      resolved (dict): A dictionary similar to :py:attr:`data`, but with
        references fully resolved.

      referenced (dict): A dictionary pointing to all loaded dataformats.

      parent (beat.core.dataformat.DataFormat): The pointer to the
        dataformat to which the current format is part of. It is useful for
        internal error reporting.

    """

    def __init__(self, prefix, data, parent=None, dataformat_cache=None):
        """Delegates full construction to the backend implementation."""

        super(DataFormat, self).__init__(prefix, data, parent,
                                         dataformat_cache)