beat / beat.backend.python / Commits
1819d452
Commit
1819d452
authored
Dec 18, 2017
by
Philip ABBET
Browse files
Refactoring: No remote input / output anymore
parent
29e6ab5e
Changes
15
beat/backend/python/data.py
@@ -680,7 +680,7 @@ class RemoteDataSource(DataSource):
         answer = self.socket.recv()
 
         if answer == 'err':
-            self.read_duration += time.time() - _start
+            self.read_duration += time.time() - t1
             kind = self.socket.recv()
             message = self.socket.recv()
             raise RemoteException(kind, message)

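The one-line fix above presumably points the accumulator at the timestamp variable that actually exists in this method (t1) rather than the stale _start name. The accounting itself is ordinary delta timing around the blocking receive; a trivial, self-contained sketch, with time.sleep() standing in for socket traffic:

    import time

    read_duration = 0.0
    for _ in range(3):
        t1 = time.time()
        time.sleep(0.01)                      # stands in for the blocking socket.recv()
        read_duration += time.time() - t1

    print('total read time: %.3fs' % read_duration)
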
@@ -710,36 +710,6 @@ class RemoteDataSource(DataSource):
 #----------------------------------------------------------
 
 
-class LegacyDataSource(object):
-
-    """Interface of all the Data Sources
-
-    Data Sources are used to provides data to the inputs of an algorithm.
-    """
-
-    __metaclass__ = abc.ABCMeta
-
-    @abc.abstractmethod
-    def next(self, load=True):
-        """Retrieves the next block of data
-
-        Returns:
-
-          A tuple (*data*, *start_index*, *end_index*)
-        """
-        pass
-
-    @abc.abstractmethod
-    def hasMoreData(self):
-        """Indicates if there is more data to process on some of the inputs"""
-        pass
-
-
-#----------------------------------------------------------
 
 
 class DataSink(object):
 
     """Interface of all the Data Sinks

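For reference, the removed LegacyDataSource defined the old pull-style contract that the rest of this commit retires: consumers call next() until hasMoreData() returns False. A minimal, self-contained sketch of that contract with a toy list-backed implementation (nothing here is part of beat.backend.python):

    import abc


    class PullSource(object):
        """Toy re-statement of the removed next()/hasMoreData() contract."""

        __metaclass__ = abc.ABCMeta   # mirrors the Python 2 style of the removed class

        @abc.abstractmethod
        def next(self):
            """Returns a (data, start_index, end_index) tuple, or (None, None, None)."""
            pass

        @abc.abstractmethod
        def hasMoreData(self):
            pass


    class ListSource(PullSource):
        """Serves pre-built (data, start_index, end_index) blocks from a plain list."""

        def __init__(self, blocks):
            self.blocks = list(blocks)

        def next(self):
            return self.blocks.pop(0) if self.blocks else (None, None, None)

        def hasMoreData(self):
            return len(self.blocks) > 0


    source = ListSource([('a', 0, 0), ('b', 1, 1)])
    while source.hasMoreData():
        data, start_index, end_index = source.next()
        print(data, start_index, end_index)
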
@@ -780,7 +750,7 @@ class StdoutDataSink(DataSink):
 
     """Data Sink that prints informations about the written data on stdout
 
-    Note: The written data is lost! Use ii for debugging purposes
+    Note: The written data is lost! Use this class for debugging purposes
     """
 
     def __init__(self):

@@ -824,93 +794,6 @@ class StdoutDataSink(DataSink):
 #----------------------------------------------------------
 
 
-class CachedLegacyDataSource(LegacyDataSource):
-
-    """Data Source that load data from the Cache"""
-
-    def __init__(self):
-        self.cached_file = None
-        self.dataformat = None
-        self.next_data_index = 0
-
-    def setup(self, filename, prefix, force_start_index=None, force_end_index=None,
-              unpack=True):
-        """Configures the data source
-
-        Parameters:
-
-          filename (str): Name of the file to read the data from
-
-          prefix (str, path): Path to the prefix where the dataformats are stored.
-
-          force_start_index (int): The starting index (if not set or set to
-            ``None``, the default, read data from the begin of file)
-
-          force_end_index (int): The end index (if not set or set to ``None``, the
-            default, reads the data until the end)
-
-          unpack (bool): Indicates if the data must be unpacked or not
-
-        Returns:
-
-          ``True``, if successful, or ``False`` otherwise.
-        """
-
-        self.cached_file = CachedDataSource()
-
-        if self.cached_file.setup(filename, prefix, start_index=force_start_index,
-                                  end_index=force_end_index, unpack=unpack):
-            self.dataformat = self.cached_file.dataformat
-            return True
-
-        return False
-
-    def close(self):
-        """Closes the data source"""
-        if self.cached_file is not None:
-            self.cached_file.close()
-            self.cached_file = None
-
-    def __del__(self):
-        """Makes sure the files are close when the object is deleted"""
-        self.close()
-
-    def next(self):
-        """Retrieve the next block of data
-
-        Returns:
-
-          A tuple (data, start_index, end_index)
-        """
-
-        if self.next_data_index >= len(self.cached_file):
-            return (None, None, None)
-
-        result = self.cached_file[self.next_data_index]
-        self.next_data_index += 1
-
-        return result
-
-    def hasMoreData(self):
-        """Indicates if there is more data to process on some of the inputs"""
-        return (self.next_data_index < len(self.cached_file))
-
-    def statistics(self):
-        """Return the statistics about the number of bytes read from the cache"""
-        return self.cached_file.statistics()
-
-
-#----------------------------------------------------------
 
 
 class CachedDataSink(DataSink):
 
     """Data Sink that save data in the Cache

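The removed wrapper only forwarded to CachedDataSource's sequence protocol (len() and integer indexing) plus setup() and close(), so callers now use CachedDataSource directly. A hedged sketch of the equivalent iteration, assuming the package is importable as beat.backend.python and that a cache file exists at the placeholder path; only calls visible in this diff are used:

    from beat.backend.python.data import CachedDataSource

    data_source = CachedDataSource()

    ok = data_source.setup(filename='/placeholder/cache/0123abcd.data',  # placeholder path
                           prefix='/placeholder/prefix',                 # placeholder prefix
                           start_index=None, end_index=None, unpack=True)
    if ok:
        # What CachedLegacyDataSource.next()/hasMoreData() did, written as a plain loop.
        for i in range(len(data_source)):
            data, start_index, end_index = data_source[i]
            print(start_index, end_index)
        data_source.close()
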
@@ -1093,92 +976,6 @@ class CachedDataSink(DataSink):
 #----------------------------------------------------------
 
 
-class MemoryLegacyDataSource(LegacyDataSource):
-
-    """Interface of all the Data Sources
-
-    Data Sources are used to provides data to the inputs of an algorithm.
-    """
-
-    def __init__(self, done_callback, next_callback=None, index=None):
-        self.data = []
-        self._done_callback = done_callback
-        self._next_callback = next_callback
-        self._last_data_index = -1
-
-    def add(self, data, start_data_index, end_data_index):
-        self.data.append((data, start_data_index, end_data_index))
-        self._last_data_index = end_data_index
-
-    def next(self):
-        """Retrieves the next block of data
-
-        :return: A tuple (*data*, *start_index*, *end_index*)
-        """
-
-        if (len(self.data) == 0) and (self._next_callback is not None):
-            if not (self._done_callback(self._last_data_index)):
-                self._next_callback()
-
-        if len(self.data) == 0:
-            return (None, None, None)
-
-        return self.data.pop(0)
-
-    def hasMoreData(self):
-        if len(self.data) != 0:
-            return True
-
-        return not self._done_callback(self._last_data_index)
-
-    def statistics(self):
-        """Return the statistics about the number of bytes read from the cache"""
-        return (0, 0)
-
-
-#----------------------------------------------------------
-
-
-class MemoryDataSink(DataSink):
-
-    """Data Sink that directly transmit data to associated MemoryLegacyDataSource
-    objects.
-    """
-
-    def __init__(self):
-        self.data_sources = None
-
-    def setup(self, data_sources):
-        """Configure the data sink
-
-        :param list data_sources: The MemoryLegacyDataSource objects to use
-        """
-        self.data_sources = data_sources
-
-    def write(self, data, start_data_index, end_data_index):
-        """Write a block of data
-
-        Parameters:
-
-          data (beat.core.baseformat.baseformat) The block of data to write
-
-          start_data_index (int): Start index of the written data
-
-          end_data_index (int): End index of the written data
-        """
-
-        for data_source in self.data_sources:
-            data_source.add(data, start_data_index, end_data_index)
-
-    def isConnected(self):
-        return len(self.data_sources) > 0
-
-
-#----------------------------------------------------------
 
 
 def load_data_index(cache_prefix, hash_path):
 
     """Loads a cached-data index if it exists. Returns empty otherwise.

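MemoryDataSink and MemoryLegacyDataSource together formed an in-memory, push-to-pull bridge: a producer (typically a database view) pushed blocks into the source through the sink, and the consumer pulled them back out with next()/hasMoreData(), using the two callbacks to request more data. A self-contained sketch of that pattern with toy classes (none of these names belong to beat.backend.python):

    class MemorySource(object):
        """Buffers pushed blocks and asks the producer for more via callbacks."""

        def __init__(self, done_callback, next_callback):
            self.blocks = []
            self._done = done_callback   # True once the producer has nothing left
            self._next = next_callback   # asks the producer to push one more block
            self._last_index = -1

        def add(self, data, start, end):
            self.blocks.append((data, start, end))
            self._last_index = end

        def next(self):
            if not self.blocks and not self._done(self._last_index):
                self._next()
            return self.blocks.pop(0) if self.blocks else (None, None, None)

        def hasMoreData(self):
            return bool(self.blocks) or not self._done(self._last_index)


    class Producer(object):
        """Toy stand-in for a database view pushing blocks through a sink."""

        def __init__(self, items):
            self.items = list(items)
            self.sink = None

        def done(self, last_index):
            return not self.items

        def push(self):
            index = self.sink._last_index + 1
            self.sink.add(self.items.pop(0), index, index)


    producer = Producer(['a', 'b', 'c'])
    source = MemorySource(done_callback=producer.done, next_callback=producer.push)
    producer.sink = source            # the source doubles as the sink in this toy

    while source.hasMoreData():
        print(source.next())          # ('a', 0, 0), ('b', 1, 1), ('c', 2, 2)
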
beat/backend/python/data_loaders.py
@@ -100,7 +100,7 @@ class DataView(object):
             input_data_indices.append( (current_start, self.data_index_end) )
 
-            self.infos[input_name] = dict(cached_file=infos['cached_file'],
+            self.infos[input_name] = dict(data_source=infos['data_source'],
                                           data_indices=input_data_indices,
                                           data=None,
                                           start_index=-1,

@@ -132,7 +132,7 @@ class DataView(object):
         for input_name, infos in self.infos.items():
             if (indices[0] < infos['start_index']) or (infos['end_index'] < indices[0]):
                 (infos['data'], infos['start_index'], infos['end_index']) = \
-                        infos['cached_file'].getAtDataIndex(indices[0])
+                        infos['data_source'].getAtDataIndex(indices[0])
 
             result[input_name] = infos['data']

@@ -189,10 +189,10 @@ class DataLoader(object):
         self.data_index_end   = -1    # Bigger index across all inputs
 
 
-    def add(self, input_name, cached_file):
-        self.infos[input_name] = dict(cached_file=cached_file,
-                                      data_indices=cached_file.data_indices(),
+    def add(self, input_name, data_source):
+        self.infos[input_name] = dict(data_source=data_source,
+                                      data_indices=data_source.data_indices(),
                                       data=None,
                                       start_index=-1,
                                       end_index=-1,

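After the rename, DataLoader.add() takes any object exposing data_indices() and getAtDataIndex(), which is what CachedDataSource and the new RemoteDataSource both provide according to this diff. A hedged sketch of registering a cached source with a loader; the channel name, input name, and paths are placeholders, and only calls visible in this commit are used:

    from beat.backend.python.data import CachedDataSource
    from beat.backend.python.data_loaders import DataLoader

    data_source = CachedDataSource()
    data_source.setup(filename='/placeholder/cache/0123abcd.data',  # placeholder path
                      prefix='/placeholder/prefix',                 # placeholder prefix
                      start_index=None, end_index=None, unpack=True)

    # Loaders are keyed by synchronization channel; 'train' is a placeholder name.
    data_loader = DataLoader('train')
    data_loader.add('image', data_source)   # 'image' stands in for the input name
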
@@ -247,7 +247,7 @@ class DataLoader(object):
         for input_name, infos in self.infos.items():
             if (indices[0] < infos['start_index']) or (infos['end_index'] < indices[0]):
                 (infos['data'], infos['start_index'], infos['end_index']) = \
-                        infos['cached_file'].getAtDataIndex(indices[0])
+                        infos['data_source'].getAtDataIndex(indices[0])
 
             result[input_name] = infos['data']

beat/backend/python/executor.py
@@ -101,21 +101,15 @@ class Executor(object):
         if self.algorithm.type == Algorithm.LEGACY:
             # Loads algorithm inputs
-            if self.data['proxy_mode']:
-                cache_access = AccessMode.REMOTE
-            else:
-                cache_access = AccessMode.LOCAL
-
             (self.input_list, self.data_loaders, _) = create_inputs_from_configuration(
                 self.data, self.algorithm, self.prefix, cache_root,
-                cache_access=cache_access, db_access=AccessMode.REMOTE,
+                cache_access=AccessMode.LOCAL, db_access=AccessMode.REMOTE,
                 socket=self.socket
             )
 
             # Loads algorithm outputs
             (self.output_list, _) = create_outputs_from_configuration(
-                self.data, self.algorithm, self.prefix, cache_root, self.input_list,
-                cache_access=cache_access, socket=self.socket
+                self.data, self.algorithm, self.prefix, cache_root, self.input_list
             )
 
         else:

@@ -126,8 +120,7 @@ class Executor(object):
 
             # Loads algorithm outputs
             (self.output_list, _) = create_outputs_from_configuration(
-                self.data, self.algorithm, self.prefix, cache_root, self.input_list,
-                cache_access=AccessMode.LOCAL
+                self.data, self.algorithm, self.prefix, cache_root, self.input_list
             )

beat/backend/python/helpers.py
@@ -32,8 +32,6 @@ import errno
 import logging
 logger = logging.getLogger(__name__)
 
-from .data import MemoryLegacyDataSource
-from .data import CachedLegacyDataSource
 from .data import CachedDataSource
 from .data import CachedDataSink
 from .data import getAllFilenames

@@ -41,21 +39,18 @@ from .data_loaders import DataLoaderList
 from .data_loaders import DataLoader
 from .inputs import InputList
 from .inputs import Input
-from .inputs import RemoteInput
 from .inputs import InputGroup
 from .outputs import SynchronizationListener
 from .outputs import OutputList
 from .outputs import Output
-from .outputs import RemoteOutput
 from .algorithm import Algorithm
 
 
 #----------------------------------------------------------
 
 
-def convert_experiment_configuration_to_container(config, proxy_mode):
+def convert_experiment_configuration_to_container(config):
     data = {
-        'proxy_mode': proxy_mode,
         'algorithm': config['algorithm'],
         'parameters': config['parameters'],
         'channel': config['channel'],

@@ -103,7 +98,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
 
     def _create_local_input(details):
-        data_source = CachedLegacyDataSource()
+        data_source = CachedDataSource()
         data_sources.append(data_source)
 
         filename = os.path.join(cache_root, details['path'] + '.data')

@@ -112,8 +107,8 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
             status = data_source.setup(
                 filename=filename,
                 prefix=prefix,
-                force_start_index=start_index,
-                force_end_index=end_index,
+                start_index=start_index,
+                end_index=end_index,
                 unpack=True,
             )
         else:

@@ -134,9 +129,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
         return input
 
 
-    def _create_data_loader(details):
-        filename = os.path.join(cache_root, details['path'] + '.data')
-
+    def _get_data_loader_for(details):
         data_loader = data_loader_list[details['channel']]
         if data_loader is None:
             data_loader = DataLoader(details['channel'])

@@ -144,8 +137,16 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
             logger.debug("Data loader created: group='%s'" % details['channel'])
 
-        cached_file = CachedDataSource()
-        result = cached_file.setup(
+        return data_loader
+
+
+    def _create_data_source(details):
+        data_loader = _get_data_loader_for(details)
+
+        filename = os.path.join(cache_root, details['path'] + '.data')
+
+        data_source = CachedDataSource()
+        result = data_source.setup(
             filename=filename,
             prefix=prefix,
             start_index=start_index,

@@ -156,7 +157,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
         if not result:
             raise IOError("cannot load cache file `%s'" % details['path'])
 
-        data_loader.add(name, cached_file)
+        data_loader.add(name, data_source)
 
         logger.debug("Input '%s' added to data loader: group='%s', dataformat='%s', filename='%s'" % \
             (name, details['channel'], algorithm.input_map[name], filename))

@@ -182,7 +183,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
             if not views.has_key(channel):
                 view = db.view(details['protocol'], details['set'])
-                view.prepare_outputs()
                 print details
+                view.setup()
 
                 views[channel] = view

@@ -192,26 +193,34 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
             else:
                 view = views[channel]
 
-            # Creation of the input
-            data_source = MemoryLegacyDataSource(view.done, next_callback=view.next)
-            output = view.outputs[details['output']]
-            output.data_sink.data_sources.append(data_source)
-            input = Input(name, algorithm.input_map[name], data_source)
+            data_loader = _get_data_loader_for(details)
+            data_loader.add(name, view.data_sources[details['output']])
 
-            logger.debug("Input '%s' created: group='%s', dataformat='%s', database-output='%s/%s/%s:%s'" % \
+            logger.debug("DatabaseOutputDataSource '%s' created: group='%s', dataformat='%s', database-output='%s/%s/%s:%s'" % \
                 (name, channel, algorithm.input_map[name], details['database'],
                  details['protocol'], details['set'], details['output']))
 
         elif db_access == AccessMode.REMOTE:
             if socket is None:
-                raise IOError("No socket provided for remote inputs")
+                raise IOError("No socket provided for remote data sources")
 
+            data_loader = _get_data_loader_for(details)
+
+            data_source = RemoteDataSource()
+            result = data_source.setup(
+                socket=socket,
+                input_name=name,
+                dataformat_name=algorithm.input_map[name],
+                prefix=prefix,
+                unpack=True
+            )
+            if not result:
+                raise IOError("cannot setup remote data source '%s'" % name)
 
-            input = RemoteInput(name, algorithm.dataformats[algorithm.input_map[name]], socket, unpack=unpack)
+            data_loader.add(name, data_source)
 
-            logger.debug("RemoteInput '%s' created: group='%s', dataformat='%s', connected to a database" % \
+            logger.debug("RemoteDataSource '%s' created: group='%s', dataformat='%s', connected to a database" % \
                 (name, details['channel'], algorithm.input_map[name]))

@@ -224,21 +233,10 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
             if details['channel'] == config['channel']: # synchronized
                 input = _create_local_input(details)
             else:
-                _create_data_loader(details)
+                _create_data_source(details)
 
         elif algorithm.type == Algorithm.AUTONOMOUS:
-            _create_data_loader(details)
-
-        elif cache_access == AccessMode.REMOTE:
-            if socket is None:
-                raise IOError("No socket provided for remote inputs")
-
-            input = RemoteInput(name, algorithm.dataformats[algorithm.input_map[name]], socket, unpack=unpack)
-
-            logger.debug("RemoteInput '%s' created: group='%s', dataformat='%s'" % \
-                (name, details['channel'], algorithm.input_map[name]))
+            _create_data_source(details)
 
         else:
             continue

@@ -267,8 +265,7 @@ def create_inputs_from_configuration(config, algorithm, prefix, cache_root,
 #----------------------------------------------------------
 
 
-def create_outputs_from_configuration(config, algorithm, prefix, cache_root, input_list,
-                                      cache_access=AccessMode.NONE, socket=None):
+def create_outputs_from_configuration(config, algorithm, prefix, cache_root, input_list):
 
     data_sinks = []
     output_list = OutputList()

@@ -300,73 +297,56 @@ def create_outputs_from_configuration(config, algorithm, prefix, cache_root, inp
         if input_group is not None:
             synchronization_listener = input_group.synchronization_listener
 
-        if cache_access == AccessMode.LOCAL:
-            path = os.path.join(cache_root, details['path'] + '.data')
-            dirname = os.path.dirname(path)
-
-            # Make sure that the directory exists while taking care of race
-            # conditions. see: http://stackoverflow.com/questions/273192/check-if-a-directory-exists-and-create-it-if-necessary
-            try:
-                if (len(dirname) > 0):
-                    os.makedirs(dirname)
-            except OSError as exception:
-                if exception.errno != errno.EEXIST:
-                    raise
-
-            if start_index is None:
-                for k, v in config['inputs'].items():
-                    if v['channel'] == config['channel']:
-                        input_path = os.path.join(cache_root, v['path'] + '.data')
-                        break
-
-                (data_filenames, indices_filenames, data_checksum_filenames, indices_checksum_filenames) = \
-                    getAllFilenames(input_path)
-
-                end_indices = [ int(x.split('.')[-2]) for x in indices_filenames ]
-                end_indices.sort()
-
-                start_index = 0
-                end_index = end_indices[-1]
-
-            data_sink = CachedDataSink()
-            data_sinks.append(data_sink)
-
-            status = data_sink.setup(
-                filename=path,
-                dataformat=dataformat,
-                start_index=start_index,
-                end_index=end_index,
-                encoding='binary'
-            )
-
-            if not status:
-                raise IOError("Cannot create cache sink '%s'" % details['path'])
-
-            output_list.add(Output(name, data_sink,
-                                   synchronization_listener=synchronization_listener,
-                                   force_start_index=start_index)
-            )
-
-            if 'result' not in config:
-                logger.debug("Output '%s' created: group='%s', dataformat='%s', filename='%s'" % \
-                    (name, details['channel'], dataformat_name, path))
-            else:
-                logger.debug("Output '%s' created: dataformat='%s', filename='%s'" % \
-                    (name, dataformat_name, path))
-
-        elif cache_access == AccessMode.REMOTE:
-            if socket is None:
-                raise IOError("No socket provided for remote outputs")
-
-            output_list.add(RemoteOutput(name, dataformat, socket,
-                                         synchronization_listener=synchronization_listener,
-                                         force_start_index=start_index or 0)
-            )
-
-            logger.debug("RemoteOutput '%s' created: group='%s', dataformat='%s'" % \
-                (name, details['channel'], dataformat_name))
-
-        else:
-            continue
+        path = os.path.join(cache_root, details['path'] + '.data')
+        dirname = os.path.dirname(path)
+
+        # Make sure that the directory exists while taking care of race
+        # conditions. see: http://stackoverflow.com/questions/273192/check-if-a-directory-exists-and-create-it-if-necessary
+        try:
+            if (len(dirname) > 0):
+                os.makedirs(dirname)
+        except OSError as exception:
+            if exception.errno != errno.EEXIST:
+                raise
+
+        if start_index is None:
+            for k, v in config['inputs'].items():
+                if v['channel'] == config['channel']:
+                    input_path = os.path.join(cache_root, v['path'] + '.data')
+                    break
+
+            (data_filenames, indices_filenames, data_checksum_filenames, indices_checksum_filenames) = \
+                getAllFilenames(input_path)
+
+            end_indices = [ int(x.split('.')[-2]) for x in indices_filenames ]
+            end_indices.sort()
+
+            start_index = 0
+            end_index = end_indices[-1]
+
+        data_sink = CachedDataSink()
+        data_sinks.append(data_sink)
+
+        status = data_sink.setup(
+            filename=path,
+            dataformat=dataformat,
+            start_index=start_index,
+            end_index=end_index,
+            encoding='binary'
+        )
+
+        if not status:
+            raise IOError("Cannot create cache sink '%s'" % details['path'])
+
+        output_list.add(Output(name, data_sink,
+                               synchronization_listener=synchronization_listener,
+                               force_start_index=start_index)
+        )
+
+        if 'result' not in config:
+            logger.debug("Output '%s' created: group='%s', dataformat='%s', filename='%s'" % \
+                (name, details['channel'], dataformat_name, path))
+        else:
+            logger.debug("Output '%s' created: dataformat='%s', filename='%s'" % \
+                (name, dataformat_name, path))
 
     return (output_list, data_sinks)

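Both the removed branch and its de-indented replacement rely on the same two idioms: creating the cache directory while tolerating a concurrent creator, and recovering the stream's end index from the second-to-last dot-separated field of the index file names. A self-contained sketch of those idioms (the directory and file names below are made up):

    import errno
    import os

    def ensure_directory(dirname):
        """Create dirname, ignoring the race where another process creates it first."""
        try:
            if len(dirname) > 0:
                os.makedirs(dirname)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

    ensure_directory('/tmp/example-cache/subdir')

    # End-index recovery, as done above when start_index is None: the second-to-last
    # dot-separated field of each index file name is an integer end index.
    indices_filenames = ['0123abcd.0.31.index', '0123abcd.32.63.index']  # made-up names
    end_indices = sorted(int(x.split('.')[-2]) for x in indices_filenames)

    start_index = 0
    end_index = end_indices[-1]
    print(start_index, end_index)   # 0 63

On Python 3.2 and later, the try/except can be replaced by os.makedirs(dirname, exist_ok=True).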