Commit cb0c182d authored by Philip ABBET's avatar Philip ABBET

Refactoring: Merge the 'Agent' class in the 'DockerExecutor' one

parent 47be43b7
This diff is collapsed.
......@@ -292,6 +292,26 @@ class Host(object):
return (environments, db_environments)
def create_archive(self, src):
    """Creates an in-memory tar archive from a file or directory.

    Parameters:
      src (str): Path to the file or directory to archive; it is stored
        under its base name at the root of the archive.

    Returns:
      bytes: The raw tar archive contents, suitable to be passed to
      :py:meth:`put_archive`.
    """
    # tarfile writes raw bytes, so the buffer must be binary.  The previous
    # ``six.moves.cStringIO`` resolves to a text-mode StringIO on Python 3
    # and makes tarfile fail with a TypeError; ``io.BytesIO`` works on both
    # Python 2 and Python 3.
    import io
    buf = io.BytesIO()
    with tarfile.open(mode='w', fileobj=buf) as tar:
        tar.add(src, arcname=os.path.basename(src))
    return buf.getvalue()
def put_archive(self, container, archive, dest='/tmp', chmod=None):
    """Uploads an in-memory tar archive into a container.

    Parameters:
      container (dict): The container descriptor; its ``Id`` is used for
        logging purposes.
      archive (bytes): Raw tar archive data, extracted under ``dest``.
      dest (str): Directory inside the container where the archive is
        unpacked.
      chmod (str): Optional mode (e.g. ``'755'``); when given, it is
        applied recursively to ``dest`` after extraction.
    """
    # Ship the tarball to the container through the docker remote API
    logger.debug("[docker] archive -> %s@%s", container['Id'][:12], dest)
    self.client.put_archive(container, dest, archive)

    # Nothing else to do unless permissions must be adjusted
    if chmod is None:
        return

    # Recursively change permissions so the destination path is accessible
    exec_handle = self.client.exec_create(container, cmd=['chmod', '-R', chmod, dest])
    self.client.exec_start(exec_handle)  # blocks until the chmod has run
def put_path(self, container, src, dest='/tmp', chmod=None):
"""Puts a given src path into a destination folder
......@@ -322,24 +342,17 @@ class Host(object):
"""
# The docker API only accepts in-memory tar archives
c = six.moves.cStringIO()
with tarfile.open(mode='w', fileobj=c) as tar:
tar.add(src, arcname=os.path.basename(src))
archive = c.getvalue()
archive = self.create_archive(src)
# Place the tarball into the container
path = os.path.join(dest, os.path.basename(src))
logger.debug("[docker] archive -> %s@%s", container['Id'][:12], dest)
self.client.put_archive(container, dest, archive)
if chmod is not None:
# Change permissions to access the path
ex = self.client.exec_create(container, cmd=['chmod', '-R', chmod, dest])
output = self.client.exec_start(ex) #waits until it is executed
self.put_archive(container, archive, dest='/tmp',
# dest=os.path.join(dest, os.path.basename(src)),
chmod=chmod
)
def create_container(self, image, command, tmp_path=None, host_args=None,
**args):
def create_container(self, image, command, configuration_archive=None,
configuration_path=None, host_args=None, **args):
"""Prepares the docker container for running the user code
......@@ -350,8 +363,8 @@ class Host(object):
command (list): A list of strings with the command to run inside the
container.
tmp_path (str): A path with a file name or directory that will be
copied into the container (inside ``/tmp``), supposedly with
configuration_path (str): a path with a file name or directory that will
be copied into the container (inside ``/tmp``), supposedly with
information that is used by the command.
host_args (dict): A dictionary that will be transformed into a
......@@ -397,12 +410,14 @@ class Host(object):
args['host_config'] = self.client.create_host_config(**config_args)
logger.debug("[docker] create_container %s %s", image, ' '.join(command))
container = self.client.create_container(image=image, command=command,
**args)
container = self.client.create_container(image=image, command=command, **args)
self.containers.append(container)
if tmp_path is not None:
self.put_path(container, tmp_path)
if configuration_archive is not None:
self.put_archive(container, configuration_archive)
if configuration_path is not None:
self.put_path(container, configuration_path)
return container
......@@ -495,7 +510,7 @@ class Popen:
'''
def __init__(self, host, image, command, tmp_archive=None,
def __init__(self, host, image, command, configuration_archive=None,
virtual_memory_in_megabytes=0, max_cpu_percent=0, **args):
self.host = host
......@@ -546,7 +561,7 @@ class Popen:
# creates the container
self.container = self.host.create_container(image=image,
command=command, tmp_path=tmp_archive,
command=command, configuration_archive=configuration_archive,
host_args=host_args, **args)
# Starts the container
......
......@@ -29,14 +29,21 @@
'''Execution utilities'''
import os
import sys
import requests
import simplejson
import zmq.green as zmq
import logging
logger = logging.getLogger(__name__)
from gevent import monkey
monkey.patch_socket(dns=False)
monkey.patch_ssl()
from .. import stats
from .. import agent
from .. import message_handler
from .. import utils
from .. import dock
from .base import BaseExecutor
......@@ -149,6 +156,7 @@ class DockerExecutor(BaseExecutor):
self.db_socket = None
self.db_address = None
self.proxy_mode = proxy_mode
self.message_handler = None
# Check if the execution environment supports proxy_mode=False (if necessary)
if not self.proxy_mode:
......@@ -186,8 +194,6 @@ class DockerExecutor(BaseExecutor):
super(DockerExecutor, self).__exit__(exc_type, exc_value, traceback)
self.agent = None
if self.context is not None:
self.context.destroy()
self.context = None
......@@ -223,7 +229,7 @@ class DockerExecutor(BaseExecutor):
def process(self, virtual_memory_in_megabytes=0, max_cpu_percent=0,
timeout_in_minutes=0, daemon=0):
timeout_in_minutes=0):
"""Executes the user algorithm code using an external program.
The execution interface follows the backend API as described in our
......@@ -259,11 +265,6 @@ class DockerExecutor(BaseExecutor):
killed with :py:attr:`signal.SIGKILL`. If set to zero, no timeout will
be applied.
daemon (int): If this variable is set, then we don't really start the
user process, but just kick out 0MQ server, print the command-line and
sleep for that many seconds. You're supposed to start the client by
hand then and debug it.
Returns:
dict: A dictionary which is JSON formattable containing the summary of
......@@ -275,22 +276,190 @@ class DockerExecutor(BaseExecutor):
raise RuntimeError("execution information is bogus:\n * %s" % \
'\n * '.join(self.errors))
with agent.Agent(virtual_memory_in_megabytes, max_cpu_percent) as runner:
self.agent = runner
# Creates an in-memory archive containing all the configuration and files
# needed by the processing container
configuration_path = utils.temporary_directory()
self.dump_runner_configuration(configuration_path)
processing_archive = self.host.create_archive(configuration_path)
# (If necessary) Creates an in-memory archive containing all the configuration
# and files needed by the databases container
if self.db_address is not None:
databases_configuration_path = utils.temporary_directory()
self.dump_databases_provider_configuration(databases_configuration_path)
# Modify the paths to the databases in the dumped configuration files
root_folder = os.path.join(databases_configuration_path, 'prefix', 'databases')
database_paths = {}
if not self.data.has_key('datasets_root_path'):
for db_name in self.databases.keys():
json_path = os.path.join(root_folder, db_name + '.json')
with open(json_path, 'r') as f:
db_data = simplejson.load(f)
database_paths[db_name] = db_data['root_folder']
db_data['root_folder'] = os.path.join('/databases', db_name)
with open(json_path, 'w') as f:
simplejson.dump(db_data, f, indent=4)
databases_archive = self.host.create_archive(databases_configuration_path)
# Creates the message handler
self.message_handler = message_handler.ProxyMessageHandler(
self.input_list, self.output_list, self.host.ip)
# Determine the docker image to use for the processing
processing_environment = '%(name)s (%(version)s)' % self.data['environment']
if processing_environment not in self.host:
raise RuntimeError("Environment `%s' is not available on docker " \
"host `%s' - available environments are %s" % (processing_environment,
self.host, ", ".join(self.host.environments.keys())))
# (If necessary) Instantiate the docker container that provide the databases
databases_container = None
if self.db_address is not None:
# Determine the docker image to use for the databases
try:
databases_environment = self.host.db2docker(database_paths.keys())
except:
raise RuntimeError("No environment found for the databases `%s' " \
"- available environments are %s" % (
", ".join(database_paths.keys()),
", ".join(self.host.db_environments.keys())))
# Specify the volumes to mount inside the container
volumes = {}
if not self.data.has_key('datasets_root_path'):
for db_name, db_path in database_paths.items():
volumes[db_path] = {
'bind': os.path.join('/databases', db_name),
'mode': 'ro',
}
else:
volumes[self.data['datasets_root_path']] = {
'bind': self.data['datasets_root_path'],
'mode': 'ro',
}
# Instantiate the container
# Note: we only support one databases image loaded at the same time
cmd = [
'databases_provider',
self.db_address,
os.path.join('/tmp', os.path.basename(databases_configuration_path))
]
databases_container = dock.Popen(
self.host,
databases_environment,
command=cmd,
configuration_archive=databases_archive,
volumes=volumes
)
# Specify the volumes to mount inside the algorithm container
volumes = {}
if not self.proxy_mode:
volumes[self.cache] = {
'bind': '/cache',
'mode': 'rw',
}
# Instantiate the algorithm container
cmd = [
'execute',
self.message_handler.address,
os.path.join('/tmp', os.path.basename(configuration_path))
]
if logger.getEffectiveLevel() <= logging.DEBUG:
cmd.insert(1, '--debug')
algorithm_container = dock.Popen(
self.host,
processing_environment,
command=cmd,
configuration_archive=processing_archive,
virtual_memory_in_megabytes=virtual_memory_in_megabytes,
max_cpu_percent=max_cpu_percent,
volumes=volumes
)
# Process the messages until the container is done
self.message_handler.set_process(algorithm_container)
self.message_handler.start()
timed_out = False
try:
timeout = (60 * timeout_in_minutes) if timeout_in_minutes else None
status = algorithm_container.wait(timeout)
except requests.exceptions.ReadTimeout:
logger.warn("user process has timed out after %d minutes", timeout_in_minutes)
algorithm_container.kill()
status = algorithm_container.wait()
if databases_container is not None:
databases_container.kill()
databases_container.wait()
timed_out = True
except KeyboardInterrupt: # Developer pushed CTRL-C
logger.info("stopping user process on CTRL-C console request")
algorithm_container.kill()
status = algorithm_container.wait()
if databases_container is not None:
databases_container.kill()
databases_container.wait()
finally:
self.message_handler.stop.set()
# Collects final information and returns to caller
retval = dict(
stdout = algorithm_container.stdout,
stderr = algorithm_container.stderr,
status = status,
timed_out = timed_out,
statistics = self.message_handler.last_statistics,
system_error = self.message_handler.system_error,
user_error = self.message_handler.user_error,
)
if 'data' in retval['statistics']:
stats.update(retval['statistics']['data'], self.io_statistics)
else:
logger.warn("cannot find 'data' entry on returned stats, " \
"therefore not appending I/O info either")
algorithm_container.rm()
#synchronous call - always returns after a certain timeout
retval = runner.run(self, self.host, timeout_in_minutes=timeout_in_minutes,
daemon=daemon, db_address=self.db_address)
if databases_container is not None:
retval['stdout'] += '\n' + databases_container.stdout
retval['stderr'] += '\n' + databases_container.stderr
databases_container.rm()
#adds I/O statistics from the current executor, if its complete already
#otherwise, it means the running process went bananas, ignore it ;-)
if 'statistics' in retval:
if 'data' in retval['statistics']:
stats.update(retval['statistics']['data'], self.io_statistics)
else:
logger.warn("cannot find 'data' entry on returned stats, " \
"therefore not appending I/O info either")
self.message_handler.destroy()
self.message_handler = None
return retval
......@@ -298,8 +467,8 @@ class DockerExecutor(BaseExecutor):
def kill(self):
"""Stops the user process by force - to be called from signal handlers"""
if self.agent is not None:
self.agent.kill()
if self.message_handler is not None:
self.message_handler.kill()
return True
return False
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
###############################################################################
# #
# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# This file is part of the beat.core module of the BEAT platform. #
# #
# Commercial License Usage #
# Licensees holding valid commercial BEAT licenses may use this file in #
# accordance with the terms contained in a written agreement between you #
# and Idiap. For further information contact tto@idiap.ch #
# #
# Alternatively, this file may be used under the terms of the GNU Affero #
# Public License version 3 as published by the Free Software and appearing #
# in the file LICENSE.AGPL included in the packaging of this file. #
# The BEAT platform is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
# or FITNESS FOR A PARTICULAR PURPOSE. #
# #
# You should have received a copy of the GNU Affero Public License along #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/. #
# #
###############################################################################
import logging
logger = logging.getLogger(__name__)
import gevent
import zmq.green as zmq
from gevent import monkey
monkey.patch_socket(dns=False)
monkey.patch_ssl()
from beat.backend.python.message_handler import MessageHandler
class ProxyMessageHandler(MessageHandler):
    '''A 0MQ server for our communication with the user process

    Adds support for output-related messages (``wrt``, ``idm``, ``oic``)
    on top of the base :py:class:`MessageHandler`.
    '''

    def __init__(self, input_list, output_list, host_address):
        # Starts our 0MQ server: bind a PAIR socket on a random high port
        # so the containerized user process can connect back to us
        self.context = zmq.Context()
        self.socket = self.context.socket(zmq.PAIR)

        self.address = 'tcp://' + host_address
        port = self.socket.bind_to_random_port(self.address, min_port=50000)
        self.address += ':%d' % port
        logger.debug("zmq server bound to `%s'", self.address)

        super(ProxyMessageHandler, self).__init__(input_list, self.context, self.socket)

        self.output_list = output_list

        # implementations of the extra, output-related commands
        self.callbacks.update(dict(
            wrt = self.write,
            idm = self.is_data_missing,
            oic = self.output_is_connected,
        ))

    def destroy(self):
        # Tears down the 0MQ context (and, with it, the bound socket)
        self.context.destroy()

    def __str__(self):
        return 'Server(%s)' % self.address

    def _get_output_candidate(self, name):
        """Returns the output object named ``name``.

        Raises:
          RuntimeError: If no output with that name exists.
        """
        retval = self.output_list[name]
        if retval is None:
            raise RuntimeError("Could not find output `%s'" % name)
        return retval

    def write(self, name, packed):
        """Syntax: wrt output data"""
        logger.debug('recv: wrt %s <bin> (size=%d)', name, len(packed))

        # _get_output_candidate() raises when the output is missing, so no
        # further None check is needed here (the previous re-check was
        # unreachable dead code, inconsistent with the other handlers)
        output_candidate = self._get_output_candidate(name)

        # Unpack the binary payload into the output's data format and write it
        data = output_candidate.data_sink.dataformat.type()
        data.unpack(packed)
        output_candidate.write(data)

        logger.debug('send: ack')
        self.socket.send('ack')

    def is_data_missing(self, name):
        """Syntax: idm output"""
        logger.debug('recv: idm %s', name)

        output_candidate = self._get_output_candidate(name)
        what = 'tru' if output_candidate.isDataMissing() else 'fal'

        logger.debug('send: %s', what)
        self.socket.send(what)

    def output_is_connected(self, name):
        """Syntax: oic output"""
        logger.debug('recv: oic %s', name)

        output_candidate = self._get_output_candidate(name)
        what = 'tru' if output_candidate.isConnected() else 'fal'

        logger.debug('send: %s', what)
        self.socket.send(what)
......@@ -253,7 +253,7 @@ class AsyncTest(unittest.TestCase):
with Popen(self.host, 'Python 2.7 (1.1.0)', ['python', tmp_name,
str(processes)], max_cpu_percent=max_cpu_percent,
tmp_archive=program) as p:
configuration_path=program) as p:
p.statistics() # start recording
time.sleep(sleep_time)
......
File mode changed from 100755 to 100644
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment