Commit c1fa5a52 authored by André Anjos

Merge branch 'issues_15_16' into 'master'

Issues 15 & 16

Closes #16 and #15

See merge request !5
parents b1ebd124 53073050
Pipeline #9384 passed with stages in 19 minutes and 59 seconds
@@ -21,10 +21,10 @@
This package is part of the signal-processing and machine learning toolbox
Bob_. It provides a set of python wrappers around SGE utilities like ``qsub``,
``qstat`` and ``qdel``. It interacts with these tools to submit and manage
grid jobs making up a complete workflow ecosystem. Currently, it is set up to
work with the SGE grid at Idiap, but it is also possible to modify it to be
used in other SGE grids.
``qstat`` and ``qdel``. It interacts with these tools to submit and manage grid
jobs making up a complete workflow ecosystem. Currently, it is set up to work
with the SGE grid at Idiap, but it is also possible to modify it to be used in
other SGE grids.
Since version 1.0.x, a local submission system has also been introduced. Instead
of sending jobs to the SGE grid, it executes them in parallel processes on the
@@ -34,11 +34,9 @@ local machine, using a simple scheduling system.
Installation
------------
Follow our `installation`_ instructions. Then, using the Python interpreter
provided by the distribution, bootstrap and buildout this package::
Follow our `installation`_ instructions. Then, to install this package, run::
$ python bootstrap-buildout.py
$ ./bin/buildout
$ conda install gridtk
Contact
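The README now points at the conda package and, since 1.0.x, at the local submission system. As a minimal sketch of how that local scheduler can be exercised from Python, mirroring the `jman` options used in the test changes further down (the database path, log directory, job name and script below are placeholders, not part of this commit):

.. code-block:: python

   # Sketch only: drives the local scheduler through the jman entry point,
   # exactly as the updated tests below do; all paths/names are placeholders.
   import os
   from gridtk.script import jman

   database = os.path.join('/tmp', 'demo.sql3')   # placeholder job database
   log_dir = os.path.join('/tmp', 'logs')         # placeholder log directory

   # queue a job for local execution instead of submitting it to SGE
   jman.main(['jman', '--local', '--database', database, 'submit',
              '--log-dir', log_dir, '--name', 'demo', 'bash', 'my_script.sh'])

   # run the local scheduler with two parallel workers until the queue is empty
   jman.main(['jman', '--local', '--database', database, 'run-scheduler',
              '--parallel', '2', '--die-when-finished'])

   # inspect results, then clean up the database and log files
   jman.main(['jman', '--database', database, 'list'])
   jman.main(['jman', '--local', '--database', database, 'delete'])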
##############################################################################
#
# Copyright (c) 2006 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Bootstrap a buildout-based project
Simply run this script in a directory containing a buildout.cfg.
The script accepts buildout command-line options, so you can
use the -c option to specify an alternate configuration file.
"""
import os
import shutil
import sys
import tempfile
from optparse import OptionParser
__version__ = '2015-07-01'
# See zc.buildout's changelog if this version is up to date.
tmpeggs = tempfile.mkdtemp(prefix='bootstrap-')
usage = '''\
[DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options]
Bootstraps a buildout-based project.
Simply run this script in a directory containing a buildout.cfg, using the
Python that you want bin/buildout to use.
Note that by using --find-links to point to local resources, you can keep
this script from going over the network.
'''
parser = OptionParser(usage=usage)
parser.add_option("--version",
action="store_true", default=False,
help=("Return bootstrap.py version."))
parser.add_option("-t", "--accept-buildout-test-releases",
dest='accept_buildout_test_releases',
action="store_true", default=False,
help=("Normally, if you do not specify a --version, the "
"bootstrap script and buildout gets the newest "
"*final* versions of zc.buildout and its recipes and "
"extensions for you. If you use this flag, "
"bootstrap and buildout will get the newest releases "
"even if they are alphas or betas."))
parser.add_option("-c", "--config-file",
help=("Specify the path to the buildout configuration "
"file to be used."))
parser.add_option("-f", "--find-links",
help=("Specify a URL to search for buildout releases"))
parser.add_option("--allow-site-packages",
action="store_true", default=False,
help=("Let bootstrap.py use existing site packages"))
parser.add_option("--buildout-version",
help="Use a specific zc.buildout version")
parser.add_option("--setuptools-version",
help="Use a specific setuptools version")
parser.add_option("--setuptools-to-dir",
help=("Allow for re-use of existing directory of "
"setuptools versions"))
options, args = parser.parse_args()
if options.version:
print("bootstrap.py version %s" % __version__)
sys.exit(0)
######################################################################
# load/install setuptools
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen
ez = {}
if os.path.exists('ez_setup.py'):
exec(open('ez_setup.py').read(), ez)
else:
exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez)
if not options.allow_site_packages:
# ez_setup imports site, which adds site packages
# this will remove them from the path to ensure that incompatible versions
# of setuptools are not in the path
import site
# inside a virtualenv, there is no 'getsitepackages'.
# We can't remove these reliably
if hasattr(site, 'getsitepackages'):
for sitepackage_path in site.getsitepackages():
# Strip all site-packages directories from sys.path that
# are not sys.prefix; this is because on Windows
# sys.prefix is a site-package directory.
if sitepackage_path != sys.prefix:
sys.path[:] = [x for x in sys.path
if sitepackage_path not in x]
setup_args = dict(to_dir=tmpeggs, download_delay=0)
if options.setuptools_version is not None:
setup_args['version'] = options.setuptools_version
if options.setuptools_to_dir is not None:
setup_args['to_dir'] = options.setuptools_to_dir
ez['use_setuptools'](**setup_args)
import setuptools
import pkg_resources
# This does not (always?) update the default working set. We will
# do it.
for path in sys.path:
if path not in pkg_resources.working_set.entries:
pkg_resources.working_set.add_entry(path)
######################################################################
# Install buildout
ws = pkg_resources.working_set
setuptools_path = ws.find(
pkg_resources.Requirement.parse('setuptools')).location
# Fix sys.path here as easy_install.pth added before PYTHONPATH
cmd = [sys.executable, '-c',
'import sys; sys.path[0:0] = [%r]; ' % setuptools_path +
'from setuptools.command.easy_install import main; main()',
'-mZqNxd', tmpeggs]
find_links = os.environ.get(
'bootstrap-testing-find-links',
options.find_links or
('http://downloads.buildout.org/'
if options.accept_buildout_test_releases else None)
)
if find_links:
cmd.extend(['-f', find_links])
requirement = 'zc.buildout'
version = options.buildout_version
if version is None and not options.accept_buildout_test_releases:
# Figure out the most recent final version of zc.buildout.
import setuptools.package_index
_final_parts = '*final-', '*final'
def _final_version(parsed_version):
try:
return not parsed_version.is_prerelease
except AttributeError:
# Older setuptools
for part in parsed_version:
if (part[:1] == '*') and (part not in _final_parts):
return False
return True
index = setuptools.package_index.PackageIndex(
search_path=[setuptools_path])
if find_links:
index.add_find_links((find_links,))
req = pkg_resources.Requirement.parse(requirement)
if index.obtain(req) is not None:
best = []
bestv = None
for dist in index[req.project_name]:
distv = dist.parsed_version
if _final_version(distv):
if bestv is None or distv > bestv:
best = [dist]
bestv = distv
elif distv == bestv:
best.append(dist)
if best:
best.sort()
version = best[-1].version
if version:
requirement = '=='.join((requirement, version))
cmd.append(requirement)
import subprocess
if subprocess.call(cmd) != 0:
raise Exception(
"Failed to execute command:\n%s" % repr(cmd)[1:-1])
######################################################################
# Import and run buildout
ws.add_entry(tmpeggs)
ws.require(requirement)
import zc.buildout.buildout
if not [a for a in args if '=' not in a]:
args.append('bootstrap')
# if -c was provided, we push it back into args for buildout's main function
if options.config_file is not None:
args[0:0] = ['-c', options.config_file]
zc.buildout.buildout.main(args)
shutil.rmtree(tmpeggs)
@@ -8,20 +8,6 @@
The Command Line Interface
============================
The command line interface requires the package to be installed properly (see the README file).
When installation has finished, you should have all required dependencies and command line utilities in your environment. To verify the installation, you can run the nose tests:
.. code-block:: sh
$ nosetests -sv
To build the package documentation, do:
.. code-block:: sh
$ sphinx-build doc sphinx
The Job Manager
===============
@@ -12,6 +12,24 @@ import sqlalchemy
from distutils.version import LooseVersion
# Defines an equivalent `which` function to dig out the location of executables
import shutil
if sys.version_info[:2] >= (3, 3):
    which = shutil.which
else:  # define our own fallback for Python < 3.3
    def which(cmd, mode=os.F_OK | os.X_OK, path=None):
        from distutils.spawn import find_executable
        candidate = find_executable(cmd, path)
        if candidate is None:  # nothing found on the search path
            return None
        st = os.stat(candidate)
        if bool(st.st_mode & mode):
            return candidate
        return None
class JobManager:
"""This job manager defines the basic interface for handling jobs in the SQL database."""
@@ -21,10 +39,11 @@ class JobManager:
self._session_maker = sqlalchemy.orm.sessionmaker(bind=self._engine)
# store the command that this job manager was called with
if wrapper_script is None:
# try to find the executable, search in the bin path first
import distutils.spawn
wrapper_script = os.path.realpath(distutils.spawn.find_executable('jman', '.' + os.pathsep + 'bin' + os.pathsep + os.environ['PATH']))
if wrapper_script is None: wrapper_script = 'jman'
if not os.path.exists(wrapper_script):
bindir = os.path.join(os.path.realpath(os.curdir), 'bin')
wrapper_script = which(wrapper_script, path=os.pathsep.join((bindir,
os.environ['PATH'])))
if wrapper_script is None:
raise IOError("Could not find the installation path of gridtk. Please specify it in the wrapper_script parameter of the JobManager.")
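When `jman` cannot be located this way, the constructor now raises an ``IOError`` and asks for the ``wrapper_script`` parameter instead. A hedged sketch of that fallback, assuming the keyword is forwarded by ``JobManagerLocal`` (the class used in the tests below) and using a placeholder path:

.. code-block:: python

   # Sketch only: pass the wrapper script explicitly, as the IOError above
   # suggests; the path is a placeholder and the keyword forwarding through
   # JobManagerLocal is an assumption.
   from gridtk.local import JobManagerLocal

   job_manager = JobManagerLocal(database='database.sql3',
                                 wrapper_script='/usr/local/bin/jman')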
@@ -10,7 +10,8 @@ import gridtk
import subprocess, signal
import time
from gridtk.models import Job
from ..models import Job
class GridTKTest(unittest.TestCase):
# This class defines tests for the gridtk
@@ -23,6 +24,11 @@ class GridTKTest(unittest.TestCase):
self.database = os.path.join(self.temp_dir, 'database.sql3')
self.scheduler_job = None
from ..manager import which
bindir = os.path.join(os.path.realpath(os.curdir), 'bin')
self.jman = which('jman', path=os.pathsep.join((bindir,
os.environ['PATH'])))
def tearDown(self):
# make sure that all scheduler jobs are stopped after exiting
@@ -45,22 +51,22 @@ class GridTKTest(unittest.TestCase):
import nose
# first, add some commands to the database
script_1 = pkg_resources.resource_filename('gridtk.tests', 'test_script.sh')
script_2 = pkg_resources.resource_filename('gridtk.tests', 'test_array.sh')
script_1 = pkg_resources.resource_filename(__name__, 'test_script.sh')
script_2 = pkg_resources.resource_filename(__name__, 'test_array.sh')
rdir = pkg_resources.resource_filename('gridtk', 'tests')
from gridtk.script import jman
# add a simple script that will write some information to the
jman.main(['./bin/jman', '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_1', bash, script_1])
jman.main(['./bin/jman', '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_2', '--dependencies', '1', '--parametric', '1-7:2', bash, script_2])
jman.main(['./bin/jman', '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_3', '--dependencies', '1', '2', '--exec-dir', rdir, bash, "test_array.sh"])
jman.main([self.jman, '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_1', bash, script_1])
jman.main([self.jman, '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_2', '--dependencies', '1', '--parametric', '1-7:2', bash, script_2])
jman.main([self.jman, '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_3', '--dependencies', '1', '2', '--exec-dir', rdir, bash, "test_array.sh"])
# check that the database was created successfully
self.assertTrue(os.path.exists(self.database))
print()
# test that the list command works (should also work with the "default" grid manager)
jman.main(['./bin/jman', '--database', self.database, 'list', '--job-ids', '1'])
jman.main(['./bin/jman', '--database', self.database, 'list', '--job-ids', '2', '--print-array-jobs', '--print-dependencies', '--print-times'])
jman.main([self.jman, '--database', self.database, 'list', '--job-ids', '1'])
jman.main([self.jman, '--database', self.database, 'list', '--job-ids', '2', '--print-array-jobs', '--print-dependencies', '--print-times'])
# get insight into the database
job_manager = gridtk.local.JobManagerLocal(database=self.database)
@@ -94,7 +100,7 @@ class GridTKTest(unittest.TestCase):
job_manager.unlock()
# now, start the local execution of the job in a parallel job
self.scheduler_job = subprocess.Popen(['./bin/jman', '--local', '--database', self.database, 'run-scheduler', '--sleep-time', '5', '--parallel', '2'])
self.scheduler_job = subprocess.Popen([self.jman, '--local', '--database', self.database, 'run-scheduler', '--sleep-time', '5', '--parallel', '2'])
# sleep some time to ensure that the scheduler was able to start the first job
time.sleep(4)
@@ -127,10 +133,10 @@ class GridTKTest(unittest.TestCase):
# reset the job 1
jman.main(['./bin/jman', '--local', '--database', self.database, 'resubmit', '--job-id', '1', '--running-jobs', '--overwrite-command', script_1])
jman.main([self.jman, '--local', '--database', self.database, 'resubmit', '--job-id', '1', '--running-jobs', '--overwrite-command', script_1])
# now, start the local execution of the job in a parallel job
self.scheduler_job = subprocess.Popen(['./bin/jman', '--local', '--database', self.database, 'run-scheduler', '--sleep-time', '5', '--parallel', '2'])
self.scheduler_job = subprocess.Popen([self.jman, '--local', '--database', self.database, 'run-scheduler', '--sleep-time', '5', '--parallel', '2'])
# sleep some time to ensure that the scheduler was able to finish the first and start the second job
time.sleep(9)
@@ -168,7 +174,7 @@ class GridTKTest(unittest.TestCase):
self.assertTrue('This is a text message to std-err' in open(err_file).read().split('\n'))
# resubmit all jobs
jman.main(['./bin/jman', '--local', '--database', self.database, 'resubmit', '--running-jobs'])
jman.main([self.jman, '--local', '--database', self.database, 'resubmit', '--running-jobs'])
# check that the log files have been cleaned
self.assertFalse(os.path.exists(out_file))
self.assertFalse(os.path.exists(err_file))
@@ -176,7 +182,7 @@ class GridTKTest(unittest.TestCase):
self.assertTrue(os.path.exists(self.log_dir))
# now, let the scheduler run all jobs, but this time in verbose mode
self.scheduler_job = subprocess.Popen(['./bin/jman', '--local', '-vv', '--database', self.database, 'run-scheduler', '--sleep-time', '1', '--parallel', '2', '--die-when-finished'])
self.scheduler_job = subprocess.Popen([self.jman, '--local', '-vv', '--database', self.database, 'run-scheduler', '--sleep-time', '1', '--parallel', '2', '--die-when-finished'])
# and wait for the job to finish (the timeout argument to Popen only exists from python 3.3 onwards)
self.scheduler_job.wait()
self.scheduler_job = None
@@ -218,25 +224,25 @@ class GridTKTest(unittest.TestCase):
print()
# test that the list command still works
jman.main(['./bin/jman', '--database', self.database, 'list', '--print-array-jobs'])
jman.main([self.jman, '--database', self.database, 'list', '--print-array-jobs'])
print()
# test that the report command works
jman.main(['./bin/jman', '--database', self.database, 'report'])
jman.main([self.jman, '--database', self.database, 'report'])
# clean-up
jman.main(['./bin/jman', '--local', '--database', self.database, 'delete', '--job-ids', '1-3'])
jman.main([self.jman, '--local', '--database', self.database, 'delete', '--job-ids', '1-3'])
# check that the database and the log files are gone
self.assertEqual(len(os.listdir(self.temp_dir)), 0)
# add the scripts again, but this time with the --stop-on-failure option
jman.main(['./bin/jman', '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_1', '--stop-on-failure', bash, script_1])
jman.main(['./bin/jman', '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_2', '--dependencies', '1', '--parametric', '1-7:2', '--stop-on-failure', bash, script_2])
jman.main(['./bin/jman', '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_3', '--dependencies', '1', '2', '--exec-dir', rdir, '--stop-on-failure', bash, "test_array.sh"])
jman.main([self.jman, '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_1', '--stop-on-failure', bash, script_1])
jman.main([self.jman, '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_2', '--dependencies', '1', '--parametric', '1-7:2', '--stop-on-failure', bash, script_2])
jman.main([self.jman, '--local', '--database', self.database, 'submit', '--log-dir', self.log_dir, '--name', 'test_3', '--dependencies', '1', '2', '--exec-dir', rdir, '--stop-on-failure', bash, "test_array.sh"])
# and execute them, but without writing the log files
self.scheduler_job = subprocess.Popen(['./bin/jman', '--local', '--database', self.database, 'run-scheduler', '--sleep-time', '0.1', '--parallel', '2', '--die-when-finished', '--no-log-files'])
self.scheduler_job = subprocess.Popen([self.jman, '--local', '--database', self.database, 'run-scheduler', '--sleep-time', '0.1', '--parallel', '2', '--die-when-finished', '--no-log-files'])
# and wait for the job to finish (the timeout argument to Popen only exists from python 3.3 onwards)
self.scheduler_job.wait()
self.scheduler_job = None
@@ -259,7 +265,7 @@ class GridTKTest(unittest.TestCase):
job_manager.unlock()
# and clean up again
jman.main(['./bin/jman', '--local', '--database', self.database, 'delete'])
jman.main([self.jman, '--local', '--database', self.database, 'delete'])
self.assertEqual(len(os.listdir(self.temp_dir)), 0)
except KeyboardInterrupt: