Commit ff2d46ea authored by Manuel Günther

First working example that relies on bob.db.utils.

parent 33013f70
......@@ -18,7 +18,11 @@ The script accepts buildout command-line options, so you can
use the -c option to specify an alternate configuration file.
"""
import os, shutil, sys, tempfile
import os
import shutil
import sys
import tempfile
from optparse import OptionParser
tmpeggs = tempfile.mkdtemp()
......@@ -31,8 +35,8 @@ Bootstraps a buildout-based project.
Simply run this script in a directory containing a buildout.cfg, using the
Python that you want bin/buildout to use.
Note that by using --setup-source and --download-base to point to
local resources, you can keep this script from going over the network.
Note that by using --find-links to point to local resources, you can keep
this script from going over the network.
'''
parser = OptionParser(usage=usage)
......@@ -48,23 +52,21 @@ parser.add_option("-t", "--accept-buildout-test-releases",
"bootstrap and buildout will get the newest releases "
"even if they are alphas or betas."))
parser.add_option("-c", "--config-file",
help=("Specify the path to the buildout configuration "
"file to be used."))
help=("Specify the path to the buildout configuration "
"file to be used."))
parser.add_option("-f", "--find-links",
help=("Specify a URL to search for buildout releases"))
help=("Specify a URL to search for buildout releases"))
options, args = parser.parse_args()
######################################################################
# load/install distribute
# load/install setuptools
to_reload = False
try:
import pkg_resources, setuptools
if not hasattr(pkg_resources, '_distribute'):
to_reload = True
raise ImportError
import pkg_resources
import setuptools
except ImportError:
ez = {}
......@@ -73,8 +75,10 @@ except ImportError:
except ImportError:
from urllib2 import urlopen
exec(urlopen('http://python-distribute.org/distribute_setup.py').read(), ez)
setup_args = dict(to_dir=tmpeggs, download_delay=0, no_fake=True)
# XXX use a more permanent ez_setup.py URL when available.
exec(urlopen('https://bitbucket.org/pypa/setuptools/raw/0.7.2/ez_setup.py'
).read(), ez)
setup_args = dict(to_dir=tmpeggs, download_delay=0)
ez['use_setuptools'](**setup_args)
if to_reload:
......@@ -86,10 +90,23 @@ except ImportError:
if path not in pkg_resources.working_set.entries:
pkg_resources.working_set.add_entry(path)
######################################################################
# Try to best guess the version of buildout given setuptools
if options.version is None:
try:
from distutils.version import LooseVersion
package = pkg_resources.require('setuptools')[0]
v = LooseVersion(package.version)
if v < LooseVersion('0.7'):
options.version = '2.1.1'
except:
pass
######################################################################
# Install buildout
ws = pkg_resources.working_set
ws = pkg_resources.working_set
cmd = [sys.executable, '-c',
'from setuptools.command.easy_install import main; main()',
......@@ -104,8 +121,8 @@ find_links = os.environ.get(
if find_links:
cmd.extend(['-f', find_links])
distribute_path = ws.find(
pkg_resources.Requirement.parse('distribute')).location
setuptools_path = ws.find(
pkg_resources.Requirement.parse('setuptools')).location
requirement = 'zc.buildout'
version = options.version
......@@ -113,13 +130,14 @@ if version is None and not options.accept_buildout_test_releases:
# Figure out the most recent final version of zc.buildout.
import setuptools.package_index
_final_parts = '*final-', '*final'
def _final_version(parsed_version):
for part in parsed_version:
if (part[:1] == '*') and (part not in _final_parts):
return False
return True
index = setuptools.package_index.PackageIndex(
search_path=[distribute_path])
search_path=[setuptools_path])
if find_links:
index.add_find_links((find_links,))
req = pkg_resources.Requirement.parse(requirement)
......@@ -142,7 +160,7 @@ if version:
cmd.append(requirement)
import subprocess
if subprocess.call(cmd, env=dict(os.environ, PYTHONPATH=distribute_path)) != 0:
if subprocess.call(cmd, env=dict(os.environ, PYTHONPATH=setuptools_path)) != 0:
raise Exception(
"Failed to execute command:\n%s",
repr(cmd)[1:-1])
......@@ -163,3 +181,4 @@ if options.config_file is not None:
zc.buildout.buildout.main(args)
shutil.rmtree(tmpeggs)
......@@ -2,4 +2,5 @@ import setshell
import tools
import manager
import local
import sge
import easy
This diff is collapsed.
This diff is collapsed.
import sqlalchemy
from sqlalchemy import Table, Column, Integer, String, ForeignKey
from bob.db.sqlalchemy_migration import Enum, relationship
from sqlalchemy.orm import backref
from sqlalchemy.ext.declarative import declarative_base
import os
from cPickle import dumps, loads
Base = declarative_base()
Status = ('waiting', 'executing', 'finished')
class ArrayJob(Base):
  """This class defines one element of an array job."""
  __tablename__ = 'ArrayJob'

  # Surrogate primary key; 'id' below is the task index and is NOT unique
  # across different array jobs.
  unique = Column(Integer, primary_key = True)
  # The task index of this element inside its array job (the SGE task id).
  id = Column(Integer)
  # Link to the parent Job this task belongs to.
  job_id = Column(Integer, ForeignKey('Job.id'))
  # Current state of the task; one of the values in the Status tuple.
  status = Column(Enum(*Status))
  # Result (exit) code of the task; None until the task has finished.
  result = Column(Integer)

  # Parent Job; the backref creates Job.array, the ordered list of tasks.
  job = relationship("Job", backref='array', order_by=id)

  def __init__(self, id, job_id):
    """Creates an array-job element for the given task index and parent Job id."""
    self.id = id
    self.job_id = job_id
    self.status = Status[0]  # start in the 'waiting' state
    self.result = None

  def std_out_file(self):
    """Returns the standard output file name of this task: the parent job's
    output file with the task index appended, or None if the parent job has
    no log directory configured."""
    return self.job.std_out_file() + "." + str(self.id) if self.job.log_dir else None

  def std_err_file(self):
    """Returns the standard error file name of this task, or None if the
    parent job has no log directory configured."""
    return self.job.std_err_file() + "." + str(self.id) if self.job.log_dir else None
class Job(Base):
  """This class defines one Job that was submitted to the Job Manager."""
  __tablename__ = 'Job'

  id = Column(Integer, primary_key = True) # The ID of the job (not corresponding to the grid ID)
  command_line = Column(String(255)) # The command line to execute, converted to one string
  name = Column(String(20)) # A hand-chosen name for the task
  arguments = Column(String(255)) # The kwargs arguments for the job submission (e.g. in the grid)
  grid_id = Column(Integer, unique = True) # The ID of the job as given from the grid
  log_dir = Column(String(255)) # The directory where the log files will be put to
  status = Column(Enum(*Status)) # One of the Status values: waiting/executing/finished
  result = Column(Integer) # The job's result (exit) code; None until it has finished

  def __init__(self, command_line, name = None, log_dir = None, **kwargs):
    """Creates a new Job. The command line and the extra keyword arguments
    are pickled (cPickle.dumps) into string columns for later retrieval."""
    self.command_line = dumps(command_line)
    self.name = name
    self.status = Status[0]  # start in the 'waiting' state
    self.result = None
    self.log_dir = log_dir
    self.arguments = dumps(kwargs)

  def get_command_line(self):
    """Un-pickles and returns the command line as passed to the constructor.
    str() converts a possible unicode value from the database before loads."""
    return loads(str(self.command_line))

  def set_arguments(self, **kwargs):
    """Merges the given keyword arguments into the pickled arguments."""
    previous = self.get_arguments()
    previous.update(kwargs)
    self.arguments = dumps(previous)

  def get_arguments(self):
    """Un-pickles and returns the stored keyword arguments as a dictionary."""
    return loads(str(self.arguments))

  def std_out_file(self, array_id = None):
    """Returns the standard output file name ('o<grid_id>' inside log_dir),
    or None if no log directory was configured.
    NOTE(review): the array_id parameter is currently ignored -- confirm intent."""
    return os.path.join(self.log_dir, "o" + str(self.grid_id)) if self.log_dir else None

  def std_err_file(self, array_id = None):
    """Returns the standard error file name ('e<grid_id>' inside log_dir),
    or None if no log directory was configured.
    NOTE(review): the array_id parameter is currently ignored -- confirm intent."""
    return os.path.join(self.log_dir, "e" + str(self.grid_id)) if self.log_dir else None

  def __str__(self):
    """Returns a one-line human-readable description of this job, including
    grid id, array range (if any), name, status/result and the command line."""
    id = "%d" % self.grid_id
    if self.array: j = "%s (%d-%d)" % (self.id, self.array[0].id, self.array[-1].id)
    else: j = "%s" % id
    if self.name is not None: n = "<Job: %s - '%s'>" % (j, self.name)
    else: n = "<Job: %s>" % j
    if self.result is not None: r = "%s (%d)" % (self.status, self.result)
    else: r = "%s" % self.status
    return "%s : %s -- %s" % (n, r, " ".join(self.get_command_line()))

  def execute(self, manager, index = None):
    """Executes the code for this job on the local machine."""
    import copy
    # private copy of the environment for the child process
    environ = copy.deepcopy(os.environ)
    manager.lock()
    # NOTE(review): 'job' is fetched but never used, and the lock taken above
    # is not released inside this method -- presumably the caller unlocks;
    # TODO confirm against the manager implementation.
    job = manager.get_jobs(self.id)
    if 'JOB_ID' in environ:
      # we execute a job in the grid
      wait_for_job = True  # NOTE(review): currently unused
    else:
      # we execute a job locally; emulate the grid's JOB_ID variable
      environ['JOB_ID'] = str(self.id)
    if index:
      # emulate the grid's SGE_TASK_ID for the given array element
      environ['SGE_TASK_ID'] = str(index.id)
    self.status = "executing"

    # return the subprocess pipe to the process
    try:
      import subprocess
      return subprocess.Popen(self.get_command_line(), env=environ, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
      # the command could not be started at all
      self.status = "finished"
      raise
class JobDependence(Base):
  """This table defines a many-to-many relationship between Jobs."""
  __tablename__ = 'JobDependence'
  id = Column(Integer, primary_key=True)

  # The job that depends on another job
  dependent_job_id = Column('dependent_job_id', Integer, ForeignKey('Job.id'))
  dependent_job = relationship('Job', backref = 'dependent_jobs', primaryjoin=(Job.id == dependent_job_id), order_by=id) # A list of Jobs that this one depends on
  # The job that is being depended upon
  depending_job_id = Column('depending_job_id', Integer, ForeignKey('Job.id'))
  depending_job = relationship('Job', backref = 'depending_jobs', primaryjoin=(Job.id == depending_job_id), order_by=id) # A list of Jobs that this one depends on

  def __init__(self, depending_job, dependent_job):
    """Registers that 'dependent_job' must wait for 'depending_job'.
    Note the argument order: (depending_job, dependent_job)."""
    self.dependent_job = dependent_job
    self.depending_job = depending_job
def add_grid_job(session, data, command_line, kwargs):
  """Helper function to create a job from the results of the grid execution via qsub.

  Parameters:

  session
    The SQLAlchemy session that the new Job (and its dependencies) is added to.
  data
    Dictionary of values parsed from the qsub output; when it contains a
    'job-array tasks' entry of the form 'm-n:s', one ArrayJob per task index
    is created as well.
  command_line
    The command line of the job (stored pickled inside the Job).
  kwargs
    The keyword arguments of the submission; a 'deps' entry lists the
    database ids of the jobs this one depends on.

  Returns the newly created (and committed) Job.
  """
  # create job
  # NOTE(review): 'data' and 'kwargs' both end up in Job's **kwargs and are
  # pickled into Job.arguments -- confirm this is the intended storage format.
  job = Job(data=data, command_line=command_line, kwargs=kwargs)
  session.add(job)
  session.flush()
  session.refresh(job)  # make sure job.id is populated by the database

  # add dependent jobs
  if 'deps' in kwargs:
    # FIX: the query previously used the 'id' builtin instead of the Job.id
    # column (AttributeError), and the loop iterated the misspelled name
    # 'dependecies' (NameError).
    dependencies = session.query(Job).filter(Job.id.in_(kwargs['deps'])).all()
    assert len(dependencies) == len(kwargs['deps'])
    for d in dependencies:
      session.add(JobDependence(job, d))

  # create array job if desired
  if 'job-array tasks' in data:
    import re
    b = re.compile(r'^(?P<m>\d+)-(?P<n>\d+):(?P<s>\d+)$').match(data['job-array tasks']).groupdict()
    (start, stop, step) = (int(b['m']), int(b['n']), int(b['s']))
    # add array jobs; the SGE range m-n is inclusive on both ends
    for i in range(start, stop+1, step):
      session.add(ArrayJob(i, job.id))

  session.commit()
  return job
def add_job(session, command_line, name=None, dependencies=None, array=None, log_dir=None, **kwargs):
  """Helper function to create a job that will run on the local machine.

  Parameters:

  session
    The SQLAlchemy session that the new Job is added to.
  command_line
    The command line of the job (stored pickled inside the Job).
  name
    An optional hand-chosen name for the job.
  dependencies
    An optional list of Job objects this job depends on.
  array
    An optional (start, stop, step) tuple; one ArrayJob is created per index
    in range(start, stop+1, step).
  log_dir
    The directory where the log files will be put to.
  kwargs
    Further submission arguments, pickled into the Job.

  Returns the newly created (and committed) Job.
  """
  # FIX: 'dependencies' previously used a mutable default argument ([]).
  if dependencies is None:
    dependencies = []

  # NOTE(review): 'kwargs' is stored nested under the key 'kwargs' inside
  # Job.arguments (same as add_grid_job) -- confirm this is intended.
  job = Job(command_line=command_line, name=name, log_dir=log_dir, kwargs=kwargs)
  session.add(job)
  session.flush()
  session.refresh(job)  # make sure job.id is populated by the database

  # by default grid_id and id are identical, but the grid_id might be overwritten later on
  job.grid_id = job.id

  for d in dependencies:
    session.add(JobDependence(job, d))

  if array:
    (start, stop, step) = array
    # add array jobs; the range m-n is inclusive on both ends
    for i in range(start, stop+1, step):
      session.add(ArrayJob(i, job.id))

  session.commit()
  return job
\ No newline at end of file
This diff is collapsed.
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Wed 24 Aug 2011 13:06:25 CEST
"""Defines the job manager which can help you managing submitted grid jobs.
"""
import os
import sys

from .manager import JobManager
from .setshell import environ
from .models import add_job
from .tools import qsub, qstat, qdel, make_shell
class JobManagerSGE(JobManager):
"""The JobManager will submit and control the status of submitted jobs"""
def __init__(self, database='submitted.sql3', context='grid'):
"""Initializes this object with a state file and a method for qsub'bing.
Keyword parameters:
statefile
The file containing a valid status database for the manager. If the file
does not exist it is initialized. If it exists, it is loaded.
context
The context to provide when setting up the environment to call the SGE
utilities such as qsub, qstat and qdel (normally 'grid', which also
happens to be default)
"""
self.context = environ(context)
JobManager.__init__(self, database)
def submit(self, command_line, name = None, array = None, dependencies = [], log_dir = "logs", **kwargs):
"""Submits a job that will be executed in the grid."""
# add job to database
self.lock()
job = add_job(self.session, command_line, name, dependencies, array, log_dir=log_dir, context=self.context, **kwargs)
# ... what we will actually submit to the grid is a wrapper script that will call the desired command...
# get the name of the file that was called originally
jman = os.path.realpath(sys.argv[0])
python = jman.replace('jman', 'python')
# generate call to the wrapper script
command = make_shell(python, [jman, 'run-job', self.database])
if array:
q_array = "%d-%d:%d" % array
grid_id = qsub(command, context=self.context, name=name, deps=dependencies, array=q_array, stdout=log_dir, stderr=log_dir, **kwargs)
# set the grid id of the job
job.grid_id = grid_id
self.session.commit()
# get the result of qstat
status = qstat(grid_id, context=self.context)
for k,v in status.iteritems():
print k, ":", v
# return the job id
job_id = job.id
self.unlock()
return job_id
def resubmit(self, job, stdout='', stderr='', dependencies=[],
failed_only=False):
"""Re-submit jobs automatically"""
if dependencies: job.kwargs['deps'] = dependencies
if stdout: job.kwargs['stdout'] = stdout
if stderr: job.kwargs['stderr'] = stderr
if failed_only and job.is_array():
retval = []
for k in job.check_array():
job.kwargs['array'] = (k,k,1)
retval.append(self.submit(job.args[0], **job.kwargs))
return retval
else: #either failed_only is not set or submit the job as it was, entirely
return self.submit(job.args[0], **job.kwargs)
def keys(self):
return self.job.keys()
def has_key(self, key):
return self.job.has_key(key)
def __getitem__(self, key):
return self.job[key]
def __delitem__(self, key):
if not self.job.has_key(key): raise KeyError, key
qdel(key, context=self.context)
del self.job[key]
def __str__(self):
"""Returns the status of each job still being tracked"""
return self.table(43)
def table(self, maxcmdline=0):
"""Returns the status of each job still being tracked"""
# configuration
fields = ("job-id", "queue", "age", "job-name", "arguments")
lengths = (20, 7, 3, 20, 43)
marker = '='
# work
fmt = "%%%ds %%%ds %%%ds %%%ds %%-%ds" % lengths
delimiter = fmt % tuple([k*marker for k in lengths])
header = [fields[k].center(lengths[k]) for k in range(len(lengths))]
header = ' '.join(header)
return '\n'.join([header] + [delimiter] + \
[self[k].row(fmt, maxcmdline) for k in sorted(self.job.keys())])
def clear(self):
"""Clear the whole job queue"""
for k in self.keys(): del self[k]
def describe(self, key):
"""Returns a string explaining a certain job"""
return str(self[key])
def stdout(self, key, instance=None):
"""Gets the output of a certain job"""
return self[key].stdout(instance)
def stderr(self, key, instance=None):
"""Gets the error output of a certain job"""
return self[key].stderr(instance)
def refresh(self, ignore_warnings=False):
"""Conducts a qstat over all jobs in the cache. If the job is not present
anymore check the logs directory for output and error files. If the size of
the error file is different than zero, warn the user.
Returns two lists: jobs that work and jobs that require attention
(error file does not have size 0).
"""
success = []
error = []
for k in sorted(self.job.keys()):
d = qstat(k, context=self.context)
if not d: #job has finished. check
status = self.job[k].check(ignore_warnings)
if status:
success.append(self.job[k])
del self.job[k]
logger.debug("Job %d completed successfully" % k)
else:
error.append(self.job[k])
del self.job[k]
logger.debug("Job %d probably did not complete successfully" % k)
return success, error
......@@ -45,6 +45,7 @@ def makedirs_safe(fulldir):
try:
if not os.path.exists(fulldir): os.makedirs(fulldir)
except OSError as exc: # Python >2.5
import errno
if exc.errno == errno.EEXIST: pass
else: raise
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment