jgen.py 4.44 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :


"""Script generator for grid jobs

This script can generate multiple output files based on a template and a set of
variables explained in a YAML file. It can also, optionally, generate a single
aggregated file for all possible configuration sets in the YAML file. It can be
used to:

  1. Generate a set of runnable experiment configurations from a single
     template
  2. Generate a single script to launch all runnable experiments

"""

# Usage examples shown at the end of the command-line help. This is a
# %-format template: main() substitutes ``%(prog)s`` with the basename of
# ``sys.argv[0]`` and hands the result to argparse as the parser's ``epilog``.
# The backslash right after the opening quotes suppresses the leading newline.
__epilog__ = """\
  To generate a configuration for running experiments and an aggregation script,
  do the following:

    $ %(prog)s vars.yaml config.py 'out/cfg-{{ name }}-.py' run.sh out/run.sh

  In this example, the user dumps all output in a directory called "out". The
  name of each output file uses variable expansion from the file "vars.yaml" to
  create a new file for each configuration set defined inside. In this example,
  we assume it defines at least variable "name" within with multiple values for
  each configuration set. The file "run.sh" represents a template for the
  aggregation and the extrapolated template will be saved at 'out/run.sh'. For
  more information about how to structure these files, read the GridTK manual.

  To only generate the configurations and not the aggregation, omit the last
  two parameters:

    $ %(prog)s vars.yaml config.py 'out/cfg-{{ name }}-.py'

"""

import os
import sys

import argparse
import logging

from .. import generator
from .. import tools


def _setup_logger(verbosity):
  """Configures and returns the ``gridtk`` logger used by this script

  The verbosity count is translated into a logging level as follows:
  0 -> ERROR, 1 -> WARNING, 2 -> INFO, 3 (or more) -> DEBUG. Values outside
  of this range are clamped, and ``None`` is treated as 0.

  A stream handler with a timestamped format is attached to the logger. The
  handler is installed only once per process: calling this function again
  merely updates the logging level, so messages are not emitted several times
  when the script entry point runs more than once in the same interpreter.

  Parameters:

    verbosity (int): number of times the ``--verbose`` flag was given

  Returns:

    logging.Logger: the configured ``gridtk`` logger

  """

  levels = (logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG)

  # clamp on both ends so that unexpected counts can never raise
  index = max(0, min(verbosity or 0, len(levels) - 1))

  logger = logging.getLogger('gridtk')

  # the handler is tagged with a name so that we can recognize our own
  # handler on later calls without interfering with handlers installed by
  # other code on the same logger
  handler_name = 'gridtk.jgen.console'
  installed = any(getattr(h, 'name', None) == handler_name
      for h in logger.handlers)
  if not installed:
    handler = logging.StreamHandler()
    handler.set_name(handler_name)
    handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s"))
    logger.addHandler(handler)

  logger.setLevel(levels[index])

  return logger


def main(command_line_options = None):
  """Command-line entry point: expands templates into output files

  Reads the YAML variables file and the generation template, writes one output
  file per configuration set (file names come from expanding the ``genout``
  template) and, when both aggregation arguments are given, writes a single
  aggregated file as well.

  Parameters:

    command_line_options (list): optional replacement for ``sys.argv``. The
      first entry is taken as the name of the calling script and the remaining
      ones are parsed as arguments. When empty or ``None``, ``sys.argv`` is
      used instead.

  Returns:

    int: always 0. If an aggregation template is given without an output
    name, an error is logged and ``sys.exit(1)`` is called instead.

  """

  from ..config import __version__

  def _slurp(path):
    # returns the whole contents of a text file
    with open(path, 'rt') as stream:
      return stream.read()

  def _dump(path, contents):
    # writes a text file, creating its parent directory first when it has one
    parent = os.path.dirname(path)
    if parent:
      tools.makedirs_safe(parent)
    with open(path, 'wt') as stream:
      stream.write(contents)

  parser = argparse.ArgumentParser(
      description=__doc__,
      epilog=__epilog__ % dict(prog=os.path.basename(sys.argv[0])),
      formatter_class=argparse.ArgumentDefaultsHelpFormatter,
      )

  # positional arguments: (name, extra keyword arguments, help message)
  positionals = (
      ('variables', {},
        "Text file containing the variables in YAML format"),
      ('gentmpl', {},
        "Text file containing the template for generating multiple "
        "outputs, one for each configuration set"),
      ('genout', {},
        "Template for generating the output filenames"),
      ('aggtmpl', {'nargs': '?'},
        "Text file containing the template for generating one single "
        "output out of all configuration sets"),
      ('aggout', {'nargs': '?'},
        "Name of the output aggregation file"),
      )
  for name, extra, text in positionals:
    parser.add_argument(name, type=str, help=text, **extra)

  parser.add_argument('-v', '--verbose', action='count', default=0,
      help="Increase the verbosity level from 0 (only error messages) to 1 "
      "(warnings), 2 (log messages), 3 (debug information) by adding the "
      "--verbose option as often as desired (e.g. '-vvv' for debug).")
  parser.add_argument('-V', '--version', action='version',
      version='GridTk version %s' % __version__)

  # parse either the process arguments or the ones handed in by the caller
  if not command_line_options:
    args = parser.parse_args()
    args.wrapper_script = sys.argv[0]
  else:
    args = parser.parse_args(command_line_options[1:])
    args.wrapper_script = command_line_options[0]

  # logging has to be ready before anything can be reported
  logger = _setup_logger(args.verbose)

  # an aggregation template is useless without a destination
  if args.aggtmpl and not args.aggout:
    logger.error('Missing aggregate output name')
    sys.exit(1)

  # load the inputs shared by all generation steps
  variables = _slurp(args.variables)
  template = _slurp(args.gentmpl)

  # one output file per configuration set
  contents = generator.generate(variables, template)
  filenames = generator.generate(variables, args.genout)
  for path, text in zip(filenames, contents):
    _dump(path, text)

  # aggregation is optional
  if not (args.aggtmpl and args.aggout):
    return 0

  aggregated = generator.aggregate(variables, _slurp(args.aggtmpl))
  _dump(args.aggout, aggregated)

  return 0