diff --git a/bob/bio/base/algorithm/BIC.py b/bob/bio/base/algorithm/BIC.py index ff05283400b56f7f9d90a62864f9664bbd708052..33c0e03b40aa1766cea3e23e054f444bb7dc588a 100644 --- a/bob/bio/base/algorithm/BIC.py +++ b/bob/bio/base/algorithm/BIC.py @@ -136,7 +136,7 @@ class BIC (Algorithm): def read_probe(self, probe_file): """Loads the probe feature from file, using the ``load_function`` specified in the constructor.""" - return self.load_function(bob.io.base.HDF5File(probe_file)) + return self.read_function(bob.io.base.HDF5File(probe_file)) def score(self, model, probe): diff --git a/bob/bio/base/script/grid_search.py b/bob/bio/base/script/grid_search.py new file mode 100755 index 0000000000000000000000000000000000000000..2216eed3d3ab829e1834cc85e71b55f9331a40c2 --- /dev/null +++ b/bob/bio/base/script/grid_search.py @@ -0,0 +1,435 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Manuel Guenther <Manuel.Guenther@idiap.ch> +from __future__ import print_function + +from . import verify + +import argparse, os, sys +import copy # for deep copies of dictionaries +from .. import utils + +import bob.core +logger = bob.core.log.setup("bob.bio.base") + +# the configuration read from config file +global configuration +# the place holder key given on command line +global place_holder_key +# the extracted command line arguments +global args +# the job ids as returned by the call to the faceverify function +global job_ids +# first fake job id (useful for the --dry-run option) +global fake_job_id +fake_job_id = 0 +# the number of grid jobs that are executed +global job_count +# the total number of experiments run +global task_count +# the directories, where score files will be generated +global score_directories + + +# The different steps of the processing chain. +# Use these keywords to change parameters of the specific part +steps = ['preprocess', 'extract', 'project', 'enroll', 'score'] + + +def command_line_options(command_line_parameters): + # set up command line parser + parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument('-c', '--configuration-file', required = True, + help = 'The file containing the information what parameters you want to have tested.') + + parser.add_argument('-k', '--place-holder-key', default = '#', + help = 'The place holder key that starts the place holders which will be replaced.') + + parser.add_argument('-d', '--database', required = True, + help = 'The database that you want to execute the experiments on.') + + parser.add_argument('-P', '--protocol', + help = 'The protocol that you want to use (if not specified, the default protocol for the database is used).') + + parser.add_argument('-s', '--sub-directory', required = True, + help = 'The sub-directory where the files of the current experiment should be stored. 
Please specify a name that describes your experiment.') + parser.add_argument('-p', '--preprocessor', + help = "The preprocessing to be used (will overwrite the 'preprocessor' in the configuration file)") + parser.add_argument('-e', '--extractor', + help = "The features to be extracted (will overwrite the 'extractor' in the configuration file)") + parser.add_argument('-a', '--algorithm', + help = "The recognition algorithm to be employed (will overwrite the 'algorithm' in the configuration file)") + parser.add_argument('-g', '--grid', + help = 'The SGE grid configuration') + parser.add_argument('-l', '--parallel', type=int, + help = 'Run the algorithms in parallel on the local machine, using the given number of parallel threads') + parser.add_argument('-L', '--gridtk-database-split-level', type=int, default=-1, + help = 'Split the gridtk databases after the given level: -1 - never split; 0 - preprocess; 1 - extract; 2 - project; 3 - enroll; 4 - score') + parser.add_argument('-x', '--executable', + help = '(optional) The verify script to be used instead of bob.bio.base.script.verify (specified as an importable module name, not as a path inside the bin directory)') + parser.add_argument('-R', '--result-directory', default = os.path.join("/idiap/user", os.environ["USER"]), + help = 'The directory where to write the resulting score files.') + parser.add_argument('-T', '--temp-directory', default = os.path.join("/idiap/temp", os.environ["USER"]), + help = 'The directory where to write temporary files into.') + parser.add_argument('-i', '--preprocessed-directory', + help = '(optional) The directory where to read the already preprocessed data from (no preprocessing is performed in this case).') + parser.add_argument('-G', '--gridtk-database-directory', default = 'grid_db', + help = 'Directory where the submitted.sql3 files should be written into (will create sub-directories as needed)') + parser.add_argument('-w', '--write-commands', + help = '(optional) The file name where to write the calls into (will not write the dependencies, though)') + parser.add_argument('-q', '--dry-run', action='store_true', + help = 'Just write the commands to console and mimic dependencies, but do not execute the commands') + parser.add_argument('-j', '--skip-when-existent', action='store_true', + help = 'Skip the submission/execution of jobs when the result directory already exists') + parser.add_argument('-N', '--replace-variable', + help = 'Use the given variable instead of the "replace" keyword in the configuration file') + parser.add_argument('parameters', nargs = argparse.REMAINDER, + help = "Parameters directly passed to the verify script. Use -- to separate these parameters from the parameters of this script.
See 'bin/verify.py --help' for a complete list of options.") + + bob.core.log.add_command_line_option(parser) + + global args + args = parser.parse_args(command_line_parameters) + bob.core.log.set_verbosity_level(logger, args.verbose) + + if args.executable: + global verify + verify = __import__('importlib').import_module(args.executable) + + + + +def extract_values(replacements, indices): + """Extracts the value dictionary from the given dictionary of replacements""" + extracted_values = {} + for place_holder in replacements.keys(): + # get all occurrences of the place holder key + parts = place_holder.split(place_holder_key) + # only one part -> no place holder key found -> no strings to be extracted + if len(parts) == 1: + continue + + keys = [part[:1] for part in parts[1:]] + + value_index = indices[place_holder] + + entries = replacements[place_holder] + entry_key = sorted(entries.keys())[value_index] + + # check that the keys are unique + for key in keys: + if key in extracted_values: + raise ValueError("The replacement key '%s' was defined multiple times. Please use each key only once."%key) + + # extract values + if len(keys) == 1: + extracted_values[keys[0]] = entries[entry_key] + + else: + for i in range(len(keys)): + extracted_values[keys[i]] = entries[entry_key][i] + + return extracted_values + + +def replace(string, replacements): + """Replaces the place holders in the given string with the according values from the values dictionary.""" + # get all occurrences of the place holder key + parts = string.split(place_holder_key) + # only one part -> no place holder key found -> return the whole string + if len(parts) == 1: + return string + + keys = [part[:1] for part in parts[1:]] + + retval = parts[0] + for i in range(0, len(keys)): + # replace the place holder by the desired string and add the remaining of the command + retval += str(replacements[keys[i]]) + str(parts[i+1][1:]) + + return retval + + +def create_command_line(replacements): + """Creates the parameters for the function call that will be given to the faceverify script.""" + # get the values to be replaced with + values = {} + for key in configuration.replace: + values.update(extract_values(configuration.replace[key], replacements)) + # replace the place holders with the values + call = [sys.argv[0], '--database', args.database] + if args.protocol: + call += ['--protocol', args.protocol] + call += ['--temp-directory', args.temp_directory, '--result-directory', args.result_directory] + return call + [ + '--preprocessor', replace(configuration.preprocessor, values), + '--extractor', replace(configuration.extractor, values), + '--algorithm', replace(configuration.algorithm, values), + '--imports' + ] + configuration.imports + + + +# Parts that could be skipped when the dependecies are on the indexed level +skips = [[''], + ['--skip-preprocessing'], + ['--skip-extractor-training', '--skip-extraction'], + ['--skip-projector-training', '--skip-projection'], + ['--skip-enroller-training', '--skip-enrollment'] + ] + +# The keywords to parse the job ids to get the according dependencies right +dependency_keys = ['DUMMY', 'preprocess', 'extract', 'project', 'enroll'] + + +def directory_parameters(directories): + """This function generates the faceverify parameters that define the directories, where the data is stored. 
+ The directories are set such that data is reused whenever possible, but disjoint if needed.""" + def _join_dirs(index, subdir): + # collect sub-directories + dirs = [] + for i in range(index+1): + dirs += directories[steps[i]] + if not dirs: + return subdir + else: + dir = dirs[0] + for d in dirs[1:]: + dir = os.path.join(dir, d) + return os.path.join(dir, subdir) + + global args + parameters = [] + + # add directory parameters + # - preprocessing + if args.preprocessed_directory: + parameters += ['--preprocessed-directory', os.path.join(args.preprocessed_directory, _join_dirs(0, 'preprocessed'))] + skips[1] + else: + parameters += ['--preprocessed-directory', _join_dirs(0, 'preprocessed')] + + # - feature extraction + parameters += ['--extracted-directory', _join_dirs(1, 'extracted'), '--extractor-file', _join_dirs(1, 'Extractor.hdf5')] + + # - feature projection + parameters += ['--projected-directory', _join_dirs(2, 'projected'), '--projector-file', _join_dirs(2, 'Projector.hdf5')] + + # - model enrollment + parameters += ['--model-directories', _join_dirs(3, 'N-Models'), _join_dirs(3, 'T-Models'), '--enroller-file', _join_dirs(3, 'Enroller.hdf5')] + + # the sub-directory, given on the command line + parameters += ['--sub-directory', args.sub_directory] + + global score_directories + score_directories.append(_join_dirs(4, '.')) + + # grid database + if args.grid is not None or args.parallel is not None: + # we get one database per preprocessing job (all others might have job inter-dependencies) + parameters += ['--gridtk-database-file', os.path.join(args.gridtk_database_directory, _join_dirs(args.gridtk_database_split_level, 'submitted.sql3'))] + + return parameters + + +def check_requirements(replacements): + # check if the requirements are met + global configuration + values = {} + for key in configuration.replace: + values.update(extract_values(configuration.replace[key], replacements)) + for requirement in configuration.requirements: + test = replace(requirement, values) + while not isinstance(test, bool): + test = eval(test) + if not test: + return False + return True + + +def execute_dependent_task(command_line, directories, dependency_level): + # add other command line arguments + if args.grid: + command_line += ['--grid', args.grid, '--stop-on-failure'] + if args.parallel: + command_line += ['--grid', 'bob.bio.base.grid.Grid("local", number_of_parallel_processes=%d)' % args.parallel, '--run-local-scheduler', '--stop-on-failure'] + + if args.verbose: + command_line += ['-' + 'v'*args.verbose] + + # create directory parameters + command_line += directory_parameters(directories) + + # add skip parameters according to the dependency level + for i in range(1, dependency_level+1): + command_line += skips[i] + + if args.parameters is not None: + command_line += args.parameters[1:] + + # write the command to file?
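+ # (if --write-commands is given, the generated verify.py call is stored in the directory of the --gridtk-database-file, so each sub-experiment keeps its own copy of the command line)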
+ if args.write_commands: + index = command_line.index('--gridtk-database-file') + command_file = os.path.join(os.path.dirname(command_line[index+1]), args.write_commands) + bob.io.base.create_directories_safe(os.path.dirname(command_file)) + with open(command_file, 'w') as f: + f.write('bin/verify.py ') + for p in command_line[1:]: + f.write(p + ' ') + f.close() + logger.info("Wrote command line into file '%s'", command_file) + + # extract dependencies + global job_ids + dependencies = [] + for k in sorted(job_ids.keys()): + for i in range(1, dependency_level+1): + if k.find(dependency_keys[i]) != -1: + dependencies.append(job_ids[k]) + + # add dependencies + if dependencies: + command_line += ['--external-dependencies'] + [str(d) for d in dependencies] + + # execute the command + new_job_ids = {} + try: + verif_args = verify.parse_arguments(command_line[1:]) + result_dir = os.path.join(verif_args.result_directory, verif_args.sub_directory) + if not args.skip_when_existent or not os.path.exists(result_dir): + # get the command line parameter for the result directory + if args.dry_run: + if args.verbose: + print ("Would have executed job", utils.command_line(command_line)) + else: + # execute the face verification experiment + global fake_job_id + new_job_ids = verify.verify(verif_args, command_line, external_fake_job_id = fake_job_id) + else: + logger.info("Skipping execution of %s since result directory '%s' already exists", utils.command_line(command_line), result_dir) + + except Exception as e: + logger.error("The execution of job was rejected!\n%s\n Reason:\n%s", " ".join(command_line), e) + + # some statistics + global job_count, task_count + job_count += len(new_job_ids) + task_count += 1 + fake_job_id += 100 + job_ids.update(new_job_ids) + + +def create_recursive(replace_dict, step_index, directories, dependency_level, keys=[]): + """Iterates through all the keywords and replaces all place holders with all keywords in a defined order.""" + + # check if we are at the lowest level + if step_index == len(steps): + # create a call and execute it + if check_requirements(replace_dict): + execute_dependent_task(create_command_line(replace_dict), directories, dependency_level) + else: + if steps[step_index] not in directories: + directories[steps[step_index]] = [] + + # we are at another level + if steps[step_index] not in configuration.replace.keys(): + # nothing to be replaced here, so just go to the next level + create_recursive(replace_dict, step_index+1, directories, dependency_level) + else: + # iterate through the keys + if keys == []: + # call this function recursively by defining the set of keys that we need + create_recursive(replace_dict, step_index, directories, dependency_level, keys = sorted(configuration.replace[steps[step_index]].keys())) + else: + # create a deep copy of the replacement dict to be able to modify it + replace_dict_copy = copy.deepcopy(replace_dict) + directories_copy = copy.deepcopy(directories) + # iterate over all replacements for the first of the keys + key = keys[0] + replacement_directories = sorted(configuration.replace[steps[step_index]][key]) + directories_copy[steps[step_index]].append("") + new_dependency_level = dependency_level + for replacement_index in range(len(replacement_directories)): + # increase the counter of the current replacement + replace_dict_copy[key] = replacement_index + directories_copy[steps[step_index]][-1] = replacement_directories[replacement_index] + # call the function recursively + if len(keys) == 1: + # we have to go to 
the next level + create_recursive(replace_dict_copy, step_index+1, directories_copy, new_dependency_level) + else: + # recurse with the remaining keys + create_recursive(replace_dict_copy, step_index, directories_copy, new_dependency_level, keys = keys[1:]) + new_dependency_level = step_index + + +def main(command_line_parameters = sys.argv): + """Main entry point for the parameter test. Try --help to see the parameters that can be specified.""" + + global task_count, job_count, job_ids, score_directories + job_count = 0 + task_count = 0 + job_ids = {} + score_directories = [] + + command_line_options(command_line_parameters[1:]) + + global configuration, place_holder_key + configuration = utils.read_config_file(args.configuration_file) + place_holder_key = args.place_holder_key + + if args.preprocessor: + configuration.preprocessor = args.preprocessor + if args.extractor: + configuration.extractor = args.extractor + if args.algorithm: + configuration.algorithm = args.algorithm + + if args.replace_variable is not None: + exec("configuration.replace = configuration.%s" % args.replace_variable) + + for attribute in ('preprocessor', 'extractor', 'algorithm'): + if not hasattr(configuration, attribute): + raise ValueError("The given configuration file '%s' does not contain the required attribute '%s', and it was not given on the command line either" %(args.configuration_file, attribute)) + + # extract the dictionary of replacements from the configuration + if not hasattr(configuration, 'replace'): + raise ValueError("Please define a set of replacements using the 'replace' keyword.") + if not hasattr(configuration, 'imports'): + configuration.imports = ['bob.bio.base'] + logger.info("No 'imports' specified in configuration file '%s' -> using default %s", args.configuration_file, configuration.imports) + + if not hasattr(configuration, 'requirements'): + configuration.requirements = [] + + replace_dict = {} + for step, replacements in configuration.replace.items(): + for key in replacements.keys(): + if key in replace_dict: + raise ValueError("The replacement key '%s' was defined multiple times. Please use each key only once." % key) + # we always start with index 0.
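+ # (create_recursive below iterates each of these indices over all possible replacement values for its key)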
+ replace_dict[key] = 0 + + # now, iterate through the list of replacements and create the according calls + create_recursive(replace_dict, step_index = 0, directories = {}, dependency_level = 0) + + # finally, write some information about the + logger.info("The number of executed tasks is: %d, which are split up into %d jobs that are executed in the grid" %(task_count, job_count)) + + return score_directories diff --git a/bob/bio/base/script/verify.py b/bob/bio/base/script/verify.py index 41f90e4002bfb7abc443c58cce632e559f6c3b77..a699f13d685605790f478aede29354f8c6c2594d 100644 --- a/bob/bio/base/script/verify.py +++ b/bob/bio/base/script/verify.py @@ -378,7 +378,7 @@ def verify(args, command_line_parameters, external_fake_job_id = 0): return {} else: # add jobs - submitter = tools.GridSubmission(args, command_line_parameters, first_fake_job_id = 0) if args.grid else None + submitter = tools.GridSubmission(args, command_line_parameters, first_fake_job_id = external_fake_job_id) if args.grid else None retval = add_jobs(args, submitter) tools.write_info(args, command_line_parameters) diff --git a/bob/bio/base/test/dummy/algorithm.py b/bob/bio/base/test/dummy/algorithm.py index 61c874b25a626112706eef9051abfcdb6f3a45b2..d1d25d487ea756a2ead758fabd266cb64853cd0a 100644 --- a/bob/bio/base/test/dummy/algorithm.py +++ b/bob/bio/base/test/dummy/algorithm.py @@ -8,7 +8,7 @@ _data = [5., 6., 7., 8., 9.] class DummyAlgorithm (Algorithm): """This class is used to test all the possible functions of the tool chain, but it does basically nothing.""" - def __init__(self): + def __init__(self, **kwargs): """Generates a test value that is read and written""" # call base class constructor registering that this tool performs everything. diff --git a/bob/bio/base/test/dummy/extractor.py b/bob/bio/base/test/dummy/extractor.py index 0b95e046717a087fa4bb897b161340a1dff42541..a3aaf6f7ea04347393db8cbc8efd1dac95e98000 100644 --- a/bob/bio/base/test/dummy/extractor.py +++ b/bob/bio/base/test/dummy/extractor.py @@ -6,7 +6,7 @@ from bob.bio.base.extractor import Extractor _data = [0., 1., 2., 3., 4.] class DummyExtractor (Extractor): - def __init__(self): + def __init__(self, **kwargs): Extractor.__init__(self, requires_training=True) self.model = False diff --git a/bob/bio/base/test/dummy/grid_search.py b/bob/bio/base/test/dummy/grid_search.py new file mode 100755 index 0000000000000000000000000000000000000000..76f8e9a2eb5fddf6473e369bb6b658f06a5517f4 --- /dev/null +++ b/bob/bio/base/test/dummy/grid_search.py @@ -0,0 +1,49 @@ +#!/bin/python + +# This file describes an exemplary configuration file that can be used in combination with the bin/parameter_test.py script. + + +# The preprocessor uses two fake parameters, which are called #1 and #4 +preprocessor = "bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(fake_parameter=#1, other_parameters=#4)" + +# The extractor uses the **default** 'dummy' option, which is registered as a resource +extractor = "dummy" + +# The algorithm uses two fake parameters, which are called #2 and #3 +algorithm = "bob.bio.base.test.dummy.algorithm.DummyAlgorithm(fake_distance_function=#2, is_distance_function=#3)" + + +# Here, we define, which placeholder keys (#.) 
should be replaces by which values in which stage of the processing toolchain +replace = { + # For preprocessing, select two independent dummy parameters + 'preprocess' : { + # Fake parameter to be selected for placeholder #1 + "#1" : { + 'P1' : 10, + 'P2' : 20, + }, + # fake parameter to be selected for placeholder #4 + "#4" : { + 'F1' : 15, + 'F2' : 30 + } + }, + + # For scoring, select two dependent dummy parameters + 'score' : { + # Replace placeholders #2 and #3 **at the same time** + "(#2, #3)" : { + # For distance_function = 'bob.math.histogram_intersection' and is_distance_function = False, place result in sub-directory 'D1' + 'S1' : ('bob.math.histogram_intersection', 'False'), + # For distance_function = 'bob.math.chi_square' and is_distance_function = True, place result in sub-directory 'D2' + 'S2' : ('bob.math.chi_square', 'True') + } + } +} + +# An optional list of requirements +# If these requirements are not fulfilled for the current values of #1 and #4, these experiments will not be executed. +requirements = ["2*#1 > #4"] + +# A list of imports that are required to use the defined preprocessor, extractor and algorithm from above +imports = ['bob.math', 'bob.bio.base.test.dummy'] diff --git a/bob/bio/base/test/dummy/preprocessor.py b/bob/bio/base/test/dummy/preprocessor.py index 2770e2fe37fcb66053da2eba02cca3bc1ad71b27..9584d9fd34562decdb63e9d058105200a8dee1f5 100644 --- a/bob/bio/base/test/dummy/preprocessor.py +++ b/bob/bio/base/test/dummy/preprocessor.py @@ -1,7 +1,7 @@ from bob.bio.base.preprocessor import Preprocessor class DummyPreprocessor (Preprocessor): - def __init__(self): + def __init__(self, **kwargs): Preprocessor.__init__(self) def __call__(self, data, annotation): diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py index 05d8feb23ade7c136eb60766eb907480a2d2cbf7..cb5acba33fba78da2fa690aa5e1bd297db0952b5 100644 --- a/bob/bio/base/test/test_scripts.py +++ b/bob/bio/base/test/test_scripts.py @@ -136,7 +136,7 @@ def test_verify_parallel(): '-s', 'test_parallel', '--temp-directory', test_dir, '--result-directory', test_dir, - '-g', 'bob.bio.base.grid.Grid(grid = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '-R', + '-g', 'bob.bio.base.grid.Grid(grid = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '--stop-on-failure', '--import', 'bob.io.image' ] @@ -279,31 +279,10 @@ def test_evaluate(): os.rmdir(test_dir) -""" -def test11_baselines_api(self): - self.grid_available() - # test that all of the baselines would execute - from facereclib.script.baselines import available_databases, all_algorithms, main - - for database in available_databases: - parameters = [sys.argv[0], '-d', database, '--dry-run'] - main(parameters) - parameters.append('-g') - main(parameters) - parameters.extend(['-e', 'HTER']) - main(parameters) - - for algorithm in all_algorithms: - parameters = [sys.argv[0], '-a', algorithm, '--dry-run'] - main(parameters) - parameters.append('-g') - main(parameters) - parameters.extend(['-e', 'HTER']) - main(parameters) - +""" def test16_collect_results(self): # simply test that the collect_results script works test_dir = tempfile.mkdtemp(prefix='bobtest_') @@ -311,52 +290,75 @@ def test16_collect_results(self): main(['--directory', test_dir, '--sort', '--sort-key', 'dir', '--criterion', 'FAR', '--self-test']) os.rmdir(test_dir) +""" -def test21_parameter_script(self): - 
self.grid_available() +@utils.grid_available +def test_grid_search(): test_dir = tempfile.mkdtemp(prefix='bobtest_') # tests that the parameter_test.py script works properly - # first test without grid option - parameters = [ - sys.argv[0], - '-c', os.path.join(base_dir, 'scripts', 'parameter_Test.py'), - '-d', os.path.join(base_dir, 'scripts', 'atnt_Test.py'), - '-f', 'lgbphs', - '-b', 'test_p', - '-s', '.', - '-T', test_dir, - '-R', test_dir, - '--', '--dry-run', - ] - from facereclib.script.parameter_test import main - main(parameters) + try: + # first test without grid option + parameters = [ + sys.argv[0], + '-c', os.path.join(dummy_dir, 'grid_search.py'), + '-d', 'dummy', + '-e', 'dummy', + '-s', 'test_grid_search', + '-T', test_dir, + '-R', test_dir, + '--', '--dry-run', + ] + from bob.bio.base.script.grid_search import main + main(parameters) - # number of jobs should be 12 - self.assertEqual(facereclib.script.parameter_test.task_count, 12) - # but no job in the grid - self.assertEqual(facereclib.script.parameter_test.job_count, 0) + # number of jobs should be 12 + assert bob.bio.base.script.grid_search.task_count == 6 + # but no job in the grid + assert bob.bio.base.script.grid_search.job_count == 0 + + # now, in the grid... + parameters = [ + sys.argv[0], + '-c', os.path.join(dummy_dir, 'grid_search.py'), + '-d', 'dummy', + '-s', 'test_grid_search', + '-i', '.', + '-G', test_dir, + '-T', test_dir, + '-R', test_dir, + '-w', 'Job.txt', + '-g', 'grid', + '--', '--dry-run', + ] + main(parameters) - # now, in the grid... - parameters = [ - sys.argv[0], - '-c', os.path.join(base_dir, 'scripts', 'parameter_Test.py'), - '-d', os.path.join(base_dir, 'scripts', 'atnt_Test.py'), - '-f', 'lgbphs', - '-b', 'test_p', - '-i', '.', - '-s', '.', - '-T', test_dir, - '-R', test_dir, - '-g', 'grid', - '--', '--dry-run', - ] - main(parameters) + # number of jobs should be 12 + assert bob.bio.base.script.grid_search.task_count == 6 + # number of jobs in the grid: 36 (including best possible re-use of files; minus preprocessing) + assert bob.bio.base.script.grid_search.job_count == 30 + + # and now, finally run locally + parameters = [ + sys.argv[0], + '-c', os.path.join(dummy_dir, 'grid_search.py'), + '-d', 'dummy', + '-s', 'test_grid_search', + '-G', test_dir, + '-T', test_dir, + '-R', test_dir, + '-w', 'Job.txt', + '-l', '4', '-L', '-1', '-vv', + '--', '--imports', 'bob.io.image', + '--dry-run' + ] + main(parameters) - # number of jobs should be 12 - self.assertEqual(facereclib.script.parameter_test.task_count, 12) - # number of jobs in the grid: 36 (including best possible re-use of files; minus preprocessing) - self.assertEqual(facereclib.script.parameter_test.job_count, 36) + # number of jobs should be 12 + assert bob.bio.base.script.grid_search.task_count == 6 + # number of jobs in the grid: 36 (including best possible re-use of files; minus preprocessing) + assert bob.bio.base.script.grid_search.job_count == 0 - shutil.rmtree(test_dir) -""" + + finally: + shutil.rmtree(test_dir) diff --git a/bob/bio/base/tools/command_line.py b/bob/bio/base/tools/command_line.py index d39d91d0a89530af5a50b494e695806f3871bd66..b2da327edcea91e476037c8f16df50936aece3f1 100644 --- a/bob/bio/base/tools/command_line.py +++ b/bob/bio/base/tools/command_line.py @@ -52,7 +52,7 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]): dir_group = parser.add_argument_group('\nDirectories that can be changed according to your requirements') dir_group.add_argument('-T', '--temp-directory', metavar = 'DIR', 
help = 'The directory for temporary files, default is: %s.' % temp) - dir_group.add_argument('-U', '--result-directory', metavar = 'DIR', + dir_group.add_argument('-R', '--result-directory', metavar = 'DIR', help = 'The directory for resulting score files, default is: %s.' % results) file_group = parser.add_argument_group('\nName (maybe including a path relative to the --temp-directory, if not specified otherwise) of files that will be generated. Note that not all files will be used by all algorithms') @@ -62,7 +62,7 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]): help = 'Name of the file to write the feature projector into.') file_group.add_argument('--enroller-file' , metavar = 'FILE', default = 'Enroller.hdf5', help = 'Name of the file to write the model enroller into.') - file_group.add_argument('-G', '--gridtk-db-file', metavar = 'FILE', default = 'submitted.sql3', + file_group.add_argument('-G', '--gridtk-database-file', metavar = 'FILE', default = 'submitted.sql3', help = 'The database file in which the submitted jobs will be written; relative to the current directory (only valid with the --grid option).') file_group.add_argument('--experiment-info-file', metavar = 'FILE', default = 'Experiment.info', help = 'The file where the configuration of all parts of the experiments are written; relative to te --result-directory.') @@ -80,7 +80,7 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]): help = 'Name of the directory (relative to --result-directory) where to write the results to') sub_dir_group.add_argument('--zt-directories', metavar = 'DIR', nargs = 5, default = ['zt_norm_A', 'zt_norm_B', 'zt_norm_C', 'zt_norm_D', 'zt_norm_D_sameValue'], help = 'Name of the directories (of --temp-directory) where to write the ZT-norm values; only used with --zt-norm') - sub_dir_group.add_argument('--grid-log-directory', metavar = 'DIR', default = 'grid_tk_logs', + sub_dir_group.add_argument('--grid-log-directory', metavar = 'DIR', default = 'gridtk_logs', help = 'Name of the directory (relative to --temp-directory) where to log files are written; only used with --grid') flag_group = parser.add_argument_group('\nFlags that change the behavior of the experiment') @@ -91,8 +91,8 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]): help = 'Force to erase former data if already exist') flag_group.add_argument('-Z', '--write-compressed-score-files', action='store_true', help = 'Writes score files which are compressed with tar.bz2.') - flag_group.add_argument('-R', '--delete-dependent-jobs-on-failure', action='store_true', - help = 'Try to recursively delete the dependent jobs from the SGE grid queue, when a job failed') + flag_group.add_argument('-S', '--stop-on-failure', action='store_true', + help = 'Try to recursively stop the dependent jobs from the SGE grid queue, when a job failed') flag_group.add_argument('-X', '--external-dependencies', type=int, default = [], nargs='+', help = 'The jobs submitted to the grid have dependencies on the given job ids.') flag_group.add_argument('-D', '--timer', choices=('real', 'system', 'user'), nargs = '*', @@ -182,6 +182,45 @@ def initialize(parsers, command_line_parameters = None, skips = []): enroller_sub_dir = protocol if args.database.training_depends_on_protocol and args.algorithm.requires_enroller_training else projector_sub_dir model_sub_dir = protocol if args.database.models_depend_on_protocol else enroller_sub_dir + + # IDIAP-Private directories, which should be automatically 
replaced + if is_idiap: + images = { + 'ARFACE' : "/idiap/resource/database/AR_Face/images", + 'ATNT' : "/idiap/group/biometric/databases/orl", + 'BANCA' : "/idiap/group/biometric/databases/banca/english/images/images", + 'CAS-PEAL' : "/idiap/resource/database/CAS-PEAL", + 'FRGC' : "/idiap/resource/database/frgc/FRGC-2.0-dist", + 'MBGC-V1' : "/idiap/resource/database/MBGC-V1", + 'LFW' : "/idiap/resource/database/lfw/all_images_aligned_with_funneling", + 'MOBIO_IMAGE' : "/idiap/resource/database/mobio/IMAGES_PNG", + 'MULTI-PIE_IMAGE' : "/idiap/resource/database/Multi-Pie/data", + 'SC_FACE' : "/idiap/group/biometric/databases/scface/images", + 'XM2VTS' : "/idiap/resource/database/xm2vtsdb/images", + } + + annotations = { + 'MOBIO_ANNOTATION' : "/idiap/resource/database/mobio/IMAGE_ANNOTATIONS", + 'MULTI-PIE_ANNOTATION' : "/idiap/group/biometric/annotations/multipie", + } + + try: + for d in images: + if args.database.original_directory == "[YOUR_%s_DIRECTORY]" % d: + args.database.original_directory = images[d] + args.database.database.original_directory = images[d] + except AttributeError: + pass + + try: + for d in annotations: + if args.database.annotation_directory == "[YOUR_%s_DIRECTORY]" % d: + args.database.annotation_directory = annotations[d] + args.database.database.annotation_directory = annotations[d] + except AttributeError: + pass + + # initialize the file selector FileSelector.create( database = args.database, diff --git a/bob/bio/base/tools/grid.py b/bob/bio/base/tools/grid.py index 8a803378977fb6c0048746f18bee323bf78fac25..2eaf27ca598bfd790e3bb4060e3503d88f14894c 100644 --- a/bob/bio/base/tools/grid.py +++ b/bob/bio/base/tools/grid.py @@ -56,7 +56,7 @@ class GridSubmission: # setup logger bob.core.log.set_verbosity_level(bob.core.log.setup("gridtk"), args.verbose) Manager = gridtk.local.JobManagerLocal if args.grid.is_local() else gridtk.sge.JobManagerSGE - self.job_manager = Manager(database = args.gridtk_db_file, wrapper_script=jman) + self.job_manager = Manager(database = args.gridtk_database_file, wrapper_script=jman) self.submitted_job_ids = [] def submit(self, command, number_of_parallel_jobs = 1, dependencies=[], name = None, **kwargs): @@ -89,7 +89,7 @@ class GridSubmission: array = array, dependencies = dependencies, log_dir = log_dir, - stop_on_failure = self.args.delete_dependent_jobs_on_failure, + stop_on_failure = self.args.stop_on_failure, **kwargs ) logger.info("submitted: job '%s' with id '%d' and dependencies '%s'" % (name, job_id, dependencies)) diff --git a/bob/bio/base/tools/scoring.py b/bob/bio/base/tools/scoring.py index d14cb4fecf5db794eefb20b51875d7e5b5a6b762..06aecdbd136275da31a78c6240a48c61e368c5a1 100644 --- a/bob/bio/base/tools/scoring.py +++ b/bob/bio/base/tools/scoring.py @@ -112,7 +112,7 @@ def _scores_a(algorithm, model_ids, group, compute_zt_norm, force, write_compres current_probe_objects = fs.probe_objects_for_model(model_id, group) model = algorithm.read_model(fs.model_file(model_id, group)) # get the probe files - current_probe_files = fs.get_paths(current_probe_objects, 'projected' if algorithm.performs_projection else 'features') + current_probe_files = fs.get_paths(current_probe_objects, 'projected' if algorithm.performs_projection else 'extracted') # compute scores a = _scores(algorithm, model, current_probe_files) @@ -131,7 +131,7 @@ def _scores_b(algorithm, model_ids, group, force): # probe files: z_probe_objects = fs.z_probe_objects(group) - z_probe_files = fs.get_paths(z_probe_objects, 'projected' if algorithm.performs_projection 
else 'features') + z_probe_files = fs.get_paths(z_probe_objects, 'projected' if algorithm.performs_projection else 'extracted') logger.info("- Scoring: computing score matrix B for group '%s'", group) diff --git a/setup.py b/setup.py index b133a06913a4d7c4aa9a495eaa8aca30369349db..95a244d32826b17b629ace4736f8e5b0138a6635 100644 --- a/setup.py +++ b/setup.py @@ -106,6 +106,7 @@ setup( 'verify.py = bob.bio.base.script.verify:main', 'resources.py = bob.bio.base.script.resources:main', 'evaluate.py = bob.bio.base.script.evaluate:main', + 'grid_search.py = bob.bio.base.script.grid_search:main', ], 'bob.bio.database': [