From 37d561185c6991aa75e92d22086604bf136f751c Mon Sep 17 00:00:00 2001
From: Manuel Gunther <siebenkopf@googlemail.com>
Date: Tue, 5 Apr 2016 18:26:27 -0600
Subject: [PATCH] Polished score fusion script, added tests and documentation
 (fixes #13)

---
 .../script/{fusion_llr.py => fuse_scores.py}  | 69 ++++++++++++-------
 bob/bio/base/test/test_scripts.py             | 31 +++++++++
 doc/more.rst                                  | 21 ++++++
 setup.py                                      |  2 +-
 version.txt                                   |  2 +-
 5 files changed, 97 insertions(+), 28 deletions(-)
 rename bob/bio/base/script/{fusion_llr.py => fuse_scores.py} (57%)

diff --git a/bob/bio/base/script/fusion_llr.py b/bob/bio/base/script/fuse_scores.py
similarity index 57%
rename from bob/bio/base/script/fusion_llr.py
rename to bob/bio/base/script/fuse_scores.py
index 5a1cb83d..1d989bf4 100755
--- a/bob/bio/base/script/fusion_llr.py
+++ b/bob/bio/base/script/fuse_scores.py
@@ -2,7 +2,8 @@
 # vim: set fileencoding=utf-8 :
 # Laurent El Shafey <laurent.el-shafey@idiap.ch>
 # Elie El Khoury <elie.khoury@idiap.ch>
-#Mon 13 Jul 11:55:34 CEST 2015
+# Manuel Guenther <siebenkopf@googlemail.com>
+# Mon 13 Jul 11:55:34 CEST 2015
 #
 # Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
 #
@@ -18,10 +19,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.

-"""This script fuses scores from various systems,
-from a score file in four or five column format.
+"""This script fuses scores from various systems, from a score file in four or five column format.

 Note: The score file has to contain the exact probe file names as the 3rd (4column) or 4th (5column) column.
+The resulting fused score files will be written in 4 column format.
 """


@@ -29,6 +30,9 @@ Note: The score file has to contain the exact probe file names as the 3rd (4colu
 import bob, os, sys
 import bob.learn.linear
+import bob.core
+logger = bob.core.log.setup("bob.bio.base")
+

 def parse_command_line(command_line_options):
   """Parse the program options"""
@@ -38,17 +42,30 @@
   parser = argparse.ArgumentParser(usage=usage, description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)

   # This option is not normally shown to the user...
-  parser.add_argument('--self-test', action = 'store_true', help = argparse.SUPPRESS)
   parser.add_argument('-d', '--dev-files', required=True, nargs='+', help = "A list of score files of the development set.")
   parser.add_argument('-e', '--eval-files', nargs='+', help = "A list of score files of the evaluation set; if given it must be the same number of files as the --dev-files.")
-  parser.add_argument('-f', '--score-fused-dev-file', required = True, help = 'The calibrated development score file in 4 or 5 column format to calibrate.')
-  parser.add_argument('-g', '--score-fused-eval-file', help = 'The calibrated evaluation score file in 4 or 5 column format to calibrate.')
-  parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'), help="The style of the resulting score files. The default fits to the usual output of score files.")
+  parser.add_argument('-f', '--fused-dev-file', required = True, help = 'The fused development score file in 4 column format.')
+  parser.add_argument('-g', '--fused-eval-file', help = 'The fused evaluation score file in 4 column format.')
+  parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'), help = "The style of the resulting score files. The default fits to the usual output of score files.")
+
+  parser.add_argument('-m', '--max-iterations', type=int, default=10000, help = "Select the maximum number of iterations for the LLR training")
+  parser.add_argument('-t', '--convergence-threshold', type=float, default=1e-10, help = "Select the convergence threshold for the LLR training")
+  parser.add_argument('-n', '--no-whitening', action="store_true", help = "If given, disable the score mean/std-normalization prior to fusion (this is not recommended)")
+  # enable logging
+  bob.core.log.add_command_line_option(parser)

   args = parser.parse_args(command_line_options)
+  bob.core.log.set_verbosity_level(logger, args.verbose)
+
+  if args.eval_files is not None and len(args.eval_files) != len(args.dev_files):
+    raise ValueError("When --eval-files are specified, there need to be exactly one eval file for each dev file")
+
+  if args.eval_files is not None and args.fused_eval_file is None:
+    raise ValueError("When --eval-files are specified, the --fused-eval-file needs to be given, too")

   return args
+
+
 def main(command_line_options = None):
   """Score Fusion using Logistic regression"""
   args = parse_command_line(command_line_options)
@@ -57,57 +74,57 @@
   n_systems = len(args.dev_files)
   for i in range(n_systems):
     if not os.path.isfile(args.dev_files[i]): raise IOError("The given score file does not exist")
-  # pythonic way: create inline dictionary "{...}", index with desired value "[...]", execute function "(...)"
+
+  # collect training data from development sets
   data = []
   for i in range(n_systems):
+    logger.info("Loading development set score file '%s'", args.dev_files[i])
+    # pythonic way: create inline dictionary "{...}", index with desired value "[...]", execute function "(...)"
     data.append({'4column' : bob.measure.load.split_four_column, '5column' : bob.measure.load.split_five_column}[args.parser](args.dev_files[i]))

   import numpy
-  data_neg = numpy.vstack([data[k][0] for k in range(n_systems)]).T.copy()
-  data_pos = numpy.vstack([data[k][1] for k in range(n_systems)]).T.copy()
-  trainer = bob.learn.linear.CGLogRegTrainer(0.5, 1e-10, 10000)
+  trainer = bob.learn.linear.CGLogRegTrainer(0.5, args.convergence_threshold, args.max_iterations, mean_std_norm=not args.no_whitening)
+  data_neg = numpy.vstack([data[k][0] for k in range(n_systems)]).T
+  data_pos = numpy.vstack([data[k][1] for k in range(n_systems)]).T
   machine = trainer.train(data_neg, data_pos)

   # fuse development scores
   gen_data_dev = []
   for i in range(n_systems):
+    logger.info("Loading development set score file '%s'", args.dev_files[i])
     gen_data_dev.append({'4column' : bob.measure.load.four_column, '5column' : bob.measure.load.five_column}[args.parser](args.dev_files[i]))

-  outf = open(args.score_fused_dev_file, 'w')
+  logger.info("Writing fused development set score file '%s'", args.fused_dev_file)
+  outf = open(args.fused_dev_file, 'w')
   for line in gen_data_dev[0]:
     claimed_id = line[0]
     real_id = line[-3]
     test_label = line[-2]
     scores= [ line[-1] ]
-    for n in range(1, n_systems):
+    for n in range(1, n_systems):
       scores.append(gen_data_dev[n].next()[-1])
     scores = numpy.array([scores], dtype=numpy.float64)
-    s_fused = machine.forward(scores)[0,0]
+    s_fused = machine.forward(scores)[0,0]
     line = claimed_id + " " + real_id + " " + test_label + " " + str(s_fused) + "\n"
     outf.write(line)

   # fuse evaluation scores
   if args.eval_files is not None:
-    if len(args.dev_files) != len(args.eval_files):
-      logger.error("The number of --dev-files (%d) and --eval-files (%d) are not identical", len(args.dev_files), len(args.eval_files))
-
     gen_data_eval = []
     for i in range(n_systems):
+      logger.info("Loading evaluation set score file '%s'", args.eval_files[i])
       gen_data_eval.append({'4column' : bob.measure.load.four_column, '5column' : bob.measure.load.five_column}[args.parser](args.eval_files[i]))
-
-    outf = open(args.score_fused_eval_file, 'w')
+
+    logger.info("Writing fused evaluation set score file '%s'", args.fused_eval_file)
+    outf = open(args.fused_eval_file, 'w')
     for line in gen_data_eval[0]:
       claimed_id = line[0]
       real_id = line[-3]
       test_label = line[-2]
-      scores= [ line[-1] ]
-      for n in range(1, n_systems):
+      scores= [ line[-1] ]
+      for n in range(1, n_systems):
         scores.append(gen_data_eval[n].next()[-1])
       scores = numpy.array([scores], dtype=numpy.float64)
-      s_fused = machine.forward(scores)[0,0]
+      s_fused = machine.forward(scores)[0,0]
       line = claimed_id + " " + real_id + " " + test_label + " " + str(s_fused) + "\n"
       outf.write(line)

-  return 0
-
-if __name__ == '__main__':
-  main(sys.argv[1:])
diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py
index 5d002d4b..888ca886 100644
--- a/bob/bio/base/test/test_scripts.py
+++ b/bob/bio/base/test/test_scripts.py
@@ -261,6 +261,37 @@ def test_verify_filelist():
   shutil.rmtree(test_dir)


+def test_fusion():
+  # tests that the fuse_scores script is doing something useful
+  test_dir = tempfile.mkdtemp(prefix='bobtest_')
+  reference_files = [os.path.join(data_dir, s) for s in ('scores-nonorm-dev', 'scores-ztnorm-dev')]
+  output_files = [os.path.join(test_dir, s) for s in ("fused-dev", "fused-eval")]
+  parameters = [
+    '--dev-files', reference_files[0], reference_files[1],
+    '--eval-files', reference_files[0], reference_files[1],
+    '--fused-dev-file', output_files[0],
+    '--fused-eval-file', output_files[1],
+    '--max-iterations', '100',
+    '--convergence-threshold', '1e-4',
+    '-v'
+  ]
+
+  # execute the script
+  from bob.bio.base.script.fuse_scores import main
+  try:
+    main(parameters)
+
+    # assert that we can read the two files, and that they contain the same number of lines as the original file
+    for i in (0,1):
+      assert os.path.exists(output_files[i])
+      r = bob.measure.load.four_column(reference_files[i])
+      o = bob.measure.load.four_column(output_files[i])
+      assert len(list(r)) == len(list(o))
+  finally:
+    shutil.rmtree(test_dir)
+
+
+
 def test_evaluate():
   # tests our 'evaluate' script using the reference files
   test_dir = tempfile.mkdtemp(prefix='bobtest_')
diff --git a/doc/more.rst b/doc/more.rst
index 37c4489f..87a67014 100644
--- a/doc/more.rst
+++ b/doc/more.rst
@@ -41,6 +41,27 @@ All these three ways can be used for any of the five command line options: ``--d
 You can even mix these three types freely in a single command line.


+Score Level Fusion of Different Algorithms on the same Database
+---------------------------------------------------------------
+
+In several of our publications, we have shown that the combination of several biometric recognition algorithms is able to outperform each single algorithm.
+This is particularly true when the algorithms rely on different kinds of data; e.g., we have `fused face and speaker recognition systems on the MOBIO database <http://publications.idiap.ch/index.php/publications/show/2688>`__.
+As long as several algorithms are executed on the same database, we can simply build a fusion system using the ``./bin/fuse_scores.py`` script, which generates a new score file:
+
+.. code-block:: sh
+
+   $ ./bin/fuse_scores.py --dev-files <dev-file-1> <dev-file-2> --fused-dev-file <fused-dev-file> --eval-files <eval-file-1> <eval-file-2> --fused-eval-file <fused-eval-file>
+
+This computation is based on the :py:class:`bob.learn.linear.CGLogRegTrainer`, which is trained on the scores of the development set files (``--dev-files``) for the given systems.
+Afterwards, the fusion is applied to the ``--dev-files`` and the resulting score file is written to the file specified by ``--fused-dev-file``.
+If ``--eval-files`` are specified, the same fusion that is trained on the development set is applied to the evaluation set as well, and the ``--fused-eval-file`` is written.
+
+.. note::
+   When ``--eval-files`` are specified, they need to be in the same order as the ``--dev-files``, otherwise the result is undefined.
+
+The resulting ``--fused-dev-file`` and ``--fused-eval-file`` can then be evaluated normally, e.g., using the ``./bin/evaluate.py`` script.
+
+
 .. _grid-search:

 Finding the Optimal Configuration
diff --git a/setup.py b/setup.py
index 348c59cb..25e288b4 100644
--- a/setup.py
+++ b/setup.py
@@ -111,7 +111,7 @@ setup(
       'extract.py = bob.bio.base.script.extract:main',
       'enroll.py = bob.bio.base.script.enroll:main',
       'score.py = bob.bio.base.script.score:main',
-      'fusion_llr.py = bob.bio.base.script.fusion_llr:main',
+      'fuse_scores.py = bob.bio.base.script.fuse_scores:main',
     ],

     'bob.bio.database': [
diff --git a/version.txt b/version.txt
index 2a770f88..f177f8a8 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-2.0.7b0
\ No newline at end of file
+2.0.7b1
-- 
GitLab
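
For readers who want to try the fusion outside of the command-line interface, the core of ``fuse_scores.py`` boils down to a few calls into ``bob.measure`` and ``bob.learn.linear``. The sketch below is not part of the patch; the score file names and probe scores are placeholders, and the trainer is created with the script's default settings (prior 0.5, convergence threshold 1e-10, 10000 iterations, whitening enabled):

.. code-block:: python

   # Minimal sketch of the fusion performed by fuse_scores.py; the file names and
   # probe scores below are placeholders, not files shipped with the package.
   import numpy
   import bob.measure.load
   import bob.learn.linear

   # hypothetical 4-column score files of two systems run on the same database
   dev_files = ["system1/scores-dev", "system2/scores-dev"]

   # split_four_column() returns (negatives, positives) for each system
   data = [bob.measure.load.split_four_column(f) for f in dev_files]
   negatives = numpy.vstack([neg for neg, pos in data]).T  # shape: (#scores, #systems)
   positives = numpy.vstack([pos for neg, pos in data]).T

   # train the logistic-regression fusion on the development scores
   trainer = bob.learn.linear.CGLogRegTrainer(0.5, 1e-10, 10000, mean_std_norm=True)
   machine = trainer.train(negatives, positives)

   # fuse one probe: one row containing one raw score per system
   fused_score = machine.forward(numpy.array([[0.2, -1.3]]))[0, 0]
   print(fused_score)

As in the script itself, this assumes that the score files of the different systems contain the same probes with their scores listed in the same order.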