From 37d561185c6991aa75e92d22086604bf136f751c Mon Sep 17 00:00:00 2001
From: Manuel Gunther <>
Date: Tue, 5 Apr 2016 18:26:27 -0600
Subject: [PATCH] Polished score fusion script, added tests and documentation
 (fixes #13)

 .../script/{ =>}  | 69 ++++++++++++-------
 bob/bio/base/test/             | 31 +++++++++
 doc/more.rst                                  | 21 ++++++                                      |  2 +-
 version.txt                                   |  2 +-
 5 files changed, 97 insertions(+), 28 deletions(-)
 rename bob/bio/base/script/{ =>} (57%)

diff --git a/bob/bio/base/script/ b/bob/bio/base/script/
similarity index 57%
rename from bob/bio/base/script/
rename to bob/bio/base/script/
index 5a1cb83d..1d989bf4 100755
--- a/bob/bio/base/script/
+++ b/bob/bio/base/script/
@@ -2,7 +2,8 @@
 # vim: set fileencoding=utf-8 :
 # Laurent El Shafey <>
 # Elie El Khoury <>
-#Mon 13 Jul 11:55:34 CEST 2015
+# Manuel Guenther <>
+# Mon 13 Jul 11:55:34 CEST 2015
 # Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
@@ -18,10 +19,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <>.
-"""This script fuses scores from various systems,
-from a score file in four or five column format.
+"""This script fuses scores from various systems, from a score file in four or five column format.
 Note: The score file has to contain the exact probe file names as the 3rd (4column) or 4th (5column) column.
+The resulting fused score files will be written in 4 column format.
@@ -29,6 +30,9 @@ Note: The score file has to contain the exact probe file names as the 3rd (4colu
 import bob, os, sys
 import bob.learn.linear
+import bob.core
+logger = bob.core.log.setup("")
 def parse_command_line(command_line_options):
   """Parse the program options"""
@@ -38,17 +42,30 @@ def parse_command_line(command_line_options):
   parser = argparse.ArgumentParser(usage=usage, description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   # This option is not normally shown to the user...
-  parser.add_argument('--self-test', action = 'store_true', help = argparse.SUPPRESS)
   parser.add_argument('-d', '--dev-files', required=True, nargs='+', help = "A list of score files of the development set.")
   parser.add_argument('-e', '--eval-files', nargs='+', help = "A list of score files of the evaluation set; if given it must be the same number of files as the --dev-files.")
-  parser.add_argument('-f', '--score-fused-dev-file', required = True, help = 'The calibrated development score file in 4 or 5 column format to calibrate.')
-  parser.add_argument('-g', '--score-fused-eval-file', help = 'The calibrated evaluation score file in 4 or 5 column format to calibrate.')
-  parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'),  help="The style of the resulting score files. The default fits to the usual output of score files.")
+  parser.add_argument('-f', '--fused-dev-file', required = True, help = 'The fused development score file in 4 column format.')
+  parser.add_argument('-g', '--fused-eval-file', help = 'The fused evaluation score file in 4 column format.')
+  parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'),  help = "The style of the resulting score files. The default fits to the usual output of score files.")
+  parser.add_argument('-m', '--max-iterations', type=int, default=10000, help = "Select the maximum number of iterations for the LLR training")
+  parser.add_argument('-t', '--convergence-threshold', type=float, default=1e-10, help = "Select the convergence threshold for the LLR training")
+  parser.add_argument('-n', '--no-whitening', action="store_true", help = "If given, disable the score mean/std-normalization prior to fusion (this is not recommended)")
+  # enable logging
+  bob.core.log.add_command_line_option(parser)
   args = parser.parse_args(command_line_options)
+  bob.core.log.set_verbosity_level(logger, args.verbose)
+  if args.eval_files is not None and len(args.eval_files) != len(args.dev_files):
+    raise ValueError("When --eval-files are specified, there need to be exactly one eval file for each dev file")
+  if args.eval_files is not None and args.fused_eval_file is None:
+    raise ValueError("When --eval-files are specified, the --fused-eval-file needs to be given, too")
   return args
 def main(command_line_options = None):
   """Score Fusion using Logistic regression"""
   args = parse_command_line(command_line_options)
@@ -57,57 +74,57 @@ def main(command_line_options = None):
   n_systems = len(args.dev_files)
   for i in range(n_systems):
     if not os.path.isfile(args.dev_files[i]): raise IOError("The given score file does not exist")
-  # pythonic way: create inline dictionary "{...}", index with desired value "[...]", execute function "(...)"
+  # collect training data from development sets
   data = []
   for i in range(n_systems):
+"Loading development set score file '%s'", args.dev_files[i])
+    # pythonic way: create inline dictionary "{...}", index with desired value "[...]", execute function "(...)"
     data.append({'4column' : bob.measure.load.split_four_column, '5column' : bob.measure.load.split_five_column}[args.parser](args.dev_files[i]))
   import numpy
-  data_neg = numpy.vstack([data[k][0] for k in range(n_systems)]).T.copy()
-  data_pos = numpy.vstack([data[k][1] for k in range(n_systems)]).T.copy()
-  trainer = bob.learn.linear.CGLogRegTrainer(0.5, 1e-10, 10000)
+  trainer = bob.learn.linear.CGLogRegTrainer(0.5, args.convergence_threshold, args.max_iterations, mean_std_norm=not args.no_whitening)
+  data_neg = numpy.vstack([data[k][0] for k in range(n_systems)]).T
+  data_pos = numpy.vstack([data[k][1] for k in range(n_systems)]).T
   machine = trainer.train(data_neg, data_pos)
   # fuse development scores
   gen_data_dev = []
   for i in range(n_systems):
+"Loading development set score file '%s'", args.dev_files[i])
     gen_data_dev.append({'4column' : bob.measure.load.four_column, '5column' : bob.measure.load.five_column}[args.parser](args.dev_files[i]))
-  outf = open(args.score_fused_dev_file, 'w')
+"Writing fused development set score file '%s'", args.fused_dev_file)
+  outf = open(args.fused_dev_file, 'w')
   for line in gen_data_dev[0]:
     claimed_id = line[0]
     real_id = line[-3]
     test_label = line[-2]
     scores= [ line[-1] ]
-    for n in range(1, n_systems): 
+    for n in range(1, n_systems):
     scores = numpy.array([scores], dtype=numpy.float64)
-    s_fused = machine.forward(scores)[0,0]  
+    s_fused = machine.forward(scores)[0,0]
     line = claimed_id + " " + real_id + " " + test_label + " "  + str(s_fused) + "\n"
   # fuse evaluation scores
   if args.eval_files is not None:
-    if len(args.dev_files) != len(args.eval_files):
-      logger.error("The number of --dev-files (%d) and --eval-files (%d) are not identical", len(args.dev_files), len(args.eval_files))
     gen_data_eval = []
     for i in range(n_systems):
+"Loading evaluation set score file '%s'", args.eval_files[i])
       gen_data_eval.append({'4column' : bob.measure.load.four_column, '5column' : bob.measure.load.five_column}[args.parser](args.eval_files[i]))
-    outf = open(args.score_fused_eval_file, 'w')
+"Writing fused evaluation set score file '%s'", args.fused_eval_file)
+    outf = open(args.fused_eval_file, 'w')
     for line in gen_data_eval[0]:
       claimed_id = line[0]
       real_id = line[-3]
       test_label = line[-2]
-      scores= [ line[-1] ] 
-      for n in range(1, n_systems): 
+      scores= [ line[-1] ]
+      for n in range(1, n_systems):
       scores = numpy.array([scores], dtype=numpy.float64)
-      s_fused = machine.forward(scores)[0,0]  
+      s_fused = machine.forward(scores)[0,0]
       line = claimed_id + " " + real_id + " " + test_label + " "  + str(s_fused) + "\n"
-  return 0
-if __name__ == '__main__':
-  main(sys.argv[1:])
diff --git a/bob/bio/base/test/ b/bob/bio/base/test/
index 5d002d4b..888ca886 100644
--- a/bob/bio/base/test/
+++ b/bob/bio/base/test/
@@ -261,6 +261,37 @@ def test_verify_filelist():
+def test_fusion():
+  # tests that the fuse_scores script is doing something useful
+  test_dir = tempfile.mkdtemp(prefix='bobtest_')
+  reference_files = [os.path.join(data_dir, s) for s in ('scores-nonorm-dev', 'scores-ztnorm-dev')]
+  output_files = [os.path.join(test_dir, s) for s in ("fused-dev", "fused-eval")]
+  parameters = [
+    '--dev-files', reference_files[0], reference_files[1],
+    '--eval-files', reference_files[0], reference_files[1],
+    '--fused-dev-file', output_files[0],
+    '--fused-eval-file', output_files[1],
+    '--max-iterations', '100',
+    '--convergence-threshold', '1e-4',
+    '-v'
+  ]
+  # execute the script
+  from import main
+  try:
+    main(parameters)
+    # assert that we can read the two files, and that they contain the same number of lines as the original file
+    for i in (0,1):
+      assert os.path.exists(output_files[i])
+      r = bob.measure.load.four_column(reference_files[i])
+      o = bob.measure.load.four_column(output_files[i])
+      assert len(list(r)) == len(list(o))
+  finally:
+    shutil.rmtree(test_dir)
 def test_evaluate():
   # tests our 'evaluate' script using the reference files
   test_dir = tempfile.mkdtemp(prefix='bobtest_')
diff --git a/doc/more.rst b/doc/more.rst
index 37c4489f..87a67014 100644
--- a/doc/more.rst
+++ b/doc/more.rst
@@ -41,6 +41,27 @@ All these three ways can be used for any of the five command line options: ``--d
 You can even mix these three types freely in a single command line.
+Score Level Fusion of Different Algorithms on the same Database
+In several of our publications, we have shown that the combination of several biometric recognition algorithms is able to outperform each single algorithm.
+This is particularly true when the algorithms rely on different kinds of data, e.g., we have `fused face and speaker recognition systems on the MOBIO database <>`__.
+As long as several algorithms are executed on the same database, we can simply generate a fusion system by using the ``./bin/`` script, generating a new score file:
+.. code-block:: sh
+   $ ./bin/ --dev
+This computation is based on the :py:class:`bob.learn.linear.CGLogRegTrainer`, which is trained on the scores of the development set files (``--dev-files``) for the given systems.
+Afterwards, the fusion is applied to the ``--dev-files`` and the resulting score file is written to the file specified by ``--fused-dev-file``.
+If ``--eval-files`` are specified, the same fusion that is trained on the development set is now applied to the evaluation set as well, and the ``--fused-eval-file`` is written.
+.. note::
+   When ``--eval-files`` are specified, they need to be in the same order as the ``--dev-files``, otherwise the result is undefined.
+The resulting ``--fused-dev-file`` and ``--fused-eval-file`` can then be evaluated normally, e.g., using the ``./bin/`` script.
 .. _grid-search:
 Finding the Optimal Configuration
diff --git a/ b/
index 348c59cb..25e288b4 100644
--- a/
+++ b/
@@ -111,7 +111,7 @@ setup(
         '        =',
         '         =',
         '          =',
-        '     =',
+        '    =',
       '': [
diff --git a/version.txt b/version.txt
index 2a770f88..f177f8a8 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
\ No newline at end of file