Polished score fusion script, added tests and documentation (fixes #13)

37d56118 · Manuel Günther · b6cbf2b5 · 37d56118 · 37d56118 · 37d56118
Commit 37d56118 authored 9 years ago by Manuel Günther
--- a/bob/bio/base/script/fusion_llr.py
+++ b/bob/bio/base/script/fusion_llr.py
@@ -2,7 +2,8 @@
 # vim: set fileencoding=utf-8 :
 # Laurent El Shafey <laurent.el-shafey@idiap.ch>
 # Elie El Khoury <elie.khoury@idiap.ch>
-#Mon 13 Jul 11:55:34 CEST 2015
+# Manuel Guenther <siebenkopf@googlemail.com>
+# Mon 13 Jul 11:55:34 CEST 2015
 #
 # Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
 #
@@ -18,10 +19,10 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.

-"""This script fuses scores from various systems,
-from a score file in four or five column format.
+"""This script fuses scores from various systems, from a score file in four or five column format.

 Note: The score file has to contain the exact probe file names as the 3rd (4column) or 4th (5column) column.
+The resulting fused score files will be written in 4 column format.
 """


@@ -29,6 +30,9 @@ Note: The score file has to contain the exact probe file names as the 3rd (4colu
 import bob, os, sys
 import bob.learn.linear

+import bob.core
+logger = bob.core.log.setup("bob.bio.base")
+
 def parse_command_line(command_line_options):
  """Parse the program options"""

@@ -38,17 +42,30 @@ def parse_command_line(command_line_options):
  parser = argparse.ArgumentParser(usage=usage, description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)

  # This option is not normally shown to the user...
-  parser.add_argument('--self-test', action = 'store_true', help = argparse.SUPPRESS)
  parser.add_argument('-d', '--dev-files', required=True, nargs='+', help = "A list of score files of the development set.")
  parser.add_argument('-e', '--eval-files', nargs='+', help = "A list of score files of the evaluation set; if given it must be the same number of files as the --dev-files.")
-  parser.add_argument('-f', '--score-fused-dev-file', required = True, help = 'The calibrated development score file in 4 or 5 column format to calibrate.')
-  parser.add_argument('-g', '--score-fused-eval-file', help = 'The calibrated evaluation score file in 4 or 5 column format to calibrate.')
-  parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'),  help="The style of the resulting score files. The default fits to the usual output of score files.")
+  parser.add_argument('-f', '--fused-dev-file', required = True, help = 'The fused development score file in 4 column format.')
+  parser.add_argument('-g', '--fused-eval-file', help = 'The fused evaluation score file in 4 column format.')
+  parser.add_argument('-p', '--parser', default = '4column', choices = ('4column', '5column'),  help = "The style of the resulting score files. The default fits to the usual output of score files.")
+
+  parser.add_argument('-m', '--max-iterations', type=int, default=10000, help = "Select the maximum number of iterations for the LLR training")
+  parser.add_argument('-t', '--convergence-threshold', type=float, default=1e-10, help = "Select the convergence threshold for the LLR training")
+  parser.add_argument('-n', '--no-whitening', action="store_true", help = "If given, disable the score mean/std-normalization prior to fusion (this is not recommended)")

+  # enable logging
+  bob.core.log.add_command_line_option(parser)
  args = parser.parse_args(command_line_options)
+  bob.core.log.set_verbosity_level(logger, args.verbose)
+
+  if args.eval_files is not None and len(args.eval_files) != len(args.dev_files):
+    raise ValueError("When --eval-files are specified, there need to be exactly one eval file for each dev file")
+
+  if args.eval_files is not None and args.fused_eval_file is None:
+    raise ValueError("When --eval-files are specified, the --fused-eval-file needs to be given, too")

  return args

+
 def main(command_line_options = None):
  """Score Fusion using Logistic regression"""
  args = parse_command_line(command_line_options)
@@ -57,57 +74,57 @@ def main(command_line_options = None):
  n_systems = len(args.dev_files)
  for i in range(n_systems):
    if not os.path.isfile(args.dev_files[i]): raise IOError("The given score file does not exist")
-  # pythonic way: create inline dictionary "{...}", index with desired value "[...]", execute function "(...)"
+
+  # collect training data from development sets
  data = []
  for i in range(n_systems):
+    logger.info("Loading development set score file '%s'", args.dev_files[i])
+    # pythonic way: create inline dictionary "{...}", index with desired value "[...]", execute function "(...)"
    data.append({'4column' : bob.measure.load.split_four_column, '5column' : bob.measure.load.split_five_column}[args.parser](args.dev_files[i]))
  import numpy

-  data_neg = numpy.vstack([data[k][0] for k in range(n_systems)]).T.copy()
-  data_pos = numpy.vstack([data[k][1] for k in range(n_systems)]).T.copy()
-  trainer = bob.learn.linear.CGLogRegTrainer(0.5, 1e-10, 10000)
+  trainer = bob.learn.linear.CGLogRegTrainer(0.5, args.convergence_threshold, args.max_iterations, mean_std_norm=not args.no_whitening)
+  data_neg = numpy.vstack([data[k][0] for k in range(n_systems)]).T
+  data_pos = numpy.vstack([data[k][1] for k in range(n_systems)]).T
  machine = trainer.train(data_neg, data_pos)

  # fuse development scores
  gen_data_dev = []
  for i in range(n_systems):
+    logger.info("Loading development set score file '%s'", args.dev_files[i])
    gen_data_dev.append({'4column' : bob.measure.load.four_column, '5column' : bob.measure.load.five_column}[args.parser](args.dev_files[i]))

-  outf = open(args.score_fused_dev_file, 'w')
+  logger.info("Writing fused development set score file '%s'", args.fused_dev_file)
+  outf = open(args.fused_dev_file, 'w')
  for line in gen_data_dev[0]:
    claimed_id = line[0]
    real_id = line[-3]
    test_label = line[-2]
    scores= [ line[-1] ]
-    for n in range(1, n_systems): 
+    for n in range(1, n_systems):
      scores.append(gen_data_dev[n].next()[-1])
    scores = numpy.array([scores], dtype=numpy.float64)
-    s_fused = machine.forward(scores)[0,0]  
+    s_fused = machine.forward(scores)[0,0]
    line = claimed_id + " " + real_id + " " + test_label + " "  + str(s_fused) + "\n"
    outf.write(line)

  # fuse evaluation scores
  if args.eval_files is not None:
-    if len(args.dev_files) != len(args.eval_files):
-      logger.error("The number of --dev-files (%d) and --eval-files (%d) are not identical", len(args.dev_files), len(args.eval_files))
-    
    gen_data_eval = []
    for i in range(n_systems):
+      logger.info("Loading evaluation set score file '%s'", args.eval_files[i])
      gen_data_eval.append({'4column' : bob.measure.load.four_column, '5column' : bob.measure.load.five_column}[args.parser](args.eval_files[i]))
-      
-    outf = open(args.score_fused_eval_file, 'w')
+
+    logger.info("Writing fused evaluation set score file '%s'", args.fused_eval_file)
+    outf = open(args.fused_eval_file, 'w')
    for line in gen_data_eval[0]:
      claimed_id = line[0]
      real_id = line[-3]
      test_label = line[-2]
-      scores= [ line[-1] ] 
-      for n in range(1, n_systems): 
+      scores= [ line[-1] ]
+      for n in range(1, n_systems):
        scores.append(gen_data_eval[n].next()[-1])
      scores = numpy.array([scores], dtype=numpy.float64)
-      s_fused = machine.forward(scores)[0,0]  
+      s_fused = machine.forward(scores)[0,0]
      line = claimed_id + " " + real_id + " " + test_label + " "  + str(s_fused) + "\n"
      outf.write(line)
-  return 0
-
-if __name__ == '__main__':
-  main(sys.argv[1:])
--- a/bob/bio/base/test/test_scripts.py
+++ b/bob/bio/base/test/test_scripts.py
@@ -261,6 +261,37 @@ def test_verify_filelist():
    shutil.rmtree(test_dir)


+def test_fusion():
+  # tests that the fuse_scores script is doing something useful
+  test_dir = tempfile.mkdtemp(prefix='bobtest_')
+  reference_files = [os.path.join(data_dir, s) for s in ('scores-nonorm-dev', 'scores-ztnorm-dev')]
+  output_files = [os.path.join(test_dir, s) for s in ("fused-dev", "fused-eval")]
+  parameters = [
+    '--dev-files', reference_files[0], reference_files[1],
+    '--eval-files', reference_files[0], reference_files[1],
+    '--fused-dev-file', output_files[0],
+    '--fused-eval-file', output_files[1],
+    '--max-iterations', '100',
+    '--convergence-threshold', '1e-4',
+    '-v'
+  ]
+
+  # execute the script
+  from bob.bio.base.script.fuse_scores import main
+  try:
+    main(parameters)
+
+    # assert that we can read the two files, and that they contain the same number of lines as the original file
+    for i in (0,1):
+      assert os.path.exists(output_files[i])
+      r = bob.measure.load.four_column(reference_files[i])
+      o = bob.measure.load.four_column(output_files[i])
+      assert len(list(r)) == len(list(o))
+  finally:
+    shutil.rmtree(test_dir)
+
+
+
 def test_evaluate():
  # tests our 'evaluate' script using the reference files
  test_dir = tempfile.mkdtemp(prefix='bobtest_')

--- a/doc/more.rst
+++ b/doc/more.rst
@@ -41,6 +41,27 @@ All these three ways can be used for any of the five command line options: ``--d
 You can even mix these three types freely in a single command line.


+Score Level Fusion of Different Algorithms on the same Database
+---------------------------------------------------------------
+
+In several of our publications, we have shown that the combination of several biometric recognition algorithms is able to outperform each single algorithm.
+This is particularly true when the algorithms rely on different kinds of data, e.g., we have `fused face and speaker recognition systems on the MOBIO database <http://publications.idiap.ch/index.php/publications/show/2688>`__.
+As long as several algorithms are executed on the same database, we can simply generate a fusion system by using the ``./bin/fuse_scores.py`` script, generating a new score file:
+
+.. code-block:: sh
+
+   $ ./bin/fuse_scores.py --dev-files <dev-scores-1> <dev-scores-2> --fused-dev-file <fused-dev-scores>
+
+This computation is based on the :py:class:`bob.learn.linear.CGLogRegTrainer`, which is trained on the scores of the development set files (``--dev-files``) for the given systems.
+Afterwards, the fusion is applied to the ``--dev-files`` and the resulting score file is written to the file specified by ``--fused-dev-file``.
+If ``--eval-files`` are specified, the same fusion that is trained on the development set is now applied to the evaluation set as well, and the ``--fused-eval-file`` is written.
+
+.. note::
+   When ``--eval-files`` are specified, they need to be in the same order as the ``--dev-files``, otherwise the result is undefined.
+
+The resulting ``--fused-dev-file`` and ``--fused-eval-file`` can then be evaluated normally, e.g., using the ``./bin/evaluate.py`` script.
+
+
 .. _grid-search:

 Finding the Optimal Configuration

--- a/setup.py
+++ b/setup.py
@@ -111,7 +111,7 @@ setup(
        'extract.py        = bob.bio.base.script.extract:main',
        'enroll.py         = bob.bio.base.script.enroll:main',
        'score.py          = bob.bio.base.script.score:main',
-        'fusion_llr.py     = bob.bio.base.script.fusion_llr:main',
+        'fuse_scores.py    = bob.bio.base.script.fuse_scores:main',
      ],

      'bob.bio.database': [

--- a/version.txt
+++ b/version.txt
-2.0.7b0
\ No newline at end of file
+2.0.7b1