stable API hopefully

d03dc86d · Amir MOHAMMADI · 475aa543 · d03dc86d · 475aa543 · d03dc86d
Commit d03dc86d authored 9 years ago by Amir MOHAMMADI
--- a/bob/fusion/base/algorithm/Algorithm.py
+++ b/bob/fusion/base/algorithm/Algorithm.py
@@ -15,9 +15,8 @@ class Algorithm(object):
  """docstring for Algorithm"""
  def __init__(self,
-               performs_training=False,
-               has_closed_form_solution=False,
               preprocessors=None,
+               classifier=None,
               *args,
               **kwargs
               ):
@@ -29,8 +28,7 @@ class Algorithm(object):
 """
    super(Algorithm, self).__init__()
-    self.performs_training = performs_training
+    self.classifier = classifier
-    self.has_closed_form_solution = has_closed_form_solution
    self.preprocessors = preprocessors
    self._kwargs = kwargs
    self._kwargs['preprocessors'] = preprocessors
@@ -47,21 +45,43 @@ class Algorithm(object):
    return scores
  def train(self, train, devel=None):
-    if devel is None:
-      devel = train
    (negatives, positives) = train
    train_scores = np.vstack((negatives, positives))
    neg_len = negatives.shape[0]
    y = np.zeros((train_scores.shape[0],), dtype='bool')
    y[neg_len:] = True
-    self.fit(train_scores, y)
+    self.classifier.fit(train_scores, y)
  def fuse(self, scores):
-    return self.decision_function(scores)
+    if hasattr(self, 'classifier'):
+      return self.classifier.decision_function(scores)
+    else:
+      return self.decision_function(scores)
+  def __str__(self):
+    """__str__() -> info
-  def plot_boundary_decision(self, score_labels, threshold,
+    This function returns all parameters of this class (and its derived class).
-                             label_system1='',
-                             label_system2='',
+    **Returns:**
+    info : str
+      A string containing the full information of all parameters of this
+        (and the derived) class.
+    """
+    return "%s(%s)" % (str(self.__class__), ", ".join(
+      ["%s=%s" % (key, value) for key, value in
+       self._kwargs.items() if value is not None]))
+  def save(self, model_file):
+    with open(model_file, "wb") as f:
+      pickle.dump(self, f)
+  def load(self, model_file):
+    with open(model_file, "rb") as f:
+      return pickle.load(f)
+  def plot_boundary_decision(self, scores, score_labels, threshold,
                             thres_system1=None,
                             thres_system2=None,
                             do_grouping=False,
@@ -77,15 +97,16 @@ class Algorithm(object):
    '''
    Plots the boundary decision of the Algorithm
-    @param score_labels numpy.array A (self.scores.shape[0]) array containing
+    @param score_labels numpy.array A (scores.shape[0]) array containing
-                                    the true labels of self.scores.
+                                    the true labels of scores.
    @param threshold    float       threshold of the decision boundary
    '''
    if legends is None:
      legends = ['Impostor', 'Genuine']
+    markers = ['x', 'o']
-    if self.scores.shape[1] > 2:
+    if scores.shape[1] > 2:
      raise NotImplementedError(
        "Currently plotting the decision boundary for more than two systems "
        "is not supported.")
@@ -93,28 +114,21 @@ class Algorithm(object):
    import matplotlib.pyplot as plt
    plt.gca()  # this is necessary for subplots to work.
-    X = self.scores[:, [i1, i2]]
+    X = scores[:, [i1, i2]]
    Y = score_labels
    x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad
    y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad
-    h1 = abs(x_max - x_min) / resolution
+    xx, yy = np.meshgrid(
-    h2 = abs(y_max - y_min) / resolution
+      np.linspace(x_min, x_max, resolution),
-    if self.has_closed_form_solution and self.scores.shape[1] == 2:
+      np.linspace(y_min, y_max, resolution))
-      x1 = np.arange(x_min, x_max, h1)
+    temp = np.c_[xx.ravel(), yy.ravel()]
-      x2 = self.closed_form(x1, threshold)
+    temp = self.preprocess(temp)
-      plt.plot(x1, x2, cmap=plt.cm.viridis)
+    Z = (self.fuse(temp) > threshold).reshape(xx.shape)
-    else:
-      xx, yy = np.meshgrid(
-        np.arange(x_min, x_max, h1), np.arange(y_min, y_max, h2))
-      scores = self.scores
-      self.scores = np.c_[xx.ravel(), yy.ravel()]
-      Z = (self() > threshold).reshape(xx.shape)
-      self.scores = scores
-      contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)
+    contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)
    if do_grouping:
-      positives, negatives = X[Y], X[np.logical_not(Y)]
+      negatives, positives = X[np.logical_not(Y)], X[Y]
      negatives, positives = grouping(negatives, positives, **kwargs)
      X = np.concatenate((negatives, positives), axis=0)
      Y = np.concatenate(
@@ -122,35 +136,19 @@ class Algorithm(object):
         np.ones(positives.shape[0], dtype=np.bool8)),
        axis=0)
-    plt.scatter(
+    negatives, positives = X[np.logical_not(Y)], X[Y]
-      X[:, 0], X[:, 1], c=Y, alpha=alpha, cmap=plt.cm.viridis)
+    colors = plt.cm.viridis(np.linspace(0, 1, 2))
-    # plt.legend(legends)
+    for i, X in enumerate((negatives, positives)):
+      plt.scatter(
+        X[:, 0], X[:, 1], marker=markers[i], alpha=alpha,
+        c=colors[i], label=legends[i])
+    plt.legend()
    if thres_system1 is not None:
      plt.axvline(thres_system1, color='red')
      plt.axhline(thres_system2, color='red')
-    return contourf
+    plt.xlim([x_min, x_max])
+    plt.ylim([y_min, y_max])
-  def __str__(self):
-    """__str__() -> info
-    This function returns all parameters of this class (and its derived class).
-    **Returns:**
-    info : str
-      A string containing the full information of all parameters of this
-        (and the derived) class.
-    """
-    return "%s(%s)" % (str(self.__class__), ", ".join(
-      ["%s=%s" % (key, value) for key, value in
-       self._kwargs.items() if value is not None]))
-  def save(self, model_file):
-    with open(model_file, "wb") as f:
-      pickle.dump(self, f)
-  def load(self, model_file):
+    return contourf
-    with open(model_file, "rb") as f:
-      return pickle.load(f)
--- a/bob/fusion/base/algorithm/LogisticRegression.py
+++ b/bob/fusion/base/algorithm/LogisticRegression.py
-#!/usr/bin/env python
-from __future__ import division
-from __future__ import absolute_import
-import bob.learn.linear
-from sklearn.linear_model import LogisticRegression as LogisticRegression_SK
-from .Algorithm import Algorithm
-import bob.core
-logger = bob.core.log.setup("bob.fusion.base")
-class LogisticRegression(Algorithm, LogisticRegression_SK):
-  __doc__ = LogisticRegression_SK.__doc__
-  def __init__(self,
-               *args, **kwargs):
-    Algorithm.__init__(
-        self, performs_training=True,
-        has_closed_form_solution=True, *args, **kwargs)
-    sk_kwargs = {}
-    for key, value in kwargs.items():
-      if key in ['penalty', 'dual', 'tol', 'C', 'fit_intercept',
-                 'intercept_scaling', 'class_weight',
-                 'random_state', 'solver', 'max_iter',
-                 'multi_class', 'verbose', 'warm_start', 'n_jobs']:
-        sk_kwargs[key] = value
-    LogisticRegression_SK.__init__(self, **sk_kwargs)
-  def closed_form(self, x1, y):
-    w1 = self.coef_[0]
-    w2 = self.coef_[1]
-    x2 = (y - self.intercept_ - x1*w1)/w2
-    return x2
--- a/bob/fusion/base/algorithm/MLP.py
+++ b/bob/fusion/base/algorithm/MLP.py
@@ -6,7 +6,7 @@ from __future__ import absolute_import
 import bob.learn.mlp
 import bob.core.random
 import bob.io.base
-import numpy
+import pickle
 from .Algorithm import Algorithm
 from .mlp_train_helper import MLPTrainer
@@ -23,27 +23,19 @@ class MLP(Algorithm):
  def __init__(self,
               n_systems=2,
               hidden_layers=None,
-               trainer_devel=None,
               seed=None,
               machine=None,
               trainer=None,
               *args, **kwargs):
-    # chicken and egg :D call __init__ twice.
+    super(MLP, self).__init__(
-    super(MLP, self).__init__(performs_training=True, *args, **kwargs)
+        classifier=self,
+        *args, **kwargs)
    if hidden_layers is None:
      hidden_layers = [3]
-    if self.scores is not None:
-      n_systems = numpy.asarray(self.scores).shape[1]
    self.mlp_shape = [n_systems] + hidden_layers + [1]
-    super(MLP, self).__init__(
-        performs_training=True, mlp_shape=self.mlp_shape, seed=seed,
-        machine=str(machine), trainer=str(trainer),
-        *args, **kwargs)
    self.seed = seed
    self.machine = machine
    self.trainer = trainer
-    self.trainer_devel = trainer_devel if trainer_devel else \
-        self.trainer_scores
    self._my_kwargs = kwargs
    self.initialize()
@@ -59,20 +51,16 @@ class MLP(Algorithm):
        bob.learn.mlp.RProp(1, bob.learn.mlp.SquareError(
            self.machine.output_activation), machine=self.machine,
          train_biases=False)
+    self._kwargs = {
+      'seed': self.seed,
+      'mlp_shape': self.mlp_shape,
+      'machine': self.machine,
+      'train': self.train,
+    }
-  def prepare_train(self):
+  def prepare_train(self, train, devel):
-    self.trainer_devel = self.trainer_devel if self.trainer_devel else \
+    (negatives, positives) = train
-        self.trainer_scores
+    n_systems = negatives.shape[1]
-    self.train_helper = MLPTrainer(
-        train=self.trainer_scores[::-1],
-        devel=self.trainer_devel[::-1],
-        mlp_shape=self.mlp_shape,
-        machine=self.machine,
-        trainer=self.trainer,
-        **self._my_kwargs)
-  def fit(self, train_scores, y):
-    n_systems = train_scores.shape[1]
    if n_systems != self.mlp_shape[0]:
      logger.warn(
        'Reinitializing the MLP machine with the shape of {} to {} to match th'
@@ -81,8 +69,18 @@ class MLP(Algorithm):
      self.n_systems = n_systems
      self.hidden_layers = self.mlp_shape[1:-1]
      self.initialize(force=True)
-    self.trainer_scores = (train_scores[numpy.logical_not(y)], train_scores[y])
+    self.train_helper = MLPTrainer(
-    self.prepare_train()
+        train=train[::-1],
+        devel=devel[::-1],
+        mlp_shape=self.mlp_shape,
+        machine=self.machine,
+        trainer=self.trainer,
+        **self._my_kwargs)
+  def train(self, train, devel=None):
+    if devel is None:
+      devel = train
+    self.prepare_train(train, devel)
    self.machine, self.analyzer = self.train_helper()
  def decision_function(self, scores):
@@ -91,16 +89,30 @@ class MLP(Algorithm):
      scores = scores.ravel()
    return scores
+  def _get_hdf5_file(self, model_file):
+    return model_file[:-3] + 'hdf5'
  def save(self, model_file):
-    d5 = bob.io.base.HDF5File(model_file, "w")
+    d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file), "w")
    try:
      self.machine.save(d5)
    finally:
      d5.close()
+    # dump preprocessors in a pickle file because
+    # we don't know what they look like
+    with open(model_file, 'wb') as f:
+      pickle.dump(self.preprocessors, f)
  def load(self, model_file):
-    d5 = bob.io.base.HDF5File(model_file)
+    d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file))
    try:
      self.machine.load(d5)
    finally:
      d5.close()
+    # load preprocessors
+    with open(model_file, "rb") as f:
+      self.preprocessors = pickle.load(f)
+    return self
--- a/bob/fusion/base/algorithm/Weighted_Sum.py
+++ b/bob/fusion/base/algorithm/Weighted_Sum.py
@@ -16,8 +16,9 @@ class Weighted_Sum(Algorithm):
  def __init__(self, weights=None, *args, **kwargs):
    super(Weighted_Sum, self).__init__(
-      performs_training=False, weights=weights,
+      classifier=self,
-      has_closed_form_solution=True, *args, **kwargs)
+      weights=weights,
+      *args, **kwargs)
    self.weights = weights
  def fit(self, X, y):
@@ -30,4 +31,10 @@ class Weighted_Sum(Algorithm):
      return numpy.sum(scores * self.weights, axis=1)
  def closed_form(self, x1, y):
-    return 2*y - x1
+    if self.weights is None:
+      return 2*y - x1
+    else:
+      w1 = self.weights[0]
+      w2 = self.weights[1]
+      x2 = (y - x1*w1)/w2
+      return x2
--- a/bob/fusion/base/algorithm/__init__.py
+++ b/bob/fusion/base/algorithm/__init__.py
 from .Algorithm import Algorithm
 from .Weighted_Sum import Weighted_Sum
-from .LogisticRegression import LogisticRegression
 from .MLP import MLP
 # gets sphinx autodoc done right - don't remove it

--- a/bob/fusion/base/config/algorithm/llr.py
+++ b/bob/fusion/base/config/algorithm/llr.py
 #!/usr/bin/env python
 import bob.fusion.base
-import sklearn.preprocessing
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LogisticRegression
-algorithm = bob.fusion.base.algorithm.LogisticRegression(
+algorithm = bob.fusion.base.algorithm.Algorithm(
-  preprocessors=[(sklearn.preprocessing.RobustScaler(), False)])
+  preprocessors=[StandardScaler()],
+  classifier=LogisticRegression())
--- a/bob/fusion/base/config/algorithm/mlp.py
+++ b/bob/fusion/base/config/algorithm/mlp.py
 #!/usr/bin/env python
 import bob.fusion.base
-import sklearn.preprocessing
+from sklearn.preprocessing import StandardScaler
 algorithm = bob.fusion.base.algorithm.MLP(
-  preprocessors=[(sklearn.preprocessing.RobustScaler(), False)])
+  preprocessors=[StandardScaler()])
--- a/bob/fusion/base/config/algorithm/plr_2.py
+++ b/bob/fusion/base/config/algorithm/plr_2.py
 #!/usr/bin/env python
 import bob.fusion.base
-import sklearn.preprocessing
+from sklearn.preprocessing import StandardScaler, PolynomialFeatures
+from sklearn.linear_model import LogisticRegression
-algorithm = bob.fusion.base.algorithm.LogisticRegression(
+algorithm = bob.fusion.base.algorithm.Algorithm(
-  preprocessors=[(sklearn.preprocessing.RobustScaler(), False),
+  preprocessors=[StandardScaler(), PolynomialFeatures(degree=2)],
-                 (sklearn.preprocessing.PolynomialFeatures(degree=2), False)])
+  classifier=LogisticRegression())
--- a/bob/fusion/base/script/__init__.py
+++ b/bob/fusion/base/script/__init__.py
 from . import fuse
+from . import plot_fusion_decision_boundary
--- a/bob/fusion/base/script/fuse.py
+++ b/bob/fusion/base/script/fuse.py
@@ -9,7 +9,7 @@ import numpy as np
 from bob.io.base import create_directories_safe
 from bob.measure.load import load_score, get_all_scores,\
-    get_negatives_positives_all
+    get_negatives_positives_all, dump_score
 from bob.bio.base import utils
 from ..tools import parse_arguments, write_info
@@ -21,10 +21,6 @@ logger = bob.core.log.setup("bob.fusion.base")
 def fuse(args, command_line_parameters):
  """Do the actual fusion."""
  algorithm = args.algorithm
-  if args.score_type == 4:
-    fmt = '%s %s %s %.6f'
-  else:
-    fmt = '%s %s %s %s %.6f'
  write_info(args, command_line_parameters)
@@ -53,15 +49,23 @@ def fuse(args, command_line_parameters):
        assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
        assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
+  # train the preprocessors
+  algorithm.train_preprocessors(scores_dev)
+  # preprocess data
+  scores_dev = algorithm.preprocess(scores_dev)
+  scores_eval = algorithm.preprocess(scores_eval)
+  neg, pos = trainer_scores
+  neg, pos = algorithm.preprocess(neg), algorithm.preprocess(pos)
+  trainer_scores = (neg, pos)
  # train the model
  if utils.check_file(args.model_file, args.force, 1000):
    logger.info(
      "- Fusion: model '%s' already exists.", args.model_file)
    algorithm = algorithm.load(args.model_file)
-    algorithm.trainer_scores = trainer_scores
+  else:
-  elif algorithm.performs_training:
+    algorithm.train(trainer_scores)
-    algorithm.trainer_scores = trainer_scores
-    algorithm.train()
    algorithm.save(args.model_file)
  # fuse the scores (dev)
@@ -69,12 +73,11 @@ def fuse(args, command_line_parameters):
    logger.info(
      "- Fusion: scores '%s' already exists.", args.fused_dev_file)
  else:
-    algorithm.scores = scores_dev
+    fused_scores_dev = algorithm.fuse(scores_dev)
-    fused_scores_dev = algorithm()
+    score_lines = score_lines_list_dev[0]
-    score_lines = np.array(score_lines_list_dev[0])
    score_lines['score'] = fused_scores_dev
    create_directories_safe(os.path.dirname(args.fused_dev_file))
-    np.savetxt(args.fused_dev_file, score_lines, fmt=fmt)
+    dump_score(args.fused_dev_file, score_lines)
  # fuse the scores (eval)
  if args.eval_files:
@@ -82,12 +85,11 @@ def fuse(args, command_line_parameters):
      logger.info(
        "- Fusion: scores '%s' already exists.", args.fused_eval_file)
    else:
-      algorithm.scores = scores_eval
+      fused_scores_eval = algorithm.fuse(scores_eval)
-      fused_scores_eval = algorithm()
+      score_lines = score_lines_list_eval[0]
-      score_lines = np.array(score_lines_list_eval[0])
      score_lines['score'] = fused_scores_eval
      create_directories_safe(os.path.dirname(args.fused_eval_file))
-      np.savetxt(args.fused_eval_file, score_lines, fmt=fmt)
+      dump_score(args.fused_eval_file, score_lines)
 def main(command_line_parameters=None):

--- a/bob/fusion/base/script/plot_fusion_decision_boundary.py
+++ b/bob/fusion/base/script/plot_fusion_decision_boundary.py
+#!/usr/bin/env python
+"""Plot decision boundraries of the fusion algorithm.
+Usage:
+  plot_fusion_decision_boundary.py SCORE_FILE SCORE_FILE MODEL_FILE
+    [-v... | --verbose...] [options]
+  plot_fusion_decision_boundary.py (-h | --help)
+  plot_fusion_decision_boundary.py (-V | --version)
+Options:
+  -o, --output PLOT_FILE  The path to save the plot. [default: scatter.pdf]
+  --score-type {4,5}      The format the scores are provided. [default: 4]
+  -v, --verbose           Increase the verbosity level from 0 (only error
+                          messages) to 1 (warnings), 2 (log messages), 3 (debug
+                          information) by adding the --verbose option as often
+                          as desired (e.g. '-vvv' for debug). [default: 0]
+  -a, --algorithm Algorithm  The fusion that was used during fusion if they
+                          implement a different load method e.g.
+                          bob.fusion.base.algorithm.MLP.
+                          [default: bob.fusion.base.algorithm.Algorithm]
+  -g, --group N           If given scores will be grouped into N samples.
+                          [default: 500]
+  --grouping {random, kmeans}  The gouping algorithm used. [default: kmeans]
+  -h --help               Show this screen.
+  -V, --version           Show version.
+"""
+from docopt import docopt
+import matplotlib.pyplot as plt
+import numpy
+import bob.fusion.base
+import bob.core
+from bob.measure.load import load_score, get_negatives_positives,\
+  get_all_scores
+from bob.measure import eer_threshold
+logger = bob.core.log.setup("bob.fusion.base")
+def main(command_line_parameters=None):
+  args = docopt(__doc__, argv=command_line_parameters,
+                version=bob.fusion.base.get_config())
+  print(args)
+  bob.core.log.set_verbosity_level(logger, args['--verbose'])
+  # load the algorithm
+  algorithm = eval('{}()'.format(args['--algorithm']))
+  algorithm = algorithm.load(args['MODEL_FILE'])
+  # load the scores
+  score_lines_list = [
+    load_score(path, int(args['--score-type'])) for path in args['SCORE_FILE']]
+  scores = get_all_scores(score_lines_list)
+  score_lines = numpy.array(score_lines_list[0])
+  score_lines['score'] = algorithm.fuse(algorithm.preprocess(scores))
+  threshold = eer_threshold(*get_negatives_positives(score_lines))
+  score_labels = score_lines['claimed_id'] == score_lines['real_id']
+  # plot the decision boundary
+  algorithm.plot_boundary_decision(
+    scores, score_labels, threshold,
+    do_grouping=True,
+    npoints=int(args['--group']),
+    seed=0,
+    gformat=args['--grouping']
+  )
+  plt.savefig(args['--output'])
+  plt.close()
+if __name__ == '__main__':
+  main()
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,8 +3,9 @@ numpy
 bob.core
 bob.extension
 bob.measure
-bob.learn.linear
+bob.learn.activation
-bob.learn.em
 bob.learn.mlp
 bob.bio.base
+scikit-learn
 matplotlib   # for plotting
+docopt       # for plotting script
\ No newline at end of file
--- a/setup.py
+++ b/setup.py
@@ -102,6 +102,7 @@ setup(
      # scripts should be declared using this entry:
      'console_scripts': [
        'fuse.py     = bob.fusion.base.script.fuse:main',
+        'plot_fusion_decision_boundary.py = bob.fusion.base.script.plot_fusion_decision_boundary:main',
      ],
      'bob.fusion.algorithm': [