diff --git a/bob/fusion/base/algorithm/Algorithm.py b/bob/fusion/base/algorithm/Algorithm.py index 621f4a4ad1899b201e5c9285fce545c4af1188e6..a769e33006843b95bb162821e57d9bd335ab5dcc 100644 --- a/bob/fusion/base/algorithm/Algorithm.py +++ b/bob/fusion/base/algorithm/Algorithm.py @@ -15,9 +15,8 @@ class Algorithm(object): """docstring for Algorithm""" def __init__(self, - performs_training=False, - has_closed_form_solution=False, preprocessors=None, + classifier=None, *args, **kwargs ): @@ -29,8 +28,7 @@ class Algorithm(object): """ super(Algorithm, self).__init__() - self.performs_training = performs_training - self.has_closed_form_solution = has_closed_form_solution + self.classifier = classifier self.preprocessors = preprocessors self._kwargs = kwargs self._kwargs['preprocessors'] = preprocessors @@ -47,21 +45,43 @@ class Algorithm(object): return scores def train(self, train, devel=None): - if devel is None: - devel = train (negatives, positives) = train train_scores = np.vstack((negatives, positives)) neg_len = negatives.shape[0] y = np.zeros((train_scores.shape[0],), dtype='bool') y[neg_len:] = True - self.fit(train_scores, y) + self.classifier.fit(train_scores, y) def fuse(self, scores): - return self.decision_function(scores) + if hasattr(self, 'classifier'): + return self.classifier.decision_function(scores) + else: + return self.decision_function(scores) + + def __str__(self): + """__str__() -> info - def plot_boundary_decision(self, score_labels, threshold, - label_system1='', - label_system2='', + This function returns all parameters of this class (and its derived class). + + **Returns:** + + info : str + A string containing the full information of all parameters of this + (and the derived) class. + """ + return "%s(%s)" % (str(self.__class__), ", ".join( + ["%s=%s" % (key, value) for key, value in + self._kwargs.items() if value is not None])) + + def save(self, model_file): + with open(model_file, "wb") as f: + pickle.dump(self, f) + + def load(self, model_file): + with open(model_file, "rb") as f: + return pickle.load(f) + + def plot_boundary_decision(self, scores, score_labels, threshold, thres_system1=None, thres_system2=None, do_grouping=False, @@ -77,15 +97,16 @@ class Algorithm(object): ''' Plots the boundary decision of the Algorithm - @param score_labels numpy.array A (self.scores.shape[0]) array containing - the true labels of self.scores. + @param score_labels numpy.array A (scores.shape[0]) array containing + the true labels of scores. @param threshold float threshold of the decision boundary ''' if legends is None: legends = ['Impostor', 'Genuine'] + markers = ['x', 'o'] - if self.scores.shape[1] > 2: + if scores.shape[1] > 2: raise NotImplementedError( "Currently plotting the decision boundary for more than two systems " "is not supported.") @@ -93,28 +114,21 @@ class Algorithm(object): import matplotlib.pyplot as plt plt.gca() # this is necessary for subplots to work. - X = self.scores[:, [i1, i2]] + X = scores[:, [i1, i2]] Y = score_labels x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad - h1 = abs(x_max - x_min) / resolution - h2 = abs(y_max - y_min) / resolution - if self.has_closed_form_solution and self.scores.shape[1] == 2: - x1 = np.arange(x_min, x_max, h1) - x2 = self.closed_form(x1, threshold) - plt.plot(x1, x2, cmap=plt.cm.viridis) - else: - xx, yy = np.meshgrid( - np.arange(x_min, x_max, h1), np.arange(y_min, y_max, h2)) - scores = self.scores - self.scores = np.c_[xx.ravel(), yy.ravel()] - Z = (self() > threshold).reshape(xx.shape) - self.scores = scores + xx, yy = np.meshgrid( + np.linspace(x_min, x_max, resolution), + np.linspace(y_min, y_max, resolution)) + temp = np.c_[xx.ravel(), yy.ravel()] + temp = self.preprocess(temp) + Z = (self.fuse(temp) > threshold).reshape(xx.shape) - contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis) + contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis) if do_grouping: - positives, negatives = X[Y], X[np.logical_not(Y)] + negatives, positives = X[np.logical_not(Y)], X[Y] negatives, positives = grouping(negatives, positives, **kwargs) X = np.concatenate((negatives, positives), axis=0) Y = np.concatenate( @@ -122,35 +136,19 @@ class Algorithm(object): np.ones(positives.shape[0], dtype=np.bool8)), axis=0) - plt.scatter( - X[:, 0], X[:, 1], c=Y, alpha=alpha, cmap=plt.cm.viridis) - # plt.legend(legends) + negatives, positives = X[np.logical_not(Y)], X[Y] + colors = plt.cm.viridis(np.linspace(0, 1, 2)) + for i, X in enumerate((negatives, positives)): + plt.scatter( + X[:, 0], X[:, 1], marker=markers[i], alpha=alpha, + c=colors[i], label=legends[i]) + plt.legend() if thres_system1 is not None: plt.axvline(thres_system1, color='red') plt.axhline(thres_system2, color='red') - return contourf - - def __str__(self): - """__str__() -> info - - This function returns all parameters of this class (and its derived class). - - **Returns:** - - info : str - A string containing the full information of all parameters of this - (and the derived) class. - """ - return "%s(%s)" % (str(self.__class__), ", ".join( - ["%s=%s" % (key, value) for key, value in - self._kwargs.items() if value is not None])) - - def save(self, model_file): - with open(model_file, "wb") as f: - pickle.dump(self, f) + plt.xlim([x_min, x_max]) + plt.ylim([y_min, y_max]) - def load(self, model_file): - with open(model_file, "rb") as f: - return pickle.load(f) + return contourf diff --git a/bob/fusion/base/algorithm/LogisticRegression.py b/bob/fusion/base/algorithm/LogisticRegression.py deleted file mode 100644 index 38dc5c5a7d337c258414af3b6b94f18b920b42bd..0000000000000000000000000000000000000000 --- a/bob/fusion/base/algorithm/LogisticRegression.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python - -from __future__ import division -from __future__ import absolute_import - -import bob.learn.linear -from sklearn.linear_model import LogisticRegression as LogisticRegression_SK - -from .Algorithm import Algorithm - -import bob.core -logger = bob.core.log.setup("bob.fusion.base") - - -class LogisticRegression(Algorithm, LogisticRegression_SK): - __doc__ = LogisticRegression_SK.__doc__ - - def __init__(self, - *args, **kwargs): - Algorithm.__init__( - self, performs_training=True, - has_closed_form_solution=True, *args, **kwargs) - sk_kwargs = {} - for key, value in kwargs.items(): - if key in ['penalty', 'dual', 'tol', 'C', 'fit_intercept', - 'intercept_scaling', 'class_weight', - 'random_state', 'solver', 'max_iter', - 'multi_class', 'verbose', 'warm_start', 'n_jobs']: - sk_kwargs[key] = value - - LogisticRegression_SK.__init__(self, **sk_kwargs) - - def closed_form(self, x1, y): - w1 = self.coef_[0] - w2 = self.coef_[1] - x2 = (y - self.intercept_ - x1*w1)/w2 - return x2 diff --git a/bob/fusion/base/algorithm/MLP.py b/bob/fusion/base/algorithm/MLP.py index 9af9abd279653d3fbf5fd6d5d2e16140a68e74c1..6d8e46670a52ce1fe902230919138d2f526dfa95 100644 --- a/bob/fusion/base/algorithm/MLP.py +++ b/bob/fusion/base/algorithm/MLP.py @@ -6,7 +6,7 @@ from __future__ import absolute_import import bob.learn.mlp import bob.core.random import bob.io.base -import numpy +import pickle from .Algorithm import Algorithm from .mlp_train_helper import MLPTrainer @@ -23,27 +23,19 @@ class MLP(Algorithm): def __init__(self, n_systems=2, hidden_layers=None, - trainer_devel=None, seed=None, machine=None, trainer=None, *args, **kwargs): - # chicken and egg :D call __init__ twice. - super(MLP, self).__init__(performs_training=True, *args, **kwargs) + super(MLP, self).__init__( + classifier=self, + *args, **kwargs) if hidden_layers is None: hidden_layers = [3] - if self.scores is not None: - n_systems = numpy.asarray(self.scores).shape[1] self.mlp_shape = [n_systems] + hidden_layers + [1] - super(MLP, self).__init__( - performs_training=True, mlp_shape=self.mlp_shape, seed=seed, - machine=str(machine), trainer=str(trainer), - *args, **kwargs) self.seed = seed self.machine = machine self.trainer = trainer - self.trainer_devel = trainer_devel if trainer_devel else \ - self.trainer_scores self._my_kwargs = kwargs self.initialize() @@ -59,20 +51,16 @@ class MLP(Algorithm): bob.learn.mlp.RProp(1, bob.learn.mlp.SquareError( self.machine.output_activation), machine=self.machine, train_biases=False) + self._kwargs = { + 'seed': self.seed, + 'mlp_shape': self.mlp_shape, + 'machine': self.machine, + 'train': self.train, + } - def prepare_train(self): - self.trainer_devel = self.trainer_devel if self.trainer_devel else \ - self.trainer_scores - self.train_helper = MLPTrainer( - train=self.trainer_scores[::-1], - devel=self.trainer_devel[::-1], - mlp_shape=self.mlp_shape, - machine=self.machine, - trainer=self.trainer, - **self._my_kwargs) - - def fit(self, train_scores, y): - n_systems = train_scores.shape[1] + def prepare_train(self, train, devel): + (negatives, positives) = train + n_systems = negatives.shape[1] if n_systems != self.mlp_shape[0]: logger.warn( 'Reinitializing the MLP machine with the shape of {} to {} to match th' @@ -81,8 +69,18 @@ class MLP(Algorithm): self.n_systems = n_systems self.hidden_layers = self.mlp_shape[1:-1] self.initialize(force=True) - self.trainer_scores = (train_scores[numpy.logical_not(y)], train_scores[y]) - self.prepare_train() + self.train_helper = MLPTrainer( + train=train[::-1], + devel=devel[::-1], + mlp_shape=self.mlp_shape, + machine=self.machine, + trainer=self.trainer, + **self._my_kwargs) + + def train(self, train, devel=None): + if devel is None: + devel = train + self.prepare_train(train, devel) self.machine, self.analyzer = self.train_helper() def decision_function(self, scores): @@ -91,16 +89,30 @@ class MLP(Algorithm): scores = scores.ravel() return scores + def _get_hdf5_file(self, model_file): + return model_file[:-3] + 'hdf5' + def save(self, model_file): - d5 = bob.io.base.HDF5File(model_file, "w") + d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file), "w") try: self.machine.save(d5) finally: d5.close() + # dump preprocessors in a pickle file because + # we don't know how they look like + with open(model_file, 'wb') as f: + pickle.dump(self.preprocessors, f) + def load(self, model_file): - d5 = bob.io.base.HDF5File(model_file) + d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file)) try: self.machine.load(d5) finally: d5.close() + + # load preprocessors + with open(model_file, "rb") as f: + self.preprocessors = pickle.load(f) + + return self diff --git a/bob/fusion/base/algorithm/Weighted_Sum.py b/bob/fusion/base/algorithm/Weighted_Sum.py index 3f1c9ae229f2c685078085de6809dfd024e85fcc..68fc899e59a0b9a5b32b7c4acbadac8926c8dcc9 100644 --- a/bob/fusion/base/algorithm/Weighted_Sum.py +++ b/bob/fusion/base/algorithm/Weighted_Sum.py @@ -16,8 +16,9 @@ class Weighted_Sum(Algorithm): def __init__(self, weights=None, *args, **kwargs): super(Weighted_Sum, self).__init__( - performs_training=False, weights=weights, - has_closed_form_solution=True, *args, **kwargs) + classifier=self, + weights=weights, + *args, **kwargs) self.weights = weights def fit(self, X, y): @@ -30,4 +31,10 @@ class Weighted_Sum(Algorithm): return numpy.sum(scores * self.weights, axis=1) def closed_form(self, x1, y): - return 2*y - x1 + if self.weights is None: + return 2*y - x1 + else: + w1 = self.weights[0] + w2 = self.weights[1] + x2 = (y - x1*w1)/w2 + return x2 diff --git a/bob/fusion/base/algorithm/__init__.py b/bob/fusion/base/algorithm/__init__.py index 3823cff5480839e9887344c5120688f52c64c575..39cd044f1ebed29a61df2ea8a757f92351221b26 100644 --- a/bob/fusion/base/algorithm/__init__.py +++ b/bob/fusion/base/algorithm/__init__.py @@ -1,6 +1,5 @@ from .Algorithm import Algorithm from .Weighted_Sum import Weighted_Sum -from .LogisticRegression import LogisticRegression from .MLP import MLP # gets sphinx autodoc done right - don't remove it diff --git a/bob/fusion/base/config/algorithm/llr.py b/bob/fusion/base/config/algorithm/llr.py index d5aa17c13bfb4f43426207c62c11047d74ae24f2..9a4c61726809a71050926bc66ed9693e557fb8c6 100644 --- a/bob/fusion/base/config/algorithm/llr.py +++ b/bob/fusion/base/config/algorithm/llr.py @@ -1,7 +1,9 @@ #!/usr/bin/env python import bob.fusion.base -import sklearn.preprocessing +from sklearn.preprocessing import StandardScaler +from sklearn.linear_model import LogisticRegression -algorithm = bob.fusion.base.algorithm.LogisticRegression( - preprocessors=[(sklearn.preprocessing.RobustScaler(), False)]) +algorithm = bob.fusion.base.algorithm.Algorithm( + preprocessors=[StandardScaler()], + classifier=LogisticRegression()) diff --git a/bob/fusion/base/config/algorithm/mlp.py b/bob/fusion/base/config/algorithm/mlp.py index dd02849afd2c3e05976f791c368dd8a0122ebc13..997077dcd40a60479b957402615acd8ba8e32e44 100644 --- a/bob/fusion/base/config/algorithm/mlp.py +++ b/bob/fusion/base/config/algorithm/mlp.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import bob.fusion.base -import sklearn.preprocessing +from sklearn.preprocessing import StandardScaler algorithm = bob.fusion.base.algorithm.MLP( - preprocessors=[(sklearn.preprocessing.RobustScaler(), False)]) + preprocessors=[StandardScaler()]) diff --git a/bob/fusion/base/config/algorithm/plr_2.py b/bob/fusion/base/config/algorithm/plr_2.py index 20da6fa53ac82d73bad4797c4d84713484ccdfea..a47b7be040dd83e903335e8aebc3bb4837ab3668 100644 --- a/bob/fusion/base/config/algorithm/plr_2.py +++ b/bob/fusion/base/config/algorithm/plr_2.py @@ -1,8 +1,9 @@ #!/usr/bin/env python import bob.fusion.base -import sklearn.preprocessing +from sklearn.preprocessing import StandardScaler, PolynomialFeatures +from sklearn.linear_model import LogisticRegression -algorithm = bob.fusion.base.algorithm.LogisticRegression( - preprocessors=[(sklearn.preprocessing.RobustScaler(), False), - (sklearn.preprocessing.PolynomialFeatures(degree=2), False)]) +algorithm = bob.fusion.base.algorithm.Algorithm( + preprocessors=[StandardScaler(), PolynomialFeatures(degree=2)], + classifier=LogisticRegression()) diff --git a/bob/fusion/base/script/__init__.py b/bob/fusion/base/script/__init__.py index c9ab444a2db44da8d8103e68cc46174e97c7672a..b22516fe54dc4528352cf701281c2bc4790b0a3c 100644 --- a/bob/fusion/base/script/__init__.py +++ b/bob/fusion/base/script/__init__.py @@ -1 +1,2 @@ from . import fuse +from . import plot_fusion_decision_boundary diff --git a/bob/fusion/base/script/fuse.py b/bob/fusion/base/script/fuse.py index 29ce8e85a29e1f446e75878f0dd5d2f3b8fad3f3..556d6f2ffcb25ff0361dea04f6680c747dc73eac 100755 --- a/bob/fusion/base/script/fuse.py +++ b/bob/fusion/base/script/fuse.py @@ -9,7 +9,7 @@ import numpy as np from bob.io.base import create_directories_safe from bob.measure.load import load_score, get_all_scores,\ - get_negatives_positives_all + get_negatives_positives_all, dump_score from bob.bio.base import utils from ..tools import parse_arguments, write_info @@ -21,10 +21,6 @@ logger = bob.core.log.setup("bob.fusion.base") def fuse(args, command_line_parameters): """Do the actual fusion.""" algorithm = args.algorithm - if args.score_type == 4: - fmt = '%s %s %s %.6f' - else: - fmt = '%s %s %s %s %.6f' write_info(args, command_line_parameters) @@ -53,15 +49,23 @@ def fuse(args, command_line_parameters): assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id'])) assert(np.all(score_lines['real_id'] == score_lines0['real_id'])) + # train the preprocessors + algorithm.train_preprocessors(scores_dev) + + # preprocess data + scores_dev = algorithm.preprocess(scores_dev) + scores_eval = algorithm.preprocess(scores_eval) + neg, pos = trainer_scores + neg, pos = algorithm.preprocess(neg), algorithm.preprocess(pos) + trainer_scores = (neg, pos) + # train the model if utils.check_file(args.model_file, args.force, 1000): logger.info( "- Fusion: model '%s' already exists.", args.model_file) algorithm = algorithm.load(args.model_file) - algorithm.trainer_scores = trainer_scores - elif algorithm.performs_training: - algorithm.trainer_scores = trainer_scores - algorithm.train() + else: + algorithm.train(trainer_scores) algorithm.save(args.model_file) # fuse the scores (dev) @@ -69,12 +73,11 @@ def fuse(args, command_line_parameters): logger.info( "- Fusion: scores '%s' already exists.", args.fused_dev_file) else: - algorithm.scores = scores_dev - fused_scores_dev = algorithm() - score_lines = np.array(score_lines_list_dev[0]) + fused_scores_dev = algorithm.fuse(scores_dev) + score_lines = score_lines_list_dev[0] score_lines['score'] = fused_scores_dev create_directories_safe(os.path.dirname(args.fused_dev_file)) - np.savetxt(args.fused_dev_file, score_lines, fmt=fmt) + dump_score(args.fused_dev_file, score_lines) # fuse the scores (eval) if args.eval_files: @@ -82,12 +85,11 @@ def fuse(args, command_line_parameters): logger.info( "- Fusion: scores '%s' already exists.", args.fused_eval_file) else: - algorithm.scores = scores_eval - fused_scores_eval = algorithm() - score_lines = np.array(score_lines_list_eval[0]) + fused_scores_eval = algorithm.fuse(scores_eval) + score_lines = score_lines_list_eval[0] score_lines['score'] = fused_scores_eval create_directories_safe(os.path.dirname(args.fused_eval_file)) - np.savetxt(args.fused_eval_file, score_lines, fmt=fmt) + dump_score(args.fused_eval_file, score_lines) def main(command_line_parameters=None): diff --git a/bob/fusion/base/script/plot_fusion_decision_boundary.py b/bob/fusion/base/script/plot_fusion_decision_boundary.py new file mode 100644 index 0000000000000000000000000000000000000000..11e5c1fc1ecf493a91cd9a39cb24a0494de3fab0 --- /dev/null +++ b/bob/fusion/base/script/plot_fusion_decision_boundary.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python + +"""Plot decision boundraries of the fusion algorithm. + +Usage: + plot_fusion_decision_boundary.py SCORE_FILE SCORE_FILE MODEL_FILE + [-v... | --verbose...] [options] + plot_fusion_decision_boundary.py (-h | --help) + plot_fusion_decision_boundary.py (-V | --version) + +Options: + -o, --output PLOT_FILE The path to save the plot. [default: scatter.pdf] + --score-type {4,5} The format the scores are provided. [default: 4] + -v, --verbose Increase the verbosity level from 0 (only error + messages) to 1 (warnings), 2 (log messages), 3 (debug + information) by adding the --verbose option as often + as desired (e.g. '-vvv' for debug). [default: 0] + -a, --algorithm Algorithm The fusion that was used during fusion if they + implement a different load method e.g. + bob.fusion.base.algorithm.MLP. + [default: bob.fusion.base.algorithm.Algorithm] + -g, --group N If given scores will be grouped into N samples. + [default: 500] + --grouping {random, kmeans} The gouping algorithm used. [default: kmeans] + -h --help Show this screen. + -V, --version Show version. + +""" + +from docopt import docopt +import matplotlib.pyplot as plt +import numpy + +import bob.fusion.base +import bob.core +from bob.measure.load import load_score, get_negatives_positives,\ + get_all_scores +from bob.measure import eer_threshold + +logger = bob.core.log.setup("bob.fusion.base") + + +def main(command_line_parameters=None): + args = docopt(__doc__, argv=command_line_parameters, + version=bob.fusion.base.get_config()) + print(args) + bob.core.log.set_verbosity_level(logger, args['--verbose']) + + # load the algorithm + algorithm = eval('{}()'.format(args['--algorithm'])) + algorithm = algorithm.load(args['MODEL_FILE']) + + # load the scores + score_lines_list = [ + load_score(path, int(args['--score-type'])) for path in args['SCORE_FILE']] + scores = get_all_scores(score_lines_list) + score_lines = numpy.array(score_lines_list[0]) + score_lines['score'] = algorithm.fuse(algorithm.preprocess(scores)) + threshold = eer_threshold(*get_negatives_positives(score_lines)) + score_labels = score_lines['claimed_id'] == score_lines['real_id'] + + # plot the decision boundary + algorithm.plot_boundary_decision( + scores, score_labels, threshold, + do_grouping=True, + npoints=int(args['--group']), + seed=0, + gformat=args['--grouping'] + ) + plt.savefig(args['--output']) + plt.close() + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt index a4abcbc1f465dc153b5b967cf52bba4adaf7c508..09801548506cc5688ade4b3f728ec5806bd16217 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,8 +3,9 @@ numpy bob.core bob.extension bob.measure -bob.learn.linear -bob.learn.em +bob.learn.activation bob.learn.mlp bob.bio.base +scikit-learn matplotlib # for plotting +docopt # for plotting script \ No newline at end of file diff --git a/setup.py b/setup.py index 5668c611fe8ffa0815744d3b0971a0b9b1491e88..362aaab9d706087b1cb437375bc8292158a32ccd 100644 --- a/setup.py +++ b/setup.py @@ -102,6 +102,7 @@ setup( # scripts should be declared using this entry: 'console_scripts': [ 'fuse.py = bob.fusion.base.script.fuse:main', + 'plot_fusion_decision_boundary.py = bob.fusion.base.script.plot_fusion_decision_boundary:main', ], 'bob.fusion.algorithm': [