From 1edca176d38b397eb4e7894508f5cc9441cd1758 Mon Sep 17 00:00:00 2001
From: Amir Mohammadi <183.amir@gmail.com>
Date: Tue, 31 Jan 2017 17:45:32 +0100
Subject: [PATCH] random fixes, whitespace changes

---
 bob/fusion/base/algorithm/AlgorithmBob.py     |  78 +--
 bob/fusion/base/algorithm/LLR.py              |  46 +-
 bob/fusion/base/algorithm/MLP.py              | 136 ++--
 bob/fusion/base/algorithm/Weighted_Sum.py     |  34 +-
 bob/fusion/base/algorithm/__init__.py         |   1 +
 bob/fusion/base/algorithm/mlp_train_helper.py | 588 +++++++++---------
 bob/fusion/base/script/__init__.py            |   2 +-
 .../script/bob_fusion_decision_boundary.py    | 259 ++++----
 bob/fusion/base/test/test_algorithm.py        | 257 ++++----
 bob/fusion/base/test/test_scripts.py          |  82 +--
 bob/fusion/base/tools/command_line.py         | 443 ++++++-------
 bob/fusion/base/tools/common.py               | 174 +++---
 bob/fusion/base/tools/plotting.py             |  30 +-
 setup.py                                      |   2 +-
 14 files changed, 1077 insertions(+), 1055 deletions(-)

diff --git a/bob/fusion/base/algorithm/AlgorithmBob.py b/bob/fusion/base/algorithm/AlgorithmBob.py
index ed5c3eb..fa7c5d4 100644
--- a/bob/fusion/base/algorithm/AlgorithmBob.py
+++ b/bob/fusion/base/algorithm/AlgorithmBob.py
@@ -11,42 +11,42 @@ logger = bob.core.log.setup("bob.fusion.base")


 class AlgorithmBob(Algorithm):
-  """A class to be used in score fusion using bob machines."""
-
-  def _get_hdf5_file(self, model_file):
-    return model_file[:-3] + 'hdf5'
-
-  def custom_save(self, model_file):
-    # dump preprocessors in a pickle file because
-    # we don't know how they look like
-    # saves the class to create it later.
-    with open(model_file, 'wb') as f:
-      pickle.dump(type(self), f)
-      pickle.dump(self.preprocessors, f)
-      # just for consistent string representation
-      pickle.dump(self._kwargs, f)
-
-    d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file), "w")
-    try:
-      self.machine.save(d5)
-    finally:
-      d5.close()
-
-  def load(self, model_file):
-    # load preprocessors and the class
-    with open(model_file, "rb") as f:
-      myclass = pickle.load(f)
-      preprocessors = pickle.load(f)
-      _kwargs = pickle.load(f)
-
-    myinstance = myclass(preprocessors=preprocessors)
-    # just for consistent string representation
-    myinstance._kwargs.update(_kwargs)
-
-    d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file))
-    try:
-      myinstance.machine.load(d5)
-    finally:
-      d5.close()
-
-    return myinstance
+    """A class to be used in score fusion using bob machines."""
+
+    def _get_hdf5_file(self, model_file):
+        return model_file[:-3] + 'hdf5'
+
+    def custom_save(self, model_file):
+        # dump the preprocessors in a pickle file because
+        # we don't know what they look like;
+        # also save the class so it can be re-created later.
+        with open(model_file, 'wb') as f:
+            pickle.dump(type(self), f)
+            pickle.dump(self.preprocessors, f)
+            # just for consistent string representation
+            pickle.dump(self._kwargs, f)
+
+        d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file), "w")
+        try:
+            self.machine.save(d5)
+        finally:
+            d5.close()
+
+    def load(self, model_file):
+        # load preprocessors and the class
+        with open(model_file, "rb") as f:
+            myclass = pickle.load(f)
+            preprocessors = pickle.load(f)
+            _kwargs = pickle.load(f)
+
+        myinstance = myclass(preprocessors=preprocessors)
+        # just for consistent string representation
+        myinstance._kwargs.update(_kwargs)
+
+        d5 = bob.io.base.HDF5File(self._get_hdf5_file(model_file))
+        try:
+            myinstance.machine.load(d5)
+        finally:
+            d5.close()
+
+        return myinstance
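The save/load code above pickles the class object itself before the state, so that load() can re-instantiate the right subclass without knowing its type in advance. A minimal standalone sketch of the same round trip, assuming nothing from the package (ToyAlgorithm and model.pkl are made up for illustration):

import pickle

class ToyAlgorithm(object):
    def __init__(self, preprocessors=None):
        self.preprocessors = preprocessors

def save(obj, path):
    with open(path, 'wb') as f:
        pickle.dump(type(obj), f)            # the class first ...
        pickle.dump(obj.preprocessors, f)    # ... then its state

def load(path):
    with open(path, 'rb') as f:
        cls = pickle.load(f)                 # recover the class
        preprocessors = pickle.load(f)       # recover the state
    return cls(preprocessors=preprocessors)  # rebuild the instance

save(ToyAlgorithm(preprocessors=['scaler']), 'model.pkl')
assert load('model.pkl').preprocessors == ['scaler']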
diff --git a/bob/fusion/base/algorithm/LLR.py b/bob/fusion/base/algorithm/LLR.py
index fa4fcac..dd5151e 100644
--- a/bob/fusion/base/algorithm/LLR.py
+++ b/bob/fusion/base/algorithm/LLR.py
@@ -12,26 +12,26 @@ logger = logging.getLogger("bob.fusion.base")


 class LLR(AlgorithmBob):
-  """LLR Score fusion using Bob"""
-
-  def __init__(self,
-               trainer=None,
-               *args, **kwargs):
-    self.trainer = trainer if trainer else \
-        bob.learn.linear.CGLogRegTrainer()
-    # this is needed to be able to load the machine
-    self.machine = bob.learn.linear.Machine()
-    super(LLR, self).__init__(
-        classifier=self,
-        trainer=str(type(self.trainer)),
-        *args, **kwargs)
-
-  def train(self, train_neg, train_pos, devel_neg=None, devel_pos=None):
-    # Trainning the LLR machine
-    self.machine = self.trainer.train(train_neg, train_pos)
-
-  def decision_function(self, scores):
-    scores = self.machine(scores)
-    if scores.ndim == 2 and scores.shape[1] == 1:
-      scores = scores.ravel()
-    return scores
+    """LLR Score fusion using Bob"""
+
+    def __init__(self,
+                 trainer=None,
+                 *args, **kwargs):
+        self.trainer = trainer if trainer else \
+            bob.learn.linear.CGLogRegTrainer()
+        # this is needed to be able to load the machine
+        self.machine = bob.learn.linear.Machine()
+        super(LLR, self).__init__(
+            classifier=self,
+            trainer=str(type(self.trainer)),
+            *args, **kwargs)
+
+    def train(self, train_neg, train_pos, devel_neg=None, devel_pos=None):
+        # Training the LLR machine
+        self.machine = self.trainer.train(train_neg, train_pos)
+
+    def decision_function(self, scores):
+        scores = self.machine(scores)
+        if scores.ndim == 2 and scores.shape[1] == 1:
+            scores = scores.ravel()
+        return scores
diff --git a/bob/fusion/base/algorithm/MLP.py b/bob/fusion/base/algorithm/MLP.py
index 1af5531..77f63ea 100644
--- a/bob/fusion/base/algorithm/MLP.py
+++ b/bob/fusion/base/algorithm/MLP.py
@@ -15,76 +15,76 @@ logger = bob.core.log.setup("bob.fusion.base")


 class MLP(AlgorithmBob):
-  """This MLP is implemented using the bob tools.
-  The preprocessors used with this class should be pickleable.
-  """
+    """This MLP is implemented using the bob tools.
+    The preprocessors used with this class should be pickleable.
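+
+    A hypothetical usage sketch (the training arrays and variable names
+    here are illustrative, not part of the package)::
+
+        algorithm = MLP(n_systems=2, hidden_layers=(5,), seed=0)
+        algorithm.train(train_neg, train_pos)
+        fused = algorithm.fuse(test_scores)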
+ """ - def __init__(self, - n_systems=2, - hidden_layers=(5,), - seed=None, - machine=None, - trainer=None, - *args, **kwargs): - super(MLP, self).__init__( - classifier=self, - *args, **kwargs) - self.mlp_shape = [n_systems] + list(hidden_layers) + [1] - self.seed = seed - self.machine = machine - self.trainer = trainer - self._my_kwargs = kwargs - self.initialize() + def __init__(self, + n_systems=2, + hidden_layers=(5,), + seed=None, + machine=None, + trainer=None, + *args, **kwargs): + super(MLP, self).__init__( + classifier=self, + *args, **kwargs) + self.mlp_shape = [n_systems] + list(hidden_layers) + [1] + self.seed = seed + self.machine = machine + self.trainer = trainer + self._my_kwargs = kwargs + self.initialize() - def initialize(self, force=False): - self.machine = self.machine if self.machine and not force else \ - bob.learn.mlp.Machine(self.mlp_shape) - if self.seed is not None: - self.rng = bob.core.random.mt19937(self.seed) - self.machine.randomize(rng=self.rng) - else: - self.machine.randomize() - self.trainer = self.trainer if self.trainer and not force else \ - bob.learn.mlp.RProp(1, bob.learn.mlp.SquareError( - self.machine.output_activation), machine=self.machine, - train_biases=False) - self._kwargs.update({ - 'seed': self.seed, - 'mlp_shape': self.mlp_shape, - 'machine': str(self.machine), - 'trainer': str(type(self.trainer))}) - self._kwargs.update(self._my_kwargs) + def initialize(self, force=False): + self.machine = self.machine if self.machine and not force else \ + bob.learn.mlp.Machine(self.mlp_shape) + if self.seed is not None: + self.rng = bob.core.random.mt19937(self.seed) + self.machine.randomize(rng=self.rng) + else: + self.machine.randomize() + self.trainer = self.trainer if self.trainer and not force else \ + bob.learn.mlp.RProp(1, bob.learn.mlp.SquareError( + self.machine.output_activation), machine=self.machine, + train_biases=False) + self._kwargs.update({ + 'seed': self.seed, + 'mlp_shape': self.mlp_shape, + 'machine': str(self.machine), + 'trainer': str(type(self.trainer))}) + self._kwargs.update(self._my_kwargs) - def prepare_train(self, train, devel): - (negatives, positives) = train - n_systems = negatives.shape[1] - if n_systems != self.mlp_shape[0]: - logger.warn( - 'Reinitializing the MLP machine with the shape of {} to {} to match th' - 'e input size.'.format(self.mlp_shape, - [n_systems] + self.mlp_shape[1:])) - self.mlp_shape = [n_systems] + self.mlp_shape[1:] - self.n_systems = n_systems - self.hidden_layers = self.mlp_shape[1:-1] - self.initialize(force=True) - self.train_helper = MLPTrainer( - train=train[::-1], - devel=devel[::-1], - mlp_shape=self.mlp_shape, - machine=self.machine, - trainer=self.trainer, - **self._my_kwargs) + def prepare_train(self, train, devel): + (negatives, positives) = train + n_systems = negatives.shape[1] + if n_systems != self.mlp_shape[0]: + logger.warn( + 'Reinitializing the MLP machine with the shape of {} to {} to match th' + 'e input size.'.format(self.mlp_shape, + [n_systems] + self.mlp_shape[1:])) + self.mlp_shape = [n_systems] + self.mlp_shape[1:] + self.n_systems = n_systems + self.hidden_layers = self.mlp_shape[1:-1] + self.initialize(force=True) + self.train_helper = MLPTrainer( + train=train[::-1], + devel=devel[::-1], + mlp_shape=self.mlp_shape, + machine=self.machine, + trainer=self.trainer, + **self._my_kwargs) - def train(self, train_neg, train_pos, devel_neg=None, devel_pos=None): - if devel_neg is None: - devel_neg = train_neg - if devel_pos is None: - devel_pos = train_pos - 
self.prepare_train((train_neg, train_pos), (devel_neg, devel_pos)) - self.machine, self.analyzer = self.train_helper() + def train(self, train_neg, train_pos, devel_neg=None, devel_pos=None): + if devel_neg is None: + devel_neg = train_neg + if devel_pos is None: + devel_pos = train_pos + self.prepare_train((train_neg, train_pos), (devel_neg, devel_pos)) + self.machine, self.analyzer = self.train_helper() - def decision_function(self, scores): - scores = self.machine(scores) - if scores.ndim == 2 and scores.shape[1] == 1: - scores = scores.ravel() - return scores + def decision_function(self, scores): + scores = self.machine(scores) + if scores.ndim == 2 and scores.shape[1] == 1: + scores = scores.ravel() + return scores diff --git a/bob/fusion/base/algorithm/Weighted_Sum.py b/bob/fusion/base/algorithm/Weighted_Sum.py index 7a5fcc7..be790bb 100644 --- a/bob/fusion/base/algorithm/Weighted_Sum.py +++ b/bob/fusion/base/algorithm/Weighted_Sum.py @@ -12,20 +12,20 @@ logger = bob.core.log.setup("bob.fusion.base") class Weighted_Sum(Algorithm): - """weighted sum (default: mean)""" - - def __init__(self, weights=None, *args, **kwargs): - super(Weighted_Sum, self).__init__( - classifier=self, - weights=weights, - *args, **kwargs) - self.weights = weights - - def fit(self, X, y): - pass - - def decision_function(self, scores): - if self.weights is None: - return numpy.mean(scores, axis=1) - else: - return numpy.sum(scores * self.weights, axis=1) + """weighted sum (default: mean)""" + + def __init__(self, weights=None, *args, **kwargs): + super(Weighted_Sum, self).__init__( + classifier=self, + weights=weights, + *args, **kwargs) + self.weights = weights + + def fit(self, X, y): + pass + + def decision_function(self, scores): + if self.weights is None: + return numpy.mean(scores, axis=1) + else: + return numpy.sum(scores * self.weights, axis=1) diff --git a/bob/fusion/base/algorithm/__init__.py b/bob/fusion/base/algorithm/__init__.py index 0927c29..7def405 100644 --- a/bob/fusion/base/algorithm/__init__.py +++ b/bob/fusion/base/algorithm/__init__.py @@ -3,6 +3,7 @@ from .AlgorithmBob import AlgorithmBob from .Weighted_Sum import Weighted_Sum from .MLP import MLP from .LLR import LLR +from .GMM import GMM # gets sphinx autodoc done right - don't remove it __all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/fusion/base/algorithm/mlp_train_helper.py b/bob/fusion/base/algorithm/mlp_train_helper.py index 124e7f6..0701106 100644 --- a/bob/fusion/base/algorithm/mlp_train_helper.py +++ b/bob/fusion/base/algorithm/mlp_train_helper.py @@ -18,300 +18,300 @@ logger = bob.core.log.setup("bob.fusion.base") class Analyzer(object): - """Can analyze results in the end of a run. 
It can also save itself""" - - def gentargets(self, data, target): - t = numpy.vstack(data.shape[0] * (target,)) - return t, numpy.empty_like(t) - - def __init__(self, train, devel, target): - super(Analyzer, self).__init__() - - self.train = train - self.devel = devel - self.target = target - - real_train = self.gentargets(train[0], target[0]) - attack_train = self.gentargets(train[1], target[1]) - real_devel = self.gentargets(devel[0], target[0]) - attack_devel = self.gentargets(devel[1], target[1]) - - self.train_target = (real_train[0], attack_train[0]) - self.train_output = (real_train[1], attack_train[1]) - self.devel_target = (real_devel[0], attack_devel[0]) - self.devel_output = (real_devel[1], attack_devel[1]) - - self.data = {} # where to store variables that will be saved - self.data['epoch'] = [] - self.data['real-train-rmse'] = [] - self.data['attack-train-rmse'] = [] - self.data['real-devel-rmse'] = [] - self.data['attack-devel-rmse'] = [] - self.data['train-far'] = [] - self.data['train-frr'] = [] - self.data['devel-far'] = [] - self.data['devel-frr'] = [] - - def __call__(self, machine, iteration): - """Computes current outputs and evaluate performance""" - - def evalperf(outputs, targets): - return la.norm(bob.measure.rmse(outputs, targets)) - - for k in range(len(self.train)): - machine(self.train[k], self.train_output[k]) - machine(self.devel[k], self.devel_output[k]) - - self.data['real-train-rmse'].append(evalperf(self.train_output[0], - self.train_target[0])) - self.data['attack-train-rmse'].append(evalperf(self.train_output[1], - self.train_target[1])) - self.data['real-devel-rmse'].append(evalperf(self.devel_output[0], - self.devel_target[0])) - self.data['attack-devel-rmse'].append(evalperf(self.devel_output[1], - self.devel_target[1])) - - thres = bob.measure.eer_threshold(self.train_output[1][:, 0], - self.train_output[0][:, 0]) - train_far, train_frr = bob.measure.farfrr( - self.train_output[1][:, 0], self.train_output[0][:, 0], thres) - devel_far, devel_frr = bob.measure.farfrr( - self.devel_output[1][:, 0], self.devel_output[0][:, 0], thres) - - self.data['train-far'].append(train_far) - self.data['train-frr'].append(train_frr) - self.data['devel-far'].append(devel_far) - self.data['devel-frr'].append(devel_frr) - - self.data['epoch'].append(iteration) - - def str_header(self): - """Returns the string header of what I can print""" - return "iteration: RMSE:real/RMSE:attack (EER:%) ( train | devel )" - - def __str__(self): - """Returns a string representation of myself""" - - retval = "%d: %.4e/%.4e (%.2f%%) | %.4e/%.4e (%.2f%%)" % \ - (self.data['epoch'][-1], - self.data['real-train-rmse'][-1], - self.data['attack-train-rmse'][-1], - 50 * - (self.data['train-far'][-1] + self.data['train-frr'][-1]), - self.data['real-devel-rmse'][-1], - self.data['attack-devel-rmse'][-1], - 50 * - (self.data['devel-far'][-1] + self.data['devel-frr'][-1]), - ) - return retval - - def save(self, f): - """Saves my contents on the bob.io.base.HDF5File you give me.""" - - for k, v in self.data.items(): - f.set(k, numpy.array(v)) - - def load(self, f): - """Loads my contents from the bob.io.base.HDF5File you give me.""" - - for k in f.paths(): - self.data[k.strip('/')] = f.read(k) + """Can analyze results in the end of a run. 
It can also save itself""" + + def gentargets(self, data, target): + t = numpy.vstack(data.shape[0] * (target,)) + return t, numpy.empty_like(t) + + def __init__(self, train, devel, target): + super(Analyzer, self).__init__() + + self.train = train + self.devel = devel + self.target = target + + real_train = self.gentargets(train[0], target[0]) + attack_train = self.gentargets(train[1], target[1]) + real_devel = self.gentargets(devel[0], target[0]) + attack_devel = self.gentargets(devel[1], target[1]) + + self.train_target = (real_train[0], attack_train[0]) + self.train_output = (real_train[1], attack_train[1]) + self.devel_target = (real_devel[0], attack_devel[0]) + self.devel_output = (real_devel[1], attack_devel[1]) + + self.data = {} # where to store variables that will be saved + self.data['epoch'] = [] + self.data['real-train-rmse'] = [] + self.data['attack-train-rmse'] = [] + self.data['real-devel-rmse'] = [] + self.data['attack-devel-rmse'] = [] + self.data['train-far'] = [] + self.data['train-frr'] = [] + self.data['devel-far'] = [] + self.data['devel-frr'] = [] + + def __call__(self, machine, iteration): + """Computes current outputs and evaluate performance""" + + def evalperf(outputs, targets): + return la.norm(bob.measure.rmse(outputs, targets)) + + for k in range(len(self.train)): + machine(self.train[k], self.train_output[k]) + machine(self.devel[k], self.devel_output[k]) + + self.data['real-train-rmse'].append(evalperf(self.train_output[0], + self.train_target[0])) + self.data['attack-train-rmse'].append(evalperf(self.train_output[1], + self.train_target[1])) + self.data['real-devel-rmse'].append(evalperf(self.devel_output[0], + self.devel_target[0])) + self.data['attack-devel-rmse'].append(evalperf(self.devel_output[1], + self.devel_target[1])) + + thres = bob.measure.eer_threshold(self.train_output[1][:, 0], + self.train_output[0][:, 0]) + train_far, train_frr = bob.measure.farfrr( + self.train_output[1][:, 0], self.train_output[0][:, 0], thres) + devel_far, devel_frr = bob.measure.farfrr( + self.devel_output[1][:, 0], self.devel_output[0][:, 0], thres) + + self.data['train-far'].append(train_far) + self.data['train-frr'].append(train_frr) + self.data['devel-far'].append(devel_far) + self.data['devel-frr'].append(devel_frr) + + self.data['epoch'].append(iteration) + + def str_header(self): + """Returns the string header of what I can print""" + return "iteration: RMSE:real/RMSE:attack (EER:%) ( train | devel )" + + def __str__(self): + """Returns a string representation of myself""" + + retval = "%d: %.4e/%.4e (%.2f%%) | %.4e/%.4e (%.2f%%)" % \ + (self.data['epoch'][-1], + self.data['real-train-rmse'][-1], + self.data['attack-train-rmse'][-1], + 50 * + (self.data['train-far'][-1] + self.data['train-frr'][-1]), + self.data['real-devel-rmse'][-1], + self.data['attack-devel-rmse'][-1], + 50 * + (self.data['devel-far'][-1] + self.data['devel-frr'][-1]), + ) + return retval + + def save(self, f): + """Saves my contents on the bob.io.base.HDF5File you give me.""" + + for k, v in self.data.items(): + f.set(k, numpy.array(v)) + + def load(self, f): + """Loads my contents from the bob.io.base.HDF5File you give me.""" + + for k in f.paths(): + self.data[k.strip('/')] = f.read(k) class MLPTrainer(object): - """Creates a randomly initialized MLP and train it using the input data. - - This method will create an MLP with the shape (`mlp_shape`) that is - provided. 
Then it will initialize the MLP with random weights and - biases and train it for as long as the development shows improvement - and will stop as soon as it does not anymore or we reach the maximum - number of iterations. - - Performance is evaluated both on the trainining and development set - during the training, every 'epoch' training steps. Each training step - is composed of `batch_size` elements drawn randomly from all classes - available in train set. - - Keyword Parameters: - - train - An iterable (tuple or list) containing two arraysets: the first - contains the real accesses (target = +1) and the second contains - the attacks (target = -1). - - devel - An iterable (tuple or list) containing two arraysets: the first - contains the real accesses (target = +1) and the second contains - the attacks (target = -1). - - batch_size - An integer defining the number of samples per training iteration. - Good values are greater than 100. - - mlp_shape - Shape of the MLP machine. - - epoch - The number of training steps to wait until we measure the error. - - max_iter - If given (and different than zero), should tell us the maximum - number of training steps to train the network for. If set to 0 - just train until the development sets reaches the valley (in RMSE - terms). - - no_improvements - If given (and different than zero), should tell us the maximum - number of iterations we should continue trying for in case we have - no more improvements on the development set average RMSE term. - This value, if set, should not be too small as this may cause a - too-early stop. Values in the order of 10% of the max_iter should - be fine. - - """ - - def __init__(self, - train, - devel, - mlp_shape, - batch_size=1, - epoch=1, - max_iter=1000, - no_improvements=0, - valley_condition=1, - machine=None, - trainer=None, - *args, **kwargs - ): - super(MLPTrainer, self).__init__() - self.train = train - self.devel = devel - self.mlp_shape = mlp_shape - self.batch_size = batch_size - self.epoch = epoch - self.max_iter = max_iter - self.no_improvements = no_improvements - self.valley_condition = valley_condition - self.machine = machine - self.trainer = trainer - - def __call__(self): - return self.make_mlp() - - def make_mlp(self): - - # of the minimum devel. 
set RMSE detected so far - VALLEY_CONDITION = self.valley_condition - last_devel_rmse = 0 - - def stop_condition(min_devel_rmse, devel_rmse, last_devel_rmse): - """This method will detect a valley in the devel set RMSE""" - stop = (VALLEY_CONDITION * devel_rmse) > (min_devel_rmse) or \ - abs(devel_rmse - last_devel_rmse) / \ - (devel_rmse + last_devel_rmse) < 0.00001 - return stop - - target = [ - numpy.array([+1], 'float64'), - numpy.array([-1], 'float64'), - ] - - logger.info("Preparing analysis framework...") - analyze = Analyzer(self.train, self.devel, target) - - logger.info("Setting up training infrastructure...") - shuffler = bob.learn.mlp.DataShuffler(self.train, target) - shuffler.auto_stdnorm = True - - # shape = (shuffler.data_width, nhidden, 1) - # machine = bob.learn.mlp.Machine(self.shape) - # machine.activation = bob.learn.activation.HyperbolicTangent() #the - # defaults are anyway Hyperbolic Tangent for hidden and output layer - self.machine.input_subtract, self.machine.input_divide = \ - shuffler.stdnorm() - - # trainer = bob.learn.mlp.RProp( - # self.batch_size, - # bob.learn.mlp.SquareError(machine.output_activation), machine) - - self.trainer.train_biases = True - - continue_training = True - iteration = 0 - min_devel_rmse = sys.float_info.max - self.best_machine = bob.learn.mlp.Machine(self.machine) # deep copy - best_machine_iteration = 0 - - # temporary training data selected by the shuffer - shuffled_input = numpy.ndarray( - (self.batch_size, shuffler.data_width), 'float64') - shuffled_target = numpy.ndarray( - (self.batch_size, shuffler.target_width), 'float64') - - logger.info(analyze.str_header()) - - try: - while continue_training: - - analyze(self.machine, iteration) - - logger.info(analyze) - - avg_devel_rmse = (analyze.data['real-devel-rmse'][-1] + - analyze.data['attack-devel-rmse'][-1]) / 2 - - # save best network, record minima - if avg_devel_rmse < min_devel_rmse: - best_machine_iteration = iteration - self.best_machine = bob.learn.mlp.Machine( - self.machine) # deep copy - logger.info("%d: Saving best network so far with average " - "devel. RMSE = %.4e", iteration, avg_devel_rmse) - min_devel_rmse = avg_devel_rmse - logger.info("%d: New valley stop threshold set to %.4e", - iteration, avg_devel_rmse / VALLEY_CONDITION) - if stop_condition(min_devel_rmse, avg_devel_rmse, last_devel_rmse) \ - or numpy.allclose(avg_devel_rmse / VALLEY_CONDITION, 0): - logger.info("%d: Stopping on devel valley condition", iteration) - logger.info("%d: Best machine happened on iteration %d with average " - "devel. RMSE of %.4e", iteration, best_machine_iteration, - min_devel_rmse) - - break - last_devel_rmse = avg_devel_rmse - - # train for 'epoch' times w/o stopping for tests - for i in range(self.epoch): - shuffler(data=shuffled_input, target=shuffled_target) - self.trainer.batch_size = len(shuffled_input) - self.trainer.train( - self.machine, shuffled_input, shuffled_target) - iteration += 1 - - if self.max_iter > 0 and iteration > self.max_iter: - logger.info("%d: Stopping on max. iterations condition", iteration) - logger.info("%d: Best machine happened on iteration %d with average " - "devel. 
RMSE of %.4e", iteration, best_machine_iteration,
-                      min_devel_rmse)
-          break
-
-        if self.no_improvements > 0 and \
-           (iteration - best_machine_iteration) > self.no_improvements:
-          logger.info("%d: Stopping because did not observe MLP performance "
-                      "improvements for %d iterations",
-                      iteration, iteration - best_machine_iteration)
-          logger.info("%d: Best machine happened on iteration %d with average "
-                      "devel. RMSE of %.4e",
-                      iteration, best_machine_iteration, min_devel_rmse)
-          break
-
-    except KeyboardInterrupt:
-      logger.info("%d: User interruption captured - exiting in a clean way",
-                  iteration)
-      logger.info("%d: Best machine happened on iteration %d "
-                  "with average devel. RMSE of %.4e",
-                  iteration, best_machine_iteration, min_devel_rmse)
-
-    analyze(self.machine, iteration)
-
-    return self.best_machine, analyze
+    """Creates a randomly initialized MLP and trains it using the input data.
+
+    This method will create an MLP with the shape (`mlp_shape`) that is
+    provided. Then it will initialize the MLP with random weights and
+    biases and train it for as long as the development set shows
+    improvement; it stops as soon as there is no improvement anymore or
+    when we reach the maximum number of iterations.
+
+    Performance is evaluated both on the training and development sets
+    during the training, every 'epoch' training steps. Each training step
+    is composed of `batch_size` elements drawn randomly from all classes
+    available in the train set.
+
+    Keyword Parameters:
+
+    train
+        An iterable (tuple or list) containing two arraysets: the first
+        contains the real accesses (target = +1) and the second contains
+        the attacks (target = -1).
+
+    devel
+        An iterable (tuple or list) containing two arraysets: the first
+        contains the real accesses (target = +1) and the second contains
+        the attacks (target = -1).
+
+    batch_size
+        An integer defining the number of samples per training iteration.
+        Good values are greater than 100.
+
+    mlp_shape
+        Shape of the MLP machine.
+
+    epoch
+        The number of training steps to wait until we measure the error.
+
+    max_iter
+        If given (and different than zero), should tell us the maximum
+        number of training steps to train the network for. If set to 0,
+        just train until the development set reaches the valley (in RMSE
+        terms).
+
+    no_improvements
+        If given (and different than zero), should tell us the maximum
+        number of iterations we should continue trying for in case we have
+        no more improvements on the development set average RMSE term.
+        This value, if set, should not be too small, as this may cause a
+        too-early stop. Values on the order of 10% of the max_iter should
+        be fine.
+
+    """
+
+    def __init__(self,
+                 train,
+                 devel,
+                 mlp_shape,
+                 batch_size=1,
+                 epoch=1,
+                 max_iter=1000,
+                 no_improvements=0,
+                 valley_condition=1,
+                 machine=None,
+                 trainer=None,
+                 *args, **kwargs
+                 ):
+        super(MLPTrainer, self).__init__()
+        self.train = train
+        self.devel = devel
+        self.mlp_shape = mlp_shape
+        self.batch_size = batch_size
+        self.epoch = epoch
+        self.max_iter = max_iter
+        self.no_improvements = no_improvements
+        self.valley_condition = valley_condition
+        self.machine = machine
+        self.trainer = trainer
+
+    def __call__(self):
+        return self.make_mlp()
+
+    def make_mlp(self):
+
+        # stop threshold: a fraction of the minimum devel. set RMSE
+        # detected so far
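+        # For example, with valley_condition = 0.9 the loop below stops once
+        # the current devel. RMSE exceeds the best devel. RMSE seen so far by
+        # roughly 11% (0.9 * devel_rmse > min_devel_rmse), or once the
+        # normalized difference between two successive devel. RMSE values
+        # falls below 1e-5 (a plateau); see stop_condition() right below.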
+        VALLEY_CONDITION = self.valley_condition
+        last_devel_rmse = 0
+
+        def stop_condition(min_devel_rmse, devel_rmse, last_devel_rmse):
+            """This method will detect a valley in the devel set RMSE"""
+            stop = (VALLEY_CONDITION * devel_rmse) > (min_devel_rmse) or \
+                abs(devel_rmse - last_devel_rmse) / \
+                (devel_rmse + last_devel_rmse) < 0.00001
+            return stop
+
+        target = [
+            numpy.array([+1], 'float64'),
+            numpy.array([-1], 'float64'),
+        ]
+
+        logger.info("Preparing analysis framework...")
+        analyze = Analyzer(self.train, self.devel, target)
+
+        logger.info("Setting up training infrastructure...")
+        shuffler = bob.learn.mlp.DataShuffler(self.train, target)
+        shuffler.auto_stdnorm = True
+
+        # shape = (shuffler.data_width, nhidden, 1)
+        # machine = bob.learn.mlp.Machine(self.shape)
+        # machine.activation = bob.learn.activation.HyperbolicTangent() #the
+        # defaults are anyway Hyperbolic Tangent for hidden and output layer
+        self.machine.input_subtract, self.machine.input_divide = \
+            shuffler.stdnorm()
+
+        # trainer = bob.learn.mlp.RProp(
+        #     self.batch_size,
+        #     bob.learn.mlp.SquareError(machine.output_activation), machine)
+
+        self.trainer.train_biases = True
+
+        continue_training = True
+        iteration = 0
+        min_devel_rmse = sys.float_info.max
+        self.best_machine = bob.learn.mlp.Machine(self.machine)  # deep copy
+        best_machine_iteration = 0
+
+        # temporary training data selected by the shuffler
+        shuffled_input = numpy.ndarray(
+            (self.batch_size, shuffler.data_width), 'float64')
+        shuffled_target = numpy.ndarray(
+            (self.batch_size, shuffler.target_width), 'float64')
+
+        logger.info(analyze.str_header())
+
+        try:
+            while continue_training:
+
+                analyze(self.machine, iteration)
+
+                logger.info(analyze)
+
+                avg_devel_rmse = (analyze.data['real-devel-rmse'][-1] +
+                                  analyze.data['attack-devel-rmse'][-1]) / 2
+
+                # save best network, record minima
+                if avg_devel_rmse < min_devel_rmse:
+                    best_machine_iteration = iteration
+                    self.best_machine = bob.learn.mlp.Machine(
+                        self.machine)  # deep copy
+                    logger.info("%d: Saving best network so far with average "
+                                "devel. RMSE = %.4e", iteration, avg_devel_rmse)
+                    min_devel_rmse = avg_devel_rmse
+                    logger.info("%d: New valley stop threshold set to %.4e",
+                                iteration, avg_devel_rmse / VALLEY_CONDITION)
+                if stop_condition(min_devel_rmse, avg_devel_rmse, last_devel_rmse) \
+                        or numpy.allclose(avg_devel_rmse / VALLEY_CONDITION, 0):
+                    logger.info("%d: Stopping on devel valley condition", iteration)
+                    logger.info("%d: Best machine happened on iteration %d with average "
+                                "devel. RMSE of %.4e", iteration, best_machine_iteration,
+                                min_devel_rmse)
+
+                    break
+                last_devel_rmse = avg_devel_rmse
+
+                # train for 'epoch' times w/o stopping for tests
+                for i in range(self.epoch):
+                    shuffler(data=shuffled_input, target=shuffled_target)
+                    self.trainer.batch_size = len(shuffled_input)
+                    self.trainer.train(
+                        self.machine, shuffled_input, shuffled_target)
+                    iteration += 1
+
+                if self.max_iter > 0 and iteration > self.max_iter:
+                    logger.info("%d: Stopping on max. iterations condition", iteration)
+                    logger.info("%d: Best machine happened on iteration %d with average "
+                                "devel. RMSE of %.4e", iteration, best_machine_iteration,
+                                min_devel_rmse)
+                    break
+
+                if self.no_improvements > 0 and \
+                        (iteration - best_machine_iteration) > self.no_improvements:
+                    logger.info("%d: Stopping because did not observe MLP performance "
+                                "improvements for %d iterations",
+                                iteration, iteration - best_machine_iteration)
+                    logger.info("%d: Best machine happened on iteration %d with average "
+                                "devel. RMSE of %.4e",
+                                iteration, best_machine_iteration, min_devel_rmse)
+                    break
+
+        except KeyboardInterrupt:
+            logger.info("%d: User interruption captured - exiting in a clean way",
+                        iteration)
+            logger.info("%d: Best machine happened on iteration %d "
+                        "with average devel. RMSE of %.4e",
+                        iteration, best_machine_iteration, min_devel_rmse)
+
+        analyze(self.machine, iteration)
+
+        return self.best_machine, analyze
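The valley detection above reduces to a small amount of arithmetic. A self-contained sketch of the same stopping rule applied to a made-up devel. RMSE curve (the curve and the 0.9 valley condition are illustrative values, not from a real run):

def stop_condition(min_devel_rmse, devel_rmse, last_devel_rmse,
                   valley_condition=0.9):
    # stop when the current devel. RMSE has climbed noticeably above the
    # best value seen so far, or when it has effectively plateaued
    climbing = valley_condition * devel_rmse > min_devel_rmse
    plateaued = abs(devel_rmse - last_devel_rmse) / \
        (devel_rmse + last_devel_rmse) < 0.00001
    return climbing or plateaued

rmse_curve = [0.90, 0.70, 0.55, 0.50, 0.52, 0.58]  # devel. RMSE per epoch
best, last = float('inf'), 0.0
for epoch, rmse in enumerate(rmse_curve):
    if stop_condition(best, rmse, last):
        print('stopping at epoch', epoch)  # -> stopping at epoch 5
        break
    best, last = min(best, rmse), rmse

With these numbers the loop stops at epoch 5, two epochs after the best RMSE at epoch 3; the no_improvements guard in the real loop bounds exactly this kind of delayed stop.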
diff --git a/bob/fusion/base/script/__init__.py b/bob/fusion/base/script/__init__.py
index 8c298d2..e40140b 100644
--- a/bob/fusion/base/script/__init__.py
+++ b/bob/fusion/base/script/__init__.py
@@ -1,2 +1,2 @@
 from . import bob_fuse
-from . import plot_fusion_decision_boundary
+from . import bob_fusion_decision_boundary
diff --git a/bob/fusion/base/script/bob_fusion_decision_boundary.py b/bob/fusion/base/script/bob_fusion_decision_boundary.py
index 85a1dd6..4ed45bf 100644
--- a/bob/fusion/base/script/bob_fusion_decision_boundary.py
+++ b/bob/fusion/base/script/bob_fusion_decision_boundary.py
@@ -10,7 +10,7 @@ import bob.fusion.base
 import bob.core
 from bob.measure.load import load_score
 from ..tools import grouping, get_gza_from_lines_list, \
-  get_scores, remove_nan, check_consistency
+    get_scores, remove_nan, check_consistency

 logger = bob.core.log.setup("bob.fusion.base")

@@ -18,159 +18,166 @@ def plot_boundary_decision(algorithm, scores, score_labels, threshold,
                            thres_system1=None,
                            thres_system2=None,
                            do_grouping=False,
-                           resolution=100,
-                           x_pad=0.5,
-                           y_pad=0.5,
+                           resolution=2000,
+                           # x_pad=0.5,
+                           # y_pad=0.5,
                            alpha=0.75,
                            legends=None,
                            i1=0,
                            i2=1,
                            **kwargs
                            ):
-  '''
-  Plots the boundary decision of the Algorithm
-
-  @param score_labels np.array A (scores.shape[0]) array containing
-    the true labels of scores.
-
-  @param threshold float threshold of the decision boundary
-  '''
-  if legends is None:
-    legends = ['Zero Effort Impostor', 'Presentation Attack', 'Genuine']
-  markers = ['x', 'o', 's']
-
-  if scores.shape[1] > 2:
-    raise NotImplementedError(
-        "Currently plotting the decision boundary for more than two systems "
-        "is not supported.")
-
-  import matplotlib.pyplot as plt
-  plt.gca()  # this is necessary for subplots to work.
- - X = scores[:, [i1, i2]] - Y = score_labels - x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad - y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad - xx, yy = np.meshgrid( - np.linspace(x_min, x_max, resolution), - np.linspace(y_min, y_max, resolution)) - temp = np.c_[xx.ravel(), yy.ravel()] - temp = algorithm.preprocess(temp) - Z = (algorithm.fuse(temp) > threshold).reshape(xx.shape) - - contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.gray) - - if do_grouping: - gen = grouping(X[Y == 0, :], **kwargs) - zei = grouping(X[Y == 1, :], **kwargs) - atk = grouping(X[Y == 2, :], **kwargs) - else: - gen = X[Y == 0, :] - zei = X[Y == 1, :] - atk = X[Y == 2, :] - colors = plt.cm.viridis(np.linspace(0, 1, 3)) - for i, X in enumerate((zei, atk, gen)): - plt.scatter( - X[:, 0], X[:, 1], marker=markers[i], alpha=alpha, - c=colors[i], label=legends[i]) - plt.legend() - - if thres_system1 is not None: - plt.axvline(thres_system1, color='red') - plt.axhline(thres_system2, color='red') - - plt.xlim([x_min, x_max]) - plt.ylim([y_min, y_max]) - plt.grid('on') - - return contourf + ''' + Plots the boundary decision of the Algorithm + + @param score_labels np.array A (scores.shape[0]) array containing + the true labels of scores. + + @param threshold float threshold of the decision boundary + ''' + if legends is None: + legends = ['Zero Effort Impostor', 'Presentation Attack', 'Genuine'] + markers = ['x', 'o', 's'] + + if scores.shape[1] > 2: + raise NotImplementedError( + "Currently plotting the decision boundary for more than two systems " + "is not supported.") + + import matplotlib.pyplot as plt + plt.gca() # this is necessary for subplots to work. + + X = scores[:, [i1, i2]] + Y = score_labels + x_pad = (X[:, i1].max() - X[:, i1].min()) * 0.1 + y_pad = (X[:, i2].max() - X[:, i2].min()) * 0.1 + x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad + y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad + xx, yy = np.meshgrid( + np.linspace(x_min, x_max, resolution), + np.linspace(y_min, y_max, resolution)) + temp = np.c_[xx.ravel(), yy.ravel()] + temp = algorithm.preprocess(temp) + Z = (algorithm.fuse(temp) > threshold).reshape(xx.shape) + + contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.gray) + + if do_grouping: + gen = grouping(X[Y == 0, :], **kwargs) + zei = grouping(X[Y == 1, :], **kwargs) + atk = grouping(X[Y == 2, :], **kwargs) + else: + gen = X[Y == 0, :] + zei = X[Y == 1, :] + atk = X[Y == 2, :] + colors = plt.cm.viridis(np.linspace(0, 1, 3)) + for i, X in enumerate((zei, atk, gen)): + plt.scatter( + X[:, 0], X[:, 1], marker=markers[i], alpha=alpha, + c=colors[i], label=legends[i]) + # plt.legend(loc='best') + plt.legend(bbox_to_anchor=(-0.05, 1.02, 1.05, .102), loc=3, + ncol=3, mode="expand", borderaxespad=0., fontsize=14) + + if thres_system1 is not None: + plt.axvline(thres_system1, color='red') + plt.axhline(thres_system2, color='red') + + plt.xlim([x_min, x_max]) + plt.ylim([y_min, y_max]) + plt.grid('on') + + plt.xlabel('Face recognition scores') + plt.ylabel('PAD scores') + + return contourf def main(command_line_parameters=None): - # setup command line parameters - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + # setup command line parameters + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('-e', '--eval-files', nargs='+', required=True, - help='A list of score files to 
be plotted usually the '
-                      'evaluation set.')
+    parser.add_argument('-e', '--eval-files', nargs='+', required=True,
+                        help='A list of score files to be plotted, usually the '
+                        'evaluation set.')

-  parser.add_argument('-m', '--model-file', required=True,
-                      help='Path to Model.pkl of a saved bob.fusion.algorithm.')
+    parser.add_argument('-m', '--model-file', required=True,
+                        help='Path to Model.pkl of a saved bob.fusion.algorithm.')

-  parser.add_argument('-t', '--threshold', type=float, default=0, required=True,
-                      help='The threshold to classify scores after fusion.'
-                      'Usually calculated from fused development set.')
+    parser.add_argument('-t', '--threshold', type=float, default=0, required=True,
+                        help='The threshold to classify scores after fusion. '
+                        'Usually calculated from the fused development set.')

-  parser.add_argument('-o', '--output', default='scatter.pdf',
-                      help='The path to save the plot.')
+    parser.add_argument('-o', '--output', default='scatter.pdf',
+                        help='The path to save the plot.')

-  parser.add_argument('-g', '--group', default=0, type=int,
-                      help='If given scores will be grouped into N samples.')
+    parser.add_argument('-g', '--group', default=0, type=int,
+                        help='If given, scores will be grouped into N samples.')

-  parser.add_argument('-G', '--grouping', choices=('random', 'kmeans'),
-                      default='kmeans',
-                      help='The gouping algorithm to be used.')
+    parser.add_argument('-G', '--grouping', choices=('random', 'kmeans'),
+                        default='kmeans',
+                        help='The grouping algorithm to be used.')

-  parser.add_argument('--skip-check', action='store_true',
-                      help='If provided, score files are not checked '
-                      'for consistency')
+    parser.add_argument('--skip-check', action='store_true',
+                        help='If provided, score files are not checked '
+                        'for consistency')

-  parser.add_argument('--score-type', choices=(4, 5), default=None,
-                      help='The format the scores are provided.')
+    parser.add_argument('--score-type', choices=(4, 5), default=None,
+                        help='The format the scores are provided.')

-  bob.core.log.add_command_line_option(parser)
+    bob.core.log.add_command_line_option(parser)

-  # parse command line options
-  args = parser.parse_args(command_line_parameters)
-  bob.core.log.set_verbosity_level(logger, args.verbose)
+    # parse command line options
+    args = parser.parse_args(command_line_parameters)
+    bob.core.log.set_verbosity_level(logger, args.verbose)

-  # load the algorithm
-  algorithm = bob.fusion.base.algorithm.Algorithm()
-  algorithm = algorithm.load(args.model_file)
+    # load the algorithm
+    algorithm = bob.fusion.base.algorithm.Algorithm()
+    algorithm = algorithm.load(args.model_file)

-  # load the scores
-  score_lines_list_eval = [load_score(path, ncolumns=args.score_type)
-                           for path in args.eval_files]
+    # load the scores
+    score_lines_list_eval = [load_score(path, ncolumns=args.score_type)
+                             for path in args.eval_files]

-  # genuine, zero effort impostor, and attack list
-  idx1, gen_le, zei_le, atk_le = get_gza_from_lines_list(score_lines_list_eval)
+    # genuine, zero effort impostor, and attack list
+    idx1, gen_le, zei_le, atk_le = get_gza_from_lines_list(score_lines_list_eval)

-  # check if score lines are consistent
-  if not args.skip_check:
-    check_consistency(gen_le, zei_le, atk_le)
+    # check if score lines are consistent
+    if not args.skip_check:
+        check_consistency(gen_le, zei_le, atk_le)

-  # concatenate the scores and create the labels
-  scores = get_scores(gen_le, zei_le, atk_le)
-  score_labels = np.zeros((scores.shape[0],))
-  gensize = gen_le[0].shape[0]
-  zeisize = zei_le[0].shape[0]
-  score_labels[:gensize] = 0
-  score_labels[gensize: gensize + zeisize] = 1
-  score_labels[gensize + zeisize:] = 2
-  found_nan, nan_idx, scores = remove_nan(scores, False)
-  score_labels = score_labels[~nan_idx]
+    # concatenate the scores and create the labels
+    scores = get_scores(gen_le, zei_le, atk_le)
+    score_labels = np.zeros((scores.shape[0],))
+    gensize = gen_le[0].shape[0]
+    zeisize = zei_le[0].shape[0]
+    score_labels[:gensize] = 0
+    score_labels[gensize: gensize + zeisize] = 1
+    score_labels[gensize + zeisize:] = 2
+    found_nan, nan_idx, scores = remove_nan(scores, False)
+    score_labels = score_labels[~nan_idx]

-  if found_nan:
-    logger.warn('{} nan values were removed.'.format(np.sum(nan_idx)))
+    if found_nan:
+        logger.warn('{} nan values were removed.'.format(np.sum(nan_idx)))

-  # plot the decision boundary
-  do_grouping = True
-  if args.group < 1:
-    do_grouping = False
+    # plot the decision boundary
+    do_grouping = True
+    if args.group < 1:
+        do_grouping = False

-  plot_boundary_decision(
-    algorithm, scores, score_labels, args.threshold,
-    do_grouping=do_grouping,
-    npoints=args.group,
-    seed=0,
-    gformat=args.grouping
-  )
-  plt.savefig(args.output)
-  plt.close()
+    plot_boundary_decision(
+        algorithm, scores, score_labels, args.threshold,
+        do_grouping=do_grouping,
+        npoints=args.group,
+        seed=0,
+        gformat=args.grouping
+    )
+    plt.savefig(args.output, transparent=True)
+    plt.close()


 if __name__ == '__main__':
-  main()
+    main()
diff --git a/bob/fusion/base/test/test_algorithm.py b/bob/fusion/base/test/test_algorithm.py
index 359e733..51a0eff 100644
--- a/bob/fusion/base/test/test_algorithm.py
+++ b/bob/fusion/base/test/test_algorithm.py
@@ -43,146 +43,155 @@ TEST = array([[-1.04855297, -1.42001794],


 def run_steps(algorithm):
-  algorithm.train_preprocessors(X)
-  neg = algorithm.preprocess(NEG)
-  pos = algorithm.preprocess(POS)
-  algorithm.train(neg, pos)
-  fused = algorithm.fuse(TEST)
-  with NamedTemporaryFile(suffix='.pkl') as f:
-    algorithm.save(f.name)
-    loaded_algorithm = algorithm.load(f.name)
-
-  try:
-    assert str(algorithm) == str(loaded_algorithm)
-  except Exception:
-    warnings.warn("String comparison of algorithms do not match which is OK.")
-    print(str(algorithm))
-    print(str(loaded_algorithm))
-  if algorithm.preprocessors:
-    assert len(algorithm.preprocessors) == len(loaded_algorithm.preprocessors)
-  assert fused.ndim == 1
-
-  return neg, pos, fused, loaded_algorithm
+    algorithm.train_preprocessors(X)
+    neg = algorithm.preprocess(NEG)
+    pos = algorithm.preprocess(POS)
+    algorithm.train(neg, pos)
+    fused = algorithm.fuse(TEST)
+    with NamedTemporaryFile(suffix='.pkl') as f:
+        algorithm.save(f.name)
+        loaded_algorithm = algorithm.load(f.name)
+
+    try:
+        assert str(algorithm) == str(loaded_algorithm)
+    except Exception:
+        warnings.warn("String representations of the algorithms do not match, "
+                      "which is OK.")
+        print(str(algorithm))
+        print(str(loaded_algorithm))
+    if algorithm.preprocessors:
+        assert len(algorithm.preprocessors) == len(loaded_algorithm.preprocessors)
+    assert fused.ndim == 1
+
+    return neg, pos, fused, loaded_algorithm


 def test_algorithm_llr_sklearn():
-  algorithm = bob.fusion.base.algorithm.Algorithm(
-      preprocessors=[
-          StandardScaler(**{'copy': True, 'with_mean': True, 'with_std': True})],
-      classifier=LogisticRegression(**{'C': 1.0,
-                                       'class_weight': None,
-                                       'dual': False,
-                                       'fit_intercept': True,
-                                       'intercept_scaling': 1,
-                                       'max_iter': 100,
-                                       'multi_class': 'ovr',
-                                       'n_jobs': 1,
-                                       'penalty': 'l2',
-                                       'random_state': None,
-                                       'solver': 'liblinear',
-                                       'tol': 0.0001,
-
'verbose': 0, - 'warm_start': False})) - neg, pos, fused, loaded_algorithm = run_steps(algorithm) - assert str(algorithm) == "<class 'bob.fusion.base.algorithm.Algorithm.Algorithm'>(preprocessors=[StandardScaler(copy=True, with_mean=True, with_std=True)], classifier=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n verbose=0, warm_start=False))" - - assert numpy.allclose(algorithm.preprocessors[0].mean_, array([0.52676307, 0.09832188])) - assert numpy.allclose(algorithm.preprocessors[0].scale_, array([2.857145, 2.98815147])) - - assert numpy.allclose(neg, array([[-0.61694829, -0.90295445], - [-0.89180811, -0.28694284], - [-0.58072134, -1.36392007], - [-0.90183545, -1.08752154], - [-1.27049272, -0.89946022], - [-1.18395093, -0.5501891], - [-0.96800314, -0.99614993], - [-1.07901413, -0.92520328], - [-0.71143886, -1.10552634], - [-1.12479253, -1.32269654]])) - assert numpy.allclose(pos, array([[-0.02791349, 1.18979803], - [1.16818472, 0.72269198], - [1.6600458, 0.48435043], - [0.88164775, 0.90841923], - [1.4021046, 1.4627896], - [0.91986384, 1.09761526], - [0.5549075, 0.3081777], - [0.74386312, 1.02338423], - [1.29623369, 1.37344375], - [0.73006799, 0.86989411]])) - - assert numpy.allclose(algorithm.classifier.intercept_, array([0.04577333])) - assert numpy.allclose(algorithm.classifier.classes_, array([False, True], dtype=bool)) - assert numpy.allclose(algorithm.classifier.coef_, array([[1.33489128, 1.38092354]])) - - assert numpy.allclose(fused, array([-3.31486708, 0.4619598, -1.23950404, -0.55291754, -2.40238289, - -0.61529441, -2.26645877, 0.59964668, 0.55225715, -1.30189552])) - - assert numpy.allclose(algorithm.preprocessors[0].mean_, loaded_algorithm.preprocessors[0].mean_) - assert numpy.allclose(algorithm.preprocessors[0].scale_, loaded_algorithm.preprocessors[0].scale_) - assert numpy.allclose(algorithm.classifier.intercept_, loaded_algorithm.classifier.intercept_) - assert numpy.allclose(algorithm.classifier.classes_, loaded_algorithm.classifier.classes_) - assert numpy.allclose(algorithm.classifier.coef_, loaded_algorithm.classifier.coef_) + algorithm = bob.fusion.base.algorithm.Algorithm( + preprocessors=[ + StandardScaler(**{'copy': True, 'with_mean': True, 'with_std': True})], + classifier=LogisticRegression(**{'C': 1.0, + 'class_weight': None, + 'dual': False, + 'fit_intercept': True, + 'intercept_scaling': 1, + 'max_iter': 100, + 'multi_class': 'ovr', + 'n_jobs': 1, + 'penalty': 'l2', + 'random_state': None, + 'solver': 'liblinear', + 'tol': 0.0001, + 'verbose': 0, + 'warm_start': False})) + neg, pos, fused, loaded_algorithm = run_steps(algorithm) + assert str(algorithm) == "<class 'bob.fusion.base.algorithm.Algorithm.Algorithm'>(preprocessors=[StandardScaler(copy=True, with_mean=True, with_std=True)], classifier=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n verbose=0, warm_start=False))" + + assert numpy.allclose(algorithm.preprocessors[0].mean_, array([0.52676307, 0.09832188])) + assert numpy.allclose(algorithm.preprocessors[0].scale_, array([2.857145, 2.98815147])) + + assert numpy.allclose(neg, array([[-0.61694829, -0.90295445], + [-0.89180811, -0.28694284], + [-0.58072134, -1.36392007], + [-0.90183545, -1.08752154], + [-1.27049272, -0.89946022], + 
[-1.18395093, -0.5501891], + [-0.96800314, -0.99614993], + [-1.07901413, -0.92520328], + [-0.71143886, -1.10552634], + [-1.12479253, -1.32269654]])) + assert numpy.allclose(pos, array([[-0.02791349, 1.18979803], + [1.16818472, 0.72269198], + [1.6600458, 0.48435043], + [0.88164775, 0.90841923], + [1.4021046, 1.4627896], + [0.91986384, 1.09761526], + [0.5549075, 0.3081777], + [0.74386312, 1.02338423], + [1.29623369, 1.37344375], + [0.73006799, 0.86989411]])) + + assert numpy.allclose(algorithm.classifier.intercept_, array([0.04577333])) + assert numpy.allclose(algorithm.classifier.classes_, array([False, True], dtype=bool)) + assert numpy.allclose(algorithm.classifier.coef_, array([[1.33489128, 1.38092354]])) + + assert numpy.allclose(fused, array([-3.31486708, 0.4619598, -1.23950404, -0.55291754, -2.40238289, + -0.61529441, -2.26645877, 0.59964668, 0.55225715, -1.30189552])) + + assert numpy.allclose(algorithm.preprocessors[0].mean_, loaded_algorithm.preprocessors[0].mean_) + assert numpy.allclose(algorithm.preprocessors[0].scale_, loaded_algorithm.preprocessors[0].scale_) + assert numpy.allclose(algorithm.classifier.intercept_, loaded_algorithm.classifier.intercept_) + assert numpy.allclose(algorithm.classifier.classes_, loaded_algorithm.classifier.classes_) + assert numpy.allclose(algorithm.classifier.coef_, loaded_algorithm.classifier.coef_) def test_algorithm_llr_bob(): - algorithm = bob.fusion.base.algorithm.LLR( - preprocessors=[StandardScaler(**{'copy': True, - 'with_mean': True, - 'with_std': True})], - trainer=None) - neg, pos, fused, loaded_algorithm = run_steps(algorithm) + algorithm = bob.fusion.base.algorithm.LLR( + preprocessors=[StandardScaler(**{'copy': True, + 'with_mean': True, + 'with_std': True})], + trainer=None) + neg, pos, fused, loaded_algorithm = run_steps(algorithm) - algorithm = bob.fusion.base.algorithm.LLR( - preprocessors=[StandardScaler(**{'copy': True, - 'with_mean': True, - 'with_std': True})], - trainer=bob.learn.linear.CGLogRegTrainer( - prior=0.5, convergence_threshold=1e-5, - max_iterations=10000, reg=1., mean_std_norm=False)) - neg, pos, fused, loaded_algorithm = run_steps(algorithm) - assert str(algorithm) == "<class 'bob.fusion.base.algorithm.LLR.LLR'>(trainer=<type 'bob.learn.linear.CGLogRegTrainer'>, preprocessors=[StandardScaler(copy=True, with_mean=True, with_std=True)])" + algorithm = bob.fusion.base.algorithm.LLR( + preprocessors=[StandardScaler(**{'copy': True, + 'with_mean': True, + 'with_std': True})], + trainer=bob.learn.linear.CGLogRegTrainer( + prior=0.5, convergence_threshold=1e-5, + max_iterations=10000, reg=1., mean_std_norm=False)) + neg, pos, fused, loaded_algorithm = run_steps(algorithm) + assert str(algorithm) == "<class 'bob.fusion.base.algorithm.LLR.LLR'>(trainer=<type 'bob.learn.linear.CGLogRegTrainer'>, preprocessors=[StandardScaler(copy=True, with_mean=True, with_std=True)])" - assert numpy.allclose(algorithm.machine.biases, array([0.04577333]), atol=0.05) - assert numpy.allclose(algorithm.machine.weights, array([[1.33489128, 1.38092354]]), atol=0.05) + assert numpy.allclose(algorithm.machine.biases, array([0.04577333]), atol=0.05) + assert numpy.allclose(algorithm.machine.weights, array([[1.33489128, 1.38092354]]), atol=0.05) - assert numpy.allclose(fused, array([-3.31486708, 0.4619598, -1.23950404, -0.55291754, -2.40238289, - -0.61529441, -2.26645877, 0.59964668, 0.55225715, -1.30189552]), atol=0.05) + assert numpy.allclose(fused, array([-3.31486708, 0.4619598, -1.23950404, -0.55291754, -2.40238289, + -0.61529441, 
-2.26645877, 0.59964668, 0.55225715, -1.30189552]), atol=0.05) - assert numpy.allclose(algorithm.machine.biases, loaded_algorithm.machine.biases) - assert numpy.allclose(algorithm.machine.weights, loaded_algorithm.machine.weights) + assert numpy.allclose(algorithm.machine.biases, loaded_algorithm.machine.biases) + assert numpy.allclose(algorithm.machine.weights, loaded_algorithm.machine.weights) def test_weighted_sum_1(): - algorithm = bob.fusion.base.algorithm.Weighted_Sum() - neg, pos, fused, loaded_algorithm = run_steps(algorithm) - assert str(algorithm) == "<class 'bob.fusion.base.algorithm.Weighted_Sum.Weighted_Sum'>()" - assert numpy.allclose(fused, numpy.mean(TEST, axis=1)) - assert algorithm.weights == loaded_algorithm.weights + algorithm = bob.fusion.base.algorithm.Weighted_Sum() + neg, pos, fused, loaded_algorithm = run_steps(algorithm) + assert str(algorithm) == "<class 'bob.fusion.base.algorithm.Weighted_Sum.Weighted_Sum'>()" + assert numpy.allclose(fused, numpy.mean(TEST, axis=1)) + assert algorithm.weights == loaded_algorithm.weights + + +def test_routine_fusion(): + bob.fusion.base.script.bob_fuse.routine_fusion(algorithm, model_file, + scores_train_lines, scores_train, train_neg, train_pos, fused_train_file, + scores_dev_lines=None, scores_dev=None, dev_neg=None, dev_pos=None, fused_dev_file=None, + scores_eval_lines=None, scores_eval=None, fused_eval_file=None, + force=False, min_file_size=1000, + ) def test_weighted_sum_2(): - weights = [0.3, 0.7] - algorithm = bob.fusion.base.algorithm.Weighted_Sum(weights=weights) - neg, pos, fused, loaded_algorithm = run_steps(algorithm) - assert str( - algorithm) == "<class 'bob.fusion.base.algorithm.Weighted_Sum.Weighted_Sum'>(weights=[0.3, 0.7])" - assert numpy.allclose(fused, numpy.sum(TEST * weights, axis=1)) - assert algorithm.weights == loaded_algorithm.weights + weights = [0.3, 0.7] + algorithm = bob.fusion.base.algorithm.Weighted_Sum(weights=weights) + neg, pos, fused, loaded_algorithm = run_steps(algorithm) + assert str( + algorithm) == "<class 'bob.fusion.base.algorithm.Weighted_Sum.Weighted_Sum'>(weights=[0.3, 0.7])" + assert numpy.allclose(fused, numpy.sum(TEST * weights, axis=1)) + assert algorithm.weights == loaded_algorithm.weights def test_mlp(): - algorithm = bob.fusion.base.algorithm.MLP( - n_systems=2, hidden_layers=[3], seed=0, batch_size=10, epoch=1, - max_iter=1000, no_improvements=0, valley_condition=0.9, - preprocessors=[StandardScaler(**{'copy': True, - 'with_mean': True, - 'with_std': True})]) - assert numpy.allclose(algorithm.machine.weights[0], array([[0.0097627, 0.01856892, 0.04303787], - [0.06885315, 0.02055267, 0.07158912]])) - assert numpy.allclose(algorithm.machine.weights[1], array([[0.02471274], - [0.02917882], - [-0.02312366]])) - assert numpy.allclose(algorithm.machine.biases[0], array([0.00897664, 0.06945035, -0.01526904])) - assert numpy.allclose(algorithm.machine.biases[1], array([-0.01248256])) - _, _, fused, loaded_algorithm = run_steps(algorithm) - assert numpy.allclose(fused, [-1, 1, -1, -1, -1, -1, -1, 1, 1, -1], atol=0.001) + algorithm = bob.fusion.base.algorithm.MLP( + n_systems=2, hidden_layers=[3], seed=0, batch_size=10, epoch=1, + max_iter=1000, no_improvements=0, valley_condition=0.9, + preprocessors=[StandardScaler(**{'copy': True, + 'with_mean': True, + 'with_std': True})]) + assert numpy.allclose(algorithm.machine.weights[0], array([[0.0097627, 0.01856892, 0.04303787], + [0.06885315, 0.02055267, 0.07158912]])) + assert numpy.allclose(algorithm.machine.weights[1], 
array([[0.02471274], + [0.02917882], + [-0.02312366]])) + assert numpy.allclose(algorithm.machine.biases[0], array([0.00897664, 0.06945035, -0.01526904])) + assert numpy.allclose(algorithm.machine.biases[1], array([-0.01248256])) + _, _, fused, loaded_algorithm = run_steps(algorithm) + assert numpy.allclose(fused, [-1, 1, -1, -1, -1, -1, -1, 1, 1, -1], atol=0.001) diff --git a/bob/fusion/base/test/test_scripts.py b/bob/fusion/base/test/test_scripts.py index ef143b8..0447ce3 100644 --- a/bob/fusion/base/test/test_scripts.py +++ b/bob/fusion/base/test/test_scripts.py @@ -5,7 +5,7 @@ import shutil import tempfile from bob.fusion.base.script.bob_fuse import main as bob_fuse -from bob.fusion.base.script.plot_fusion_decision_boundary import main as plot_fusion_decision_boundary +from bob.fusion.base.script.bob_fusion_decision_boundary import main as bob_fusion_decision_boundary from bob.io.base.test_utils import datafile train_files = [datafile("scores-dev-1", 'bob.fusion.base'), @@ -16,44 +16,44 @@ eval_files = [datafile("scores-eval-1", 'bob.fusion.base'), def test_scripts(): - tmpdir = tempfile.mkdtemp() - try: - fused_dev_file = os.path.join(tmpdir, 'scores-dev') - fused_eval_file = os.path.join(tmpdir, 'scores-eval') - - # test normally - cmd = ['-s', tmpdir, '-t'] + train_files + ['-o', fused_dev_file, '-a', 'llr'] - bob_fuse(cmd) - - cmd = ['-s', tmpdir, '-t'] + train_files + ['-e'] + eval_files + ['-o', fused_dev_file, '-O', fused_eval_file, '-a', 'llr'] - bob_fuse(cmd) - - # make inconsistency - wrong_dev2 = os.path.join(tmpdir, 'scores-dev-2') - with open(wrong_dev2, 'w') as f1, open(train_files[1]) as f2: - lines = f2.readlines() - temp = lines[0].split() - temp = (temp[0], 'temp1_id', temp[2], temp[3]) - lines[0] = ' '.join(temp) + '\n' - f1.writelines(lines) - - cmd = ['-s', tmpdir, '-t'] + train_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr'] + tmpdir = tempfile.mkdtemp() try: - bob_fuse(cmd) - except AssertionError: - pass - else: - raise Exception('An AssertionError should have been raised.') - - # this should not raise an error - cmd = ['-s', tmpdir, '-t'] + train_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr', '--skip-check'] - bob_fuse(cmd) - - # test plot - model_file = os.path.join(tmpdir, 'Model.pkl') - output = os.path.join(tmpdir, 'scatter.pdf') - cmd = train_files + [model_file, '-o', output] - plot_fusion_decision_boundary(cmd) - - finally: - shutil.rmtree(tmpdir) + fused_dev_file = os.path.join(tmpdir, 'scores-dev') + fused_eval_file = os.path.join(tmpdir, 'scores-eval') + + # test normally + cmd = ['-s', tmpdir, '-t'] + train_files + ['-D', fused_dev_file, '-a', 'llr'] + bob_fuse(cmd) + + cmd = ['-s', tmpdir, '-t'] + train_files + ['-e'] + eval_files + ['-D', fused_dev_file, '-E', fused_eval_file, '-a', 'llr'] + bob_fuse(cmd) + + # make inconsistency + wrong_dev2 = os.path.join(tmpdir, 'scores-dev-2') + with open(wrong_dev2, 'w') as f1, open(train_files[1]) as f2: + lines = f2.readlines() + temp = lines[0].split() + temp = (temp[0], 'temp1_id', temp[2], temp[3]) + lines[0] = ' '.join(temp) + '\n' + f1.writelines(lines) + + cmd = ['-s', tmpdir, '-t'] + train_files[0:1] + [wrong_dev2] + ['-D', fused_dev_file, '-a', 'llr'] + try: + bob_fuse(cmd) + except AssertionError: + pass + else: + raise Exception('An AssertionError should have been raised.') + + # this should not raise an error + cmd = ['-s', tmpdir, '-t'] + train_files[0:1] + [wrong_dev2] + ['-D', fused_dev_file, '-a', 'llr', '--skip-check'] + bob_fuse(cmd) + + # test plot + 
diff --git a/bob/fusion/base/tools/command_line.py b/bob/fusion/base/tools/command_line.py
index edbdd5e..ce12011 100644
--- a/bob/fusion/base/tools/command_line.py
+++ b/bob/fusion/base/tools/command_line.py
@@ -16,249 +16,254 @@ valid_keywords = ('algorithm')
 
 
 def _get_entry_points(keyword, strip=[]):
-  """Returns the list of entry points for registered resources with the given
-  keyword."""
-  return [entry_point for entry_point in
-          pkg_resources.iter_entry_points('bob.fusion.' + keyword)
-          if not entry_point.name.startswith(tuple(strip))]
+    """Returns the list of entry points for registered resources with the given
+    keyword."""
+    return [entry_point for entry_point in
+            pkg_resources.iter_entry_points('bob.fusion.' + keyword)
+            if not entry_point.name.startswith(tuple(strip))]
 
 
 def resource_keys(keyword, exclude_packages=[], strip=['dummy']):
-  """Reads and returns all resources that are registered with the given keyword.
-  Entry points from the given ``exclude_packages`` are ignored."""
-  return sorted([entry_point.name for entry_point in
-                 _get_entry_points(keyword, strip) if
-                 entry_point.dist.project_name not in exclude_packages])
+    """Reads and returns all resources that are registered with the given keyword.
+    Entry points from the given ``exclude_packages`` are ignored."""
+    return sorted([entry_point.name for entry_point in
+                   _get_entry_points(keyword, strip) if
+                   entry_point.dist.project_name not in exclude_packages])
 
 
 def command_line_parser(description=__doc__, exclude_resources_from=[]):
-  parser = argparse.ArgumentParser(
-      description=description,
-      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-
-  parser.add_argument('-t', '--train-files', required=True,
-                      nargs='+', help="A list of score files of "
-                      "the train set.")
-  parser.add_argument('-d', '--dev-files', nargs='+',
-                      help="A list of score files of the development set; "
-                      "if given it must be the same number of files "
-                      "as the --train-files.")
-  parser.add_argument('-e', '--eval-files', nargs='+',
-                      help="A list of score files of the evaluation set; "
-                      "if given it must be the same number of files "
-                      "as the --train-files.")
-  parser.add_argument('-o', '--fused-dev-file', default=None,
-                      help='The fused development score file. '
-                      'Default is "scores-dev" in the --save-directory')
-  parser.add_argument('-O', '--fused-eval-file', default=None,
-                      help='The fused evaluation score file. '
-                      'Default is "scores-eval" in the --save-directory')
-  parser.add_argument('--score-type', choices=[4, 5], default=None,
-                      help='The format the scores are provided. If not '
-                      'provided, the number of columns will be guessed.')
-  parser.add_argument('--skip-check', action='store_true',
-                      help='If provided, score files are not checked '
-                      'for consistency')
-  parser.add_argument('-s', '--save-directory', help='The directory to save '
-                      'the experiment artifacts.', default='fusion_result')
-
-  config_group = parser.add_argument_group(
-      'Parameters defining the experiment', ' Most of these parameters can be a'
-      ' registered resource, a configuration file, or even a string that '
-      'defines a newly created object')
-  config_group.add_argument(
-      '-a', '--algorithm', metavar='x', required=True,
-      help='Fusion; registered algorithms are: %s' % resource_keys(
-          'algorithm', exclude_resources_from))
-  config_group.add_argument(
-      '-m', '--imports', metavar='LIB', nargs='+',
-      default=['bob.fusion.base'], help='If one of your configuration files is'
-      ' an actual command, please specify the lists of'
-      ' required libraries (imports) to execute this command')
-
-  flag_group = parser.add_argument_group(
-      'Flags that change the behavior of the experiment')
-  bob.core.log.add_command_line_option(flag_group)
-  flag_group.add_argument('-F', '--force', action='store_true',
-                          help='Force to erase former data if already exist')
-
-  return {
-      'main': parser,
-      'config': config_group,
-      'flag': flag_group
-  }
+    parser = argparse.ArgumentParser(
+        description=description,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('-t', '--train-files', required=True,
+                        nargs='+', help="A list of score files of "
+                        "the train set.")
+    parser.add_argument('-d', '--dev-files', nargs='+',
+                        help="A list of score files of the development set; "
+                        "if given it must be the same number of files "
+                        "as the --train-files.")
+    parser.add_argument('-e', '--eval-files', nargs='+',
+                        help="A list of score files of the evaluation set; "
+                        "if given it must be the same number of files "
+                        "as the --train-files.")
+    parser.add_argument('-T', '--fused-train-file', default=None,
+                        help='The fused train score file. '
+                        'Default is "scores-train" in the --save-directory')
+    parser.add_argument('-D', '--fused-dev-file', default=None,
+                        help='The fused development score file. '
+                        'Default is "scores-dev" in the --save-directory')
+    parser.add_argument('-E', '--fused-eval-file', default=None,
+                        help='The fused evaluation score file. '
+                        'Default is "scores-eval" in the --save-directory')
+    parser.add_argument('--score-type', choices=[4, 5], default=None,
+                        help='The format in which the scores are provided; '
+                        'if not given, the number of columns will be guessed.')
+    parser.add_argument('--skip-check', action='store_true',
+                        help='If provided, score files are not checked '
+                        'for consistency')
+    parser.add_argument('-s', '--save-directory', help='The directory to save '
+                        'the experiment artifacts.', default='fusion_result')
+
+    config_group = parser.add_argument_group(
+        'Parameters defining the experiment', 'Most of these parameters can be'
+        ' a registered resource, a configuration file, or even a string that'
+        ' defines a newly created object')
+    config_group.add_argument(
+        '-a', '--algorithm', metavar='x', required=True,
+        help='Fusion; registered algorithms are: %s' % resource_keys(
+            'algorithm', exclude_resources_from))
+    config_group.add_argument(
+        '-m', '--imports', metavar='LIB', nargs='+',
+        default=['bob.fusion.base'], help='If one of your configuration files'
+        ' is an actual command, please specify the list of'
+        ' required libraries (imports) needed to execute this command')
+
+    flag_group = parser.add_argument_group(
+        'Flags that change the behavior of the experiment')
+    bob.core.log.add_command_line_option(flag_group)
+    flag_group.add_argument('-F', '--force', action='store_true',
+                            help='Force erasing former data if they already exist')
+
+    return {
+        'main': parser,
+        'config': config_group,
+        'flag': flag_group
+    }
 
 
 def initialize(parsers, command_line_parameters=None, skips=[]):
-  args = parsers['main'].parse_args(command_line_parameters)
+    args = parsers['main'].parse_args(command_line_parameters)
 
-  # logging
-  bob.core.log.set_verbosity_level(logger, args.verbose)
+    # logging
+    bob.core.log.set_verbosity_level(logger, args.verbose)
 
-  # load configuration resources
-  args.algorithm = load_resource(
-      args.algorithm, 'algorithm', imports=args.imports)
+    # load configuration resources
+    args.algorithm = load_resource(
+        args.algorithm, 'algorithm', imports=args.imports)
 
-  # set base directories
-  if args.fused_dev_file is None:
-    args.fused_dev_file = os.path.join(args.save_directory, 'scores-dev')
-  if args.fused_eval_file is None:
-    args.fused_eval_file = os.path.join(args.save_directory, 'scores-eval')
+    # set base directories
+    if args.fused_train_file is None:
+        args.fused_train_file = os.path.join(args.save_directory, 'scores-train')
+    if args.fused_dev_file is None:
+        args.fused_dev_file = os.path.join(args.save_directory, 'scores-dev')
+    if args.fused_eval_file is None:
+        args.fused_eval_file = os.path.join(args.save_directory, 'scores-eval')
 
-  # result files
-  args.info_file = os.path.join(args.save_directory, 'Experiment.info')
+    # result files
+    args.info_file = os.path.join(args.save_directory, 'Experiment.info')
 
-  args.model_file = os.path.join(args.save_directory, 'Model.pkl')
+    args.model_file = os.path.join(args.save_directory, 'Model.pkl')
 
-  return args
+    return args
 
 
 def write_info(args, command_line_parameters):
-  """Writes information about the current experimental setup into a file
-  specified on command line.
-
-  **Parameters:**
-
-  args : namespace
-    The interpreted command line arguments as returned by the
-    :py:func:`initialize` function.
-
-  command_line_parameters : [str] or ``None``
-    The command line parameters that have been interpreted.
-    If ``None``, the parameters specified by the user on command line
-    are considered.
-
-  executable : str
-    The name of the executable (such as ``'./bin/verify.py'``) that is used
-    to run the experiments.
-  """
-  if command_line_parameters is None:
-    command_line_parameters = sys.argv[1:]
-  executable = sys.argv[0]
-  # write configuration
-  try:
-    create_directories_safe(os.path.dirname(args.info_file))
-    with open(args.info_file, 'w') as f:
-      f.write("Command line:\n")
-      f.write(
-          tools.command_line([executable] + command_line_parameters) + "\n\n")
-      f.write("Configuration:\n\n")
-      f.write("Algorithm:\n%s\n\n" % args.algorithm)
-  except IOError:
-    logger.error(
-        "Could not write the experimental setup into file '%s'", args.info_file)
+    """Writes information about the current experimental setup into a file
+    specified on command line.
+
+    **Parameters:**
+
+    args : namespace
+      The interpreted command line arguments as returned by the
+      :py:func:`initialize` function.
+
+    command_line_parameters : [str] or ``None``
+      The command line parameters that have been interpreted.
+      If ``None``, the parameters specified by the user on command line
+      are considered.
+
+    The name of the executable used to run the experiments is taken from
+    ``sys.argv[0]``.
+    """
+    if command_line_parameters is None:
+        command_line_parameters = sys.argv[1:]
+    executable = sys.argv[0]
+    # write configuration
+    try:
+        create_directories_safe(os.path.dirname(args.info_file))
+        with open(args.info_file, 'w') as f:
+            f.write("Command line:\n")
+            f.write(
+                tools.command_line([executable] + command_line_parameters) + "\n\n")
+            f.write("Configuration:\n\n")
+            f.write("Algorithm:\n%s\n\n" % args.algorithm)
+    except IOError:
+        logger.error(
+            "Could not write the experimental setup into file '%s'", args.info_file)
 
 
 def parse_arguments(command_line_parameters, exclude_resources_from=[]):
-  """This function parses the given options (which by default are the command
-  line options). If exclude_resources_from is specified (as a list), the
-  resources from the given packages are not listed in the help message."""
-  # set up command line parser
-  parsers = command_line_parser(exclude_resources_from=exclude_resources_from)
+    """This function parses the given options (which by default are the command
+    line options). If exclude_resources_from is specified (as a list), the
+    resources from the given packages are not listed in the help message."""
+    # set up command line parser
+    parsers = command_line_parser(exclude_resources_from=exclude_resources_from)
 
-  # now that we have set up everything, get the command line arguments
-  return initialize(parsers, command_line_parameters)
+    # now that we have set up everything, get the command line arguments
+    return initialize(parsers, command_line_parameters)
 
 
 def load_resource(resource, keyword, imports=['bob.fusion.base'], preferred_package=None):
-  """Loads the given resource that is registered with the given keyword.
-  The resource can be:
-
-  1. a resource as defined in the setup.py
-  2. a configuration file
-  3. a string defining the construction of an object. If imports are required
-     for the construction of this object, they can be given as list of strings.
-
-  **Parameters:**
-
-  resource : str
-    Any string interpretable as a resource (see above).
-
-  keyword : str
-    A valid resource keyword, can be one of :py:attr:`valid_keywords`.
-
-  imports : [str]
-    A list of strings defining which modules to import, when constructing new
-    objects (option 3).
-
-  preferred_package : str or ``None``
-    When several resources with the same name are found in different packages
-    (e.g., in different ``bob.bio`` or other packages), this specifies the
-    preferred package to load the resource from. If not specified, the
-    extension that is **not** from ``bob.bio`` is selected.
-
-  **Returns:**
-
-  resource : object
-    The resulting resource object is returned, either read from file or
-    resource, or created newly.
-  """
-
-  # first, look if the resource is a file name
-  if os.path.isfile(resource):
-    return utils.read_config_file(resource, keyword)
-
-  if keyword not in valid_keywords:
-    raise ValueError("The given keyword '%s' is not valid. "
-                     "Please use one of %s!" % (str(keyword),
-                                                str(valid_keywords)))
-
-  # now, we check if the resource is registered as an entry point in the
-  # resource files
-  entry_points = [entry_point for entry_point in _get_entry_points(
-      keyword) if entry_point.name == resource]
-
-  if len(entry_points):
-    if len(entry_points) == 1:
-      return entry_points[0].load()
-    else:
-      # TODO: extract current package name and use this one, if possible
-
-      # Now: check if there are only two entry points, and one is from the
-      # bob.fusion.base, then use the other one
-      index = -1
-      if preferred_package is not None:
-        for i, p in enumerate(entry_points):
-          if p.dist.project_name == preferred_package:
-            index = i
-            break
-
-      if index == -1:
-        # by default, use the first one that is not from bob.bio
-        for i, p in enumerate(entry_points):
-          if not p.dist.project_name.startswith('bob.bio'):
-            index = i
-            break
-
-      if index != -1:
-        logger.debug("RESOURCES: Using the resource '%s' from '%s', "
-                     "and ignoring the one from '%s'",
-                     resource, entry_points[index].module_name,
-                     entry_points[1 - index].module_name)
-        return entry_points[index].load()
-      else:
-        logger.warn("Under the desired name '%s', there are multiple "
-                    "entry points defined, we return the first one: %s",
-                    resource,
-                    [entry_point.module_name for entry_point in entry_points])
-        return entry_points[0].load()
-
-  # if the resource is neither a config file nor an entry point,
-  # just execute it as a command
-  try:
-    # first, execute all import commands that are required
-    for i in imports:
-      exec("import %s" % i)
-    # now, evaluate the resource (re-evaluate if the resource is still a
-    # string)
-    while isinstance(resource, str):
-      resource = eval(resource)
-    return resource
-
-  except Exception as e:
-    raise ImportError("The given command line option '%s' is neither a "
-                      "resource for a '%s', nor an existing configuration"
-                      " file, nor could be interpreted as a command "
-                      "(error: %s)" % (resource, keyword, str(e)))
+    """Loads the given resource that is registered with the given keyword.
+    The resource can be:
+
+    1. a resource as defined in the setup.py
+    2. a configuration file
+    3. a string defining the construction of an object. If imports are required
+       for the construction of this object, they can be given as a list of strings.
+
+    **Parameters:**
+
+    resource : str
+      Any string interpretable as a resource (see above).
+
+    keyword : str
+      A valid resource keyword, can be one of :py:attr:`valid_keywords`.
+
+    imports : [str]
+      A list of strings defining which modules to import when constructing new
+      objects (option 3).
+
+    preferred_package : str or ``None``
+      When several resources with the same name are found in different packages
+      (e.g., in different ``bob.bio`` or other packages), this specifies the
+      preferred package to load the resource from. If not specified, the
+      extension that is **not** from ``bob.bio`` is selected.
+
+    **Returns:**
+
+    resource : object
+      The resulting resource object: either read from a configuration file,
+      loaded from a registered entry point, or newly created.
+    """
+
+    # first, check if the resource is a file name
+    if os.path.isfile(resource):
+        return utils.read_config_file(resource, keyword)
+
+    if keyword not in valid_keywords:
+        raise ValueError("The given keyword '%s' is not valid. "
+                         "Please use one of %s!" % (str(keyword),
+                                                    str(valid_keywords)))
+
+    # now, check if the resource is registered as an entry point in the
+    # resource files
+    entry_points = [entry_point for entry_point in _get_entry_points(
+        keyword) if entry_point.name == resource]
+
+    if len(entry_points):
+        if len(entry_points) == 1:
+            return entry_points[0].load()
+        else:
+            # TODO: extract current package name and use this one, if possible
+
+            # Now check whether there are only two entry points; if one of them
+            # is from bob.fusion.base, use the other one
+            index = -1
+            if preferred_package is not None:
+                for i, p in enumerate(entry_points):
+                    if p.dist.project_name == preferred_package:
+                        index = i
+                        break
+
+            if index == -1:
+                # by default, use the first one that is not from bob.bio
+                for i, p in enumerate(entry_points):
+                    if not p.dist.project_name.startswith('bob.bio'):
+                        index = i
+                        break
+
+            if index != -1:
+                logger.debug("RESOURCES: Using the resource '%s' from '%s', "
+                             "and ignoring the one from '%s'",
+                             resource, entry_points[index].module_name,
+                             entry_points[1 - index].module_name)
+                return entry_points[index].load()
+            else:
+                logger.warning("Under the desired name '%s', there are multiple "
+                               "entry points defined, we return the first one: %s",
+                               resource,
+                               [entry_point.module_name for entry_point in entry_points])
+                return entry_points[0].load()
+
+    # if the resource is neither a config file nor an entry point,
+    # just execute it as a command
+    try:
+        # first, execute all import commands that are required
+        for i in imports:
+            exec("import %s" % i)
+        # now, evaluate the resource (re-evaluate if the resource is still a
+        # string)
+        while isinstance(resource, str):
+            resource = eval(resource)
+        return resource
+
+    except Exception as e:
+        raise ImportError("The given command line option '%s' is neither a "
+                          "resource for a '%s', nor an existing configuration"
+                          " file, nor could be interpreted as a command "
+                          "(error: %s)" % (resource, keyword, str(e)))
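
To illustrate the three resolution modes documented above, a short sketch; 'llr' is assumed to be one of this package's registered algorithm entry points, and the configuration-file path is a placeholder:

    from bob.fusion.base.tools.command_line import load_resource

    # 1. a registered entry point (resource as defined in setup.py)
    algorithm = load_resource('llr', 'algorithm')
    # 2. a configuration file (placeholder path) defining an `algorithm` object
    algorithm = load_resource('my_fusion_config.py', 'algorithm')
    # 3. a string that is eval()'ed after importing the listed modules
    algorithm = load_resource("bob.fusion.base.algorithm.Weighted_Sum()",
                              'algorithm', imports=['bob.fusion.base'])
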
diff --git a/bob/fusion/base/tools/common.py b/bob/fusion/base/tools/common.py
index f440fb2..8be4c41 100644
--- a/bob/fusion/base/tools/common.py
+++ b/bob/fusion/base/tools/common.py
@@ -6,105 +6,105 @@ logger = bob.core.log.setup("bob.fusion.base")
 
 
 def get_2negatives_1positive(score_lines):
-  gen_mask = score_lines['claimed_id'] == score_lines['real_id']
-  atk_mask = np.logical_or(np.char.count(score_lines['real_id'], 'spoof/') > 0,
-                           np.char.count(score_lines['real_id'], 'attack') > 0)
-  zei_mask = np.logical_and(np.logical_not(gen_mask), np.logical_not(atk_mask))
-  gen = score_lines[gen_mask]
-  zei = score_lines[zei_mask]
-  atk = score_lines[atk_mask]
-  return (gen, zei, atk, gen_mask, zei_mask, atk_mask)
+    gen_mask = score_lines['claimed_id'] == score_lines['real_id']
+    atk_mask = np.logical_or(np.char.count(score_lines['real_id'], 'spoof/') > 0,
+                             np.char.count(score_lines['real_id'], 'attack') > 0)
+    zei_mask = np.logical_and(np.logical_not(gen_mask), np.logical_not(atk_mask))
+    gen = score_lines[gen_mask]
+    zei = score_lines[zei_mask]
+    atk = score_lines[atk_mask]
+    return (gen, zei, atk, gen_mask, zei_mask, atk_mask)
 
 
 def check_consistency(gen_l, zei_l, atk_l):
-  if len(gen_l) < 2:
-    logger.error('Check failed since less than two system is available.')
-  for score_lines_list in (gen_l, zei_l, atk_l):
-    if not score_lines_list:
-      continue
-    score_lines0 = score_lines_list[0]
-    for score_lines in score_lines_list[1:]:
-      assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
-      assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
+    if len(gen_l) < 2:
+        logger.error('Check failed since fewer than two systems are available.')
+    for score_lines_list in (gen_l, zei_l, atk_l):
+        if not score_lines_list:
+            continue
+        score_lines0 = score_lines_list[0]
+        for score_lines in score_lines_list[1:]:
+            assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
+            assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
 
 
 def get_scores(*args):
-  scores = []
-  for temp in zip(*args):
-    scores.append(np.concatenate([a['score'] for a in temp], axis=0))
-  return np.vstack(scores).T
+    scores = []
+    for temp in zip(*args):
+        scores.append(np.concatenate([a['score'] for a in temp], axis=0))
+    return np.vstack(scores).T
 
 
 def get_score_lines(*args):
-  # get the dtype names
-  names = list(args[0][0].dtype.names)
-  if len(names) != 4:
-    names = [n for n in names if 'model_label' not in n]
-  logger.debug(names)
-
-  # find the (max) size of strigns
-  dtypes = [a.dtype for temp in zip(*args) for a in temp]
-  lengths = defaultdict(list)
-  for name in names:
-    for d in dtypes:
-      lengths[name].append(d[name].itemsize // 4)
-
-  # make a new dtype
-  new_dtype = []
-  for name in names[:-1]:
-    new_dtype.append((name, 'U{}'.format(max(lengths[name]))))
-  new_dtype.append((names[-1], float))
-
-  score_lines = []
-  for temp in zip(*args):
-    for a in temp:
-      score_lines.extend(a[names].tolist())
-  score_lines = np.array(score_lines, dtype=new_dtype)
-  return score_lines
+    # get the dtype names
+    names = list(args[0][0].dtype.names)
+    if len(names) != 4:
+        names = [n for n in names if 'model_label' not in n]
+    logger.debug(names)
+
+    # find the (max) size of strings
+    dtypes = [a.dtype for temp in zip(*args) for a in temp]
+    lengths = defaultdict(list)
+    for name in names:
+        for d in dtypes:
+            lengths[name].append(d[name].itemsize // 4)
+
+    # make a new dtype
+    new_dtype = []
+    for name in names[:-1]:
+        new_dtype.append((name, 'U{}'.format(max(lengths[name]))))
+    new_dtype.append((names[-1], float))
+
+    score_lines = []
+    for temp in zip(*args):
+        for a in temp:
+            score_lines.extend(a[names].tolist())
+    score_lines = np.array(score_lines, dtype=new_dtype)
+    return score_lines
 
 
 def remove_nan(samples, found_nan):
-  ncls = samples.shape[1]
-  nans = np.isnan(samples[:, 0])
-  for i in range(1, ncls):
-    nans = np.logical_or(nans, np.isnan(samples[:, i]))
-  return np.any(nans) or found_nan, nans, samples[~nans, :]
+    ncls = samples.shape[1]
+    nans = np.isnan(samples[:, 0])
+    for i in range(1, ncls):
+        nans = np.logical_or(nans, np.isnan(samples[:, i]))
+    return np.any(nans) or found_nan, nans, samples[~nans, :]
 
 
 def get_gza_from_lines_list(score_lines_list):
-  gen_l, zei_l, atk_l = [], [], []
-  for score_lines in score_lines_list:
-    gen, zei, atk, _, _, _ = get_2negatives_1positive(score_lines)
-    gen_l.append(gen)
-    zei_l.append(zei)
-    atk_l.append(atk)
-  zei_lengths = []
-  for zei in zei_l:
-    zei_lengths.append(zei.size)
-  zei_lengths = np.array(zei_lengths)
-  idx1 = 0  # used later if it does not enter the if.
-  if not (np.all(zei_lengths == 0) or np.all(zei_lengths > 0)):
-    # generate the missing ones
-    # find one that has zei
-    idx1 = zei_lengths.nonzero()[0][0]
-    zei_full = zei_l[idx1]
-    for idx2 in np.where(zei_lengths == 0)[0]:
-      if zei_l[idx2] is None:
-        continue
-      temp = np.array(zei_full)
-      # make sure we replace all scores.
-      temp['score'] = np.nan
-      # get the list of ids
-      real_ids = np.unique(temp['real_id'])
-      # find pad score of that id and replace the score
-      for real_id in real_ids:
-        # get the list of test_labels
-        test_labels = np.unique(temp['test_label'][temp['real_id'] == real_id])
-        for test_label in test_labels:
-          idx3 = np.logical_and(temp['real_id'] == real_id,
-                                temp['test_label'] == test_label)
-          idx4 = np.logical_and(gen_l[idx2]['real_id'] == real_id,
-                                gen_l[idx2]['test_label'] == test_label)
-          temp['score'][idx3] = gen_l[idx2]['score'][idx4]
-      zei_l[idx2] = temp
-  return idx1, gen_l, zei_l, atk_l
+    gen_l, zei_l, atk_l = [], [], []
+    for score_lines in score_lines_list:
+        gen, zei, atk, _, _, _ = get_2negatives_1positive(score_lines)
+        gen_l.append(gen)
+        zei_l.append(zei)
+        atk_l.append(atk)
+    zei_lengths = []
+    for zei in zei_l:
+        zei_lengths.append(zei.size)
+    zei_lengths = np.array(zei_lengths)
+    idx1 = 0  # used later if the if below is not entered.
+    if not (np.all(zei_lengths == 0) or np.all(zei_lengths > 0)):
+        # generate the missing ones
+        # find one that has zei
+        idx1 = zei_lengths.nonzero()[0][0]
+        zei_full = zei_l[idx1]
+        for idx2 in np.where(zei_lengths == 0)[0]:
+            if zei_l[idx2] is None:
+                continue
+            temp = np.array(zei_full)
+            # make sure we replace all scores.
+            temp['score'] = np.nan
+            # get the list of ids
+            real_ids = np.unique(temp['real_id'])
+            # find pad score of that id and replace the score
+            for real_id in real_ids:
+                # get the list of test_labels
+                test_labels = np.unique(temp['test_label'][temp['real_id'] == real_id])
+                for test_label in test_labels:
+                    idx3 = np.logical_and(temp['real_id'] == real_id,
                                          temp['test_label'] == test_label)
+                    idx4 = np.logical_and(gen_l[idx2]['real_id'] == real_id,
+                                          gen_l[idx2]['test_label'] == test_label)
+                    temp['score'][idx3] = gen_l[idx2]['score'][idx4]
+            zei_l[idx2] = temp
+    return idx1, gen_l, zei_l, atk_l
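
A tiny illustration of the three masks computed by get_2negatives_1positive(); the 4-column dtype mirrors what get_score_lines() builds:

    import numpy as np
    from bob.fusion.base.tools.common import get_2negatives_1positive

    dt = [('claimed_id', 'U10'), ('real_id', 'U20'),
          ('test_label', 'U10'), ('score', float)]
    lines = np.array([('1', '1', 'f1', 0.9),          # genuine: claimed == real
                      ('1', '2', 'f2', 0.1),          # zero-effort impostor
                      ('1', 'attack/1', 'f3', 0.7)],  # presentation attack
                     dtype=dt)
    gen, zei, atk, gen_mask, zei_mask, atk_mask = get_2negatives_1positive(lines)
    assert gen.size == zei.size == atk.size == 1
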
diff --git a/bob/fusion/base/tools/plotting.py b/bob/fusion/base/tools/plotting.py
index 06ef61b..05d066a 100644
--- a/bob/fusion/base/tools/plotting.py
+++ b/bob/fusion/base/tools/plotting.py
@@ -6,22 +6,22 @@ import bob.learn.em
 
 
 def grouping(scores, gformat='random', npoints=500, seed=None, **kwargs):
-  scores = numpy.asarray(scores)
+    scores = numpy.asarray(scores)
 
-  if(gformat == "kmeans"):
-    kmeans_machine = bob.learn.em.KMeansMachine(npoints, 2)
-    kmeans_trainer = bob.learn.em.KMeansTrainer()
-    bob.learn.em.train(
-        kmeans_trainer, kmeans_machine, scores, max_iterations=500,
-        convergence_threshold=0.1)
-    scores = kmeans_machine.means
+    if gformat == "kmeans":
+        kmeans_machine = bob.learn.em.KMeansMachine(npoints, 2)
+        kmeans_trainer = bob.learn.em.KMeansTrainer()
+        bob.learn.em.train(
+            kmeans_trainer, kmeans_machine, scores, max_iterations=500,
+            convergence_threshold=0.1)
+        scores = kmeans_machine.means
 
-  elif(gformat == "random"):
-    if seed is not None:
-      numpy.random.seed(seed)
-    scores_indexes = numpy.array(
-        numpy.random.rand(npoints) * scores.shape[0], dtype=int)
+    elif gformat == "random":
+        if seed is not None:
+            numpy.random.seed(seed)
+        scores_indexes = numpy.array(
+            numpy.random.rand(npoints) * scores.shape[0], dtype=int)
 
-    scores = scores[scores_indexes]
+        scores = scores[scores_indexes]
 
-  return scores
+    return scores
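
Typical use of grouping() to thin out fused score pairs before scatter-plotting; npoints and the seed below are arbitrary choices for the example:

    import numpy
    from bob.fusion.base.tools.plotting import grouping

    scores = numpy.random.randn(10000, 2)  # (sample, system) score pairs
    subset = grouping(scores, gformat='random', npoints=500, seed=0)
    assert subset.shape == (500, 2)
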
diff --git a/setup.py b/setup.py
index 57bcd3b..bd43cf5 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ setup(
     # information before releasing code publicly.
     name='bob.fusion.base',
     version=open("version.txt").read().rstrip(),
-    description='Tools for running score fusion in biometric experiments',
+    description='Score fusion in biometric experiments',
 
     url='https://gitlab.idiap.ch/bob/bob.fusion.base',
     license='GPLv3',
-- 
GitLab