From fd18322634501dedcd0ae76eaa823fcf37751ac8 Mon Sep 17 00:00:00 2001
From: Amir Mohammadi <amir.mohammadi@idiap.ch>
Date: Mon, 25 Jul 2016 14:10:34 +0200
Subject: [PATCH] New handling of scores

---
 bob/fusion/base/algorithm/MLP.py                   |   6 +-
 bob/fusion/base/script/bob_fuse.py                 |  80 ++++++---
 .../script/plot_fusion_decision_boundary.py        | 159 ++++++++++--------
 bob/fusion/base/test/test_scripts.py               |  16 +-
 bob/fusion/base/tools/__init__.py                  |   1 +
 bob/fusion/base/tools/command_line.py              |  36 ++--
 bob/fusion/base/tools/common.py                    |  81 +++++++++
 bob/fusion/base/tools/plotting.py                  |  34 ++--
 8 files changed, 273 insertions(+), 140 deletions(-)
 create mode 100644 bob/fusion/base/tools/common.py

diff --git a/bob/fusion/base/algorithm/MLP.py b/bob/fusion/base/algorithm/MLP.py
index e3898e8..44bf878 100644
--- a/bob/fusion/base/algorithm/MLP.py
+++ b/bob/fusion/base/algorithm/MLP.py
@@ -21,7 +21,7 @@ class MLP(AlgorithmBob):
 
     def __init__(self,
                  n_systems=2,
-                 hidden_layers=None,
+                 hidden_layers=(3,),
                  seed=None,
                  machine=None,
                  trainer=None,
@@ -29,9 +29,7 @@ class MLP(AlgorithmBob):
         super(MLP, self).__init__(
             classifier=self,
             *args, **kwargs)
-        if hidden_layers is None:
-            hidden_layers = [3]
-        self.mlp_shape = [n_systems] + hidden_layers + [1]
+        self.mlp_shape = [n_systems] + list(hidden_layers) + [1]
         self.seed = seed
         self.machine = machine
         self.trainer = trainer
diff --git a/bob/fusion/base/script/bob_fuse.py b/bob/fusion/base/script/bob_fuse.py
index 435ce68..8d50149 100755
--- a/bob/fusion/base/script/bob_fuse.py
+++ b/bob/fusion/base/script/bob_fuse.py
@@ -5,14 +5,13 @@ from __future__ import print_function, absolute_import, division
 
 import os
-import numpy as np
 
 from bob.io.base import create_directories_safe
-from bob.measure.load import load_score, get_all_scores,\
-    get_negatives_positives_all, dump_score
+from bob.measure.load import load_score, dump_score
 from bob.bio.base import utils
 
-from ..tools import parse_arguments, write_info
+from ..tools import parse_arguments, write_info, get_gza_from_lines_list, \
+    check_consistency, get_scores, remove_nan
 
 import bob.core
 logger = bob.core.log.setup("bob.fusion.base")
 
@@ -25,35 +24,68 @@ def fuse(args, command_line_parameters):
     write_info(args, command_line_parameters)
 
     # load the scores
-    score_lines_list_dev = [load_score(path, ncolumns=args.score_type)
-                            for path in args.dev_files]
-    scores_dev = get_all_scores(score_lines_list_dev)
-    (neg, pos) = get_negatives_positives_all(score_lines_list_dev)
+    score_lines_list_train = [load_score(path, ncolumns=args.score_type)
+                              for path in args.train_files]
+    if args.dev_files:
+        score_lines_list_dev = [load_score(path, ncolumns=args.score_type)
+                                for path in args.dev_files]
     if args.eval_files:
         score_lines_list_eval = [load_score(path, ncolumns=args.score_type)
                                  for path in args.eval_files]
-        scores_eval = get_all_scores(score_lines_list_eval)
+
+    # genuine, zero effort impostor, and attack list of
+    # train, development and evaluation data.
+    idx1, gen_lt, zei_lt, atk_lt = get_gza_from_lines_list(score_lines_list_train)
+    if args.dev_files:
+        _, gen_ld, zei_ld, atk_ld = get_gza_from_lines_list(score_lines_list_dev)
+    if args.eval_files:
+        _, gen_le, zei_le, atk_le = get_gza_from_lines_list(score_lines_list_eval)
 
     # check if score lines are consistent
     if not args.skip_check:
-        score_lines0 = score_lines_list_dev[0]
-        for score_lines in score_lines_list_dev[1:]:
-            assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
-            assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
+        check_consistency(gen_lt, zei_lt, atk_lt)
+        if args.dev_files:
+            check_consistency(gen_ld, zei_ld, atk_ld)
         if args.eval_files:
-            score_lines0 = score_lines_list_eval[0]
-            for score_lines in score_lines_list_eval[1:]:
-                assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
-                assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
+            check_consistency(gen_le, zei_le, atk_le)
+
+    scores_train = get_scores(gen_lt, zei_lt, atk_lt)
+    train_neg = get_scores(zei_lt, atk_lt)
+    train_pos = get_scores(gen_lt)
+    if args.dev_files:
+        scores_dev = get_scores(gen_ld, zei_ld, atk_ld)
+        dev_neg = get_scores(zei_ld, atk_ld)
+        dev_pos = get_scores(gen_ld)
+    else:
+        dev_neg, dev_pos = None, None
+    if args.eval_files:
+        scores_eval = get_scores(gen_le, zei_le, atk_le)
+
+    # check for nan values
+    found_nan = False
+    found_nan, _, scores_train = remove_nan(scores_train, found_nan)
+    found_nan, _, train_neg = remove_nan(train_neg, found_nan)
+    found_nan, _, train_pos = remove_nan(train_pos, found_nan)
+    if args.dev_files:
+        found_nan, nan_dev, scores_dev = remove_nan(scores_dev, found_nan)
+        found_nan, _, dev_neg = remove_nan(dev_neg, found_nan)
+        found_nan, _, dev_pos = remove_nan(dev_pos, found_nan)
+    if args.eval_files:
+        found_nan, nan_eval, scores_eval = remove_nan(scores_eval, found_nan)
+
+    if found_nan:
+        logger.warn('Some nan values were removed.')
 
     # train the preprocessors
-    algorithm.train_preprocessors(scores_dev)
+    algorithm.train_preprocessors(scores_train)
 
     # preprocess data
-    scores_dev = algorithm.preprocess(scores_dev)
+    train_neg, train_pos = algorithm.preprocess(train_neg), algorithm.preprocess(train_pos)
+    if args.dev_files:
+        scores_dev = algorithm.preprocess(scores_dev)
+        dev_neg, dev_pos = algorithm.preprocess(dev_neg), algorithm.preprocess(dev_pos)
     if args.eval_files:
         scores_eval = algorithm.preprocess(scores_eval)
-    neg, pos = algorithm.preprocess(neg), algorithm.preprocess(pos)
 
     # train the model
     if utils.check_file(args.model_file, args.force, 1000):
@@ -61,16 +93,16 @@ def fuse(args, command_line_parameters):
             "- Fusion: model '%s' already exists.", args.model_file)
         algorithm = algorithm.load(args.model_file)
     else:
-        algorithm.train(neg, pos)
+        algorithm.train(train_neg, train_pos, dev_neg, dev_pos)
         algorithm.save(args.model_file)
 
     # fuse the scores (dev)
     if utils.check_file(args.fused_dev_file, args.force, 1000):
         logger.info(
             "- Fusion: scores '%s' already exists.", args.fused_dev_file)
-    else:
+    elif args.dev_files:
         fused_scores_dev = algorithm.fuse(scores_dev)
-        score_lines = score_lines_list_dev[0]
+        score_lines = score_lines_list_dev[idx1][~nan_dev]
         score_lines['score'] = fused_scores_dev
         create_directories_safe(os.path.dirname(args.fused_dev_file))
         dump_score(args.fused_dev_file, score_lines)
@@ -82,7 +114,7 @@ def fuse(args, command_line_parameters):
             "- Fusion: scores '%s' already exists.", args.fused_eval_file)
         else:
             fused_scores_eval = algorithm.fuse(scores_eval)
-            score_lines = score_lines_list_eval[0]
+            score_lines = score_lines_list_eval[idx1][~nan_eval]
             score_lines['score'] = fused_scores_eval
             create_directories_safe(os.path.dirname(args.fused_eval_file))
             dump_score(args.fused_eval_file, score_lines)
diff --git a/bob/fusion/base/script/plot_fusion_decision_boundary.py b/bob/fusion/base/script/plot_fusion_decision_boundary.py
index 346d3c1..3e5c6ae 100644
--- a/bob/fusion/base/script/plot_fusion_decision_boundary.py
+++ b/bob/fusion/base/script/plot_fusion_decision_boundary.py
@@ -1,39 +1,16 @@
 #!/usr/bin/env python
 
-"""Plot decision boundraries of the fusion algorithm.
-
-Usage:
-  plot_fusion_decision_boundary.py SCORE_FILE SCORE_FILE MODEL_FILE
-    [-v... | --verbose...] [options]
-  plot_fusion_decision_boundary.py (-h | --help)
-  plot_fusion_decision_boundary.py (-V | --version)
-
-Options:
-  -o, --output PLOT_FILE       The path to save the plot. [default: scatter.pdf]
-  --score-type {4,5}           The format the scores are provided. [default: 4]
-  -v, --verbose                Increase the verbosity level from 0 (only error
-                               messages) to 1 (warnings), 2 (log messages), 3 (debug
-                               information) by adding the --verbose option as often
-                               as desired (e.g. '-vvv' for debug). [default: 0]
-  -g, --group N                If given scores will be grouped into N samples.
-                               Give -1 for no grouping. [default: -1]
-  --grouping {random, kmeans}  The gouping algorithm used. [default: kmeans]
-  -h --help                    Show this screen.
-  -V, --version                Show version.
-
-"""
-
-from docopt import docopt
+"""Plot decision boundaries of the fusion algorithm."""
+
+import argparse
 import matplotlib.pyplot as plt
-import numpy
+import numpy as np
 
 import bob.fusion.base
 import bob.core
-from bob.measure.load import load_score, get_negatives_positives,\
-    get_all_scores
-from bob.measure import eer_threshold
-from ..tools import grouping
-
+from bob.measure.load import load_score
+from ..tools import grouping, get_gza_from_lines_list, \
+    get_scores, remove_nan, check_consistency
 logger = bob.core.log.setup("bob.fusion.base")
 
 
@@ -53,14 +30,14 @@ def plot_boundary_decision(algorithm, scores, score_labels, threshold,
     '''
     Plots the boundary decision of the Algorithm
 
-    @param score_labels numpy.array A (scores.shape[0]) array containing
+    @param score_labels np.array A (scores.shape[0]) array containing
      the true labels of scores.
     @param threshold float threshold of the decision boundary
     '''
 
     if legends is None:
-        legends = ['Impostor', 'Genuine']
-    markers = ['x', 'o']
+        legends = ['Impostor', 'Attack', 'Genuine']
+    markers = ['x', 'o', 's']
 
     if scores.shape[1] > 2:
         raise NotImplementedError(
@@ -74,27 +51,25 @@ def plot_boundary_decision(algorithm, scores, score_labels, threshold,
     Y = score_labels
     x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad
     y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad
-    xx, yy = numpy.meshgrid(
-        numpy.linspace(x_min, x_max, resolution),
-        numpy.linspace(y_min, y_max, resolution))
-    temp = numpy.c_[xx.ravel(), yy.ravel()]
+    xx, yy = np.meshgrid(
+        np.linspace(x_min, x_max, resolution),
+        np.linspace(y_min, y_max, resolution))
+    temp = np.c_[xx.ravel(), yy.ravel()]
     temp = algorithm.preprocess(temp)
     Z = (algorithm.fuse(temp) > threshold).reshape(xx.shape)
 
-    contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)
+    contourf = plt.contourf(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)
 
     if do_grouping:
-        negatives, positives = X[numpy.logical_not(Y)], X[Y]
-        negatives, positives = grouping(negatives, positives, **kwargs)
-        X = numpy.concatenate((negatives, positives), axis=0)
-        Y = numpy.concatenate(
-            (numpy.zeros(negatives.shape[0], dtype=numpy.bool8),
-             numpy.ones(positives.shape[0], dtype=numpy.bool8)),
-            axis=0)
-
-    negatives, positives = X[numpy.logical_not(Y)], X[Y]
-    colors = plt.cm.viridis(numpy.linspace(0, 1, 2))
-    for i, X in enumerate((negatives, positives)):
+        gen = grouping(X[Y == 0, :], **kwargs)
+        zei = grouping(X[Y == 1, :], **kwargs)
+        atk = grouping(X[Y == 2, :], **kwargs)
+    else:
+        gen = X[Y == 0, :]
+        zei = X[Y == 1, :]
+        atk = X[Y == 2, :]
+    colors = plt.cm.viridis(np.linspace(0, 1, 3))
+    for i, X in enumerate((zei, atk, gen)):
         plt.scatter(
             X[:, 0], X[:, 1], marker=markers[i], alpha=alpha,
             c=colors[i], label=legends[i])
@@ -112,38 +87,90 @@ def plot_boundary_decision(algorithm, scores, score_labels, threshold,
 
 
 def main(command_line_parameters=None):
-    args = docopt(__doc__, argv=command_line_parameters,
-                  version=bob.fusion.base.get_config())
-    bob.core.log.set_verbosity_level(logger, args['--verbose'])
+
+    # setup command line parameters
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('-e', '--eval-files', nargs='+', required=True,
+                        help='A list of score files to be plotted, usually the '
+                             'evaluation set.')
+
+    parser.add_argument('-m', '--model-file', required=True,
+                        help='Path to Model.pkl of a saved bob.fusion.algorithm.')
+
+    parser.add_argument('-t', '--threshold', type=float, default=0, required=True,
                        help='The threshold to classify scores after fusion. '
+                             'Usually calculated from fused development set.')
+
+    parser.add_argument('-o', '--output', default='scatter.pdf',
+                        help='The path to save the plot.')
+
+    parser.add_argument('-g', '--group', default=0, type=int,
+                        help='If given, scores will be grouped into N samples.')
+
+    parser.add_argument('-G', '--grouping', choices=('random', 'kmeans'),
+                        default='kmeans',
+                        help='The grouping algorithm to be used.')
+
+    parser.add_argument('--skip-check', action='store_true',
+                        help='If provided, score files are not checked '
+                             'for consistency')
+
+    parser.add_argument('--score-type', choices=(4, 5), default=None,
+                        help='The format the scores are provided.')
+
+    bob.core.log.add_command_line_option(parser)
+
+    # parse command line options
+    args = parser.parse_args(command_line_parameters)
+    bob.core.log.set_verbosity_level(logger, args.verbose)
 
     # load the algorithm
     algorithm = bob.fusion.base.algorithm.Algorithm()
-    algorithm = algorithm.load(args['MODEL_FILE'])
+    algorithm = algorithm.load(args.model_file)
 
     # load the scores
-    score_lines_list = [
-        load_score(path, int(args['--score-type'])) for
-        path in args['SCORE_FILE']]
-    scores = get_all_scores(score_lines_list)
-    score_lines = numpy.array(score_lines_list[0])
-    score_lines['score'] = algorithm.fuse(algorithm.preprocess(scores))
-    threshold = eer_threshold(*get_negatives_positives(score_lines))
-    score_labels = score_lines['claimed_id'] == score_lines['real_id']
+    score_lines_list_eval = [load_score(path, ncolumns=args.score_type)
+                             for path in args.eval_files]
+
+    # genuine, zero effort impostor, and attack list
+    idx1, gen_le, zei_le, atk_le = get_gza_from_lines_list(score_lines_list_eval)
+
+    # check if score lines are consistent
+    if not args.skip_check:
+        check_consistency(gen_le, zei_le, atk_le)
+
+    # concatenate the scores and create the labels
+    scores = get_scores(gen_le, zei_le, atk_le)
+    score_labels = np.zeros((scores.shape[0],))
+    gensize = gen_le[0].shape[0]
+    zeisize = zei_le[0].shape[0]
+    score_labels[:gensize] = 0
+    score_labels[gensize: gensize + zeisize] = 1
+    score_labels[gensize + zeisize:] = 2
+    found_nan, nan_idx, scores = remove_nan(scores, False)
+    score_labels = score_labels[~nan_idx]
+
+    if found_nan:
+        logger.warn('{} nan values were removed.'.format(np.sum(nan_idx)))
 
     # plot the decision boundary
     do_grouping = True
-    if int(args['--group']) == -1:
+    if args.group < 1:
         do_grouping = False
 
     plot_boundary_decision(
-        algorithm, scores, score_labels, threshold,
+        algorithm, scores, score_labels, args.threshold,
         do_grouping=do_grouping,
-        npoints=int(args['--group']),
+        npoints=args.group,
        seed=0,
-        gformat=args['--grouping']
+        gformat=args.grouping
     )
-    plt.savefig(args['--output'])
+    plt.savefig(args.output)
     plt.close()
 
+
 if __name__ == '__main__':
     main()
diff --git a/bob/fusion/base/test/test_scripts.py b/bob/fusion/base/test/test_scripts.py
index 9bfe632..ef143b8 100644
--- a/bob/fusion/base/test/test_scripts.py
+++ b/bob/fusion/base/test/test_scripts.py
@@ -8,8 +8,8 @@ from bob.fusion.base.script.bob_fuse import main as bob_fuse
 from bob.fusion.base.script.plot_fusion_decision_boundary import main as plot_fusion_decision_boundary
 from bob.io.base.test_utils import datafile
 
-dev_files = [datafile("scores-dev-1", 'bob.fusion.base'),
-             datafile("scores-dev-2", 'bob.fusion.base')]
+train_files = [datafile("scores-dev-1", 'bob.fusion.base'),
+               datafile("scores-dev-2", 'bob.fusion.base')]
 eval_files = [datafile("scores-eval-1", 'bob.fusion.base'),
               datafile("scores-eval-2", 'bob.fusion.base')]
 
@@ -22,22 +22,22 @@ def test_scripts():
         fused_eval_file = os.path.join(tmpdir, 'scores-eval')
 
         # test normally
-        cmd = ['-i'] + dev_files + ['-o', fused_dev_file, '-a', 'llr']
+        cmd = ['-s', tmpdir, '-t'] + train_files + ['-o', fused_dev_file, '-a', 'llr']
         bob_fuse(cmd)
 
-        cmd = ['-i'] + dev_files + ['-I'] + eval_files + ['-o', fused_dev_file, '-O', fused_eval_file, '-a', 'llr']
+        cmd = ['-s', tmpdir, '-t'] + train_files + ['-e'] + eval_files + ['-o', fused_dev_file, '-O', fused_eval_file, '-a', 'llr']
         bob_fuse(cmd)
 
         # make inconsistency
         wrong_dev2 = os.path.join(tmpdir, 'scores-dev-2')
-        with open(wrong_dev2, 'w') as f1, open(dev_files[1]) as f2:
+        with open(wrong_dev2, 'w') as f1, open(train_files[1]) as f2:
             lines = f2.readlines()
             temp = lines[0].split()
             temp = (temp[0], 'temp1_id', temp[2], temp[3])
             lines[0] = ' '.join(temp) + '\n'
             f1.writelines(lines)
 
-        cmd = ['-i'] + dev_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr']
+        cmd = ['-s', tmpdir, '-t'] + train_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr']
         try:
             bob_fuse(cmd)
         except AssertionError:
@@ -46,13 +46,13 @@ def test_scripts():
             raise Exception('An AssertionError should have been raised.')
 
         # this should not raise an error
-        cmd = ['-i'] + dev_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr', '--skip-check']
+        cmd = ['-s', tmpdir, '-t'] + train_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr', '--skip-check']
         bob_fuse(cmd)
 
         # test plot
         model_file = os.path.join(tmpdir, 'Model.pkl')
         output = os.path.join(tmpdir, 'scatter.pdf')
-        cmd = dev_files + [model_file, '-o', output]
+        cmd = train_files + [model_file, '-o', output]
         plot_fusion_decision_boundary(cmd)
 
     finally:
diff --git a/bob/fusion/base/tools/__init__.py b/bob/fusion/base/tools/__init__.py
index 133f547..3edd02f 100644
--- a/bob/fusion/base/tools/__init__.py
+++ b/bob/fusion/base/tools/__init__.py
@@ -1,3 +1,4 @@
+from common import *
 from command_line import *
 from plotting import *
 
diff --git a/bob/fusion/base/tools/command_line.py b/bob/fusion/base/tools/command_line.py
index f9662df..edbdd5e 100644
--- a/bob/fusion/base/tools/command_line.py
+++ b/bob/fusion/base/tools/command_line.py
@@ -36,25 +36,31 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]):
         description=description,
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 
-    parser.add_argument('-i', '--dev-files', required=True,
+    parser.add_argument('-t', '--train-files', required=True,
                         nargs='+', help="A list of score files of "
-                        "the development set.")
+                        "the training set.")
+    parser.add_argument('-d', '--dev-files', nargs='+',
+                        help="A list of score files of the development set; "
+                             "if given it must be the same number of files "
+                             "as the --train-files.")
-    parser.add_argument('-I', '--eval-files', nargs='+',
+    parser.add_argument('-e', '--eval-files', nargs='+',
                         help="A list of score files of the evaluation set; "
                              "if given it must be the same number of files "
-                             "as the --dev-files.")
-    parser.add_argument('-o', '--fused-dev-file',
-                        required=True, help='The fused development score file.')
-    parser.add_argument(
-        '-O', '--fused-eval-file', help='The fused evaluation score file.')
-    parser.add_argument('--score-type', choices=[4, 5], default=4,
-                        help='The format the scores are provided.')
+                             "as the --train-files.")
+    parser.add_argument('-o', '--fused-dev-file', default=None,
+                        help='The fused development score file. '
+                             'Default is "scores-dev" in the --save-directory')
+    parser.add_argument('-O', '--fused-eval-file', default=None,
+                        help='The fused evaluation score file. '
+                             'Default is "scores-eval" in the --save-directory')
+    parser.add_argument('--score-type', choices=[4, 5], default=None,
+                        help='The format the scores are provided. If not '
+                             'provided, the number of columns will be guessed.')
     parser.add_argument('--skip-check', action='store_true',
                         help='If provided, score files are not checked '
                              'for consistency')
     parser.add_argument('-s', '--save-directory', help='The directory to save '
-                        'the experiment artifacts. If not given, the directory'
-                        ' of fused-dev-file will be used.')
+                        'the experiment artifacts.', default='fusion_result')
 
     config_group = parser.add_argument_group(
         'Parameters defining the experiment', ' Most of these parameters can be a'
@@ -95,8 +101,10 @@ def initialize(parsers, command_line_parameters=None, skips=[]):
         args.algorithm, 'algorithm', imports=args.imports)
 
     # set base directories
-    if args.save_directory is None:
-        args.save_directory = os.path.dirname(args.fused_dev_file)
+    if args.fused_dev_file is None:
+        args.fused_dev_file = os.path.join(args.save_directory, 'scores-dev')
+    if args.fused_eval_file is None:
+        args.fused_eval_file = os.path.join(args.save_directory, 'scores-eval')
 
     # result files
     args.info_file = os.path.join(args.save_directory, 'Experiment.info')
diff --git a/bob/fusion/base/tools/common.py b/bob/fusion/base/tools/common.py
new file mode 100644
index 0000000..09235eb
--- /dev/null
+++ b/bob/fusion/base/tools/common.py
@@ -0,0 +1,81 @@
+import numpy as np
+
+import bob.core
+logger = bob.core.log.setup("bob.fusion.base")
+
+
+def get_2negatives_1positive(score_lines):
+    gen_mask = score_lines['claimed_id'] == score_lines['real_id']
+    atk_mask = np.logical_or(np.char.count(score_lines['real_id'], 'spoof/') > 0,
+                             np.char.count(score_lines['real_id'], 'attack') > 0)
+    zei_mask = np.logical_and(np.logical_not(gen_mask), np.logical_not(atk_mask))
+    gen = score_lines[gen_mask]
+    zei = score_lines[zei_mask]
+    atk = score_lines[atk_mask]
+    return (gen, zei, atk, gen_mask, zei_mask, atk_mask)
+
+
+def check_consistency(gen_l, zei_l, atk_l):
+    if len(gen_l) < 2:
+        logger.error('Check failed since fewer than two systems are available.')
+    for score_lines_list in (gen_l, zei_l, atk_l):
+        if not score_lines_list:
+            continue
+        score_lines0 = score_lines_list[0]
+        for score_lines in score_lines_list[1:]:
+            assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
+            assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
+
+
+def get_scores(*args):
+    scores = []
+    for temp in zip(*args):
+        scores.append(np.concatenate([a['score'] for a in temp], axis=0))
+    return np.vstack(scores).T
+
+
+def remove_nan(samples, found_nan):
+    ncls = samples.shape[1]
+    nans = np.isnan(samples[:, 0])
+    for i in range(1, ncls):
+        nans = np.logical_or(nans, np.isnan(samples[:, i]))
+    return np.any(nans) or found_nan, nans, samples[~nans, :]
+
+
+def get_gza_from_lines_list(score_lines_list):
+    gen_l, zei_l, atk_l = [], [], []
+    for score_lines in score_lines_list:
+        gen, zei, atk, _, _, _ = get_2negatives_1positive(score_lines)
+        gen_l.append(gen)
+        zei_l.append(zei)
+        atk_l.append(atk)
+    zei_lengths = []
+    for zei in zei_l:
+        zei_lengths.append(zei.size)
+    zei_lengths = np.array(zei_lengths)
+    idx1 = 0  # used later if the if block below is not entered.
+    if not (np.all(zei_lengths == 0) or np.all(zei_lengths > 0)):
+        # generate the missing ones
+        # find one that has zei
+        idx1 = zei_lengths.nonzero()[0][0]
+        zei_full = zei_l[idx1]
+        for idx2 in np.where(zei_lengths == 0)[0]:
+            if zei_l[idx2] is None:
+                continue
+            temp = np.array(zei_full)
+            # make sure we replace all scores.
+            temp['score'] = np.nan
+            # get the list of ids
+            real_ids = np.unique(temp['real_id'])
+            # find pad score of that id and replace the score
+            for real_id in real_ids:
+                # get the list of test_labels
+                test_labels = np.unique(temp['test_label'][temp['real_id'] == real_id])
+                for test_label in test_labels:
+                    idx3 = np.logical_and(temp['real_id'] == real_id,
+                                          temp['test_label'] == test_label)
+                    idx4 = np.logical_and(gen_l[idx2]['real_id'] == real_id,
+                                          gen_l[idx2]['test_label'] == test_label)
+                    temp['score'][idx3] = gen_l[idx2]['score'][idx4]
+            zei_l[idx2] = temp
+    return idx1, gen_l, zei_l, atk_l
diff --git a/bob/fusion/base/tools/plotting.py b/bob/fusion/base/tools/plotting.py
index 4459912..06ef61b 100644
--- a/bob/fusion/base/tools/plotting.py
+++ b/bob/fusion/base/tools/plotting.py
@@ -4,38 +4,24 @@ import numpy
 import bob.learn.em
 
 
-def grouping(negatives, positives,
-             gformat='random', npoints=500, seed=None, **kwargs):
+def grouping(scores, gformat='random', npoints=500, seed=None, **kwargs):
 
-    negatives = numpy.asarray(negatives)
-    positives = numpy.asarray(positives)
+    scores = numpy.asarray(scores)
 
     if(gformat == "kmeans"):
-
-        kmeans_negatives = bob.learn.em.KMeansMachine(npoints, 2)
-        kmeans_positives = bob.learn.em.KMeansMachine(npoints, 2)
-
-        kmeansTrainer = bob.learn.em.KMeansTrainer()
-
+        kmeans_machine = bob.learn.em.KMeansMachine(npoints, 2)
+        kmeans_trainer = bob.learn.em.KMeansTrainer()
         bob.learn.em.train(
-            kmeansTrainer, kmeans_negatives, negatives, max_iterations=500,
+            kmeans_trainer, kmeans_machine, scores, max_iterations=500,
             convergence_threshold=0.1)
-        bob.learn.em.train(
-            kmeansTrainer, kmeans_positives, positives, max_iterations=500,
-            convergence_threshold=0.1)
-
-        negatives = kmeans_negatives.means
-        positives = kmeans_positives.means
+        scores = kmeans_machine.means
 
     elif(gformat == "random"):
        if seed is not None:
            numpy.random.seed(seed)
-        negatives_indexes = numpy.array(
-            numpy.random.rand(npoints) * negatives.shape[0], dtype=int)
-        positives_indexes = numpy.array(
-            numpy.random.rand(npoints) * positives.shape[0], dtype=int)
+        scores_indexes = numpy.array(
+            numpy.random.rand(npoints) * scores.shape[0], dtype=int)
 
-        negatives = negatives[negatives_indexes]
-        positives = positives[positives_indexes]
+        scores = scores[scores_indexes]
 
-    return negatives, positives
+    return scores
-- 
GitLab
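
Reviewer note (not part of the patch, added for context): below is a minimal sketch of how the new helpers introduced in bob/fusion/base/tools/common.py are meant to be combined, mirroring what bob_fuse.py now does internally. The two score-file paths are hypothetical placeholders; everything else uses only the functions added by this patch plus bob.measure.load.load_score, which the scripts already import.

    import numpy as np
    from bob.measure.load import load_score
    from bob.fusion.base.tools import (
        check_consistency, get_gza_from_lines_list, get_scores, remove_nan)

    # two hypothetical 4-column score files produced by two different systems
    paths = ['system1/scores-dev', 'system2/scores-dev']
    score_lines_list = [load_score(p, ncolumns=4) for p in paths]

    # split each system's scores into genuine, zero-effort impostor and attack trials
    idx1, gen_l, zei_l, atk_l = get_gza_from_lines_list(score_lines_list)
    check_consistency(gen_l, zei_l, atk_l)  # same claimed_id/real_id order in all systems

    # stack scores column-wise: one row per trial, one column per system
    scores = get_scores(gen_l, zei_l, atk_l)  # all trials
    neg = get_scores(zei_l, atk_l)            # negatives: zero-effort impostors + attacks
    pos = get_scores(gen_l)                   # positives: genuine trials

    # drop trials for which any system produced a NaN score
    found_nan, nan_mask, scores = remove_nan(scores, False)
    if found_nan:
        print('{} trials with NaN scores were removed'.format(np.sum(nan_mask)))

These are the same calls bob_fuse.py now chains together before handing the stacked score matrix to the fusion algorithm (train_preprocessors, preprocess, train, fuse).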