Commit fd183226 authored by Amir MOHAMMADI

New handling of scores

parent d605a113
@@ -21,7 +21,7 @@ class MLP(AlgorithmBob):
     def __init__(self,
                  n_systems=2,
-                 hidden_layers=None,
+                 hidden_layers=(3,),
                  seed=None,
                  machine=None,
                  trainer=None,
@@ -29,9 +29,7 @@ class MLP(AlgorithmBob):
         super(MLP, self).__init__(
             classifier=self,
             *args, **kwargs)
-        if hidden_layers is None:
-            hidden_layers = [3]
-        self.mlp_shape = [n_systems] + hidden_layers + [1]
+        self.mlp_shape = [n_systems] + list(hidden_layers) + [1]
         self.seed = seed
         self.machine = machine
         self.trainer = trainer
...
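For illustration, a minimal sketch (not part of the commit) of the shape computation above: the hidden layers are now given as a tuple and converted with list() before concatenation.

# Sketch only; values are made up.
n_systems = 2
hidden_layers = (3,)                                   # the new default
mlp_shape = [n_systems] + list(hidden_layers) + [1]    # -> [2, 3, 1]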
@@ -5,14 +5,13 @@
 from __future__ import print_function, absolute_import, division
 import os
-import numpy as np
 from bob.io.base import create_directories_safe
-from bob.measure.load import load_score, get_all_scores,\
-    get_negatives_positives_all, dump_score
+from bob.measure.load import load_score, dump_score
 from bob.bio.base import utils
-from ..tools import parse_arguments, write_info
+from ..tools import parse_arguments, write_info, get_gza_from_lines_list, \
+    check_consistency, get_scores, remove_nan
 import bob.core
 logger = bob.core.log.setup("bob.fusion.base")
@@ -25,35 +24,68 @@ def fuse(args, command_line_parameters):
     write_info(args, command_line_parameters)

     # load the scores
-    score_lines_list_dev = [load_score(path, ncolumns=args.score_type)
-                            for path in args.dev_files]
-    scores_dev = get_all_scores(score_lines_list_dev)
-    (neg, pos) = get_negatives_positives_all(score_lines_list_dev)
+    score_lines_list_train = [load_score(path, ncolumns=args.score_type)
+                              for path in args.train_files]
+    if args.dev_files:
+        score_lines_list_dev = [load_score(path, ncolumns=args.score_type)
+                                for path in args.dev_files]
     if args.eval_files:
         score_lines_list_eval = [load_score(path, ncolumns=args.score_type)
                                  for path in args.eval_files]
-        scores_eval = get_all_scores(score_lines_list_eval)
+
+    # genuine, zero effort impostor, and attack list of
+    # train, development and evaluation data.
+    idx1, gen_lt, zei_lt, atk_lt = get_gza_from_lines_list(score_lines_list_train)
+    if args.dev_files:
+        _, gen_ld, zei_ld, atk_ld = get_gza_from_lines_list(score_lines_list_dev)
+    if args.eval_files:
+        _, gen_le, zei_le, atk_le = get_gza_from_lines_list(score_lines_list_eval)

     # check if score lines are consistent
     if not args.skip_check:
-        score_lines0 = score_lines_list_dev[0]
-        for score_lines in score_lines_list_dev[1:]:
-            assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
-            assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
+        check_consistency(gen_lt, zei_lt, atk_lt)
+        if args.dev_files:
+            check_consistency(gen_ld, zei_ld, atk_ld)
         if args.eval_files:
-            score_lines0 = score_lines_list_eval[0]
-            for score_lines in score_lines_list_eval[1:]:
-                assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
-                assert(np.all(score_lines['real_id'] == score_lines0['real_id']))
+            check_consistency(gen_le, zei_le, atk_le)
+
+    scores_train = get_scores(gen_lt, zei_lt, atk_lt)
+    train_neg = get_scores(zei_lt, atk_lt)
+    train_pos = get_scores(gen_lt)
+    if args.dev_files:
+        scores_dev = get_scores(gen_ld, zei_ld, atk_ld)
+        dev_neg = get_scores(zei_ld, atk_ld)
+        dev_pos = get_scores(gen_ld)
+    else:
+        dev_neg, dev_pos = None, None
+    if args.eval_files:
+        scores_eval = get_scores(gen_le, zei_le, atk_le)
+
+    # check for nan values
+    found_nan = False
+    found_nan, _, scores_train = remove_nan(scores_train, found_nan)
+    found_nan, _, train_neg = remove_nan(train_neg, found_nan)
+    found_nan, _, train_pos = remove_nan(train_pos, found_nan)
+    if args.dev_files:
+        found_nan, nan_dev, scores_dev = remove_nan(scores_dev, found_nan)
+        found_nan, _, dev_neg = remove_nan(dev_neg, found_nan)
+        found_nan, _, dev_pos = remove_nan(dev_pos, found_nan)
+    if args.eval_files:
+        found_nan, nan_eval, scores_eval = remove_nan(scores_eval, found_nan)
+
+    if found_nan:
+        logger.warn('Some nan values were removed.')

     # train the preprocessors
-    algorithm.train_preprocessors(scores_dev)
+    algorithm.train_preprocessors(scores_train)

     # preprocess data
-    scores_dev = algorithm.preprocess(scores_dev)
+    train_neg, train_pos = algorithm.preprocess(train_neg), algorithm.preprocess(train_pos)
+    if args.dev_files:
+        scores_dev = algorithm.preprocess(scores_dev)
+        dev_neg, dev_pos = algorithm.preprocess(dev_neg), algorithm.preprocess(dev_pos)
     if args.eval_files:
         scores_eval = algorithm.preprocess(scores_eval)
-    neg, pos = algorithm.preprocess(neg), algorithm.preprocess(pos)

     # train the model
     if utils.check_file(args.model_file, args.force, 1000):
@@ -61,16 +93,16 @@ def fuse(args, command_line_parameters):
            "- Fusion: model '%s' already exists.", args.model_file)
         algorithm = algorithm.load(args.model_file)
     else:
-        algorithm.train(neg, pos)
+        algorithm.train(train_neg, train_pos, dev_neg, dev_pos)
         algorithm.save(args.model_file)

     # fuse the scores (dev)
     if utils.check_file(args.fused_dev_file, args.force, 1000):
         logger.info(
             "- Fusion: scores '%s' already exists.", args.fused_dev_file)
-    else:
+    elif args.dev_files:
         fused_scores_dev = algorithm.fuse(scores_dev)
-        score_lines = score_lines_list_dev[0]
+        score_lines = score_lines_list_dev[idx1][~nan_dev]
         score_lines['score'] = fused_scores_dev
         create_directories_safe(os.path.dirname(args.fused_dev_file))
         dump_score(args.fused_dev_file, score_lines)
@@ -82,7 +114,7 @@ def fuse(args, command_line_parameters):
                "- Fusion: scores '%s' already exists.", args.fused_eval_file)
         else:
             fused_scores_eval = algorithm.fuse(scores_eval)
-            score_lines = score_lines_list_eval[0]
+            score_lines = score_lines_list_eval[idx1][~nan_eval]
             score_lines['score'] = fused_scores_eval
             create_directories_safe(os.path.dirname(args.fused_eval_file))
             dump_score(args.fused_eval_file, score_lines)
...
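For illustration, a small sketch (not part of the commit, data made up) of why the fused scores are written back with score_lines_list_dev[idx1][~nan_dev]: remove_nan() drops the rows that contain NaN in any system's column, so the same boolean mask has to be applied to the score lines before the fused scores are copied into them. In the real script the mask comes from remove_nan() over the stacked per-system scores; here a single column stands in for it.

# Sketch only; field names follow the score-line format used in the diff.
import numpy as np

score_lines = np.array(
    [('A', 'A', 'f1', 0.2), ('A', 'B', 'f2', np.nan), ('B', 'B', 'f3', 0.7)],
    dtype=[('claimed_id', 'U8'), ('real_id', 'U8'),
           ('test_label', 'U8'), ('score', float)])
nan_mask = np.isnan(score_lines['score'])   # stands in for nan_dev
kept = score_lines[~nan_mask]               # the rows remove_nan() kept
kept['score'] = [0.5, 0.9]                  # stands in for fused_scores_dev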
 #!/usr/bin/env python
-"""Plot decision boundraries of the fusion algorithm.
-
-Usage:
-  plot_fusion_decision_boundary.py SCORE_FILE SCORE_FILE MODEL_FILE
-    [-v... | --verbose...] [options]
-  plot_fusion_decision_boundary.py (-h | --help)
-  plot_fusion_decision_boundary.py (-V | --version)
-
-Options:
-  -o, --output PLOT_FILE       The path to save the plot. [default: scatter.pdf]
-  --score-type {4,5}           The format the scores are provided. [default: 4]
-  -v, --verbose                Increase the verbosity level from 0 (only error
-                               messages) to 1 (warnings), 2 (log messages), 3 (debug
-                               information) by adding the --verbose option as often
-                               as desired (e.g. '-vvv' for debug). [default: 0]
-  -g, --group N                If given scores will be grouped into N samples.
-                               Give -1 for no grouping. [default: -1]
-  --grouping {random, kmeans}  The gouping algorithm used. [default: kmeans]
-  -h --help                    Show this screen.
-  -V, --version                Show version.
-"""
-from docopt import docopt
+"""Plot decision boundraries of the fusion algorithm."""
+
+import argparse
 import matplotlib.pyplot as plt
-import numpy
+import numpy as np
 import bob.fusion.base
 import bob.core
-from bob.measure.load import load_score, get_negatives_positives,\
-    get_all_scores
-from bob.measure import eer_threshold
-from ..tools import grouping
+from bob.measure.load import load_score
+from ..tools import grouping, get_gza_from_lines_list, \
+    get_scores, remove_nan, check_consistency

 logger = bob.core.log.setup("bob.fusion.base")
@@ -53,14 +30,14 @@ def plot_boundary_decision(algorithm, scores, score_labels, threshold,
     '''
     Plots the boundary decision of the Algorithm

-    @param score_labels numpy.array A (scores.shape[0]) array containing
+    @param score_labels np.array A (scores.shape[0]) array containing
                         the true labels of scores.
     @param threshold    float       threshold of the decision boundary
     '''
     if legends is None:
-        legends = ['Impostor', 'Genuine']
-    markers = ['x', 'o']
+        legends = ['Impostor', 'Attack', 'Genuine']
+    markers = ['x', 'o', 's']

     if scores.shape[1] > 2:
         raise NotImplementedError(
@@ -74,27 +51,25 @@ def plot_boundary_decision(algorithm, scores, score_labels, threshold,
     Y = score_labels
     x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad
     y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad
-    xx, yy = numpy.meshgrid(
-        numpy.linspace(x_min, x_max, resolution),
-        numpy.linspace(y_min, y_max, resolution))
-    temp = numpy.c_[xx.ravel(), yy.ravel()]
+    xx, yy = np.meshgrid(
+        np.linspace(x_min, x_max, resolution),
+        np.linspace(y_min, y_max, resolution))
+    temp = np.c_[xx.ravel(), yy.ravel()]
     temp = algorithm.preprocess(temp)
     Z = (algorithm.fuse(temp) > threshold).reshape(xx.shape)

-    contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)
+    contourf = plt.contourf(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)

     if do_grouping:
-        negatives, positives = X[numpy.logical_not(Y)], X[Y]
-        negatives, positives = grouping(negatives, positives, **kwargs)
-        X = numpy.concatenate((negatives, positives), axis=0)
-        Y = numpy.concatenate(
-            (numpy.zeros(negatives.shape[0], dtype=numpy.bool8),
-             numpy.ones(positives.shape[0], dtype=numpy.bool8)),
-            axis=0)
-
-    negatives, positives = X[numpy.logical_not(Y)], X[Y]
-    colors = plt.cm.viridis(numpy.linspace(0, 1, 2))
-    for i, X in enumerate((negatives, positives)):
+        gen = grouping(X[Y == 0, :], **kwargs)
+        zei = grouping(X[Y == 1, :], **kwargs)
+        atk = grouping(X[Y == 2, :], **kwargs)
+    else:
+        gen = X[Y == 0, :]
+        zei = X[Y == 1, :]
+        atk = X[Y == 2, :]
+    colors = plt.cm.viridis(np.linspace(0, 1, 3))
+    for i, X in enumerate((zei, atk, gen)):
         plt.scatter(
             X[:, 0], X[:, 1], marker=markers[i], alpha=alpha,
             c=colors[i], label=legends[i])
@@ -112,38 +87,90 @@ def plot_boundary_decision(algorithm, scores, score_labels, threshold,

 def main(command_line_parameters=None):
-    args = docopt(__doc__, argv=command_line_parameters,
-                  version=bob.fusion.base.get_config())
-    bob.core.log.set_verbosity_level(logger, args['--verbose'])
+
+    # setup command line parameters
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('-e', '--eval-files', nargs='+', required=True,
+                        help='A list of score files to be plotted usually the '
+                             'evaluation set.')
+    parser.add_argument('-m', '--model-file', required=True,
+                        help='Path to Model.pkl of a saved bob.fusion.algorithm.')
+    parser.add_argument('-t', '--threshold', type=float, default=0, required=True,
+                        help='The threshold to classify scores after fusion.'
+                             'Usually calculated from fused development set.')
+    parser.add_argument('-o', '--output', default='scatter.pdf',
+                        help='The path to save the plot.')
+    parser.add_argument('-g', '--group', default=0, type=int,
+                        help='If given scores will be grouped into N samples.')
+    parser.add_argument('-G', '--grouping', choices=('random', 'kmeans'),
+                        default='kmeans',
+                        help='The gouping algorithm to be used.')
+    parser.add_argument('--skip-check', action='store_true',
+                        help='If provided, score files are not checked '
+                             'for consistency')
+    parser.add_argument('--score-type', choices=(4, 5), default=None,
+                        help='The format the scores are provided.')
+
+    bob.core.log.add_command_line_option(parser)
+
+    # parse command line options
+    args = parser.parse_args(command_line_parameters)
+    bob.core.log.set_verbosity_level(logger, args.verbose)

     # load the algorithm
     algorithm = bob.fusion.base.algorithm.Algorithm()
-    algorithm = algorithm.load(args['MODEL_FILE'])
+    algorithm = algorithm.load(args.model_file)

     # load the scores
-    score_lines_list = [
-        load_score(path, int(args['--score-type'])) for
-        path in args['SCORE_FILE']]
-    scores = get_all_scores(score_lines_list)
-    score_lines = numpy.array(score_lines_list[0])
-    score_lines['score'] = algorithm.fuse(algorithm.preprocess(scores))
-    threshold = eer_threshold(*get_negatives_positives(score_lines))
-    score_labels = score_lines['claimed_id'] == score_lines['real_id']
+    score_lines_list_eval = [load_score(path, ncolumns=args.score_type)
+                             for path in args.eval_files]
+
+    # genuine, zero effort impostor, and attack list
+    idx1, gen_le, zei_le, atk_le = get_gza_from_lines_list(score_lines_list_eval)
+
+    # check if score lines are consistent
+    if not args.skip_check:
+        check_consistency(gen_le, zei_le, atk_le)
+
+    # concatenate the scores and create the labels
+    scores = get_scores(gen_le, zei_le, atk_le)
+    score_labels = np.zeros((scores.shape[0],))
+    gensize = gen_le[0].shape[0]
+    zeisize = zei_le[0].shape[0]
+    score_labels[:gensize] = 0
+    score_labels[gensize: gensize + zeisize] = 1
+    score_labels[gensize + zeisize:] = 2
+    found_nan, nan_idx, scores = remove_nan(scores, False)
+    score_labels = score_labels[~nan_idx]
+
+    if found_nan:
+        logger.warn('{} nan values were removed.'.format(np.sum(nan_idx)))

     # plot the decision boundary
     do_grouping = True
-    if int(args['--group']) == -1:
+    if args.group < 1:
         do_grouping = False

     plot_boundary_decision(
-        algorithm, scores, score_labels, threshold,
+        algorithm, scores, score_labels, args.threshold,
         do_grouping=do_grouping,
-        npoints=int(args['--group']),
+        npoints=args.group,
         seed=0,
-        gformat=args['--grouping']
+        gformat=args.grouping
     )
-    plt.savefig(args['--output'])
+    plt.savefig(args.output)
     plt.close()


 if __name__ == '__main__':
     main()
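A hypothetical invocation of the rewritten plot script (not part of the commit). The flag names come from the parser above; the file paths and threshold value are placeholders, and a real run needs existing score files and a saved Model.pkl.

# Sketch only; the module path matches the import used in the test below.
from bob.fusion.base.script.plot_fusion_decision_boundary import main

main(['-e', 'scores-eval-1', 'scores-eval-2',   # placeholder score files
      '-m', 'fusion_result/Model.pkl',          # placeholder model path
      '-t', '0.42',                             # threshold from the fused dev set
      '-g', '500', '-G', 'kmeans',              # group points with k-means
      '-o', 'scatter.pdf'])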
@@ -8,8 +8,8 @@ from bob.fusion.base.script.bob_fuse import main as bob_fuse
 from bob.fusion.base.script.plot_fusion_decision_boundary import main as plot_fusion_decision_boundary
 from bob.io.base.test_utils import datafile

-dev_files = [datafile("scores-dev-1", 'bob.fusion.base'),
-             datafile("scores-dev-2", 'bob.fusion.base')]
+train_files = [datafile("scores-dev-1", 'bob.fusion.base'),
+               datafile("scores-dev-2", 'bob.fusion.base')]
 eval_files = [datafile("scores-eval-1", 'bob.fusion.base'),
               datafile("scores-eval-2", 'bob.fusion.base')]
@@ -22,22 +22,22 @@ def test_scripts():
         fused_eval_file = os.path.join(tmpdir, 'scores-eval')

         # test normally
-        cmd = ['-i'] + dev_files + ['-o', fused_dev_file, '-a', 'llr']
+        cmd = ['-s', tmpdir, '-t'] + train_files + ['-o', fused_dev_file, '-a', 'llr']
         bob_fuse(cmd)

-        cmd = ['-i'] + dev_files + ['-I'] + eval_files + ['-o', fused_dev_file, '-O', fused_eval_file, '-a', 'llr']
+        cmd = ['-s', tmpdir, '-t'] + train_files + ['-e'] + eval_files + ['-o', fused_dev_file, '-O', fused_eval_file, '-a', 'llr']
         bob_fuse(cmd)

         # make inconsistency
         wrong_dev2 = os.path.join(tmpdir, 'scores-dev-2')
-        with open(wrong_dev2, 'w') as f1, open(dev_files[1]) as f2:
+        with open(wrong_dev2, 'w') as f1, open(train_files[1]) as f2:
             lines = f2.readlines()
             temp = lines[0].split()
             temp = (temp[0], 'temp1_id', temp[2], temp[3])
             lines[0] = ' '.join(temp) + '\n'
             f1.writelines(lines)

-        cmd = ['-i'] + dev_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr']
+        cmd = ['-s', tmpdir, '-t'] + train_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr']
         try:
             bob_fuse(cmd)
         except AssertionError:
@@ -46,13 +46,13 @@ def test_scripts():
             raise Exception('An AssertionError should have been raised.')

         # this should not raise an error
-        cmd = ['-i'] + dev_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr', '--skip-check']
+        cmd = ['-s', tmpdir, '-t'] + train_files[0:1] + [wrong_dev2] + ['-o', fused_dev_file, '-a', 'llr', '--skip-check']
         bob_fuse(cmd)

         # test plot
         model_file = os.path.join(tmpdir, 'Model.pkl')
         output = os.path.join(tmpdir, 'scatter.pdf')
-        cmd = dev_files + [model_file, '-o', output]
+        cmd = train_files + [model_file, '-o', output]
         plot_fusion_decision_boundary(cmd)

     finally:
...
+from common import *
 from command_line import *
 from plotting import *
...
@@ -36,25 +36,31 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]):
         description=description,
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)

-    parser.add_argument('-i', '--dev-files', required=True,
+    parser.add_argument('-t', '--train-files', required=True,
                         nargs='+', help="A list of score files of "
-                        "the development set.")
-    parser.add_argument('-I', '--eval-files', nargs='+',
+                        "the train set.")
+    parser.add_argument('-d', '--dev-files', nargs='+',
+                        help="A list of score files of the development set; "
+                        "if given it must be the same number of files "
+                        "as the --train-files.")
+    parser.add_argument('-e', '--eval-files', nargs='+',
                         help="A list of score files of the evaluation set; "
                         "if given it must be the same number of files "
-                        "as the --dev-files.")
-    parser.add_argument('-o', '--fused-dev-file',
-                        required=True, help='The fused development score file.')
-    parser.add_argument(
-        '-O', '--fused-eval-file', help='The fused evaluation score file.')
-    parser.add_argument('--score-type', choices=[4, 5], default=4,
-                        help='The format the scores are provided.')
+                        "as the --train-files.")
+    parser.add_argument('-o', '--fused-dev-file', default=None,
+                        help='The fused development score file. '
+                        'Default is "scores-dev" in the --save-directory')
+    parser.add_argument('-O', '--fused-eval-file', default=None,
+                        help='The fused evaluation score file. '
+                        'Default is "scores-eval" in the --save-directory')
+    parser.add_argument('--score-type', choices=[4, 5], default=None,
+                        help='The format the scores are provided. If not '
+                        'provided, the number of columns will be guessed.')
     parser.add_argument('--skip-check', action='store_true',
                         help='If provided, score files are not checked '
                         'for consistency')
     parser.add_argument('-s', '--save-directory', help='The directory to save '
-                        'the experiment artifacts. If not given, the directory'
-                        ' of fused-dev-file will be used.')
+                        'the experiment artifacts.', default='fusion_result')

     config_group = parser.add_argument_group(
         'Parameters defining the experiment', ' Most of these parameters can be a'
@@ -95,8 +101,10 @@ def initialize(parsers, command_line_parameters=None, skips=[]):
         args.algorithm, 'algorithm', imports=args.imports)

     # set base directories
-    if args.save_directory is None:
-        args.save_directory = os.path.dirname(args.fused_dev_file)
+    if args.fused_dev_file is None:
+        args.fused_dev_file = os.path.join(args.save_directory, 'scores-dev')
+    if args.fused_eval_file is None:
+        args.fused_eval_file = os.path.join(args.save_directory, 'scores-eval')

     # result files
     args.info_file = os.path.join(args.save_directory, 'Experiment.info')
...
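A hypothetical call of the reworked command line (not part of the commit). It mirrors the calls in the updated test earlier in this diff; the score file paths are placeholders, and '-a llr' selects the fusion algorithm resource as in the test.

# Sketch only; a real run needs existing score files.
from bob.fusion.base.script.bob_fuse import main as bob_fuse

bob_fuse(['-s', 'fusion_result',                         # --save-directory
          '-t', 'scores-train-1', 'scores-train-2',      # --train-files (required)
          '-d', 'scores-dev-1', 'scores-dev-2',          # --dev-files (optional)
          '-e', 'scores-eval-1', 'scores-eval-2',        # --eval-files (optional)
          '-a', 'llr'])
# Without -o/-O the fused files default to
# fusion_result/scores-dev and fusion_result/scores-eval.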
import numpy as np

import bob.core
logger = bob.core.log.setup("bob.fusion.base")


def get_2negatives_1positive(score_lines):
    gen_mask = score_lines['claimed_id'] == score_lines['real_id']
    atk_mask = np.logical_or(np.char.count(score_lines['real_id'], 'spoof/') > 0,
                             np.char.count(score_lines['real_id'], 'attack') > 0)
    zei_mask = np.logical_and(np.logical_not(gen_mask), np.logical_not(atk_mask))
    gen = score_lines[gen_mask]
    zei = score_lines[zei_mask]
    atk = score_lines[atk_mask]
    return (gen, zei, atk, gen_mask, zei_mask, atk_mask)


def check_consistency(gen_l, zei_l, atk_l):
    if len(gen_l) < 2:
        logger.error('Check failed since less than two system is available.')
    for score_lines_list in (gen_l, zei_l, atk_l):
        if not score_lines_list:
            continue
        score_lines0 = score_lines_list[0]
        for score_lines in score_lines_list[1:]:
            assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id']))
            assert(np.all(score_lines['real_id'] == score_lines0['real_id']))


def get_scores(*args):
    scores = []
    for temp in zip(*args):
        scores.append(np.concatenate([a['score'] for a in temp], axis=0))
    return np.vstack(scores).T


def remove_nan(samples, found_nan):
    ncls = samples.shape[1]
    nans = np.isnan(samples[:, 0])
    for i in range(1, ncls):
        nans = np.logical_or(nans, np.isnan(samples[:, i]))
    return np.any(nans) or found_nan, nans, samples[~nans, :]


def get_gza_from_lines_list(score_lines_list):
    gen_l, zei_l, atk_l = [], [], []
    for score_lines in score_lines_list:
        gen, zei, atk, _, _, _ = get_2negatives_1positive(score_lines)
        gen_l.append(gen)
        zei_l.append(zei)
        atk_l.append(atk)
    zei_lengths = []
    for zei in zei_l:
        zei_lengths.append(zei.size)
    zei_lengths = np.array(zei_lengths)
    idx1 = 0  # used later if it does not enter the if.
    if not (np.all(zei_lengths == 0) or np.all(zei_lengths > 0)):
        # generate the missing ones
        # find one that has zei
        idx1 = zei_lengths.nonzero()[0][0]
        zei_full = zei_l[idx1]
        for idx2 in np.where(zei_lengths == 0)[0]:
            if zei_l[idx2] is None:
                continue
            temp = np.array(zei_full)
            # make sure we replace all scores.
            temp['score'] = np.nan
            # get the list of ids
            real_ids = np.unique(temp['real_id'])
            # find pad score of that id and replace the score
            for real_id in real_ids:
                # get the list of test_labels
                test_labels = np.unique(temp['test_label'][temp['real_id'] == real_id])
                for test_label in test_labels:
                    idx3 = np.logical_and(temp['real_id'] == real_id,
                                          temp['test_label'] == test_label)
                    idx4 = np.logical_and(gen_l[idx2]['real_id'] == real_id,
                                          gen_l[idx2]['test_label'] == test_label)
                    temp['score'][idx3] = gen_l[idx2]['score'][idx4]
            zei_l[idx2] = temp
    return idx1, gen_l, zei_l, atk_l
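To illustrate how these helpers fit together, a small sketch with made-up data (not part of the commit): get_scores stacks one column per system and one row per sample, and remove_nan drops any row that contains a NaN in any column.

# Sketch only; the import path matches the "..tools" package used by the scripts above.
import numpy as np
from bob.fusion.base.tools import get_scores, remove_nan

dtype = [('score', float)]                              # only field get_scores reads
gen_l = [np.array([(0.9,), (0.8,)], dtype=dtype),       # genuine scores, system 1
         np.array([(0.7,), (0.6,)], dtype=dtype)]       # genuine scores, system 2
zei_l = [np.array([(0.1,), (np.nan,)], dtype=dtype),    # zero-effort impostors, system 1
         np.array([(0.2,), (0.3,)], dtype=dtype)]       # zero-effort impostors, system 2

pos = get_scores(gen_l)                 # shape (2, 2): rows = samples, columns = systems
neg = get_scores(zei_l)                 # shape (2, 2); the second row contains a NaN
found_nan, nan_mask, neg = remove_nan(neg, False)
# found_nan == True, nan_mask == [False, True], neg now has shape (1, 2)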
@@ -4,38 +4,24 @@ import numpy
 import bob.learn.em


-def grouping(negatives, positives,
-             gformat='random', npoints=500, seed=None, **kwargs):
+def grouping(scores, gformat='random', npoints=500, seed=None, **kwargs):

-    negatives = numpy.asarray(negatives)
-    positives = numpy.asarray(positives)
+    scores = numpy.asarray(scores)

     if(gformat == "kmeans"):
-        kmeans_negatives = bob.learn.em.KMeansMachine(npoints, 2)
-        kmeans_positives = bob.learn.em.KMeansMachine(npoints, 2)
-        kmeansTrainer = bob.learn.em.KMeansTrainer()
+        kmeans_machine = bob.learn.em.KMeansMachine(npoints, 2)
+        kmeans_trainer = bob.learn.em.KMeansTrainer()
         bob.learn.em.train(
-            kmeansTrainer, kmeans_negatives, negatives, max_iterations=500,
+            kmeans_trainer, kmeans_machine, scores, max_iterations=500,
             convergence_threshold=0.1)
-        bob.learn.em.train(
-            kmeansTrainer, kmeans_positives, positives, max_iterations=500,
-            convergence_threshold=0.1)
-        negatives = kmeans_negatives.means
-        positives = kmeans_positives.means
+        scores = kmeans_machine.means

     elif(gformat == "random"):
         if seed is not None:
             numpy.random.seed(seed)
-        negatives_indexes = numpy.array(
-            numpy.random.rand(npoints) * negatives.shape[0], dtype=int)
-        positives_indexes = numpy.array(
-            numpy.random.rand(npoints) * positives.shape[0], dtype=int)
+        scores_indexes = numpy.array(
+            numpy.random.rand(npoints) * scores.shape[0], dtype=int)

-        negatives = negatives[negatives_indexes]
-        positives = positives[positives_indexes]
+        scores = scores[scores_indexes]

-    return negatives, positives
+    return scores
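A sketch (not part of the commit) of the new per-class call pattern, mirroring how plot_boundary_decision now calls grouping once per label instead of once for negatives and positives together. The data is made up; the import path matches the "..tools" package used by the scripts above.

# Sketch only; 'kmeans' additionally requires bob.learn.em to be installed.
import numpy
from bob.fusion.base.tools import grouping

scores = numpy.random.rand(1000, 2)                                # 2-system scores of one class
subset = grouping(scores, gformat='random', npoints=50, seed=0)    # 50 randomly sampled rows
centroids = grouping(scores, gformat='kmeans', npoints=50)         # 50 k-means cluster means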