Commit 4e89bcb3 authored by Anjith GEORGE's avatar Anjith GEORGE

Merge branch 'correct-apcer-calculation' into 'master'

Correct apcer calculation

Closes #31

See merge request !60
parents 47b3f2d0 8f66c86e
Pipeline #30336 passed with stages
in 12 minutes and 22 seconds
...@@ -7,11 +7,8 @@ from bob.pad.base.algorithm import Algorithm ...@@ -7,11 +7,8 @@ from bob.pad.base.algorithm import Algorithm
import bob.learn.mlp import bob.learn.mlp
import bob.io.base import bob.io.base
from bob.bio.video.utils import FrameContainer import logging
from bob.pad.base.utils import convert_frame_cont_to_array logger = logging.getLogger(__name__)
from bob.core.log import setup
logger = setup("bob.pad.base")
class MLP(Algorithm): class MLP(Algorithm):
...@@ -42,11 +39,11 @@ class MLP(Algorithm): ...@@ -42,11 +39,11 @@ class MLP(Algorithm):
criterion to stop the training: if the difference criterion to stop the training: if the difference
between current and last loss is smaller than between current and last loss is smaller than
this number, then stop training. this number, then stop training.
""" """
Algorithm.__init__(self, Algorithm.__init__(self,
performs_projection=True, performs_projection=True,
requires_projector_training=True, requires_projector_training=True,
**kwargs) **kwargs)
self.hidden_units = hidden_units self.hidden_units = hidden_units
...@@ -54,32 +51,31 @@ class MLP(Algorithm): ...@@ -54,32 +51,31 @@ class MLP(Algorithm):
self.precision = precision self.precision = precision
self.mlp = None self.mlp = None
def train_projector(self, training_features, projector_file): def train_projector(self, training_features, projector_file):
"""Trains the MLP """Trains the MLP
Parameters Parameters
---------- ----------
training_features : :any:`list` of :py:class:`numpy.ndarray` training_features : :any:`list` of :py:class:`numpy.ndarray`
Data used to train the MLP. The real attempts are in training_features[0] and the attacks are in training_features[1] Data used to train the MLP. The real attempts are in training_features[0] and the attacks are in training_features[1]
projector_file : str projector_file : str
Filename where to save the trained model. Filename where to save the trained model.
""" """
# training is done in batch (i.e. using all training data) # training is done in batch (i.e. using all training data)
batch_size = len(training_features[0]) + len(training_features[1]) batch_size = len(training_features[0]) + len(training_features[1])
# The labels # The labels
label_real = numpy.zeros((len(training_features[0]), 2), dtype='float64') label_real = numpy.zeros((len(training_features[0]), 2), dtype='float64')
label_real[:, 0] = 1 label_real[:, 0] = 1
label_attack = numpy.zeros((len(training_features[1]), 2), dtype='float64') label_attack = numpy.zeros((len(training_features[1]), 2), dtype='float64')
label_attack[:, 1] = 0 label_attack[:, 1] = 0
real = numpy.array(training_features[0]) real = numpy.array(training_features[0])
attack = numpy.array(training_features[1]) attack = numpy.array(training_features[1])
X = numpy.vstack([real, attack]) X = numpy.vstack([real, attack])
Y = numpy.vstack([label_real, label_attack]) Y = numpy.vstack([label_real, label_attack])
# Building MLP architecture # Building MLP architecture
input_dim = real.shape[1] input_dim = real.shape[1]
shape = [] shape = []
...@@ -89,16 +85,16 @@ class MLP(Algorithm): ...@@ -89,16 +85,16 @@ class MLP(Algorithm):
# last layer contains two units: one for each class (i.e. real and attack) # last layer contains two units: one for each class (i.e. real and attack)
shape.append(2) shape.append(2)
shape = tuple(shape) shape = tuple(shape)
self.mlp = bob.learn.mlp.Machine(shape) self.mlp = bob.learn.mlp.Machine(shape)
self.mlp.output_activation = bob.learn.activation.Logistic() self.mlp.output_activation = bob.learn.activation.Logistic()
self.mlp.randomize() self.mlp.randomize()
trainer = bob.learn.mlp.BackProp(batch_size, bob.learn.mlp.CrossEntropyLoss(self.mlp.output_activation), self.mlp, train_biases=True) trainer = bob.learn.mlp.BackProp(batch_size, bob.learn.mlp.CrossEntropyLoss(self.mlp.output_activation), self.mlp, train_biases=True)
n_iter = 0 n_iter = 0
previous_cost = 0 previous_cost = 0
current_cost = 1 current_cost = 1
while (n_iter < self.max_iter) and (abs(previous_cost - current_cost) > self.precision): while (n_iter < self.max_iter) and (abs(previous_cost - current_cost) > self.precision):
previous_cost = current_cost previous_cost = current_cost
trainer.train(self.mlp, X, Y) trainer.train(self.mlp, X, Y)
current_cost = trainer.cost(self.mlp, X, Y) current_cost = trainer.cost(self.mlp, X, Y)
...@@ -107,14 +103,13 @@ class MLP(Algorithm): ...@@ -107,14 +103,13 @@ class MLP(Algorithm):
f = bob.io.base.HDF5File(projector_file, 'w') f = bob.io.base.HDF5File(projector_file, 'w')
self.mlp.save(f) self.mlp.save(f)
def project(self, feature): def project(self, feature):
"""Project the given feature """Project the given feature
Parameters Parameters
---------- ----------
feature : :py:class:`numpy.ndarray` feature : :py:class:`numpy.ndarray`
The feature to classify The feature to classify
Returns Returns
...@@ -126,18 +121,17 @@ class MLP(Algorithm): ...@@ -126,18 +121,17 @@ class MLP(Algorithm):
# feature = convert_frame_cont_to_array(feature) # feature = convert_frame_cont_to_array(feature)
return self.mlp(feature) return self.mlp(feature)
def score(self, toscore): def score(self, toscore):
"""Returns the probability of the real class. """Returns the probability of the real class.
Parameters Parameters
---------- ----------
toscore : :py:class:`numpy.ndarray` toscore : :py:class:`numpy.ndarray`
Returns Returns
------- -------
float float
probability of the authentication attempt to be real. probability of the authentication attempt to be real.
""" """
if toscore.ndim == 1: if toscore.ndim == 1:
return [toscore[0]] return [toscore[0]]
......
This diff is collapsed.
...@@ -7,93 +7,235 @@ import bob.bio.base.script.gen as bio_gen ...@@ -7,93 +7,235 @@ import bob.bio.base.script.gen as bio_gen
import bob.measure.script.figure as measure_figure import bob.measure.script.figure as measure_figure
from bob.bio.base.score import load from bob.bio.base.score import load
from . import pad_figure as figure from . import pad_figure as figure
from .error_utils import negatives_per_pai_and_positives
from functools import partial
SCORE_FORMAT = ( SCORE_FORMAT = (
"Files must be 4-col format, see " "Files must be 4-col format, see " ":py:func:`bob.bio.base.score.load.four_column`."
":py:func:`bob.bio.base.score.load.four_column`.") )
CRITERIA = ('eer', 'min-hter', 'bpcer20') CRITERIA = (
"eer",
"min-hter",
"far",
"bpcer5000",
"bpcer2000",
"bpcer1000",
"bpcer500",
"bpcer200",
"bpcer100",
"bpcer50",
"bpcer20",
"bpcer10",
"bpcer5",
"bpcer2",
"bpcer1",
)
def metrics_option(
    sname="-m",
    lname="--metrics",
    name="metrics",
    help="List of metrics to print. Provide a string with comma separated metric "
    "names. For possible values see the default value.",
    default="apcer_pais,apcer,bpcer,acer,fta,fpr,fnr,hter,far,frr,precision,recall,f1_score",
    **kwargs
):
    """The metrics option.

    Returns a decorator factory that attaches a ``-m/--metrics`` click option
    to a command. The option takes a comma-separated string of metric names;
    the callback splits it into a list and also stores that list in
    ``ctx.meta[name]`` so downstream figure classes can read it.
    """

    def custom_metrics_option(func):
        # Callback runs at parse time: turn "a,b,c" into ["a", "b", "c"]
        # and mirror the value into ctx.meta for later consumers.
        def callback(ctx, param, value):
            if value is not None:
                value = value.split(",")
            ctx.meta[name] = value
            return value

        return click.option(
            sname,
            lname,
            default=default,
            help=help,
            show_default=True,
            callback=callback,
            **kwargs
        )(func)

    return custom_metrics_option
def regexps_option(
    help="A list of regular expressions (by repeating this option) to be used to "
    "categorize PAIs. Each regexp must match one type of PAI.",
    **kwargs
):
    """The ``-r/--regexps`` option.

    Returns a decorator factory adding a repeatable click option used to
    group Presentation Attack Instruments (PAIs) by regular expression.
    The parsed tuple of patterns is mirrored into ``ctx.meta["regexps"]``.
    """

    def custom_regexps_option(func):
        def callback(ctx, param, value):
            # Stash the regexps in ctx.meta so figure classes can access them.
            ctx.meta["regexps"] = value
            return value

        return click.option(
            "-r",
            "--regexps",
            default=None,
            multiple=True,  # option may be repeated, one regexp per PAI type
            help=help,
            callback=callback,
            **kwargs
        )(func)

    return custom_regexps_option
def regexp_column_option(
    help="The column in the score files to match the regular expressions against.",
    **kwargs
):
    """The ``-rc/--regexp-column`` option.

    Returns a decorator factory adding a click option that selects which
    4-column score-file field the PAI regexps are matched against. The
    choice is restricted to the three identifier columns and mirrored into
    ``ctx.meta["regexp_column"]``.
    """

    def custom_regexp_column_option(func):
        def callback(ctx, param, value):
            ctx.meta["regexp_column"] = value
            return value

        return click.option(
            "-rc",
            "--regexp-column",
            default="real_id",
            type=click.Choice(("claimed_id", "real_id", "test_label")),
            help=help,
            show_default=True,
            callback=callback,
            **kwargs
        )(func)

    return custom_regexp_column_option
@click.command() @click.command()
@click.argument('outdir') @click.argument("outdir")
@click.option('-mm', '--mean-match', default=10, type=click.FLOAT, @click.option("-mm", "--mean-match", default=10, type=click.FLOAT, show_default=True)
show_default=True) @click.option(
@click.option('-mnm', '--mean-non-match', default=-10, "-mnm", "--mean-non-match", default=-10, type=click.FLOAT, show_default=True
type=click.FLOAT, show_default=True) )
@click.option('-n', '--n-sys', default=1, type=click.INT, show_default=True) @click.option("-n", "--n-sys", default=1, type=click.INT, show_default=True)
@verbosity_option() @verbosity_option()
@click.pass_context @click.pass_context
def gen(ctx, outdir, mean_match, mean_non_match, n_sys, **kwargs): def gen(ctx, outdir, mean_match, mean_non_match, n_sys, **kwargs):
"""Generate random scores. """Generate random scores.
Generates random scores in 4col or 5col format. The scores are generated Generates random scores in 4col or 5col format. The scores are generated
using Gaussian distribution whose mean is an input using Gaussian distribution whose mean is an input
parameter. The generated scores can be used as hypothetical datasets. parameter. The generated scores can be used as hypothetical datasets.
Invokes :py:func:`bob.bio.base.script.commands.gen`. Invokes :py:func:`bob.bio.base.script.commands.gen`.
""" """
ctx.meta['five_col'] = False ctx.meta["five_col"] = False
ctx.forward(bio_gen.gen) ctx.forward(bio_gen.gen)
@common_options.metrics_command(common_options.METRICS_HELP.format( @common_options.metrics_command(
names='FtA, APCER, BPCER, FAR, FRR, ACER', common_options.METRICS_HELP.format(
criteria=CRITERIA, score_format=SCORE_FORMAT, names="FtA, APCER, BPCER, FPR, FNR, FAR, FRR, ACER, HTER, precision, recall, f1_score",
hter_note='Note that FAR = APCER * (1 - FtA), ' criteria=CRITERIA,
'FRR = FtA + BPCER * (1 - FtA) and ACER = (APCER + BPCER) / 2.', score_format=SCORE_FORMAT,
command='bob pad metrics'), criteria=CRITERIA) hter_note="Note that APCER = max(APCER_pais), BPCER=FNR, "
def metrics(ctx, scores, evaluation, **kwargs): "FAR = FPR * (1 - FtA), "
process = figure.Metrics(ctx, scores, evaluation, load.split) "FRR = FtA + FNR * (1 - FtA), "
process.run() "ACER = (APCER + BPCER) / 2, "
"and HTER = (FPR + FNR) / 2. "
"You can control which metrics are printed using the --metrics option. "
"You can use --regexps and --regexp_column options to change the behavior "
"of finding Presentation Attack Instrument (PAI) types",
command="bob pad metrics",
),
criteria=CRITERIA,
epilog="""\b
More Examples:
\b
bob pad metrics -vvv -e -lg IQM,LBP -r print -r video -m fta,apcer_pais,apcer,bpcer,acer,hter \
/scores/oulunpu/{qm-svm,lbp-svm}/Protocol_1/scores/scores-{dev,eval}
See also ``bob pad multi-metrics``.
""",
)
@regexps_option()
@regexp_column_option()
@metrics_option()
def metrics(ctx, scores, evaluation, regexps, regexp_column, metrics, **kwargs):
load_fn = partial(
negatives_per_pai_and_positives, regexps=regexps, regexp_column=regexp_column
)
process = figure.Metrics(ctx, scores, evaluation, load_fn, metrics)
process.run()
@common_options.roc_command( @common_options.roc_command(
common_options.ROC_HELP.format( common_options.ROC_HELP.format(score_format=SCORE_FORMAT, command="bob pad roc")
score_format=SCORE_FORMAT, command='bob pad roc')) )
def roc(ctx, scores, evaluation, **kwargs): def roc(ctx, scores, evaluation, **kwargs):
process = figure.Roc(ctx, scores, evaluation, load.split) process = figure.Roc(ctx, scores, evaluation, load.split)
process.run() process.run()
@common_options.det_command( @common_options.det_command(
common_options.DET_HELP.format( common_options.DET_HELP.format(score_format=SCORE_FORMAT, command="bob pad det")
score_format=SCORE_FORMAT, command='bob pad det')) )
def det(ctx, scores, evaluation, **kwargs): def det(ctx, scores, evaluation, **kwargs):
process = figure.Det(ctx, scores, evaluation, load.split) process = figure.Det(ctx, scores, evaluation, load.split)
process.run() process.run()
@common_options.epc_command( @common_options.epc_command(
common_options.EPC_HELP.format( common_options.EPC_HELP.format(score_format=SCORE_FORMAT, command="bob pad epc")
score_format=SCORE_FORMAT, command='bob pad epc')) )
def epc(ctx, scores, **kwargs): def epc(ctx, scores, **kwargs):
process = measure_figure.Epc(ctx, scores, True, load.split, hter='ACER') process = measure_figure.Epc(ctx, scores, True, load.split, hter="ACER")
process.run() process.run()
@common_options.hist_command( @common_options.hist_command(
common_options.HIST_HELP.format( common_options.HIST_HELP.format(score_format=SCORE_FORMAT, command="bob pad hist")
score_format=SCORE_FORMAT, command='bob pad hist')) )
def hist(ctx, scores, evaluation, **kwargs): def hist(ctx, scores, evaluation, **kwargs):
process = figure.Hist(ctx, scores, evaluation, load.split) process = figure.Hist(ctx, scores, evaluation, load.split)
process.run() process.run()
@common_options.evaluate_command( @common_options.evaluate_command(
common_options.EVALUATE_HELP.format( common_options.EVALUATE_HELP.format(
score_format=SCORE_FORMAT, command='bob pad evaluate'), score_format=SCORE_FORMAT, command="bob pad evaluate"
criteria=CRITERIA) ),
criteria=CRITERIA,
)
def evaluate(ctx, scores, evaluation, **kwargs): def evaluate(ctx, scores, evaluation, **kwargs):
common_options.evaluate_flow( common_options.evaluate_flow(
ctx, scores, evaluation, metrics, roc, det, epc, hist, **kwargs) ctx, scores, evaluation, metrics, roc, det, epc, hist, **kwargs
)
@common_options.multi_metrics_command( @common_options.multi_metrics_command(
common_options.MULTI_METRICS_HELP.format( common_options.MULTI_METRICS_HELP.format(
names='FtA, APCER, BPCER, FAR, FRR, ACER', names="FtA, APCER, BPCER, FAR, FRR, ACER, HTER, precision, recall, f1_score",
criteria=CRITERIA, score_format=SCORE_FORMAT, criteria=CRITERIA,
command='bob pad multi-metrics'), score_format=SCORE_FORMAT,
criteria=CRITERIA) command="bob pad multi-metrics",
def multi_metrics(ctx, scores, evaluation, protocols_number, **kwargs): ),
ctx.meta['min_arg'] = protocols_number * (2 if evaluation else 1) criteria=CRITERIA,
process = figure.MultiMetrics( epilog="""\b
ctx, scores, evaluation, load.split) More examples:
process.run()
\b
bob pad multi-metrics -vvv -e -pn 6 -lg IQM,LBP -r print -r video \
/scores/oulunpu/{qm-svm,lbp-svm}/Protocol_3_{1,2,3,4,5,6}/scores/scores-{dev,eval}
See also ``bob pad metrics``.
""",
)
@regexps_option()
@regexp_column_option()
@metrics_option(default="fta,apcer_pais,apcer,bpcer,acer,hter")
def multi_metrics(
ctx, scores, evaluation, protocols_number, regexps, regexp_column, metrics, **kwargs
):
ctx.meta["min_arg"] = protocols_number * (2 if evaluation else 1)
load_fn = partial(
negatives_per_pai_and_positives, regexps=regexps, regexp_column=regexp_column
)
process = figure.MultiMetrics(ctx, scores, evaluation, load_fn, metrics)
process.run()
This diff is collapsed.
This diff is collapsed.
from bob.io.base.test_utils import datafile
from bob.io.base import HDF5File
from bob.pad.base.script.error_utils import (
negatives_per_pai_and_positives,
apcer_bpcer,
calc_threshold,
)
import nose
import numpy as np
GENERATE_REFERENCES = False
scores_dev = datafile("per_pai_scores/scores-dev", module=__name__)
scores_dev_reference_mask = datafile(
"per_pai_scores/scores-dev-{i}.hdf5", module=__name__
)
def _dump_dict(f, d, name):
f[f"{name}_len"] = len(d)
for i, (k, v) in enumerate(d.items()):
f[f"{name}_key_{i}"] = k
f[f"{name}_value_{i}"] = v
def _read_dict(f, name):
ret = dict()
for i in range(f[f"{name}_len"]):
k = f[f"{name}_key_{i}"]
v = f[f"{name}_value_{i}"]
if isinstance(v, np.ndarray):
v = v.tolist()
ret[k] = v
return ret
def test_per_pai_apcer():
    """Check per-PAI APCER/BPCER computation against stored HDF5 references.

    Runs three regexp configurations: ``None`` (no grouping), a grouping
    that covers all PAIs, and one that deliberately misses some PAIs —
    the last must raise ``ValueError`` and is skipped.
    """
    configurations = (None, ["x[0-2]", "x[3-4]"], ["x[1-2]", "x[3-4]"])
    for case_index, regexps in enumerate(configurations):
        try:
            pos, negs = negatives_per_pai_and_positives(scores_dev, regexps)
        except ValueError:
            # The third configuration does not match every PAI and is
            # expected to fail; any other failure is a real error.
            if case_index == 2:
                continue
            raise

        # Flatten per-PAI negatives into a single list for thresholding.
        all_negs = [score for pai_scores in negs.values() for score in pai_scores]

        thresholds = {
            method: calc_threshold(
                method, pos, negs.values(), all_negs, far_value=0.1
            )
            for method in ("bpcer20", "far", "eer", "min-hter")
        }

        metrics = {}
        for method, threshold in thresholds.items():
            apcers, apcer, bpcer = apcer_bpcer(threshold, pos, *negs.values())
            metrics[method] = apcers + [apcer, bpcer]

        reference_path = scores_dev_reference_mask.format(i=case_index)
        if GENERATE_REFERENCES:
            # Regenerate the reference files instead of checking them.
            with HDF5File(reference_path, "w") as f:
                f["pos"] = pos
                _dump_dict(f, negs, "negs")
                _dump_dict(f, thresholds, "thresholds")
                _dump_dict(f, metrics, "metrics")

        with HDF5File(reference_path, "r") as f:
            ref_pos = f["pos"].tolist()
            ref_negs = _read_dict(f, "negs")
            ref_thresholds = _read_dict(f, "thresholds")
            ref_metrics = _read_dict(f, "metrics")

        nose.tools.assert_list_equal(pos, ref_pos)
        nose.tools.assert_dict_equal(negs, ref_negs)
        nose.tools.assert_dict_equal(thresholds, ref_thresholds)
        nose.tools.assert_dict_equal(metrics, ref_metrics)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment