Commit aeb1bb57
Merge branch 'metrics' into 'master'

Replace the old pad iso metrics class with a script

parents 82f9a36e e4efe403
......@@ -3,7 +3,6 @@ from .utils import *
from . import database
from . import algorithm
from . import tools
from . import evaluation
from . import script
from . import test
Created on 16 Nov 2016
@author: sbhatta
import sys, os
import numpy as np
import bob.measure
class PadIsoMetrics():
def __init__(self):
""" Sets the labels that save_scores_hdf5() stores in the 'presentation' attribute. """
self.real_name = 'bonafide' # label attached to bona-fide (real) score sets
self.attack_name = 'attack' # label attached to presentation-attack score sets
def save_scores_hdf5(self, outfile, scores_dict):
""" Saves the score dictionary ``scores_dict`` into the HDF5 file ``outfile``.

For every key, the first element of its value is written as a dataset and a
'presentation' attribute is attached to it; the second element, when present,
is written as the 'attack_potential' attribute.
"""
# NOTE(review): the right-hand side of the next line is truncated in this view (the
# constructor call that should precede `, "w")` is missing) -- restore it from the original file.
h5out =, "w")
for p in scores_dict.keys():
if len(scores_dict[p]) == 1: # real_set
h5out.set(p, scores_dict[p][0] )
h5out.set_attribute('presentation', self.real_name, p)
# NOTE(review): no `else:` is visible before the attack branch below; as shown, these
# statements would also run for one-element (bona-fide) entries and index a missing
# second element -- confirm against the original file.
#write attacks
h5out.set(p, scores_dict[p][0] )
h5out.set_attribute('attack_potential', scores_dict[p][1], p)
h5out.set_attribute('presentation', self.attack_name, p)
# Drops the only reference to the file handle; presumably this is what closes the file -- confirm.
del h5out
def load_scores_hdf5(self, infile):
""" Loads the HDF5 file ``infile`` and tries to rebuild the score dictionary from it.

Returns the score dictionary: one entry per key found in the file, holding the stored
scores and, for keys carrying two attributes, the 'attack_potential' value as well.
"""
# NOTE(review): the right-hand side of the next line is truncated in this view (the
# constructor call that should precede `, "r")` is missing) -- restore it from the original file.
h5in =, "r")
# NOTE(review): the next line looks like two statements fused by the same truncation
# (an empty-dictionary initialisation followed by a call ending in `'/')`) -- confirm.
scores_dict = {}'/')
# NOTE(review): `relative` receives the string 'True', not the boolean True -- confirm this is intended.
class_labels = h5in.keys(relative='True')
for p in class_labels:
scores = h5in.get(p)
attrs = h5in.get_attributes(p)
if len(attrs) == 2: #then the two elements are 'presentation' and 'attack_potential'
ap = attrs['attack_potential']
scores_dict[p] = [scores, ap]
# NOTE(review): no `else:` is visible here; as shown, the assignment below overwrites the
# two-element entry written just above -- confirm against the original file.
scores_dict[p] = [scores]
# Drops the only reference to the file handle; presumably this is what closes the file -- confirm.
del h5in
return scores_dict
def eer(self, scores_dict):
    """Compute the EER threshold from the scores in the supplied dictionary.

    Parameters
    ----------
    scores_dict : dict
        Maps a presentation name to a sequence. A sequence of length 2,
        ``(scores, attack_potential)``, is treated as an attack set; a
        sequence of length 1, ``(scores,)``, is treated as the bona-fide
        ('real') set. ``scores`` should be a 1D numpy array of floats and
        ``attack_potential`` one of the letters 'A', 'B' or 'C'.

    Returns
    -------
    tuple
        ``(eer_threshold, far, frr)``, computed with ``bob.measure``. The
        three values are also stored on the instance as ``threshEER_dev``,
        ``dev_far`` and ``dev_frr``.

    Raises
    ------
    AssertionError
        If ``scores_dict`` is None, or holds no attack set or no bona-fide set.
    """
    assert scores_dict is not None, 'no development score-set provided for computing EER'

    real_scores = None
    attack_sets = []
    for entry in scores_dict.values():
        if len(entry) == 2:
            attack_sets.append(entry[0])
        elif len(entry) == 1:
            # If several one-element entries exist, the last one wins.
            real_scores = entry[0]

    assert attack_sets, 'Empty attack-scores list. Cannot compute EER'
    assert (real_scores is not None), 'Empty real-scores list. Cannot compute EER.'

    # Pool every attack set exactly once; a single set is used as it is.
    if len(attack_sets) == 1:
        attack_scores = attack_sets[0]
    else:
        attack_scores = np.concatenate(attack_sets)

    self.threshEER_dev = bob.measure.eer_threshold(attack_scores, real_scores)
    self.dev_far, self.dev_frr = bob.measure.farfrr(attack_scores, real_scores, self.threshEER_dev)
    return (self.threshEER_dev, self.dev_far, self.dev_frr)
def hter(self, scores_dict, score_threshold):
    """Compute the HTER on test-set scores, using the supplied score threshold.

    Parameters
    ----------
    scores_dict : dict
        Maps a presentation name to a sequence. A sequence of length 2,
        ``(scores, attack_potential)``, is treated as an attack set; a
        sequence of length 1, ``(scores,)``, is treated as the bona-fide
        ('real') set. ``scores`` should be a 1D numpy array of floats and
        ``attack_potential`` one of the letters 'A', 'B' or 'C'.
    score_threshold : float or int
        Value used for thresholding the scores.

    Returns
    -------
    tuple
        ``(hter, far, frr)``; ``far`` and ``frr`` come from
        ``bob.measure.farfrr`` and ``hter`` is their mean.

    Raises
    ------
    AssertionError
        If ``score_threshold`` is not a real number, ``scores_dict`` is None,
        or no attack set or no bona-fide set is present.
    """
    # Function-scope import: numbers.Real covers int, float and numpy scalars
    # (and Python 2 `long`) without relying on the `long` builtin, which does
    # not exist on Python 3.
    import numbers

    assert isinstance(score_threshold, numbers.Real), 'input score_threshold should be a number (float or integer).'
    assert scores_dict is not None, 'no test score-set available for computing HTER'

    real_scores = None
    attack_sets = []
    for entry in scores_dict.values():
        if len(entry) == 2:
            attack_sets.append(entry[0])
        elif len(entry) == 1:
            # If several one-element entries exist, the last one wins.
            real_scores = entry[0]

    assert attack_sets, 'Empty attack-scores list. Cannot compute HTER'
    assert (real_scores is not None), 'Empty real-scores list. Cannot compute HTER.'

    # Pool every attack set exactly once; a single set is used as it is.
    if len(attack_sets) == 1:
        attack_scores = attack_sets[0]
    else:
        attack_scores = np.concatenate(attack_sets)

    test_far, test_frr = bob.measure.farfrr(attack_scores, real_scores, score_threshold)
    half_total = (test_far + test_frr) / 2.0
    return (half_total, test_far, test_frr)
def _check_attack_potential(self, attack_potential):
    """Normalise an attack-potential label.

    Three levels are recognised, ordered 'C' > 'B' > 'A'. Any other value,
    including None, falls back to the highest level, 'C'.
    """
    known_levels = ['A', 'B', 'C']
    return attack_potential if attack_potential in known_levels else 'C'
def bpcer(self, scores, score_threshold=0.0):
    """Compute the BPCER of a set of bona-fide scores at a given threshold.

    Parameters
    ----------
    scores : numpy.ndarray or dict
        Either a 1D numpy array of scores of genuine (bona-fide)
        presentations, or a score dictionary; in the latter case the scores
        are taken from the entry whose value has exactly one element.
    score_threshold : float or int
        Threshold used for deciding accept/reject.

    Returns
    -------
    float
        Fraction of bona-fide scores that lie strictly below
        ``score_threshold``.

    Raises
    ------
    AssertionError
        If no bona-fide scores can be extracted from ``scores`` or
        ``score_threshold`` is not a real number.
    """
    # Function-scope import: numbers.Real covers int, float and numpy scalars
    # (and Python 2 `long`) without relying on the `long` builtin, which does
    # not exist on Python 3.
    import numbers

    bonafide_scores = None
    if isinstance(scores, dict):
        # The bona-fide set is the entry without an attack potential.
        for entry in scores.values():
            if len(entry) == 1:
                bonafide_scores = entry[0]
    elif isinstance(scores, np.ndarray) and scores.ndim == 1:
        bonafide_scores = scores

    assert bonafide_scores is not None, 'input scores does not contain bona-fide scores, for computing BPCER.'
    assert isinstance(score_threshold, numbers.Real), 'input score_threshold should be a number (float or integer).'

    # Accept plain sequences stored in a dictionary as well as arrays.
    bonafide_scores = np.asarray(bonafide_scores)
    rejected = np.count_nonzero(bonafide_scores < score_threshold)
    return rejected / float(bonafide_scores.shape[0])
def apcer(self, scores_dict, attack_potential='C', score_threshold=0.0):
    """Compute the APCER as defined in the ISO standard.

    Three levels of attack potential are assumed, ordered 'C' > 'B' > 'A'.

    Parameters
    ----------
    scores_dict : dict
        Each key names a presentation. Keys that correspond to a PAI
        (presentation-attack instrument) have a 2-element value:
        a 1D numpy array of scores, followed by a single letter 'A', 'B' or
        'C' giving the attack potential of the PAI. Values of any other
        length are ignored.
    attack_potential : str
        Letter 'A', 'B' or 'C': the attack potential at which the APCER is
        computed. Unknown values are treated as 'C'.
    score_threshold : float
        Threshold used for deciding accept/reject.

    Returns
    -------
    tuple
        ``(apcer, per_pai)``: ``per_pai`` maps every PAI whose attack
        potential is at or below ``attack_potential`` to the fraction of its
        scores that are at or above ``score_threshold``; ``apcer`` is the
        largest of those fractions.

    Raises
    ------
    ValueError
        If no PAI at or below ``attack_potential`` is present.
    """
    attack_potential = self._check_attack_potential(attack_potential)

    attack_perf_dict = {}
    for pai, entry in scores_dict.items():
        if len(entry) != 2:
            continue  # only 2-element values describe a PAI
        scores, potential = entry[0], entry[1]
        if potential <= attack_potential:
            accepted = np.count_nonzero(scores >= score_threshold)
            # Record the per-PAI rate so that both the maximum and the
            # returned dictionary are actually populated.
            attack_perf_dict[pai] = accepted / float(scores.shape[0])

    if not attack_perf_dict:
        raise ValueError(
            'no attack score-set at or below attack-potential %r; cannot compute APCER' % (attack_potential,))
    return (max(attack_perf_dict.values()), attack_perf_dict)
from .PadIsoMetrics import PadIsoMetrics
def __appropriate__(*args):
    """Make the given objects appear to be declared in this module.

    Sphinx warns that it cannot find classes when they are referenced through
    a shortened import path; overwriting ``__module__`` makes the reported
    location match the module that re-exports them.

    Parameters
    ----------
    *args
        The objects that Sphinx should believe are defined here.
    """
    for exported in args:
        setattr(exported, '__module__', __name__)
# Export every name currently defined in this module that does not start with an underscore.
__all__ = [_ for _ in dir() if not _.startswith('_')]
"""Calculates PAD ISO compliant metrics based on the score files
import logging
import click
from bob.extension.scripts.click_helper import verbosity_option
from bob.measure.load import split
from bob.measure import (
farfrr, far_threshold, eer_threshold, min_hter_threshold)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Threshold-selection criteria offered as choices for the ``--criterion`` option of ``metrics``.
ALL_CRITERIA = ('bpcer20', 'eer', 'min-hter')
def scores_dev_eval(development_scores, evaluation_scores):
    """Split the development and, when given, the evaluation scores.

    Parameters
    ----------
    development_scores
        Passed to ``split`` to obtain the development negatives and positives.
    evaluation_scores
        Passed to ``split`` in the same way, or None when no evaluation set
        is available.

    Returns
    -------
    tuple
        ``(dev_neg, dev_pos, eval_neg, eval_pos)``; the last two are None
        when ``evaluation_scores`` is None.
    """
    dev_neg, dev_pos = split(development_scores)
    if evaluation_scores is None:
        logger.debug("No evaluation scores were provided.")
        eval_neg, eval_pos = None, None
    else:
        # Only split the evaluation scores when there are some to split.
        eval_neg, eval_pos = split(evaluation_scores)
    return dev_neg, dev_pos, eval_neg, eval_pos
def report(dev_neg, dev_pos, eval_neg, eval_pos, threshold):
    """Print APCER, BPCER and HTER of each available score set at ``threshold``.

    Parameters
    ----------
    dev_neg, dev_pos
        Negative and positive scores of the development set.
    eval_neg, eval_pos
        Negative and positive scores of the evaluation set; the set is
        skipped when ``eval_neg`` is None.
    threshold
        Decision threshold passed to ``farfrr``.
    """
    groups = [
        ('Development', dev_neg, dev_pos),
        ('Evaluation', eval_neg, eval_pos),
    ]
    for group, neg, pos in groups:
        if neg is None:
            # No scores for this group (e.g. no evaluation file was given).
            continue
        click.echo("{} set:".format(group))
        apcer, bpcer = farfrr(neg, pos, threshold)
        click.echo("APCER: {:>5.1f}%".format(apcer * 100))
        click.echo("BPCER: {:>5.1f}%".format(bpcer * 100))
        # HTER is the mean of the two rates, expressed as a percentage.
        click.echo("HTER: {:>5.1f}%".format((apcer + bpcer) * 50))
# NOTE(review): this definition appears truncated in this view -- confirm against the original file:
#   * no argument decorator is visible for the `development_scores` parameter;
#   * the option arguments starting with '-c', '--criterion' have no visible opening decorator
#     call or closing parenthesis;
#   * the docstring has no visible terminator;
#   * the `raise ValueError` after the `min-hter` branch has no visible `else:`, so as shown it
#     would be part of that branch.
# The docstring is left untouched because click shows it as the command's help text.
@click.command(context_settings=dict(token_normalize_func=lambda x: x.lower()))
@click.argument('evaluation_scores', required=False)
'-c', '--criterion', multiple=True, default=['bpcer20'],
type=click.Choice(ALL_CRITERIA), help='The criteria to select. You can '
'select multiple criteria by passing this option multiple times.',
def metrics(development_scores, evaluation_scores, criterion):
"""PAD ISO compliant metrics.
Reports several metrics based on a selected threshold on the development
set. The thresholds are selected based on different criteria:
bpcer20 When APCER is set to 5%.
eer When BPCER == APCER.
min-hter When HTER is minimum.
Most metrics are according to the ISO/IEC 30107-3:2017 "Information
technology -- Biometric presentation attack detection -- Part 3: Testing
and reporting" standard. The reported metrics are:
APCER: Attack Presentation Classification Error Rate
BPCER: Bona-fide Presentation Classification Error Rate
HTER (non-ISO): Half Total Error Rate ((BPCER+APCER)/2)
$ bob pad metrics /path/to/scores-dev
$ bob pad metrics /path/to/scores-dev /path/to/scores-eval
$ bob pad metrics /path/to/scores-{dev,eval} # using bash expansion
$ bob pad metrics -c bpcer20 -c eer /path/to/scores-dev
dev_neg, dev_pos, eval_neg, eval_pos = scores_dev_eval(
development_scores, evaluation_scores)
for method in criterion:
if method == 'bpcer20':
threshold = far_threshold(dev_neg, dev_pos, 0.05, True)
elif method == 'eer':
threshold = eer_threshold(dev_neg, dev_pos, True)
elif method == 'min-hter':
threshold = min_hter_threshold(dev_neg, dev_pos, True)
raise ValueError("Unknown threshold criteria: {}".format(method))
click.echo("\nThreshold of {} selected with the {} criteria".format(
threshold, method))
report(dev_neg, dev_pos, eval_neg, eval_pos, threshold)
"""The main entry for bob.pad (click-based) scripts.
import click
import pkg_resources
from click_plugins import with_plugins
# NOTE(review): no decorators are visible on this function in this view, although the module
# imports click, pkg_resources and with_plugins -- confirm against the original file.
def pad():
"""Entry for bob.pad commands."""
Created on 16 Nov 2016
@author: sbhatta
import sys, os
import pkg_resources
import numpy as np
import bob.measure
from bob.pad.base.evaluation import PadIsoMetrics
#def main(arguments):
def main(command_line_parameters=None):
    """Print the metrics of the packaged development and test score files.

    Loads the two HDF5 score files shipped under ``test/data``, derives the
    EER threshold from the development scores and prints FAR/FRR/HTER, BPCER
    and APCER of the test scores at that threshold.
    ``command_line_parameters`` is accepted but not used.
    """
    devel_path = pkg_resources.resource_filename(
        'bob.pad.base', 'test/data/pad_devel_replaymobile_IqmScores_SVM.hdf5')
    test_path = pkg_resources.resource_filename(
        'bob.pad.base', 'test/data/pad_test_replaymobile_IqmScores_SVM.hdf5')

    iso = PadIsoMetrics()
    devel_scores = iso.load_scores_hdf5(devel_path)
    test_scores = iso.load_scores_hdf5(test_path)

    # Development set: threshold selection.
    eer_threshold, devel_far, devel_frr = iso.eer(devel_scores)
    print('threshEER_dev (grandtest): %s' % eer_threshold)
    print('FRR, FAR (devel): %s %s' % (devel_frr, devel_far))
    print('EER (%%): %.3f%%' % (50.0*(devel_far + devel_frr)))

    # Test set: error rates at the development threshold.
    hter_value, test_far, test_frr = iso.hter(test_scores, eer_threshold)
    for template, rate in (
            (" * FAR : %.3f%%", test_far),
            (" * FRR : %.3f%%", test_frr),
            (" * HTER: %.3f%%", hter_value),
    ):
        print(template % (100*rate))

    # BPCER, once from the whole dictionary and once from the raw array.
    print('BPCER from dict: %.3f%%' % (100.0*iso.bpcer(test_scores, eer_threshold)))
    bonafide = test_scores['real'][0]
    print('BPCER from np-array: %.3f%%' % (100.0*iso.bpcer(bonafide, eer_threshold)))

    overall_apcer, per_pai = iso.apcer(test_scores, 'C', eer_threshold)
    print('\nAPCER: %.3f%%' % (100.0*overall_apcer))
    print('Performance for individual PAIs:')
    for pai, rate in per_pai.items():
        print('%s: %.3f%%' % (pai, 100.0*rate))
if __name__ == '__main__':
......@@ -88,21 +88,13 @@ Finally, the :py:class:`bob.pad.base.algorithm.Algorithm` class provides default
This package includes a class `bob.pad.base.evaluation.PadIsoMetrics`, that can be used to compute the PAD metrics APCER and BPCER as defined in the ISO/IEC 30107 part3 standard.
The most important methods in the class are: ``eer()``, ``hter()``, ``apcer()``, and ``bpcer()``.
The main point to note about these methods is that the input-scores should be organized in a dictionary.
One dictionary should be created for each group ('devel', 'test', etc.).
The keys of the dictionary refer to the presentation-type ('bona-fide' or some presentation-attack-instrument (PAI)).
The value associated with each key is a tuple, containing either one or two elements.
For each key corresponding to a PAI, the value should be a tuple of 2 elements: (scores, attack_potential), where 'scores' is a 1D numpy-array of scores corresponding to presentations of that PAI, and 'attack_potential' is a single letter, either 'A', or 'B', or 'C', signifying the attack-potential of the PAI.
For bona-fide presentations, no attack-potential is defined. Therefore, for a key representing bona-fide presentations, the value will be a tuple consisting of only one element: a 1D numpy-array of scores.
Consequently, a key for which the value is a tuple of length 1 is interpreted as representing a bona-fide presentation.
This package includes a script `bob pad metrics`, that can be used to compute
the PAD metrics APCER and BPCER as defined in the ISO/IEC 30107 part3 standard.
To learn more about it run:
The methods ``eer()`` and ``hter()`` call the corresponding functions in `bob.measure` to compute the relevant thresholds and performance-measures, based on the input score-dictionary.
.. code-block:: sh
The class also provides methods for saving the score-dictionaries in a hdf5-file (``save_scores_hdf5()``), and for loading such a file (``load_scores_hdf5()``).
For an example of how to use this class to evaluate a score-distribution, see the code provided in file `bob.pad.base/bob/pad/base/test/`.
$ bob pad metrics --help
Implemented Tools
......@@ -37,11 +37,6 @@ Algorithms
.. automodule:: bob.pad.base.algorithm
.. automodule:: bob.pad.base.evaluation
......@@ -35,12 +35,6 @@ Scoring
.. autosummary::
.. autosummary::
......@@ -131,6 +131,17 @@ setup(
'bob.pad.grid': [
'demanding = bob.pad.base.config.grid.demanding:grid',
# main entry for bob pad cli
'bob.cli': [
'pad = bob.pad.base.script.pad:pad',
# bob pad scripts
'bob.pad.cli': [
'metrics = bob.pad.base.script.metrics:metrics',
# Classifiers are important if you plan to distribute this package through
\ No newline at end of file
