Commit aeb1bb57 authored by Pavel KORSHUNOV

Merge branch 'metrics' into 'master'

Replace the old pad iso metrics class with a script

See merge request !27
parents 82f9a36e e4efe403
......@@ -3,7 +3,6 @@ from .utils import *
from . import database
from . import algorithm
from . import tools
from . import evaluation
from . import script
from . import test
......
'''
Created on 16 Nov 2016

@author: sbhatta
'''

import sys, os

import bob.io.base
import numpy as np
import bob.measure


class PadIsoMetrics():

    def __init__(self):
        """ constructor. """
        self.real_name = 'bonafide'   # label used for real (bona-fide) presentations
        self.attack_name = 'attack'   # label used for attack presentations

    def save_scores_hdf5(self, outfile, scores_dict):
        """ saves the input scores_dict dictionary in a HDF5-formatted file """
        h5out = bob.io.base.HDF5File(outfile, "w")
        for p in scores_dict.keys():
            if len(scores_dict[p]) == 1:  # real set
                h5out.set(p, scores_dict[p][0])
                h5out.set_attribute('presentation', self.real_name, p)
            else:
                # write attacks
                h5out.set(p, scores_dict[p][0])
                h5out.set_attribute('attack_potential', scores_dict[p][1], p)
                h5out.set_attribute('presentation', self.attack_name, p)
        del h5out
    def load_scores_hdf5(self, infile):
        """ loads a HDF5 file and tries to construct a dictionary of scores. Returns the score-dictionary."""
        h5in = bob.io.base.HDF5File(infile, "r")
        scores_dict = {}
        h5in.cd('/')
        class_labels = h5in.keys(relative=True)
        for p in class_labels:
            scores = h5in.get(p)
            attrs = h5in.get_attributes(p)
            if len(attrs) == 2:  # then the two attributes are 'presentation' and 'attack_potential'
                ap = attrs['attack_potential']
                scores_dict[p] = [scores, ap]
            else:
                scores_dict[p] = [scores]
        del h5in
        return scores_dict
    def eer(self, scores_dict):
        """ computes the EER threshold using the scores in the supplied dictionary

        Input:
            scores_dict: dictionary where each key is the name of the presentation ('real' or one attack-type),
                and the corresponding value is a tuple: (scores, attack_potential).
                'scores' should be a 1D numpy-array of floats containing scores.
                'attack_potential' should be one of the 3 letters 'A', 'B', or 'C'.
                Scores for 'real' presentations do not have an associated 'attack_potential',
                so, if the value of a key is a tuple of length 1, the key-value pair is assumed
                to represent a 'real'-presentation set.

        Return:
            tuple of three floats: (eer_threshold, far, frr). These are computed using functions from bob.measure.
        """
        real_scores = None
        attack_scores = None
        assert scores_dict is not None, 'no development score-set provided for computing EER'
        for k in scores_dict.keys():
            keyvalue = scores_dict[k]
            if len(keyvalue) == 2:
                if attack_scores is None:
                    attack_scores = scores_dict[k][0]
                else:
                    attack_scores = np.concatenate((attack_scores, scores_dict[k][0]))
            elif len(keyvalue) == 1:
                real_scores = scores_dict[k][0]

        assert (attack_scores is not None), 'Empty attack-scores list. Cannot compute EER.'
        assert (real_scores is not None), 'Empty real-scores list. Cannot compute EER.'
        # attack scores play the role of negatives, bona-fide scores the role of positives
        self.threshEER_dev = bob.measure.eer_threshold(attack_scores, real_scores)
        self.dev_far, self.dev_frr = bob.measure.farfrr(attack_scores, real_scores, self.threshEER_dev)
        # self.eer_devel = 50.0 * (self.dev_far + self.dev_frr)
        # print('eer()::threshEER: %s' % self.threshEER_dev)
        return (self.threshEER_dev, self.dev_far, self.dev_frr)
    def hter(self, scores_dict, score_threshold):
        """ computes the HTER on test-set scores, using the supplied score-threshold.

        Inputs:
            scores_dict: dictionary where each key is the name of the presentation ('real' or one attack-type),
                and the corresponding value is a tuple: (scores, attack_potential).
                'scores' should be a 1D numpy-array of floats containing scores.
                'attack_potential' should be one of the 3 letters 'A', 'B', or 'C'.
                Scores for 'real' presentations do not have an associated 'attack_potential',
                so, if the value of a key is a tuple of length 1, the key-value pair is assumed
                to represent a 'real'-presentation set.
            score_threshold: (float) value to be used for thresholding scores.

        Return:
            tuple of three floats: (hter, far, frr). These are computed using functions from bob.measure.
        """
        # note: the Python-2 'long' type was dropped from the check below
        assert score_threshold is not None and isinstance(score_threshold, (int, float)), \
            'input score_threshold should be a number (float or integer).'
        real_scores = None
        attack_scores = None
        assert scores_dict is not None, 'no test score-set available for computing HTER'
        for k in scores_dict.keys():
            key_value = scores_dict[k]
            if len(key_value) == 2:
                if attack_scores is None:
                    attack_scores = scores_dict[k][0]
                else:
                    attack_scores = np.concatenate((attack_scores, scores_dict[k][0]))
            elif len(key_value) == 1:
                real_scores = scores_dict[k][0]

        assert (attack_scores is not None), 'Empty attack-scores list. Cannot compute HTER.'
        assert (real_scores is not None), 'Empty real-scores list. Cannot compute HTER.'
        test_far, test_frr = bob.measure.farfrr(attack_scores, real_scores, score_threshold)
        # test_good_neg = bob.measure.correctly_classified_negatives(attack_scores, score_threshold).sum()
        # test_good_pos = bob.measure.correctly_classified_positives(real_scores, score_threshold).sum()
        hter = (test_far + test_frr) / 2.0
        return (hter, test_far, test_frr)
    def _check_attack_potential(self, attack_potential):
        """ For now, we assume three levels of attack-potential: 'C' > 'B' > 'A'. """
        if attack_potential is None:
            attack_potential = 'C'
        if attack_potential not in ['A', 'B', 'C']:
            attack_potential = 'C'
        return attack_potential
    def bpcer(self, scores, score_threshold=0.0):
        """ computes the BPCER on test-set scores, using the supplied score-threshold.

        Inputs:
            scores: either a 1D numpy-array of scores corresponding to genuine (bona-fide) presentations,
                or a score-dictionary from which the bona-fide scores are extracted.
            score_threshold: a floating point number specifying the score-threshold to be used for deciding accept/reject.

        Return:
            floating-point number representing the BPCER computed for the input score-set
        """
        bonafide_scores = None
        if isinstance(scores, dict):
            # extract 'real' scores from the dictionary (the entry whose value has length 1)
            for k in scores.keys():
                key_value = scores[k]
                if len(key_value) == 1:
                    bonafide_scores = key_value[0]
        else:
            # verify that scores is a 1D numpy array
            if isinstance(scores, np.ndarray) and len(scores.shape) == 1:
                bonafide_scores = scores
        assert bonafide_scores is not None, 'input scores does not contain bona-fide scores, for computing BPCER.'
        assert isinstance(score_threshold, (int, float)), 'input score_threshold should be a number (float or integer).'
        # bona-fide presentations scoring below the threshold are rejected (classified as attacks)
        num_rejected = bonafide_scores[bonafide_scores < score_threshold].shape[0]
        return num_rejected / float(bonafide_scores.shape[0])
    def apcer(self, scores_dict, attack_potential='C', score_threshold=0.0):
        """computes the APCER as defined in the ISO standard. For now, we assume three levels of attack-potential: 'C' > 'B' > 'A'.

        Inputs:
            scores_dict: a dictionary where each key corresponds to a specific PAI (presentation-attack-instrument).
                Keys corresponding to PAIs will have as value a list of 2 elements:
                    1st element: a 1D numpy-array of scores
                    2nd element: a single letter 'A', 'B', or 'C', specifying the attack-potential of the PAI.
            attack_potential: a letter 'A', 'B', or 'C', specifying the attack-potential at which the APCER is to be computed.
            score_threshold: a floating point number specifying the score-threshold to be used for deciding accept/reject.

        Returns:
            tuple consisting of 2 elements:
                1st element: apcer at the specified attack-potential
                2nd element: dictionary of the error-rate of each individual PAI that has attack-potential at or below the input-parameter attack_potential.
        """
        attack_potential = self._check_attack_potential(attack_potential)
        attack_perf_dict = {}  # stores the error-rate for each attack-type at or below the specified attack-potential
        result_list = []
        for k in scores_dict.keys():
            if len(scores_dict[k]) == 2:  # consider only the keys where the value is a list of 2 elements
                # the attack-potential letters compare lexically: 'A' <= 'B' <= 'C'
                if scores_dict[k][1] <= attack_potential:
                    scores = scores_dict[k][0]
                    # attacks scoring at or above the threshold are (wrongly) accepted
                    result = (scores[scores >= score_threshold].shape[0]) / float(scores.shape[0])
                    result_list.append(result)
                    attack_perf_dict[k] = result
        return (max(result_list), attack_perf_dict)
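
A hedged usage sketch (not part of the commit) of how the score dictionary described in the docstrings above was meant to be built and fed to this class. The score values and PAI names are made up for illustration, the same toy dictionary is reused for development and test for brevity, and the import only resolves on revisions that still ship ``bob.pad.base.evaluation``:

import numpy as np
from bob.pad.base.evaluation import PadIsoMetrics  # removed by this very commit

metrics = PadIsoMetrics()
# one key per presentation type: attacks carry [scores, attack_potential],
# the bona-fide entry is a 1-element list so it is recognised as 'real'
scores_dict = {
    'real':         [np.array([2.1, 1.8, 2.5, 1.9])],
    'print-photo':  [np.array([-1.2, 0.3, -0.5]), 'A'],   # hypothetical PAI names
    'replay-video': [np.array([0.1, -0.7, 0.9]), 'C'],
}
thresh, far, frr = metrics.eer(scores_dict)              # EER threshold on these scores
hter, test_far, test_frr = metrics.hter(scores_dict, thresh)
bpcer = metrics.bpcer(scores_dict, thresh)               # bona-fide error rate
apcer, per_pai = metrics.apcer(scores_dict, 'C', thresh)
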
from .PadIsoMetrics import PadIsoMetrics


def __appropriate__(*args):
    """Says object was actually declared here, and not in the import module.
    Fixing sphinx warnings of not being able to find classes, when path is
    shortened.

    Parameters
    ----------
    *args
        The objects that you want sphinx to believe that are defined here.

    Resolves `Sphinx referencing issues
    <https://github.com/sphinx-doc/sphinx/issues/3048>`_
    """
    for obj in args:
        obj.__module__ = __name__


__appropriate__(
    PadIsoMetrics,
)

__all__ = [_ for _ in dir() if not _.startswith('_')]
"""Calculates PAD ISO compliant metrics based on the score files
"""
import logging
import click
from bob.extension.scripts.click_helper import verbosity_option
from bob.measure.load import split
from bob.measure import (
farfrr, far_threshold, eer_threshold, min_hter_threshold)
logger = logging.getLogger(__name__)
ALL_CRITERIA = ('bpcer20', 'eer', 'min-hter')
def scores_dev_eval(development_scores, evaluation_scores):
dev_neg, dev_pos = split(development_scores)
dev_neg.sort()
dev_pos.sort()
if evaluation_scores is None:
logger.debug("No evaluation scores were provided.")
eval_neg, eval_pos = None, None
else:
eval_neg, eval_pos = split(evaluation_scores)
eval_neg.sort()
eval_pos.sort()
return dev_neg, dev_pos, eval_neg, eval_pos
def report(dev_neg, dev_pos, eval_neg, eval_pos, threshold):
    for group, neg, pos in [
        ('Development', dev_neg, dev_pos),
        ('Evaluation', eval_neg, eval_pos),
    ]:
        if neg is None:
            continue
        click.echo("{} set:".format(group))
        apcer, bpcer = farfrr(neg, pos, threshold)
        click.echo("APCER: {:>5.1f}%".format(apcer * 100))
        click.echo("BPCER: {:>5.1f}%".format(bpcer * 100))
        click.echo("HTER: {:>5.1f}%".format((apcer + bpcer) * 50))
@click.command(context_settings=dict(token_normalize_func=lambda x: x.lower()))
@click.argument('development_scores')
@click.argument('evaluation_scores', required=False)
@click.option(
    '-c', '--criterion', multiple=True, default=['bpcer20'],
    type=click.Choice(ALL_CRITERIA), help='The criteria to select. You can '
    'select multiple criteria by passing this option multiple times.',
    show_default=True)
@verbosity_option()
def metrics(development_scores, evaluation_scores, criterion):
    """PAD ISO compliant metrics.

    Reports several metrics based on a selected threshold on the development
    set. The thresholds are selected based on different criteria:

        bpcer20   When APCER is set to 5%.

        eer       When BPCER == APCER.

        min-hter  When HTER is minimum.

    Most metrics are according to the ISO/IEC 30107-3:2017 "Information
    technology -- Biometric presentation attack detection -- Part 3: Testing
    and reporting" standard. The reported metrics are:

        APCER: Attack Presentation Classification Error Rate

        BPCER: Bona-fide Presentation Classification Error Rate

        HTER (non-ISO): Half Total Error Rate ((BPCER + APCER) / 2)

    Examples:

        $ bob pad metrics /path/to/scores-dev

        $ bob pad metrics /path/to/scores-dev /path/to/scores-eval

        $ bob pad metrics /path/to/scores-{dev,eval}  # using bash expansion

        $ bob pad metrics -c bpcer20 -c eer /path/to/scores-dev
    """
    dev_neg, dev_pos, eval_neg, eval_pos = scores_dev_eval(
        development_scores, evaluation_scores)
    for method in criterion:
        if method == 'bpcer20':
            threshold = far_threshold(dev_neg, dev_pos, 0.05, True)
        elif method == 'eer':
            threshold = eer_threshold(dev_neg, dev_pos, True)
        elif method == 'min-hter':
            threshold = min_hter_threshold(dev_neg, dev_pos, True)
        else:
            raise ValueError("Unknown threshold criteria: {}".format(method))
        click.echo("\nThreshold of {} selected with the {} criteria".format(
            threshold, method))
        report(dev_neg, dev_pos, eval_neg, eval_pos, threshold)
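
As a hedged aside (not part of this commit), the new command can also be exercised directly from Python, for instance in a test, via click's test runner; the score-file paths below are placeholders, not files shipped with the package:

from click.testing import CliRunner
from bob.pad.base.script.metrics import metrics

runner = CliRunner()
result = runner.invoke(metrics, ['-c', 'eer', 'scores-dev.txt', 'scores-eval.txt'])
print(result.exit_code)
print(result.output)
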
"""The main entry for bob.pad (click-based) scripts.
"""
import click
import pkg_resources
from click_plugins import with_plugins
@with_plugins(pkg_resources.iter_entry_points('bob.pad.cli'))
@click.group()
def pad():
"""Entry for bob.pad commands."""
pass
'''
Created on 16 Nov 2016

@author: sbhatta
'''

import sys, os
import pkg_resources

import bob.io.base
import numpy as np
import bob.measure

from bob.pad.base.evaluation import PadIsoMetrics


def main(command_line_parameters=None):

    scorefile_devel = pkg_resources.resource_filename('bob.pad.base', 'test/data/pad_devel_replaymobile_IqmScores_SVM.hdf5')
    scorefile_test = pkg_resources.resource_filename('bob.pad.base', 'test/data/pad_test_replaymobile_IqmScores_SVM.hdf5')

    # PAI_labels = [('mattescreen-photo', 'A'), ('mattescreen-video', 'A'), ('print-fixed', 'A'), ('print-hand', 'A')]
    rms = PadIsoMetrics()

    devel_dict = rms.load_scores_hdf5(scorefile_devel)
    test_dict = rms.load_scores_hdf5(scorefile_test)

    threshEER_dev, dev_far, dev_frr = rms.eer(devel_dict)
    eer_devel = 50.0 * (dev_far + dev_frr)
    print('threshEER_dev (grandtest): %s' % threshEER_dev)
    print('FRR, FAR (devel): %s %s' % (dev_frr, dev_far))
    print('EER (%%): %.3f%%' % eer_devel)

    test_hter, test_far, test_frr = rms.hter(test_dict, threshEER_dev)
    print(" * FAR : %.3f%%" % (100 * test_far))
    print(" * FRR : %.3f%%" % (100 * test_frr))
    print(" * HTER: %.3f%%" % (100 * test_hter))

    test_bpcer = 100.0 * rms.bpcer(test_dict, threshEER_dev)
    print('BPCER from dict: %.3f%%' % test_bpcer)

    bf_scores = test_dict['real'][0]
    test_bpcer = 100.0 * rms.bpcer(bf_scores, threshEER_dev)
    print('BPCER from np-array: %.3f%%' % test_bpcer)

    attack_apcer, attack_perf_dict = rms.apcer(test_dict, 'C', threshEER_dev)
    print('\nAPCER: %.3f%%' % (100.0 * attack_apcer))
    print('Performance for individual PAIs:')
    for k in attack_perf_dict.keys():
        print('%s: %.3f%%' % (k, 100.0 * attack_perf_dict[k]))


if __name__ == '__main__':
    main(sys.argv[1:])
......@@ -88,21 +88,13 @@ Finally, the :py:class:`bob.pad.base.algorithm.Algorithm` class provides default
Evaluation
~~~~~~~~~~
This package includes a class `bob.pad.base.evaluation.PadIsoMetrics`, that can be used to compute the PAD metrics APCER and BPCER as defined in the ISO/IEC 30107 part 3 standard.
The most important methods in the class are: ``eer()``, ``hter()``, ``apcer()``, and ``bpcer()``.
The main point to note about these methods is that the input scores should be organized in a dictionary.
One dictionary should be created for each group ('devel', 'test', etc.).
The keys of the dictionary refer to the presentation-type ('bona-fide' or some presentation-attack-instrument (PAI)).
The value associated with each key is a tuple, containing either one or two elements.
For each key corresponding to a PAI, the value should be a tuple of 2 elements: (scores, attack_potential), where 'scores' is a 1D numpy-array of scores corresponding to presentations of that PAI, and 'attack_potential' is a single letter, either 'A', 'B', or 'C', signifying the attack-potential of the PAI.
For bona-fide presentations, no attack-potential is defined. Therefore, for a key representing bona-fide presentations, the value will be a tuple consisting of only one element: a 1D numpy-array of scores.
Consequently, a key for which the value is a tuple of length 1 is interpreted as representing a bona-fide presentation.
The methods ``eer()`` and ``hter()`` call the corresponding functions in `bob.measure` to compute the relevant thresholds and performance-measures, based on the input score-dictionary.
The class also provides methods for saving the score-dictionaries in a HDF5 file (``save_scores_hdf5()``), and for loading such a file (``load_scores_hdf5()``).
For an example of how to use this class to evaluate a score-distribution, see the code provided in the file ``bob.pad.base/bob/pad/base/test/test_PadIsoMetrics.py``.

This package includes a script `bob pad metrics`, that can be used to compute
the PAD metrics APCER and BPCER as defined in the ISO/IEC 30107 part 3 standard.
To learn more about it run:

.. code-block:: sh

   $ bob pad metrics --help
Implemented Tools
......
......@@ -37,11 +37,6 @@ Algorithms
.. automodule:: bob.pad.base.algorithm
Evaluation
~~~~~~~~~~
.. automodule:: bob.pad.base.evaluation
Databases
---------
......
......@@ -35,12 +35,6 @@ Scoring
.. autosummary::
bob.bio.base.tools.compute_scores
Evaluation
~~~~~~~~~~
.. autosummary::
bob.pad.base.evaluation.PadIsoMetrics
Details
-------
......
......@@ -131,6 +131,17 @@ setup(
        'bob.pad.grid': [
            'demanding = bob.pad.base.config.grid.demanding:grid',
        ],

        # main entry for bob pad cli
        'bob.cli': [
            'pad = bob.pad.base.script.pad:pad',
        ],

        # bob pad scripts
        'bob.pad.cli': [
            'metrics = bob.pad.base.script.metrics:metrics',
        ],
    },
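
    # Hedged illustration (not part of this commit): a downstream package could
    # plug its own command into the `bob pad` group by declaring, in its own
    # setup.py, an entry point in the 'bob.pad.cli' group that is discovered by
    # pkg_resources.iter_entry_points in the pad() group above. The package and
    # command names here are hypothetical:
    #
    #   entry_points={
    #       'bob.pad.cli': [
    #           'my-metric = my_package.script.my_metric:my_metric',
    #       ],
    #   },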
# Classifiers are important if you plan to distribute this package through
......
1.0.9a0
\ No newline at end of file
2.0.0a0