Commit 6b07fba1 authored by Pavel KORSHUNOV

Merge branch 'iso_metrics' into 'master'

Iso metrics

This branch includes code for computing ISO metrics APCER and BPCER. Please merge it to master.

See merge request !6
parents 1af7a507 f3e6ae8d
from . import database
from . import algorithm
from . import tools
from . import evaluation
from . import script
from . import test
'''
Created on 16 Nov 2016
@author: sbhatta
'''
import sys, os
import bob.io.base
import numpy as np
import bob.measure
class PadIsoMetrics():

    def __init__(self):
        """ constructor. """
        self.real_name = 'bonafide'  # label used for bona-fide ('real') presentations
        self.attack_name = 'attack'  # label used for attack presentations

    def save_scores_hdf5(self, outfile, scores_dict):
        """ saves the input scores_dict dictionary in a hdf5-formatted file """
        h5out = bob.io.base.HDF5File(outfile, "w")
        for p in scores_dict.keys():
            if len(scores_dict[p]) == 1:  # bona-fide set
                h5out.set(p, scores_dict[p][0])
                h5out.set_attribute('presentation', self.real_name, p)
            else:
                # write attack scores, tagged with their attack-potential
                h5out.set(p, scores_dict[p][0])
                h5out.set_attribute('attack_potential', scores_dict[p][1], p)
                h5out.set_attribute('presentation', self.attack_name, p)
        del h5out
    def load_scores_hdf5(self, infile):
        """ loads a hdf5 file and tries to construct a dictionary of scores. Returns the score-dictionary."""
        h5in = bob.io.base.HDF5File(infile, "r")
        scores_dict = {}
        h5in.cd('/')
        class_labels = h5in.keys(relative=True)
        for p in class_labels:
            scores = h5in.get(p)
            attrs = h5in.get_attributes(p)
            if len(attrs) == 2:  # the two attributes are 'presentation' and 'attack_potential'
                ap = attrs['attack_potential']
                scores_dict[p] = [scores, ap]
            else:
                scores_dict[p] = [scores]
        del h5in
        return scores_dict
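
    # A minimal round-trip sketch (hypothetical file name and score values),
    # assuming the dictionary layout documented in eer() below: bona-fide
    # entries hold a 1-element list, attack entries a [scores, attack_potential] pair.
    #
    #   metrics = PadIsoMetrics()
    #   scores = {'real':  [np.array([0.9, 1.2, 0.7])],
    #             'print': [np.array([-0.5, 0.1]), 'A']}
    #   metrics.save_scores_hdf5('scores.hdf5', scores)
    #   loaded = metrics.load_scores_hdf5('scores.hdf5')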
    def eer(self, scores_dict):
        """ computes the EER threshold using the scores in the supplied dictionary
        Input:
            scores_dict: dictionary where each key is the name of the presentation ('real' or one attack-type),
                and the corresponding value is a tuple: (scores, attack_potential).
                'scores' should be a 1D numpy-array of floats containing scores.
                'attack_potential' should be one of the 3 letters 'A', 'B', or 'C'.
                Scores for 'real' presentations do not have an associated 'attack_potential',
                so, if the value of a key is a tuple of length 1, the key-value pair is assumed
                to represent a 'real'-presentation set.
        Return:
            tuple of three floats: (eer_threshold, far, frr). These are computed using functions from bob.measure.
        """
        real_scores = None
        attack_scores = None
        assert scores_dict is not None, 'no development score-set provided for computing EER'
        for k in scores_dict.keys():
            keyvalue = scores_dict[k]
            if len(keyvalue) == 2:
                # pool the attack scores over all PAIs
                if attack_scores is None:
                    attack_scores = scores_dict[k][0]
                else:
                    attack_scores = np.concatenate((attack_scores, scores_dict[k][0]))
            elif len(keyvalue) == 1:
                real_scores = scores_dict[k][0]
        assert attack_scores is not None, 'Empty attack-scores list. Cannot compute EER.'
        assert real_scores is not None, 'Empty real-scores list. Cannot compute EER.'
        self.threshEER_dev = bob.measure.eer_threshold(attack_scores, real_scores)
        self.dev_far, self.dev_frr = bob.measure.farfrr(attack_scores, real_scores, self.threshEER_dev)
        return (self.threshEER_dev, self.dev_far, self.dev_frr)
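
    # Usage sketch (hypothetical values): with the 'scores' dictionary from the
    # round-trip example above,
    #
    #   thresh, far, frr = metrics.eer(scores)
    #
    # pools the 'print' scores as negatives and the 'real' scores as positives
    # before calling bob.measure.eer_threshold(). hter() below expects the same
    # dictionary layout.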
    def hter(self, scores_dict, score_threshold):
        """ computes the HTER on test-set scores, using the supplied score-threshold.
        Inputs:
            scores_dict: dictionary where each key is the name of the presentation ('real' or one attack-type),
                and the corresponding value is a tuple: (scores, attack_potential).
                'scores' should be a 1D numpy-array of floats containing scores.
                'attack_potential' should be one of the 3 letters 'A', 'B', or 'C'.
                Scores for 'real' presentations do not have an associated 'attack_potential',
                so, if the value of a key is a tuple of length 1, the key-value pair is assumed
                to represent a 'real'-presentation set.
            score_threshold: (float) value to be used for thresholding scores.
        Return:
            tuple of three floats: (hter, far, frr). These are computed using functions from bob.measure.
        """
        assert (score_threshold is not None) and isinstance(score_threshold, (int, float)), \
            'input score_threshold should be a number (float or integer).'
        real_scores = None
        attack_scores = None
        assert scores_dict is not None, 'no test score-set available for computing HTER'
        for k in scores_dict.keys():
            key_value = scores_dict[k]
            if len(key_value) == 2:
                if attack_scores is None:
                    attack_scores = scores_dict[k][0]
                else:
                    attack_scores = np.concatenate((attack_scores, scores_dict[k][0]))
            elif len(key_value) == 1:
                real_scores = scores_dict[k][0]
        assert attack_scores is not None, 'Empty attack-scores list. Cannot compute HTER.'
        assert real_scores is not None, 'Empty real-scores list. Cannot compute HTER.'
        test_far, test_frr = bob.measure.farfrr(attack_scores, real_scores, score_threshold)
        hter = (test_far + test_frr) / 2.0
        return (hter, test_far, test_frr)
    def _check_attack_potential(self, attack_potential):
        """ For now, we assume three levels of attack-potential: 'C' > 'B' > 'A'.
            Defaults to 'C' (the highest level) when the input is missing or invalid. """
        if attack_potential is None:
            attack_potential = 'C'
        if attack_potential not in ['A', 'B', 'C']:
            attack_potential = 'C'
        return attack_potential
    def bpcer(self, scores, score_threshold=0.0):
        """ computes the BPCER on test-set scores, using either the supplied score-threshold,
            or the threshold computed from the EER of the development set.
        Inputs:
            scores: either a score-dictionary as described in eer(), or a 1D numpy-array of scores
                corresponding to genuine (bona-fide) presentations.
            score_threshold: a floating-point number specifying the score-threshold to be used for deciding accept/reject.
        Return:
            floating-point number representing the BPCER computed for the input score-set
        """
        bonafide_scores = None
        if isinstance(scores, dict):
            # extract the bona-fide scores from the dictionary (the entry whose value has length 1)
            for k in scores.keys():
                key_value = scores[k]
                if len(key_value) == 1:
                    bonafide_scores = key_value[0]
        else:
            # verify that scores is a 1D numpy array
            if isinstance(scores, np.ndarray) and len(scores.shape) == 1:
                bonafide_scores = scores
        assert bonafide_scores is not None, 'input scores does not contain bona-fide scores, for computing BPCER.'
        assert isinstance(score_threshold, (int, float)), 'input score_threshold should be a number (float or integer).'
        # BPCER: fraction of bona-fide presentations wrongly rejected (score below threshold)
        rejected = bonafide_scores[bonafide_scores < score_threshold].shape[0]
        return rejected / float(bonafide_scores.shape[0])
    def apcer(self, scores_dict, attack_potential='C', score_threshold=0.0):
        """computes the APCER as defined in the ISO/IEC 30107-3 standard. For now, we assume three levels of attack-potential: 'C' > 'B' > 'A'.
        Inputs:
            scores_dict: a dictionary where each key corresponds to a specific PAI (presentation-attack-instrument).
                Keys corresponding to PAIs will have as value a list of 2 elements:
                    1st element: a 1D numpy-array of scores
                    2nd element: a single letter 'A', 'B', or 'C', specifying the attack-potential of the PAI.
            attack_potential: a letter 'A', 'B', or 'C', specifying the attack-potential at which the APCER is to be computed
            score_threshold: a floating-point number specifying the score-threshold to be used for deciding accept/reject.
        Returns:
            tuple consisting of 2 elements:
                1st element: apcer at the specified attack-potential
                2nd element: dictionary of the error-rates of the individual PAIs that have an attack-potential at or below the input-parameter attack_potential.
        """
        attack_potential = self._check_attack_potential(attack_potential)
        # dictionary to store the error-rate for each attack-type at or below the specified attack-potential
        attack_perf_dict = {}
        result_list = []
        for k in scores_dict.keys():
            if len(scores_dict[k]) == 2:  # consider only the keys where the value is a list of 2 elements
                # the letters 'A' < 'B' < 'C' compare lexicographically, matching the assumed ordering
                if scores_dict[k][1] <= attack_potential:
                    scores = scores_dict[k][0]
                    # fraction of attack presentations wrongly accepted (score at or above threshold)
                    result = scores[scores >= score_threshold].shape[0] / float(scores.shape[0])
                    result_list.append(result)
                    attack_perf_dict[k] = result
        # the APCER is the worst (highest) per-PAI error-rate
        return (max(result_list), attack_perf_dict)
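
# A short usage sketch (hypothetical scores), illustrating that the APCER at a
# given attack-potential is the worst per-PAI error-rate at or below that potential:
#
#   metrics = PadIsoMetrics()
#   scores = {'real':   [np.array([0.9, 1.2])],
#             'print':  [np.array([-0.5, 0.1]), 'A'],
#             'replay': [np.array([0.2, 0.4]), 'B']}
#   apcer_c, per_pai = metrics.apcer(scores, 'C', score_threshold=0.0)
#   # per_pai == {'print': 0.5, 'replay': 1.0}; apcer_c == 1.0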
from .PadIsoMetrics import PadIsoMetrics
# to fix sphinx warnings of not able to find classes, when path is shortened
PadIsoMetrics.__module__ = "bob.pad.base.evaluation"
# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
File added
File added
'''
Created on 16 Nov 2016
@author: sbhatta
'''
import sys, os
import pkg_resources
import bob.io.base
import numpy as np
import bob.measure
from bob.pad.base.evaluation import PadIsoMetrics
def main(command_line_parameters=None):
    scorefile_devel = pkg_resources.resource_filename('bob.pad.base', 'test/data/pad_devel_replaymobile_IqmScores_SVM.hdf5')
    scorefile_test = pkg_resources.resource_filename('bob.pad.base', 'test/data/pad_test_replaymobile_IqmScores_SVM.hdf5')

    rms = PadIsoMetrics()
    devel_dict = rms.load_scores_hdf5(scorefile_devel)
    test_dict = rms.load_scores_hdf5(scorefile_test)

    # threshold and error-rates at the EER of the development set
    threshEER_dev, dev_far, dev_frr = rms.eer(devel_dict)
    eer_devel = 50.0 * (dev_far + dev_frr)
    print('threshEER_dev (grandtest): %s' % threshEER_dev)
    print('FRR, FAR (devel): %s %s' % (dev_frr, dev_far))
    print('EER (%%): %.3f%%' % eer_devel)

    # HTER on the test set, at the development-set EER threshold
    test_hter, test_far, test_frr = rms.hter(test_dict, threshEER_dev)
    print(" * FAR : %.3f%%" % (100 * test_far))
    print(" * FRR : %.3f%%" % (100 * test_frr))
    print(" * HTER: %.3f%%" % (100 * test_hter))

    # BPCER may be computed either from the full score-dictionary ...
    test_bpcer = 100.0 * rms.bpcer(test_dict, threshEER_dev)
    print('BPCER from dict: %.3f%%' % test_bpcer)
    # ... or directly from the 1D array of bona-fide scores
    bf_scores = test_dict['real'][0]
    test_bpcer = 100.0 * rms.bpcer(bf_scores, threshEER_dev)
    print('BPCER from np-array: %.3f%%' % test_bpcer)

    # APCER at attack-potential 'C', plus the per-PAI error-rates
    attack_apcer, attack_perf_dict = rms.apcer(test_dict, 'C', threshEER_dev)
    print('\nAPCER: %.3f%%' % (100.0 * attack_apcer))
    print('Performance for individual PAIs:')
    for k in attack_perf_dict.keys():
        print('%s: %.3f%%' % (k, 100.0 * attack_perf_dict[k]))


if __name__ == '__main__':
    main(sys.argv[1:])
@@ -39,6 +39,7 @@ nitpicky = True
# Ignores stuff we can't easily resolve on other project's sphinx manuals
nitpick_ignore = []
keep_warnings = True
# Allows the user to override warnings from a separate file
if os.path.exists('nitpick-exceptions.txt'):
@@ -86,6 +86,24 @@ Finally, the :py:class:`bob.pad.base.algorithm.Algorithm` class provides default
* ``score_for_multiple_projections(self, toscore)``: In case your object stores several features or scores, **call** this function to compute the average (or min, max, ...) of the scores.
Evaluation
~~~~~~~~~~
This package includes a class, `bob.pad.base.evaluation.PadIsoMetrics`, that can be used to compute the PAD metrics APCER and BPCER as defined in the ISO/IEC 30107-3 standard.
The most important methods of the class are: ``eer()``, ``hter()``, ``apcer()``, and ``bpcer()``.
The main point to note about these methods is that the input scores should be organized in a dictionary.
One dictionary should be created for each group ('devel', 'test', etc.).
The keys of the dictionary refer to the presentation-type ('bona-fide' or some presentation-attack-instrument (PAI)).
The value associated with each key is a tuple containing either one or two elements.
For each key corresponding to a PAI, the value should be a tuple of 2 elements: (scores, attack_potential), where 'scores' is a 1D numpy-array of scores corresponding to presentations of that PAI, and 'attack_potential' is a single letter, 'A', 'B', or 'C', signifying the attack-potential of the PAI.
For bona-fide presentations, no attack-potential is defined. Therefore, for a key representing bona-fide presentations, the value should be a tuple consisting of only one element: a 1D numpy-array of scores.
Consequently, a key for which the value is a tuple of length 1 is interpreted as representing a bona-fide presentation.
The methods ``eer()`` and ``hter()`` call the corresponding functions in `bob.measure` to compute the relevant thresholds and performance-measures based on the input score-dictionary.
The class also provides methods for saving the score-dictionaries in a hdf5 file (``save_scores_hdf5()``) and for loading such a file (``load_scores_hdf5()``), as sketched below.
For a complete example of how to use this class to evaluate a score-distribution, see the code provided in the file `bob/pad/base/test/test_PadIsoMetrics.py`.
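
A minimal usage sketch (the key names, score values, and file name below are hypothetical, chosen only to illustrate the dictionary layout):

.. code-block:: python

   import numpy as np
   from bob.pad.base.evaluation import PadIsoMetrics

   metrics = PadIsoMetrics()

   # one dictionary per group; bona-fide entries have length-1 values,
   # PAI entries carry (scores, attack_potential)
   devel = {'real':  [np.array([0.9, 1.2, 0.7])],
            'print': [np.array([-0.5, 0.1]), 'A']}

   thresh, far, frr = metrics.eer(devel)   # EER threshold on the 'devel' group
   apcer, per_pai = metrics.apcer(devel, 'C', thresh)
   bpcer = metrics.bpcer(devel, thresh)

   metrics.save_scores_hdf5('devel_scores.hdf5', devel)
   reloaded = metrics.load_scores_hdf5('devel_scores.hdf5')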
Implemented Tools
-----------------
@@ -44,6 +44,12 @@ Algorithms
.. automodule:: bob.pad.base.algorithm
Evaluation
~~~~~~~~~~
.. automodule:: bob.pad.base.evaluation
Databases
---------
@@ -35,6 +35,12 @@ Scoring
.. autosummary::
bob.bio.base.tools.compute_scores
Evaluation
~~~~~~~~~~
.. autosummary::
bob.pad.base.evaluation.PadIsoMetrics
Details
-------