Commit 6b07fba1 authored by Pavel KORSHUNOV's avatar Pavel KORSHUNOV

Merge branch 'iso_metrics' into 'master'

Iso metrics

This branch includes code for computing ISO metrics APCER and BPCER. Please merge it to master.

See merge request !6
parents 1af7a507 f3e6ae8d
Pipeline #5685 passed with stages
in 10 minutes and 45 seconds
from . import database
from . import algorithm
from . import tools
from . import evaluation
from . import script
from . import test
......
'''
Created on 16 Nov 2016
@author: sbhatta
'''
import sys, os
import bob.io.base
import numpy as np
import bob.measure
class PadIsoMetrics(object):
    """Compute presentation-attack-detection (PAD) metrics from score dictionaries.

    The class offers EER and HTER (delegating to ``bob.measure``), the APCER
    and BPCER rates, and helpers to store / load score dictionaries in HDF5
    files through ``bob.io.base``.

    A *score dictionary* maps a presentation name to a list (or tuple):

    * ``[scores]`` (one element): a bona-fide ('real') score set;
    * ``[scores, attack_potential]`` (two elements): the score set of one
      presentation-attack instrument (PAI) and its attack potential, one of
      the letters 'A', 'B' or 'C'.

    ``scores`` is expected to be a 1D numpy array of floats. The metric
    methods ignore entries whose value has any other length.
    """

    # Supported attack-potential levels; they compare as plain strings, so
    # 'A' < 'B' < 'C'.
    _ATTACK_POTENTIALS = ('A', 'B', 'C')

    # Types accepted as a score threshold. The numpy scalar types are listed
    # explicitly because thresholds frequently come out of numpy computations;
    # the Python-2-only ``long`` is deliberately absent (it is undefined on
    # Python 3 and used to make hter() / bpcer() fail with a NameError).
    _NUMBER_TYPES = (int, float, np.integer, np.floating)

    def __init__(self):
        """Set the labels stored in the 'presentation' attribute of saved score sets."""
        self.real_name = 'bonafide'
        self.attack_name = 'attack'

    @staticmethod
    def _require(condition, message):
        """Raise ``AssertionError(message)`` unless ``condition`` holds.

        Used instead of the ``assert`` statement so that input validation is
        not stripped when Python runs with ``-O``, while callers still see the
        same exception type as before.
        """
        if not condition:
            raise AssertionError(message)

    @staticmethod
    def _pool(score_sets):
        """Merge a list of score arrays into one; return None for an empty list."""
        if not score_sets:
            return None
        if len(score_sets) == 1:
            # A single set is handed back untouched (no copy).
            return score_sets[0]
        return np.concatenate(score_sets)

    @classmethod
    def _split_scores(cls, scores_dict):
        """Pool the entries of a score dictionary by presentation kind.

        Returns:
            tuple ``(real_scores, attack_scores)``. Each element is the
            concatenation (in dictionary iteration order) of all score sets of
            that kind, or None when the dictionary holds no such entry.
            Several bona-fide entries are pooled just like attack entries.
        """
        entries = list(scores_dict.values())
        real_sets = [entry[0] for entry in entries if len(entry) == 1]
        attack_sets = [entry[0] for entry in entries if len(entry) == 2]
        return cls._pool(real_sets), cls._pool(attack_sets)

    def save_scores_hdf5(self, outfile, scores_dict):
        """Save the score dictionary ``scores_dict`` in the HDF5 file ``outfile``.

        Every key becomes a dataset holding the scores. A one-element value is
        tagged with the attribute ``presentation=self.real_name``; any other
        value is written as an attack, with the attributes ``attack_potential``
        (second element of the value) and ``presentation=self.attack_name``.
        """
        h5out = bob.io.base.HDF5File(outfile, "w")
        try:
            for name, entry in scores_dict.items():
                h5out.set(name, entry[0])
                if len(entry) == 1:  # bona-fide set
                    h5out.set_attribute('presentation', self.real_name, name)
                else:  # attack set
                    h5out.set_attribute('attack_potential', entry[1], name)
                    h5out.set_attribute('presentation', self.attack_name, name)
        finally:
            # Release the file handle even when a write fails part-way.
            del h5out

    def load_scores_hdf5(self, infile):
        """Load the HDF5 file ``infile`` and rebuild a score dictionary from it.

        A dataset carrying an ``attack_potential`` attribute is returned as
        ``[scores, attack_potential]``; any other dataset as ``[scores]``.

        Returns:
            the score dictionary, keyed by the dataset names found at the
            root of the file.
        """
        h5in = bob.io.base.HDF5File(infile, "r")
        try:
            scores_dict = {}
            h5in.cd('/')
            for name in h5in.keys(relative=True):
                scores = h5in.get(name)
                attrs = h5in.get_attributes(name)
                # Decide on the attribute itself rather than on the number of
                # attributes, so that extra attributes do not turn an attack
                # set into a bona-fide one.
                if 'attack_potential' in attrs:
                    scores_dict[name] = [scores, attrs['attack_potential']]
                else:
                    scores_dict[name] = [scores]
        finally:
            # Release the file handle even when reading fails part-way.
            del h5in
        return scores_dict

    def eer(self, scores_dict):
        """Compute the EER threshold for the scores in ``scores_dict``.

        All attack sets are pooled as negatives and the bona-fide scores are
        used as positives for ``bob.measure.eer_threshold`` and
        ``bob.measure.farfrr``. The results are also stored on the instance
        as ``threshEER_dev``, ``dev_far`` and ``dev_frr``.

        Inputs:
            scores_dict: score dictionary (see the class documentation),
                typically of the development set.

        Return:
            tuple of three floats: (eer_threshold, far, frr).

        Raises:
            AssertionError: if ``scores_dict`` is None or lacks either attack
                or bona-fide scores.
        """
        self._require(scores_dict is not None,
                      'no development score-set provided for computing EER')
        real_scores, attack_scores = self._split_scores(scores_dict)
        self._require(attack_scores is not None,
                      'Empty attack-scores list. Cannot compute EER')
        self._require(real_scores is not None,
                      'Empty real-scores list. Cannot compute EER.')

        self.threshEER_dev = bob.measure.eer_threshold(attack_scores, real_scores)
        self.dev_far, self.dev_frr = bob.measure.farfrr(attack_scores, real_scores, self.threshEER_dev)
        return (self.threshEER_dev, self.dev_far, self.dev_frr)

    def hter(self, scores_dict, score_threshold):
        """Compute the HTER of the scores in ``scores_dict`` at ``score_threshold``.

        Inputs:
            scores_dict: score dictionary (see the class documentation),
                typically of the test set.
            score_threshold: number (int or float, numpy scalars included)
                used for thresholding the scores.

        Return:
            tuple of three floats: (hter, far, frr), where far and frr come
            from ``bob.measure.farfrr`` and hter is their mean.

        Raises:
            AssertionError: if the threshold is not a number, or if
                ``scores_dict`` is None or lacks either attack or bona-fide
                scores.
        """
        self._require(
            score_threshold is not None and isinstance(score_threshold, self._NUMBER_TYPES),
            'input score_threshold should be a number (float or integer).')
        self._require(scores_dict is not None,
                      'no test score-set available for computing HTER')
        real_scores, attack_scores = self._split_scores(scores_dict)
        self._require(attack_scores is not None,
                      'Empty attack-scores list. Cannot compute HTER')
        self._require(real_scores is not None,
                      'Empty real-scores list. Cannot compute HTER.')

        test_far, test_frr = bob.measure.farfrr(attack_scores, real_scores, score_threshold)
        hter = (test_far + test_frr) / 2.0
        return (hter, test_far, test_frr)

    def _check_attack_potential(self, attack_potential):
        """Return ``attack_potential`` if it is 'A', 'B' or 'C'; otherwise 'C'.

        For now, three levels of attack-potential are assumed: 'C'>'B'>'A'.
        None and any unknown value fall back to the highest level, 'C'.
        """
        if attack_potential in self._ATTACK_POTENTIALS:
            return attack_potential
        return 'C'

    def bpcer(self, scores, score_threshold=0.0):
        """Compute the BPCER of bona-fide scores at the supplied threshold.

        The BPCER is the fraction of bona-fide scores that fall strictly
        below ``score_threshold``.

        Inputs:
            scores: either a 1D numpy array of bona-fide scores, or a score
                dictionary (see the class documentation) from which the
                bona-fide entries are taken.
            score_threshold: number (int or float, numpy scalars included)
                used for deciding accept/reject. Defaults to 0.0.

        Return:
            floating-point number representing the BPCER of the input scores.

        Raises:
            AssertionError: if no bona-fide scores can be extracted from
                ``scores`` or the threshold is not a number.
        """
        bonafide_scores = None
        if isinstance(scores, dict):
            bonafide_scores, _ = self._split_scores(scores)
        elif isinstance(scores, np.ndarray) and scores.ndim == 1:
            bonafide_scores = scores

        self._require(bonafide_scores is not None,
                      'input scores does not contain bona-fide scores, for computing BPCER.')
        self._require(isinstance(score_threshold, self._NUMBER_TYPES),
                      'input score_threshold should be a number (float or integer).')

        # Bona-fide presentations scoring below the threshold are the ones
        # wrongly rejected.
        rejected = np.count_nonzero(bonafide_scores < score_threshold)
        return rejected / float(bonafide_scores.shape[0])

    def apcer(self, scores_dict, attack_potential='C', score_threshold=0.0):
        """Compute the APCER at a given attack potential.

        For every PAI whose attack potential is at or below
        ``attack_potential`` the fraction of its scores that are greater than
        or equal to ``score_threshold`` is computed; the APCER is the largest
        of these per-PAI rates. For now, three levels of attack-potential are
        assumed: 'C'>'B'>'A'.

        Inputs:
            scores_dict: score dictionary (see the class documentation). Only
                two-element entries (PAIs) are considered.
            attack_potential: a letter 'A', 'B', or 'C'; any other value is
                treated as 'C'.
            score_threshold: number used for deciding accept/reject.

        Returns:
            tuple consisting of 2 elements:
                1st element: APCER at the specified attack potential;
                2nd element: dictionary mapping each considered PAI to its
                    own rate.

        Raises:
            ValueError: if ``scores_dict`` holds no PAI at or below the
                requested attack potential.
        """
        attack_potential = self._check_attack_potential(attack_potential)

        attack_perf_dict = {}
        for name, entry in scores_dict.items():
            if len(entry) == 2 and entry[1] <= attack_potential:
                pai_scores = entry[0]
                accepted = np.count_nonzero(pai_scores >= score_threshold)
                attack_perf_dict[name] = accepted / float(pai_scores.shape[0])

        if not attack_perf_dict:
            raise ValueError(
                'no PAI with attack-potential at or below %r found. Cannot compute APCER.'
                % (attack_potential,))
        return (max(attack_perf_dict.values()), attack_perf_dict)
from .PadIsoMetrics import PadIsoMetrics
# Report the class as belonging to the package namespace instead of the
# PadIsoMetrics submodule: fixes sphinx warnings of not being able to find
# classes when the import path is shortened.
PadIsoMetrics.__module__ = "bob.pad.base.evaluation"
# Export every name currently visible in this module that does not start with
# an underscore. Gets sphinx autodoc done right - don't remove it.
__all__ = [_ for _ in dir() if not _.startswith('_')]
'''
Created on 16 Nov 2016
@author: sbhatta
'''
import sys, os
import pkg_resources
import bob.io.base
import numpy as np
import bob.measure
from bob.pad.base.evaluation import PadIsoMetrics
#def main(arguments):
def main(command_line_parameters=None):
    """Print EER, HTER, BPCER and APCER figures for the packaged example scores.

    The development and test score files shipped in the ``bob.pad.base``
    test data are loaded; the EER threshold is computed on the development
    scores and then applied to the test scores. All results are printed.

    ``command_line_parameters`` is accepted but not used.
    """
    devel_path = pkg_resources.resource_filename('bob.pad.base', 'test/data/pad_devel_replaymobile_IqmScores_SVM.hdf5')
    test_path = pkg_resources.resource_filename('bob.pad.base', 'test/data/pad_test_replaymobile_IqmScores_SVM.hdf5')

    metrics = PadIsoMetrics()
    devel_scores = metrics.load_scores_hdf5(devel_path)
    test_scores = metrics.load_scores_hdf5(test_path)

    # Threshold selection on the development set.
    threshold, far_devel, frr_devel = metrics.eer(devel_scores)
    eer_percent = 50.0*(far_devel + frr_devel)
    print('threshEER_dev (grandtest): %s' % threshold)
    print('FRR, FAR (devel): %s %s' % (frr_devel, far_devel))
    print('EER (%%): %.3f%%' % eer_percent)

    # Classical error rates on the test set, at the development threshold.
    hter_test, far_test, frr_test = metrics.hter(test_scores, threshold)
    print(" * FAR : %.3f%%" % (100*far_test))
    print(" * FRR : %.3f%%" % (100*frr_test))
    print(" * HTER: %.3f%%" % (100*hter_test))

    # BPCER, once from the whole dictionary and once from the bare array.
    bpcer_percent = 100.0*metrics.bpcer(test_scores, threshold)
    print('BPCER from dict: %.3f%%' % bpcer_percent )
    real_scores = test_scores['real'][0]
    bpcer_percent = 100.0*metrics.bpcer(real_scores, threshold)
    print('BPCER from np-array: %.3f%%' % bpcer_percent )

    # APCER over all attack potentials, followed by the per-PAI breakdown.
    apcer_value, per_pai_rates = metrics.apcer(test_scores, 'C', threshold)
    print('\nAPCER: %.3f%%' % (100.0*apcer_value) )
    print('Performance for individual PAIs:')
    for pai_name, pai_rate in per_pai_rates.items():
        print('%s: %.3f%%' %(pai_name, 100.0*pai_rate))
'''
'''
# When this file is executed directly as a script, call main() with the
# command-line arguments (program name excluded).
if __name__ == '__main__':
    main(sys.argv[1:])
......@@ -39,6 +39,7 @@ nitpicky = True
# Ignores stuff we can't easily resolve on other project's sphinx manuals
nitpick_ignore = []
keep_warnings = True
# Allows the user to override warnings from a separate file
if os.path.exists('nitpick-exceptions.txt'):
......@@ -261,4 +262,4 @@ def member_function_test(app, what, name, obj, skip, options):
return False
def setup(app):
app.connect('autodoc-skip-member', member_function_test)
\ No newline at end of file
app.connect('autodoc-skip-member', member_function_test)
......@@ -86,6 +86,24 @@ Finally, the :py:class:`bob.pad.base.algorithm.Algorithm` class provides default
* ``score_for_multiple_projections(self, toscore)``: In case your object store several features or scores, **call** this function to compute the average (or min, max, ...) of the scores.
Evaluation
~~~~~~~~~~
This package includes a class, :py:class:`bob.pad.base.evaluation.PadIsoMetrics`, that can be used to compute the PAD metrics APCER and BPCER as defined in the ISO/IEC 30107-3 standard.
The most important methods in the class are: ``eer()``, ``hter()``, ``apcer()``, and ``bpcer()``.
The main point to note about these methods is that the input-scores should be organized in a dictionary.
One dictionary should be created for each group ('devel', 'test', etc.).
The keys of the dictionary refer to the presentation-type ('bona-fide' or some presentation-attack-instrument (PAI)).
The value associated with each key is a tuple, containing either one or two elements.
For each key corresponding to a PAI, the value should be a tuple of 2 elements: (scores, attack_potential), where 'scores' is a 1D numpy-array of scores corresponding to presentations of that PAI, and 'attack_potential' is a single letter, either 'A', or 'B', or 'C', signifying the attack-potential of the PAI.
For bona-fide presentations, no attack-potential is defined. Therefore, for a key representing bona-fide presentations, the value will be a tuple consisting of only one element: a 1D numpy-array of scores.
Consequently, a key for which the value is a tuple of length 1 is interpreted as representing a bona-fide presentation.
The methods ``eer()`` and ``hter()`` call the corresponding functions in `bob.measure` to compute the relevant thresholds and performance-measures, based on the input score-dictionary.
The class also provides methods for saving the score-dictionaries in a hdf5-file (``save_scores_hdf5()``), and for loading such a file (``load_scores_hdf5()``).
For an example of how to use this class to evaluate a score-distribution, see the code provided in the file ``bob.pad.base/bob/pad/base/test/test_PadIsoMetrics.py``.
Implemented Tools
-----------------
......
......@@ -44,6 +44,12 @@ Algorithms
.. automodule:: bob.pad.base.algorithm
Evaluation
~~~~~~~~~~
.. automodule:: bob.pad.base.evaluation
Databases
---------
......
......@@ -35,6 +35,12 @@ Scoring
.. autosummary::
bob.bio.base.tools.compute_scores
Evaluation
~~~~~~~~~~
.. autosummary::
bob.pad.base.evaluation.PadIsoMetrics
Details
-------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment