diff --git a/bob/pad/base/__init__.py b/bob/pad/base/__init__.py
index 722c959bb7670ba1fe57e8e79270378841143816..c4f2695a24c5523e775a5a28c1570b31b8681fa0 100644
--- a/bob/pad/base/__init__.py
+++ b/bob/pad/base/__init__.py
@@ -1,6 +1,7 @@
 from . import database
 from . import algorithm
 from . import tools
+from . import evaluation
 from . import script
 from . import test
diff --git a/bob/pad/base/evaluation/PadIsoMetrics.py b/bob/pad/base/evaluation/PadIsoMetrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4b0544d06bccf5797f2bb7435cdfb29bc7c70b4
--- /dev/null
+++ b/bob/pad/base/evaluation/PadIsoMetrics.py
@@ -0,0 +1,212 @@
+'''
+Created on 16 Nov 2016
+
+@author: sbhatta
+'''
+
+import sys, os
+import bob.io.base
+import numpy as np
+import bob.measure
+
+class PadIsoMetrics():
+
+    def __init__(self):
+        """ constructor. """
+
+        self.real_name = 'bonafide'
+        self.attack_name = 'attack'
+
+    def save_scores_hdf5(self, outfile, scores_dict):
+        """ saves the input scores_dict dictionary in an HDF5-formatted file"""
+
+        h5out = bob.io.base.HDF5File(outfile, "w")
+
+        for p in scores_dict.keys():
+            if len(scores_dict[p]) == 1: # bona-fide set
+                h5out.set(p, scores_dict[p][0])
+                h5out.set_attribute('presentation', self.real_name, p)
+            else:
+                # write attacks
+                h5out.set(p, scores_dict[p][0])
+                h5out.set_attribute('attack_potential', scores_dict[p][1], p)
+                h5out.set_attribute('presentation', self.attack_name, p)
+
+        del h5out
+
+    def load_scores_hdf5(self, infile):
+        """ loads an HDF5 file and tries to construct a dictionary of scores. Returns the score-dictionary."""
+
+        h5in = bob.io.base.HDF5File(infile, "r")
+
+        scores_dict = {}
+        h5in.cd('/')
+        class_labels = h5in.keys(relative=True)
+        for p in class_labels:
+            scores = h5in.get(p)
+            attrs = h5in.get_attributes(p)
+            if len(attrs) == 2: # the two attributes are 'presentation' and 'attack_potential'
+                ap = attrs['attack_potential']
+                scores_dict[p] = [scores, ap]
+            else:
+                scores_dict[p] = [scores]
+
+        del h5in
+        return scores_dict
+
+
+    def eer(self, scores_dict):
+        """ computes the EER threshold using the scores in the supplied dictionary
+        Input:
+            scores_dict: dictionary where each key is the name of the presentation ('real' or one attack-type),
+                and the corresponding value is a list: [scores, attack_potential].
+                'scores' should be a 1D numpy-array of floats containing scores.
+                'attack_potential' should be one of the three letters 'A', 'B', or 'C'.
+                Scores for 'real' presentations do not have an associated 'attack_potential',
+                so, if the value of a key is a list of length 1, the key-value pair is assumed
+                to represent a 'real'-presentation set.
+        Return:
+            tuple of three floats: (eer_threshold, far, frr). These are computed using functions from bob.measure.
+        """
+
+        real_scores = None
+        attack_scores = None
+        assert scores_dict is not None, 'no development score-set provided for computing EER'
+
+        for k in scores_dict.keys():
+            key_value = scores_dict[k]
+            if len(key_value) == 2:
+                if attack_scores is None:
+                    attack_scores = scores_dict[k][0]
+                else:
+                    attack_scores = np.concatenate((attack_scores, scores_dict[k][0]))
+            elif len(key_value) == 1:
+                real_scores = scores_dict[k][0]
+
+        assert (attack_scores is not None), 'Empty attack-scores list. Cannot compute EER.'
+        assert (real_scores is not None), 'Empty real-scores list. Cannot compute EER.'
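+        # bob.measure.eer_threshold() expects (negatives, positives) and returns the
+        # decision threshold at which FAR and FRR are (as nearly as possible) equal.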
+        self.threshEER_dev = bob.measure.eer_threshold(attack_scores, real_scores)
+        self.dev_far, self.dev_frr = bob.measure.farfrr(attack_scores, real_scores, self.threshEER_dev)
+        return (self.threshEER_dev, self.dev_far, self.dev_frr)
+
+
+    def hter(self, scores_dict, score_threshold):
+        """ computes the HTER on test-set scores, using the supplied score-threshold.
+        Inputs:
+            scores_dict: dictionary where each key is the name of the presentation ('real' or one attack-type),
+                and the corresponding value is a list: [scores, attack_potential].
+                'scores' should be a 1D numpy-array of floats containing scores.
+                'attack_potential' should be one of the three letters 'A', 'B', or 'C'.
+                Scores for 'real' presentations do not have an associated 'attack_potential',
+                so, if the value of a key is a list of length 1, the key-value pair is assumed
+                to represent a 'real'-presentation set.
+            score_threshold: (float) value to be used for thresholding scores.
+        Return:
+            tuple of three floats: (hter, far, frr). These are computed using functions from bob.measure.
+        """
+
+        assert ((score_threshold is not None) and isinstance(score_threshold, (int, float))), 'input score_threshold should be a number (float or integer).'
+
+        real_scores = None
+        attack_scores = None
+        assert scores_dict is not None, 'no test score-set available for computing HTER'
+
+        for k in scores_dict.keys():
+            key_value = scores_dict[k]
+            if len(key_value) == 2:
+                if attack_scores is None:
+                    attack_scores = scores_dict[k][0]
+                else:
+                    attack_scores = np.concatenate((attack_scores, scores_dict[k][0]))
+            elif len(key_value) == 1:
+                real_scores = scores_dict[k][0]
+
+        assert (attack_scores is not None), 'Empty attack-scores list. Cannot compute HTER.'
+        assert (real_scores is not None), 'Empty real-scores list. Cannot compute HTER.'
+        test_far, test_frr = bob.measure.farfrr(attack_scores, real_scores, score_threshold)
+        hter = (test_far + test_frr)/2.0
+
+        return (hter, test_far, test_frr)
+
+
+    def _check_attack_potential(self, attack_potential):
+        """ For now, we assume three levels of attack-potential: 'C' > 'B' > 'A'.
+            Unknown or missing values default to 'C'. """
+
+        if attack_potential is None:
+            attack_potential = 'C'
+        if attack_potential not in ['A', 'B', 'C']:
+            attack_potential = 'C'
+
+        return attack_potential
+
+
+    def bpcer(self, scores, score_threshold=0.0):
+        """ computes the BPCER on the input scores, using the supplied score-threshold.
+        Inputs:
+            scores: either a 1D numpy-array of scores corresponding to genuine (bona-fide) presentations,
+                or a score-dictionary as described for eer(), from which the bona-fide scores are extracted.
+            score_threshold: a floating-point number specifying the score-threshold to be used for deciding accept/reject.
+
+        Return:
+            floating-point number representing the BPCER computed for the input score-set
+        """
+
+        bonafide_scores = None
+        if isinstance(scores, dict):
+            # extract bona-fide scores from the dictionary
+            for k in scores.keys():
+                key_value = scores[k]
+                if len(key_value) == 1:
+                    bonafide_scores = key_value[0]
+        else:
+            # verify that scores is a 1D numpy array
+            if isinstance(scores, np.ndarray) and len(scores.shape) == 1:
+                bonafide_scores = scores
+
+        assert bonafide_scores is not None, 'input scores do not contain bona-fide scores; cannot compute BPCER.'
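+        # BPCER (ISO/IEC 30107-3) = (number of bona-fide presentations wrongly rejected) /
+        # (total number of bona-fide presentations); here a bona-fide presentation is
+        # rejected when its score falls below the threshold.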
+        assert isinstance(score_threshold, (int, float)), 'input score_threshold should be a number (float or integer).'
+
+        num_rejected = bonafide_scores[bonafide_scores < score_threshold].shape[0]
+
+        return num_rejected/float(bonafide_scores.shape[0])
+
+
+    def apcer(self, scores_dict, attack_potential='C', score_threshold=0.0):
+        """computes the APCER as defined in the ISO/IEC 30107-3 standard. For now, we assume three levels of attack-potential: 'C' > 'B' > 'A'.
+
+        Inputs:
+            scores_dict: a dictionary where each key corresponds to a specific PAI (presentation-attack-instrument).
+                Keys corresponding to PAIs will have as value a list of 2 elements:
+                    1st element: a 1D numpy-array of scores
+                    2nd element: a single letter 'A', 'B', or 'C', specifying the attack-potential of the PAI.
+
+            attack_potential: a letter 'A', 'B', or 'C', specifying the attack-potential at which the APCER is to be computed
+            score_threshold: a floating-point number specifying the score-threshold to be used for deciding accept/reject.
+
+        Returns:
+            tuple consisting of 2 elements:
+                1st element: apcer at the specified attack-potential
+                2nd element: dictionary of the APCER of each individual PAI with attack-potential at or below the input-parameter attack_potential.
+        """
+
+        attack_potential = self._check_attack_potential(attack_potential)
+
+        attack_perf_dict = {} # stores the APCER of each attack-type at or below the specified attack-potential
+        result_list = []
+        for k in scores_dict.keys():
+            if len(scores_dict[k]) == 2: # consider only the keys where the value is a list of 2 elements
+                if scores_dict[k][1] <= attack_potential: # alphabetical comparison matches the ordering 'A' < 'B' < 'C'
+                    scores = scores_dict[k][0]
+                    # per-PAI APCER: fraction of attack-presentations accepted as bona-fide
+                    result = (scores[scores >= score_threshold].shape[0])/float(scores.shape[0])
+                    result_list.append(result)
+                    attack_perf_dict[k] = result
+
+        assert len(result_list) > 0, 'scores_dict contains no PAI at or below the specified attack-potential. Cannot compute APCER.'
+        return (max(result_list), attack_perf_dict)
+
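+
+# Example usage (a minimal sketch; the file names and dictionary keys are
+# illustrative, not shipped with the package):
+#
+#   metrics = PadIsoMetrics()
+#   dev_scores = metrics.load_scores_hdf5('pad_devel_scores.hdf5')
+#   threshold, far, frr = metrics.eer(dev_scores)
+#   test_scores = metrics.load_scores_hdf5('pad_test_scores.hdf5')
+#   apcer, per_pai = metrics.apcer(test_scores, 'C', threshold)
+#   bpcer = metrics.bpcer(test_scores, threshold)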
diff --git a/bob/pad/base/evaluation/__init__.py b/bob/pad/base/evaluation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..be2c91db240bbd32163443621c27a7cf9d9c4beb
--- /dev/null
+++ b/bob/pad/base/evaluation/__init__.py
@@ -0,0 +1,7 @@
+from .PadIsoMetrics import PadIsoMetrics
+
+# to fix sphinx warnings about classes it cannot find, when the path is shortened
+PadIsoMetrics.__module__ = "bob.pad.base.evaluation"
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
+
diff --git a/bob/pad/base/test/data/pad_devel_replaymobile_IqmScores_SVM.hdf5 b/bob/pad/base/test/data/pad_devel_replaymobile_IqmScores_SVM.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..ca47cc21cc9da401223c42079457dba58173a263
Binary files /dev/null and b/bob/pad/base/test/data/pad_devel_replaymobile_IqmScores_SVM.hdf5 differ
diff --git a/bob/pad/base/test/data/pad_test_replaymobile_IqmScores_SVM.hdf5 b/bob/pad/base/test/data/pad_test_replaymobile_IqmScores_SVM.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..5eda8360405750b027d1a9fbaaa50de866dcff85
Binary files /dev/null and b/bob/pad/base/test/data/pad_test_replaymobile_IqmScores_SVM.hdf5 differ
diff --git a/bob/pad/base/test/test_PadIsoMetrics.py b/bob/pad/base/test/test_PadIsoMetrics.py
new file mode 100644
index 0000000000000000000000000000000000000000..a167fb0b2aff0185b45857cd9ff23145fb3df2e7
--- /dev/null
+++ b/bob/pad/base/test/test_PadIsoMetrics.py
@@ -0,0 +1,58 @@
+'''
+Created on 16 Nov 2016
+
+@author: sbhatta
+'''
+
+import sys, os
+import pkg_resources
+import bob.io.base
+import numpy as np
+import bob.measure
+from bob.pad.base.evaluation import PadIsoMetrics
+
+
+def main(command_line_parameters=None):
+
+    scorefile_devel = pkg_resources.resource_filename('bob.pad.base', 'test/data/pad_devel_replaymobile_IqmScores_SVM.hdf5')
+    scorefile_test = pkg_resources.resource_filename('bob.pad.base', 'test/data/pad_test_replaymobile_IqmScores_SVM.hdf5')
+
+    rms = PadIsoMetrics()
+
+    devel_dict = rms.load_scores_hdf5(scorefile_devel)
+    test_dict = rms.load_scores_hdf5(scorefile_test)
+
+    threshEER_dev, dev_far, dev_frr = rms.eer(devel_dict)
+
+    eer_devel = 50.0*(dev_far + dev_frr)
+    print('threshEER_dev (grandtest): %s' % threshEER_dev)
+    print('FRR, FAR (devel): %s %s' % (dev_frr, dev_far))
+    print('EER: %.3f%%' % eer_devel)
+
+    test_hter, test_far, test_frr = rms.hter(test_dict, threshEER_dev)
+    print(" * FAR : %.3f%%" % (100*test_far))
+    print(" * FRR : %.3f%%" % (100*test_frr))
+    print(" * HTER: %.3f%%" % (100*test_hter))
+
+    test_bpcer = 100.0*rms.bpcer(test_dict, threshEER_dev)
+    print('BPCER from dict: %.3f%%' % test_bpcer)
+
+    bf_scores = test_dict['real'][0]
+    test_bpcer = 100.0*rms.bpcer(bf_scores, threshEER_dev)
+    print('BPCER from np-array: %.3f%%' % test_bpcer)
+
+    attack_apcer, attack_perf_dict = rms.apcer(test_dict, 'C', threshEER_dev)
+    print('\nAPCER: %.3f%%' % (100.0*attack_apcer))
+    print('Performance for individual PAIs:')
+    for k in attack_perf_dict.keys():
+        print('%s: %.3f%%' % (k, 100.0*attack_perf_dict[k]))
+
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/doc/conf.py b/doc/conf.py
index 5e0205c46f71d8c92ce1fdfc0d8c3851baac9e09..a00bb35b5cd00d91fd0c9d5c9dc189be428575ce 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -39,6 +39,7 @@ nitpicky = True
 
 # Ignores stuff we can't easily resolve on other project's sphinx manuals
 nitpick_ignore = []
+keep_warnings = True
 
 # Allows the user to override warnings from a separate file
 if os.path.exists('nitpick-exceptions.txt'):
@@ -261,4 +262,4 @@ def member_function_test(app, what, name, obj, skip, options):
   return False
 
 def setup(app):
-  app.connect('autodoc-skip-member', member_function_test)
\ No newline at end of file
+  app.connect('autodoc-skip-member', member_function_test)
diff --git a/doc/implementation.rst b/doc/implementation.rst
index 599ec462531fb01f54d323c4bb470acfa5034073..1b1a3c6a57320ef4de5dab601fe0ed879bba11e5 100644
--- a/doc/implementation.rst
+++ b/doc/implementation.rst
@@ -86,6 +86,24 @@ Finally, the :py:class:`bob.pad.base.algorithm.Algorithm` class provides default
 
 * ``score_for_multiple_projections(self, toscore)``: In case your object store several features or scores, **call** this function to compute the average (or min, max, ...) of the scores.
 
+Evaluation
+~~~~~~~~~~
+This package includes the class `bob.pad.base.evaluation.PadIsoMetrics`, which can be used to compute the PAD metrics APCER and BPCER defined in the ISO/IEC 30107 part 3 standard.
+The most important methods of the class are ``eer()``, ``hter()``, ``apcer()``, and ``bpcer()``.
+These methods expect the input scores to be organized in a dictionary, with one dictionary per group ('devel', 'test', etc.).
+The keys of the dictionary name the presentation-type ('bona-fide' or some presentation-attack-instrument (PAI)).
+The value associated with each key is a list containing either one or two elements.
+For each key corresponding to a PAI, the value should be a list of two elements, (scores, attack_potential), where 'scores' is a 1D numpy-array of scores for presentations of that PAI, and 'attack_potential' is a single letter, 'A', 'B', or 'C', signifying the attack-potential of the PAI.
+For bona-fide presentations no attack-potential is defined, so the value for a key representing bona-fide presentations consists of only one element: a 1D numpy-array of scores.
+Consequently, a key for which the value has length 1 is interpreted as representing a bona-fide presentation.
+
+The methods ``eer()`` and ``hter()`` call the corresponding functions in `bob.measure` to compute the relevant thresholds and performance-measures from the input score-dictionary.
+
+The class also provides methods for saving the score-dictionaries in an HDF5 file (``save_scores_hdf5()``) and for loading such a file (``load_scores_hdf5()``).
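+
+The following sketch (key names, attack-potential labels, and score values are purely illustrative) shows how such a score-dictionary may be constructed and evaluated:
+
+.. code-block:: python
+
+   import numpy as np
+   from bob.pad.base.evaluation import PadIsoMetrics
+
+   metrics = PadIsoMetrics()
+
+   # one key per presentation-type; bona-fide scores carry no attack-potential
+   scores = {'bonafide': [np.array([2.1, 1.7, 2.5])],
+             'print':    [np.array([-1.2, 0.3, -0.7]), 'A'],
+             'replay':   [np.array([-0.4, 0.9, -1.1]), 'B']}
+
+   threshold, far, frr = metrics.eer(scores)   # EER threshold on this set
+   apcer, per_pai = metrics.apcer(scores, 'C', threshold)
+   bpcer = metrics.bpcer(scores, threshold)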
+For a complete example of how to use this class to evaluate a score-distribution, see the code provided in the file `bob/pad/base/test/test_PadIsoMetrics.py`.
+
 Implemented Tools
 -----------------
diff --git a/doc/implemented.rst b/doc/implemented.rst
index 7228f2b51b85b5cedbe54cce12ecc08f7161cdec..9beafd992485d51cd40723906e0fa9a3e0f16ab5 100644
--- a/doc/implemented.rst
+++ b/doc/implemented.rst
@@ -44,6 +44,12 @@ Algorithms
 
 .. automodule:: bob.pad.base.algorithm
 
+Evaluation
+~~~~~~~~~~
+
+.. automodule:: bob.pad.base.evaluation
+
 
 Databases
 ---------
diff --git a/doc/py_api.rst b/doc/py_api.rst
index 144fa54d598b2ee345e508b910a43e010082ca50..619f7511b8dfafce95adea51f8ac1ba13619abb0 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -35,6 +35,12 @@ Scoring
 
 .. autosummary:: bob.bio.base.tools.compute_scores
 
+Evaluation
+~~~~~~~~~~
+
+.. autosummary::
+   bob.pad.base.evaluation.PadIsoMetrics
+
 Details
 -------