__init__.py 7.66 KB
 Manuel Günther committed Aug 20, 2014 1 2 # import Libraries of other lib packages import bob.math  Manuel Günther committed May 21, 2015 3 import bob.io.base  Manuel Günther committed Aug 20, 2014 4   André Anjos committed Dec 11, 2013 5 from ._library import *  André Anjos committed Mar 22, 2014 6 7 from . import version from .version import module as __version__  André Anjos committed Nov 21, 2013 8 9 10 11  from . import plot from . import load from . import calibration  Manuel Günther committed Sep 17, 2015 12 from . import openbr  André Anjos committed Nov 21, 2013 13 14 15 import numpy def mse (estimation, target):  Manuel Günther committed Nov 25, 2015 16 17 18  """mse(estimation, target) -> error Calculates the mean square error between a set of outputs and target  André Anjos committed Nov 21, 2013 19 20 21 22 23 24 25 26 27 28 29 30 31 32  values using the following formula: .. math:: MSE(\hat{\Theta}) = E[(\hat{\Theta} - \Theta)^2] Estimation (:math:\hat{\Theta}) and target (:math:\Theta) are supposed to have 2 dimensions. Different examples are organized as rows while different features in the estimated values or targets are organized as different columns. """ return numpy.mean((estimation - target)**2, 0) def rmse (estimation, target):  Manuel Günther committed Nov 25, 2015 33 34 35  """rmse(estimation, target) -> error Calculates the root mean square error between a set of outputs and target  André Anjos committed Nov 21, 2013 36 37 38 39 40 41 42 43 44 45 46 47 48 49  values using the following formula: .. math:: RMSE(\hat{\Theta}) = \sqrt(E[(\hat{\Theta} - \Theta)^2]) Estimation (:math:\hat{\Theta}) and target (:math:\Theta) are supposed to have 2 dimensions. Different examples are organized as rows while different features in the estimated values or targets are organized as different columns. """ return numpy.sqrt(mse(estimation, target)) def relevance (input, machine):  Manuel Günther committed Nov 25, 2015 50 51 52  """relevance (input, machine) -> relevances Calculates the relevance of every input feature to the estimation process  André Anjos committed Nov 21, 2013 53 54  using the following definition from:  Manuel Günther committed Nov 25, 2015 55 56 57  Neural Triggering System Operating on High Resolution Calorimetry Information, Anjos et al, April 2006, Nuclear Instruments and Methods in Physics Research, volume 559, pages 134-138  André Anjos committed Nov 21, 2013 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80  .. math:: R(x_{i}) = |E[(o(x) - o(x|x_{i}=E[x_{i}]))^2]| In other words, the relevance of a certain input feature **i** is the change on the machine output value when such feature is replaced by its mean for all input vectors. For this to work, the input parameter has to be a 2D array with features arranged column-wise while different examples are arranged row-wise. """ o = machine(input) i2 = input.copy() retval = numpy.ndarray((input.shape[1],), 'float64') retval.fill(0) for k in range(input.shape[1]): i2[:,:] = input #reset i2[:,k] = numpy.mean(input[:,k]) retval[k] = (mse(machine(i2), o).sum())**0.5 return retval  Manuel Günther committed Nov 25, 2015 81   Tiago de Freitas Pereira committed Mar 09, 2016 82 def recognition_rate(cmc_scores, threshold=None):  Tiago de Freitas Pereira committed Mar 09, 2016 83  """recognition_rate(cmc_scores, threshold) -> RR  Tiago de Freitas Pereira committed Mar 09, 2016 84   Manuel Günther committed Nov 25, 2015 85  Calculates the recognition rate from the given input, which is identical  André Anjos committed Dec 11, 2013 86 87  to the rank 1 (C)MC value.  Manuel Günther committed Nov 25, 2015 88  The input has a specific format, which is a list of two-element tuples. Each  André Anjos committed Dec 11, 2013 89 90  of the tuples contains the negative and the positive scores for one test item. To read the lists from score files in 4 or 5 column format, please use  André Anjos committed May 26, 2014 91  the :py:func:bob.measure.load.cmc_four_column or  André Anjos committed Dec 11, 2013 92 93  :py:func:bob.measure.load.cmc_five_column function.  Tiago de Freitas Pereira committed Mar 09, 2016 94  If **threshold** is set to None, the recognition rate is defined as the number of test items, for which the  André Anjos committed Dec 11, 2013 95  positive score is greater than or equal to all negative scores, divided by  Tiago de Freitas Pereira committed Mar 09, 2016 96  the number of all test items. If several positive scores for one test item exist, the **highest** score is taken.  Manuel Günther committed Nov 25, 2015 97   Tiago de Freitas Pereira committed Mar 09, 2016 98 99 100 101 102 103  If **threshold** assumes one value, the recognition rate is defined as the number of test items, for which the positive score is greater than or equal to all negative scores and the threshold divided by the number of all test items. If several positive scores for one test item exist, the **highest** score is taken. **Parameters:**  Tiago de Freitas Pereira committed Mar 09, 2016 104  cmc_scores : CMC scores loaded with one of the functions (:py:func:bob.measure.load.cmc_four_column or :py:func:bob.measure.load.cmc_five_column)  Manuel Günther committed Nov 25, 2015 105   Tiago de Freitas Pereira committed Mar 09, 2016 106  threshold : Decision threshold. If None, the decision threshold will be the **highest** positive score.  Tiago de Freitas Pereira committed Mar 09, 2016 107   Manuel Günther committed Nov 25, 2015 108 109 110 111  **Returns:** RR : float The rank 1 recognition rate, i.e., the relative number of correctly identified identities  André Anjos committed Nov 21, 2013 112  """  Manuel Günther committed Oct 16, 2015 113 114  # If no scores are given, the recognition rate is exactly 0. if not cmc_scores:  Manuel Günther committed Nov 25, 2015 115  return 0.  Manuel Günther committed Oct 16, 2015 116   André Anjos committed Nov 21, 2013 117 118  correct = 0. for neg, pos in cmc_scores:  Tiago de Freitas Pereira committed Mar 09, 2016 119 120 121 122 123 124 125 126 127 128 129 130  #If threshold is none, let's use the highest positive score as the decision threshold if(threshold is None): # get the maximum positive score for the current probe item # (usually, there is only one positive score, but just in case...) max_pos = numpy.max(pos) # check if the positive score is smaller than all negative scores if (neg < max_pos).all(): correct += 1. else: #If threshold is NOT None, we have an openset identification  Tiago de Freitas Pereira committed Mar 09, 2016 131  max_pos = numpy.max(pos)  André Anjos committed Nov 21, 2013 132   Tiago de Freitas Pereira committed Mar 09, 2016 133 134  if((threshold < max_pos) and (neg < max_pos).all()): correct += 1.  Manuel Günther committed Oct 16, 2015 135  # return relative number of correctly matched scores  André Anjos committed Nov 21, 2013 136 137  return correct / float(len(cmc_scores))  Tiago de Freitas Pereira committed Mar 09, 2016 138 139   André Anjos committed Nov 21, 2013 140 def cmc(cmc_scores):  Manuel Günther committed Nov 25, 2015 141 142 143  """cmc(cmc_scores) -> curve Calculates the cumulative match characteristic (CMC) from the given input.  André Anjos committed Dec 11, 2013 144   Manuel Günther committed Nov 25, 2015 145  The input has a specific format, which is a list of two-element tuples. Each  André Anjos committed Dec 11, 2013 146 147  of the tuples contains the negative and the positive scores for one test item. To read the lists from score files in 4 or 5 column format, please use  André Anjos committed May 26, 2014 148  the :py:func:bob.measure.load.cmc_four_column or  André Anjos committed Dec 11, 2013 149 150 151 152 153 154 155  :py:func:bob.measure.load.cmc_five_column function. For each test item the probability that the rank r of the positive score is calculated. The rank is computed as the number of negative scores that are higher than the positive score. If several positive scores for one test item exist, the **highest** positive score is taken. The CMC finally computes how many test items have rank r or higher.  Manuel Günther committed Nov 25, 2015 156 157 158 159 160 161  **Parameters:** cmc_scores : [(array_like(1D, float), array_like(1D, float))] A list of tuples, where each tuple contains the negative and positive scores for one probe of the database  Tiago de Freitas Pereira committed Mar 09, 2016 162 163  threshold : Decision threshold. If None, the decision threshold will be the **highest** positive score.  Manuel Günther committed Nov 25, 2015 164 165 166 167  **Returns:** curve : array_like(2D, float) The CMC curve, with the Rank in the first column and the number of correctly classified clients (in this rank) in the second column.  André Anjos committed Nov 21, 2013 168  """  André Anjos committed Dec 11, 2013 169   Manuel Günther committed Oct 16, 2015 170 171 172 173 174  # If no scores are given, we cannot plot anything probe_count = float(len(cmc_scores)) if not probe_count: raise ValueError("The given set of scores is empty")  André Anjos committed Nov 21, 2013 175 176  # compute MC match_characteristic = numpy.zeros((max([len(neg) for (neg,pos) in cmc_scores])+1,), numpy.int)  Tiago de Freitas Pereira committed Mar 09, 2016 177   André Anjos committed Nov 21, 2013 178  for neg, pos in cmc_scores:  Tiago de Freitas Pereira committed Mar 09, 2016 179 180 181  if((type(pos)!=float) and (len(pos) == 0)): raise ValueError("For the CMC computation at least one positive score is necessary. Please review who you are loading the scores. You must set load_only_negatives=False in the :py:func:bob.measure.load.cmc_four_column or :py:func:bob.measure.load.cmc_five_column methods.")  André Anjos committed Nov 21, 2013 182  # get the maximum positive score for the current probe item  Tiago de Freitas Pereira committed Mar 09, 2016 183  # (usually, there is only one positive score, but just in case...)  André Anjos committed Nov 21, 2013 184 185  max_pos = numpy.max(pos)  Tiago de Freitas Pereira committed Mar 09, 2016 186 187 188 189  # count the number of negative scores that are higher than the best positive score index = numpy.sum(neg >= max_pos) match_characteristic[index] += 1  André Anjos committed Nov 21, 2013 190 191 192 193 194 195 196 197  # cumulate cumulative_match_characteristic = numpy.ndarray(match_characteristic.shape, numpy.float64) count = 0. for i in range(match_characteristic.shape[0]): count += match_characteristic[i] cumulative_match_characteristic[i] = count / probe_count return cumulative_match_characteristic  André Anjos committed Mar 17, 2014 198   Manuel Günther committed Apr 30, 2015 199   Tiago de Freitas Pereira committed Mar 09, 2016 200   André Anjos committed Mar 23, 2014 201 202 203 def get_config(): """Returns a string containing the configuration information. """  Manuel Günther committed Nov 25, 2015 204  import bob.extension  Manuel Günther committed Apr 30, 2015 205  return bob.extension.get_config(__name__, version.externals)  André Anjos committed Mar 23, 2014 206 207   André Anjos committed Mar 17, 2014 208 209 # gets sphinx autodoc done right - don't remove it __all__ = [_ for _ in dir() if not _.startswith('_')]`