# import Libraries of other lib packages
# NOTE(review): presumably these must be imported before ``._library`` so that
# the compiled extension can resolve symbols from them -- keep this order.
import bob.math
import bob.io.base

from ._library import *

from . import version
from .version import module as __version__

from . import plot
from . import load
from . import calibration
from . import openbr

import numpy

def mse (estimation, target):
  r"""mse(estimation, target) -> error

  Calculates the mean square error between a set of outputs and target
  values using the following formula:

  .. math::

    MSE(\hat{\Theta}) = E[(\hat{\Theta} - \Theta)^2]

  Estimation (:math:`\hat{\Theta}`) and target (:math:`\Theta`) are supposed to
  have 2 dimensions. Different examples are organized as rows while different
  features in the estimated values or targets are organized as different
  columns.

  **Parameters:**

  ``estimation`` : array_like
    The estimated values, one example per row.

  ``target`` : array_like
    The target values; must be broadcastable against ``estimation``.

  **Returns:**

  ``error`` : array
    The squared differences averaged over the first axis (the examples), i.e.,
    one value per feature column for 2D inputs.
  """
  # The docstring is a raw string on purpose: ``\h`` and ``\T`` are not valid
  # escape sequences and trigger warnings in a regular string literal.
  estimation = numpy.asarray(estimation)
  target = numpy.asarray(target)
  return numpy.mean((estimation - target)**2, 0)

def rmse (estimation, target):
  r"""rmse(estimation, target) -> error

  Calculates the root mean square error between a set of outputs and target
  values using the following formula:

  .. math::

    RMSE(\hat{\Theta}) = \sqrt{E[(\hat{\Theta} - \Theta)^2]}

  Estimation (:math:`\hat{\Theta}`) and target (:math:`\Theta`) are supposed to
  have 2 dimensions. Different examples are organized as rows while different
  features in the estimated values or targets are organized as different
  columns.

  **Parameters:**

  ``estimation`` : array_like
    The estimated values, one example per row.

  ``target`` : array_like
    The target values, arranged like ``estimation``.

  **Returns:**

  ``error`` : array
    The element-wise square root of :py:func:`mse` for the same arguments.
  """
  # The docstring is a raw string on purpose: ``\h``, ``\s`` and ``\T`` are not
  # valid escape sequences and trigger warnings in a regular string literal.
  return numpy.sqrt(mse(estimation, target))

def relevance (input, machine):
  """relevance (input, machine) -> relevances

  Calculates the relevance of every input feature to the estimation process
  using the following definition from:

    Neural Triggering System Operating on High Resolution Calorimetry
    Information, Anjos et al, April 2006, Nuclear Instruments and Methods in
    Physics Research, volume 559, pages 134-138

  .. math::

    R(x_{i}) = |E[(o(x) - o(x|x_{i}=E[x_{i}]))^2]|

  In other words, the relevance of a certain input feature **i** is the change
  on the machine output value when such feature is replaced by its mean for all
  input vectors. For this to work, the `input` parameter has to be a 2D array
  with features arranged column-wise while different examples are arranged
  row-wise.

  Note that the value actually returned for each feature is the square root of
  the mean square error between the modified and the original machine output,
  summed over all machine outputs.

  **Parameters:**

  ``input`` : array(2D)
    The input data, one example per row and one feature per column.

  ``machine`` : callable
    Called as ``machine(array)``; its return value is compared to the output
    for the unmodified ``input`` using :py:func:`mse`.

  **Returns:**

  ``relevances`` : array(1D, float64)
    One relevance value per input feature (column of ``input``).
  """

  # reference output of the machine on the unmodified data
  o = machine(input)

  # NOTE: the working copy keeps the dtype of ``input``; for integer inputs the
  # column mean is therefore truncated when it is assigned below.
  i2 = input.copy()

  n_features = input.shape[1]
  retval = numpy.zeros((n_features,), 'float64')
  for k in range(n_features):
    # full reset (not only column k-1), in case the machine modified its input
    i2[:,:] = input
    i2[:,k] = numpy.mean(input[:,k])
    retval[k] = (mse(machine(i2), o).sum())**0.5

  return retval

def recognition_rate(cmc_scores, threshold=None):
  """recognition_rate(cmc_scores, threshold=None) -> RR

  Calculates the recognition rate from the given input, which is identical
  to the rank 1 (C)MC value.

  The input has a specific format, which is a list of two-element tuples. Each
  of the tuples contains the negative and the positive scores for one test
  item.  To read the lists from score files in 4 or 5 column format, please use
  the :py:func:`bob.measure.load.cmc_four_column` or
  :py:func:`bob.measure.load.cmc_five_column` function.

  If **threshold** is `None`, the recognition rate is defined as the number of
  test items for which the positive score is **strictly greater** than all
  negative scores, divided by the number of all test items. If several positive
  scores for one test item exist, the **highest** score is taken.

  If **threshold** is given (open-set identification), a test item is counted
  only if, additionally, its highest positive score is **strictly greater**
  than the threshold.

  **Parameters:**

  ``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
    CMC scores loaded with one of the functions
    (:py:func:`bob.measure.load.cmc_four_column` or
    :py:func:`bob.measure.load.cmc_five_column`). Each test item needs at least
    one positive score.

  ``threshold`` : float or `None`
    Decision threshold. If `None`, only the comparison against the negative
    scores of the same test item is performed.

  **Returns:**

  ``RR`` : float
    The rank 1 recognition rate, i.e., the relative number of correctly
    identified identities. ``0.`` if ``cmc_scores`` is empty.
  """
  # If no scores are given, the recognition rate is exactly 0.
  if not cmc_scores:
    return 0.

  correct = 0.
  for neg, pos in cmc_scores:
    # get the maximum positive score for the current probe item
    # (usually, there is only one positive score, but just in case...)
    max_pos = numpy.max(pos)

    # open-set case: the best positive score has to exceed the threshold
    if threshold is not None and not (threshold < max_pos):
      continue

    # the probe is correctly identified if all negative scores are strictly
    # smaller than the best positive score (ties count as errors)
    if (numpy.asarray(neg) < max_pos).all():
      correct += 1.

  # return relative number of correctly matched scores
  return correct / float(len(cmc_scores))

def cmc(cmc_scores):
  """cmc(cmc_scores) -> curve

  Calculates the cumulative match characteristic (CMC) from the given input.

  The input has a specific format, which is a list of two-element tuples. Each
  of the tuples contains the negative and the positive scores for one test
  item.  To read the lists from score files in 4 or 5 column format, please use
  the :py:func:`bob.measure.load.cmc_four_column` or
  :py:func:`bob.measure.load.cmc_five_column` function.

  For each test item the rank r of the positive score is calculated.  The rank
  is computed from the number of negative scores that are higher than or equal
  to the positive score.  If several positive scores for one test item exist,
  the **highest** positive score is taken. The CMC finally computes the
  fraction of test items that have rank r or better.

  **Parameters:**

  ``cmc_scores`` : [(array_like(1D, float), array_like(1D, float))]
    A list of tuples, where each tuple contains the ``negative`` and ``positive`` scores for one probe of the database

  **Returns:**

  ``curve`` : array(1D, float)
    The CMC curve. Element ``i`` is the fraction of probes for which at most
    ``i`` negative scores are higher than or equal to the best positive score.
    The length is the largest number of negative scores of any probe, plus one.

  **Raises:**

  ``ValueError``
    If ``cmc_scores`` is empty, or if a probe has no positive score.
  """

  # If no scores are given, we cannot plot anything
  probe_count = float(len(cmc_scores))
  if not probe_count:
    raise ValueError("The given set of scores is empty")

  # compute MC; the builtin ``int`` is used as dtype since the ``numpy.int``
  # alias was removed from NumPy
  max_negatives = max(len(neg) for neg, _ in cmc_scores)
  match_characteristic = numpy.zeros((max_negatives + 1,), int)

  for neg, pos in cmc_scores:
    # numpy.size works for scalars (size 1), lists and arrays alike
    if numpy.size(pos) == 0:
      raise ValueError("For the CMC computation at least one positive score is necessary. Please review who you are loading the scores. You must set `load_only_negatives=False` in the :py:func:`bob.measure.load.cmc_four_column` or `:py:func:`bob.measure.load.cmc_five_column` methods.")

    # get the maximum positive score for the current probe item
    # (usually, there is only one positive score, but just in case...)
    max_pos = numpy.max(pos)

    # count the number of negative scores that are not lower than the best
    # positive score; this is the (zero-based) rank of the probe
    index = numpy.sum(numpy.asarray(neg) >= max_pos)
    match_characteristic[index] += 1

  # cumulate and normalize by the number of probes
  return numpy.cumsum(match_characteristic) / probe_count
def get_config():
  """Returns a string containing the configuration information.

  The result is whatever ``bob.extension.get_config`` produces for this
  package's name and the ``externals`` listed in its ``version`` module.
  """
  # imported locally so that bob.extension is only needed when this is called
  import bob.extension
  return bob.extension.get_config(__name__, version.externals)
# gets sphinx autodoc done right - don't remove it
# Exports every name present in the module namespace at this point that does
# not start with an underscore. It is evaluated at import time, so it has to
# stay below all definitions and imports that are meant to be public.
__all__ = [_ for _ in dir() if not _.startswith('_')]