#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Wed 11 Dec 15:14:08 2013 CET
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland

"""Basic tests for the error measuring system of bob
"""

11
import os
André Anjos's avatar
André Anjos committed
12
import numpy
13
import nose.tools
André Anjos's avatar
André Anjos committed
14
import bob.io.base
André Anjos's avatar
André Anjos committed
15
16
17

def F(f):
  """Resolves ``f`` to a path inside the "data" subdirectory.

  The lookup is delegated to ``pkg_resources.resource_filename()``, anchored
  at the current module (``__name__``).
  """
  import pkg_resources
  relative = os.path.join('data', f)
  return pkg_resources.resource_filename(__name__, relative)

21

André Anjos's avatar
André Anjos committed
22
23
def save(fname, data):
  """Saves a single array into a file in the 'data' directory.

  This helper is only meant for (re)generating reference files by hand. The
  destination is ``data/<fname>``, relative to the current working directory.

  ``bob.io.base`` is used through its module-level ``save()`` function
  instead of an ``Array`` wrapper object.
  NOTE(review): ``bob.io.base.Array`` does not appear in the ``bob.io.base``
  API reference -- confirm against the installed version.
  """
  bob.io.base.save(data, os.path.join('data', fname))
25
26
27
28


def test_basic_ratios():
  """Checks farfrr(), precision_recall() and f_score() on the separable set"""

  from . import farfrr, precision_recall, f_score

  check_equal = nose.tools.eq_
  check_close = nose.tools.assert_almost_equal

  # We test the basic functionality of the FAR and FRR calculation. The
  # example is separable, with a separation threshold of about 3.0
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  lowest = min(positives.min(), negatives.min())
  highest = max(positives.max(), negatives.max())

  accept_all = lowest - 0.1   # threshold below every score
  reject_all = highest + 0.1  # threshold above every score
  separating = 3.0            # threshold that splits the two sets

  # Each row: threshold, expected FAR, FRR, precision, recall
  ratio_cases = (
      # everything accepted: FAR 1.0, FRR 0.0, precision 0.5, recall 1.0
      (accept_all, 1.0, 0.0, 0.5, 1.0),
      # everything rejected: FAR 0.0, FRR 1.0, precision and recall 0.0
      (reject_all, 0.0, 1.0, 0.0, 0.0),
      # perfect separation: no errors, precision and recall 1.0
      (separating, 0.0, 0.0, 1.0, 1.0),
      )

  for threshold, want_far, want_frr, want_prec, want_recall in ratio_cases:
    far, frr = farfrr(negatives, positives, threshold)
    check_equal(far, want_far)
    check_equal(frr, want_frr)
    prec, recall = precision_recall(negatives, positives, threshold)
    check_equal(prec, want_prec)
    check_equal(recall, want_recall)

  # F-score for the same three thresholds, with the default weight and with
  # a weight of 2
  check_close(f_score(negatives, positives, accept_all), 0.66666667)
  check_close(f_score(negatives, positives, accept_all, 2), 0.83333333)

  for threshold, want_score in ((reject_all, 0.0), (separating, 1.0)):
    check_equal(f_score(negatives, positives, threshold), want_score)
    check_equal(f_score(negatives, positives, threshold, 2), want_score)
82

83
84
85

def test_indexing():
  """Checks that the correctly_classified_*() outputs make sense"""

  from . import correctly_classified_positives, correctly_classified_negatives

  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  lowest = min(positives.min(), negatives.min())
  highest = max(positives.max(), negatives.max())

  # With a threshold just below the lowest score, every positive sample is
  # correctly classified and no negative sample is.
  below_all = lowest - 0.1
  assert correctly_classified_positives(positives, below_all).all()
  assert not correctly_classified_negatives(negatives, below_all).any()

  # With a threshold just above the highest score, it is the other way
  # around.
  above_all = highest + 0.1
  assert not correctly_classified_positives(positives, above_all).any()
  assert correctly_classified_negatives(negatives, above_all).all()

  # If the threshold separates the two sets, then everything is correctly
  # classified.
  separating = 3
  assert correctly_classified_positives(positives, separating).all()
  assert correctly_classified_negatives(negatives, separating).all()
110
111
112
113


def test_thresholding():
  """Checks the EER, FAR, FRR and minimum-HTER threshold estimators"""

  from . import eer_threshold, far_threshold, frr_threshold, farfrr
  from . import correctly_classified_positives, correctly_classified_negatives
  from . import min_hter_threshold

  def count(array, value=True):
    """Counts how many entries of ``array`` compare equal to ``value``"""
    return sum(1 for matched in (array == value) if matched)

  # First part: eer_threshold() on a test set that is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  sorted_negatives = numpy.sort(negatives)
  sorted_positives = numpy.sort(positives)

  threshold = eer_threshold(negatives, positives)

  # At the EER threshold the number of correctly classified positives must
  # not exceed the number of correctly classified negatives by more than one.
  # NOTE(review): only this one side is bounded; use abs() if a symmetric
  # check is what is intended.
  ccp = count(correctly_classified_positives(positives, threshold))
  ccn = count(correctly_classified_negatives(negatives, threshold))
  assert (ccp - ccn) <= 1

  # Second part: far_threshold() and frr_threshold() for a range of requested
  # rates, working on the pre-sorted score arrays.
  for t in (0, 0.001, 0.1, 0.5, 0.9, 0.999, 1):
    threshold_far = far_threshold(sorted_negatives, [], t, is_sorted=True)
    threshold_frr = frr_threshold([], sorted_positives, t, is_sorted=True)

    far, _ = farfrr(negatives, positives, threshold_far)
    _, frr = farfrr(negatives, positives, threshold_frr)

    # The obtained rates must exceed the requested rate, up to a small
    # tolerance ...
    assert far + 1e-7 > t
    assert frr + 1e-7 > t
    # ... and must not overshoot it by more than 0.15
    assert far-t <= 0.15
    assert frr-t <= 0.15

  # Third part: a separable set. The calculation of the threshold is a little
  # bit trickier here, as there are no points in the middle of the range to
  # compare things to.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  threshold = eer_threshold(negatives, positives)
  # the expected result here is 3.2
  assert threshold == 3.2

  # The same number of positives and negatives must be correctly classified
  ccp = count(correctly_classified_positives(positives, threshold))
  ccn = count(correctly_classified_negatives(negatives, threshold))
  nose.tools.eq_(ccp, ccn)

  # The second option for the calculation of the threshold is to use the
  # minimum HTER; in this particular case it gives the same threshold.
  threshold2 = min_hter_threshold(negatives, positives)
  assert threshold2 == 3.2
  nose.tools.eq_(threshold, threshold2)

  ccp = count(correctly_classified_positives(positives, threshold2))
  ccn = count(correctly_classified_negatives(negatives, threshold2))
  nose.tools.eq_(ccp, ccn)


def test_plots():
  """Checks the curve calculations against stored reference values

  Covers roc(), roc_for_far(), precision_recall_curve(), det() and epc() on
  the non-separable test set.
  """

  from . import eer_threshold, roc, roc_for_far, precision_recall_curve, det, epc

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  # The value is not used below; the call is kept so that eer_threshold() is
  # still exercised on this data set.
  threshold = eer_threshold(negatives, positives)

  # This example will test the ROC plot calculation functionality.
  xy = roc(negatives, positives, 100)
  # uncomment the next line to save a reference value
  # save('nonsep-roc.hdf5', xy)
  xyref = bob.io.base.load(F('nonsep-roc.hdf5'))
  assert numpy.array_equal(xy, xyref)

  # This example will test the ROC for FAR plot calculation functionality.
  far = [0.01, 0.1, 1]
  ref = [0.48, 0.22, 0]
  xy = roc_for_far(negatives, positives, far)
  assert numpy.array_equal(xy[0], far)
  assert numpy.array_equal(xy[1], ref)

  # This example will test the Precision-Recall plot calculation functionality.
  xy = precision_recall_curve(negatives, positives, 100)
  # uncomment the next line to save a reference value
  # save('nonsep-precisionrecall.hdf5', xy)
  xyref = bob.io.base.load(F('nonsep-precisionrecall.hdf5'))
  assert numpy.array_equal(xy, xyref)

  # This example will test the DET plot calculation functionality.
  det_xyzw = det(negatives, positives, 100)
  # uncomment the next line to save a reference value
  # save('nonsep-det.hdf5', det_xyzw)
  det_xyzw_ref = bob.io.base.load(F('nonsep-det.hdf5'))
  assert numpy.allclose(det_xyzw, det_xyzw_ref, atol=1e-15)

  # This example will test the EPC plot calculation functionality. For the
  # EPC curve, you need to have a development and a test set. We will split,
  # by the middle, the negatives and positives sample we have, just for the
  # sake of testing.
  # Floor division keeps the split points integral: with true division the
  # result would be a float, which is not a valid slice index.
  half_negatives = negatives.shape[0] // 2
  half_positives = positives.shape[0] // 2
  dev_negatives = negatives[:half_negatives]
  test_negatives = negatives[half_negatives:]
  dev_positives = positives[:half_positives]
  test_positives = positives[half_positives:]
  xy = epc(dev_negatives, dev_positives,
      test_negatives, test_positives, 100)
  # uncomment the next line to save a reference value
  # save('nonsep-epc.hdf5', xy)
  xyref = bob.io.base.load(F('nonsep-epc.hdf5'))
  assert numpy.allclose(xy, xyref, atol=1e-15)


def test_rocch():
  """Checks rocch(), rocch2eer() and eer_rocch() against reference values"""

  from . import rocch, rocch2eer, eer_rocch

  def verify(negatives, positives, pmiss_pfa_ref, eer_ref):
    """Compares the ROC convex hull and both EER estimates to references"""
    pmiss_pfa = rocch(negatives, positives)
    assert numpy.allclose(pmiss_pfa, pmiss_pfa_ref, atol=1e-15)
    # EER computed from the convex hull obtained above
    eer = rocch2eer(pmiss_pfa)
    assert abs(eer-eer_ref) < 1e-4
    # EER computed directly from the scores
    eer = eer_rocch(negatives, positives)
    assert abs(eer-eer_ref) < 1e-4

  # All reference values below were obtained using Bosaris 1.06

  # This test set is separable.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))
  separable_ref = numpy.array([[1., 0., 0.], [0., 0., 1.]])
  verify(negatives, positives, separable_ref, 0.)

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  nonseparable_ref = numpy.array([
      [1., 0.68, 0.28, 0.1, 0.06, 0., 0.],
      [0, 0, 0.08, 0.12, 0.22, 0.48, 1.],
      ])
  verify(negatives, positives, nonseparable_ref, 0.116363636363636)
265

266
267
268

def test_cmc():
  """Checks recognition_rate() and cmc() on hand-made and on loaded scores"""

  from . import recognition_rate, cmc, load

  # Hand-made test data: one (negatives, positives) pair per probe. Counting
  # the negatives that score above the best positive, the probes are
  # recognized at ranks 2, 3, 1 and 1, so the expected recognition rate is
  # 0.5 and the expected CMC is [0.5, 0.75, 1, 1, 1].
  # Single positives are written as one-element tuples, "(0.7,)": a bare
  # "(0.7)" is just a parenthesised float.
  test_data = [
      ((0.3, 1.1, 0.5), (0.7,)),
      ((1.4, -1.3, 0.6), (0.2,)),
      ((0.8, 0., 1.5), (-0.8, 1.8)),
      ((2., 1.3, 1.6, 0.9), (2.4,)),
      ]
  # compute recognition rate
  rr = recognition_rate(test_data)
  nose.tools.eq_(rr, 0.5)
  # compute CMC
  cmc_ = cmc(test_data)
  assert (cmc_ == [0.5, 0.75, 1., 1., 1]).all()

  # Scores loaded from files: the four- and the five-column score files are
  # expected to give the same recognition rate and the same CMC.
  desired_rr = 0.76
  desired_cmc = [0.76, 0.89, 0.96, 0.98, 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]

  data = load.cmc_four_column(F('scores-cmc-4col.txt'))
  rr = recognition_rate(data)
  nose.tools.eq_(rr, desired_rr)
  cmc_ = cmc(data)
  assert (cmc_ == desired_cmc).all()

  data = load.cmc_five_column(F('scores-cmc-5col.txt'))
  rr = recognition_rate(data)
  nose.tools.eq_(rr, desired_rr)
  cmc_ = cmc(data)
  assert (cmc_ == desired_cmc).all()


def test_calibration():
  """Tests the cllr and min_cllr measures"""

  from . import calibration

  # This test set is separable.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  cllr = calibration.cllr(negatives, positives)
  min_cllr = calibration.min_cllr(negatives, positives)

  assert min_cllr <= cllr
  nose.tools.assert_almost_equal(cllr, 1.2097942129)
  # Since the test set is separable, the min_cllr needs to be zero
  nose.tools.assert_almost_equal(min_cllr, 0.)

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))

  cllr = calibration.cllr(negatives, positives)
  min_cllr = calibration.min_cllr(negatives, positives)

  assert min_cllr <= cllr
  # These used to be written as "assert value, reference", which only checks
  # that the value is truthy (the reference becomes the assertion message).
  # They are now real comparisons.
  # NOTE(review): the reference numbers were never enforced before; confirm
  # them if one of these assertions fails.
  nose.tools.assert_almost_equal(cllr, 3.61833457)
  nose.tools.assert_almost_equal(min_cllr, 0.337364136)
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
  
  

def test_open_set_recognition_rate():
  """Checks recognition_rate() with a threshold on open-set score files"""

  # Imported relative to the package, as the other tests in this file do,
  # instead of relying on ``bob.measure`` being reachable as an attribute.
  from . import recognition_rate, load

  # Each case: score file, expected rate at threshold 0.5, expected rate at
  # threshold 10.
  # NOTE(review): these used to be written as "assert value, reference",
  # which only checks that the value is truthy, so the reference numbers
  # were never enforced; confirm them if one of these assertions fails.
  cases = (
      # No error files
      ("scores-cmc-4col-open-set.txt", 1.0, 0.222222222222),
      # One error
      ("scores-cmc-4col-open-set-one-error.txt", 0.888888888889, 0.222222222222),
      # Two errors
      ("scores-cmc-4col-open-set-two-errors.txt", 0.777777777778, 0.111111111111),
      )

  for filename, rate_low_threshold, rate_high_threshold in cases:
    scores = load.cmc_four_column(F(filename), load_only_negatives=True)
    nose.tools.assert_almost_equal(
        recognition_rate(scores, threshold=0.5), rate_low_threshold)
    nose.tools.assert_almost_equal(
        recognition_rate(scores, threshold=10.), rate_high_threshold)