test_error.py 17.7 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
4
# Wed 11 Dec 15:14:08 2013 CET
André Anjos's avatar
André Anjos committed
5
6
7
8
9
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland

"""Basic tests for the error measuring system of bob
"""
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
10
from __future__ import division
11
import os
André Anjos's avatar
André Anjos committed
12
import numpy
13
import nose.tools
André Anjos's avatar
André Anjos committed
14
import bob.io.base
15
import math
André Anjos's avatar
André Anjos committed
16

Amir MOHAMMADI's avatar
lint    
Amir MOHAMMADI committed
17

André Anjos's avatar
André Anjos committed
18
19
def F(f):
  """Returns the path of test file ``f`` inside the "data" subdirectory

  The "data" directory is looked up next to this module, so the result does
  not depend on the current working directory. This replaces the former
  ``pkg_resources.resource_filename`` lookup (``pkg_resources`` is deprecated
  by setuptools); for a package installed as regular files on disk both
  approaches point at the same location.

  Parameters:

    f (str): File name, relative to the "data" subdirectory

  Returns:

    str: The absolute path ``<directory of this module>/data/<f>``
  """
  here = os.path.dirname(os.path.abspath(__file__))
  return os.path.join(here, 'data', f)

23

André Anjos's avatar
André Anjos committed
24
25
def save(fname, data):
  """Writes ``data`` to file ``fname`` of the 'data' directory

  The destination is ``os.path.join('bob/measure/data', fname)``: a relative
  ``fname`` therefore lands in ``bob/measure/data`` below the current working
  directory, while an absolute ``fname`` is used unchanged.
  """
  destination = os.path.join('bob/measure/data', fname)
  bob.io.base.save(data, destination)
27
28
29
30


def test_basic_ratios():
  """Checks FAR/FRR, precision/recall and F-score on the separable set"""

  from . import farfrr, precision_recall, f_score

  # We test the basic functionality of the FAR and FRR calculation. The
  # example is separable, with a separation threshold of about 3.0
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  lowest = min(positives.min(), negatives.min())
  highest = max(positives.max(), negatives.max())
  below_all = lowest - 0.1
  above_all = highest + 0.1

  # Rows are (threshold, FAR, FRR, precision, recall):
  #  * threshold below every score: FAR 1.0, FRR 0.0, precision 0.5, recall 1.0
  #  * threshold above every score: FAR 0.0, FRR 1.0, precision and recall 0.0
  #  * separating threshold: no error at all, precision and recall 1.0
  expectations = (
      (below_all, 1.0, 0.0, 0.5, 1.0),
      (above_all, 0.0, 1.0, 0.0, 0.0),
      (3.0, 0.0, 0.0, 1.0, 1.0),
  )
  for threshold, exp_far, exp_frr, exp_prec, exp_recall in expectations:
    far, frr = farfrr(negatives, positives, threshold)
    nose.tools.eq_(far, exp_far)
    nose.tools.eq_(frr, exp_frr)
    prec, recall = precision_recall(negatives, positives, threshold)
    nose.tools.eq_(prec, exp_prec)
    nose.tools.eq_(recall, exp_recall)

  # Testing the values of the F-score for the same thresholds, once with the
  # default settings and once with 2 as extra positional argument
  nose.tools.assert_almost_equal(
      f_score(negatives, positives, below_all), 0.66666667)
  nose.tools.assert_almost_equal(
      f_score(negatives, positives, below_all, 2), 0.83333333)

  for threshold, expected in ((above_all, 0.0), (3.0, 1.0)):
    nose.tools.eq_(f_score(negatives, positives, threshold), expected)
    nose.tools.eq_(f_score(negatives, positives, threshold, 2), expected)
85

86

87
def test_for_uncomputable_thresholds():
  """Checks far_threshold/frr_threshold when the target cannot be met exactly

  In some cases we cannot compute a threshold for the requested FAR or FRR,
  e.g., when we have too little data or too many equal scores. In these
  cases, the methods should return a threshold which supports a lower value.
  """

  from . import far_threshold, frr_threshold

  # case 1: several scores are identical (odd number of scores)
  pos = [0.0] * 4 + [0.1, 0.2, 0.3, 0.4, 0.5]
  neg = [0.5, 0.6, 0.7, 0.8, 0.9] + [1.0] * 4

  # test that reasonable thresholds for reachable data points are provided
  far_thr = far_threshold(neg, pos, 0.5)
  assert far_thr == 1.0, far_thr
  frr_thr = frr_threshold(neg, pos, 0.5)
  assert numpy.isclose(frr_thr, 0.1), frr_thr

  # a slightly smaller request moves the threshold past the repeated scores
  far_thr = far_threshold(neg, pos, 0.4)
  assert far_thr > neg[-1], far_thr
  frr_thr = frr_threshold(neg, pos, 0.4)
  assert frr_thr >= pos[0], frr_thr

  # test the same with even number of scores
  pos = [0.0] * 5 + [0.1, 0.2, 0.3, 0.4, 0.5]
  neg = [0.5, 0.6, 0.7, 0.8, 0.9] + [1.0] * 5

  far_thr = far_threshold(neg, pos, 0.5)
  assert far_thr == 1.0, far_thr
  assert numpy.isclose(frr_threshold(neg, pos, 0.51), 0.1)
  far_thr = far_threshold(neg, pos, 0.49)
  assert far_thr > neg[-1], far_thr
  frr_thr = frr_threshold(neg, pos, 0.49)
  assert frr_thr >= pos[0], frr_thr

  # case 2: too few scores for the desired threshold
  pos = numpy.array(range(10), dtype=float)
  neg = numpy.array(range(10), dtype=float)

  far_thr = far_threshold(neg, pos, 0.09)
  assert far_thr > neg[-1], far_thr
  frr_thr = frr_threshold(neg, pos, 0.09)
  assert frr_thr >= pos[0], frr_thr

  # there is no limit above; the threshold will just be the largest possible
  # value
  for requested, expected in ((0.11, 9.), (0.91, 1.)):
    far_thr = far_threshold(neg, pos, requested)
    assert far_thr == expected, far_thr
  far_thr = far_threshold(neg, pos, 1)
  assert far_thr <= 0., far_thr

  for requested, expected in ((0.11, 1.), (0.91, 9.)):
    frr_thr = frr_threshold(neg, pos, requested)
    assert numpy.isclose(frr_thr, expected), frr_thr
140
141


142
143
def test_indexing():
  """Verifies that the output of correctly_classified_positives() and
  correctly_classified_negatives() makes sense on the separable set
  """

  from . import correctly_classified_positives, correctly_classified_negatives

  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  below_all = min(positives.min(), negatives.min()) - 0.1
  above_all = max(positives.max(), negatives.max()) + 0.1

  # With a threshold just under the minimum, every positive sample is
  # correctly classified and no negative sample is.
  assert correctly_classified_positives(positives, below_all).all()
  assert not correctly_classified_negatives(negatives, below_all).any()

  # The inverse is true if the threshold is a bit above the maximum.
  assert not correctly_classified_positives(positives, above_all).any()
  assert correctly_classified_negatives(negatives, above_all).all()

  # If the threshold separates the sets, then all should be correctly
  # classified.
  separating = 3
  assert correctly_classified_positives(positives, separating).all()
  assert correctly_classified_negatives(negatives, separating).all()
168
169


Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
170
171
172
def test_obvious_thresholds():
  """Checks far_threshold/frr_threshold on two disjoint ranges of M scores

  Negatives are 0..M-1 and positives are M..2M-1. Requested rates go from 0
  to 1 in steps of 1/(2M); the rate measured at the returned threshold must
  not exceed the request and must equal the request rounded down to a
  multiple of 0.1.
  """
  from . import far_threshold, frr_threshold, farfrr

  M = 10
  neg = numpy.arange(M, dtype=float)
  pos = numpy.arange(M, 2 * M, dtype=float)

  requested_rates = numpy.arange(0, 2 * M + 1, dtype=float) / M / 2
  for rate in requested_rates:
    # the same request is used for both FAR and FRR; the expected value is
    # derived from the un-rounded request
    far = frr = round(rate, 2)
    expected_far = expected_frr = math.floor(rate * 10) / 10

    far_thr = far_threshold(neg, pos, far)
    pred_far, _ = farfrr(neg, pos, far_thr)

    frr_thr = frr_threshold(neg, pos, frr)
    _, pred_frr = farfrr(neg, pos, frr_thr)

    assert pred_far <= far, (pred_far, far, far_thr)
    assert pred_far == expected_far, (pred_far, far, far_thr)
    assert pred_frr <= frr, (pred_frr, frr, frr_thr)
    assert pred_frr == expected_frr, (pred_frr, frr, frr_thr)
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
189
190


191
192
def test_thresholding():
  """Checks eer_threshold, min_hter_threshold, far_threshold, frr_threshold

  The non-separable set is used to check the EER threshold and the FAR/FRR
  thresholds; the separable set is used to check that both the EER and the
  minimum-HTER thresholds are exactly 3.2.
  """

  from . import eer_threshold, far_threshold, frr_threshold, farfrr, \
      correctly_classified_positives, correctly_classified_negatives, \
      min_hter_threshold

  def count(array, value=True):
    """Returns how many entries of ``array`` compare equal to ``value``"""
    matches = array == value
    return list(matches).count(True)

  def balance(at):
    """Returns the number of correctly classified (positives, negatives)
    of the currently loaded set for threshold ``at``"""
    return (count(correctly_classified_positives(positives, at)),
            count(correctly_classified_negatives(negatives, at)))

  # This example will demonstrate and check the use of eer_threshold() to
  # calculate the threshold that minimizes the EER.

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  threshold = eer_threshold(negatives, positives)

  sorted_positives = numpy.sort(positives)
  sorted_negatives = numpy.sort(negatives)

  # Of course we have to make sure that will set the EER correctly:
  # NOTE(review): this check is one-sided (it also passes when ccn is much
  # larger than ccp) -- confirm whether abs(ccp - ccn) was intended
  ccp, ccn = balance(threshold)
  assert (ccp - ccn) <= 1

  for t in (0, 0.001, 0.1, 0.5, 0.9, 0.999, 1):
    # Lets also test the far_threshold and the frr_threshold functions
    threshold_far = far_threshold(sorted_negatives, [], t, is_sorted=True)
    threshold_frr = frr_threshold([], sorted_positives, t, is_sorted=True)

    # Check that the FAR and FRR values measured at those thresholds are not
    # larger than the requested ones
    far = farfrr(negatives, positives, threshold_far)[0]
    frr = farfrr(negatives, positives, threshold_frr)[1]
    if not math.isnan(threshold_far):
      assert far <= t, (far, t)
      assert t - far <= 0.1
    if not math.isnan(threshold_frr):
      assert frr <= t, (frr, t)
      # test that the values are at least somewhere in the range
      assert t - frr <= 0.1

  # If the set is separable, the calculation of the threshold is a little bit
  # trickier, as you have no points in the middle of the range to compare
  # things to. This is where the currently used recursive algorithm seems to
  # do better. Let's verify
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))
  threshold = eer_threshold(negatives, positives)
  # the result here is 3.2 (which is what is expected)
  assert threshold == 3.2

  # Of course we have to make sure that will set the EER correctly:
  ccp, ccn = balance(threshold)
  nose.tools.eq_(ccp, ccn)

  # The second option for the calculation of the threshold is to use the
  # minimum HTER.
  threshold2 = min_hter_threshold(negatives, positives)
  assert threshold2 == 3.2
  nose.tools.eq_(threshold, threshold2)  # in this particular case

  # Of course we have to make sure that will set the EER correctly:
  ccp, ccn = balance(threshold2)
  nose.tools.eq_(ccp, ccn)


260
261
def test_empty_raises():
  """Tests that the measures raise RuntimeError when the negatives, the
  positives, or both are empty
  """
  from bob.measure import farfrr, precision_recall, f_score, eer_threshold, \
      min_hter_threshold, min_weighted_error_rate_threshold

  # (first argument, second argument) pairs with at least one empty side
  empty_cases = (([], [1.]), ([1.], []), ([], []))

  # functions called with a third positional argument (0)
  takes_extra_argument = (
      farfrr, precision_recall, f_score, min_weighted_error_rate_threshold)
  for func in takes_extra_argument:
    for first, second in empty_cases:
      nose.tools.assert_raises(RuntimeError, func, first, second, 0)

  # functions called with the two score sets only
  for func in (eer_threshold, min_hter_threshold):
    for first, second in empty_cases:
      nose.tools.assert_raises(RuntimeError, func, first, second)


278
279
def test_plots():
  """Compares ROC, ROC-for-FAR, precision/recall, DET and EPC curve points
  computed on the non-separable set against stored reference values
  """

  from . import eer_threshold, roc, roc_for_far, precision_recall_curve, det, \
      epc

  n_points = 100

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  # the returned threshold is not used afterwards; the call is kept so that
  # eer_threshold() still runs on this set
  eer_threshold(negatives, positives)

  # ROC plot calculation
  roc_points = roc(negatives, positives, n_points)
  # uncomment the next line to save a reference value
  # save(F('nonsep-roc.hdf5'), roc_points)
  roc_reference = bob.io.base.load(F('nonsep-roc.hdf5'))
  assert numpy.array_equal(roc_points, roc_reference)

  # ROC for FAR plot calculation
  far = [0.01, 0.1, 1]
  ref = [0.42, 0.12, 0]
  roc_far_points = roc_for_far(negatives, positives, far)

  assert numpy.array_equal(roc_far_points[0], far)
  assert numpy.array_equal(roc_far_points[1], ref)

  # Precision-Recall plot calculation
  pr_points = precision_recall_curve(negatives, positives, n_points)
  # uncomment the next line to save a reference value
  # save('nonsep-precisionrecall.hdf5', pr_points)
  pr_reference = bob.io.base.load(F('nonsep-precisionrecall.hdf5'))
  assert numpy.array_equal(pr_points, pr_reference)

  # DET plot calculation
  det_xyzw = det(negatives, positives, n_points)
  # uncomment the next line to save a reference value
  # save(F('nonsep-det.hdf5'), det_xyzw)
  det_xyzw_ref = bob.io.base.load(F('nonsep-det.hdf5'))
  assert numpy.allclose(det_xyzw, det_xyzw_ref, atol=1e-15)

  # EPC plot calculation. For the EPC curve, you need to have a development
  # and a test set. We split the negatives and the positives by the middle,
  # just for the sake of testing
  half_negatives = negatives.shape[0] // 2
  half_positives = positives.shape[0] // 2
  dev_negatives = negatives[:half_negatives]
  test_negatives = negatives[half_negatives:]
  dev_positives = positives[:half_positives]
  test_positives = positives[half_positives:]

  epc_reference = bob.io.base.load(F('nonsep-epc.hdf5'))

  # default call: compared with the first two rows of the reference
  epc_points = epc(dev_negatives, dev_positives,
                   test_negatives, test_positives, n_points)
  assert numpy.allclose(epc_points, epc_reference[:2], atol=1e-15)

  # call with two extra positional flags: compared with the full reference
  epc_points = epc(dev_negatives, dev_positives,
                   test_negatives, test_positives, n_points, False, True)
  # uncomment the next line to save a reference value
  # save('nonsep-epc.hdf5', epc_points)
  assert numpy.allclose(epc_points, epc_reference, atol=1e-15)
334
335
336
337


def test_rocch():
  """Checks rocch(), rocch2eer() and eer_rocch() (ROC Convex Hull and the
  EER derived from it) against references obtained using Bosaris 1.06
  """

  from . import rocch, rocch2eer, eer_rocch

  def check(negatives, positives, pmiss_pfa_ref, eer_ref):
    """Compares the convex hull and both EER estimates with the references"""
    pmiss_pfa = rocch(negatives, positives)
    assert numpy.allclose(pmiss_pfa, pmiss_pfa_ref, atol=1e-15)
    eer = rocch2eer(pmiss_pfa)
    assert abs(eer - eer_ref) < 1e-4
    eer = eer_rocch(negatives, positives)
    assert abs(eer - eer_ref) < 1e-4

  # This test set is separable.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))
  check(
      negatives, positives,
      pmiss_pfa_ref=numpy.array([[1., 0., 0.], [0., 0., 1.]]),
      eer_ref=0.,
  )

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  check(
      negatives, positives,
      pmiss_pfa_ref=numpy.array([
          [1., 0.68, 0.28, 0.1, 0.06, 0., 0.],
          [0, 0, 0.08, 0.12, 0.22, 0.48, 1.],
      ]),
      eer_ref=0.116363636363636,
  )
371

372
373
374

def test_cmc():
  """Tests recognition_rate() and cmc() on inline data and on score files"""

  from . import recognition_rate, cmc, load

  # Inline test data; the assertions below expect a recognition rate of 0.5
  # and a CMC of [0.5, 0.75, 1, 1, 1].
  # NOTE(review): each entry is presumably a (negative scores, positive
  # scores) pair for one probe -- confirm against the cmc() documentation
  test_data = [
      ((0.3, 1.1, 0.5), (0.7,)),
      ((1.4, -1.3, 0.6), (0.2,)),
      ((0.8, 0., 1.5), (-0.8, 1.8)),
      ((2., 1.3, 1.6, 0.9), (2.4,)),
  ]
  # compute recognition rate
  nose.tools.eq_(recognition_rate(test_data), 0.5)
  # compute CMC
  assert (cmc(test_data) == [0.5, 0.75, 1., 1., 1]).all()

  # the same expectations hold for all score files loaded below
  desired_rr = 0.76
  desired_cmc = [
      0.76, 0.89, 0.96, 0.98, 1.,
      1., 1., 1., 1., 1.,
      1., 1., 1., 1., 1.,
      1., 1., 1., 1., 1.,
  ]

  # four-column loader, five-column loader, then the generic loader
  loaders = (
      (load.cmc_four_column, 'scores-cmc-4col.txt'),
      (load.cmc_five_column, 'scores-cmc-5col.txt'),
      (load.cmc, 'scores-cmc-5col.txt'),
  )
  for loader, filename in loaders:
    data = loader(F(filename))
    nose.tools.eq_(recognition_rate(data), desired_rr)
    assert (cmc(data) == desired_cmc).all()


412
413
def test_calibration():
  """Tests the cllr and min_cllr measures of the calibration module"""

  from . import calibration

  # This test set is separable.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  cost = calibration.cllr(negatives, positives)
  minimum_cost = calibration.min_cllr(negatives, positives)

  assert minimum_cost <= cost
  nose.tools.assert_almost_equal(cost, 1.2097942129)
  # Since the test set is separable, the min_cllr needs to be zero
  nose.tools.assert_almost_equal(minimum_cost, 0.)

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))

  cost = calibration.cllr(negatives, positives)
  minimum_cost = calibration.min_cllr(negatives, positives)

  assert minimum_cost <= cost
  assert abs(cost - 3.61833) < 1e-5, cost
  assert abs(minimum_cost - 0.33736) < 1e-5, minimum_cost


441
def test_open_set_rates():
  """Tests detection_identification_rate(), false_alarm_rate() and
  recognition_rate() on open-set score files, all at threshold 0.5

  Three score files are checked: one without errors, one with one error and
  one with two errors.
  """

  # ``bob.measure`` is referenced by its full name below, while the imports
  # visible at the top of this file only bring in ``bob.io.base``; import it
  # explicitly instead of relying on it having been loaded elsewhere
  import bob.measure

  # No error files
  cmc_scores = bob.measure.load.cmc(F("scores-cmc-4col-open-set.txt"))
  assert abs(bob.measure.detection_identification_rate(
      cmc_scores, threshold=0.5) - 1.0) < 1e-8
  assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8

  assert abs(bob.measure.recognition_rate(cmc_scores) - 7. / 9.) < 1e-8
  assert abs(bob.measure.recognition_rate(
      cmc_scores, threshold=0.5) - 1.0) < 1e-8

  # One error
  cmc_scores = bob.measure.load.cmc(
      F("scores-cmc-4col-open-set-one-error.txt"))
  assert abs(bob.measure.detection_identification_rate(
      cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8
  assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8

  assert abs(bob.measure.recognition_rate(cmc_scores) - 6. / 9.) < 1e-8
  assert abs(bob.measure.recognition_rate(
      cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8

  # Two errors (this file is read with the explicit four-column loader)
  cmc_scores = bob.measure.load.cmc_four_column(
      F("scores-cmc-4col-open-set-two-errors.txt"))
  assert abs(bob.measure.detection_identification_rate(
      cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8
  assert abs(bob.measure.false_alarm_rate(
      cmc_scores, threshold=0.5) - 0.5) < 1e-8

  assert abs(bob.measure.recognition_rate(cmc_scores) - 6. / 9.) < 1e-8
  assert abs(bob.measure.recognition_rate(
      cmc_scores, threshold=0.5) - 6. / 8.) < 1e-8