test_error.py 18.5 KB
Newer Older
André Anjos's avatar
André Anjos committed
1
2
3
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
4
# Wed 11 Dec 15:14:08 2013 CET
André Anjos's avatar
André Anjos committed
5
6
7
8
9
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland

"""Basic tests for the error measuring system of bob
"""
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
10
from __future__ import division
11
import os
André Anjos's avatar
André Anjos committed
12
import numpy
13
import nose.tools
André Anjos's avatar
André Anjos committed
14
import bob.io.base
15
import math
André Anjos's avatar
André Anjos committed
16

Amir MOHAMMADI's avatar
lint    
Amir MOHAMMADI committed
17

André Anjos's avatar
André Anjos committed
18
19
def F(f):
  """Resolves ``f`` to a file name inside this package's "data" subdirectory"""
  from pkg_resources import resource_filename

  relative_path = os.path.join('data', f)
  return resource_filename(__name__, relative_path)

23

André Anjos's avatar
André Anjos committed
24
25
def save(fname, data):
  """Stores one array with bob.io.base.save under the 'bob/measure/data' prefix.

  ``fname`` is joined onto the hard-coded 'bob/measure/data' prefix with
  os.path.join, and ``data`` is handed to bob.io.base.save with that path.
  """
  target = os.path.join('bob/measure/data', fname)
  bob.io.base.save(data, target)
27
28
29
30


def test_basic_ratios():
  # Checks the basic functionality of the FAR/FRR, precision/recall and
  # F-score computations on a separable score set.

  from . import farfrr, precision_recall, f_score

  # The example is separable, with a separation threshold of about 3.0
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  minimum = min(positives.min(), negatives.min())
  maximum = max(positives.max(), negatives.max())
  below_all = minimum - 0.1
  above_all = maximum + 0.1

  # Each row: threshold, expected (FAR, FRR), expected (precision, recall)
  #  * below every score: everything is accepted
  #  * above every score: everything is rejected
  #  * at 3.0: the two sets are perfectly separated
  ratio_cases = (
      (below_all, (1.0, 0.0), (0.5, 1.0)),
      (above_all, (0.0, 1.0), (0.0, 0.0)),
      (3.0, (0.0, 0.0), (1.0, 1.0)),
  )
  for cut, (want_far, want_frr), (want_prec, want_recall) in ratio_cases:
    far, frr = farfrr(negatives, positives, cut)
    nose.tools.eq_(far, want_far)
    nose.tools.eq_(frr, want_frr)
    prec, recall = precision_recall(negatives, positives, cut)
    nose.tools.eq_(prec, want_prec)
    nose.tools.eq_(recall, want_recall)

  # F-score for the same three thresholds, first with the default weight and
  # then with a weight of 2
  nose.tools.assert_almost_equal(
      f_score(negatives, positives, below_all), 0.66666667)
  nose.tools.assert_almost_equal(
      f_score(negatives, positives, below_all, 2), 0.83333333)

  for cut, want_score in ((above_all, 0.0), (3.0, 1.0)):
    nose.tools.eq_(f_score(negatives, positives, cut), want_score)
    nose.tools.eq_(f_score(negatives, positives, cut, 2), want_score)
85

86

87
def test_for_uncomputable_thresholds():
  # In some cases we cannot compute an exact FAR or FRR threshold, e.g. when
  # there is too little data or too many equal scores. In these cases the
  # methods should return a threshold which supports a lower value.

  from . import far_threshold, frr_threshold

  rising = [0.1, 0.2, 0.3, 0.4, 0.5]
  climbing = [0.5, 0.6, 0.7, 0.8, 0.9]

  # case 1: several scores are identical (odd number of scores)
  pos = [0.0] * 4 + rising
  neg = climbing + [1.0] * 4

  # reasonable thresholds must be provided for reachable data points
  thr = far_threshold(neg, pos, 0.5)
  assert thr == 1.0, thr
  thr = frr_threshold(neg, pos, 0.5)
  assert numpy.isclose(thr, 0.1), thr

  thr = far_threshold(neg, pos, 0.4)
  assert thr > neg[-1], thr
  thr = frr_threshold(neg, pos, 0.4)
  assert thr >= pos[0], thr

  # the same with an even number of scores
  pos = [0.0] * 5 + rising
  neg = climbing + [1.0] * 5

  thr = far_threshold(neg, pos, 0.5)
  assert thr == 1.0, thr
  assert numpy.isclose(frr_threshold(neg, pos, 0.51), 0.1)
  thr = far_threshold(neg, pos, 0.49)
  assert thr > neg[-1], thr
  thr = frr_threshold(neg, pos, 0.49)
  assert thr >= pos[0], thr

  # case 2: too few scores for the desired threshold
  pos = numpy.arange(10, dtype=float)
  neg = numpy.arange(10, dtype=float)

  thr = far_threshold(neg, pos, 0.09)
  assert thr > neg[-1], thr
  thr = frr_threshold(neg, pos, 0.09)
  assert thr >= pos[0], thr

  # there is no limit above; the threshold will just be the largest possible
  # value
  thr = far_threshold(neg, pos, 0.11)
  assert thr == 9., thr
  thr = far_threshold(neg, pos, 0.91)
  assert thr == 1., thr
  thr = far_threshold(neg, pos, 1)
  assert thr <= 0., thr

  thr = frr_threshold(neg, pos, 0.11)
  assert numpy.isclose(thr, 1.), thr
  thr = frr_threshold(neg, pos, 0.91)
  assert numpy.isclose(thr, 9.), thr
140
141


142
143
def test_indexing():
  # Verifies that the outputs of correctly_classified_positives() and
  # correctly_classified_negatives() make sense on a separable score set.

  from . import correctly_classified_positives, correctly_classified_negatives

  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  lowest = min(positives.min(), negatives.min())
  highest = max(positives.max(), negatives.max())

  # Threshold just below every score: all positives are correctly
  # classified, none of the negatives is.
  pos_ok = correctly_classified_positives(positives, lowest - 0.1)
  assert pos_ok.all()
  neg_ok = correctly_classified_negatives(negatives, lowest - 0.1)
  assert not neg_ok.any()

  # Threshold just above every score: the situation is inverted.
  pos_ok = correctly_classified_positives(positives, highest + 0.1)
  assert not pos_ok.any()
  neg_ok = correctly_classified_negatives(negatives, highest + 0.1)
  assert neg_ok.all()

  # A threshold that separates the two sets classifies everything correctly.
  pos_ok = correctly_classified_positives(positives, 3)
  assert pos_ok.all()
  neg_ok = correctly_classified_negatives(negatives, 3)
  assert neg_ok.all()
168
169


Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
170
171
172
def test_obvious_thresholds():
  # Requests FAR/FRR values on a grid of 0, 0.05, ..., 1 for two
  # non-overlapping score sets of 10 values each, and checks that the rate
  # obtained at the returned threshold equals the request rounded down to
  # one decimal place.
  from . import far_threshold, frr_threshold, farfrr

  size = 10
  neg = numpy.arange(size, dtype=float)
  pos = numpy.arange(size, 2 * size, dtype=float)

  # the same grid of requested values is used for both FAR and FRR
  grid = numpy.arange(0, 2 * size + 1, dtype=float) / size / 2
  for raw in grid:
    # the expectation is derived from the raw value, before rounding
    expected = math.floor(raw * 10) / 10
    requested = round(raw, 2)

    far_cut = far_threshold(neg, pos, requested)
    got_far = farfrr(neg, pos, far_cut)[0]

    frr_cut = frr_threshold(neg, pos, requested)
    got_frr = farfrr(neg, pos, frr_cut)[1]

    assert got_far <= requested, (got_far, requested, far_cut)
    assert got_far == expected, (got_far, requested, far_cut)
    assert got_frr <= requested, (got_frr, requested, frr_cut)
    assert got_frr == expected, (got_frr, requested, frr_cut)
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
189
190


191
192
def test_thresholding():
  # Exercises eer_threshold(), min_hter_threshold(), far_threshold() and
  # frr_threshold() on a non-separable and on a separable score set.

  from . import eer_threshold, far_threshold, frr_threshold, farfrr, \
      correctly_classified_positives, correctly_classified_negatives, \
      min_hter_threshold

  def count_hits(array, value=True):
    """Number of entries of ``array`` that compare equal to ``value``"""
    return sum(1 for hit in (array == value) if hit)

  def tallies(neg, pos, at):
    """(correct positives, correct negatives) counted at threshold ``at``"""
    n_pos = count_hits(correctly_classified_positives(pos, at))
    n_neg = count_hits(correctly_classified_negatives(neg, at))
    return n_pos, n_neg

  # First part: eer_threshold() on a test set that is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  eer_cut = eer_threshold(negatives, positives)

  sorted_positives = numpy.sort(positives)
  sorted_negatives = numpy.sort(negatives)

  # At the EER threshold both classes should be (almost) equally well
  # classified.
  # NOTE(review): this bound is one-sided (no abs()), so a large surplus of
  # correctly classified negatives would still pass - confirm it is intended.
  ccp, ccn = tallies(negatives, positives, eer_cut)
  assert (ccp - ccn) <= 1

  for requested in (0, 0.001, 0.1, 0.5, 0.9, 0.999, 1):
    # far_threshold() only receives negatives and frr_threshold() only
    # positives; both get pre-sorted scores
    far_cut = far_threshold(sorted_negatives, [], requested, is_sorted=True)
    frr_cut = frr_threshold([], sorted_positives, requested, is_sorted=True)

    # The rates obtained at those thresholds must not exceed the requested
    # ones, and must stay within 0.1 of them
    far = farfrr(negatives, positives, far_cut)[0]
    frr = farfrr(negatives, positives, frr_cut)[1]
    if not math.isnan(far_cut):
      assert far <= requested, (far, requested)
      assert requested - far <= 0.1
    if not math.isnan(frr_cut):
      assert frr <= requested, (frr, requested)
      assert requested - frr <= 0.1

  # Second part: a separable set. There are no points in the middle of the
  # range to compare against, which makes the threshold calculation
  # trickier.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))
  eer_cut = eer_threshold(negatives, positives)
  # the result here is 3.2 (which is what is expected)
  assert eer_cut == 3.2

  ccp, ccn = tallies(negatives, positives, eer_cut)
  nose.tools.eq_(ccp, ccn)

  # The second option for the calculation of the threshold is to use the
  # minimum HTER; in this particular case it matches the EER threshold.
  hter_cut = min_hter_threshold(negatives, positives)
  assert hter_cut == 3.2
  nose.tools.eq_(eer_cut, hter_cut)

  ccp, ccn = tallies(negatives, positives, hter_cut)
  nose.tools.eq_(ccp, ccn)


260
261
def test_empty_raises():
  # Tests that the measures raise RuntimeError when the negatives, the
  # positives, or both are empty.
  from bob.measure import farfrr, precision_recall, f_score, eer_threshold, \
      min_hter_threshold, min_weighted_error_rate_threshold

  # functions called here with a third positional argument of 0
  with_extra_argument = (
      farfrr, precision_recall, f_score, min_weighted_error_rate_threshold)
  for func in with_extra_argument:
    for negatives, positives in ([], [1.]), ([1.], []), ([], []):
      nose.tools.assert_raises(RuntimeError, func, negatives, positives, 0)

  # functions that only receive the two score sets
  for func in (eer_threshold, min_hter_threshold):
    for negatives, positives in ([], [1.]), ([1.], []), ([], []):
      nose.tools.assert_raises(RuntimeError, func, negatives, positives)


278
279
def test_plots():
  # Compares the ROC, ROC-for-FAR, precision-recall, DET and EPC curves of a
  # non-separable score set against stored reference values.

  from . import eer_threshold, roc, roc_for_far, precision_recall_curve, det, \
      epc

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  # the result is not used below; the call is kept from the original test
  eer_threshold(negatives, positives)

  # ROC curve with 100 points (to refresh the stored reference, pass the
  # curve to save() before the comparison)
  roc_curve = roc(negatives, positives, 100)
  roc_reference = bob.io.base.load(F('nonsep-roc.hdf5'))
  assert numpy.array_equal(roc_curve, roc_reference)

  # ROC for a list of requested FAR values
  requested_far = [0.01, 0.1, 1]
  expected_far = [0.0, 0.1, 1]
  expected_frr = [0.48, 0.12, 0]
  far_curve = roc_for_far(negatives, positives, requested_far)

  assert numpy.array_equal(far_curve[0], expected_far), far_curve[0]
  assert numpy.array_equal(far_curve[1], expected_frr), far_curve[1]

  # Precision-Recall curve with 100 points (reference refreshed the same
  # way as for the ROC)
  pr_curve = precision_recall_curve(negatives, positives, 100)
  pr_reference = bob.io.base.load(F('nonsep-precisionrecall.hdf5'))
  assert numpy.array_equal(pr_curve, pr_reference)

  # DET curve with 100 points (reference refreshed the same way)
  det_curve = det(negatives, positives, 100)
  det_reference = bob.io.base.load(F('nonsep-det.hdf5'))
  assert numpy.allclose(det_curve, det_reference, atol=1e-15)

  # The EPC curve needs a development and a test set. The negatives and
  # positives are split by the middle, just for the sake of testing.
  half_negatives = negatives.shape[0] // 2
  half_positives = positives.shape[0] // 2
  dev_negatives = negatives[:half_negatives]
  test_negatives = negatives[half_negatives:]
  dev_positives = positives[:half_positives]
  test_positives = positives[half_positives:]

  epc_curve = epc(dev_negatives, dev_positives,
                  test_negatives, test_positives, 100)
  epc_reference = bob.io.base.load(F('nonsep-epc.hdf5'))
  # the plain call is compared against the first two rows of the reference
  assert numpy.allclose(epc_curve, epc_reference[:2], atol=1e-15)

  # NOTE(review): the meaning of the two extra positional flags
  # (False, True) is not visible here - verify against epc()'s signature.
  # This output is compared against the full reference (refreshed the same
  # way as above).
  epc_curve = epc(dev_negatives, dev_positives,
                  test_negatives, test_positives, 100, False, True)
  assert numpy.allclose(epc_curve, epc_reference, atol=1e-15)
335
336
337
338


def test_rocch():
  # Checks rocch(), rocch2eer() and eer_rocch(), i.e. the ROC Convex Hull and
  # the EER computed on it, against reference values.

  from . import rocch, rocch2eer, eer_rocch

  # Each row: positives file, negatives file, reference hull, reference EER.
  # References obtained using Bosaris 1.06. The first set is separable, the
  # second one is not.
  cases = (
      ('linsep-positives.hdf5', 'linsep-negatives.hdf5',
       numpy.array([[1., 0., 0.], [0., 0., 1.]]),
       0.),
      ('nonsep-positives.hdf5', 'nonsep-negatives.hdf5',
       numpy.array([[1., 0.68, 0.28, 0.1, 0.06, 0., 0.],
                    [0, 0, 0.08, 0.12, 0.22, 0.48, 1.]]),
       0.116363636363636),
  )

  for positives_file, negatives_file, pmiss_pfa_ref, eer_ref in cases:
    positives = bob.io.base.load(F(positives_file))
    negatives = bob.io.base.load(F(negatives_file))

    # the hull itself must match the reference
    pmiss_pfa = rocch(negatives, positives)
    assert numpy.allclose(pmiss_pfa, pmiss_pfa_ref, atol=1e-15)

    # the EER derived from the hull, and the one computed directly from the
    # scores, must both match the reference
    hull_eer = rocch2eer(pmiss_pfa)
    assert abs(hull_eer - eer_ref) < 1e-4
    direct_eer = eer_rocch(negatives, positives)
    assert abs(direct_eer - eer_ref) < 1e-4
372

373
374
375

def test_cmc():
  # Tests the recognition rate and CMC calculation, first on a small
  # hand-written example and then on scores stored in 'test-cmc.hdf5'.

  from . import recognition_rate, cmc

  # Four probes, each a (negative scores, positive scores) pair; the
  # assertions below expect a recognition rate of 0.5 and a CMC of
  # [0.5, 0.75, 1, 1, 1]
  toy_scores = [
      ((0.3, 1.1, 0.5), (0.7,)),
      ((1.4, -1.3, 0.6), (0.2,)),
      ((0.8, 0., 1.5), (-0.8, 1.8)),
      ((2., 1.3, 1.6, 0.9), (2.4,)),
  ]

  nose.tools.eq_(recognition_rate(toy_scores), 0.5)
  toy_curve = cmc(toy_scores)
  assert (toy_curve == [0.5, 0.75, 1., 1., 1]).all()

  # stored scores: 20 expected CMC values, saturating at 1 from the fifth on
  desired_rr = 0.76
  desired_cmc = [0.76, 0.89, 0.96, 0.98] + [1.] * 16

  archive = bob.io.base.HDF5File(F('test-cmc.hdf5'))
  stored_scores = list(zip(archive.read('data-neg'), archive.read('data-pos')))
  del archive

  nose.tools.eq_(recognition_rate(stored_scores), desired_rr)
  stored_curve = cmc(stored_scores)
  assert (stored_curve == desired_cmc).all()

def test_calibration():
  # Tests the cllr and min_cllr measures of the calibration module on a
  # separable and on a non-separable score set.

  from . import calibration

  # This test set is separable.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  cost = calibration.cllr(negatives, positives)
  minimal_cost = calibration.min_cllr(negatives, positives)

  assert minimal_cost <= cost
  nose.tools.assert_almost_equal(cost, 1.2097942129)
  # Since the test set is separable, the min_cllr needs to be zero
  nose.tools.assert_almost_equal(minimal_cost, 0.)

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))

  cost = calibration.cllr(negatives, positives)
  minimal_cost = calibration.min_cllr(negatives, positives)

  assert minimal_cost <= cost
  assert abs(cost - 3.61833) < 1e-5, cost
  assert abs(minimal_cost - 0.33736) < 1e-5, minimal_cost


431
def _read_open_set_scores(filename):
  """Loads CMC-style open-set scores from a file of the "data" subdirectory.

  Every key of the HDF5 file is read. Values stored under a key containing
  'neg' are collected as negatives, all others as positives. A value whose
  string form is 'None' is replaced by a real ``None``.

  Returns a list of (negative, positive) pairs, paired up in key order.
  """
  hdf5 = bob.io.base.HDF5File(F(filename))
  negative = []
  positive = []
  for key in hdf5.keys():
    val = hdf5.read(key)
    if str(val) == 'None':
      val = None
    (negative if 'neg' in key else positive).append(val)
  del hdf5
  return list(zip(negative, positive))


def test_open_set_rates():
  # Checks detection & identification rate, false alarm rate and recognition
  # rate on three stored open-set score files.

  # Imported explicitly: the module only imports bob.io.base at the top, so
  # relying on a ``bob.measure`` attribute would only work if something else
  # had already loaded that sub-package.
  from bob.measure import detection_identification_rate, false_alarm_rate, \
      recognition_rate

  # No error files
  cmc_scores = _read_open_set_scores('test0-open-set.hdf5')
  assert abs(detection_identification_rate(
      cmc_scores, threshold=0.5) - 1.0) < 1e-8
  assert abs(false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8

  assert abs(recognition_rate(cmc_scores) - 7. / 9.) < 1e-8
  assert abs(recognition_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8

  # One error
  cmc_scores = _read_open_set_scores('test1-open-set.hdf5')
  assert abs(detection_identification_rate(
      cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8
  assert abs(false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8

  assert abs(recognition_rate(cmc_scores) - 6. / 9.) < 1e-8
  assert abs(recognition_rate(cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8

  # Two errors
  cmc_scores = _read_open_set_scores('test2-open-set.hdf5')
  assert abs(detection_identification_rate(
      cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8
  assert abs(false_alarm_rate(cmc_scores, threshold=0.5) - 0.5) < 1e-8

  assert abs(recognition_rate(cmc_scores) - 6. / 9.) < 1e-8
  assert abs(recognition_rate(cmc_scores, threshold=0.5) - 6. / 8.) < 1e-8
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506


def test_mindcf():
  """ Test outlier scores in negative set
  """
  from bob.measure import min_weighted_error_rate_threshold, farfrr

  # the negatives contain one outlier (4) above every positive score
  negatives = [-3, -2, -1, -0.5, 4]
  positives = [0.5, 3]
  cost = 0.99

  cut = min_weighted_error_rate_threshold(negatives, positives, cost, True)
  far, frr = farfrr(negatives, positives, cut)

  # weighted error at the chosen threshold, scaled by 100
  weighted_error = (cost * far + (1 - cost) * frr) * 100
  assert weighted_error <= 1.0