#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Wed 11 Dec 15:14:08 2013 CET
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland

"""Basic tests for the error measuring system of bob
"""

from __future__ import division
11
import os
André Anjos's avatar
André Anjos committed
12
import numpy
13
import nose.tools
André Anjos's avatar
André Anjos committed
14
import bob.io.base
15
import math
André Anjos's avatar
André Anjos committed
16

Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
17

André Anjos's avatar
André Anjos committed
18 19
def F(f):
  """Resolves ``f`` to its location inside this package's "data" directory"""
  from pkg_resources import resource_filename

  # the resource name is expressed relative to the current module
  relative = os.path.join('data', f)
  return resource_filename(__name__, relative)

23

André Anjos's avatar
André Anjos committed
24 25
def save(fname, data):
  """Writes a single array to ``fname`` below the 'bob/measure/data' directory.

  The destination is built relative to the current working directory, so
  this helper is presumably meant to be run from the root of the source tree
  when (re-)generating reference files.
  """
  destination = os.path.join('bob/measure/data', fname)
  bob.io.base.save(data, destination)
27 28 29 30


def test_basic_ratios():
  """Checks ``farfrr``, ``precision_recall`` and ``f_score`` on separable data.

  The linearly separable reference scores are evaluated at three thresholds:
  slightly below every score, slightly above every score, and at 3.0, which
  is expected to separate the two classes perfectly.
  """

  from . import farfrr, precision_recall, f_score

  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  # thresholds sitting just outside of the observed score range
  below_all = min(positives.min(), negatives.min()) - 0.1
  above_all = max(positives.max(), negatives.max()) + 0.1

  # Each entry reads: threshold, expected (FAR, FRR), expected (precision,
  # recall).
  #  * below every score: FAR is 1, FRR is 0, precision 0.5 and recall 1
  #  * above every score: FRR is 1, FAR is 0, precision and recall are 0
  #  * at the separating threshold: no errors at all
  ratio_cases = (
      (below_all, (1.0, 0.0), (0.5, 1.0)),
      (above_all, (0.0, 1.0), (0.0, 0.0)),
      (3.0, (0.0, 0.0), (1.0, 1.0)),
  )
  for threshold, far_frr_ref, prec_recall_ref in ratio_cases:
    far, frr = farfrr(negatives, positives, threshold)
    nose.tools.eq_(far, far_frr_ref[0])
    nose.tools.eq_(frr, far_frr_ref[1])
    prec, recall = precision_recall(negatives, positives, threshold)
    nose.tools.eq_(prec, prec_recall_ref[0])
    nose.tools.eq_(recall, prec_recall_ref[1])

  # F-score for the same three thresholds, first with the default weight and
  # then with a weight of 2. Only the first case is compared approximately,
  # since its reference values are rounded.
  f_score_cases = (
      (below_all, nose.tools.assert_almost_equal, 0.66666667, 0.83333333),
      (above_all, nose.tools.eq_, 0.0, 0.0),
      (3.0, nose.tools.eq_, 1.0, 1.0),
  )
  for threshold, check, default_ref, weighted_ref in f_score_cases:
    check(f_score(negatives, positives, threshold), default_ref)
    check(f_score(negatives, positives, threshold, 2), weighted_ref)
85

86

87
def test_for_uncomputable_thresholds():
  """Checks far/frr thresholds when the requested rate cannot be met exactly.

  With too little data, or with too many identical scores, an exact FAR or
  FRR threshold may not exist. In these cases the functions are expected to
  return a threshold that supports a lower rate than the requested one.
  """
  from . import far_threshold, frr_threshold

  # case 1: several scores are identical (odd number of scores)
  pos = [0.0] * 4 + [0.1, 0.2, 0.3, 0.4, 0.5]
  neg = [0.5, 0.6, 0.7, 0.8, 0.9] + [1.0] * 4

  # reachable rates must lead to reasonable thresholds
  thr = far_threshold(neg, pos, 0.5)
  assert thr == 1.0, thr
  thr = frr_threshold(neg, pos, 0.5)
  assert numpy.isclose(thr, 0.1), thr

  # unreachable rates push the threshold past the block of identical scores
  thr = far_threshold(neg, pos, 0.4)
  assert thr > neg[-1], thr
  thr = frr_threshold(neg, pos, 0.4)
  assert thr >= pos[0], thr

  # same checks with an even number of scores
  pos = [0.0] * 5 + [0.1, 0.2, 0.3, 0.4, 0.5]
  neg = [0.5, 0.6, 0.7, 0.8, 0.9] + [1.0] * 5

  thr = far_threshold(neg, pos, 0.5)
  assert thr == 1.0, thr
  assert numpy.isclose(frr_threshold(neg, pos, 0.51), 0.1)
  thr = far_threshold(neg, pos, 0.49)
  assert thr > neg[-1], thr
  thr = frr_threshold(neg, pos, 0.49)
  assert thr >= pos[0], thr

  # case 2: too few scores for the desired rate
  pos = numpy.arange(10, dtype=float)
  neg = numpy.arange(10, dtype=float)

  thr = far_threshold(neg, pos, 0.09)
  assert thr > neg[-1], thr
  thr = frr_threshold(neg, pos, 0.09)
  assert thr >= pos[0], thr

  # there is no limit above; the threshold will just be the largest possible
  # value
  thr = far_threshold(neg, pos, 0.11)
  assert thr == 9., thr
  thr = far_threshold(neg, pos, 0.91)
  assert thr == 1., thr
  thr = far_threshold(neg, pos, 1)
  assert thr <= 0., thr
  thr = frr_threshold(neg, pos, 0.11)
  assert numpy.isclose(thr, 1.), thr
  thr = frr_threshold(neg, pos, 0.91)
  assert numpy.isclose(thr, 9.), thr
140 141


142 143
def test_indexing():
  """Checks the masks returned by the ``correctly_classified_*`` functions."""

  from . import correctly_classified_positives, correctly_classified_negatives

  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  lowest = min(positives.min(), negatives.min())
  highest = max(positives.max(), negatives.max())

  # A threshold just below the lowest score: every positive sample is
  # correctly classified, while no negative sample is.
  positives_ok = correctly_classified_positives(positives, lowest - 0.1)
  assert positives_ok.all()
  negatives_ok = correctly_classified_negatives(negatives, lowest - 0.1)
  assert not negatives_ok.any()

  # A threshold just above the highest score inverts the situation.
  positives_ok = correctly_classified_positives(positives, highest + 0.1)
  assert not positives_ok.any()
  negatives_ok = correctly_classified_negatives(negatives, highest + 0.1)
  assert negatives_ok.all()

  # A threshold that separates both sets classifies everything correctly.
  positives_ok = correctly_classified_positives(positives, 3)
  assert positives_ok.all()
  negatives_ok = correctly_classified_negatives(negatives, 3)
  assert negatives_ok.all()
168 169


170 171 172
def test_obvious_thresholds():
  """Checks far/frr thresholds on two disjoint, evenly spaced score sets.

  Negatives are 0..M-1 and positives are M..2M-1, so only rates that are
  multiples of 1/M are reachable. Every requested rate (in steps of 1/(2M))
  must hence result in the requested rate rounded down to a multiple of 1/M.
  """
  from . import far_threshold, frr_threshold, farfrr
  M = 10
  neg = numpy.arange(M, dtype=float)
  pos = numpy.arange(M, 2 * M, dtype=float)

  # the same sequence of rates is requested for both the FAR and the FRR
  rates = numpy.arange(0, 2 * M + 1, dtype=float) / M / 2
  for rate in rates:
    # the expectation is computed from the raw (unrounded) rate
    expected = math.floor(rate * 10) / 10
    requested = round(rate, 2)

    far_thr = far_threshold(neg, pos, requested)
    pred_far, _ = farfrr(neg, pos, far_thr)

    frr_thr = frr_threshold(neg, pos, requested)
    _, pred_frr = farfrr(neg, pos, frr_thr)

    assert pred_far <= requested, (pred_far, requested, far_thr)
    assert pred_far == expected, (pred_far, requested, far_thr)
    assert pred_frr <= requested, (pred_frr, requested, frr_thr)
    assert pred_frr == expected, (pred_frr, requested, frr_thr)
189 190


191 192
def test_thresholding():
  """Checks the EER, minimum HTER, FAR and FRR threshold calculations."""

  from . import eer_threshold, far_threshold, frr_threshold, farfrr, \
      correctly_classified_positives, correctly_classified_negatives, \
      min_hter_threshold

  def count(array, value=True):
    """Counts how many elements of ``array`` are equal to ``value``"""
    return int(numpy.count_nonzero(array == value))

  def correct_counts(neg, pos, thr):
    """Returns the number of correctly classified (positives, negatives)"""
    n_pos = count(correctly_classified_positives(pos, thr))
    n_neg = count(correctly_classified_negatives(neg, thr))
    return n_pos, n_neg

  # First part: use eer_threshold() on a test set that is NOT separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  threshold = eer_threshold(negatives, positives)

  sorted_positives = numpy.sort(positives)
  sorted_negatives = numpy.sort(negatives)

  # At the EER threshold, both classes should be (nearly) equally well
  # classified.
  # NOTE(review): this check is one-sided (no abs()), so it also passes when
  # far more negatives than positives are correctly classified -- confirm
  # whether abs(ccp - ccn) <= 1 was intended.
  ccp, ccn = correct_counts(negatives, positives, threshold)
  assert (ccp - ccn) <= 1

  for requested in (0, 0.001, 0.1, 0.5, 0.9, 0.999, 1):
    # far_threshold() only gets the negatives and frr_threshold() only the
    # positives; both are handed in pre-sorted
    threshold_far = far_threshold(
        sorted_negatives, [], requested, is_sorted=True)
    threshold_frr = frr_threshold(
        [], sorted_positives, requested, is_sorted=True)

    # The rates obtained with these thresholds must not exceed the requested
    # ones, and should stay within 0.1 of them
    far = farfrr(negatives, positives, threshold_far)[0]
    frr = farfrr(negatives, positives, threshold_frr)[1]
    if not math.isnan(threshold_far):
      assert far <= requested, (far, requested)
      assert requested - far <= 0.1
    if not math.isnan(threshold_frr):
      assert frr <= requested, (frr, requested)
      assert requested - frr <= 0.1

  # Second part: a separable set. Here there are no points in the middle of
  # the range to compare things to, which makes the calculation of the
  # threshold a little bit trickier.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))
  threshold = eer_threshold(negatives, positives)
  # 3.2 is the value expected for this data set
  assert threshold == 3.2

  # the EER threshold must classify both classes equally well
  ccp, ccn = correct_counts(negatives, positives, threshold)
  nose.tools.eq_(ccp, ccn)

  # The alternative is the threshold at the minimum HTER, which coincides
  # with the EER threshold in this particular case.
  threshold2 = min_hter_threshold(negatives, positives)
  assert threshold2 == 3.2
  nose.tools.eq_(threshold, threshold2)

  ccp, ccn = correct_counts(negatives, positives, threshold2)
  nose.tools.eq_(ccp, ccn)


260 261
def test_empty_raises():
  """Checks that a RuntimeError is raised when a score set is empty."""
  from bob.measure import farfrr, precision_recall, f_score, eer_threshold, \
      min_hter_threshold, min_weighted_error_rate_threshold

  # functions called with a third positional argument (0 is passed here)
  with_extra_argument = (
      farfrr, precision_recall, f_score, min_weighted_error_rate_threshold)
  for func in with_extra_argument:
    # empty negatives, empty positives, and both empty
    for negatives, positives in (([], [1.]), ([1.], []), ([], [])):
      nose.tools.assert_raises(RuntimeError, func, negatives, positives, 0)

  # functions called with the two score sets only
  for func in (eer_threshold, min_hter_threshold):
    for negatives, positives in (([], [1.]), ([1.], []), ([], [])):
      nose.tools.assert_raises(RuntimeError, func, negatives, positives)


278 279
def test_plots():
  """Compares the ROC, precision-recall, DET and EPC curves to references.

  All curves are computed on the non-separable test set and compared against
  values stored in the "data" directory (or listed inline for roc_for_far).
  """

  from . import eer_threshold, roc, roc_for_far, precision_recall_curve, det, \
      epc

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))
  # the result is not used below; the call only has to succeed
  threshold = eer_threshold(negatives, positives)

  # ROC curve with 100 points
  roc_xy = roc(negatives, positives, 100)
  # to regenerate the reference, uncomment:
  # save('nonsep-roc.hdf5', roc_xy)
  roc_ref = bob.io.base.load(F('nonsep-roc.hdf5'))
  assert numpy.array_equal(roc_xy, roc_ref)

  # ROC evaluated at specific FAR values: row 0 of the result is compared to
  # the expected FAR values and row 1 to the expected FRR values
  requested_far = [0.01, 0.1, 1]
  expected_far = [0.0, 0.1, 1]
  expected_frr = [0.48, 0.12, 0]
  far_frr = roc_for_far(negatives, positives, requested_far)

  assert numpy.array_equal(far_frr[0], expected_far), far_frr[0]
  assert numpy.array_equal(far_frr[1], expected_frr), far_frr[1]

  # precision-recall curve with 100 points
  pr_xy = precision_recall_curve(negatives, positives, 100)
  # to regenerate the reference, uncomment:
  # save('nonsep-precisionrecall.hdf5', pr_xy)
  pr_ref = bob.io.base.load(F('nonsep-precisionrecall.hdf5'))
  assert numpy.array_equal(pr_xy, pr_ref)

  # DET curve with 100 points
  det_xyzw = det(negatives, positives, 100)
  # to regenerate the reference, uncomment:
  # save('nonsep-det.hdf5', det_xyzw)
  det_xyzw_ref = bob.io.base.load(F('nonsep-det.hdf5'))
  assert numpy.allclose(det_xyzw, det_xyzw_ref, atol=1e-15)

  # The EPC curve requires a development and a test set. Just for the sake
  # of testing, both score sets are split by the middle: the first halves
  # play the role of the development set, the second halves of the test set.
  half_negatives = negatives.shape[0] // 2
  half_positives = positives.shape[0] // 2
  dev_negatives = negatives[:half_negatives]
  test_negatives = negatives[half_negatives:]
  dev_positives = positives[:half_positives]
  test_positives = positives[half_positives:]

  epc_xy = epc(dev_negatives, dev_positives,
               test_negatives, test_positives, 100)
  epc_ref = bob.io.base.load(F('nonsep-epc.hdf5'))
  # this result only covers the first two rows of the reference
  assert numpy.allclose(epc_xy, epc_ref[:2], atol=1e-15)

  epc_xy = epc(dev_negatives, dev_positives,
               test_negatives, test_positives, 100, False, True)
  # to regenerate the reference, uncomment:
  # save('nonsep-epc.hdf5', epc_xy)
  assert numpy.allclose(epc_xy, epc_ref, atol=1e-15)
335 336 337 338


def test_rocch():
  """Checks ``rocch``, ``rocch2eer`` and ``eer_rocch`` against references.

  The ROC Convex Hull and the EER derived from it are computed for both the
  separable and the non-separable test set. The reference values were
  obtained using Bosaris 1.06.
  """

  from . import rocch, rocch2eer, eer_rocch

  # Each entry reads: positives file, negatives file, reference convex hull,
  # reference EER
  cases = (
      # This test set is separable.
      ('linsep-positives.hdf5', 'linsep-negatives.hdf5',
       numpy.array([[1., 0., 0.], [0., 0., 1.]]),
       0.),
      # This test set is not separable.
      ('nonsep-positives.hdf5', 'nonsep-negatives.hdf5',
       numpy.array([[1., 0.68, 0.28, 0.1, 0.06, 0., 0.],
                    [0, 0, 0.08, 0.12, 0.22, 0.48, 1.]]),
       0.116363636363636),
  )

  for positives_file, negatives_file, pmiss_pfa_ref, eer_ref in cases:
    positives = bob.io.base.load(F(positives_file))
    negatives = bob.io.base.load(F(negatives_file))

    # the convex hull itself
    pmiss_pfa = rocch(negatives, positives)
    assert numpy.allclose(pmiss_pfa, pmiss_pfa_ref, atol=1e-15)

    # the EER, once from the hull and once directly from the scores
    eer = rocch2eer(pmiss_pfa)
    assert abs(eer - eer_ref) < 1e-4
    eer = eer_rocch(negatives, positives)
    assert abs(eer - eer_ref) < 1e-4
372

373 374 375

def test_cmc():
  """Checks ``recognition_rate`` and ``cmc`` on toy data and stored scores."""

  from . import recognition_rate, cmc

  # Toy data: one (negative scores, positive scores) pair per probe. The
  # assertions below expect a recognition rate of 1/2 and the CMC curve
  # [1/2, 3/4, 1, 1, 1].
  toy_scores = [((0.3, 1.1, 0.5), (0.7,)), ((1.4, -1.3, 0.6), (0.2,)),
                ((0.8, 0., 1.5), (-0.8, 1.8)), ((2., 1.3, 1.6, 0.9), (2.4,))]
  nose.tools.eq_(recognition_rate(toy_scores), 0.5)
  toy_cmc = cmc(toy_scores)
  assert (toy_cmc == [0.5, 0.75, 1., 1., 1]).all()

  # reference values for the scores stored in the data file
  desired_rr = 0.76
  desired_cmc = [0.76, 0.89, 0.96, 0.98, 1., 1., 1.,
                 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]

  # pair up the stored negative and positive scores
  hdf5 = bob.io.base.HDF5File(F('test-cmc.hdf5'))
  stored_scores = list(zip(hdf5.read('data-neg'), hdf5.read('data-pos')))
  del hdf5

  nose.tools.eq_(recognition_rate(stored_scores), desired_rr)
  stored_cmc = cmc(stored_scores)
  assert (stored_cmc == desired_cmc).all()

def test_calibration():
  """Checks the ``cllr`` and ``min_cllr`` measures of ``calibration``."""

  from . import calibration

  # This test set is separable.
  positives = bob.io.base.load(F('linsep-positives.hdf5'))
  negatives = bob.io.base.load(F('linsep-negatives.hdf5'))

  separable_cllr = calibration.cllr(negatives, positives)
  separable_min_cllr = calibration.min_cllr(negatives, positives)

  assert separable_min_cllr <= separable_cllr
  nose.tools.assert_almost_equal(separable_cllr, 1.2097942129)
  # Since the test set is separable, the min_cllr needs to be zero
  nose.tools.assert_almost_equal(separable_min_cllr, 0.)

  # This test set is not separable.
  positives = bob.io.base.load(F('nonsep-positives.hdf5'))
  negatives = bob.io.base.load(F('nonsep-negatives.hdf5'))

  nonsep_cllr = calibration.cllr(negatives, positives)
  nonsep_min_cllr = calibration.min_cllr(negatives, positives)

  assert nonsep_min_cllr <= nonsep_cllr
  assert abs(nonsep_cllr - 3.61833) < 1e-5, nonsep_cllr
  assert abs(nonsep_min_cllr - 0.33736) < 1e-5, nonsep_min_cllr


431
def _load_open_set_scores(filename):
  """Loads the open-set scores stored in ``filename`` of the data directory.

  Every key of the HDF5 file whose name contains 'neg' is read as a negative
  score entry, every other key as a positive one. Entries whose string
  representation is 'None' are replaced by ``None``.

  Returns a list of (negative, positive) pairs, built by zipping both lists
  in the order in which the keys are listed by the file.
  """
  f = bob.io.base.HDF5File(F(filename))
  negative = []
  positive = []
  for key in f.keys():
    which = negative if 'neg' in key else positive
    val = f.read(key)
    if str(val) == 'None':
      val = None
    which.append(val)
  del f
  return list(zip(negative, positive))


def test_open_set_rates():
  """Checks the open-set measures on score files with 0, 1 and 2 errors.

  For each file, the detection & identification rate, the false alarm rate
  and the recognition rate (with and without a threshold) are compared to
  their expected values.
  """
  # imported explicitly, instead of relying on ``bob.measure`` being
  # reachable through the ``bob`` name bound by ``import bob.io.base``
  from bob.measure import detection_identification_rate, false_alarm_rate, \
      recognition_rate

  # No error files
  cmc_scores = _load_open_set_scores('test0-open-set.hdf5')
  assert abs(detection_identification_rate(
      cmc_scores, threshold=0.5) - 1.0) < 1e-8
  assert abs(false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8

  assert abs(recognition_rate(cmc_scores) - 7. / 9.) < 1e-8
  assert abs(recognition_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8

  # One error
  cmc_scores = _load_open_set_scores('test1-open-set.hdf5')
  assert abs(detection_identification_rate(
      cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8
  assert abs(false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8

  assert abs(recognition_rate(cmc_scores) - 6. / 9.) < 1e-8
  assert abs(recognition_rate(cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8

  # Two errors
  cmc_scores = _load_open_set_scores('test2-open-set.hdf5')
  assert abs(detection_identification_rate(
      cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8
  assert abs(false_alarm_rate(cmc_scores, threshold=0.5) - 0.5) < 1e-8

  assert abs(recognition_rate(cmc_scores) - 6. / 9.) < 1e-8
  assert abs(recognition_rate(cmc_scores, threshold=0.5) - 6. / 8.) < 1e-8
492 493 494


def test_mindcf():
  """Checks the minimum weighted error rate with an outlier negative score.

  The negative set contains one score (4) that lies above all positives.
  """
  from bob.measure import min_weighted_error_rate_threshold, farfrr

  cost = 0.99
  negatives = [-3, -2, -1, -0.5, 4]
  positives = [0.5, 3]

  threshold = min_weighted_error_rate_threshold(
      negatives, positives, cost, True)
  far, frr = farfrr(negatives, positives, threshold)

  # cost-weighted error rate, expressed as a percentage
  weighted_error = cost * far + (1 - cost) * frr
  assert 100 * weighted_error < 1.0 + 1e-8
504 505