From 6aadf2d10e41a8ca0300f755970ced69eed12336 Mon Sep 17 00:00:00 2001 From: Manuel Gunther Date: Thu, 30 Mar 2017 13:16:40 -0600 Subject: [PATCH] Fixed FAR and FRR threshold computation and return NaN when threshold cannot be computed --- bob/measure/cpp/error.cpp | 40 +++++++++++++++++---------------------- bob/measure/test_error.py | 25 ++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 23 deletions(-) diff --git a/bob/measure/cpp/error.cpp b/bob/measure/cpp/error.cpp index f25485a..909204f 100644 --- a/bob/measure/cpp/error.cpp +++ b/bob/measure/cpp/error.cpp @@ -120,26 +120,23 @@ double bob::measure::farThreshold(const blitz::Array<double, 1> &negatives, // compute position of the threshold double crr = 1. - far_value; // (Correct Rejection Rate; = 1 - FAR) - double crr_index = crr * neg.extent(0); + double crr_index = crr * neg.extent(0) - 1.; // compute the index above the current CRR value - int index = std::min((int)std::floor(crr_index), neg.extent(0) - 1); + int index = std::min((int)std::ceil(crr_index), neg.extent(0)-1); - // correct index if we have multiple score values at the requested position - while (index && neg(index) == neg(index - 1)) + // increase the threshold when we have several negatives with the same score + while (index < neg.extent(0)-1 && neg(index) == neg(index+1)) ++index; --index; // we compute a correction term to assure that we are in the middle of two - // cases - double correction; - if (index) { + if (index < neg.extent(0)-1){ // assure that we are in the middle of two cases - correction = 0.5 * (neg(index) - neg(index - 1)); + double correction = 0.5 * (neg(index+1) - neg(index)); + return neg(index) + correction; } else { - // add an overall correction term - correction = 0.5 * (neg(neg.extent(0) - 1) - neg(0)) / neg.extent(0); + // We cannot reach the desired threshold, as we have too many identical lowest scores, or the number of scores is too low + return std::numeric_limits<double>::quiet_NaN(); } - - return neg(index) - 
correction; } double bob::measure::frrThreshold(const blitz::Array<double, 1> &, @@ -163,26 +160,23 @@ double bob::measure::frrThreshold(const blitz::Array<double, 1> &, sort(positives, pos, is_sorted); // compute position of the threshold - double frr_index = frr_value * pos.extent(0); + double frr_index = frr_value * pos.extent(0) - 1.; // compute the index above the current CAR value int index = std::min((int)std::ceil(frr_index), pos.extent(0) - 1); - // correct index if we have multiple score values at the requested position - while (index < pos.extent(0) - 1 && pos(index) == pos(index + 1)) + // lower the threshold when several positives have the same score + while (index && pos(index) == pos(index-1)) --index; ++index; // we compute a correction term to assure that we are in the middle of two - // cases - double correction; - if (index < pos.extent(0) - 1) { + if (index){ // assure that we are in the middle of two cases - correction = 0.5 * (pos(index + 1) - pos(index)); + double correction = 0.5 * (pos(index) - pos(index-1)); + return pos(index) - correction; } else { - // add an overall correction term - correction = 0.5 * (pos(pos.extent(0) - 1) - pos(0)) / pos.extent(0); + // We cannot reach the desired threshold, as we have too many identical highest scores + return std::numeric_limits<double>::quiet_NaN(); } - - return pos(index) + correction; } /** diff --git a/bob/measure/test_error.py b/bob/measure/test_error.py index 14e2f94..ad2bf8d 100644 --- a/bob/measure/test_error.py +++ b/bob/measure/test_error.py @@ -12,6 +12,7 @@ import os import numpy import nose.tools import bob.io.base +import math def F(f): @@ -83,6 +84,30 @@ def test_basic_ratios(): nose.tools.eq_(f_score_, 1.0) +def test_nan_for_uncomputable_thresholds(): + # in some cases, we cannot compute an FAR or FRR threshold, e.g., when we have too little data or too many equal scores + # in these cases, the methods should return NaN + from . 
import far_threshold, frr_threshold + + # case 1: several scores are identical + positives = [0., 0., 0., 0., 0.1, 0.2, 0.3, 0.4, 0.5] + negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1., 1., 1., 1.] + + # test that reasonable thresholds for reachable data points are provided + assert far_threshold(negatives, positives, 0.5) == 0.95, far_threshold(negatives, positives, 0.5) + assert frr_threshold(negatives, positives, 0.5) == 0.05, frr_threshold(negatives, positives, 0.5) + + assert math.isnan(far_threshold(negatives, positives, 0.4)) + assert math.isnan(frr_threshold(negatives, positives, 0.4)) + + # case 2: too few scores for the desired threshold + positives = numpy.arange(10.) + negatives = numpy.arange(10.) + + assert math.isnan(far_threshold(negatives, positives, 0.09)) + assert math.isnan(frr_threshold(negatives, positives, 0.09)) + + def test_indexing(): from . import correctly_classified_positives, correctly_classified_negatives -- 2.21.0