Commit 5ace8e26 authored by André Anjos

Merge branch...

Merge branch '27-far-and-frr-thresholds-are-computed-even-when-there-is-no-data-support' into 'master'

Resolve "FAR and FRR thresholds are computed even when there is no data support"

Closes #27

See merge request !39
parents db7447e9 a92c5e3e
Pipeline #12131 passed with stages
in 14 minutes and 8 seconds
......@@ -16,6 +16,7 @@
#include <bob.core/array_sort.h>
#include <bob.core/assert.h>
#include <bob.core/cast.h>
#include <bob.core/logging.h>
#include <bob.math/linsolve.h>
#include <bob.math/pavx.h>
......@@ -37,6 +38,10 @@ std::pair<double, double>
bob::measure::farfrr(const blitz::Array<double, 1> &negatives,
const blitz::Array<double, 1> &positives,
double threshold) {
if (std::isnan(threshold)){
bob::core::error << "Cannot compute FAR or FRR with threshold NaN";
return std::make_pair(1.,1.);
}
if (!negatives.size())
throw std::runtime_error("Cannot compute FAR when no negatives are given");
if (!positives.size())
......@@ -120,26 +125,21 @@ double bob::measure::farThreshold(const blitz::Array<double, 1> &negatives,
// compute position of the threshold
double crr = 1. - far_value; // (Correct Rejection Rate; = 1 - FAR)
double crr_index = crr * neg.extent(0);
double crr_index = crr * neg.extent(0) - 1.;
// compute the index above the current CRR value
int index = std::min((int)std::floor(crr_index), neg.extent(0) - 1);
int index = (int)std::ceil(crr_index);
// correct index if we have multiple score values at the requested position
while (index && neg(index) == neg(index - 1))
--index;
// increase the threshold when we have several negatives with the same score
while (index < neg.extent(0)-1 && neg(index) == neg(index+1))
++index;
// we compute a correction term to assure that we are in the middle of two
// cases
double correction;
if (index) {
// assure that we are in the middle of two cases
correction = 0.5 * (neg(index) - neg(index - 1));
if (index < neg.extent(0)-1){
// return the threshold that is just above the desired FAR
return neg(index);
} else {
// add an overall correction term
correction = 0.5 * (neg(neg.extent(0) - 1) - neg(0)) / neg.extent(0);
// We cannot reach the desired threshold, as we have too many identical highest scores, or the number of scores is too low
return std::numeric_limits<double>::quiet_NaN();
}
return neg(index) - correction;
}
double bob::measure::frrThreshold(const blitz::Array<double, 1> &,
......@@ -163,26 +163,22 @@ double bob::measure::frrThreshold(const blitz::Array<double, 1> &,
sort(positives, pos, is_sorted);
// compute position of the threshold
double frr_index = frr_value * pos.extent(0);
// compute the index above the current CAR value
int index = std::min((int)std::ceil(frr_index), pos.extent(0) - 1);
double frr_index = frr_value * pos.extent(0) - 1.;
// compute the index below the current FRR value
int index = (int)std::ceil(frr_index);
// correct index if we have multiple score values at the requested position
while (index < pos.extent(0) - 1 && pos(index) == pos(index + 1))
++index;
// lower the threshold when several positives have the same score
while (index && pos(index) == pos(index-1))
--index;
// we compute a correction term to assure that we are in the middle of two
// cases
double correction;
if (index < pos.extent(0) - 1) {
// assure that we are in the middle of two cases
correction = 0.5 * (pos(index + 1) - pos(index));
if (index){
// return the FRR threshold that is just above the desired FRR
// We have to add a little noise, since the FRR calculation excludes the threshold
return pos(index) + 1e-8 * pos(index);
} else {
// add an overall correction term
correction = 0.5 * (pos(pos.extent(0) - 1) - pos(0)) / pos.extent(0);
// We cannot reach the desired threshold, as we have too many identical lowest scores, or the number of scores is too low
return std::numeric_limits<double>::quiet_NaN();
}
return pos(index) + correction;
}
/**
......
......@@ -713,7 +713,11 @@ static PyObject *precision_recall_curve(PyObject *, PyObject *args,
static auto far_threshold_doc =
bob::extension::FunctionDoc(
"far_threshold", "Computes the threshold such that the real FAR is "
"**at least** the requested ``far_value``",
"**at least** the requested ``far_value`` if possible",
"If no such threshold can be computed, ``NaN`` is returned. It is "
"impossible to compute the threshold, when too few non-identical "
"highest scores exist, so that the desired ``far_value`` cannot be "
"reached by any threshold.\n\n"
".. note::\n\n"
" The scores will be sorted internally, requiring the scores to be "
"copied.\n"
......@@ -769,7 +773,11 @@ static PyObject *far_threshold(PyObject *, PyObject *args, PyObject *kwds) {
static auto frr_threshold_doc =
bob::extension::FunctionDoc(
"frr_threshold", "Computes the threshold such that the real FRR is "
"**at least** the requested ``frr_value``",
"**at least** the requested ``frr_value`` if possible",
"If no such threshold can be computed, ``NaN`` is returned. It is "
"impossible to compute the threshold, when too few non-identical "
"lowest scores exist, so that the desired ``frr_value`` cannot be "
"reached by any threshold.\n\n"
".. note::\n\n"
" The scores will be sorted internally, requiring the scores to be "
"copied.\n"
......
......@@ -537,6 +537,7 @@ def detection_identification_curve(cmc_scores, far_values=log_values(), rank=1,
"""
import numpy
import math
from matplotlib import pyplot
from . import far_threshold, detection_identification_rate
......@@ -553,8 +554,8 @@ def detection_identification_curve(cmc_scores, far_values=log_values(), rank=1,
# compute detection and identification rate based on the thresholds for
# the given rank
rates = [
100. * detection_identification_rate(cmc_scores, t, rank) for t in thresholds]
rates = [detection_identification_rate(cmc_scores, t, rank)
if not math.isnan(t) else numpy.nan for t in thresholds]
# plot curve
if logx:
......
......@@ -12,6 +12,7 @@ import os
import numpy
import nose.tools
import bob.io.base
import math
def F(f):
......@@ -83,6 +84,45 @@ def test_basic_ratios():
nose.tools.eq_(f_score_, 1.0)
def test_nan_for_uncomputable_thresholds():
  # An FAR or FRR threshold cannot always be computed, e.g., when there is
  # too little data or when too many scores are identical. In those
  # situations far_threshold and frr_threshold are expected to return NaN.
  from . import far_threshold, frr_threshold

  # scenario 1a: several identical scores, odd number of scores
  pos = [0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
  neg = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0]

  # rates that are reachable must still produce reasonable thresholds ...
  assert far_threshold(neg, pos, 0.5) == 0.9
  assert numpy.isclose(frr_threshold(neg, pos, 0.5), 0.1)
  # ... whereas rates that are not reachable must produce NaN
  assert math.isnan(far_threshold(neg, pos, 0.4))
  assert math.isnan(frr_threshold(neg, pos, 0.4))

  # scenario 1b: the same checks with an even number of scores
  pos = [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
  neg = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0]

  assert far_threshold(neg, pos, 0.5) == 0.9
  assert numpy.isclose(frr_threshold(neg, pos, 0.51), 0.1)
  assert math.isnan(far_threshold(neg, pos, 0.49))
  assert math.isnan(frr_threshold(neg, pos, 0.5))

  # scenario 2: not enough scores to reach the requested rate
  pos = numpy.arange(10.)
  neg = numpy.arange(10.)

  assert math.isnan(far_threshold(neg, pos, 0.09))
  assert math.isnan(frr_threshold(neg, pos, 0.09))

  # larger requested rates are not limited in the same way; the returned
  # threshold simply moves towards the extreme score values
  assert far_threshold(neg, pos, 0.11) == 8.
  assert far_threshold(neg, pos, 0.91) == 0.
  assert numpy.isclose(frr_threshold(neg, pos, 0.11), 1.)
  assert numpy.isclose(frr_threshold(neg, pos, 0.91), 9.)
def test_indexing():
from . import correctly_classified_positives, correctly_classified_negatives
......@@ -145,11 +185,13 @@ def test_thresholding():
# requested ones
far = farfrr(negatives, positives, threshold_far)[0]
frr = farfrr(negatives, positives, threshold_frr)[1]
assert far + 1e-7 > t
assert frr + 1e-7 > t
if not math.isnan(threshold_far):
assert far + 1e-7 > t, (far,t)
assert far - t <= 0.1
if not math.isnan(threshold_frr):
assert frr + 1e-7 > t, (frr,t)
# test that the values are at least somewhere in the range
assert far - t <= 0.15
assert frr - t <= 0.15
assert frr - t <= 0.1
# If the set is separable, the calculation of the threshold is a little bit
# trickier, as you have no points in the middle of the range to compare
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment