Skip to content
Snippets Groups Projects
Commit 10846a47 authored by Manuel Günther's avatar Manuel Günther
Browse files

Corrected handling of FAR and FRR thresholds

parent 1438376d
No related branches found
No related tags found
1 merge request: !39 Resolve "FAR and FRR thresholds are computed even when there is no data support"
Pipeline #
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <bob.core/array_sort.h> #include <bob.core/array_sort.h>
#include <bob.core/assert.h> #include <bob.core/assert.h>
#include <bob.core/cast.h> #include <bob.core/cast.h>
#include <bob.core/logging.h>
#include <bob.math/linsolve.h> #include <bob.math/linsolve.h>
#include <bob.math/pavx.h> #include <bob.math/pavx.h>
...@@ -37,6 +38,10 @@ std::pair<double, double> ...@@ -37,6 +38,10 @@ std::pair<double, double>
bob::measure::farfrr(const blitz::Array<double, 1> &negatives, bob::measure::farfrr(const blitz::Array<double, 1> &negatives,
const blitz::Array<double, 1> &positives, const blitz::Array<double, 1> &positives,
double threshold) { double threshold) {
if (std::isnan(threshold)){
bob::core::error << "Cannot compute FAR or FRR with threshold NaN";
return std::make_pair(1.,1.);
}
if (!negatives.size()) if (!negatives.size())
throw std::runtime_error("Cannot compute FAR when no negatives are given"); throw std::runtime_error("Cannot compute FAR when no negatives are given");
if (!positives.size()) if (!positives.size())
...@@ -120,19 +125,17 @@ double bob::measure::farThreshold(const blitz::Array<double, 1> &negatives, ...@@ -120,19 +125,17 @@ double bob::measure::farThreshold(const blitz::Array<double, 1> &negatives,
// compute position of the threshold // compute position of the threshold
double crr = 1. - far_value; // (Correct Rejection Rate; = 1 - FAR) double crr = 1. - far_value; // (Correct Rejection Rate; = 1 - FAR)
double crr_index = std::max(crr * neg.extent(0) - 1., 0.); double crr_index = crr * neg.extent(0) - 1.;
// compute the index above the current CRR value // compute the index above the current CRR value
int index = std::min((int)std::ceil(crr_index), neg.extent(0)-1); int index = (int)std::ceil(crr_index);
// increase the threshold when we have several negatives with the same score // increase the threshold when we have several negatives with the same score
while (index < neg.extent(0)-1 && neg(index) == neg(index+1)) ++index; while (index < neg.extent(0)-1 && neg(index) == neg(index+1))
--index; ++index;
// we compute a correction term to assure that we are in the middle of two
if (index < neg.extent(0)-1){ if (index < neg.extent(0)-1){
// assure that we are in the middle of two cases // return the threshold that is just above the desired FAR
double correction = 0.5 * (neg(index+1) - neg(index)); return neg(index);
return neg(index) + correction;
} else { } else {
// We cannot reach the desired threshold, as we have too many identical lowest scores, or the number of scores is too low // We cannot reach the desired threshold, as we have too many identical lowest scores, or the number of scores is too low
return std::numeric_limits<double>::quiet_NaN(); return std::numeric_limits<double>::quiet_NaN();
...@@ -161,18 +164,17 @@ double bob::measure::frrThreshold(const blitz::Array<double, 1> &, ...@@ -161,18 +164,17 @@ double bob::measure::frrThreshold(const blitz::Array<double, 1> &,
// compute position of the threshold // compute position of the threshold
double frr_index = frr_value * pos.extent(0) - 1.; double frr_index = frr_value * pos.extent(0) - 1.;
// compute the index above the current CAR value // compute the index below the current FAR value
int index = std::min((int)std::ceil(frr_index), pos.extent(0) - 1); int index = (int)std::ceil(frr_index);
// lower the threshold when several positives have the same score // lower the threshold when several positives have the same score
while (index && pos(index) == pos(index-1)) --index; while (index && pos(index) == pos(index-1))
++index; --index;
// we compute a correction term to assure that we are in the middle of two
if (index){ if (index){
// assure that we are in the middle of two cases // return the FRR threshold that is just above the desired FRR
double correction = 0.5 * (pos(index) - pos(index-1)); // We have to add a little noise, since the FRR calculation excludes the threshold
return pos(index) - correction; return pos(index) + 1e-8 * pos(index);
} else { } else {
// We cannot reach the desired threshold, as we have too many identical highest scores // We cannot reach the desired threshold, as we have too many identical highest scores
return std::numeric_limits<double>::quiet_NaN(); return std::numeric_limits<double>::quiet_NaN();
......
...@@ -713,7 +713,11 @@ static PyObject *precision_recall_curve(PyObject *, PyObject *args, ...@@ -713,7 +713,11 @@ static PyObject *precision_recall_curve(PyObject *, PyObject *args,
static auto far_threshold_doc = static auto far_threshold_doc =
bob::extension::FunctionDoc( bob::extension::FunctionDoc(
"far_threshold", "Computes the threshold such that the real FAR is " "far_threshold", "Computes the threshold such that the real FAR is "
"**at least** the requested ``far_value``", "**at least** the requested ``far_value`` if possible",
"If no such threshold can be computed, ``NaN`` is returned. It is "
"impossible to compute the threshold, when too few non-identical "
"highest scores exist, so that the desired ``far_value`` cannot be "
"reached by any threshold.\n\n"
".. note::\n\n" ".. note::\n\n"
" The scores will be sorted internally, requiring the scores to be " " The scores will be sorted internally, requiring the scores to be "
"copied.\n" "copied.\n"
...@@ -769,7 +773,11 @@ static PyObject *far_threshold(PyObject *, PyObject *args, PyObject *kwds) { ...@@ -769,7 +773,11 @@ static PyObject *far_threshold(PyObject *, PyObject *args, PyObject *kwds) {
static auto frr_threshold_doc = static auto frr_threshold_doc =
bob::extension::FunctionDoc( bob::extension::FunctionDoc(
"frr_threshold", "Computes the threshold such that the real FRR is " "frr_threshold", "Computes the threshold such that the real FRR is "
"**at least** the requested ``frr_value``", "**at least** the requested ``frr_value`` if possible",
"If no such threshold can be computed, ``NaN`` is returned. It is "
"impossible to compute the threshold, when too few non-identical "
"lowest scores exist, so that the desired ``frr_value`` cannot be "
"reached by any threshold.\n\n"
".. note::\n\n" ".. note::\n\n"
" The scores will be sorted internally, requiring the scores to be " " The scores will be sorted internally, requiring the scores to be "
"copied.\n" "copied.\n"
......
...@@ -90,46 +90,37 @@ def test_nan_for_uncomputable_thresholds(): ...@@ -90,46 +90,37 @@ def test_nan_for_uncomputable_thresholds():
from . import far_threshold, frr_threshold from . import far_threshold, frr_threshold
# case 1: several scores are identical # case 1: several scores are identical
positives = [0., 0., 0., 0., 0.1, 0.2, 0.3, 0.4, 0.5] positives = [0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1., 1., 1., 1.] negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0]
# test that reasonable thresholds for reachable data points are provided # test that reasonable thresholds for reachable data points are provided
assert far_threshold(negatives, positives, 0.5) == 0.95, far_threshold(negatives, positives, 0.5) assert far_threshold(negatives, positives, 0.5) == 0.9
assert frr_threshold(negatives, positives, 0.5) == 0.05, frr_threshold(negatives, positives, 0.5) assert numpy.isclose(frr_threshold(negatives, positives, 0.5), 0.1)
assert math.isnan(far_threshold(negatives, positives, 0.4)) assert math.isnan(far_threshold(negatives, positives, 0.4))
assert math.isnan(frr_threshold(negatives, positives, 0.4)) assert math.isnan(frr_threshold(negatives, positives, 0.4))
# case 2: too few scores for the desired threshold # test the same with even number of scores
positives = numpy.arange(10.) positives = [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
negatives = numpy.arange(10.) negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0]
assert math.isnan(far_threshold(negatives, positives, 0.09)) assert far_threshold(negatives, positives, 0.5) == 0.9
assert math.isnan(frr_threshold(negatives, positives, 0.09)) assert numpy.isclose(frr_threshold(negatives, positives, 0.51), 0.1)
assert math.isnan(far_threshold(negatives, positives, 0.49))
assert math.isnan(frr_threshold(negatives, positives, 0.5))
def test_nan_for_uncomputable_thresholds():
# in some cases, we cannot compute an FAR or FRR threshold, e.g., when we have too little data or too many equal scores
# in these cases, the methods should return NaN
from . import far_threshold, frr_threshold
# case 1: several scores are identical
positives = [0., 0., 0., 0., 0.1, 0.2, 0.3, 0.4, 0.5]
negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1., 1., 1., 1.]
# test that reasonable thresholds for reachable data points are provided
assert far_threshold(negatives, positives, 0.5) == 0.95, far_threshold(negatives, positives, 0.5)
assert frr_threshold(negatives, positives, 0.5) == 0.05, frr_threshold(negatives, positives, 0.5)
assert math.isnan(far_threshold(negatives, positives, 0.4))
assert math.isnan(frr_threshold(negatives, positives, 0.4))
# case 2: too few scores for the desired threshold # case 2: too few scores for the desired threshold
positives = numpy.arange(10.) positives = numpy.arange(10.)
negatives = numpy.arange(10.) negatives = numpy.arange(10.)
assert math.isnan(far_threshold(negatives, positives, 0.09)) assert math.isnan(far_threshold(negatives, positives, 0.09))
assert math.isnan(frr_threshold(negatives, positives, 0.09)) assert math.isnan(frr_threshold(negatives, positives, 0.09))
# there is no limit above; the threshold will just be the largest possible value
assert far_threshold(negatives, positives, 0.11) == 8.
assert far_threshold(negatives, positives, 0.91) == 0.
assert numpy.isclose(frr_threshold(negatives, positives, 0.11), 1.)
assert numpy.isclose(frr_threshold(negatives, positives, 0.91), 9.)
def test_indexing(): def test_indexing():
...@@ -194,11 +185,13 @@ def test_thresholding(): ...@@ -194,11 +185,13 @@ def test_thresholding():
# requested ones # requested ones
far = farfrr(negatives, positives, threshold_far)[0] far = farfrr(negatives, positives, threshold_far)[0]
frr = farfrr(negatives, positives, threshold_frr)[1] frr = farfrr(negatives, positives, threshold_frr)[1]
assert far + 1e-7 > t if not math.isnan(threshold_far):
assert frr + 1e-7 > t assert far + 1e-7 > t, (far,t)
# test that the values are at least somewhere in the range assert far - t <= 0.1
assert far - t <= 0.15 if not math.isnan(threshold_frr):
assert frr - t <= 0.15 assert frr + 1e-7 > t, (frr,t)
# test that the values are at least somewhere in the range
assert frr - t <= 0.1
# If the set is separable, the calculation of the threshold is a little bit # If the set is separable, the calculation of the threshold is a little bit
# trickier, as you have no points in the middle of the range to compare # trickier, as you have no points in the middle of the range to compare
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment