Commit 5142b646 authored by Amir MOHAMMADI

Correct the threshold calculation implementation in far_threshold and...

Correct the threshold calculation in far_threshold and frr_threshold. They now return a threshold that, when used, is guaranteed to produce at most the requested FAR or FRR value.
parent 61237f50
Pipeline #14052 passed with stages
in 14 minutes and 39 seconds
......@@ -39,7 +39,7 @@ bob::measure::farfrr(const blitz::Array<double, 1> &negatives,
const blitz::Array<double, 1> &positives,
double threshold) {
if (std::isnan(threshold)){
bob::core::error << "Cannot compute FAR or FRR with threshold NaN";
bob::core::error << "Cannot compute FAR or FRR with threshold NaN.\n";
return std::make_pair(1.,1.);
}
if (!negatives.size())
......@@ -123,24 +123,50 @@ double bob::measure::farThreshold(const blitz::Array<double, 1> &negatives,
blitz::Array<double, 1> neg;
sort(negatives, neg, is_sorted);
// compute position of the threshold
double crr = 1. - far_value; // (Correct Rejection Rate; = 1 - FAR)
double crr_index = std::max(crr * neg.extent(0) - 1., 0.);
// compute the index above the current CRR value
int index = (int)std::ceil(crr_index);
// increase the threshold when we have several negatives with the same score
while (index < neg.extent(0)-1 && neg(index) == neg(index+1))
++index;
if (index < neg.extent(0)-1){
// return the threshold that is just above the desired FAR
return neg(index);
} else {
// We cannot reach the desired threshold, as we have too many identical lowest scores, or the number of scores is too low
bob::core::warn << "The threshold cannot be computed for an FAR value of " << far_value;
// Calculate the minimum possible FAR that can be requested besides 0. This
// is done by counting the number of repeated samples at the end of
// negatives.
double counter = 1.;
int check_index = neg.extent(0)-1;
while (check_index >= 1 && neg(check_index) == neg(check_index-1)) {
--check_index;
++counter;
}
// If the requested FAR is below the smallest achievable non-zero value, we
// cannot reach the desired threshold: there are too many identical largest
// scores, or the number of scores is too low.
if (far_value >= 1e-12 && far_value < counter / (double)neg.extent(0)) {
bob::core::error << "The threshold cannot be computed for an FAR value of "
<< far_value << ". There are either too many repeated largest scores or "
"the number of scores is too low. The minimum possible FAR value is "
<< counter / (double)neg.extent(0) << "\n";
return std::numeric_limits<double>::quiet_NaN();
}
int index = neg.extent(0)-1;
// far == 0 is a corner case
if (far_value <= 1e-12)
return neg(index) + 1e-12;
// far == 1 is a corner case
if (far_value >= 1 - 1e-12)
return neg(0) - 1e-12;
// move to the left of array changing the threshold until we pass the desired
// FAR value.
double threshold;
double future_far;
while (index >= 0) {
threshold = neg(index);
if (index == 0)
break;
future_far = blitz::count(neg >= neg(index-1)) / (double)neg.extent(0);
if (future_far > far_value)
break;
--index;
}
return threshold;
}
double bob::measure::frrThreshold(const blitz::Array<double, 1> &,
......@@ -163,24 +189,50 @@ double bob::measure::frrThreshold(const blitz::Array<double, 1> &,
blitz::Array<double, 1> pos;
sort(positives, pos, is_sorted);
// compute position of the threshold
double frr_index = std::max(frr_value * pos.extent(0) - 1., 0.);
// compute the index below the current FAR value
int index = (int)std::ceil(frr_index);
// lower the threshold when several positives have the same score
while (index && pos(index) == pos(index-1))
--index;
if (index){
// return the FRR threshold that is just above the desired FRR
// We have to add a little noise to since the FRR calculation excludes the threshold
return pos(index) + 1e-8 * pos(index);
} else {
// We cannot reach the desired threshold, as we have too many identical highest scores
bob::core::warn << "The threshold cannot be computed for an FRR value of " << frr_value;
// Calculate the minimum possible FRR that can be requested besides 0. This
// is done by counting the number of repeated samples at the beginning of
// positives.
double counter = 1.;
int check_index = 0;
while (check_index < pos.extent(0)-1 && pos(check_index) == pos(check_index+1)) {
++check_index;
++counter;
}
// if requested FRR is less than the least possible value. We cannot reach
// the desired threshold, as we have too many identical lowest scores, or the
// number of scores is too low
if (frr_value >= 1e-12 && frr_value < counter / (double)pos.extent(0)) {
bob::core::error << "The threshold cannot be computed for an FRR value of "
<< frr_value << ". There are either too many repeated lowest scores or "
"the number of scores is too low. The minimum possible FRR value is "
<< counter / (double)pos.extent(0) << "\n";
return std::numeric_limits<double>::quiet_NaN();
}
int index = 0;
// frr == 0 is a corner case
if (frr_value <= 1e-12)
return pos(0) - 1e-12;
// frr == 1 is a corner case
if (frr_value >= 1 - 1e-12)
return pos(pos.extent(0)-1) + 1e-12;
// move to the right of array changing the threshold until we pass the
// desired FRR value.
double threshold;
double future_frr;
while (index < pos.extent(0)) {
threshold = pos(index);
if (index == pos.extent(0)-1)
break;
future_frr = blitz::count(pos < pos(index+1)) / (double)pos.extent(0);
if (future_frr > frr_value)
break;
++index;
}
return threshold;
}
/**
......
......@@ -713,7 +713,7 @@ static PyObject *precision_recall_curve(PyObject *, PyObject *args,
static auto far_threshold_doc =
bob::extension::FunctionDoc(
"far_threshold", "Computes the threshold such that the real FAR is "
"**at least** the requested ``far_value`` if possible",
"**at most** the requested ``far_value`` if possible",
"If no such threshold can be computed, ``NaN`` is returned. It is "
"impossible to compute the threshold, when too few non-identical "
"highest scores exist, so that the desired ``far_value`` cannot be "
......@@ -742,7 +742,7 @@ static auto far_threshold_doc =
"will require more memory")
.add_return(
"threshold", "float",
"The threshold such that the real FAR is at least ``far_value``");
"The threshold such that the real FAR is at most ``far_value``");
static PyObject *far_threshold(PyObject *, PyObject *args, PyObject *kwds) {
BOB_TRY
static char **kwlist = far_threshold_doc.kwlist();
......@@ -773,7 +773,7 @@ static PyObject *far_threshold(PyObject *, PyObject *args, PyObject *kwds) {
static auto frr_threshold_doc =
bob::extension::FunctionDoc(
"frr_threshold", "Computes the threshold such that the real FRR is "
"**at least** the requested ``frr_value`` if possible",
"**at most** the requested ``frr_value`` if possible",
"If no such threshold can be computed, ``NaN`` is returned. It is "
"impossible to compute the threshold, when too few non-identical "
"lowest scores exist, so that the desired ``frr_value`` cannot be "
......@@ -802,7 +802,7 @@ static auto frr_threshold_doc =
"will require more memory")
.add_return(
"threshold", "float",
"The threshold such that the real FRR is at least ``frr_value``");
"The threshold such that the real FRR is at most ``frr_value``");
static PyObject *frr_threshold(PyObject *, PyObject *args, PyObject *kwds) {
BOB_TRY
char **kwlist = frr_threshold_doc.kwlist();
......
......@@ -85,8 +85,9 @@ def test_basic_ratios():
def test_nan_for_uncomputable_thresholds():
# in some cases, we cannot compute an FAR or FRR threshold, e.g., when we have too little data or too many equal scores
# in these cases, the methods should return NaN
# In some cases, we cannot compute an FAR or FRR threshold, e.g., when we
# have too little data or too many equal scores. In these cases, the methods
# should return NaN.
from . import far_threshold, frr_threshold
# case 1: several scores are identical
......@@ -95,7 +96,7 @@ def test_nan_for_uncomputable_thresholds():
# test that reasonable thresholds for reachable data points are provided
threshold = far_threshold(negatives, positives, 0.5)
assert threshold == 0.9, threshold
assert threshold == 1.0, threshold
threshold = frr_threshold(negatives, positives, 0.5)
assert numpy.isclose(threshold, 0.1), threshold
......@@ -108,10 +109,11 @@ def test_nan_for_uncomputable_thresholds():
positives = [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0]
assert far_threshold(negatives, positives, 0.5) == 0.9
threshold = far_threshold(negatives, positives, 0.5)
assert threshold == 1.0, threshold
assert numpy.isclose(frr_threshold(negatives, positives, 0.51), 0.1)
assert math.isnan(far_threshold(negatives, positives, 0.49))
assert math.isnan(frr_threshold(negatives, positives, 0.5))
assert math.isnan(frr_threshold(negatives, positives, 0.49))
# case 2: too few scores for the desired threshold
positives = numpy.arange(10.)
......@@ -121,9 +123,14 @@ def test_nan_for_uncomputable_thresholds():
assert math.isnan(threshold), threshold
threshold = frr_threshold(negatives, positives, 0.09)
assert math.isnan(threshold), threshold
# there is no limit above; the threshold will just be the largest possible value
assert far_threshold(negatives, positives, 0.11) == 8.
assert far_threshold(negatives, positives, 0.91) == 0.
# there is no limit above; the threshold will just be the largest possible
# value
threshold = far_threshold(negatives, positives, 0.11)
assert threshold == 9., threshold
threshold = far_threshold(negatives, positives, 0.91)
assert threshold == 1., threshold
threshold = far_threshold(negatives, positives, 1)
assert threshold <= 0., threshold
assert numpy.isclose(frr_threshold(negatives, positives, 0.11), 1.)
assert numpy.isclose(frr_threshold(negatives, positives, 0.91), 9.)
......@@ -212,12 +219,12 @@ def test_thresholding():
far = farfrr(negatives, positives, threshold_far)[0]
frr = farfrr(negatives, positives, threshold_frr)[1]
if not math.isnan(threshold_far):
assert far + 1e-7 > t, (far,t)
assert far - t <= 0.1
assert far <= t, (far, t)
assert t - far <= 0.1, (far, t)
if not math.isnan(threshold_frr):
assert frr + 1e-7 > t, (frr,t)
assert frr <= t, (frr, t)
# test that the values are at least somewhere in the range
assert frr - t <= 0.1
assert t - frr <= 0.1, (frr, t)
# If the set is separable, the calculation of the threshold is a little bit
# trickier, as you have no points in the middle of the range to compare
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment