Commit 57cc7005 authored by Amir MOHAMMADI

Never return NaNs for {far,frr}_threshold functions anymore

parent f93e2f96
Pipeline #14094 passed with stages
in 8 minutes and 24 seconds
......@@ -123,45 +123,22 @@ double bob::measure::farThreshold(const blitz::Array<double, 1> &negatives,
blitz::Array<double, 1> neg;
sort(negatives, neg, is_sorted);
// Calculate the minimum possible FAR that can be requested besides 0. This
// is done by counting the number of repeated samples at the end of
// negatives.
double counter = 1.;
int index = neg.extent(0)-1;
while (index >= 1 && neg(index) == neg(index-1)) {
--index;
++counter;
}
// If the requested FAR is less than the least possible value, we cannot reach
// the desired threshold, as we have too many identical largest scores, or
// the number of scores is too low.
if (far_value >= 1e-12 && far_value < counter / (double)neg.extent(0)) {
bob::core::error << "The threshold cannot be computed for an FAR value of "
<< far_value << ". There are either too many repeated largest scores or "
"the number of scores is too low. The minimum possible FAR value is "
<< counter / (double)neg.extent(0) << "\n";
return std::numeric_limits<double>::quiet_NaN();
}
index = neg.extent(0)-1;
// far == 0 is a corner case
if (far_value <= 1e-12)
return neg(index) + 1e-12;
// far == 1 is a corner case
if (far_value >= 1 - 1e-12)
return neg(0) - 1e-12;
// move to the left of array changing the threshold until we pass the desired
// FAR value.
double threshold = neg(index);
// Move towards the beginning of array changing the threshold until we pass
// the desired FAR value. Start with a threshold that corresponds to FAR ==
// 0.
int index = neg.extent(0) - 1;
double threshold = neg(index) + 1e-12;
double future_far;
while (index > 0) {
future_far = blitz::count(neg >= neg(index-1)) / (double)neg.extent(0);
while (index >= 0) {
future_far = blitz::count(neg >= neg(index)) / (double)neg.extent(0);
if (future_far > far_value)
break;
threshold = neg(--index);
threshold = neg(index);
--index;
}
return threshold;
}
......@@ -186,45 +163,22 @@ double bob::measure::frrThreshold(const blitz::Array<double, 1> &negatives,
blitz::Array<double, 1> pos;
sort(positives, pos, is_sorted);
// Calculate the minimum possible FRR that can be requested besides 0. This
// is done by counting the number of repeated samples at the beginning of
// positives.
double counter = 1.;
int index = 0;
while (index < pos.extent(0)-1 && pos(index) == pos(index+1)) {
++index;
++counter;
}
// If the requested FRR is less than the least possible value, we cannot reach
// the desired threshold, as we have too many identical lowest scores, or the
// number of scores is too low.
if (frr_value >= 1e-12 && frr_value < counter / (double)pos.extent(0)) {
bob::core::error << "The threshold cannot be computed for an FRR value of "
<< frr_value << ". There are either too many repeated lowest scores or "
"the number of scores is too low. The minimum possible FRR value is "
<< counter / (double)pos.extent(0) << "\n";
return std::numeric_limits<double>::quiet_NaN();
}
index = 0;
// frr == 0 is a corner case
if (frr_value <= 1e-12)
return pos(0) - 1e-12;
// frr == 1 is a corner case
if (frr_value >= 1 - 1e-12)
return pos(pos.extent(0)-1) + 1e-12;
// move to the right of array changing the threshold until we pass the
// desired FRR value.
double threshold = pos(index);
// Move towards the end of array changing the threshold until we pass
// the desired FRR value. Start with a threshold that corresponds to FRR ==
// 0.
int index = 0;
double threshold = pos(index) - 1e-12;
double future_frr;
while (index < pos.extent(0)-1) {
future_frr = blitz::count(pos < pos(index+1)) / (double)pos.extent(0);
while (index < pos.extent(0)) {
future_frr = blitz::count(pos < pos(index)) / (double)pos.extent(0);
if (future_frr > frr_value)
break;
threshold = pos(++index);
threshold = pos(index);
++index;
}
return threshold;
}
......
......@@ -84,55 +84,59 @@ def test_basic_ratios():
nose.tools.eq_(f_score_, 1.0)
def test_nan_for_uncomputable_thresholds():
def test_for_uncomputable_thresholds():
# in some cases, we cannot compute an FAR or FRR threshold, e.g., when we
# have too little data or too many equal scores. In these cases, the methods
# should return NaN
# should return a threshold which supports a lower value.
from . import far_threshold, frr_threshold
# case 1: several scores are identical
positives = [0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0]
pos = [0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
neg = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0]
# test that reasonable thresholds for reachable data points are provided
threshold = far_threshold(negatives, positives, 0.5)
threshold = far_threshold(neg, pos, 0.5)
assert threshold == 1.0, threshold
threshold = frr_threshold(negatives, positives, 0.5)
threshold = frr_threshold(neg, pos, 0.5)
assert numpy.isclose(threshold, 0.1), threshold
threshold = far_threshold(negatives, positives, 0.4)
assert math.isnan(threshold), threshold
threshold = frr_threshold(negatives, positives, 0.4)
assert math.isnan(threshold), threshold
threshold = far_threshold(neg, pos, 0.4)
assert threshold > neg[-1], threshold
threshold = frr_threshold(neg, pos, 0.4)
assert threshold >= pos[0], threshold
# test the same with even number of scores
positives = [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0]
pos = [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
neg = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0]
threshold = far_threshold(negatives, positives, 0.5)
threshold = far_threshold(neg, pos, 0.5)
assert threshold == 1.0, threshold
assert numpy.isclose(frr_threshold(negatives, positives, 0.51), 0.1)
assert math.isnan(far_threshold(negatives, positives, 0.49))
assert math.isnan(frr_threshold(negatives, positives, 0.49))
assert numpy.isclose(frr_threshold(neg, pos, 0.51), 0.1)
threshold = far_threshold(neg, pos, 0.49)
assert threshold > neg[-1], threshold
threshold = frr_threshold(neg, pos, 0.49)
assert threshold >= pos[0], threshold
# case 2: too few scores for the desired threshold
positives = numpy.arange(10.)
negatives = numpy.arange(10.)
pos = numpy.array(range(10), dtype=float)
neg = numpy.array(range(10), dtype=float)
threshold = far_threshold(negatives, positives, 0.09)
assert math.isnan(threshold), threshold
threshold = frr_threshold(negatives, positives, 0.09)
assert math.isnan(threshold), threshold
threshold = far_threshold(neg, pos, 0.09)
assert threshold > neg[-1], threshold
threshold = frr_threshold(neg, pos, 0.09)
assert threshold >= pos[0], threshold
# there is no limit above; the threshold will just be the largest possible
# value
threshold = far_threshold(negatives, positives, 0.11)
threshold = far_threshold(neg, pos, 0.11)
assert threshold == 9., threshold
threshold = far_threshold(negatives, positives, 0.91)
threshold = far_threshold(neg, pos, 0.91)
assert threshold == 1., threshold
threshold = far_threshold(negatives, positives, 1)
threshold = far_threshold(neg, pos, 1)
assert threshold <= 0., threshold
assert numpy.isclose(frr_threshold(negatives, positives, 0.11), 1.)
assert numpy.isclose(frr_threshold(negatives, positives, 0.91), 9.)
threshold = frr_threshold(neg, pos, 0.11)
assert numpy.isclose(threshold, 1.), threshold
threshold = frr_threshold(neg, pos, 0.91)
assert numpy.isclose(threshold, 9.), threshold
def test_indexing():
......@@ -174,26 +178,14 @@ def test_obvious_thresholds():
far, expected_far = round(far, 2), math.floor(far * 10) / 10
frr, expected_frr = round(frr, 2), math.floor(frr * 10) / 10
calculated_far_threshold = far_threshold(neg, pos, far)
predicted_far, _ = farfrr(neg, pos, calculated_far_threshold)
pred_far, _ = farfrr(neg, pos, calculated_far_threshold)
calculated_frr_threshold = frr_threshold(neg, pos, frr)
_, predicted_frr = farfrr(neg, pos, calculated_frr_threshold)
if far < 1. / M and far != 0:
assert math.isnan(calculated_far_threshold), (predicted_far,
far,
calculated_far_threshold)
assert math.isnan(calculated_frr_threshold), (predicted_frr,
frr,
calculated_frr_threshold)
else:
assert predicted_far <= far, (predicted_far,
far, calculated_far_threshold)
assert predicted_far == expected_far, (predicted_far,
far, calculated_far_threshold)
assert predicted_frr <= frr, (predicted_frr,
frr, calculated_frr_threshold)
assert predicted_frr == expected_frr, (predicted_frr,
frr, calculated_frr_threshold)
_, pred_frr = farfrr(neg, pos, calculated_frr_threshold)
assert pred_far <= far, (pred_far, far, calculated_far_threshold)
assert pred_far == expected_far, (pred_far, far, calculated_far_threshold)
assert pred_frr <= frr, (pred_frr, frr, calculated_frr_threshold)
assert pred_frr == expected_frr, (pred_frr, frr, calculated_frr_threshold)
def test_thresholding():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment