### Never return NaNs for {far,frr}_threshold functions anymore

parent f93e2f96
Pipeline #14094 passed with stages
in 8 minutes and 24 seconds
 ... ... @@ -123,45 +123,22 @@ double bob::measure::farThreshold(const blitz::Array &negatives, blitz::Array neg; sort(negatives, neg, is_sorted); // Calculate the minimum possible FAR that can be requested besides 0. This // is done by counting the number of repeated samples at the end of // negatives. double counter = 1.; int index = neg.extent(0)-1; while (index >= 1 && neg(index) == neg(index-1)) { --index; ++counter; } // if requested FAR is less than the least possible value. We cannot reach // the desired threshold, as we have too many identical largest scores, or // the number of scores is too low. if (far_value >= 1e-12 && far_value < counter / (double)neg.extent(0)) { bob::core::error << "The threshold cannot be computed for an FAR value of " << far_value << ". There are either too many repeated largest scores or " "the number of scores is too low. The minimum possible FAR value is " << counter / (double)neg.extent(0) << "\n"; return std::numeric_limits::quiet_NaN(); } index = neg.extent(0)-1; // far == 0 is a corner case if (far_value <= 1e-12) return neg(index) + 1e-12; // far == 1 is a corner case if (far_value >= 1 - 1e-12) return neg(0) - 1e-12; // move to the left of array changing the threshold until we pass the desired // FAR value. double threshold = neg(index); // Move towards the beginning of array changing the threshold until we pass // the desired FAR value. Start with a threshold that corresponds to FAR == // 0. int index = neg.extent(0) - 1; double threshold = neg(index) + 1e-12; double future_far; while (index > 0) { future_far = blitz::count(neg >= neg(index-1)) / (double)neg.extent(0); while (index >= 0) { future_far = blitz::count(neg >= neg(index)) / (double)neg.extent(0); if (future_far > far_value) break; threshold = neg(--index); threshold = neg(index); --index; } return threshold; } ... ... 
@@ -186,45 +163,22 @@ double bob::measure::frrThreshold(const blitz::Array &negatives, blitz::Array pos; sort(positives, pos, is_sorted); // Calculate the minimum possible FRR that can be requested besides 0. This // is done by counting the number of repeated samples at the beginning of // positives. double counter = 1.; int index = 0; while (index < pos.extent(0)-1 && pos(index) == pos(index+1)) { ++index; ++counter; } // if requested FRR is less than the least possible value. We cannot reach // the desired threshold, as we have too many identical lowest scores, or the // number of scores is too low. if (frr_value >= 1e-12 && frr_value < counter / (double)pos.extent(0)) { bob::core::error << "The threshold cannot be computed for an FRR value of " << frr_value << ". There are either too many repeated lowest scores or " "the number of scores is too low. The minimum possible FRR value is " << counter / (double)pos.extent(0) << "\n"; return std::numeric_limits::quiet_NaN(); } index = 0; // frr == 0 is a corner case if (frr_value <= 1e-12) return pos(0) - 1e-12; // frr == 1 is a corner case if (frr_value >= 1 - 1e-12) return pos(pos.extent(0)-1) + 1e-12; // move to the right of array changing the threshold until we pass the // desired FRR value. double threshold = pos(index); // Move towards the end of array changing the threshold until we pass // the desired FRR value. Start with a threshold that corresponds to FRR == // 0. int index = 0; double threshold = pos(index) - 1e-12; double future_frr; while (index < pos.extent(0)-1) { future_frr = blitz::count(pos < pos(index+1)) / (double)pos.extent(0); while (index < pos.extent(0)) { future_frr = blitz::count(pos < pos(index)) / (double)pos.extent(0); if (future_frr > frr_value) break; threshold = pos(++index); threshold = pos(index); ++index; } return threshold; } ... ...
 ... ... @@ -84,55 +84,59 @@ def test_basic_ratios(): nose.tools.eq_(f_score_, 1.0) def test_nan_for_uncomputable_thresholds(): def test_for_uncomputable_thresholds(): # in some cases, we cannot compute an FAR or FRR threshold, e.g., when we # have too little data or too many equal scores; in these cases, the methods # should return NaN # should return a threshold which supports a lower value. from . import far_threshold, frr_threshold # case 1: several scores are identical positives = [0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5] negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0] pos = [0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5] neg = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0] # test that reasonable thresholds for reachable data points are provided threshold = far_threshold(negatives, positives, 0.5) threshold = far_threshold(neg, pos, 0.5) assert threshold == 1.0, threshold threshold = frr_threshold(negatives, positives, 0.5) threshold = frr_threshold(neg, pos, 0.5) assert numpy.isclose(threshold, 0.1), threshold threshold = far_threshold(negatives, positives, 0.4) assert math.isnan(threshold), threshold threshold = frr_threshold(negatives, positives, 0.4) assert math.isnan(threshold), threshold threshold = far_threshold(neg, pos, 0.4) assert threshold > neg[-1], threshold threshold = frr_threshold(neg, pos, 0.4) assert threshold >= pos, threshold # test the same with even number of scores positives = [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5] negatives = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0] pos = [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5] neg = [0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.0, 1.0, 1.0, 1.0] threshold = far_threshold(negatives, positives, 0.5) threshold = far_threshold(neg, pos, 0.5) assert threshold == 1.0, threshold assert numpy.isclose(frr_threshold(negatives, positives, 0.51), 0.1) assert math.isnan(far_threshold(negatives, positives, 0.49)) assert math.isnan(frr_threshold(negatives, positives, 0.49)) assert 
numpy.isclose(frr_threshold(neg, pos, 0.51), 0.1) threshold = far_threshold(neg, pos, 0.49) assert threshold > neg[-1], threshold threshold = frr_threshold(neg, pos, 0.49) assert threshold >= pos, threshold # case 2: too few scores for the desired threshold positives = numpy.arange(10.) negatives = numpy.arange(10.) pos = numpy.array(range(10), dtype=float) neg = numpy.array(range(10), dtype=float) threshold = far_threshold(negatives, positives, 0.09) assert math.isnan(threshold), threshold threshold = frr_threshold(negatives, positives, 0.09) assert math.isnan(threshold), threshold threshold = far_threshold(neg, pos, 0.09) assert threshold > neg[-1], threshold threshold = frr_threshold(neg, pos, 0.09) assert threshold >= pos, threshold # there is no limit above; the threshold will just be the largest possible # value threshold = far_threshold(negatives, positives, 0.11) threshold = far_threshold(neg, pos, 0.11) assert threshold == 9., threshold threshold = far_threshold(negatives, positives, 0.91) threshold = far_threshold(neg, pos, 0.91) assert threshold == 1., threshold threshold = far_threshold(negatives, positives, 1) threshold = far_threshold(neg, pos, 1) assert threshold <= 0., threshold assert numpy.isclose(frr_threshold(negatives, positives, 0.11), 1.) assert numpy.isclose(frr_threshold(negatives, positives, 0.91), 9.) threshold = frr_threshold(neg, pos, 0.11) assert numpy.isclose(threshold, 1.), threshold threshold = frr_threshold(neg, pos, 0.91) assert numpy.isclose(threshold, 9.), threshold def test_indexing(): ... ... 
@@ -174,26 +178,14 @@ def test_obvious_thresholds(): far, expected_far = round(far, 2), math.floor(far * 10) / 10 frr, expected_frr = round(frr, 2), math.floor(frr * 10) / 10 calculated_far_threshold = far_threshold(neg, pos, far) predicted_far, _ = farfrr(neg, pos, calculated_far_threshold) pred_far, _ = farfrr(neg, pos, calculated_far_threshold) calculated_frr_threshold = frr_threshold(neg, pos, frr) _, predicted_frr = farfrr(neg, pos, calculated_frr_threshold) if far < 1. / M and far != 0: assert math.isnan(calculated_far_threshold), (predicted_far, far, calculated_far_threshold) assert math.isnan(calculated_frr_threshold), (predicted_frr, frr, calculated_frr_threshold) else: assert predicted_far <= far, (predicted_far, far, calculated_far_threshold) assert predicted_far == expected_far, (predicted_far, far, calculated_far_threshold) assert predicted_frr <= frr, (predicted_frr, frr, calculated_frr_threshold) assert predicted_frr == expected_frr, (predicted_frr, frr, calculated_frr_threshold) _, pred_frr = farfrr(neg, pos, calculated_frr_threshold) assert pred_far <= far, (pred_far, far, calculated_far_threshold) assert pred_far == expected_far, (pred_far, far, calculated_far_threshold) assert pred_frr <= frr, (pred_frr, frr, calculated_frr_threshold) assert pred_frr == expected_frr, (pred_frr, frr, calculated_frr_threshold) def test_thresholding(): ... ...
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!