From c2697a0196a75e88dbd625b1e288707a30181996 Mon Sep 17 00:00:00 2001 From: Amir Mohammadi <183.amir@gmail.com> Date: Thu, 20 Apr 2017 13:56:42 +0200 Subject: [PATCH] lint --- bob/measure/calibration.py | 17 +- bob/measure/cpp/error.cpp | 433 ++++--- bob/measure/cpp/error.h | 727 ++++++------ bob/measure/load.py | 43 +- bob/measure/main.cpp | 1581 +++++++++++++------------ bob/measure/openbr.py | 123 +- bob/measure/plot.py | 59 +- bob/measure/script/apply_threshold.py | 15 +- bob/measure/test_error.py | 134 ++- bob/measure/test_io.py | 186 +-- bob/measure/test_scripts.py | 30 +- bob/measure/version.cpp | 101 +- 12 files changed, 1857 insertions(+), 1592 deletions(-) diff --git a/bob/measure/calibration.py b/bob/measure/calibration.py index e025e0c..72c1dab 100644 --- a/bob/measure/calibration.py +++ b/bob/measure/calibration.py @@ -66,12 +66,13 @@ def min_cllr(negatives, positives): pos = sorted(positives) N = len(neg) P = len(pos) - I = N+P - # now, iterate through both score sets and add a 0 for negative and 1 for positive scores - n, p = 0,0 + I = N + P + # now, iterate through both score sets and add a 0 for negative and 1 for + # positive scores + n, p = 0, 0 ideal = numpy.zeros(I) - neg_indices = [0]*N - pos_indices = [0]*P + neg_indices = [0] * N + pos_indices = [0] * P for i in range(I): if p < P and (n == N or neg[n] > pos[p]): pos_indices[p] = i @@ -88,12 +89,12 @@ def min_cllr(negatives, positives): # disable runtime warnings for a short time since log(0) will raise a warning old_warn_setup = numpy.seterr(divide='ignore') # ... compute logs - posterior_log_odds = numpy.log(popt)-numpy.log(1.-popt); - log_prior_odds = math.log(float(P)/float(N)); + posterior_log_odds = numpy.log(popt) - numpy.log(1. - popt) + log_prior_odds = math.log(float(P) / float(N)) # ... 
activate old warnings numpy.seterr(**old_warn_setup) - llrs = posterior_log_odds - log_prior_odds; + llrs = posterior_log_odds - log_prior_odds # some weired addition # for i in range(I): diff --git a/bob/measure/cpp/error.cpp b/bob/measure/cpp/error.cpp index 6063e9d..e350a6f 100644 --- a/bob/measure/cpp/error.cpp +++ b/bob/measure/cpp/error.cpp @@ -7,153 +7,179 @@ * Copyright (C) Idiap Research Institute, Martigny, Switzerland */ -#include #include -#include #include +#include +#include -#include -#include #include #include +#include +#include -#include #include +#include #include "error.h" template -static void sort(const blitz::Array& a, blitz::Array& b, bool is_sorted){ - if (is_sorted){ +static void sort(const blitz::Array &a, blitz::Array &b, + bool is_sorted) { + if (is_sorted) { b.reference(a); } else { - bob::core::array::ccopy(a,b); + bob::core::array::ccopy(a, b); bob::core::array::sort(b); } } -std::pair bob::measure::farfrr(const blitz::Array& negatives, - const blitz::Array& positives, double threshold) { - if (!negatives.size()) throw std::runtime_error("Cannot compute FAR when no negatives are given"); - if (!positives.size()) throw std::runtime_error("Cannot compute FRR when no positives are given"); +std::pair +bob::measure::farfrr(const blitz::Array &negatives, + const blitz::Array &positives, + double threshold) { + if (!negatives.size()) + throw std::runtime_error("Cannot compute FAR when no negatives are given"); + if (!positives.size()) + throw std::runtime_error("Cannot compute FRR when no positives are given"); blitz::sizeType total_negatives = negatives.extent(blitz::firstDim); blitz::sizeType total_positives = positives.extent(blitz::firstDim); blitz::sizeType false_accepts = blitz::count(negatives >= threshold); blitz::sizeType false_rejects = blitz::count(positives < threshold); - return std::make_pair(false_accepts/(double)total_negatives, - false_rejects/(double)total_positives); + return std::make_pair(false_accepts / 
(double)total_negatives, + false_rejects / (double)total_positives); } -std::pair bob::measure::precision_recall(const blitz::Array& negatives, - const blitz::Array& positives, double threshold) { - if (!negatives.size() || !positives.size()) throw std::runtime_error("Cannot compute precision or recall when no positives or no negatives are given"); +std::pair +bob::measure::precision_recall(const blitz::Array &negatives, + const blitz::Array &positives, + double threshold) { + if (!negatives.size() || !positives.size()) + throw std::runtime_error("Cannot compute precision or recall when no " + "positives or no negatives are given"); blitz::sizeType total_positives = positives.extent(blitz::firstDim); blitz::sizeType false_positives = blitz::count(negatives >= threshold); blitz::sizeType true_positives = blitz::count(positives >= threshold); blitz::sizeType total_classified_positives = true_positives + false_positives; - if (!total_classified_positives) total_classified_positives = 1; //avoids division by zero - if (!total_positives) total_positives = 1; //avoids division by zero - return std::make_pair(true_positives/(double)(total_classified_positives), - true_positives/(double)(total_positives)); + if (!total_classified_positives) + total_classified_positives = 1; // avoids division by zero + if (!total_positives) + total_positives = 1; // avoids division by zero + return std::make_pair(true_positives / (double)(total_classified_positives), + true_positives / (double)(total_positives)); } - -double bob::measure::f_score(const blitz::Array& negatives, - const blitz::Array& positives, double threshold, double weight) { +double bob::measure::f_score(const blitz::Array &negatives, + const blitz::Array &positives, + double threshold, double weight) { std::pair ratios = bob::measure::precision_recall(negatives, positives, threshold); double precision = ratios.first; double recall = ratios.second; - if (weight <= 0) weight = 1; + if (weight <= 0) + weight = 1; if 
(precision == 0 && recall == 0) return 0; - return (1 + weight*weight) * precision * recall / (weight * weight * precision + recall); + return (1 + weight * weight) * precision * recall / + (weight * weight * precision + recall); } -double eer_predicate(double far, double frr) { - return std::abs(far - frr); -} +double eer_predicate(double far, double frr) { return std::abs(far - frr); } -double bob::measure::eerThreshold(const blitz::Array& negatives, const blitz::Array& positives, bool is_sorted) { - blitz::Array neg, pos; +double bob::measure::eerThreshold(const blitz::Array &negatives, + const blitz::Array &positives, + bool is_sorted) { + blitz::Array neg, pos; sort(negatives, neg, is_sorted); sort(positives, pos, is_sorted); return bob::measure::minimizingThreshold(neg, pos, eer_predicate); } -double bob::measure::eerRocch(const blitz::Array& negatives, const blitz::Array& positives) { +double bob::measure::eerRocch(const blitz::Array &negatives, + const blitz::Array &positives) { return bob::measure::rocch2eer(bob::measure::rocch(negatives, positives)); } -double bob::measure::farThreshold(const blitz::Array& negatives, const blitz::Array&, double far_value, bool is_sorted) { +double bob::measure::farThreshold(const blitz::Array &negatives, + const blitz::Array &, + double far_value, bool is_sorted) { // check the parameters are valid if (far_value < 0. || far_value > 1.) 
{ - boost::format m("the argument for far_value' cannot take the value %f - the value must be in the interval [0.,1.]"); + boost::format m("the argument for far_value' cannot take the value %f - " + "the value must be in the interval [0.,1.]"); m % far_value; throw std::runtime_error(m.str()); } if (negatives.size() < 2) { - throw std::runtime_error("the number of negative scores must be at least 2"); + throw std::runtime_error( + "the number of negative scores must be at least 2"); } // sort the array, if necessary - blitz::Array neg; + blitz::Array neg; sort(negatives, neg, is_sorted); // compute position of the threshold - double crr = 1.-far_value; // (Correct Rejection Rate; = 1 - FAR) + double crr = 1. - far_value; // (Correct Rejection Rate; = 1 - FAR) double crr_index = crr * neg.extent(0); // compute the index above the current CRR value - int index = std::min((int)std::floor(crr_index), neg.extent(0)-1); + int index = std::min((int)std::floor(crr_index), neg.extent(0) - 1); // correct index if we have multiple score values at the requested position - while (index && neg(index) == neg(index-1)) --index; + while (index && neg(index) == neg(index - 1)) + --index; - // we compute a correction term to assure that we are in the middle of two cases + // we compute a correction term to assure that we are in the middle of two + // cases double correction; - if (index){ + if (index) { // assure that we are in the middle of two cases - correction = 0.5 * (neg(index) - neg(index-1)); + correction = 0.5 * (neg(index) - neg(index - 1)); } else { // add an overall correction term - correction = 0.5 * (neg(neg.extent(0)-1) - neg(0)) / neg.extent(0); + correction = 0.5 * (neg(neg.extent(0) - 1) - neg(0)) / neg.extent(0); } return neg(index) - correction; } -double bob::measure::frrThreshold(const blitz::Array&, const blitz::Array& positives, double frr_value, bool is_sorted) { +double bob::measure::frrThreshold(const blitz::Array &, + const blitz::Array &positives, + 
double frr_value, bool is_sorted) { // check the parameters are valid if (frr_value < 0. || frr_value > 1.) { - boost::format m("the argument for frr_value' cannot take the value %f - the value must be in the interval [0.,1.]"); + boost::format m("the argument for frr_value' cannot take the value %f - " + "the value must be in the interval [0.,1.]"); m % frr_value; throw std::runtime_error(m.str()); } if (positives.size() < 2) { - throw std::runtime_error("the number of positive scores must be at least 2"); + throw std::runtime_error( + "the number of positive scores must be at least 2"); } // sort positive scores descendantly, if necessary - blitz::Array pos; + blitz::Array pos; sort(positives, pos, is_sorted); // compute position of the threshold double frr_index = frr_value * pos.extent(0); // compute the index above the current CAR value - int index = std::min((int)std::ceil(frr_index), pos.extent(0)-1); + int index = std::min((int)std::ceil(frr_index), pos.extent(0) - 1); // correct index if we have multiple score values at the requested position - while (index < pos.extent(0)-1 && pos(index) == pos(index+1)) ++index; + while (index < pos.extent(0) - 1 && pos(index) == pos(index + 1)) + ++index; - // we compute a correction term to assure that we are in the middle of two cases + // we compute a correction term to assure that we are in the middle of two + // cases double correction; - if (index < pos.extent(0)-1){ + if (index < pos.extent(0) - 1) { // assure that we are in the middle of two cases - correction = 0.5 * (pos(index+1) - pos(index)); + correction = 0.5 * (pos(index + 1) - pos(index)); } else { // add an overall correction term - correction = 0.5 * (pos(pos.extent(0)-1) - pos(0)) / pos.extent(0); + correction = 0.5 * (pos(pos.extent(0) - 1) - pos(0)) / pos.extent(0); } return pos(index) + correction; @@ -166,54 +192,59 @@ class weighted_error { double m_weight; ///< The weighting factor - public: //api - - weighted_error(double weight): 
m_weight(weight) { - if (weight > 1.0) m_weight = 1.0; - if (weight < 0.0) m_weight = 0.0; +public: // api + weighted_error(double weight) : m_weight(weight) { + if (weight > 1.0) + m_weight = 1.0; + if (weight < 0.0) + m_weight = 0.0; } - inline double operator() (double far, double frr) const { - return (m_weight*far) + ((1.0-m_weight)*frr); + inline double operator()(double far, double frr) const { + return (m_weight * far) + ((1.0 - m_weight) * frr); } - }; -double bob::measure::minWeightedErrorRateThreshold(const blitz::Array& negatives, const blitz::Array& positives, double cost, bool is_sorted) { - blitz::Array neg, pos; +double bob::measure::minWeightedErrorRateThreshold( + const blitz::Array &negatives, + const blitz::Array &positives, double cost, bool is_sorted) { + blitz::Array neg, pos; sort(negatives, neg, is_sorted); sort(positives, pos, is_sorted); weighted_error predicate(cost); return bob::measure::minimizingThreshold(neg, pos, predicate); } -blitz::Array bob::measure::roc(const blitz::Array& negatives, - const blitz::Array& positives, size_t points) { +blitz::Array +bob::measure::roc(const blitz::Array &negatives, + const blitz::Array &positives, size_t points) { double min = std::min(blitz::min(negatives), blitz::min(positives)); double max = std::max(blitz::max(negatives), blitz::max(positives)); - double step = (max-min)/((double)points-1.0); - blitz::Array retval(2, points); - for (int i=0; i<(int)points; ++i) { + double step = (max - min) / ((double)points - 1.0); + blitz::Array retval(2, points); + for (int i = 0; i < (int)points; ++i) { std::pair ratios = - bob::measure::farfrr(negatives, positives, min + i*step); + bob::measure::farfrr(negatives, positives, min + i * step); // preserve X x Y ordering (FAR x FRR) - retval(0,i) = ratios.first; - retval(1,i) = ratios.second; + retval(0, i) = ratios.first; + retval(1, i) = ratios.second; } return retval; } -blitz::Array bob::measure::precision_recall_curve(const blitz::Array& negatives, - 
const blitz::Array& positives, size_t points) { +blitz::Array +bob::measure::precision_recall_curve(const blitz::Array &negatives, + const blitz::Array &positives, + size_t points) { double min = std::min(blitz::min(negatives), blitz::min(positives)); double max = std::max(blitz::max(negatives), blitz::max(positives)); - double step = (max-min)/((double)points-1.0); - blitz::Array retval(2, points); - for (int i=0; i<(int)points; ++i) { + double step = (max - min) / ((double)points - 1.0); + blitz::Array retval(2, points); + for (int i = 0; i < (int)points; ++i) { std::pair ratios = - bob::measure::precision_recall(negatives, positives, min + i*step); - retval(0,i) = ratios.first; - retval(1,i) = ratios.second; + bob::measure::precision_recall(negatives, positives, min + i * step); + retval(0, i) = ratios.first; + retval(1, i) = ratios.second; } return retval; } @@ -221,124 +252,115 @@ blitz::Array bob::measure::precision_recall_curve(const blitz::Array &v): - m_v(v) - { - } +struct ComparePairs { + ComparePairs(const blitz::Array &v) : m_v(v) {} - bool operator()(size_t a, size_t b) - { - return m_v(a) < m_v(b); - } + bool operator()(size_t a, size_t b) { return m_v(a) < m_v(b); } - blitz::Array m_v; + blitz::Array m_v; }; /** * Sort an array and get the permutations (using stable_sort) */ -void sortWithPermutation(const blitz::Array& values, std::vector& v) -{ +void sortWithPermutation(const blitz::Array &values, + std::vector &v) { int N = values.extent(0); bob::core::array::assertSameDimensionLength(N, v.size()); - for(int i=0; i bob::measure::rocch(const blitz::Array& negatives, - const blitz::Array& positives) -{ +blitz::Array +bob::measure::rocch(const blitz::Array &negatives, + const blitz::Array &positives) { // Number of positive and negative scores size_t Nt = positives.extent(0); size_t Nn = negatives.extent(0); size_t N = Nt + Nn; // Create a big array with all scores - blitz::Array scores(N); + blitz::Array scores(N); blitz::Range rall = 
blitz::Range::all(); - scores(blitz::Range(0,Nt-1)) = positives(rall); - scores(blitz::Range(Nt,N-1)) = negatives(rall); + scores(blitz::Range(0, Nt - 1)) = positives(rall); + scores(blitz::Range(Nt, N - 1)) = negatives(rall); - // It is important here that scores that are the same (i.e. already in order) should NOT be swapped. + // It is important here that scores that are the same (i.e. already in order) + // should NOT be swapped. // std::stable_sort has this property. std::vector perturb(N); sortWithPermutation(scores, perturb); // Apply permutation - blitz::Array Pideal(N); - for(size_t i=0; i Pideal(N); + for (size_t i = 0; i < N; ++i) Pideal(i) = (perturb[i] < Nt ? 1 : 0); - blitz::Array Pideal_d = bob::core::array::cast(Pideal); + blitz::Array Pideal_d = bob::core::array::cast(Pideal); // Apply the PAVA algorithm - blitz::Array Popt(N); - blitz::Array width = bob::math::pavxWidth(Pideal_d, Popt); + blitz::Array Popt(N); + blitz::Array width = bob::math::pavxWidth(Pideal_d, Popt); // Allocate output int nbins = width.extent(0); - blitz::Array retval(2,nbins+1); // FAR, FRR + blitz::Array retval(2, nbins + 1); // FAR, FRR // Fill in output size_t left = 0; size_t fa = Nn; size_t miss = 0; - for(int i=0; i= 1) - miss = blitz::sum(Pideal(blitz::Range(0,left-1))); + if (left >= 1) + miss = blitz::sum(Pideal(blitz::Range(0, left - 1))); else miss = 0; - if(Pideal.extent(0)-1 >= (int)left) - fa = N - left - blitz::sum(Pideal(blitz::Range(left,Pideal.extent(0)-1))); + if (Pideal.extent(0) - 1 >= (int)left) + fa = N - left - + blitz::sum(Pideal(blitz::Range(left, Pideal.extent(0) - 1))); else fa = 0; } - retval(0,nbins) = fa / (double)Nn; // pfa - retval(1,nbins) = miss / (double)Nt; // pmiss + retval(0, nbins) = fa / (double)Nn; // pfa + retval(1, nbins) = miss / (double)Nt; // pmiss return retval; } -double bob::measure::rocch2eer(const blitz::Array& pfa_pmiss) -{ +double bob::measure::rocch2eer(const blitz::Array &pfa_pmiss) { 
bob::core::array::assertSameDimensionLength(2, pfa_pmiss.extent(0)); const int N = pfa_pmiss.extent(1); double eer = 0.; - blitz::Array XY(2,2); - blitz::Array one(2); + blitz::Array XY(2, 2); + blitz::Array one(2); one = 1.; - blitz::Array seg(2); - double& XY00 = XY(0,0); - double& XY01 = XY(0,1); - double& XY10 = XY(1,0); - double& XY11 = XY(1,1); + blitz::Array seg(2); + double &XY00 = XY(0, 0); + double &XY01 = XY(0, 1); + double &XY10 = XY(1, 0); + double &XY11 = XY(1, 1); double eerseg = 0.; - for(int i=0; i::epsilon()) + if (std::min(abs_dd0, abs_dd1) < std::numeric_limits::epsilon()) eerseg = 0.; - else - { + else { // Find line coefficients seg s.t. XY.seg = 1, bob::math::linsolve_(XY, seg, one); // Candidate for the EER (to be compared to current value) @@ -359,77 +381,91 @@ double bob::measure::rocch2eer(const blitz::Array& pfa_pmiss) * @param positives Client scores * @param far_list The list of FAR values where the FRR should be calculated * - * @return The ROC curve with the FAR in the first row and the FRR in the second. + * @return The ROC curve with the FAR in the first row and the FRR in the + * second. 
*/ -blitz::Array bob::measure::roc_for_far(const blitz::Array& negatives, - const blitz::Array& positives, const blitz::Array& far_list, bool is_sorted) { +blitz::Array +bob::measure::roc_for_far(const blitz::Array &negatives, + const blitz::Array &positives, + const blitz::Array &far_list, + bool is_sorted) { int n_points = far_list.extent(0); - if (negatives.extent(0) == 0) throw std::runtime_error("The given set of negatives is empty."); - if (positives.extent(0) == 0) throw std::runtime_error("The given set of positives is empty."); + if (negatives.extent(0) == 0) + throw std::runtime_error("The given set of negatives is empty."); + if (positives.extent(0) == 0) + throw std::runtime_error("The given set of positives is empty."); // sort negative and positive scores ascendantly - blitz::Array neg, pos; + blitz::Array neg, pos; sort(negatives, neg, is_sorted); sort(positives, pos, is_sorted); // do some magic to compute the FRR list - blitz::Array retval(2, n_points); + blitz::Array retval(2, n_points); // index into the FAR and FRR list - int far_index = n_points-1; + int far_index = n_points - 1; int pos_index = 0, neg_index = 0; int n_pos = pos.extent(0), n_neg = neg.extent(0); // iterators into the result lists auto pos_it = pos.begin(), neg_it = neg.begin(); // do some fast magic to compute the FRR values ;-) - do{ - // check whether the current positive value is less than the current negative one - if (*pos_it < *neg_it){ + do { + // check whether the current positive value is less than the current + // negative one + if (*pos_it < *neg_it) { // increase the positive count ++pos_index; // go to the next positive value ++pos_it; - }else{ + } else { // increase the negative count ++neg_index; // go to the next negative value ++neg_it; } // check, if we have reached a new FAR limit, - // i.e. if the relative number of negative similarities is greater than 1-FAR (which is the CRR) - + // i.e. 
if the relative number of negative similarities is greater than + // 1-FAR (which is the CRR) if (((double)neg_index / (double)n_neg > 1. - far_list(far_index)) && - !(bob::core::isClose ((double)neg_index / (double)n_neg, 1. - far_list(far_index), 1e-9, 1e-9))) { + !(bob::core::isClose((double)neg_index / (double)n_neg, + 1. - far_list(far_index), 1e-9, 1e-9))) { // copy the far value - retval(0,far_index) = far_list(far_index); + retval(0, far_index) = far_list(far_index); // calculate the FRR for the current FAR - retval(1,far_index) = (double)pos_index / (double)n_pos; + retval(1, far_index) = (double)pos_index / (double)n_pos; // go to the next FAR value --far_index; } - // do this, as long as there are elements in both lists left and not all FRR elements where calculated yet + // do this, as long as there are elements in both lists left and not all FRR + // elements where calculated yet } while (pos_it != pos.end() && neg_it != neg.end() && far_index >= 0); // check if all FRR values have been set - if (far_index >= 0){ - // walk to the end of both lists; at least one of both lists should already have reached its limit. - while (pos_it++ != pos.end()) ++pos_index; - while (neg_it++ != neg.end()) ++neg_index; + if (far_index >= 0) { + // walk to the end of both lists; at least one of both lists should already + // have reached its limit. + while (pos_it++ != pos.end()) + ++pos_index; + while (neg_it++ != neg.end()) + ++neg_index; // fill in the remaining elements of the CAR list do { // copy the FAR value - retval(0,far_index) = far_list(far_index); - // check if the criterion is fulfilled (should be, as long as the lowest far is not below 0) - if ((double)neg_index / (double)n_neg > 1. - far_list(far_index)){ + retval(0, far_index) = far_list(far_index); + // check if the criterion is fulfilled (should be, as long as the lowest + // far is not below 0) + if ((double)neg_index / (double)n_neg > 1. 
- far_list(far_index)) { // calculate the FRR for the current FAR - retval(1,far_index) = (double)pos_index / (double)n_pos; + retval(1, far_index) = (double)pos_index / (double)n_pos; } else { - // set FRR to 1 (this should never happen, but might be due to numerical issues) - retval(1,far_index) = 1.; + // set FRR to 1 (this should never happen, but might be due to numerical + // issues) + retval(1, far_index) = 1.; } } while (far_index--); } @@ -437,7 +473,6 @@ blitz::Array bob::measure::roc_for_far(const blitz::Array& n return retval; } - /** * The input to this function is a cumulative probability. The output from * this function is the Normal deviate that corresponds to that probability. @@ -454,8 +489,8 @@ blitz::Array bob::measure::roc_for_far(const blitz::Array& n * 0.999 | 3.090 */ static double _ppndf(double p) { - //some constants we need for the calculation. - //these come from the NIST implementation... + // some constants we need for the calculation. + // these come from the NIST implementation... static const double SPLIT = 0.42; static const double A0 = 2.5066282388; static const double A1 = -18.6150006252; @@ -475,65 +510,69 @@ static double _ppndf(double p) { double retval; - if (p >= 1.0) p = 1 - eps; - if (p <= 0.0) p = eps; + if (p >= 1.0) + p = 1 - eps; + if (p <= 0.0) + p = eps; double q = p - 0.5; if (std::abs(q) <= SPLIT) { double r = q * q; retval = q * (((A3 * r + A2) * r + A1) * r + A0) / - ((((B4 * r + B3) * r + B2) * r + B1) * r + 1.0); - } - else { - //r = sqrt (log (0.5 - abs(q))); - double r = (q > 0.0 ? 1.0 - p : p); - if (r <= 0.0) throw std::runtime_error("measure::ppndf(): r <= 0.0!"); - r = sqrt ((-1.0) * log (r)); + ((((B4 * r + B3) * r + B2) * r + B1) * r + 1.0); + } else { + // r = sqrt (log (0.5 - abs(q))); + double r = (q > 0.0 ? 
1.0 - p : p); + if (r <= 0.0) + throw std::runtime_error("measure::ppndf(): r <= 0.0!"); + r = sqrt((-1.0) * log(r)); retval = (((C3 * r + C2) * r + C1) * r + C0) / ((D2 * r + D1) * r + 1.0); - if (q < 0) retval *= -1.0; + if (q < 0) + retval *= -1.0; } return retval; } namespace blitz { - BZ_DECLARE_FUNCTION(_ppndf) ///< A blitz::Array binding +BZ_DECLARE_FUNCTION(_ppndf) ///< A blitz::Array binding } -double bob::measure::ppndf (double value) { return _ppndf(value); } +double bob::measure::ppndf(double value) { return _ppndf(value); } -blitz::Array bob::measure::det(const blitz::Array& negatives, - const blitz::Array& positives, size_t points) { - blitz::Array retval(2, points); +blitz::Array +bob::measure::det(const blitz::Array &negatives, + const blitz::Array &positives, size_t points) { + blitz::Array retval(2, points); retval = blitz::_ppndf(bob::measure::roc(negatives, positives, points)); return retval; } -blitz::Array bob::measure::epc -(const blitz::Array& dev_negatives, - const blitz::Array& dev_positives, - const blitz::Array& test_negatives, - const blitz::Array& test_positives, size_t points, bool is_sorted, - bool thresholds) { +blitz::Array +bob::measure::epc(const blitz::Array &dev_negatives, + const blitz::Array &dev_positives, + const blitz::Array &test_negatives, + const blitz::Array &test_positives, size_t points, + bool is_sorted, bool thresholds) { - blitz::Array dev_neg, dev_pos; + blitz::Array dev_neg, dev_pos; sort(dev_negatives, dev_neg, is_sorted); sort(dev_positives, dev_pos, is_sorted); - double step = 1.0/((double)points-1.0); + double step = 1.0 / ((double)points - 1.0); auto retval_shape0 = (thresholds) ? 
3 : 2; - blitz::Array retval(retval_shape0, points); - for (int i=0; i<(int)points; ++i) { - double alpha = (double)i*step; - retval(0,i) = alpha; - double threshold = bob::measure::minWeightedErrorRateThreshold(dev_neg, - dev_pos, alpha, true); + blitz::Array retval(retval_shape0, points); + for (int i = 0; i < (int)points; ++i) { + double alpha = (double)i * step; + retval(0, i) = alpha; + double threshold = bob::measure::minWeightedErrorRateThreshold( + dev_neg, dev_pos, alpha, true); std::pair ratios = - bob::measure::farfrr(test_negatives, test_positives, threshold); - retval(1,i) = (ratios.first + ratios.second) / 2; + bob::measure::farfrr(test_negatives, test_positives, threshold); + retval(1, i) = (ratios.first + ratios.second) / 2; if (thresholds) { - retval(2,i) = threshold; + retval(2, i) = threshold; } } return retval; diff --git a/bob/measure/cpp/error.h b/bob/measure/cpp/error.h index 8f7ccdb..f4f6f80 100644 --- a/bob/measure/cpp/error.h +++ b/bob/measure/cpp/error.h @@ -14,384 +14,411 @@ #include #include -namespace bob { namespace measure { +namespace bob { +namespace measure { - /** - * Calculates the FA ratio and the FR ratio given positive and negative - * scores and a threshold. 'positives' holds the score information for - * samples that are labeled to belong to a certain class (a.k.a., "signal" - * or "client"). 'negatives' holds the score information for samples that are - * labeled *not* to belong to the class (a.k.a., "noise" or "impostor"). - * - * It is expected that 'positive' scores are, at least by design, greater - * than 'negative' scores. So, every positive value that falls bellow the - * threshold is considered a false-rejection (FR). 'negative' samples that - * fall above the threshold are considered a false-accept (FA). - * - * Positives that fall on the threshold (exactly) are considered correctly - * classified. Negatives that fall on the threshold (exactly) are considered - * *incorrectly* classified. 
This equivalent to setting the comparison like - * this pseudo-code: - * - * foreach (positive as K) if K < threshold: falseRejectionCount += 1 - * foreach (negative as K) if K >= threshold: falseAcceptCount += 1 - * - * The 'threshold' value does not necessarily have to fall in the range - * covered by the input scores (negatives and positives altogether), but if - * it does not, the output will be either (1.0, 0.0) or (0.0, 1.0) - * depending on the side the threshold falls. - * - * The output is in form of a std::pair of two double-precision real numbers. - * The numbers range from 0 to 1. The first element of the pair is the - * false-accept ratio. The second element of the pair is the false-rejection - * ratio. - * - * It is possible that scores are inverted in the negative/positive sense. In - * some setups the designer may have setup the system so 'positive' samples - * have a smaller score than the 'negative' ones. In this case, make sure you - * normalize the scores so positive samples have greater scores before - * feeding them into this method. - */ - std::pair farfrr(const blitz::Array& negatives, - const blitz::Array& positives, double threshold); - - /** - * Calculates the precision and recall (sensitiveness) values given positive and negative - * scores and a threshold. 'positives' holds the score information for - * samples that are labeled to belong to a certain class (a.k.a., "signal" - * or "client"). 'negatives' holds the score information for samples that are - * labeled *not* to belong to the class (a.k.a., "noise" or "impostor"). - * - * For more precise details about how the method considers error rates, please refer to the documentation of the method bob.measure.farfrr. - * - * It is possible that scores are inverted in the negative/positive sense. In - * some setups the designer may have setup the system so 'positive' samples - * have a smaller score than the 'negative' ones. 
In this case, make sure you - * normalize the scores so positive samples have greater scores before - * feeding them into this method. - */ - std::pair precision_recall(const blitz::Array& negatives, - const blitz::Array& positives, double threshold); +/** + * Calculates the FA ratio and the FR ratio given positive and negative + * scores and a threshold. 'positives' holds the score information for + * samples that are labeled to belong to a certain class (a.k.a., "signal" + * or "client"). 'negatives' holds the score information for samples that are + * labeled *not* to belong to the class (a.k.a., "noise" or "impostor"). + * + * It is expected that 'positive' scores are, at least by design, greater + * than 'negative' scores. So, every positive value that falls bellow the + * threshold is considered a false-rejection (FR). 'negative' samples that + * fall above the threshold are considered a false-accept (FA). + * + * Positives that fall on the threshold (exactly) are considered correctly + * classified. Negatives that fall on the threshold (exactly) are considered + * *incorrectly* classified. This equivalent to setting the comparison like + * this pseudo-code: + * + * foreach (positive as K) if K < threshold: falseRejectionCount += 1 + * foreach (negative as K) if K >= threshold: falseAcceptCount += 1 + * + * The 'threshold' value does not necessarily have to fall in the range + * covered by the input scores (negatives and positives altogether), but if + * it does not, the output will be either (1.0, 0.0) or (0.0, 1.0) + * depending on the side the threshold falls. + * + * The output is in form of a std::pair of two double-precision real numbers. + * The numbers range from 0 to 1. The first element of the pair is the + * false-accept ratio. The second element of the pair is the false-rejection + * ratio. + * + * It is possible that scores are inverted in the negative/positive sense. 
In + * some setups the designer may have setup the system so 'positive' samples + * have a smaller score than the 'negative' ones. In this case, make sure you + * normalize the scores so positive samples have greater scores before + * feeding them into this method. + */ +std::pair farfrr(const blitz::Array &negatives, + const blitz::Array &positives, + double threshold); - /** - * This method computes F score of the accuracy of the classification. It is a weighted mean of precision and recall measurements. The weight parameter needs to be non-negative real value. In case the weight parameter is 1, the F-score is called F1 score and is a harmonic mean between precision and recall values. - */ - double f_score(const blitz::Array& negatives, - const blitz::Array& positives, double threshold, double weight) ; +/** + * Calculates the precision and recall (sensitiveness) values given positive and + * negative + * scores and a threshold. 'positives' holds the score information for + * samples that are labeled to belong to a certain class (a.k.a., "signal" + * or "client"). 'negatives' holds the score information for samples that are + * labeled *not* to belong to the class (a.k.a., "noise" or "impostor"). + * + * For more precise details about how the method considers error rates, please + * refer to the documentation of the method bob.measure.farfrr. + * + * It is possible that scores are inverted in the negative/positive sense. In + * some setups the designer may have setup the system so 'positive' samples + * have a smaller score than the 'negative' ones. In this case, make sure you + * normalize the scores so positive samples have greater scores before + * feeding them into this method. 
+ */ +std::pair +precision_recall(const blitz::Array &negatives, + const blitz::Array &positives, double threshold); - /** - * This method returns a blitz::Array composed of booleans that pin-point - * which positives where correctly classified in a 'positive' score sample, - * given a threshold. It runs the formula: - * - * foreach (element k in positive) - * if positive[k] >= threshold: returnValue[k] = true - * else: returnValue[k] = false - */ - inline blitz::Array correctlyClassifiedPositives - (const blitz::Array& positives, double threshold) { - return blitz::Array(positives >= threshold); - } +/** + * This method computes F score of the accuracy of the classification. It is a + * weighted mean of precision and recall measurements. The weight parameter + * needs to be non-negative real value. In case the weight parameter is 1, the + * F-score is called F1 score and is a harmonic mean between precision and + * recall values. + */ +double f_score(const blitz::Array &negatives, + const blitz::Array &positives, double threshold, + double weight); - /** - * This method returns a blitz::Array composed of booleans that pin-point - * which negatives where correctly classified in a 'negative' score sample, - * given a threshold. It runs the formula: - * - * foreach (element k in negative) - * if negative[k] < threshold: returnValue[k] = true - * else: returnValue[k] = false - */ - inline blitz::Array correctlyClassifiedNegatives - (const blitz::Array& negatives, double threshold) { - return blitz::Array(negatives < threshold); - } +/** + * This method returns a blitz::Array composed of booleans that pin-point + * which positives were correctly classified in a 'positive' score sample, + * given a threshold.
It runs the formula: + * + * foreach (element k in positive) + * if positive[k] >= threshold: returnValue[k] = true + * else: returnValue[k] = false + */ +inline blitz::Array +correctlyClassifiedPositives(const blitz::Array &positives, + double threshold) { + return blitz::Array(positives >= threshold); +} - /** - * This method can calculate a threshold based on a set of scores (positives - * and negatives) given a certain minimization criteria, input as a - * functional predicate. For a discussion on 'positive' and 'negative' see - * bob::measure::farfrr(). - * Here, it is expected that the positives and the negatives are sorted ascendantly. - * - * The predicate method gives back the current minimum given false-acceptance - * (FA) and false-rejection (FR) ratios for the input data. As a predicate, - * it has to be a non-member method or a pre-configured functor where we can - * use operator(). The API for the method is: - * - * double predicate(double fa_ratio, double fr_ratio); - * - * Please note that this method will only work with single-minimum smooth - * predicates. - * - * The minimization is carried out in a data-driven way. - * Starting from the lowest score (might be a positive or a negative), it - * increases the threshold based on the distance between the current score - * and the following higher score (also keeping track of duplicate scores) - * and computes the predicate for each possible threshold. - * - * Finally, that threshold is returned, for which the predicate returned the - * lowest value. - */ - template - double minimizingThreshold(const blitz::Array& negatives, const blitz::Array& positives, T& predicate){ - if (!negatives.size() || !positives.size()) throw std::runtime_error("Cannot compute threshold when no positives or no negatives are given"); +/** + * This method returns a blitz::Array composed of booleans that pin-point + * which negatives were correctly classified in a 'negative' score sample, + * given a threshold.
It runs the formula: + * + * foreach (element k in negative) + * if negative[k] < threshold: returnValue[k] = true + * else: returnValue[k] = false + */ +inline blitz::Array +correctlyClassifiedNegatives(const blitz::Array &negatives, + double threshold) { + return blitz::Array(negatives < threshold); +} - // iterate over the whole set of points - auto pos_it = positives.begin(), neg_it = negatives.begin(); +/** + * This method can calculate a threshold based on a set of scores (positives + * and negatives) given a certain minimization criteria, input as a + * functional predicate. For a discussion on 'positive' and 'negative' see + * bob::measure::farfrr(). + * Here, it is expected that the positives and the negatives are sorted + * ascendantly. + * + * The predicate method gives back the current minimum given false-acceptance + * (FA) and false-rejection (FR) ratios for the input data. As a predicate, + * it has to be a non-member method or a pre-configured functor where we can + * use operator(). The API for the method is: + * + * double predicate(double fa_ratio, double fr_ratio); + * + * Please note that this method will only work with single-minimum smooth + * predicates. + * + * The minimization is carried out in a data-driven way. + * Starting from the lowest score (might be a positive or a negative), it + * increases the threshold based on the distance between the current score + * and the following higher score (also keeping track of duplicate scores) + * and computes the predicate for each possible threshold. + * + * Finally, that threshold is returned, for which the predicate returned the + * lowest value. 
+ */ +template +double minimizingThreshold(const blitz::Array &negatives, + const blitz::Array &positives, + T &predicate) { + if (!negatives.size() || !positives.size()) + throw std::runtime_error( + "Cannot compute threshold when no positives or no negatives are given"); - // iterate over all possible far and frr points and compute the predicate for each possible threshold... - double min_predicate = 1e8; - double min_threshold = 1e8; - double current_predicate = 1e8; - // we start with the extreme values for far and frr - double far = 1., frr = 0.; - // the decrease/increase for far/frr when moving one negative/positive - double far_decrease = 1./negatives.extent(0), frr_increase = 1./positives.extent(0); - // we start with the threshold based on the minimum score - double current_threshold = std::min(*pos_it, *neg_it); - // now, iterate over both lists, in a sorted order - while (pos_it != positives.end() && neg_it != negatives.end()){ - // compute predicate - current_predicate = predicate(far, frr); - if (current_predicate <= min_predicate){ - min_predicate = current_predicate; - min_threshold = current_threshold; - } - if (*pos_it >= *neg_it){ - // compute current threshold - current_threshold = *neg_it; - // go to the next negative value - ++neg_it; - far -= far_decrease; - } else { - // compute current threshold - current_threshold = *pos_it; - // go to the next positive - ++pos_it; - frr += frr_increase; - } - // increase positive and negative as long as they contain the same value - while (neg_it != negatives.end() && *neg_it == current_threshold) { - // go to next negative - ++neg_it; - far -= far_decrease; - } - while (pos_it != positives.end() && *pos_it == current_threshold) { - // go to next positive - ++pos_it; - frr += frr_increase; - } - // compute a new threshold based on the center between last and current score, if we are not already at the end of the score lists - if (neg_it != negatives.end() || pos_it != positives.end()){ - if (neg_it != 
negatives.end() && pos_it != positives.end()) - current_threshold += std::min(*pos_it, *neg_it); - else if (neg_it != negatives.end()) - current_threshold += *neg_it; - else - current_threshold += *pos_it; - current_threshold /= 2; - } - } // while + // iterate over the whole set of points + auto pos_it = positives.begin(), neg_it = negatives.begin(); - // now, we have reached the end of one list (usually the negatives) - // so, finally compute predicate for the last time + // iterate over all possible far and frr points and compute the predicate for + // each possible threshold... + double min_predicate = 1e8; + double min_threshold = 1e8; + double current_predicate = 1e8; + // we start with the extreme values for far and frr + double far = 1., frr = 0.; + // the decrease/increase for far/frr when moving one negative/positive + double far_decrease = 1. / negatives.extent(0), + frr_increase = 1. / positives.extent(0); + // we start with the threshold based on the minimum score + double current_threshold = std::min(*pos_it, *neg_it); + // now, iterate over both lists, in a sorted order + while (pos_it != positives.end() && neg_it != negatives.end()) { + // compute predicate current_predicate = predicate(far, frr); - if (current_predicate < min_predicate){ + if (current_predicate <= min_predicate) { min_predicate = current_predicate; min_threshold = current_threshold; } + if (*pos_it >= *neg_it) { + // compute current threshold + current_threshold = *neg_it; + // go to the next negative value + ++neg_it; + far -= far_decrease; + } else { + // compute current threshold + current_threshold = *pos_it; + // go to the next positive + ++pos_it; + frr += frr_increase; + } + // increase positive and negative as long as they contain the same value + while (neg_it != negatives.end() && *neg_it == current_threshold) { + // go to next negative + ++neg_it; + far -= far_decrease; + } + while (pos_it != positives.end() && *pos_it == current_threshold) { + // go to next positive + 
++pos_it; + frr += frr_increase; + } + // compute a new threshold based on the center between last and current + // score, if we are not already at the end of the score lists + if (neg_it != negatives.end() || pos_it != positives.end()) { + if (neg_it != negatives.end() && pos_it != positives.end()) + current_threshold += std::min(*pos_it, *neg_it); + else if (neg_it != negatives.end()) + current_threshold += *neg_it; + else + current_threshold += *pos_it; + current_threshold /= 2; + } + } // while - // return the best threshold found - return min_threshold; + // now, we have reached the end of one list (usually the negatives) + // so, finally compute predicate for the last time + current_predicate = predicate(far, frr); + if (current_predicate < min_predicate) { + min_predicate = current_predicate; + min_threshold = current_threshold; } - /** - * Calculates the threshold that is, as close as possible, to the - * equal-error-rate (EER) given the input data. The EER should be the point - * where the FAR equals the FRR. Graphically, this would be equivalent to the - * intersection between the R.O.C. (or D.E.T.) curves and the identity. - */ - double eerThreshold(const blitz::Array& negatives, const blitz::Array& positives, bool is_sorted = false); + // return the best threshold found + return min_threshold; +} - /** - * Calculates the equal-error-rate (EER) given the input data, on the ROC - * Convex Hull, as performed in the Bosaris toolkit. - * (https://sites.google.com/site/bosaristoolkit/) - */ - double eerRocch(const blitz::Array& negatives, const blitz::Array& positives); +/** + * Calculates the threshold that is, as close as possible, to the + * equal-error-rate (EER) given the input data. The EER should be the point + * where the FAR equals the FRR. Graphically, this would be equivalent to the + * intersection between the R.O.C. (or D.E.T.) curves and the identity. 
+ */ +double eerThreshold(const blitz::Array &negatives, + const blitz::Array &positives, + bool is_sorted = false); - /** - * Calculates the threshold that minimizes the error rate, given the input - * data. An optional parameter 'cost' determines the relative importance - * between false-accepts and false-rejections. This number should be between - * 0 and 1 and will be clipped to those extremes. - * - * The value to minimize becomes: - * - * ER_cost = [cost * FAR] + [(1-cost) * FRR] - * - * The higher the cost, the higher the importance given to *not* making - * mistakes classifying negatives/noise/impostors. - */ - double minWeightedErrorRateThreshold(const blitz::Array& negatives, const blitz::Array& positives, double cost, bool is_sorted = false); +/** + * Calculates the equal-error-rate (EER) given the input data, on the ROC + * Convex Hull, as performed in the Bosaris toolkit. + * (https://sites.google.com/site/bosaristoolkit/) + */ +double eerRocch(const blitz::Array &negatives, + const blitz::Array &positives); - /** - * Calculates the minWeightedErrorRateThreshold() when the cost is 0.5. - */ - inline double minHterThreshold(const blitz::Array& negatives, const blitz::Array& positives, bool is_sorted = false) { - return minWeightedErrorRateThreshold(negatives, positives, 0.5, is_sorted); - } +/** + * Calculates the threshold that minimizes the error rate, given the input + * data. An optional parameter 'cost' determines the relative importance + * between false-accepts and false-rejections. This number should be between + * 0 and 1 and will be clipped to those extremes. + * + * The value to minimize becomes: + * + * ER_cost = [cost * FAR] + [(1-cost) * FRR] + * + * The higher the cost, the higher the importance given to *not* making + * mistakes classifying negatives/noise/impostors. 
+ */ +double minWeightedErrorRateThreshold(const blitz::Array &negatives, + const blitz::Array &positives, + double cost, bool is_sorted = false); - /** - * Computes the threshold such that the real FAR is as close as possible - * to the requested far_value. - * - * @param negatives The impostor scores to be used for computing the FAR - * @param positives The client scores; ignored by this function - * @param far_value The FAR value where the threshold should be computed - * - * @return The computed threshold - */ - double farThreshold(const blitz::Array& negatives, - const blitz::Array& positives, double far_value, bool is_sorted = false); +/** + * Calculates the minWeightedErrorRateThreshold() when the cost is 0.5. + */ +inline double minHterThreshold(const blitz::Array &negatives, + const blitz::Array &positives, + bool is_sorted = false) { + return minWeightedErrorRateThreshold(negatives, positives, 0.5, is_sorted); +} - /** - * Computes the threshold such that the real FRR is as close as possible - * to the requested frr_value. - * - * @param negatives The impostor scores; ignored by this function - * @param positives The client scores to be used for computing the FRR - * @param frr_value The FRR value where the threshold should be computed - * - * @return The computed threshold - */ - double frrThreshold(const blitz::Array& negatives, - const blitz::Array& positives, double frr_value, bool is_sorted = false); +/** + * Computes the threshold such that the real FAR is as close as possible + * to the requested far_value. 
+ * + * @param negatives The impostor scores to be used for computing the FAR + * @param positives The client scores; ignored by this function + * @param far_value The FAR value where the threshold should be computed + * + * @return The computed threshold + */ +double farThreshold(const blitz::Array &negatives, + const blitz::Array &positives, double far_value, + bool is_sorted = false); - /** - * Calculates the ROC curve given a set of positive and negative scores and a - * number of desired points. Returns a two-dimensional blitz::Array of - * doubles that express the X (FRR) and Y (FAR) coordinates in this order. - * The points in which the ROC curve are calculated are distributed - * uniformly in the range [min(negatives, positives), max(negatives, - * positives)]. - */ - blitz::Array roc - (const blitz::Array& negatives, - const blitz::Array& positives, size_t points); +/** + * Computes the threshold such that the real FRR is as close as possible + * to the requested frr_value. + * + * @param negatives The impostor scores; ignored by this function + * @param positives The client scores to be used for computing the FRR + * @param frr_value The FRR value where the threshold should be computed + * + * @return The computed threshold + */ +double frrThreshold(const blitz::Array &negatives, + const blitz::Array &positives, double frr_value, + bool is_sorted = false); - /** - * Calculates the precision-recall curve given a set of positive and negative scores and a - * number of desired points. Returns a two-dimensional blitz::Array of - * doubles that express the X (precision) and Y (recall) coordinates in this order. - * The points in which the curve is calculated are distributed - * uniformly in the range [min(negatives, positives), max(negatives, - * positives)]. 
- */ - blitz::Array precision_recall_curve - (const blitz::Array& negatives, - const blitz::Array& positives, size_t points); +/** + * Calculates the ROC curve given a set of positive and negative scores and a + * number of desired points. Returns a two-dimensional blitz::Array of + * doubles that express the X (FRR) and Y (FAR) coordinates in this order. + * The points in which the ROC curve are calculated are distributed + * uniformly in the range [min(negatives, positives), max(negatives, + * positives)]. + */ +blitz::Array roc(const blitz::Array &negatives, + const blitz::Array &positives, + size_t points); - /** - * Calculates the ROC Convex Hull (ROCCH) given a set of positive and - * negative scores and a number of desired points. Returns a - * two-dimensional blitz::Array of doubles that contain the coordinates - * of the vertices of the ROC Convex Hull (the first row is for "pmiss" - * and the second row is for "pfa"). - * Reference: Bosaris toolkit - * (https://sites.google.com/site/bosaristoolkit/) - */ - blitz::Array rocch - (const blitz::Array& negatives, - const blitz::Array& positives); +/** + * Calculates the precision-recall curve given a set of positive and negative + * scores and a + * number of desired points. Returns a two-dimensional blitz::Array of + * doubles that express the X (precision) and Y (recall) coordinates in this + * order. + * The points in which the curve is calculated are distributed + * uniformly in the range [min(negatives, positives), max(negatives, + * positives)]. + */ +blitz::Array +precision_recall_curve(const blitz::Array &negatives, + const blitz::Array &positives, size_t points); - /** - * Calculates the Equal Error Rate (EER) on the ROC Convex Hull (ROCCH) - * from the 2-row matrices containing the pmiss and pfa vectors - * (which is the output of the bob::measure::rocch()). - * Note: pmiss and pfa contain the coordinates of the vertices of the - * ROC Convex Hull. 
- * Reference: Bosaris toolkit - * (https://sites.google.com/site/bosaristoolkit/) - */ - double rocch2eer(const blitz::Array& pmiss_pfa); +/** + * Calculates the ROC Convex Hull (ROCCH) given a set of positive and + * negative scores and a number of desired points. Returns a + * two-dimensional blitz::Array of doubles that contain the coordinates + * of the vertices of the ROC Convex Hull (the first row is for "pmiss" + * and the second row is for "pfa"). + * Reference: Bosaris toolkit + * (https://sites.google.com/site/bosaristoolkit/) + */ +blitz::Array rocch(const blitz::Array &negatives, + const blitz::Array &positives); - /** - * Calculates the ROC curve given a set of positive and negative scores at - * the given FAR coordinates. Returns a two-dimensional blitz::Array of - * doubles that express the X (FAR) and Y (CAR) coordinates in this order. - */ - blitz::Array roc_for_far( - const blitz::Array& negatives, - const blitz::Array& positives, - const blitz::Array& far_list, - bool is_sorted = false); +/** + * Calculates the Equal Error Rate (EER) on the ROC Convex Hull (ROCCH) + * from the 2-row matrices containing the pmiss and pfa vectors + * (which is the output of the bob::measure::rocch()). + * Note: pmiss and pfa contain the coordinates of the vertices of the + * ROC Convex Hull. + * Reference: Bosaris toolkit + * (https://sites.google.com/site/bosaristoolkit/) + */ +double rocch2eer(const blitz::Array &pmiss_pfa); - /** - * Returns the Deviate Scale equivalent of a false rejection/acceptance - * ratio. - * - * The algorithm that calculates the deviate scale is based on function - * ppndf() from the NIST package DETware version 2.1, freely available on the - * internet. Please consult it for more details. - */ - double ppndf(double value); +/** + * Calculates the ROC curve given a set of positive and negative scores at + * the given FAR coordinates. 
Returns a two-dimensional blitz::Array of + * doubles that express the X (FAR) and Y (CAR) coordinates in this order. + */ +blitz::Array roc_for_far(const blitz::Array &negatives, + const blitz::Array &positives, + const blitz::Array &far_list, + bool is_sorted = false); - /** - * Calculates the DET curve given a set of positive and negative scores and a - * number of desired points. Returns a two-dimensional blitz::Array of - * doubles that express on its rows: - * - * 0: X axis values in the normal deviate scale for the false-rejections - * 1: Y axis values in the normal deviate scale for the false-accepts - * - * You can plot the results using your preferred tool to first create a plot - * using rows 0 and 1 from the returned value and then place replace the X/Y - * axis annotation using a pre-determined set of tickmarks as recommended by - * NIST. - * - * The algorithm that calculates the deviate scale is based on function - * ppndf() from the NIST package DETware version 2.1, freely available on the - * internet. Please consult it for more details. - * - * By 20.04.2011, you could find such package here: - * http://www.itl.nist.gov/iad/mig/tools/ - */ - blitz::Array det - (const blitz::Array& negatives, - const blitz::Array& positives, size_t points); +/** + * Returns the Deviate Scale equivalent of a false rejection/acceptance + * ratio. + * + * The algorithm that calculates the deviate scale is based on function + * ppndf() from the NIST package DETware version 2.1, freely available on the + * internet. Please consult it for more details. + */ +double ppndf(double value); - /** - * Calculates the EPC curve given a set of positive and negative scores and a - * number of desired points. Returns a two-dimensional blitz::Array of - * doubles that express the X (cost), Y (HTER on the test set given the min. 
- * HTER threshold on the development set) coordinates, and the thresholds - * which were used during calculations (dependig on the thresholds - * argument) in this order. Please note that, in order to calculate the EPC - * curve, one needs two sets of data comprising a development set and a test - * set. The minimum weighted error is calculated on the development set and - * then applied to the test set to evaluate the half-total error rate at that - * position. - * - * The EPC curve plots the HTER on the test set for various values of 'cost'. - * For each value of 'cost', a threshold is found that provides the minimum - * weighted error (see minWeightedErrorRateThreshold()) on the development - * set. Each threshold is consecutively applied to the test set and the - * resulting HTER values are plotted in the EPC. - * - * The cost points in which the EPC curve are calculated are distributed - * uniformly in the range [0.0, 1.0]. - */ - blitz::Array epc - (const blitz::Array& dev_negatives, - const blitz::Array& dev_positives, - const blitz::Array& test_negatives, - const blitz::Array& test_positives, - size_t points, - bool is_sorted = false, - bool thresholds = false); +/** + * Calculates the DET curve given a set of positive and negative scores and a + * number of desired points. Returns a two-dimensional blitz::Array of + * doubles that express on its rows: + * + * 0: X axis values in the normal deviate scale for the false-rejections + * 1: Y axis values in the normal deviate scale for the false-accepts + * + * You can plot the results using your preferred tool to first create a plot + * using rows 0 and 1 from the returned value and then place replace the X/Y + * axis annotation using a pre-determined set of tickmarks as recommended by + * NIST. + * + * The algorithm that calculates the deviate scale is based on function + * ppndf() from the NIST package DETware version 2.1, freely available on the + * internet. Please consult it for more details. 
+ * + * By 20.04.2011, you could find such package here: + * http://www.itl.nist.gov/iad/mig/tools/ + */ +blitz::Array det(const blitz::Array &negatives, + const blitz::Array &positives, + size_t points); -}} +/** + * Calculates the EPC curve given a set of positive and negative scores and a + * number of desired points. Returns a two-dimensional blitz::Array of + * doubles that express the X (cost), Y (HTER on the test set given the min. + * HTER threshold on the development set) coordinates, and the thresholds + * which were used during calculations (depending on the thresholds + * argument) in this order. Please note that, in order to calculate the EPC + * curve, one needs two sets of data comprising a development set and a test + * set. The minimum weighted error is calculated on the development set and + * then applied to the test set to evaluate the half-total error rate at that + * position. + * + * The EPC curve plots the HTER on the test set for various values of 'cost'. + * For each value of 'cost', a threshold is found that provides the minimum + * weighted error (see minWeightedErrorRateThreshold()) on the development + * set. Each threshold is consecutively applied to the test set and the + * resulting HTER values are plotted in the EPC. + * + * The cost points in which the EPC curve are calculated are distributed + * uniformly in the range [0.0, 1.0].
+ */ +blitz::Array epc(const blitz::Array &dev_negatives, + const blitz::Array &dev_positives, + const blitz::Array &test_negatives, + const blitz::Array &test_positives, + size_t points, bool is_sorted = false, + bool thresholds = false); +} +} #endif /* BOB_MEASURE_ERROR_H */ diff --git a/bob/measure/load.py b/bob/measure/load.py index 41e6e6c..d89b18d 100644 --- a/bob/measure/load.py +++ b/bob/measure/load.py @@ -55,7 +55,8 @@ def open_file(filename, mode='rt'): tar_info = tar.next() # check that one file was found in the archive if tar_info is None: - raise IOError("The given file is a .tar file, but it does not contain any file.") + raise IOError( + "The given file is a .tar file, but it does not contain any file.") # open the file for reading return tar.extractfile(tar_info) @@ -348,7 +349,6 @@ def cmc(filename, ncolumns=None): return cmc_five_column(filename) - def load_score(filename, ncolumns=None, minimal=False, **kwargs): """Load scores using numpy.loadtxt and return the data as a numpy array. 
@@ -383,29 +383,29 @@ def load_score(filename, ncolumns=None, minimal=False, **kwargs): if ncolumns == 4: names = ('claimed_id', 'real_id', 'test_label', 'score') converters = { - 0: convertfunc, - 1: convertfunc, - 2: convertfunc, - 3: float} + 0: convertfunc, + 1: convertfunc, + 2: convertfunc, + 3: float} if minimal: usecols = (0, 1, 3) elif ncolumns == 5: names = ('claimed_id', 'model_label', 'real_id', 'test_label', 'score') converters = { - 0: convertfunc, - 1: convertfunc, - 2: convertfunc, - 3: convertfunc, - 4: float} + 0: convertfunc, + 1: convertfunc, + 2: convertfunc, + 3: convertfunc, + 4: float} if minimal: usecols = (0, 2, 4) else: raise ValueError("ncolumns of 4 and 5 are supported only.") score_lines = numpy.genfromtxt( - open_file(filename, mode='rb'), dtype=None, names=names, - converters=converters, invalid_raise=True, usecols=usecols, **kwargs) + open_file(filename, mode='rb'), dtype=None, names=names, + converters=converters, invalid_raise=True, usecols=usecols, **kwargs) new_dtype = [] for name in score_lines.dtype.names[:-1]: new_dtype.append((name, str(score_lines.dtype[name]).replace('S', 'U'))) @@ -506,17 +506,19 @@ def _iterate_score_file(filename): yield splits -def _split_scores(score_lines, real_id_index, claimed_id_index = 0, score_index = -1): +def _split_scores(score_lines, real_id_index, claimed_id_index=0, score_index=-1): """Take the output of :py:func:four_column or :py:func:five_column and return negatives and positives. 
""" positives, negatives = [], [] for line in score_lines: - which = positives if line[claimed_id_index] == line[real_id_index] else negatives + which = positives if line[claimed_id_index] == line[ + real_id_index] else negatives which.append(line[score_index]) return (numpy.array(negatives), numpy.array(positives)) -def _split_cmc_scores(score_lines, real_id_index, probe_name_index = None, claimed_id_index = 0, score_index = -1): + +def _split_cmc_scores(score_lines, real_id_index, probe_name_index=None, claimed_id_index=0, score_index=-1): """Takes the output of :py:func:four_column or :py:func:five_column and return cmc scores. """ if probe_name_index is None: @@ -526,7 +528,8 @@ def _split_cmc_scores(score_lines, real_id_index, probe_name_index = None, claim neg_dict = {} # read four column list for line in score_lines: - which = pos_dict if line[claimed_id_index] == line[real_id_index] else neg_dict + which = pos_dict if line[claimed_id_index] == line[ + real_id_index] else neg_dict probe_name = line[probe_name_index] # append score if probe_name not in which: @@ -537,6 +540,8 @@ def _split_cmc_scores(score_lines, real_id_index, probe_name_index = None, claim probe_names = sorted(set(neg_dict.keys()).union(set(pos_dict.keys()))) # get all scores in the desired format return [( - numpy.array(neg_dict[probe_name], numpy.float64) if probe_name in neg_dict else None, - numpy.array(pos_dict[probe_name], numpy.float64) if probe_name in pos_dict else None + numpy.array(neg_dict[probe_name], + numpy.float64) if probe_name in neg_dict else None, + numpy.array(pos_dict[probe_name], + numpy.float64) if probe_name in pos_dict else None ) for probe_name in probe_names] diff --git a/bob/measure/main.cpp b/bob/measure/main.cpp index aa9c494..ae03c57 100644 --- a/bob/measure/main.cpp +++ b/bob/measure/main.cpp @@ -8,974 +8,1061 @@ #ifdef NO_IMPORT_ARRAY #undef NO_IMPORT_ARRAY #endif -#include #include +#include #include -#include #include +#include #include "cpp/error.h" 
-static int double1d_converter(PyObject* o, PyBlitzArrayObject** a) { - if (PyBlitzArray_Converter(o, a) == 0) return 0; +static int double1d_converter(PyObject *o, PyBlitzArrayObject **a) { + if (PyBlitzArray_Converter(o, a) == 0) + return 0; // in this case, *a is set to a new reference if ((*a)->type_num != NPY_FLOAT64 || (*a)->ndim != 1) { - PyErr_Format(PyExc_TypeError, "cannot convert blitz::Array<%s,%" PY_FORMAT_SIZE_T "d> to a blitz::Array", PyBlitzArray_TypenumAsString((*a)->type_num), (*a)->ndim); + PyErr_Format(PyExc_TypeError, + "cannot convert blitz::Array<%s,%" PY_FORMAT_SIZE_T + "d> to a blitz::Array", + PyBlitzArray_TypenumAsString((*a)->type_num), (*a)->ndim); return 0; } return 1; } -static int double2d_converter(PyObject* o, PyBlitzArrayObject** a) { - if (PyBlitzArray_Converter(o, a) == 0) return 0; +static int double2d_converter(PyObject *o, PyBlitzArrayObject **a) { + if (PyBlitzArray_Converter(o, a) == 0) + return 0; // in this case, *a is set to a new reference if ((*a)->type_num != NPY_FLOAT64 || (*a)->ndim != 2) { - PyErr_Format(PyExc_TypeError, "cannot convert blitz::Array<%s,%" PY_FORMAT_SIZE_T "d> to a blitz::Array", PyBlitzArray_TypenumAsString((*a)->type_num), (*a)->ndim); + PyErr_Format(PyExc_TypeError, + "cannot convert blitz::Array<%s,%" PY_FORMAT_SIZE_T + "d> to a blitz::Array", + PyBlitzArray_TypenumAsString((*a)->type_num), (*a)->ndim); return 0; } return 1; } - -static auto epc_doc = bob::extension::FunctionDoc( - "epc", - "Calculates points of an Expected Performance Curve (EPC)", - "Calculates the EPC curve given a set of positive and negative scores and a desired number of points. " - "Returns a two-dimensional :py:class:numpy.ndarray of type float with the " - "shape of (2, points) or (3, points) depending on the thresholds argument. " - "The rows correspond to the X (cost), Y (weighted error rate on the test set given the min. 
threshold on the development set), and the thresholds which were used to calculate the error (if the thresholds argument was set to True), respectively. " - "Please note that, in order to calculate the EPC curve, one needs two sets of data comprising a development set and a test set. " - "The minimum weighted error is calculated on the development set and then applied to the test set to evaluate the weighted error rate at that position.\n\n" - "The EPC curve plots the HTER on the test set for various values of 'cost'. " - "For each value of 'cost', a threshold is found that provides the minimum weighted error (see :py:func:bob.measure.min_weighted_error_rate_threshold) on the development set. " - "Each threshold is consecutively applied to the test set and the resulting weighted error values are plotted in the EPC.\n\n" - "The cost points in which the EPC curve are calculated are distributed uniformly in the range :math:[0.0, 1.0].\n\n" - ".. note:: It is more memory efficient, when sorted arrays of scores are provided and the is_sorted parameter is set to True." -) -.add_prototype("dev_negatives, dev_positives, test_negatives, test_positives, n_points, [is_sorted], [thresholds]", "curve") -.add_parameter("dev_negatives, dev_positives, test_negatives, test_positives", "array_like(1D, float)", "The scores for negatives and positives of the development and test set") -.add_parameter("n_points", "int", "The number of weights for which the EPC curve should be computed") -.add_parameter("is_sorted", "bool", "[Default: False] Set this to True if the scores are already sorted. 
If False, scores will be sorted internally, which will require more memory") -.add_parameter("thresholds", "bool", "[Default: False] If True the function returns an array with the shape of (3, points) where the third row contains the thresholds that were calculated on the development set.") -.add_return("curve", "array_like(2D, float)", "The EPC curve, with the first row containing the weights and the second row containing the weighted errors on the test set. If thresholds is True, there is also a third row which contains the thresholds that were calculated on the development set.") -; -static PyObject* epc(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY +static auto epc_doc = + bob::extension::FunctionDoc( + "epc", "Calculates points of an Expected Performance Curve (EPC)", + "Calculates the EPC curve given a set of positive and negative scores " + "and a desired number of points. " + "Returns a two-dimensional :py:class:numpy.ndarray of type float " + "with the " + "shape of (2, points) or (3, points) depending on the " + "thresholds argument. " + "The rows correspond to the X (cost), Y (weighted error rate on the " + "test set given the min. threshold on the development set), and the " + "thresholds which were used to calculate the error (if the " + "thresholds argument was set to True), respectively. " + "Please note that, in order to calculate the EPC curve, one needs two " + "sets of data comprising a development set and a test set. " + "The minimum weighted error is calculated on the development set and " + "then applied to the test set to evaluate the weighted error rate at " + "that position.\n\n" + "The EPC curve plots the HTER on the test set for various values of " + "'cost'. " + "For each value of 'cost', a threshold is found that provides the " + "minimum weighted error (see " + ":py:func:bob.measure.min_weighted_error_rate_threshold) on the " + "development set. 
" + "Each threshold is consecutively applied to the test set and the " + "resulting weighted error values are plotted in the EPC.\n\n" + "The cost points in which the EPC curve are calculated are distributed " + "uniformly in the range :math:[0.0, 1.0].\n\n" + ".. note:: It is more memory efficient, when sorted arrays of scores " + "are provided and the is_sorted parameter is set to True.") + .add_prototype("dev_negatives, dev_positives, test_negatives, " + "test_positives, n_points, [is_sorted], [thresholds]", + "curve") + .add_parameter( + "dev_negatives, dev_positives, test_negatives, test_positives", + "array_like(1D, float)", "The scores for negatives and positives " + "of the development and test set") + .add_parameter( + "n_points", "int", + "The number of weights for which the EPC curve should be computed") + .add_parameter("is_sorted", "bool", + "[Default: False] Set this to True if the " + "scores are already sorted. If False, scores will " + "be sorted internally, which will require more memory") + .add_parameter("thresholds", "bool", + "[Default: False] If True the function returns " + "an array with the shape of (3, points) where the " + "third row contains the thresholds that were calculated " + "on the development set.") + .add_return("curve", "array_like(2D, float)", + "The EPC curve, with the first row containing the weights " + "and the second row containing the weighted errors on the " + "test set. 
If thresholds is True, there is also a " + "third row which contains the thresholds that were " + "calculated on the development set."); +static PyObject *epc(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY /* Parses input arguments in a single shot */ - char** kwlist = epc_doc.kwlist(); + char **kwlist = epc_doc.kwlist(); - PyBlitzArrayObject* dev_neg; - PyBlitzArrayObject* dev_pos; - PyBlitzArrayObject* test_neg; - PyBlitzArrayObject* test_pos; + PyBlitzArrayObject *dev_neg; + PyBlitzArrayObject *dev_pos; + PyBlitzArrayObject *test_neg; + PyBlitzArrayObject *test_pos; Py_ssize_t n_points; - PyObject* is_sorted = Py_False; - PyObject* thresholds = Py_False; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&O&O&n|OO", - kwlist, - &double1d_converter, &dev_neg, - &double1d_converter, &dev_pos, - &double1d_converter, &test_neg, - &double1d_converter, &test_pos, - &n_points, - &is_sorted, - &thresholds - )) return 0; - - //protects acquired resources through this scope + PyObject *is_sorted = Py_False; + PyObject *thresholds = Py_False; + + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O&O&O&O&n|OO", kwlist, &double1d_converter, &dev_neg, + &double1d_converter, &dev_pos, &double1d_converter, &test_neg, + &double1d_converter, &test_pos, &n_points, &is_sorted, &thresholds)) + return 0; + + // protects acquired resources through this scope auto dev_neg_ = make_safe(dev_neg); auto dev_pos_ = make_safe(dev_pos); auto test_neg_ = make_safe(test_neg); auto test_pos_ = make_safe(test_pos); - auto result = bob::measure::epc( - *PyBlitzArrayCxx_AsBlitz(dev_neg), - *PyBlitzArrayCxx_AsBlitz(dev_pos), - *PyBlitzArrayCxx_AsBlitz(test_neg), - *PyBlitzArrayCxx_AsBlitz(test_pos), - n_points, PyObject_IsTrue(is_sorted), PyObject_IsTrue(thresholds)); + auto result = bob::measure::epc(*PyBlitzArrayCxx_AsBlitz(dev_neg), + *PyBlitzArrayCxx_AsBlitz(dev_pos), + *PyBlitzArrayCxx_AsBlitz(test_neg), + *PyBlitzArrayCxx_AsBlitz(test_pos), + n_points, PyObject_IsTrue(is_sorted), + 
PyObject_IsTrue(thresholds)); return PyBlitzArrayCxx_AsNumpy(result); -BOB_CATCH_FUNCTION("epc", 0) + BOB_CATCH_FUNCTION("epc", 0) } -static auto det_doc = bob::extension::FunctionDoc( - "det", - "Calculates points of an Detection Error-Tradeoff (DET) curve", - "Calculates the DET curve given a set of negative and positive scores and a desired number of points. Returns a two-dimensional array of doubles that express on its rows:\n\n" - "[0] X axis values in the normal deviate scale for the false-accepts\n\n" - "[1] Y axis values in the normal deviate scale for the false-rejections\n\n" - "You can plot the results using your preferred tool to first create a plot using rows 0 and 1 from the returned value and then replace the X/Y axis annotation using a pre-determined set of tickmarks as recommended by NIST. " - "The derivative scales are computed with the :py:func:bob.measure.ppndf function." -) -.add_prototype("negatives, positives, n_points", "curve") -.add_parameter("negatives, positives", "array_like(1D, float)", "The list of negative and positive scores to compute the DET for") -.add_parameter("n_points", "int", "The number of points on the DET curve, for which the DET should be evaluated") -.add_return("curve", "array_like(2D, float)", "The DET curve, with the FAR in the first and the FRR in the second row") -; -static PyObject* det(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - char** kwlist = det_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto det_doc = + bob::extension::FunctionDoc( + "det", "Calculates points of an Detection Error-Tradeoff (DET) curve", + "Calculates the DET curve given a set of negative and positive scores " + "and a desired number of points. 
Returns a two-dimensional array of " + "doubles that express on its rows:\n\n" + "[0] X axis values in the normal deviate scale for the " + "false-accepts\n\n" + "[1] Y axis values in the normal deviate scale for the " + "false-rejections\n\n" + "You can plot the results using your preferred tool to first create a " + "plot using rows 0 and 1 from the returned value and then replace the " + "X/Y axis annotation using a pre-determined set of tickmarks as " + "recommended by NIST. " + "The derivative scales are computed with the " + ":py:func:bob.measure.ppndf function.") + .add_prototype("negatives, positives, n_points", "curve") + .add_parameter( + "negatives, positives", "array_like(1D, float)", + "The list of negative and positive scores to compute the DET for") + .add_parameter("n_points", "int", "The number of points on the DET " + "curve, for which the DET should be " + "evaluated") + .add_return("curve", "array_like(2D, float)", + "The DET curve, with the FAR in the first and the FRR in " + "the second row"); +static PyObject *det(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + char **kwlist = det_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; Py_ssize_t n_points; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&n", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &n_points - )) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&n", kwlist, + &double1d_converter, &neg, + &double1d_converter, &pos, &n_points)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); - auto result = bob::measure::det( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - n_points); + auto result = + bob::measure::det(*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), n_points); return PyBlitzArrayCxx_AsNumpy(result); -BOB_CATCH_FUNCTION("det", 0) + 
BOB_CATCH_FUNCTION("det", 0) } -static auto ppndf_doc = bob::extension::FunctionDoc( - "ppndf", - "Returns the Deviate Scale equivalent of a false rejection/acceptance ratio", - "The algorithm that calculates the deviate scale is based on function ppndf() from the NIST package DETware version 2.1, freely available on the internet. " - "Please consult it for more details. " - "By 20.04.2011, you could find such package here _." -) -.add_prototype("value", "ppndf") -.add_parameter("value", "float", "The value (usually FAR or FRR) for which the ppndf should be calculated") -.add_return("ppndf", "float", "The derivative scale of the given value") -; -static PyObject* ppndf(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - char** kwlist = ppndf_doc.kwlist(); +static auto ppndf_doc = + bob::extension::FunctionDoc( + "ppndf", "Returns the Deviate Scale equivalent of a false " + "rejection/acceptance ratio", + "The algorithm that calculates the deviate scale is based on function " + "ppndf() from the NIST package DETware version 2.1, freely available " + "on the internet. " + "Please consult it for more details. 
" + "By 20.04.2011, you could find such package here " + "_.") + .add_prototype("value", "ppndf") + .add_parameter("value", "float", "The value (usually FAR or FRR) for " + "which the ppndf should be calculated") + .add_return("ppndf", "float", + "The derivative scale of the given value"); +static PyObject *ppndf(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + char **kwlist = ppndf_doc.kwlist(); double v; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "d", kwlist, &v)) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "d", kwlist, &v)) + return 0; return Py_BuildValue("d", bob::measure::ppndf(v)); -BOB_CATCH_FUNCTION("ppndf", 0) + BOB_CATCH_FUNCTION("ppndf", 0) } -static auto roc_doc = bob::extension::FunctionDoc( - "roc", - "Calculates points of an Receiver Operating Characteristic (ROC)", - "Calculates the ROC curve given a set of negative and positive scores and a desired number of points. " -) -.add_prototype("negatives, positives, n_points", "curve") -.add_parameter("negatives, positives", "array_like(1D, float)", "The negative and positive scores, for which the ROC curve should be calculated") -.add_parameter("n_points", "int", "The number of points, in which the ROC curve are calculated, which are distributed uniformly in the range [min(negatives, positives), max(negatives, positives)]") -.add_return("curve", "array_like(2D, float)", "A two-dimensional array of doubles that express the X (FAR) and Y (FRR) coordinates in this order") -; -static PyObject* roc(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - static char** kwlist = roc_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto roc_doc = + bob::extension::FunctionDoc( + "roc", + "Calculates points of an Receiver Operating Characteristic (ROC)", + "Calculates the ROC curve given a set of negative and positive scores " + "and a desired number of points. 
") + .add_prototype("negatives, positives, n_points", "curve") + .add_parameter("negatives, positives", "array_like(1D, float)", + "The negative and positive scores, for which the ROC " + "curve should be calculated") + .add_parameter("n_points", "int", "The number of points, in which the " + "ROC curve are calculated, which are " + "distributed uniformly in the range " + "[min(negatives, positives), " + "max(negatives, positives)]") + .add_return("curve", "array_like(2D, float)", + "A two-dimensional array of doubles that express the X " + "(FAR) and Y (FRR) coordinates in this order"); +static PyObject *roc(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + static char **kwlist = roc_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; Py_ssize_t n_points; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&n", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &n_points - )) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&n", kwlist, + &double1d_converter, &neg, + &double1d_converter, &pos, &n_points)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); - auto result = bob::measure::roc( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - n_points); + auto result = + bob::measure::roc(*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), n_points); return PyBlitzArrayCxx_AsNumpy(result); -BOB_CATCH_FUNCTION("roc", 0) + BOB_CATCH_FUNCTION("roc", 0) } -static auto farfrr_doc = bob::extension::FunctionDoc( - "farfrr", - "Calculates the false-acceptance (FA) ratio and the false-rejection (FR) ratio for the given positive and negative scores and a score threshold", - "positives holds the score information for samples that are labeled to belong to a certain class (a.k.a., 'signal' or 'client'). 
" - "negatives holds the score information for samples that are labeled **not** to belong to the class (a.k.a., 'noise' or 'impostor'). " - "It is expected that 'positive' scores are, at least by design, greater than 'negative' scores. " - "So, every 'positive' value that falls bellow the threshold is considered a false-rejection (FR). " - "negative samples that fall above the threshold are considered a false-accept (FA).\n\n" - "Positives that fall on the threshold (exactly) are considered correctly classified. " - "Negatives that fall on the threshold (exactly) are considered **incorrectly** classified. " - "This equivalent to setting the comparison like this pseudo-code:\n\n" - " foreach (positive as K) if K < threshold: falseRejectionCount += 1\n\n" - " foreach (negative as K) if K >= threshold: falseAcceptCount += 1\n\n" - "The output is in form of a tuple of two double-precision real numbers. " - "The numbers range from 0 to 1. " - "The first element of the pair is the false-accept ratio (FAR), the second element the false-rejection ratio (FRR).\n\n" - "The threshold value does not necessarily have to fall in the range covered by the input scores (negatives and positives altogether), but if it does not, the output will be either (1.0, 0.0) or (0.0, 1.0), depending on the side the threshold falls.\n\n" - "It is possible that scores are inverted in the negative/positive sense. " - "In some setups the designer may have setup the system so 'positive' samples have a smaller score than the 'negative' ones. " - "In this case, make sure you normalize the scores so positive samples have greater scores before feeding them into this method." 
-) -.add_prototype("negatives, positives, threshold", "far, frr") -.add_parameter("negatives", "array_like(1D, float)", "The scores for comparisons of objects of different classes") -.add_parameter("positives", "array_like(1D, float)", "The scores for comparisons of objects of the same class") -.add_parameter("threshold", "float", "The threshold to separate correctly and incorrectly classified scores") -.add_return("far", "float", "The False Accept Rate (FAR) for the given threshold") -.add_return("frr", "float", "The False Reject Rate (FRR) for the given threshold") -; -static PyObject* farfrr(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - char** kwlist = farfrr_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto farfrr_doc = + bob::extension::FunctionDoc( + "farfrr", "Calculates the false-acceptance (FA) ratio and the " + "false-rejection (FR) ratio for the given positive and " + "negative scores and a score threshold", + "positives holds the score information for samples that are " + "labeled to belong to a certain class (a.k.a., 'signal' or 'client'). " + "negatives holds the score information for samples that are " + "labeled **not** to belong to the class (a.k.a., 'noise' or " + "'impostor'). " + "It is expected that 'positive' scores are, at least by design, " + "greater than 'negative' scores. " + "So, every 'positive' value that falls bellow the threshold is " + "considered a false-rejection (FR). " + "negative samples that fall above the threshold are considered a " + "false-accept (FA).\n\n" + "Positives that fall on the threshold (exactly) are considered " + "correctly classified. " + "Negatives that fall on the threshold (exactly) are considered " + "**incorrectly** classified. 
" + "This equivalent to setting the comparison like this pseudo-code:\n\n" + " foreach (positive as K) if K < threshold: falseRejectionCount += " + "1\n\n" + " foreach (negative as K) if K >= threshold: falseAcceptCount += " + "1\n\n" + "The output is in form of a tuple of two double-precision real " + "numbers. " + "The numbers range from 0 to 1. " + "The first element of the pair is the false-accept ratio (FAR), the " + "second element the false-rejection ratio (FRR).\n\n" + "The threshold value does not necessarily have to fall in the " + "range covered by the input scores (negatives and positives " + "altogether), but if it does not, the output will be either (1.0, 0.0) " + "or (0.0, 1.0), depending on the side the threshold falls.\n\n" + "It is possible that scores are inverted in the negative/positive " + "sense. " + "In some setups the designer may have setup the system so 'positive' " + "samples have a smaller score than the 'negative' ones. " + "In this case, make sure you normalize the scores so positive samples " + "have greater scores before feeding them into this method.") + .add_prototype("negatives, positives, threshold", "far, frr") + .add_parameter( + "negatives", "array_like(1D, float)", + "The scores for comparisons of objects of different classes") + .add_parameter( + "positives", "array_like(1D, float)", + "The scores for comparisons of objects of the same class") + .add_parameter("threshold", "float", "The threshold to separate " + "correctly and incorrectly " + "classified scores") + .add_return("far", "float", + "The False Accept Rate (FAR) for the given threshold") + .add_return("frr", "float", + "The False Reject Rate (FRR) for the given threshold"); +static PyObject *farfrr(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + char **kwlist = farfrr_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; double threshold; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&d", - kwlist, - &double1d_converter, &neg, - 
&double1d_converter, &pos, - &threshold - )) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&d", kwlist, + &double1d_converter, &neg, + &double1d_converter, &pos, &threshold)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); - auto result = bob::measure::farfrr( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - threshold); + auto result = + bob::measure::farfrr(*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), threshold); return Py_BuildValue("dd", result.first, result.second); -BOB_CATCH_FUNCTION("farfrr", 0) + BOB_CATCH_FUNCTION("farfrr", 0) } -static auto eer_threshold_doc = bob::extension::FunctionDoc( - "eer_threshold", - "Calculates the threshold that is as close as possible to the equal-error-rate (EER) for the given input data", - "The EER should be the point where the FAR equals the FRR. " - "Graphically, this would be equivalent to the intersection between the ROC (or DET) curves and the identity.\n\n" - ".. 
note::\n\n" - " The scores will be sorted internally, requiring the scores to be copied.\n" - " To avoid this copy, you can sort both sets of scores externally in ascendant order, and set the is_sorted parameter to True" -) -.add_prototype("negatives, positives, [is_sorted]", "threshold") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the threshold") -.add_parameter("is_sorted", "bool", "[Default: False] Are both sets of scores already in ascendantly sorted order?") -.add_return("threshold", "float", "The threshold (i.e., as used in :py:func:bob.measure.farfrr) where FAR and FRR are as close as possible") -; -static PyObject* eer_threshold(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - char** kwlist = eer_threshold_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; - PyObject* is_sorted = Py_False; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|O", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &is_sorted - )) return 0; - - //protects acquired resources through this scope +static auto eer_threshold_doc = + bob::extension::FunctionDoc( + "eer_threshold", "Calculates the threshold that is as close as " + "possible to the equal-error-rate (EER) for the given " + "input data", + "The EER should be the point where the FAR equals the FRR. " + "Graphically, this would be equivalent to the intersection between the " + "ROC (or DET) curves and the identity.\n\n" + ".. 
note::\n\n" + " The scores will be sorted internally, requiring the scores to be " + "copied.\n" + " To avoid this copy, you can sort both sets of scores externally in " + "ascendant order, and set the is_sorted parameter to True") + .add_prototype("negatives, positives, [is_sorted]", "threshold") + .add_parameter( + "negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the threshold") + .add_parameter("is_sorted", "bool", "[Default: False] Are both " + "sets of scores already in " + "ascendantly sorted order?") + .add_return("threshold", "float", "The threshold (i.e., as used in " + ":py:func:bob.measure.farfrr) " + "where FAR and FRR are as close as " + "possible"); +static PyObject *eer_threshold(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + char **kwlist = eer_threshold_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; + PyObject *is_sorted = Py_False; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|O", kwlist, + &double1d_converter, &neg, + &double1d_converter, &pos, &is_sorted)) + return 0; + + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); double result = bob::measure::eerThreshold( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - PyObject_IsTrue(is_sorted)); + *PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), PyObject_IsTrue(is_sorted)); return Py_BuildValue("d", result); -BOB_CATCH_FUNCTION("eer_threshold", 0) + BOB_CATCH_FUNCTION("eer_threshold", 0) } -static auto min_weighted_error_rate_threshold_doc = bob::extension::FunctionDoc( - "min_weighted_error_rate_threshold", - "Calculates the threshold that minimizes the error rate for the given input data", - "The cost parameter determines the relative importance between false-accepts and false-rejections. " - "This number should be between 0 and 1 and will be clipped to those extremes. 
" - "The value to minimize becomes: :math:ER_{cost} = cost * FAR + (1-cost) * FRR. " - "The higher the cost, the higher the importance given to **not** making mistakes classifying negatives/noise/impostors.\n\n" - ".. note:: " - "The scores will be sorted internally, requiring the scores to be copied. " - "To avoid this copy, you can sort both sets of scores externally in ascendant order, and set the is_sorted parameter to True" -) -.add_prototype("negatives, positives, cost, [is_sorted]", "threshold") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the threshold") -.add_parameter("cost", "float", "The relative cost over FAR with respect to FRR in the threshold calculation") -.add_parameter("is_sorted", "bool", "[Default: False] Are both sets of scores already in ascendantly sorted order?") -.add_return("threshold", "float", "The threshold for which the weighted error rate is minimal") -; -static PyObject* min_weighted_error_rate_threshold(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - char** kwlist = min_weighted_error_rate_threshold_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto min_weighted_error_rate_threshold_doc = + bob::extension::FunctionDoc( + "min_weighted_error_rate_threshold", "Calculates the threshold that " + "minimizes the error rate for the " + "given input data", + "The cost parameter determines the relative importance between " + "false-accepts and false-rejections. " + "This number should be between 0 and 1 and will be clipped to those " + "extremes. " + "The value to minimize becomes: :math:ER_{cost} = cost * FAR + " + "(1-cost) * FRR. " + "The higher the cost, the higher the importance given to **not** " + "making mistakes classifying negatives/noise/impostors.\n\n" + ".. note:: " + "The scores will be sorted internally, requiring the scores to be " + "copied. 
" + "To avoid this copy, you can sort both sets of scores externally in " + "ascendant order, and set the is_sorted parameter to True") + .add_prototype("negatives, positives, cost, [is_sorted]", "threshold") + .add_parameter( + "negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the threshold") + .add_parameter("cost", "float", "The relative cost over FAR with " + "respect to FRR in the threshold " + "calculation") + .add_parameter("is_sorted", "bool", "[Default: False] Are both " + "sets of scores already in " + "ascendantly sorted order?") + .add_return( + "threshold", "float", + "The threshold for which the weighted error rate is minimal"); +static PyObject *min_weighted_error_rate_threshold(PyObject *, PyObject *args, + PyObject *kwds) { + BOB_TRY + char **kwlist = min_weighted_error_rate_threshold_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; double cost; - PyObject* is_sorted = Py_False; + PyObject *is_sorted = Py_False; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&d|O", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &cost, - &is_sorted - )) return 0; + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O&O&d|O", kwlist, &double1d_converter, &neg, + &double1d_converter, &pos, &cost, &is_sorted)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); double result = bob::measure::minWeightedErrorRateThreshold( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - cost, + *PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), cost, PyObject_IsTrue(is_sorted)); return Py_BuildValue("d", result); -BOB_CATCH_FUNCTION("min_weighted_error_rate_threshold", 0) + BOB_CATCH_FUNCTION("min_weighted_error_rate_threshold", 0) } -static auto min_hter_threshold_doc = bob::extension::FunctionDoc( - "min_hter_threshold", - "Calculates the 
:py:func:bob.measure.min_weighted_error_rate_threshold with cost=0.5" -) -.add_prototype("negatives, positives, [is_sorted]", "threshold") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the threshold") -.add_parameter("is_sorted", "bool", "[Default: False] Are both sets of scores already in ascendantly sorted order?") -.add_return("threshold", "float", "The threshold for which the weighted error rate is minimal") -; -static PyObject* min_hter_threshold(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - char** kwlist = min_hter_threshold_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; - PyObject* is_sorted = Py_False; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|O", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &is_sorted - )) return 0; - - //protects acquired resources through this scope +static auto min_hter_threshold_doc = + bob::extension::FunctionDoc("min_hter_threshold", + "Calculates the " + ":py:func:bob.measure.min_weighted_error_rate_" + "threshold with cost=0.5") + .add_prototype("negatives, positives, [is_sorted]", "threshold") + .add_parameter( + "negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the threshold") + .add_parameter("is_sorted", "bool", "[Default: False] Are both " + "sets of scores already in " + "ascendantly sorted order?") + .add_return( + "threshold", "float", + "The threshold for which the weighted error rate is minimal"); +static PyObject *min_hter_threshold(PyObject *, PyObject *args, + PyObject *kwds) { + BOB_TRY + char **kwlist = min_hter_threshold_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; + PyObject *is_sorted = Py_False; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|O", kwlist, + &double1d_converter, &neg, + &double1d_converter, &pos, &is_sorted)) + return 0; + + // protects acquired resources through this scope 
auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); double result = bob::measure::minHterThreshold( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - PyObject_IsTrue(is_sorted) - ); + *PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), PyObject_IsTrue(is_sorted)); return Py_BuildValue("d", result); -BOB_CATCH_FUNCTION("min_hter_threshold", 0) + BOB_CATCH_FUNCTION("min_hter_threshold", 0) } -static auto precision_recall_doc = bob::extension::FunctionDoc( - "precision_recall", - "Calculates the precision and recall (sensitiveness) values given negative and positive scores and a threshold", - "Precision and recall are computed as:\n\n" - ".. math::\n\n" - " \\mathrm{precision} = \\frac{tp}{tp + fp}\n\n" - " \\mathrm{recall} = \\frac{tp}{tp + fn}\n\n" - "where :math:tp are the true positives, :math:fp are the false positives and :math:fn are the false negatives.\n\n" - "positives holds the score information for samples that are labeled to belong to a certain class (a.k.a., 'signal' or 'client'). " - "negatives holds the score information for samples that are labeled **not** to belong to the class (a.k.a., 'noise' or 'impostor'). " - "For more precise details about how the method considers error rates, see :py:func:bob.measure.farfrr." 
-) -.add_prototype("negatives, positives, threshold", "precision, recall") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the measurements") -.add_parameter("threshold", "float", "The threshold to compute the measures for") -.add_return("precision", "float", "The precision value for the given negatives and positives") -.add_return("recall", "float", "The recall value for the given negatives and positives") -; -static PyObject* precision_recall(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - static char** kwlist = precision_recall_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto precision_recall_doc = + bob::extension::FunctionDoc( + "precision_recall", "Calculates the precision and recall " + "(sensitiveness) values given negative and " + "positive scores and a threshold", + "Precision and recall are computed as:\n\n" + ".. math::\n\n" + " \\mathrm{precision} = \\frac{tp}{tp + fp}\n\n" + " \\mathrm{recall} = \\frac{tp}{tp + fn}\n\n" + "where :math:tp are the true positives, :math:fp are the false " + "positives and :math:fn are the false negatives.\n\n" + "positives holds the score information for samples that are " + "labeled to belong to a certain class (a.k.a., 'signal' or 'client'). " + "negatives holds the score information for samples that are " + "labeled **not** to belong to the class (a.k.a., 'noise' or " + "'impostor'). 
" + "For more precise details about how the method considers error rates, " + "see :py:func:bob.measure.farfrr.") + .add_prototype("negatives, positives, threshold", "precision, recall") + .add_parameter("negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the " + "measurements") + .add_parameter("threshold", "float", + "The threshold to compute the measures for") + .add_return("precision", "float", + "The precision value for the given negatives and positives") + .add_return("recall", "float", + "The recall value for the given negatives and positives"); +static PyObject *precision_recall(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + static char **kwlist = precision_recall_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; double threshold; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&d", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &threshold - )) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&d", kwlist, + &double1d_converter, &neg, + &double1d_converter, &pos, &threshold)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); auto result = bob::measure::precision_recall( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - threshold); + *PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), threshold); return Py_BuildValue("dd", result.first, result.second); -BOB_CATCH_FUNCTION("precision_recall", 0) + BOB_CATCH_FUNCTION("precision_recall", 0) } -static auto f_score_doc = bob::extension::FunctionDoc( - "f_score", - "This method computes the F-score of the accuracy of the classification", - "The F-score is a weighted mean of precision and recall measurements, see :py:func:bob.measure.precision_recall. " - "It is computed as:\n\n" - ".. 
math::\n\n" - " \\mathrm{f-score} = (1 + w^2)\\frac{\\mathrm{precision}\\cdot{}\\mathrm{recall}}{w^2\\cdot{}\\mathrm{precision} + \\mathrm{recall}}\n\n" - "The weight :math:w needs to be non-negative real value. " - "In case the weight parameter is 1 (the default), the F-score is called F1 score and is a harmonic mean between precision and recall values." -) -.add_prototype("negatives, positives, threshold, [weight]", "f_score") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the precision and recall") -.add_parameter("threshold", "float", "The threshold to compute the precision and recall for") -.add_parameter("weight", "float", "[Default: 1] The weight :math:w between precision and recall") -.add_return("f_score", "float", "The computed f-score for the given scores and the given threshold") -; -static PyObject* f_score(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - static char** kwlist = f_score_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto f_score_doc = + bob::extension::FunctionDoc( + "f_score", "This method computes the F-score of the accuracy of the " + "classification", + "The F-score is a weighted mean of precision and recall measurements, " + "see :py:func:bob.measure.precision_recall. " + "It is computed as:\n\n" + ".. math::\n\n" + " \\mathrm{f-score} = (1 + " + "w^2)\\frac{\\mathrm{precision}\\cdot{}\\mathrm{recall}}{w^2\\cdot{}" + "\\mathrm{precision} + \\mathrm{recall}}\n\n" + "The weight :math:w needs to be non-negative real value. 
" + "In case the weight parameter is 1 (the default), the F-score is " + "called F1 score and is a harmonic mean between precision and recall " + "values.") + .add_prototype("negatives, positives, threshold, [weight]", "f_score") + .add_parameter("negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the " + "precision and recall") + .add_parameter("threshold", "float", + "The threshold to compute the precision and recall for") + .add_parameter("weight", "float", "[Default: 1] The weight " + ":math:w between precision and " + "recall") + .add_return("f_score", "float", "The computed f-score for the given " + "scores and the given threshold"); +static PyObject *f_score(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + static char **kwlist = f_score_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; double threshold; double weight = 1.0; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&d|d", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &threshold, &weight - )) return 0; + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O&O&d|d", kwlist, &double1d_converter, &neg, + &double1d_converter, &pos, &threshold, &weight)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); - auto result = bob::measure::f_score( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - threshold, weight); + auto result = bob::measure::f_score(*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), + threshold, weight); - return Py_BuildValue("d",result); -BOB_CATCH_FUNCTION("f_score", 0) + return Py_BuildValue("d", result); + BOB_CATCH_FUNCTION("f_score", 0) } -static auto correctly_classified_negatives_doc = bob::extension::FunctionDoc( - "correctly_classified_negatives", - "This method returns an array composed of booleans that pin-point, which negatives 
where correctly classified for the given threshold", - "The pseudo-code for this function is:\n\n" - " foreach (k in negatives) if negatives[k] < threshold: classified[k] = true else: classified[k] = false" -) -.add_prototype("negatives, threshold", "classified") -.add_parameter("negatives", "array_like(1D, float)", "The scores generated by comparing objects of different classes") -.add_parameter("threshold", "float", "The threshold, for which scores should be considered to be correctly classified") -.add_return("classified", "array_like(1D, bool)", "The decision for each of the negatives") -; -static PyObject* correctly_classified_negatives(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - static char** kwlist = correctly_classified_negatives_doc.kwlist(); - - PyBlitzArrayObject* neg; +static auto correctly_classified_negatives_doc = + bob::extension::FunctionDoc( + "correctly_classified_negatives", + "This method returns an array composed of booleans that pin-point, " + "which negatives where correctly classified for the given threshold", + "The pseudo-code for this function is:\n\n" + " foreach (k in negatives) if negatives[k] < threshold: " + "classified[k] = true else: classified[k] = false") + .add_prototype("negatives, threshold", "classified") + .add_parameter( + "negatives", "array_like(1D, float)", + "The scores generated by comparing objects of different classes") + .add_parameter("threshold", "float", "The threshold, for which scores " + "should be considered to be " + "correctly classified") + .add_return("classified", "array_like(1D, bool)", + "The decision for each of the negatives"); +static PyObject *correctly_classified_negatives(PyObject *, PyObject *args, + PyObject *kwds) { + BOB_TRY + static char **kwlist = correctly_classified_negatives_doc.kwlist(); + + PyBlitzArrayObject *neg; double threshold; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&d", - kwlist, - &double1d_converter, &neg, - &threshold - )) return 0; + if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "O&d", kwlist, + &double1d_converter, &neg, &threshold)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto result = bob::measure::correctlyClassifiedNegatives( - *PyBlitzArrayCxx_AsBlitz(neg), - threshold); + *PyBlitzArrayCxx_AsBlitz(neg), threshold); return PyBlitzArrayCxx_AsNumpy(result); -BOB_CATCH_FUNCTION("correctly_classified_negatives", 0) + BOB_CATCH_FUNCTION("correctly_classified_negatives", 0) } -static auto correctly_classified_positives_doc = bob::extension::FunctionDoc( - "correctly_classified_positives", - "This method returns an array composed of booleans that pin-point, which positives where correctly classified for the given threshold", - "The pseudo-code for this function is:\n\n" - " foreach (k in positives) if positives[k] >= threshold: classified[k] = true else: classified[k] = false" -) -.add_prototype("positives, threshold", "classified") -.add_parameter("positives", "array_like(1D, float)", "The scores generated by comparing objects of the same classes") -.add_parameter("threshold", "float", "The threshold, for which scores should be considered to be correctly classified") -.add_return("classified", "array_like(1D, bool)", "The decision for each of the positives") -; -static PyObject* correctly_classified_positives(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - static char** kwlist = correctly_classified_positives_doc.kwlist(); - - PyBlitzArrayObject* pos; +static auto correctly_classified_positives_doc = + bob::extension::FunctionDoc( + "correctly_classified_positives", + "This method returns an array composed of booleans that pin-point, " + "which positives where correctly classified for the given threshold", + "The pseudo-code for this function is:\n\n" + " foreach (k in positives) if positives[k] >= threshold: " + "classified[k] = true else: classified[k] = false") + 
.add_prototype("positives, threshold", "classified") + .add_parameter( + "positives", "array_like(1D, float)", + "The scores generated by comparing objects of the same classes") + .add_parameter("threshold", "float", "The threshold, for which scores " + "should be considered to be " + "correctly classified") + .add_return("classified", "array_like(1D, bool)", + "The decision for each of the positives"); +static PyObject *correctly_classified_positives(PyObject *, PyObject *args, + PyObject *kwds) { + BOB_TRY + static char **kwlist = correctly_classified_positives_doc.kwlist(); + + PyBlitzArrayObject *pos; double threshold; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&d", - kwlist, - &double1d_converter, &pos, - &threshold - )) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&d", kwlist, + &double1d_converter, &pos, &threshold)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto pos_ = make_safe(pos); auto result = bob::measure::correctlyClassifiedPositives( - *PyBlitzArrayCxx_AsBlitz(pos), - threshold); + *PyBlitzArrayCxx_AsBlitz(pos), threshold); return PyBlitzArrayCxx_AsNumpy(result); -BOB_CATCH_FUNCTION("correctly_classified_positives", 0) + BOB_CATCH_FUNCTION("correctly_classified_positives", 0) } -static auto precision_recall_curve_doc = bob::extension::FunctionDoc( - "precision_recall_curve", - "Calculates the precision-recall curve given a set of positive and negative scores and a number of desired points" , - "The points in which the curve is calculated are distributed uniformly in the range [min(negatives, positives), max(negatives, positives)]" -) -.add_prototype("negatives, positives, n_points", "curve") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the measurements") -.add_parameter("n_points", "int", "The number of thresholds for which precision and recall should be evaluated") -.add_return("curve", 
"array_like(2D, float)", "2D array of floats that express the X (precision) and Y (recall) coordinates") -; -static PyObject* precision_recall_curve(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - char** kwlist = precision_recall_curve_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto precision_recall_curve_doc = + bob::extension::FunctionDoc( + "precision_recall_curve", "Calculates the precision-recall curve given " + "a set of positive and negative scores and a " + "number of desired points", + "The points in which the curve is calculated are distributed uniformly " + "in the range [min(negatives, positives), max(negatives, " + "positives)]") + .add_prototype("negatives, positives, n_points", "curve") + .add_parameter("negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the " + "measurements") + .add_parameter("n_points", "int", "The number of thresholds for which " + "precision and recall should be " + "evaluated") + .add_return("curve", "array_like(2D, float)", + "2D array of floats that express the X (precision) and Y " + "(recall) coordinates"); +static PyObject *precision_recall_curve(PyObject *, PyObject *args, + PyObject *kwds) { + BOB_TRY + char **kwlist = precision_recall_curve_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; Py_ssize_t n_points; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&n", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &n_points - )) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&n", kwlist, + &double1d_converter, &neg, + &double1d_converter, &pos, &n_points)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); auto result = bob::measure::precision_recall_curve( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - n_points); + 
*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), n_points); return PyBlitzArrayCxx_AsNumpy(result); -BOB_CATCH_FUNCTION("precision_recall_curve", 0) + BOB_CATCH_FUNCTION("precision_recall_curve", 0) } -static auto far_threshold_doc = bob::extension::FunctionDoc( - "far_threshold", - "Computes the threshold such that the real FAR is **at least** the requested far_value", - ".. note::\n\n" - " The scores will be sorted internally, requiring the scores to be copied.\n" - " To avoid this copy, you can sort the negatives scores externally in ascendant order, and set the is_sorted parameter to True" -) -.add_prototype("negatives, positives, [far_value], [is_sorted]", "threshold") -.add_parameter("negatives", "array_like(1D, float)", "The set of negative scores to compute the FAR threshold") -.add_parameter("positives", "array_like(1D, float)", "Ignored, but needs to be specified -- may be given as []") -.add_parameter("far_value", "float", "[Default: 0.001] The FAR value, for which the threshold should be computed") -.add_parameter("is_sorted", "bool", "[Default: False] Set this to True if the negatives are already sorted in ascending order. If False, scores will be sorted internally, which will require more memory") -.add_return("threshold", "float", "The threshold such that the real FAR is at least far_value") -; -static PyObject* far_threshold(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - static char** kwlist = far_threshold_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto far_threshold_doc = + bob::extension::FunctionDoc( + "far_threshold", "Computes the threshold such that the real FAR is " + "**at least** the requested far_value", + ".. 
note::\n\n" + " The scores will be sorted internally, requiring the scores to be " + "copied.\n" + " To avoid this copy, you can sort the negatives scores " + "externally in ascendant order, and set the is_sorted parameter to " + "True") + .add_prototype("negatives, positives, [far_value], [is_sorted]", + "threshold") + .add_parameter( + "negatives", "array_like(1D, float)", + "The set of negative scores to compute the FAR threshold") + .add_parameter( + "positives", "array_like(1D, float)", + "Ignored, but needs to be specified -- may be given as []") + .add_parameter("far_value", "float", "[Default: 0.001] The FAR " + "value, for which the threshold " + "should be computed") + .add_parameter("is_sorted", "bool", + "[Default: False] Set this to True if the " + "negatives are already sorted in ascending order. " + "If False, scores will be sorted internally, which " + "will require more memory") + .add_return( + "threshold", "float", + "The threshold such that the real FAR is at least far_value"); +static PyObject *far_threshold(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + static char **kwlist = far_threshold_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; double far_value = 0.001; - PyObject* is_sorted = Py_False; + PyObject *is_sorted = Py_False; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|dO", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &far_value, - is_sorted - )) return 0; + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O&O&|dO", kwlist, &double1d_converter, &neg, + &double1d_converter, &pos, &far_value, is_sorted)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); - auto result = bob::measure::farThreshold( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - far_value, - PyObject_IsTrue(is_sorted) - ); + auto result = + 
bob::measure::farThreshold(*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), + far_value, PyObject_IsTrue(is_sorted)); return Py_BuildValue("d", result); -BOB_CATCH_FUNCTION("far_threshold", 0) + BOB_CATCH_FUNCTION("far_threshold", 0) } -static auto frr_threshold_doc = bob::extension::FunctionDoc( - "frr_threshold", - "Computes the threshold such that the real FRR is **at least** the requested frr_value", - ".. note::\n\n" - " The scores will be sorted internally, requiring the scores to be copied.\n" - " To avoid this copy, you can sort the positives scores externally in ascendant order, and set the is_sorted parameter to True" -) -.add_prototype("negatives, positives, [frr_value], [is_sorted]", "threshold") -.add_parameter("negatives", "array_like(1D, float)", "Ignored, but needs to be specified -- may be given as []") -.add_parameter("positives", "array_like(1D, float)", "The set of positive scores to compute the FRR threshold") -.add_parameter("frr_value", "float", "[Default: 0.001] The FRR value, for which the threshold should be computed") -.add_parameter("is_sorted", "bool", "[Default: False] Set this to True if the positives are already sorted in ascendant order. If False, scores will be sorted internally, which will require more memory") -.add_return("threshold", "float", "The threshold such that the real FRR is at least frr_value") -; -static PyObject* frr_threshold(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - char** kwlist = frr_threshold_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; +static auto frr_threshold_doc = + bob::extension::FunctionDoc( + "frr_threshold", "Computes the threshold such that the real FRR is " + "**at least** the requested frr_value", + ".. 
note::\n\n" + " The scores will be sorted internally, requiring the scores to be " + "copied.\n" + " To avoid this copy, you can sort the positives scores " + "externally in ascendant order, and set the is_sorted parameter to " + "True") + .add_prototype("negatives, positives, [frr_value], [is_sorted]", + "threshold") + .add_parameter( + "negatives", "array_like(1D, float)", + "Ignored, but needs to be specified -- may be given as []") + .add_parameter( + "positives", "array_like(1D, float)", + "The set of positive scores to compute the FRR threshold") + .add_parameter("frr_value", "float", "[Default: 0.001] The FRR " + "value, for which the threshold " + "should be computed") + .add_parameter("is_sorted", "bool", + "[Default: False] Set this to True if the " + "positives are already sorted in ascendant order. " + "If False, scores will be sorted internally, which " + "will require more memory") + .add_return( + "threshold", "float", + "The threshold such that the real FRR is at least frr_value"); +static PyObject *frr_threshold(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + char **kwlist = frr_threshold_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; double frr_value = 0.001; - PyObject* is_sorted = Py_False; + PyObject *is_sorted = Py_False; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|dO", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &frr_value, - &is_sorted - )) return 0; + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O&O&|dO", kwlist, &double1d_converter, &neg, + &double1d_converter, &pos, &frr_value, &is_sorted)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); - auto result = bob::measure::frrThreshold( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - frr_value, - PyObject_IsTrue(is_sorted) - ); + auto result = + 
bob::measure::frrThreshold(*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), + frr_value, PyObject_IsTrue(is_sorted)); return Py_BuildValue("d", result); -BOB_CATCH_FUNCTION("frr_threshold", 0) + BOB_CATCH_FUNCTION("frr_threshold", 0) } -static auto eer_rocch_doc = bob::extension::FunctionDoc( - "eer_rocch", - "Calculates the equal-error-rate (EER) given the input data, on the ROC Convex Hull (ROCCH)", - "It replicates the EER calculation from the Bosaris toolkit (https://sites.google.com/site/bosaristoolkit/)." -) -.add_prototype("negatives, positives", "threshold") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the threshold") -.add_return("threshold", "float", "The threshold for the equal error rate") -; -static PyObject* eer_rocch(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY +static auto eer_rocch_doc = + bob::extension::FunctionDoc( + "eer_rocch", "Calculates the equal-error-rate (EER) given the input " + "data, on the ROC Convex Hull (ROCCH)", + "It replicates the EER calculation from the Bosaris toolkit " + "(https://sites.google.com/site/bosaristoolkit/).") + .add_prototype("negatives, positives", "threshold") + .add_parameter( + "negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the threshold") + .add_return("threshold", "float", + "The threshold for the equal error rate"); +static PyObject *eer_rocch(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY /* Parses input arguments in a single shot */ - char** kwlist = eer_rocch_doc.kwlist(); + char **kwlist = eer_rocch_doc.kwlist(); - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos - )) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&", kwlist, + &double1d_converter, &neg, 
+ &double1d_converter, &pos)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); - auto result = bob::measure::eerRocch( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos) - ); + auto result = + bob::measure::eerRocch(*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos)); return Py_BuildValue("d", result); -BOB_CATCH_FUNCTION("eer_rocch", 0) + BOB_CATCH_FUNCTION("eer_rocch", 0) } -static auto rocch_doc = bob::extension::FunctionDoc( - "rocch", - "Calculates the ROC Convex Hull (ROCCH) curve given a set of positive and negative scores" -) -.add_prototype("negatives, positives", "curve") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the curve") -.add_return("curve", "array_like(2D, float)", "The ROC curve, with the first row containing the FAR, and the second row containing the FRR") -; -static PyObject* rocch(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY +static auto rocch_doc = + bob::extension::FunctionDoc("rocch", "Calculates the ROC Convex Hull " + "(ROCCH) curve given a set of " + "positive and negative scores") + .add_prototype("negatives, positives", "curve") + .add_parameter( + "negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the curve") + .add_return("curve", "array_like(2D, float)", + "The ROC curve, with the first row containing the FAR, and " + "the second row containing the FRR"); +static PyObject *rocch(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY /* Parses input arguments in a single shot */ - char** kwlist = rocch_doc.kwlist(); + char **kwlist = rocch_doc.kwlist(); - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&", - kwlist, - &double1d_converter, &neg, - 
&double1d_converter, &pos - )) return 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&", kwlist, + &double1d_converter, &neg, + &double1d_converter, &pos)) + return 0; - //protects acquired resources through this scope + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); - auto result = bob::measure::rocch( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos) - ); + auto result = bob::measure::rocch(*PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos)); return PyBlitzArrayCxx_AsNumpy(result); -BOB_CATCH_FUNCTION("rocch", 0) + BOB_CATCH_FUNCTION("rocch", 0) } -static auto rocch2eer_doc = bob::extension::FunctionDoc( - "rocch2eer", - "Calculates the threshold that is as close as possible to the equal-error-rate (EER) given the input data" -) -.add_prototype("pmiss_pfa", "threshold") -// I don't know, what the pmiss_pfa parameter is, so I leave out its documentation (a .. todo:: will be generated automatically) -//.add_parameter("pmiss_pfa", "array_like(2D, float)", "???") -.add_return("threshold", "float", "The computed threshold, at which the EER can be obtained") -; -static PyObject* rocch2eer(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY - static char** kwlist = rocch2eer_doc.kwlist(); - - PyBlitzArrayObject* p; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", - kwlist, - &double2d_converter, &p - )) return 0; +static auto rocch2eer_doc = + bob::extension::FunctionDoc( + "rocch2eer", "Calculates the threshold that is as close as possible to " + "the equal-error-rate (EER) given the input data") + .add_prototype("pmiss_pfa", "threshold") + // I don't know, what the pmiss_pfa parameter is, so I leave out its + // documentation (a .. 
todo:: will be generated automatically) + //.add_parameter("pmiss_pfa", "array_like(2D, float)", "???") + .add_return("threshold", "float", + "The computed threshold, at which the EER can be obtained"); +static PyObject *rocch2eer(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY + static char **kwlist = rocch2eer_doc.kwlist(); + + PyBlitzArrayObject *p; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&", kwlist, + &double2d_converter, &p)) + return 0; auto p_ = make_safe(p); - auto result = bob::measure::rocch2eer(*PyBlitzArrayCxx_AsBlitz(p)); + auto result = bob::measure::rocch2eer(*PyBlitzArrayCxx_AsBlitz(p)); return Py_BuildValue("d", result); -BOB_CATCH_FUNCTION("rocch2eer", 0) + BOB_CATCH_FUNCTION("rocch2eer", 0) } -static auto roc_for_far_doc = bob::extension::FunctionDoc( - "roc_for_far", - "Calculates the ROC curve for a given set of positive and negative scores and the FAR values, for which the FRR should be computed", - ".. note::\n\n" - " The scores will be sorted internally, requiring the scores to be copied.\n" - " To avoid this copy, you can sort both sets of scores externally in ascendant order, and set the is_sorted parameter to True" -) -.add_prototype("negatives, positives, far_list, [is_sorted]", "curve") -.add_parameter("negatives, positives", "array_like(1D, float)", "The set of negative and positive scores to compute the curve") -.add_parameter("far_list", "array_like(1D, float)", "A list of FAR values, for which the FRR values should be computed") -.add_parameter("is_sorted", "bool", "[Default: False] Set this to True if both sets of scores are already sorted in ascending order. 
If False, scores will be sorted internally, which will require more memory") -.add_return("curve", "array_like(2D, float)", "The ROC curve, which holds a copy of the given FAR values in row 0, and the corresponding FRR values in row 1") -; -static PyObject* roc_for_far(PyObject*, PyObject* args, PyObject* kwds) { -BOB_TRY +static auto roc_for_far_doc = + bob::extension::FunctionDoc( + "roc_for_far", "Calculates the ROC curve for a given set of positive " + "and negative scores and the FAR values, for which the " + "FRR should be computed", + ".. note::\n\n" + " The scores will be sorted internally, requiring the scores to be " + "copied.\n" + " To avoid this copy, you can sort both sets of scores externally in " + "ascendant order, and set the is_sorted parameter to True") + .add_prototype("negatives, positives, far_list, [is_sorted]", "curve") + .add_parameter( + "negatives, positives", "array_like(1D, float)", + "The set of negative and positive scores to compute the curve") + .add_parameter( + "far_list", "array_like(1D, float)", + "A list of FAR values, for which the FRR values should be computed") + .add_parameter("is_sorted", "bool", + "[Default: False] Set this to True if both sets " + "of scores are already sorted in ascending order. 
If " + "False, scores will be sorted internally, which " + "will require more memory") + .add_return("curve", "array_like(2D, float)", + "The ROC curve, which holds a copy of the given FAR values " + "in row 0, and the corresponding FRR values in row 1"); +static PyObject *roc_for_far(PyObject *, PyObject *args, PyObject *kwds) { + BOB_TRY /* Parses input arguments in a single shot */ - char** kwlist = roc_for_far_doc.kwlist(); - - PyBlitzArrayObject* neg; - PyBlitzArrayObject* pos; - PyBlitzArrayObject* far; - PyObject* is_sorted = Py_False; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&O&|O", - kwlist, - &double1d_converter, &neg, - &double1d_converter, &pos, - &double1d_converter, &far, - &is_sorted - )) return 0; - - //protects acquired resources through this scope + char **kwlist = roc_for_far_doc.kwlist(); + + PyBlitzArrayObject *neg; + PyBlitzArrayObject *pos; + PyBlitzArrayObject *far; + PyObject *is_sorted = Py_False; + + if (!PyArg_ParseTupleAndKeywords( + args, kwds, "O&O&O&|O", kwlist, &double1d_converter, &neg, + &double1d_converter, &pos, &double1d_converter, &far, &is_sorted)) + return 0; + + // protects acquired resources through this scope auto neg_ = make_safe(neg); auto pos_ = make_safe(pos); auto far_ = make_safe(far); auto result = bob::measure::roc_for_far( - *PyBlitzArrayCxx_AsBlitz(neg), - *PyBlitzArrayCxx_AsBlitz(pos), - *PyBlitzArrayCxx_AsBlitz(far), - PyObject_IsTrue(is_sorted) - ); + *PyBlitzArrayCxx_AsBlitz(neg), + *PyBlitzArrayCxx_AsBlitz(pos), + *PyBlitzArrayCxx_AsBlitz(far), PyObject_IsTrue(is_sorted)); return PyBlitzArrayCxx_AsNumpy(result); -BOB_CATCH_FUNCTION("roc_for_far", 0) + BOB_CATCH_FUNCTION("roc_for_far", 0) } static PyMethodDef module_methods[] = { - { - epc_doc.name(), - (PyCFunction)epc, - METH_VARARGS|METH_KEYWORDS, - epc_doc.doc() - }, - { - det_doc.name(), - (PyCFunction)det, - METH_VARARGS|METH_KEYWORDS, - det_doc.doc() - }, - { - ppndf_doc.name(), - (PyCFunction)ppndf, - METH_VARARGS|METH_KEYWORDS, - 
ppndf_doc.doc() - }, - { - roc_doc.name(), - (PyCFunction)roc, - METH_VARARGS|METH_KEYWORDS, - roc_doc.doc() - }, - { - farfrr_doc.name(), - (PyCFunction)farfrr, - METH_VARARGS|METH_KEYWORDS, - farfrr_doc.doc() - }, - { - eer_threshold_doc.name(), - (PyCFunction)eer_threshold, - METH_VARARGS|METH_KEYWORDS, - eer_threshold_doc.doc() - }, - { - min_weighted_error_rate_threshold_doc.name(), - (PyCFunction)min_weighted_error_rate_threshold, - METH_VARARGS|METH_KEYWORDS, - min_weighted_error_rate_threshold_doc.doc() - }, - { - min_hter_threshold_doc.name(), - (PyCFunction)min_hter_threshold, - METH_VARARGS|METH_KEYWORDS, - min_hter_threshold_doc.doc() - }, - { - precision_recall_doc.name(), - (PyCFunction)precision_recall, - METH_VARARGS|METH_KEYWORDS, - precision_recall_doc.doc() - }, - { - f_score_doc.name(), - (PyCFunction)f_score, - METH_VARARGS|METH_KEYWORDS, - f_score_doc.doc() - }, - { - correctly_classified_negatives_doc.name(), - (PyCFunction)correctly_classified_negatives, - METH_VARARGS|METH_KEYWORDS, - correctly_classified_negatives_doc.doc() - }, - { - correctly_classified_positives_doc.name(), - (PyCFunction)correctly_classified_positives, - METH_VARARGS|METH_KEYWORDS, - correctly_classified_positives_doc.doc() - }, - { - precision_recall_curve_doc.name(), - (PyCFunction)precision_recall_curve, - METH_VARARGS|METH_KEYWORDS, - precision_recall_curve_doc.doc() - }, - { - far_threshold_doc.name(), - (PyCFunction)far_threshold, - METH_VARARGS|METH_KEYWORDS, - far_threshold_doc.doc() - }, - { - frr_threshold_doc.name(), - (PyCFunction)frr_threshold, - METH_VARARGS|METH_KEYWORDS, - frr_threshold_doc.doc() - }, - { - eer_rocch_doc.name(), - (PyCFunction)eer_rocch, - METH_VARARGS|METH_KEYWORDS, - eer_rocch_doc.doc() - }, - { - rocch_doc.name(), - (PyCFunction)rocch, - METH_VARARGS|METH_KEYWORDS, - rocch_doc.doc() - }, - { - rocch2eer_doc.name(), - (PyCFunction)rocch2eer, - METH_VARARGS|METH_KEYWORDS, - rocch2eer_doc.doc() - }, - { - roc_for_far_doc.name(), - 
(PyCFunction)roc_for_far, - METH_VARARGS|METH_KEYWORDS, - roc_for_far_doc.doc() - }, - {0} /* Sentinel */ + {epc_doc.name(), (PyCFunction)epc, METH_VARARGS | METH_KEYWORDS, + epc_doc.doc()}, + {det_doc.name(), (PyCFunction)det, METH_VARARGS | METH_KEYWORDS, + det_doc.doc()}, + {ppndf_doc.name(), (PyCFunction)ppndf, METH_VARARGS | METH_KEYWORDS, + ppndf_doc.doc()}, + {roc_doc.name(), (PyCFunction)roc, METH_VARARGS | METH_KEYWORDS, + roc_doc.doc()}, + {farfrr_doc.name(), (PyCFunction)farfrr, METH_VARARGS | METH_KEYWORDS, + farfrr_doc.doc()}, + {eer_threshold_doc.name(), (PyCFunction)eer_threshold, + METH_VARARGS | METH_KEYWORDS, eer_threshold_doc.doc()}, + {min_weighted_error_rate_threshold_doc.name(), + (PyCFunction)min_weighted_error_rate_threshold, + METH_VARARGS | METH_KEYWORDS, min_weighted_error_rate_threshold_doc.doc()}, + {min_hter_threshold_doc.name(), (PyCFunction)min_hter_threshold, + METH_VARARGS | METH_KEYWORDS, min_hter_threshold_doc.doc()}, + {precision_recall_doc.name(), (PyCFunction)precision_recall, + METH_VARARGS | METH_KEYWORDS, precision_recall_doc.doc()}, + {f_score_doc.name(), (PyCFunction)f_score, METH_VARARGS | METH_KEYWORDS, + f_score_doc.doc()}, + {correctly_classified_negatives_doc.name(), + (PyCFunction)correctly_classified_negatives, METH_VARARGS | METH_KEYWORDS, + correctly_classified_negatives_doc.doc()}, + {correctly_classified_positives_doc.name(), + (PyCFunction)correctly_classified_positives, METH_VARARGS | METH_KEYWORDS, + correctly_classified_positives_doc.doc()}, + {precision_recall_curve_doc.name(), (PyCFunction)precision_recall_curve, + METH_VARARGS | METH_KEYWORDS, precision_recall_curve_doc.doc()}, + {far_threshold_doc.name(), (PyCFunction)far_threshold, + METH_VARARGS | METH_KEYWORDS, far_threshold_doc.doc()}, + {frr_threshold_doc.name(), (PyCFunction)frr_threshold, + METH_VARARGS | METH_KEYWORDS, frr_threshold_doc.doc()}, + {eer_rocch_doc.name(), (PyCFunction)eer_rocch, METH_VARARGS | METH_KEYWORDS, + eer_rocch_doc.doc()}, 
+ {rocch_doc.name(), (PyCFunction)rocch, METH_VARARGS | METH_KEYWORDS, + rocch_doc.doc()}, + {rocch2eer_doc.name(), (PyCFunction)rocch2eer, METH_VARARGS | METH_KEYWORDS, + rocch2eer_doc.doc()}, + {roc_for_far_doc.name(), (PyCFunction)roc_for_far, + METH_VARARGS | METH_KEYWORDS, roc_for_far_doc.doc()}, + {0} /* Sentinel */ }; PyDoc_STRVAR(module_docstr, "Bob metrics and performance figures"); #if PY_VERSION_HEX >= 0x03000000 -static PyModuleDef module_definition = { - PyModuleDef_HEAD_INIT, - BOB_EXT_MODULE_NAME, - module_docstr, - -1, - module_methods, - 0, 0, 0, 0 -}; +static PyModuleDef module_definition = {PyModuleDef_HEAD_INIT, + BOB_EXT_MODULE_NAME, + module_docstr, + -1, + module_methods, + 0, + 0, + 0, + 0}; #endif -static PyObject* create_module (void) { +static PyObject *create_module(void) { -# if PY_VERSION_HEX >= 0x03000000 - PyObject* m = PyModule_Create(&module_definition); +#if PY_VERSION_HEX >= 0x03000000 + PyObject *m = PyModule_Create(&module_definition); auto m_ = make_xsafe(m); - const char* ret = "O"; -# else - PyObject* m = Py_InitModule3(BOB_EXT_MODULE_NAME, module_methods, module_docstr); - const char* ret = "N"; -# endif - if (!m) return 0; + const char *ret = "O"; +#else + PyObject *m = + Py_InitModule3(BOB_EXT_MODULE_NAME, module_methods, module_docstr); + const char *ret = "N"; +#endif + if (!m) + return 0; /* imports bob.blitz C-API + dependencies */ - if (import_bob_blitz() < 0) return 0; - if (import_bob_core_logging() < 0) return 0; - if (import_bob_io_base() < 0) return 0; + if (import_bob_blitz() < 0) + return 0; + if (import_bob_core_logging() < 0) + return 0; + if (import_bob_io_base() < 0) + return 0; return Py_BuildValue(ret, m); } -PyMODINIT_FUNC BOB_EXT_ENTRY_NAME (void) { -# if PY_VERSION_HEX >= 0x03000000 +PyMODINIT_FUNC BOB_EXT_ENTRY_NAME(void) { +#if PY_VERSION_HEX >= 0x03000000 return -# endif - create_module(); +#endif + create_module(); } diff --git a/bob/measure/openbr.py b/bob/measure/openbr.py index 22fe26e..e32bdb0 
100644 --- a/bob/measure/openbr.py +++ b/bob/measure/openbr.py @@ -15,15 +15,15 @@ from .load import open_file, four_column, five_column def write_matrix( - score_file, - matrix_file, - mask_file, - model_names = None, - probe_names = None, - score_file_format = '4column', - gallery_file_name = 'unknown-gallery.lst', - probe_file_name = 'unknown-probe.lst', - search = None): + score_file, + matrix_file, + mask_file, + model_names=None, + probe_names=None, + score_file_format='4column', + gallery_file_name='unknown-gallery.lst', + probe_file_name='unknown-probe.lst', + search=None): """Writes the OpenBR matrix and mask files (version 2), given a score file. If gallery and probe names are provided, the matrices in both files will be @@ -87,12 +87,14 @@ def write_matrix( """ def _write_matrix(filename, matrix): - ## Helper function to write a matrix file as required by OpenBR + # Helper function to write a matrix file as required by OpenBR with open(filename, 'wb') as f: # write the first four lines - header = "S2\n%s\n%s\nM%s %d %d " % (gallery_file_name, probe_file_name, 'B' if matrix.dtype == numpy.uint8 else 'F', matrix.shape[0], matrix.shape[1]) + header = "S2\n%s\n%s\nM%s %d %d " % ( + gallery_file_name, probe_file_name, 'B' if matrix.dtype == numpy.uint8 else 'F', matrix.shape[0], matrix.shape[1]) footer = "\n" - if sys.version_info[0] > 2: header, footer = header.encode('utf-8'), footer.encode('utf-8') + if sys.version_info[0] > 2: + header, footer = header.encode('utf-8'), footer.encode('utf-8') f.write(header) # write magic number numpy.array(0x12345678, numpy.int32).tofile(f) @@ -100,10 +102,10 @@ def write_matrix( # write the matrix matrix.tofile(f) - # define read functions, and which information should be read - read_function = {'4column' : four_column, '5column' : five_column}[score_file_format] - offset = {'4column' : 0, '5column' : 1}[score_file_format] + read_function = {'4column': four_column, + '5column': five_column}[score_file_format] + offset = 
{'4column': 0, '5column': 1}[score_file_format] # first, read the score file and estimate model and probe names, if not given if model_names is None or probe_names is None: @@ -112,7 +114,7 @@ def write_matrix( # read the score file for line in read_function(score_file): - model, probe = line[offset], line[2+offset] + model, probe = line[offset], line[2 + offset] if model not in model_set: model_names.append(model) model_set.add(model) @@ -121,10 +123,13 @@ def write_matrix( probe_set.add(probe) if search is None: - # create a shortcut to get indices for client and probe subset (to increase speed) + # create a shortcut to get indices for client and probe subset (to + # increase speed) model_dict, probe_dict = {}, {} - for i,m in enumerate(model_names): model_dict[m]=i - for i,p in enumerate(probe_names): probe_dict[p]=i + for i, m in enumerate(model_names): + model_dict[m] = i + for i, p in enumerate(probe_names): + probe_dict[p] = i # create the matrices in the desired size matrix = numpy.ndarray((len(probe_names), len(model_names)), numpy.float32) @@ -133,7 +138,8 @@ def write_matrix( # now, iterate through the score file and fill in the matrix for line in read_function(score_file): - client, model, id, probe, score = line[0], line[offset], line[1+offset], line[2+offset], line[3+offset] + client, model, id, probe, score = line[0], line[offset], line[ + 1 + offset], line[2 + offset], line[3 + offset] assert model in model_dict, "model " + model + " unknown" assert probe in probe_dict, "probe " + probe + " unknown" @@ -143,7 +149,8 @@ def write_matrix( # check, if we have already written something into that matrix element if mask[probe_index, model_index]: - logger.warn("Overwriting existing matrix '%f' element of client '%s' and probe '%s' with '%f'", matrix[probe_index, model_index], client, probe, score) + logger.warn("Overwriting existing matrix '%f' element of client '%s' and probe '%s' with '%f'", matrix[ + probe_index, model_index], client, probe, score) 
matrix[probe_index, model_index] = score mask[probe_index, model_index] = 0xff if client == id else 0x7f @@ -161,7 +168,8 @@ def write_matrix( # get the scores, sorted by probe scores = {} for line in read_function(score_file): - client, model, id, probe, score = line[0], line[offset], line[1+offset], line[2+offset], line[3+offset] + client, model, id, probe, score = line[0], line[offset], line[ + 1 + offset], line[2 + offset], line[3 + offset] if probe not in scores: scores[probe] = [] @@ -169,14 +177,14 @@ def write_matrix( # go ahead and sort the scores per probe sorted_scores = {} - for k,v in scores.items(): sorted_scores[k] = sorted(v, key=lambda x: x[0], reverse=True) + for k, v in scores.items(): + sorted_scores[k] = sorted(v, key=lambda x: x[0], reverse=True) # now, write matrix for p, probe in enumerate(probe_names): if probe in scores: for m in range(min(search, len(sorted_scores[probe]))): - matrix[p,m], mask[p,m] = sorted_scores[probe][m] - + matrix[p, m], mask[p, m] = sorted_scores[probe][m] # OK, now finally write the file in the desired format _write_matrix(mask_file, mask) @@ -187,12 +195,12 @@ def write_score_file( matrix_file, mask_file, score_file, - models_ids = None, - probes_ids = None, - model_names = None, - probe_names = None, - score_file_format = '4column', - replace_nan = None + models_ids=None, + probes_ids=None, + model_names=None, + probe_names=None, + score_file_format='4column', + replace_nan=None ): """Writes the Bob score file in the desired format from OpenBR files. 
@@ -266,32 +274,36 @@ def write_score_file( """ def _read_matrix(filename): - py3 = sys.version_info[0] >=3 - ## Helper function to read a matrix file as written by OpenBR + py3 = sys.version_info[0] >= 3 + # Helper function to read a matrix file as written by OpenBR with open(filename, 'rb') as f: # get version header = f.readline() - if py3: header = header.decode("utf-8") + if py3: + header = header.decode("utf-8") assert header[:2] == "S2" # skip gallery and probe files f.readline() f.readline() # read size and type of matrix size = f.readline() - if py3: size = size.decode("utf-8") + if py3: + size = size.decode("utf-8") splits = size.rstrip().split() # TODO: check the endianess of the magic number stored in split[3] assert splits[0][0] == 'M' - w,h = int(splits[1]), int(splits[2]) + w, h = int(splits[1]), int(splits[2]) # read matrix data - data = numpy.fromfile(f, dtype={'B':numpy.uint8, 'F': numpy.float32}[splits[0][1]]) - assert data.shape[0] == w*h - data.shape = (w,h) + data = numpy.fromfile( + f, dtype={'B': numpy.uint8, 'F': numpy.float32}[splits[0][1]]) + assert data.shape[0] == w * h + data.shape = (w, h) return data # check parameters if score_file_format not in ("4column", "5column"): - raise ValueError("The given score file format %s is not known; choose one of ('4column', '5column')" % score_file_format) + raise ValueError( + "The given score file format %s is not known; choose one of ('4column', '5column')" % score_file_format) # get type of score file four_col = score_file_format == "4column" @@ -301,7 +313,7 @@ def write_score_file( # generate the id lists, if not given if models_ids is None: - models_ids = [str(g+1) for g in range(mask.shape[1])] + models_ids = [str(g + 1) for g in range(mask.shape[1])] assert len(models_ids) == mask.shape[1] if probes_ids is None: @@ -321,29 +333,36 @@ def write_score_file( # check that the probes client ids are in the correct order for p in range(mask.shape[0]): for g in range(mask.shape[1]): - if mask[p,g] 
== 0x7f: - if models_ids[g] == probes_ids[p]: raise ValueError("The probe id %s with index %d should not be identical to model id %s with index %d" % (probes_ids[p], p, models_ids[g], g)) - elif mask[p,g] == 0xff: - if models_ids[g] != probes_ids[p]: raise ValueError("The probe id %s with index %d should be identical to model id %s with index %d" % (probes_ids[p], p, models_ids[g], g)) + if mask[p, g] == 0x7f: + if models_ids[g] == probes_ids[p]: + raise ValueError("The probe id %s with index %d should not be identical to model id %s with index %d" % ( + probes_ids[p], p, models_ids[g], g)) + elif mask[p, g] == 0xff: + if models_ids[g] != probes_ids[p]: + raise ValueError("The probe id %s with index %d should be identical to model id %s with index %d" % ( + probes_ids[p], p, models_ids[g], g)) # generate model and probe names, if not given if not four_col and model_names is None: - model_names = [str(g+1) for g in range(mask.shape[1])] + model_names = [str(g + 1) for g in range(mask.shape[1])] if probe_names is None: - probe_names = [str(p+1) for p in range(mask.shape[0])] + probe_names = [str(p + 1) for p in range(mask.shape[0])] # iterate through the files and write scores with open(score_file, 'w') as f: for g in range(mask.shape[1]): for p in range(mask.shape[0]): - if mask[p,g]: - score = scores[p,g] + if mask[p, g]: + score = scores[p, g] # handle NaN values if numpy.isnan(score): - if replace_nan is None: continue + if replace_nan is None: + continue score = replace_nan # write score file if four_col: - f.write("%s %s %s %3.8f\n" % (models_ids[g], probes_ids[p], probe_names[p], score)) + f.write("%s %s %s %3.8f\n" % + (models_ids[g], probes_ids[p], probe_names[p], score)) else: - f.write("%s %s %s %s %3.8f\n" % (models_ids[g], model_names[g], probes_ids[p], probe_names[p], score)) + f.write("%s %s %s %s %3.8f\n" % (models_ids[g], model_names[ + g], probes_ids[p], probe_names[p], score)) diff --git a/bob/measure/plot.py b/bob/measure/plot.py index 
e103c02..b195d9a 100644 --- a/bob/measure/plot.py +++ b/bob/measure/plot.py @@ -3,7 +3,7 @@ # Mon 23 May 2011 14:36:14 CEST -def log_values(min_step = -4, counts_per_step = 4): +def log_values(min_step=-4, counts_per_step=4): """Computes log-scaled values between :math:10^{M} and 1 This function computes log-scaled values between :math:10^{M} and 1 @@ -32,7 +32,7 @@ def log_values(min_step = -4, counts_per_step = 4): """ import math - return [math.pow(10., i * 1./counts_per_step) for i in range(min_step*counts_per_step,0)] + [1.] + return [math.pow(10., i * 1. / counts_per_step) for i in range(min_step * counts_per_step, 0)] + [1.] def roc(negatives, positives, npoints=100, CAR=False, **kwargs): @@ -88,12 +88,12 @@ def roc(negatives, positives, npoints=100, CAR=False, **kwargs): from . import roc as calc out = calc(negatives, positives, npoints) if not CAR: - return pyplot.plot(100.0*out[0,:], 100.0*out[1,:], **kwargs) + return pyplot.plot(100.0 * out[0, :], 100.0 * out[1, :], **kwargs) else: - return pyplot.semilogx(100.0*out[0,:], 100.0*(1-out[1,:]), **kwargs) + return pyplot.semilogx(100.0 * out[0, :], 100.0 * (1 - out[1, :]), **kwargs) -def roc_for_far(negatives, positives, far_values = log_values(), **kwargs): +def roc_for_far(negatives, positives, far_values=log_values(), **kwargs): """Plots the ROC curve for the given list of False Acceptance Rates (FAR). This method will call matplotlib to plot the ROC curve for a system which @@ -141,7 +141,7 @@ def roc_for_far(negatives, positives, far_values = log_values(), **kwargs): from matplotlib import pyplot from . 
import roc_for_far as calc out = calc(negatives, positives, far_values) - return pyplot.semilogx(100.0*out[0,:], 100.0*(1-out[1,:]), **kwargs) + return pyplot.semilogx(100.0 * out[0, :], 100.0 * (1 - out[1, :]), **kwargs) def precision_recall_curve(negatives, positives, npoints=100, **kwargs): @@ -189,11 +189,11 @@ def precision_recall_curve(negatives, positives, npoints=100, **kwargs): from matplotlib import pyplot from . import precision_recall_curve as calc out = calc(negatives, positives, npoints) - return pyplot.plot(100.0*out[0,:], 100.0*out[1,:], **kwargs) + return pyplot.plot(100.0 * out[0, :], 100.0 * out[1, :], **kwargs) def epc(dev_negatives, dev_positives, test_negatives, test_positives, - npoints=100, **kwargs): + npoints=100, **kwargs): """Plots Expected Performance Curve (EPC) as defined in the paper: Bengio, S., Keller, M., MariĆ©thoz, J. (2004). The Expected Performance Curve. @@ -255,8 +255,8 @@ def epc(dev_negatives, dev_positives, test_negatives, test_positives, from . import epc as calc out = calc(dev_negatives, dev_positives, test_negatives, test_positives, - npoints) - return pyplot.plot(out[0,:], 100.0*out[1,:], **kwargs) + npoints) + return pyplot.plot(out[0, :], 100.0 * out[1, :], **kwargs) def det(negatives, positives, npoints=100, axisfontsize='x-small', **kwargs): @@ -354,7 +354,7 @@ def det(negatives, positives, npoints=100, axisfontsize='x-small', **kwargs): "0.995", "0.998", "0.999", "0.9995", "0.9998", "0.9999", "0.99995", "0.99998", "0.99999" - ] + ] desiredLabels = [ "0.001", "0.002", "0.005", @@ -366,7 +366,7 @@ def det(negatives, positives, npoints=100, axisfontsize='x-small', **kwargs): "99.5", "99.8", "99.9", "99.95", "99.98", "99.99", "99.995", "99.998", "99.999" - ] + ] # this will actually do the plotting from matplotlib import pyplot @@ -374,11 +374,11 @@ def det(negatives, positives, npoints=100, axisfontsize='x-small', **kwargs): from . 
import ppndf out = calc(negatives, positives, npoints) - retval = pyplot.plot(out[0,:], out[1,:], **kwargs) + retval = pyplot.plot(out[0, :], out[1, :], **kwargs) # now the trick: we must plot the tick marks by hand using the PPNDF method pticks = [ppndf(float(v)) for v in desiredTicks] - ax = pyplot.gca() #and finally we set our own tick marks + ax = pyplot.gca() # and finally we set our own tick marks ax.set_xticks(pticks) ax.set_xticklabels(desiredLabels, size=axisfontsize) ax.set_yticks(pticks) @@ -421,9 +421,10 @@ def det_axis(v, **kwargs): # treat input try: - tv = list(v) #normal input - if len(tv) != 4: raise IndexError - tv = [ppndf(float(k)/100) for k in tv] + tv = list(v) # normal input + if len(tv) != 4: + raise IndexError + tv = [ppndf(float(k) / 100) for k in tv] cur = pyplot.axis() # limits must be within bounds @@ -446,7 +447,7 @@ def det_axis(v, **kwargs): return pyplot.axis(tv, **kwargs) -def cmc(cmc_scores, logx = True, **kwargs): +def cmc(cmc_scores, logx=True, **kwargs): """Plots the (cumulative) match characteristics and returns the maximum rank. 
This function plots a CMC curve using the given CMC scores, which can be read @@ -483,15 +484,14 @@ def cmc(cmc_scores, logx = True, **kwargs): out = calc(cmc_scores) if logx: - pyplot.semilogx(range(1, len(out)+1), out * 100, **kwargs) + pyplot.semilogx(range(1, len(out) + 1), out * 100, **kwargs) else: - pyplot.plot(range(1, len(out)+1), out * 100, **kwargs) + pyplot.plot(range(1, len(out) + 1), out * 100, **kwargs) return len(out) -def detection_identification_curve(cmc_scores, far_values = log_values(), rank - = 1, logx = True, **kwargs): +def detection_identification_curve(cmc_scores, far_values=log_values(), rank=1, logx=True, **kwargs): """Plots the Detection & Identification curve over the FAR This curve is designed to be used in an open set identification protocol, and @@ -539,16 +539,21 @@ def detection_identification_curve(cmc_scores, far_values = log_values(), rank from matplotlib import pyplot from . import far_threshold, detection_identification_rate - # for each probe, for which no positives exists, get the highest negative score; and sort them to compute the FAR thresholds - negatives = sorted(max(neg) for neg,pos in cmc_scores if (pos is None or not numpy.array(pos).size) and neg is not None) + # for each probe, for which no positives exists, get the highest negative + # score; and sort them to compute the FAR thresholds + negatives = sorted(max(neg) for neg, pos in cmc_scores if ( + pos is None or not numpy.array(pos).size) and neg is not None) if not negatives: - raise ValueError("There need to be at least one pair with only negative scores") + raise ValueError( + "There need to be at least one pair with only negative scores") # compute thresholds based on FAR values thresholds = [far_threshold(negatives, [], v, True) for v in far_values] - # compute detection and identification rate based on the thresholds for the given rank - rates = [100.*detection_identification_rate(cmc_scores, t, rank) for t in thresholds] + # compute detection and 
identification rate based on the thresholds for + # the given rank + rates = [ + 100. * detection_identification_rate(cmc_scores, t, rank) for t in thresholds] # plot curve if logx: diff --git a/bob/measure/script/apply_threshold.py b/bob/measure/script/apply_threshold.py index 49770b0..89fa557 100644 --- a/bob/measure/script/apply_threshold.py +++ b/bob/measure/script/apply_threshold.py @@ -33,11 +33,10 @@ Examples: import os import sys +from .eval_threshold import apthres import bob.core logger = bob.core.log.setup("bob.measure") -from .eval_threshold import apthres - def main(user_input=None): @@ -51,14 +50,12 @@ def main(user_input=None): completions = dict( prog=os.path.basename(sys.argv[0]), - version=pkg_resources.require('bob.measure')[0].version - ) + version=pkg_resources.require('bob.measure')[0].version) args = docopt.docopt( __doc__ % completions, argv=argv, - version=completions['version'], - ) + version=completions['version'],) # Sets-up logging verbosity = int(args['--verbose']) @@ -67,9 +64,9 @@ def main(user_input=None): # handles threshold validation try: args[''] = float(args['']) - except: - raise docopt.DocoptExit("cannot convert %s into float for threshold" % \ - args['']) + except Exception: + raise docopt.DocoptExit("cannot convert %s into float for threshold" % + args['']) from ..load import load_score, get_negatives_positives neg, pos = get_negatives_positives(load_score(args[''])) diff --git a/bob/measure/test_error.py b/bob/measure/test_error.py index c1759cb..14e2f94 100644 --- a/bob/measure/test_error.py +++ b/bob/measure/test_error.py @@ -13,6 +13,7 @@ import numpy import nose.tools import bob.io.base + def F(f): """Returns the test file on the "data" subdirectory""" import pkg_resources @@ -39,19 +40,19 @@ def test_basic_ratios(): # If we take a threshold on the minimum, the FAR should be 1.0 and the FRR # should be 0.0. 
Precision should be 0.5, recall should be 1.0 - far, frr = farfrr(negatives, positives, minimum-0.1) + far, frr = farfrr(negatives, positives, minimum - 0.1) nose.tools.eq_(far, 1.0) nose.tools.eq_(frr, 0.0) - prec, recall = precision_recall(negatives, positives, minimum-0.1) + prec, recall = precision_recall(negatives, positives, minimum - 0.1) nose.tools.eq_(prec, 0.5) nose.tools.eq_(recall, 1.0) # Similarly, if we take a threshold on the maximum, the FRR should be 1.0 # while the FAR should be 0.0. Both precision and recall should be 0.0. - far, frr = farfrr(negatives, positives, maximum+0.1) + far, frr = farfrr(negatives, positives, maximum + 0.1) nose.tools.eq_(far, 0.0) nose.tools.eq_(frr, 1.0) - prec, recall = precision_recall(negatives, positives, maximum+0.1) + prec, recall = precision_recall(negatives, positives, maximum + 0.1) nose.tools.eq_(prec, 0.0) nose.tools.eq_(recall, 0.0) @@ -64,15 +65,16 @@ def test_basic_ratios(): nose.tools.eq_(prec, 1.0) nose.tools.eq_(recall, 1.0) - # Testing the values of F-score depending on different choices of the threshold - f_score_ = f_score(negatives, positives, minimum-0.1) + # Testing the values of F-score depending on different choices of the + # threshold + f_score_ = f_score(negatives, positives, minimum - 0.1) nose.tools.assert_almost_equal(f_score_, 0.66666667) - f_score_ = f_score(negatives, positives, minimum-0.1, 2) + f_score_ = f_score(negatives, positives, minimum - 0.1, 2) nose.tools.assert_almost_equal(f_score_, 0.83333333) - f_score_ = f_score(negatives, positives, maximum+0.1) + f_score_ = f_score(negatives, positives, maximum + 0.1) nose.tools.eq_(f_score_, 0.0) - f_score_ = f_score(negatives, positives, maximum+0.1, 2) + f_score_ = f_score(negatives, positives, maximum + 0.1, 2) nose.tools.eq_(f_score_, 0.0) f_score_ = f_score(negatives, positives, 3.0) @@ -96,12 +98,12 @@ def test_indexing(): # If the threshold is minimum, we should have all positive samples # correctly classified and none of the 
negative samples correctly # classified. - assert correctly_classified_positives(positives, minimum-0.1).all() - assert not correctly_classified_negatives(negatives, minimum-0.1).any() + assert correctly_classified_positives(positives, minimum - 0.1).all() + assert not correctly_classified_negatives(negatives, minimum - 0.1).any() # The inverse is true if the threshold is a bit above the maximum. - assert not correctly_classified_positives(positives, maximum+0.1).any() - assert correctly_classified_negatives(negatives, maximum+0.1).all() + assert not correctly_classified_positives(positives, maximum + 0.1).any() + assert correctly_classified_negatives(negatives, maximum + 0.1).all() # If the threshold separates the sets, than all should be correctly # classified. @@ -111,7 +113,9 @@ def test_indexing(): def test_thresholding(): - from . import eer_threshold, far_threshold, frr_threshold, farfrr, correctly_classified_positives, correctly_classified_negatives, min_hter_threshold + from . import eer_threshold, far_threshold, frr_threshold, farfrr, \ + correctly_classified_positives, correctly_classified_negatives, \ + min_hter_threshold def count(array, value=True): """Counts occurrences of a certain value in an array""" @@ -129,23 +133,23 @@ def test_thresholding(): sorted_negatives = numpy.sort(negatives) # Of course we have to make sure that will set the EER correctly: - ccp = count(correctly_classified_positives(positives,threshold)) - ccn = count(correctly_classified_negatives(negatives,threshold)) + ccp = count(correctly_classified_positives(positives, threshold)) + ccn = count(correctly_classified_negatives(negatives, threshold)) assert (ccp - ccn) <= 1 for t in (0, 0.001, 0.1, 0.5, 0.9, 0.999, 1): # Lets also test the far_threshold and the frr_threshold functions threshold_far = far_threshold(sorted_negatives, [], t, is_sorted=True) threshold_frr = frr_threshold([], sorted_positives, t, is_sorted=True) - # Check that the requested FAR and FRR values are 
smaller than the requested ones + # Check that the requested FAR and FRR values are smaller than the + # requested ones far = farfrr(negatives, positives, threshold_far)[0] frr = farfrr(negatives, positives, threshold_frr)[1] assert far + 1e-7 > t assert frr + 1e-7 > t # test that the values are at least somewhere in the range - assert far-t <= 0.15 - assert frr-t <= 0.15 - + assert far - t <= 0.15 + assert frr - t <= 0.15 # If the set is separable, the calculation of the threshold is a little bit # trickier, as you have no points in the middle of the range to compare @@ -158,27 +162,30 @@ def test_thresholding(): assert threshold == 3.2 # Of course we have to make sure that will set the EER correctly: - ccp = count(correctly_classified_positives(positives,threshold)) - ccn = count(correctly_classified_negatives(negatives,threshold)) + ccp = count(correctly_classified_positives(positives, threshold)) + ccn = count(correctly_classified_negatives(negatives, threshold)) nose.tools.eq_(ccp, ccn) # The second option for the calculation of the threshold is to use the # minimum HTER. 
threshold2 = min_hter_threshold(negatives, positives) assert threshold2 == 3.2 - nose.tools.eq_(threshold, threshold2) #in this particular case + nose.tools.eq_(threshold, threshold2) # in this particular case # Of course we have to make sure that will set the EER correctly: - ccp = count(correctly_classified_positives(positives,threshold2)) - ccn = count(correctly_classified_negatives(negatives,threshold2)) + ccp = count(correctly_classified_positives(positives, threshold2)) + ccn = count(correctly_classified_negatives(negatives, threshold2)) nose.tools.eq_(ccp, ccn) def test_empty_raises(): # tests that - from bob.measure import farfrr, precision_recall, f_score, eer_threshold, min_hter_threshold, min_weighted_error_rate_threshold + from bob.measure import farfrr, precision_recall, f_score, eer_threshold, \ + min_hter_threshold, min_weighted_error_rate_threshold - for func in (farfrr, precision_recall, f_score, min_weighted_error_rate_threshold): + for func in ( + farfrr, precision_recall, + f_score, min_weighted_error_rate_threshold): nose.tools.assert_raises(RuntimeError, func, [], [1.], 0) nose.tools.assert_raises(RuntimeError, func, [1.], [], 0) nose.tools.assert_raises(RuntimeError, func, [], [], 0) @@ -191,7 +198,8 @@ def test_empty_raises(): def test_plots(): - from . import eer_threshold, roc, roc_for_far, precision_recall_curve, det, epc + from . import eer_threshold, roc, roc_for_far, precision_recall_curve, det, \ + epc # This test set is not separable. positives = bob.io.base.load(F('nonsep-positives.hdf5')) @@ -231,16 +239,16 @@ def test_plots(): # EPC curve, you need to have a development and a test set. 
We will split, # by the middle, the negatives and positives sample we have, just for the # sake of testing - dev_negatives = negatives[:(negatives.shape[0]//2)] - test_negatives = negatives[(negatives.shape[0]//2):] - dev_positives = positives[:(positives.shape[0]//2)] - test_positives = positives[(positives.shape[0]//2):] + dev_negatives = negatives[:(negatives.shape[0] // 2)] + test_negatives = negatives[(negatives.shape[0] // 2):] + dev_positives = positives[:(positives.shape[0] // 2)] + test_positives = positives[(positives.shape[0] // 2):] xy = epc(dev_negatives, dev_positives, - test_negatives, test_positives, 100) + test_negatives, test_positives, 100) xyref = bob.io.base.load(F('nonsep-epc.hdf5')) assert numpy.allclose(xy, xyref[:2], atol=1e-15) xy = epc(dev_negatives, dev_positives, - test_negatives, test_positives, 100, False, True) + test_negatives, test_positives, 100, False, True) # uncomment the next line to save a reference value # save('nonsep-epc.hdf5', xy) assert numpy.allclose(xy, xyref, atol=1e-15) @@ -263,23 +271,24 @@ def test_rocch(): pmiss_pfa = rocch(negatives, positives) assert numpy.allclose(pmiss_pfa, pmiss_pfa_ref, atol=1e-15) eer = rocch2eer(pmiss_pfa) - assert abs(eer-eer_ref) < 1e-4 + assert abs(eer - eer_ref) < 1e-4 eer = eer_rocch(negatives, positives) - assert abs(eer-eer_ref) < 1e-4 + assert abs(eer - eer_ref) < 1e-4 # This test set is not separable. 
positives = bob.io.base.load(F('nonsep-positives.hdf5')) negatives = bob.io.base.load(F('nonsep-negatives.hdf5')) # References obtained using Bosaris 1.06 - pmiss_pfa_ref = numpy.array([[1., 0.68, 0.28, 0.1, 0.06, 0., 0.], [0, 0, 0.08, 0.12, 0.22, 0.48, 1.]]) + pmiss_pfa_ref = numpy.array([[1., 0.68, 0.28, 0.1, 0.06, 0., 0.], [ + 0, 0, 0.08, 0.12, 0.22, 0.48, 1.]]) eer_ref = 0.116363636363636 # Computes pmiss_pfa = rocch(negatives, positives) assert numpy.allclose(pmiss_pfa, pmiss_pfa_ref, atol=1e-15) eer = rocch2eer(pmiss_pfa) - assert abs(eer-eer_ref) < 1e-4 + assert abs(eer - eer_ref) < 1e-4 eer = eer_rocch(negatives, positives) - assert abs(eer-eer_ref) < 1e-4 + assert abs(eer - eer_ref) < 1e-4 def test_cmc(): @@ -287,8 +296,10 @@ def test_cmc(): from . import recognition_rate, cmc, load # tests the CMC calculation - # test data; should give match characteristics [1/2,1/4,1/3] and CMC [1/3,2/3,1] - test_data = [((0.3, 1.1, 0.5), (0.7,)), ((1.4, -1.3, 0.6), (0.2,)), ((0.8, 0., 1.5), (-0.8, 1.8)), ((2., 1.3, 1.6, 0.9), (2.4,))] + # test data; should give match characteristics [1/2,1/4,1/3] and CMC + # [1/3,2/3,1] + test_data = [((0.3, 1.1, 0.5), (0.7,)), ((1.4, -1.3, 0.6), (0.2,)), + ((0.8, 0., 1.5), (-0.8, 1.8)), ((2., 1.3, 1.6, 0.9), (2.4,))] # compute recognition rate rr = recognition_rate(test_data) nose.tools.eq_(rr, 0.5) @@ -298,7 +309,8 @@ def test_cmc(): # load test data desired_rr = 0.76 - desired_cmc = [0.76, 0.89, 0.96, 0.98, 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.] + desired_cmc = [0.76, 0.89, 0.96, 0.98, 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.] data = load.cmc_four_column(F('scores-cmc-4col.txt')) rr = recognition_rate(data) nose.tools.eq_(rr, desired_rr) @@ -318,7 +330,6 @@ def test_cmc(): assert (cmc_ == desired_cmc).all() - def test_calibration(): from . 
import calibration @@ -348,30 +359,37 @@ def test_calibration(): assert abs(min_cllr - 0.33736) < 1e-5, min_cllr - def test_open_set_rates(): # No error files cmc_scores = bob.measure.load.cmc(F("scores-cmc-4col-open-set.txt")) - assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8 + assert abs(bob.measure.detection_identification_rate( + cmc_scores, threshold=0.5) - 1.0) < 1e-8 assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8 - assert abs(bob.measure.recognition_rate(cmc_scores) - 7./9.) < 1e-8 - assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 1.0) < 1e-8 + assert abs(bob.measure.recognition_rate(cmc_scores) - 7. / 9.) < 1e-8 + assert abs(bob.measure.recognition_rate( + cmc_scores, threshold=0.5) - 1.0) < 1e-8 # One error - cmc_scores = bob.measure.load.cmc(F("scores-cmc-4col-open-set-one-error.txt")) - assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 6./7.) < 1e-8 + cmc_scores = bob.measure.load.cmc( + F("scores-cmc-4col-open-set-one-error.txt")) + assert abs(bob.measure.detection_identification_rate( + cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8 assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5)) < 1e-8 - assert abs(bob.measure.recognition_rate(cmc_scores) - 6./9.) < 1e-8 - assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 6./7.) < 1e-8 - + assert abs(bob.measure.recognition_rate(cmc_scores) - 6. / 9.) < 1e-8 + assert abs(bob.measure.recognition_rate( + cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8 # Two errors - cmc_scores = bob.measure.load.cmc_four_column(F("scores-cmc-4col-open-set-two-errors.txt")) - assert abs(bob.measure.detection_identification_rate(cmc_scores, threshold=0.5) - 6./7.) < 1e-8 - assert abs(bob.measure.false_alarm_rate(cmc_scores, threshold=0.5) - 0.5) < 1e-8 - - assert abs(bob.measure.recognition_rate(cmc_scores) - 6./9.) 
< 1e-8 - assert abs(bob.measure.recognition_rate(cmc_scores, threshold=0.5) - 6./8.) < 1e-8 + cmc_scores = bob.measure.load.cmc_four_column( + F("scores-cmc-4col-open-set-two-errors.txt")) + assert abs(bob.measure.detection_identification_rate( + cmc_scores, threshold=0.5) - 6. / 7.) < 1e-8 + assert abs(bob.measure.false_alarm_rate( + cmc_scores, threshold=0.5) - 0.5) < 1e-8 + + assert abs(bob.measure.recognition_rate(cmc_scores) - 6. / 9.) < 1e-8 + assert abs(bob.measure.recognition_rate( + cmc_scores, threshold=0.5) - 6. / 8.) < 1e-8 diff --git a/bob/measure/test_io.py b/bob/measure/test_io.py index 84fdc3d..af8368c 100644 --- a/bob/measure/test_io.py +++ b/bob/measure/test_io.py @@ -9,33 +9,41 @@ import bob.measure import numpy -import tempfile, os, shutil +import tempfile +import os +import shutil import bob.io.base.test_utils + def test_load_scores(): - # This function tests the IO functionality of loading score files in different ways + # This function tests the IO functionality of loading score files in + # different ways - load_functions = {'4col' : bob.measure.load.four_column, '5col' : bob.measure.load.five_column} - cols = {'4col' : 4, '5col' : 5} + load_functions = {'4col': bob.measure.load.four_column, + '5col': bob.measure.load.five_column} + cols = {'4col': 4, '5col': 5} for variant in cols: # read score file in normal way - normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure') + normal_score_file = bob.io.base.test_utils.datafile( + 'dev-%s.txt' % variant, 'bob.measure') normal_scores = list(load_functions[variant](normal_score_file)) assert len(normal_scores) == 910 assert all(len(s) == cols[variant] for s in normal_scores) # read the compressed score file - compressed_score_file = bob.io.base.test_utils.datafile('dev-%s.tar.gz' % variant, 'bob.measure') + compressed_score_file = bob.io.base.test_utils.datafile( + 'dev-%s.tar.gz' % variant, 'bob.measure') compressed_scores = 
list(load_functions[variant](compressed_score_file)) assert len(compressed_scores) == len(normal_scores) assert all(len(c) == cols[variant] for c in compressed_scores) - assert all(c[i] == s[i] for c,s in zip(compressed_scores, normal_scores) for i in range(cols[variant])) + assert all(c[i] == s[i] for c, s in zip(compressed_scores, + normal_scores) for i in range(cols[variant])) - ### Use auto-estimated score file contents + # Use auto-estimated score file contents # read score file in normal way normal_scores = list(bob.measure.load.scores(normal_score_file)) @@ -47,31 +55,36 @@ def test_load_scores(): assert len(compressed_scores) == len(normal_scores) assert all(len(c) == cols[variant] for c in compressed_scores) - assert all(c[i] == s[i] for c,s in zip(compressed_scores, normal_scores) for i in range(cols[variant])) + assert all(c[i] == s[i] for c, s in zip(compressed_scores, + normal_scores) for i in range(cols[variant])) def test_split_scores(): - # This function tests the IO functionality of loading score files in different ways + # This function tests the IO functionality of loading score files in + # different ways - split_functions = {'4col' : bob.measure.load.split_four_column, '5col' : bob.measure.load.split_five_column} - cols = {'4col' : 4, '5col' : 5} + split_functions = {'4col': bob.measure.load.split_four_column, + '5col': bob.measure.load.split_five_column} + cols = {'4col': 4, '5col': 5} for variant in cols: # read score file in normal way - normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure') + normal_score_file = bob.io.base.test_utils.datafile( + 'dev-%s.txt' % variant, 'bob.measure') negatives, positives = split_functions[variant](normal_score_file) assert len(negatives) == 520, len(negatives) assert len(positives) == 390, len(positives) # read the compressed score file - compressed_score_file = bob.io.base.test_utils.datafile('dev-%s.tar.gz' % variant, 'bob.measure') + compressed_score_file = 
bob.io.base.test_utils.datafile( + 'dev-%s.tar.gz' % variant, 'bob.measure') negatives, positives = split_functions[variant](compressed_score_file) assert len(negatives) == 520, len(negatives) assert len(positives) == 390, len(positives) - ### Use auto-estimated score file contents + # Use auto-estimated score file contents # read score file in normal way negatives, positives = bob.measure.load.split(normal_score_file) @@ -86,22 +99,27 @@ def test_split_scores(): def test_load_score(): - # This function tests the IO functionality of loading score files in different ways + # This function tests the IO functionality of loading score files in + # different ways scores = [] - cols = {'4col' : 4, '5col' : 5} + cols = {'4col': 4, '5col': 5} for variant in cols: # read score file in normal way - normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure') - normal_scores = bob.measure.load.load_score(normal_score_file, cols[variant]) + normal_score_file = bob.io.base.test_utils.datafile( + 'dev-%s.txt' % variant, 'bob.measure') + normal_scores = bob.measure.load.load_score( + normal_score_file, cols[variant]) assert len(normal_scores) == 910 assert len(normal_scores.dtype) == cols[variant] # read the compressed score file - compressed_score_file = bob.io.base.test_utils.datafile('dev-%s.tar.gz' % variant, 'bob.measure') - compressed_scores = bob.measure.load.load_score(compressed_score_file, cols[variant]) + compressed_score_file = bob.io.base.test_utils.datafile( + 'dev-%s.tar.gz' % variant, 'bob.measure') + compressed_scores = bob.measure.load.load_score( + compressed_score_file, cols[variant]) assert len(compressed_scores) == len(normal_scores) assert len(compressed_scores.dtype) == cols[variant] @@ -109,7 +127,8 @@ def test_load_score(): assert all(normal_scores[name] == compressed_scores[name]) # test minimal loading - minimal_scores = bob.measure.load.load_score(normal_score_file, minimal=True) + minimal_scores = 
bob.measure.load.load_score( + normal_score_file, minimal=True) assert len(minimal_scores) == 910 assert len(minimal_scores.dtype) == 3 assert minimal_scores.dtype.names == ('claimed_id', 'real_id', 'score') @@ -119,12 +138,14 @@ def test_dump_score(): # This function tests the IO functionality of dumping score files scores = [] - cols = {'4col' : 4, '5col' : 5} + cols = {'4col': 4, '5col': 5} for variant in cols: # read score file - normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure') - normal_scores = bob.measure.load.load_score(normal_score_file, cols[variant]) + normal_score_file = bob.io.base.test_utils.datafile( + 'dev-%s.txt' % variant, 'bob.measure') + normal_scores = bob.measure.load.load_score( + normal_score_file, cols[variant]) with tempfile.TemporaryFile() as f: bob.measure.load.dump_score(f, normal_scores) @@ -139,101 +160,125 @@ def _check_binary_identical(name1, name2): # see: http://www.peterbe.com/plog/using-md5-to-check-equality-between-files from hashlib import md5 # tests if two files are binary identical - with open(name1,'rb') as f1: - with open(name2,'rb') as f2: + with open(name1, 'rb') as f1: + with open(name2, 'rb') as f2: assert md5(f1.read()).digest() == md5(f2.read()).digest() def test_openbr_verify(): - # This function tests that the conversion to the OpenBR verify file works as expected + # This function tests that the conversion to the OpenBR verify file works + # as expected temp_dir = tempfile.mkdtemp(prefix='bob_test') # define output files openbr_extensions = ('.mtx', '.mask') - matrix_file, mask_file = [os.path.join(temp_dir, "scores%s") % ext for ext in openbr_extensions] + matrix_file, mask_file = [os.path.join( + temp_dir, "scores%s") % ext for ext in openbr_extensions] try: for variant in ('4col', '5col'): # get score file - score_file = bob.io.base.test_utils.datafile('scores-cmc-%s.txt' % variant, 'bob.measure') + score_file = bob.io.base.test_utils.datafile( + 'scores-cmc-%s.txt' % 
variant, 'bob.measure') - # first round, do not define keyword arguments -- let the file get the gallery and probe ids automatically + # first round, do not define keyword arguments -- let the file get the + # gallery and probe ids automatically kwargs = {} for i in range(2): # get the files by automatically obtaining the identities - bob.measure.openbr.write_matrix(score_file, matrix_file, mask_file, score_file_format = "%sumn" % variant, **kwargs) + bob.measure.openbr.write_matrix( + score_file, matrix_file, mask_file, score_file_format="%sumn" % variant, **kwargs) assert os.path.isfile(matrix_file) and os.path.isfile(mask_file) - # check that they are binary identical to the reference files (which are tested to work and give the same results with OpenBR) - matrix_ref, mask_ref = [bob.io.base.test_utils.datafile('scores%s' % ext, 'bob.measure') for ext in openbr_extensions] + # check that they are binary identical to the reference files (which + # are tested to work and give the same results with OpenBR) + matrix_ref, mask_ref = [bob.io.base.test_utils.datafile( + 'scores%s' % ext, 'bob.measure') for ext in openbr_extensions] _check_binary_identical(matrix_file, matrix_ref) _check_binary_identical(mask_file, mask_ref) # define new kwargs for second round, i.e., define model and probe names - # these names are identical to what is found in the score file, which in turn comes from the AT&T database - model_type = {"4col" : "%d", "5col" : "s%d"}[variant] - dev_ids = (3,4,7,8,9,13,15,18,19,22,23,25,28,30,31,32,35,37,38,40) + # these names are identical to what is found in the score file, which + # in turn comes from the AT&T database + model_type = {"4col": "%d", "5col": "s%d"}[variant] + dev_ids = (3, 4, 7, 8, 9, 13, 15, 18, 19, 22, 23, + 25, 28, 30, 31, 32, 35, 37, 38, 40) kwargs['model_names'] = [model_type % c for c in dev_ids] - kwargs['probe_names'] = ["s%d/%d" %(c,i) for c in dev_ids for i in (1,3,6,8,10)] + kwargs['probe_names'] = ["s%d/%d" % + (c, i) for c 
in dev_ids for i in (1, 3, 6, 8, 10)] finally: shutil.rmtree(temp_dir) def test_openbr_search(): - # This function tests that the conversion to the OpenBR search file works as expected + # This function tests that the conversion to the OpenBR search file works + # as expected temp_dir = tempfile.mkdtemp(prefix='bob_test') # define output files openbr_extensions = ('.mtx', '.mask') - matrix_file, mask_file = [os.path.join(temp_dir, "search%s") % ext for ext in openbr_extensions] + matrix_file, mask_file = [os.path.join( + temp_dir, "search%s") % ext for ext in openbr_extensions] try: for variant in ('4col', '5col'): # get score file - score_file = bob.io.base.test_utils.datafile('scores-cmc-%s.txt' % variant, 'bob.measure') + score_file = bob.io.base.test_utils.datafile( + 'scores-cmc-%s.txt' % variant, 'bob.measure') - # first round, do not define keyword arguments -- let the file get the gallery and probe ids automatically + # first round, do not define keyword arguments -- let the file get the + # gallery and probe ids automatically kwargs = {} for i in range(2): # get the files by automatically obtaining the identities - bob.measure.openbr.write_matrix(score_file, matrix_file, mask_file, score_file_format = "%sumn" % variant, search=50, **kwargs) + bob.measure.openbr.write_matrix( + score_file, matrix_file, mask_file, score_file_format="%sumn" % variant, search=50, **kwargs) assert os.path.isfile(matrix_file) and os.path.isfile(mask_file) - # check that they are binary identical to the reference files (which are tested to work and give the same results with OpenBR) - matrix_ref, mask_ref = [bob.io.base.test_utils.datafile('search%s' % ext, 'bob.measure') for ext in openbr_extensions] + # check that they are binary identical to the reference files (which + # are tested to work and give the same results with OpenBR) + matrix_ref, mask_ref = [bob.io.base.test_utils.datafile( + 'search%s' % ext, 'bob.measure') for ext in openbr_extensions] 
_check_binary_identical(matrix_file, matrix_ref) _check_binary_identical(mask_file, mask_ref) # define new kwargs for second round, i.e., define model and probe names - # these names are identical to what is found in the score file, which in turn comes from the AT&T database - model_type = {"4col" : "%d", "5col" : "s%d"}[variant] - dev_ids = (3,4,7,8,9,13,15,18,19,22,23,25,28,30,31,32,35,37,38,40) + # these names are identical to what is found in the score file, which + # in turn comes from the AT&T database + model_type = {"4col": "%d", "5col": "s%d"}[variant] + dev_ids = (3, 4, 7, 8, 9, 13, 15, 18, 19, 22, 23, + 25, 28, 30, 31, 32, 35, 37, 38, 40) kwargs['model_names'] = [model_type % c for c in dev_ids] - kwargs['probe_names'] = ["s%d/%d" %(c,i) for c in dev_ids for i in (1,3,6,8,10)] + kwargs['probe_names'] = ["s%d/%d" % + (c, i) for c in dev_ids for i in (1, 3, 6, 8, 10)] finally: shutil.rmtree(temp_dir) - def test_from_openbr(): - # This function tests that the conversion from the OpenBR matrices work as expected + # This function tests that the conversion from the OpenBR matrices work as + # expected temp_dir = tempfile.mkdtemp(prefix='bob_test') # define input files openbr_extensions = ('.mtx', '.mask') - matrix_file, mask_file = [bob.io.base.test_utils.datafile('scores%s' % ext, 'bob.measure') for ext in openbr_extensions] + matrix_file, mask_file = [bob.io.base.test_utils.datafile( + 'scores%s' % ext, 'bob.measure') for ext in openbr_extensions] score_file = os.path.join(temp_dir, "scores") - load_functions = {'4col' : bob.measure.load.four_column, '5col' : bob.measure.load.five_column} + load_functions = {'4col': bob.measure.load.four_column, + '5col': bob.measure.load.five_column} try: for variant in load_functions: - # first, do not define keyword arguments -- let the file get the model and probe ids being created automatically - bob.measure.openbr.write_score_file(matrix_file, mask_file, score_file, score_file_format="%sumn"%variant) + # first, do not 
define keyword arguments -- let the file get the model + # and probe ids being created automatically + bob.measure.openbr.write_score_file( + matrix_file, mask_file, score_file, score_file_format="%sumn" % variant) assert os.path.exists(score_file) # read the score file with bobs functionality columns = list(load_functions[variant](score_file)) @@ -242,17 +287,21 @@ def test_from_openbr(): assert len(columns) == 2000 # now, generate model and probe names and ids - model_type = {"4col" : "%d", "5col" : "s%d"}[variant] - dev_ids = (3,4,7,8,9,13,15,18,19,22,23,25,28,30,31,32,35,37,38,40) + model_type = {"4col": "%d", "5col": "s%d"}[variant] + dev_ids = (3, 4, 7, 8, 9, 13, 15, 18, 19, 22, 23, + 25, 28, 30, 31, 32, 35, 37, 38, 40) model_names = ["s%d" % c for c in dev_ids] - probe_names = ["s%d/%d" %(c,i) for c in dev_ids for i in (1,3,6,8,10)] + probe_names = ["s%d/%d" % (c, i) + for c in dev_ids for i in (1, 3, 6, 8, 10)] models_ids = ["%d" % c for c in dev_ids] - probes_ids = ["%d" % c for c in dev_ids for i in (1,3,6,8,10)] + probes_ids = ["%d" % c for c in dev_ids for i in (1, 3, 6, 8, 10)] - bob.measure.openbr.write_score_file(matrix_file, mask_file, score_file, models_ids=models_ids, probes_ids=probes_ids, model_names=model_names, probe_names=probe_names, score_file_format="%sumn"%variant) + bob.measure.openbr.write_score_file(matrix_file, mask_file, score_file, models_ids=models_ids, probes_ids=probes_ids, + model_names=model_names, probe_names=probe_names, score_file_format="%sumn" % variant) # check that we re-generated the bob score file - reference_file = bob.io.base.test_utils.datafile('scores-cmc-%s.txt' % variant, 'bob.measure') + reference_file = bob.io.base.test_utils.datafile( + 'scores-cmc-%s.txt' % variant, 'bob.measure') # assert that we can (almost) reproduce the score file # ... 
read both files @@ -260,15 +309,18 @@ def test_from_openbr(): reference = list(load_functions[variant](reference_file)) assert len(columns) == len(reference) for i in range(len(columns)): - for j in range(len(columns[i])-1): + for j in range(len(columns[i]) - 1): # check that the model and probe names are fine - assert columns[i][j] == reference[i][j], str(columns[i]) + " != " + str(reference[i]) - # check that the score is close (OpenBR write scores in float32 precision only) - assert abs(columns[i][-1] - numpy.float32(reference[i][-1])) <= 1e-8, str(columns[i][-1]) + " != " + str(reference[i][-1]) + assert columns[i][j] == reference[i][j], str( + columns[i]) + " != " + str(reference[i]) + # check that the score is close (OpenBR write scores in float32 + # precision only) + assert abs(columns[i][-1] - numpy.float32(reference[i][-1]) + ) <= 1e-8, str(columns[i][-1]) + " != " + str(reference[i][-1]) #assert numpy.isclose(columns[i][-1], reference[i][-1], atol = 1e-3, rtol=1e-8), str(columns[i][-1]) + " != " + str(reference[i][-1]) - assert numpy.allclose(columns[i][-1], reference[i][-1], atol = 1e-3, rtol=1e-8), str(columns[i][-1]) + " != " + str(reference[i][-1]) - + assert numpy.allclose(columns[i][-1], reference[i][-1], atol=1e-3, + rtol=1e-8), str(columns[i][-1]) + " != " + str(reference[i][-1]) finally: shutil.rmtree(temp_dir) diff --git a/bob/measure/test_scripts.py b/bob/measure/test_scripts.py index 21d0915..2932efe 100644 --- a/bob/measure/test_scripts.py +++ b/bob/measure/test_scripts.py @@ -40,7 +40,7 @@ def test_compute_perf(): DEV_SCORES, TEST_SCORES, '--output=' + tmp_output.name, - ] + ] from .script.compute_perf import main nose.tools.eq_(main(cmdline), 0) @@ -56,7 +56,7 @@ def test_compute_perf_only_dev(): cmdline = [ DEV_SCORES, '--output=' + tmp_output.name, - ] + ] from .script.compute_perf import main nose.tools.eq_(main(cmdline), 0) @@ -81,7 +81,7 @@ def test_apply_threshold(): cmdline = [ '0.5', TEST_SCORES, - ] + ] from 
.script.apply_threshold import main nose.tools.eq_(main(cmdline), 0) @@ -99,7 +99,7 @@ def test_compute_perf_5col(): DEV_SCORES_5COL, TEST_SCORES_5COL, '--output=' + tmp_output.name, - ] + ] from .script.compute_perf import main nose.tools.eq_(main(cmdline), 0) @@ -117,22 +117,22 @@ def test_compute_cmc(): tmp_output = tempfile.NamedTemporaryFile(prefix=__name__, suffix='.pdf') nose.tools.eq_(main([ - SCORES_4COL_CMC, - '--log-x-scale', - '--output=%s' % tmp_output.name, - ]), 0) + SCORES_4COL_CMC, + '--log-x-scale', + '--output=%s' % tmp_output.name, + ]), 0) tmp_output = tempfile.NamedTemporaryFile(prefix=__name__, suffix='.pdf') nose.tools.eq_(main([ - SCORES_5COL_CMC, - '--output=%s' % tmp_output.name, - ]), 0) + SCORES_5COL_CMC, + '--output=%s' % tmp_output.name, + ]), 0) tmp_output = tempfile.NamedTemporaryFile(prefix=__name__, suffix='.pdf') nose.tools.eq_(main([ - SCORES_4COL_CMC_OS, - '--rank=1', - '--output=%s' % tmp_output.name, - ]), 0) + SCORES_4COL_CMC_OS, + '--rank=1', + '--output=%s' % tmp_output.name, + ]), 0) diff --git a/bob/measure/version.cpp b/bob/measure/version.cpp index 8ffb61e..1290c85 100644 --- a/bob/measure/version.cpp +++ b/bob/measure/version.cpp @@ -8,74 +8,89 @@ #define BOB_IMPORT_VERSION #include #include -#include #include +#include +static PyObject *build_version_dictionary() { -static PyObject* build_version_dictionary() { - - PyObject* retval = PyDict_New(); - if (!retval) return 0; + PyObject *retval = PyDict_New(); + if (!retval) + return 0; auto retval_ = make_safe(retval); - if (!dict_steal(retval, "Blitz++", blitz_version())) return 0; - if (!dict_steal(retval, "Boost", boost_version())) return 0; - if (!dict_steal(retval, "Compiler", compiler_version())) return 0; - if (!dict_steal(retval, "Python", python_version())) return 0; - if (!dict_steal(retval, "NumPy", numpy_version())) return 0; - if (!dict_steal(retval, "HDF5", hdf5_version())) return 0; - if (!dict_steal(retval, "bob.blitz", bob_blitz_version())) return 0; - 
if (!dict_steal(retval, "bob.core", bob_core_version())) return 0; - if (!dict_steal(retval, "bob.math", bob_math_version())) return 0; - if (!dict_steal(retval, "bob.io.base", bob_io_base_version())) return 0; + if (!dict_steal(retval, "Blitz++", blitz_version())) + return 0; + if (!dict_steal(retval, "Boost", boost_version())) + return 0; + if (!dict_steal(retval, "Compiler", compiler_version())) + return 0; + if (!dict_steal(retval, "Python", python_version())) + return 0; + if (!dict_steal(retval, "NumPy", numpy_version())) + return 0; + if (!dict_steal(retval, "HDF5", hdf5_version())) + return 0; + if (!dict_steal(retval, "bob.blitz", bob_blitz_version())) + return 0; + if (!dict_steal(retval, "bob.core", bob_core_version())) + return 0; + if (!dict_steal(retval, "bob.math", bob_math_version())) + return 0; + if (!dict_steal(retval, "bob.io.base", bob_io_base_version())) + return 0; return Py_BuildValue("O", retval); } static PyMethodDef module_methods[] = { - {0} /* Sentinel */ + {0} /* Sentinel */ }; PyDoc_STRVAR(module_docstr, -"Information about software used to compile the C++ Bob API" -); + "Information about software used to compile the C++ Bob API"); #if PY_VERSION_HEX >= 0x03000000 -static PyModuleDef module_definition = { - PyModuleDef_HEAD_INIT, - BOB_EXT_MODULE_NAME, - module_docstr, - -1, - module_methods, - 0, 0, 0, 0 -}; +static PyModuleDef module_definition = {PyModuleDef_HEAD_INIT, + BOB_EXT_MODULE_NAME, + module_docstr, + -1, + module_methods, + 0, + 0, + 0, + 0}; #endif -static PyObject* create_module (void) { +static PyObject *create_module(void) { -# if PY_VERSION_HEX >= 0x03000000 - PyObject* m = PyModule_Create(&module_definition); +#if PY_VERSION_HEX >= 0x03000000 + PyObject *m = PyModule_Create(&module_definition); auto m_ = make_xsafe(m); - const char* ret = "O"; -# else - PyObject* m = Py_InitModule3(BOB_EXT_MODULE_NAME, module_methods, module_docstr); - const char* ret = "N"; -# endif - if (!m) return 0; + const char *ret = "O"; 
+#else + PyObject *m = + Py_InitModule3(BOB_EXT_MODULE_NAME, module_methods, module_docstr); + const char *ret = "N"; +#endif + if (!m) + return 0; /* register version numbers and constants */ - if (PyModule_AddStringConstant(m, "module", BOB_EXT_MODULE_VERSION) < 0) return 0; + if (PyModule_AddStringConstant(m, "module", BOB_EXT_MODULE_VERSION) < 0) + return 0; - PyObject* externals = build_version_dictionary(); - if (!externals) return 0; - if (PyModule_AddObject(m, "externals", externals) < 0) return 0; + PyObject *externals = build_version_dictionary(); + if (!externals) + return 0; + if (PyModule_AddObject(m, "externals", externals) < 0) + return 0; return Py_BuildValue(ret, m); } -PyMODINIT_FUNC BOB_EXT_ENTRY_NAME (void) { -# if PY_VERSION_HEX >= 0x03000000 +PyMODINIT_FUNC BOB_EXT_ENTRY_NAME(void) { +#if PY_VERSION_HEX >= 0x03000000 return -# endif - create_module(); +#endif + create_module(); } -- 2.21.0