Commit 2df96e13 authored by Manuel Günther
Browse files

Started implementing sorted/unsorted versions of threshold calculations

parent 9ddad2a4
......@@ -16,10 +16,23 @@
#include <bob.core/assert.h>
#include <bob.core/cast.h>
#include <bob.core/array_copy.h>
#include <bob.core/array_sort.h>
#include <bob.math/pavx.h>
#include <bob.math/linsolve.h>
/**
 * Makes the scores of `a` available through `b` in sorted order.
 *
 * If `isSorted` is true, the caller vouches for the order of `a`; `b` is then
 * simply set up as a reference to `a` and no sorting is carried out.
 * Otherwise `a` is copied into `b` and that copy is sorted with `predicate`
 * (std::less<T> unless another comparison type is given).
 */
template <typename T, typename predicate = std::less<T>>
static void sort(const blitz::Array<T,1>& a, blitz::Array<T,1>& b, bool isSorted){
  if (!isSorted){
    // sort a copy, since the input array is const and cannot be reordered
    bob::core::array::ccopy(a,b);
    bob::core::array::sort<T,predicate>(b);
    return;
  }
  // already in order: let b refer to the input instead of duplicating it
  b.reference(a);
}
std::pair<double, double> bob::measure::farfrr(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives, double threshold) {
blitz::sizeType total_negatives = negatives.extent(blitz::firstDim);
......@@ -61,18 +74,18 @@ double eer_predicate(double far, double frr) {
return std::abs(far - frr);
}
double bob::measure::eerThreshold(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives) {
return bob::measure::minimizingThreshold(negatives, positives, eer_predicate);
/**
 * Returns the threshold that minimizingThreshold() selects for eer_predicate.
 *
 * Both score sets are first passed through the local sort() helper, which
 * only sorts them when `isSorted` is false.
 */
double bob::measure::eerThreshold(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>& positives, bool isSorted) {
  // sorted view of the negative scores
  blitz::Array<double,1> sorted_negatives;
  sort(negatives, sorted_negatives, isSorted);

  // sorted view of the positive scores
  blitz::Array<double,1> sorted_positives;
  sort(positives, sorted_positives, isSorted);

  return bob::measure::minimizingThreshold(sorted_negatives, sorted_positives, eer_predicate);
}
double bob::measure::eerRocch(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives) {
/**
 * Evaluates rocch() on the given scores and converts its result with
 * rocch2eer().
 */
double bob::measure::eerRocch(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>& positives) {
  // first step: the result of rocch() for the two score sets
  const auto hull = bob::measure::rocch(negatives, positives);
  // second step: reduce that result to a single value
  return bob::measure::rocch2eer(hull);
}
double bob::measure::farThreshold(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>&, double far_value) {
double bob::measure::farThreshold(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>&, double far_value, bool isSorted) {
// check the parameters are valid
if (far_value < 0. || far_value > 1.) {
boost::format m("the argument for `far_value' cannot take the value %f - the value must be in the interval [0.,1.]");
......@@ -83,35 +96,33 @@ double bob::measure::farThreshold(const blitz::Array<double,1>& negatives,
throw std::runtime_error("the number of negative scores must be at least 2");
}
// sort negative scores ascendingly
std::vector<double> negatives_(negatives.shape()[0]);
std::copy(negatives.begin(), negatives.end(), negatives_.begin());
std::sort(negatives_.begin(), negatives_.end(), std::less<double>());
// sort the array, if necessary
blitz::Array<double,1> neg;
sort(negatives, neg, isSorted);
// compute position of the threshold
double crr = 1.-far_value; // (Correct Rejection Rate; = 1 - FAR)
double crr_index = crr * negatives_.size();
double crr_index = crr * neg.extent(0);
// compute the index above the current CRR value
int index = std::min((int)std::floor(crr_index), (int)negatives_.size()-1);
int index = std::min((int)std::floor(crr_index), neg.extent(0)-1);
// correct index if we have multiple score values at the requested position
while (index && negatives_[index] == negatives_[index-1]) --index;
while (index && neg(index) == neg(index-1)) --index;
// we compute a correction term
double correction;
if (index){
// assure that we are in the middle of two cases
correction = 0.5 * (negatives_[index] - negatives_[index-1]);
correction = 0.5 * (neg(index) - neg(index-1));
} else {
// add an overall correction term
correction = 0.5 * (negatives_.back() - negatives_.front()) / negatives_.size();
correction = 0.5 * (neg(neg.extent(0)-1) - neg(0)) / neg.extent(0);
}
return negatives_[index] - correction;
return neg(index) - correction;
}
double bob::measure::frrThreshold(const blitz::Array<double,1>&,
const blitz::Array<double,1>& positives, double frr_value) {
double bob::measure::frrThreshold(const blitz::Array<double,1>&, const blitz::Array<double,1>& positives, double frr_value, bool isSorted) {
// check the parameters are valid
if (frr_value < 0. || frr_value > 1.) {
......@@ -123,32 +134,31 @@ double bob::measure::frrThreshold(const blitz::Array<double,1>&,
throw std::runtime_error("the number of positive scores must be at least 2");
}
// sort positive scores descendingly
std::vector<double> positives_(positives.shape()[0]);
std::copy(positives.begin(), positives.end(), positives_.begin());
std::sort(positives_.begin(), positives_.end(), std::greater<double>());
// sort positive scores in descending order, if necessary
blitz::Array<double,1> pos;
sort<double,std::greater<double>>(positives, pos, isSorted);
// compute position of the threshold
double car = 1.-frr_value; // (Correct Acceptance Rate; = 1 - FRR)
double car_index = car * positives_.size();
double car_index = car * pos.extent(0);
// compute the index above the current CAR value
int index = std::min((int)std::floor(car_index), (int)positives_.size()-1);
int index = std::min((int)std::floor(car_index), pos.extent(0)-1);
// correct index if we have multiple score values at the requested position
while (index && positives_[index] == positives_[index-1]) --index;
while (index && pos(index) == pos(index-1)) --index;
// we compute a correction term to assure that we are in the middle of two cases
// we compute a correction term
double correction;
if (index){
// assure that we are in the middle of two cases
correction = 0.5 * (positives_[index-1] - positives_[index]);
correction = 0.5 * (pos(index-1) - pos(index));
} else {
// add an overall correction term
correction = 0.5 * (positives_.front() - positives_.back()) / positives_.size();
correction = 0.5 * (pos(0) - pos(pos.extent(0)-1)) / pos.extent(0);
}
return positives_[index] + correction;
return pos(index) + correction;
}
/**
......@@ -171,11 +181,12 @@ class weighted_error {
};
double bob::measure::minWeightedErrorRateThreshold
(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives, double cost) {
// Returns the threshold that minimizingThreshold() selects for a
// weighted_error predicate constructed from `cost`.
// `negatives` and `positives` are passed through the local sort() helper,
// which only sorts them when `isSorted` is false.
double bob::measure::minWeightedErrorRateThreshold(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>& positives, double cost, bool isSorted) {
// neg/pos receive the sorted versions of the two input arrays
blitz::Array<double,1> neg, pos;
sort(negatives, neg, isSorted);
sort(positives, pos, isSorted);
weighted_error predicate(cost);
// NOTE(review): in this diff view the next line looks like the pre-change
// return (it passes the raw inputs); the return after it passes the sorted
// arrays instead - confirm that only the latter remains in the file.
return bob::measure::minimizingThreshold(negatives, positives, predicate);
return bob::measure::minimizingThreshold(neg, pos, predicate);
}
blitz::Array<double,2> bob::measure::roc(const blitz::Array<double,1>& negatives,
......@@ -358,20 +369,16 @@ double bob::measure::rocch2eer(const blitz::Array<double,2>& pfa_pmiss)
* @return The ROC curve with the FAR in the first row and the FRR in the second.
*/
blitz::Array<double,2> bob::measure::roc_for_far(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives, const blitz::Array<double,1>& far_list) {
const blitz::Array<double,1>& positives, const blitz::Array<double,1>& far_list, bool isSorted) {
int n_points = far_list.extent(0);
if (negatives.extent(0) == 0) throw std::runtime_error("The given set of negatives is empty.");
if (positives.extent(0) == 0) throw std::runtime_error("The given set of positives is empty.");
// sort negative scores ascendingly
std::vector<double> negatives_(negatives.extent(0));;
std::copy(negatives.begin(), negatives.end(), negatives_.begin());
std::sort(negatives_.begin(), negatives_.end());
// sort positive scores ascendingly
std::vector<double> positives_(positives.extent(0));;
std::copy(positives.begin(), positives.end(), positives_.begin());
std::sort(positives_.begin(), positives_.end());
// sort negative and positive scores in ascending order, if necessary
blitz::Array<double,1> neg, pos;
sort(negatives, neg, isSorted);
sort(positives, pos, isSorted);
// do some magic to compute the FRR list
blitz::Array<double,2> retval(2, n_points);
......@@ -379,10 +386,10 @@ blitz::Array<double,2> bob::measure::roc_for_far(const blitz::Array<double,1>& n
// index into the FAR and FRR list
int far_index = n_points-1;
int pos_index = 0, neg_index = 0;
int n_pos = positives_.size(), n_neg = negatives_.size();
int n_pos = pos.extent(0), n_neg = neg.extent(0);
// iterators into the result lists
std::vector<double>::const_iterator pos_it = positives_.begin(), neg_it = negatives_.begin();
auto pos_it = pos.begin(), neg_it = neg.begin();
// do some fast magic to compute the FRR values ;-)
do{
// check whether the current positive value is less than the current negative one
......@@ -409,13 +416,15 @@ blitz::Array<double,2> bob::measure::roc_for_far(const blitz::Array<double,1>& n
}
// do this as long as there are elements left in both lists and not all FRR elements were calculated yet
} while (pos_it != positives_.end() && neg_it != negatives_.end() && far_index >= 0);
} while (pos_it != pos.end() && neg_it != neg.end() && far_index >= 0);
// check if all FRR values have been set
if (far_index >= 0){
// walk to the end of both lists; at least one of both lists should already have reached its limit.
pos_index += positives_.end() - pos_it;
neg_index += negatives_.end() - neg_it;
while (pos_it++ != pos.end()) ++pos_index;
while (neg_it++ != neg.end()) ++neg_index;
// pos_index += positives_.end() - pos_it;
// neg_index += negatives_.end() - neg_it;
// fill in the remaining elements of the CAR list
do {
// copy the FAR value
......
......@@ -13,7 +13,6 @@
#include <blitz/array.h>
#include <utility>
#include <vector>
#include <algorithm>
namespace bob { namespace measure {
......@@ -113,6 +112,7 @@ namespace bob { namespace measure {
* and negatives) given a certain minimization criteria, input as a
* functional predicate. For a discussion on 'positive' and 'negative' see
* bob::measure::farfrr().
* Here, it is expected that the positives and the negatives are sorted in ascending order.
*
* The predicate method gives back the current minimum given false-acceptance
* (FA) and false-rejection (FR) ratios for the input data. As a predicate,
......@@ -125,7 +125,6 @@ namespace bob { namespace measure {
* predicates.
*
* The minimization is carried out in a data-driven way.
* First, it sorts the positive and negative scores.
* Starting from the lowest score (might be a positive or a negative), it
* increases the threshold based on the distance between the current score
* and the following higher score (also keeping track of duplicate scores)
......@@ -136,17 +135,8 @@ namespace bob { namespace measure {
*/
template <typename T>
double minimizingThreshold(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>& positives, T& predicate){
// sort negative and positive scores ascendingly
std::vector<double> negatives_(negatives.extent(0));
std::copy(negatives.begin(), negatives.end(), negatives_.begin());
std::sort(negatives_.begin(), negatives_.end(), std::less<double>());
std::vector<double> positives_(positives.extent(0));
std::copy(positives.begin(), positives.end(), positives_.begin());
std::sort(positives_.begin(), positives_.end(), std::less<double>());
// iterate over the whole set of points
std::vector<double>::const_iterator pos_it = positives_.begin(), neg_it = negatives_.begin();
auto pos_it = positives.begin(), neg_it = negatives.begin();
// iterate over all possible far and frr points and compute the predicate for each possible threshold...
double min_predicate = 1e8;
......@@ -155,11 +145,11 @@ namespace bob { namespace measure {
// we start with the extreme values for far and frr
double far = 1., frr = 0.;
// the decrease/increase for far/frr when moving one negative/positive
double far_decrease = 1./negatives_.size(), frr_increase = 1./positives_.size();
double far_decrease = 1./negatives.extent(0), frr_increase = 1./positives.extent(0);
// we start with the threshold based on the minimum score
double current_threshold = std::min(*pos_it, *neg_it);
// now, iterate over both lists, in a sorted order
while (pos_it != positives_.end() && neg_it != negatives_.end()){
while (pos_it != positives.end() && neg_it != negatives.end()){
// compute predicate
current_predicate = predicate(far, frr);
if (current_predicate <= min_predicate){
......@@ -180,21 +170,21 @@ namespace bob { namespace measure {
frr += frr_increase;
}
// increase positive and negative as long as they contain the same value
while (neg_it != negatives_.end() && *neg_it == current_threshold) {
while (neg_it != negatives.end() && *neg_it == current_threshold) {
// go to next negative
++neg_it;
far -= far_decrease;
}
while (pos_it != positives_.end() && *pos_it == current_threshold) {
while (pos_it != positives.end() && *pos_it == current_threshold) {
// go to next positive
++pos_it;
frr += frr_increase;
}
// compute a new threshold based on the center between last and current score, if we are not already at the end of the score lists
if (neg_it != negatives_.end() || pos_it != positives_.end()){
if (neg_it != negatives_.end() && pos_it != positives_.end())
if (neg_it != negatives.end() || pos_it != positives.end()){
if (neg_it != negatives.end() && pos_it != positives.end())
current_threshold += std::min(*pos_it, *neg_it);
else if (neg_it != negatives_.end())
else if (neg_it != negatives.end())
current_threshold += *neg_it;
else
current_threshold += *pos_it;
......@@ -220,16 +210,14 @@ namespace bob { namespace measure {
* where the FAR equals the FRR. Graphically, this would be equivalent to the
* intersection between the R.O.C. (or D.E.T.) curves and the identity.
*/
double eerThreshold(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives);
double eerThreshold(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>& positives, bool isSorted = false);
/**
* Calculates the equal-error-rate (EER) given the input data, on the ROC
* Convex Hull, as performed in the Bosaris toolkit.
* (https://sites.google.com/site/bosaristoolkit/)
*/
double eerRocch(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives);
double eerRocch(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>& positives);
/**
* Calculates the threshold that minimizes the error rate, given the input
......@@ -244,15 +232,13 @@ namespace bob { namespace measure {
* The higher the cost, the higher the importance given to *not* making
* mistakes classifying negatives/noise/impostors.
*/
double minWeightedErrorRateThreshold(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives, double cost);
double minWeightedErrorRateThreshold(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>& positives, double cost, bool isSorted = false);
/**
* Calculates the minWeightedErrorRateThreshold() when the cost is 0.5.
*/
inline double minHterThreshold(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives) {
return minWeightedErrorRateThreshold(negatives, positives, 0.5);
inline double minHterThreshold(const blitz::Array<double,1>& negatives, const blitz::Array<double,1>& positives, bool isSorted = false) {
  // the cost is fixed to 0.5; all other arguments are forwarded unchanged
  const double cost = 0.5;
  return minWeightedErrorRateThreshold(negatives, positives, cost, isSorted);
}
/**
......@@ -266,7 +252,7 @@ namespace bob { namespace measure {
* @return The computed threshold
*/
double farThreshold(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives, double far_value);
const blitz::Array<double,1>& positives, double far_value, bool isSorted = false);
/**
* Computes the threshold such that the real FRR is as close as possible
......@@ -279,7 +265,7 @@ namespace bob { namespace measure {
* @return The computed threshold
*/
double frrThreshold(const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives, double frr_value);
const blitz::Array<double,1>& positives, double frr_value, bool isSorted = false);
/**
* Calculates the ROC curve given a set of positive and negative scores and a
......@@ -337,7 +323,8 @@ namespace bob { namespace measure {
blitz::Array<double,2> roc_for_far(
const blitz::Array<double,1>& negatives,
const blitz::Array<double,1>& positives,
const blitz::Array<double,1>& far_list);
const blitz::Array<double,1>& far_list,
bool isSorted = false);
/**
* Returns the Deviate Scale equivalent of a false rejection/acceptance
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment