From c5c4017f8c1444aff0243e373ae46198fc9985d9 Mon Sep 17 00:00:00 2001
From: Amir Mohammadi <183.amir@gmail.com>
Date: Wed, 8 Feb 2017 15:48:14 +0100
Subject: [PATCH] Add an option to the epc function to also get the thresholds
which were used during calculation
---
bob/measure/cpp/error.cpp | 9 +++++++--
bob/measure/cpp/error.h | 17 ++++++++++-------
bob/measure/main.cpp | 17 +++++++++++------
bob/measure/test_error.py | 3 +++
4 files changed, 31 insertions(+), 15 deletions(-)
diff --git a/bob/measure/cpp/error.cpp b/bob/measure/cpp/error.cpp
index e91a18c..7310791 100644
--- a/bob/measure/cpp/error.cpp
+++ b/bob/measure/cpp/error.cpp
@@ -514,14 +514,16 @@ blitz::Array bob::measure::epc
(const blitz::Array& dev_negatives,
const blitz::Array& dev_positives,
const blitz::Array& test_negatives,
- const blitz::Array& test_positives, size_t points, bool isSorted) {
+ const blitz::Array& test_positives, size_t points, bool isSorted,
+ bool thresholds) {
blitz::Array dev_neg, dev_pos;
sort(dev_negatives, dev_neg, isSorted);
sort(dev_positives, dev_pos, isSorted);
double step = 1.0/((double)points-1.0);
- blitz::Array retval(2, points);
+ auto retval_shape0 = (thresholds) ? 3 : 2;
+ blitz::Array retval(retval_shape0, points);
for (int i=0; i<(int)points; ++i) {
double alpha = (double)i*step;
retval(0,i) = alpha;
@@ -530,6 +532,9 @@ blitz::Array bob::measure::epc
std::pair ratios =
bob::measure::farfrr(test_negatives, test_positives, threshold);
retval(1,i) = (ratios.first + ratios.second) / 2;
+ if (thresholds) {
+ retval(2,i) = threshold;
+ }
}
return retval;
}
diff --git a/bob/measure/cpp/error.h b/bob/measure/cpp/error.h
index 4220099..ad7771f 100644
--- a/bob/measure/cpp/error.h
+++ b/bob/measure/cpp/error.h
@@ -365,12 +365,14 @@ namespace bob { namespace measure {
/**
* Calculates the EPC curve given a set of positive and negative scores and a
* number of desired points. Returns a two-dimensional blitz::Array of
- * doubles that express the X (cost) and Y (HTER on the test set given the
- * min. HTER threshold on the development set) coordinates in this order.
- * Please note that, in order to calculate the EPC curve, one needs two sets
- * of data comprising a development set and a test set. The minimum weighted
- * error is calculated on the development set and then applied to the test
- * set to evaluate the half-total error rate at that position.
+ * doubles that express the X (cost), Y (HTER on the test set given the min.
+ * HTER threshold on the development set) coordinates, and the thresholds
+ * which were used during calculations (depending on the `thresholds`
+ * argument) in this order. Please note that, in order to calculate the EPC
+ * curve, one needs two sets of data comprising a development set and a test
+ * set. The minimum weighted error is calculated on the development set and
+ * then applied to the test set to evaluate the half-total error rate at that
+ * position.
*
* The EPC curve plots the HTER on the test set for various values of 'cost'.
* For each value of 'cost', a threshold is found that provides the minimum
@@ -387,7 +389,8 @@ namespace bob { namespace measure {
const blitz::Array& test_negatives,
const blitz::Array& test_positives,
size_t points,
- bool isSorted = false);
+ bool isSorted = false,
+ bool thresholds = false);
}}
diff --git a/bob/measure/main.cpp b/bob/measure/main.cpp
index 9baa2f1..aa9c494 100644
--- a/bob/measure/main.cpp
+++ b/bob/measure/main.cpp
@@ -41,7 +41,9 @@ static auto epc_doc = bob::extension::FunctionDoc(
"epc",
"Calculates points of an Expected Performance Curve (EPC)",
"Calculates the EPC curve given a set of positive and negative scores and a desired number of points. "
- "Returns a two-dimensional :py:class:`numpy.ndarray` of type float that express the X (cost) and Y (weighted error rare on the test set given the min. threshold on the development set) coordinates in this order. "
+ "Returns a two-dimensional :py:class:`numpy.ndarray` of type float with the "
+ "shape of ``(2, points)`` or ``(3, points)`` depending on the ``thresholds`` argument. "
+ "The rows correspond to the X (cost), Y (weighted error rate on the test set given the min. threshold on the development set), and the thresholds which were used to calculate the error (if the ``thresholds`` argument was set to ``True``), respectively. "
"Please note that, in order to calculate the EPC curve, one needs two sets of data comprising a development set and a test set. "
"The minimum weighted error is calculated on the development set and then applied to the test set to evaluate the weighted error rate at that position.\n\n"
"The EPC curve plots the HTER on the test set for various values of 'cost'. "
@@ -50,11 +52,12 @@ static auto epc_doc = bob::extension::FunctionDoc(
"The cost points in which the EPC curve are calculated are distributed uniformly in the range :math:`[0.0, 1.0]`.\n\n"
".. note:: It is more memory efficient, when sorted arrays of scores are provided and the ``is_sorted`` parameter is set to ``True``."
)
-.add_prototype("dev_negatives, dev_positives, test_negatives, test_positives, n_points, is_sorted", "curve")
+.add_prototype("dev_negatives, dev_positives, test_negatives, test_positives, n_points, [is_sorted], [thresholds]", "curve")
.add_parameter("dev_negatives, dev_positives, test_negatives, test_positives", "array_like(1D, float)", "The scores for negatives and positives of the development and test set")
.add_parameter("n_points", "int", "The number of weights for which the EPC curve should be computed")
.add_parameter("is_sorted", "bool", "[Default: ``False``] Set this to ``True`` if the scores are already sorted. If ``False``, scores will be sorted internally, which will require more memory")
-.add_return("curve", "array_like(2D, float)", "The EPC curve, with the first row containing the weights, and the second row containing the weighted thresholds on the test set")
+.add_parameter("thresholds", "bool", "[Default: ``False``] If ``True`` the function returns an array with the shape of ``(3, points)`` where the third row contains the thresholds that were calculated on the development set.")
+.add_return("curve", "array_like(2D, float)", "The EPC curve, with the first row containing the weights and the second row containing the weighted errors on the test set. If ``thresholds`` is ``True``, there is also a third row which contains the thresholds that were calculated on the development set.")
;
static PyObject* epc(PyObject*, PyObject* args, PyObject* kwds) {
BOB_TRY
@@ -67,15 +70,17 @@ BOB_TRY
PyBlitzArrayObject* test_pos;
Py_ssize_t n_points;
PyObject* is_sorted = Py_False;
+ PyObject* thresholds = Py_False;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&O&O&n|O",
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&O&O&n|OO",
kwlist,
&double1d_converter, &dev_neg,
&double1d_converter, &dev_pos,
&double1d_converter, &test_neg,
&double1d_converter, &test_pos,
&n_points,
- &is_sorted
+ &is_sorted,
+ &thresholds
)) return 0;
//protects acquired resources through this scope
@@ -89,7 +94,7 @@ BOB_TRY
*PyBlitzArrayCxx_AsBlitz(dev_pos),
*PyBlitzArrayCxx_AsBlitz(test_neg),
*PyBlitzArrayCxx_AsBlitz(test_pos),
- n_points, PyObject_IsTrue(is_sorted));
+ n_points, PyObject_IsTrue(is_sorted), PyObject_IsTrue(thresholds));
return PyBlitzArrayCxx_AsNumpy(result);
BOB_CATCH_FUNCTION("epc", 0)
diff --git a/bob/measure/test_error.py b/bob/measure/test_error.py
index 75de57c..46452c9 100644
--- a/bob/measure/test_error.py
+++ b/bob/measure/test_error.py
@@ -241,6 +241,9 @@ def test_plots():
# save('nonsep-epc.hdf5', xy)
xyref = bob.io.base.load(F('nonsep-epc.hdf5'))
assert numpy.allclose(xy, xyref, atol=1e-15)
+ xy = epc(dev_negatives, dev_positives,
+ test_negatives, test_positives, 100, False, True)
+ assert numpy.allclose(xy[:2], xyref, atol=1e-15)
def test_rocch():
--
2.21.0