Commit c5c4017f authored by Amir Mohammadi

Add an option to the epc function to also get the thresholds which were used during calculation

parent 142014da
Pipeline #8405 passed with stages
in 12 minutes and 28 seconds
...@@ -514,14 +514,16 @@ blitz::Array<double,2> bob::measure::epc ...@@ -514,14 +514,16 @@ blitz::Array<double,2> bob::measure::epc
(const blitz::Array<double,1>& dev_negatives, (const blitz::Array<double,1>& dev_negatives,
const blitz::Array<double,1>& dev_positives, const blitz::Array<double,1>& dev_positives,
const blitz::Array<double,1>& test_negatives, const blitz::Array<double,1>& test_negatives,
const blitz::Array<double,1>& test_positives, size_t points, bool isSorted) { const blitz::Array<double,1>& test_positives, size_t points, bool isSorted,
bool thresholds) {
blitz::Array<double,1> dev_neg, dev_pos; blitz::Array<double,1> dev_neg, dev_pos;
sort(dev_negatives, dev_neg, isSorted); sort(dev_negatives, dev_neg, isSorted);
sort(dev_positives, dev_pos, isSorted); sort(dev_positives, dev_pos, isSorted);
double step = 1.0/((double)points-1.0); double step = 1.0/((double)points-1.0);
blitz::Array<double,2> retval(2, points); auto retval_shape0 = (thresholds) ? 3 : 2;
blitz::Array<double,2> retval(retval_shape0, points);
for (int i=0; i<(int)points; ++i) { for (int i=0; i<(int)points; ++i) {
double alpha = (double)i*step; double alpha = (double)i*step;
retval(0,i) = alpha; retval(0,i) = alpha;
...@@ -530,6 +532,9 @@ blitz::Array<double,2> bob::measure::epc ...@@ -530,6 +532,9 @@ blitz::Array<double,2> bob::measure::epc
std::pair<double, double> ratios = std::pair<double, double> ratios =
bob::measure::farfrr(test_negatives, test_positives, threshold); bob::measure::farfrr(test_negatives, test_positives, threshold);
retval(1,i) = (ratios.first + ratios.second) / 2; retval(1,i) = (ratios.first + ratios.second) / 2;
if (thresholds) {
retval(2,i) = threshold;
}
} }
return retval; return retval;
} }
...@@ -365,12 +365,14 @@ namespace bob { namespace measure { ...@@ -365,12 +365,14 @@ namespace bob { namespace measure {
/** /**
* Calculates the EPC curve given a set of positive and negative scores and a * Calculates the EPC curve given a set of positive and negative scores and a
* number of desired points. Returns a two-dimensional blitz::Array of * number of desired points. Returns a two-dimensional blitz::Array of
* doubles that express the X (cost) and Y (HTER on the test set given the * doubles that express the X (cost), Y (HTER on the test set given the min.
* min. HTER threshold on the development set) coordinates in this order. * HTER threshold on the development set) coordinates, and the thresholds
* Please note that, in order to calculate the EPC curve, one needs two sets * which were used during calculations (depending on the `thresholds`
* of data comprising a development set and a test set. The minimum weighted * argument) in this order. Please note that, in order to calculate the EPC
* error is calculated on the development set and then applied to the test * curve, one needs two sets of data comprising a development set and a test
* set to evaluate the half-total error rate at that position. * set. The minimum weighted error is calculated on the development set and
* then applied to the test set to evaluate the half-total error rate at that
* position.
* *
* The EPC curve plots the HTER on the test set for various values of 'cost'. * The EPC curve plots the HTER on the test set for various values of 'cost'.
* For each value of 'cost', a threshold is found that provides the minimum * For each value of 'cost', a threshold is found that provides the minimum
...@@ -387,7 +389,8 @@ namespace bob { namespace measure { ...@@ -387,7 +389,8 @@ namespace bob { namespace measure {
const blitz::Array<double,1>& test_negatives, const blitz::Array<double,1>& test_negatives,
const blitz::Array<double,1>& test_positives, const blitz::Array<double,1>& test_positives,
size_t points, size_t points,
bool isSorted = false); bool isSorted = false,
bool thresholds = false);
}} }}
......
...@@ -41,7 +41,9 @@ static auto epc_doc = bob::extension::FunctionDoc( ...@@ -41,7 +41,9 @@ static auto epc_doc = bob::extension::FunctionDoc(
"epc", "epc",
"Calculates points of an Expected Performance Curve (EPC)", "Calculates points of an Expected Performance Curve (EPC)",
"Calculates the EPC curve given a set of positive and negative scores and a desired number of points. " "Calculates the EPC curve given a set of positive and negative scores and a desired number of points. "
"Returns a two-dimensional :py:class:`numpy.ndarray` of type float that express the X (cost) and Y (weighted error rare on the test set given the min. threshold on the development set) coordinates in this order. " "Returns a two-dimensional :py:class:`numpy.ndarray` of type float with the "
"shape of ``(2, points)`` or ``(3, points)`` depending on the ``thresholds`` argument. "
"The rows correspond to the X (cost), Y (weighted error rate on the test set given the min. threshold on the development set), and the thresholds which were used to calculate the error (if the ``thresholds`` argument was set to ``True``), respectively. "
"Please note that, in order to calculate the EPC curve, one needs two sets of data comprising a development set and a test set. " "Please note that, in order to calculate the EPC curve, one needs two sets of data comprising a development set and a test set. "
"The minimum weighted error is calculated on the development set and then applied to the test set to evaluate the weighted error rate at that position.\n\n" "The minimum weighted error is calculated on the development set and then applied to the test set to evaluate the weighted error rate at that position.\n\n"
"The EPC curve plots the HTER on the test set for various values of 'cost'. " "The EPC curve plots the HTER on the test set for various values of 'cost'. "
...@@ -50,11 +52,12 @@ static auto epc_doc = bob::extension::FunctionDoc( ...@@ -50,11 +52,12 @@ static auto epc_doc = bob::extension::FunctionDoc(
"The cost points in which the EPC curve are calculated are distributed uniformly in the range :math:`[0.0, 1.0]`.\n\n" "The cost points in which the EPC curve are calculated are distributed uniformly in the range :math:`[0.0, 1.0]`.\n\n"
".. note:: It is more memory efficient, when sorted arrays of scores are provided and the ``is_sorted`` parameter is set to ``True``." ".. note:: It is more memory efficient, when sorted arrays of scores are provided and the ``is_sorted`` parameter is set to ``True``."
) )
.add_prototype("dev_negatives, dev_positives, test_negatives, test_positives, n_points, is_sorted", "curve") .add_prototype("dev_negatives, dev_positives, test_negatives, test_positives, n_points, [is_sorted], [thresholds]", "curve")
.add_parameter("dev_negatives, dev_positives, test_negatives, test_positives", "array_like(1D, float)", "The scores for negatives and positives of the development and test set") .add_parameter("dev_negatives, dev_positives, test_negatives, test_positives", "array_like(1D, float)", "The scores for negatives and positives of the development and test set")
.add_parameter("n_points", "int", "The number of weights for which the EPC curve should be computed") .add_parameter("n_points", "int", "The number of weights for which the EPC curve should be computed")
.add_parameter("is_sorted", "bool", "[Default: ``False``] Set this to ``True`` if the scores are already sorted. If ``False``, scores will be sorted internally, which will require more memory") .add_parameter("is_sorted", "bool", "[Default: ``False``] Set this to ``True`` if the scores are already sorted. If ``False``, scores will be sorted internally, which will require more memory")
.add_return("curve", "array_like(2D, float)", "The EPC curve, with the first row containing the weights, and the second row containing the weighted thresholds on the test set") .add_parameter("thresholds", "bool", "[Default: ``False``] If ``True`` the function returns an array with the shape of ``(3, points)`` where the third row contains the thresholds that were calculated on the development set.")
.add_return("curve", "array_like(2D, float)", "The EPC curve, with the first row containing the weights and the second row containing the weighted errors on the test set. If ``thresholds`` is ``True``, there is also a third row which contains the thresholds that were calculated on the development set.")
; ;
static PyObject* epc(PyObject*, PyObject* args, PyObject* kwds) { static PyObject* epc(PyObject*, PyObject* args, PyObject* kwds) {
BOB_TRY BOB_TRY
...@@ -67,15 +70,17 @@ BOB_TRY ...@@ -67,15 +70,17 @@ BOB_TRY
PyBlitzArrayObject* test_pos; PyBlitzArrayObject* test_pos;
Py_ssize_t n_points; Py_ssize_t n_points;
PyObject* is_sorted = Py_False; PyObject* is_sorted = Py_False;
PyObject* thresholds = Py_False;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&O&O&n|O", if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&O&O&n|OO",
kwlist, kwlist,
&double1d_converter, &dev_neg, &double1d_converter, &dev_neg,
&double1d_converter, &dev_pos, &double1d_converter, &dev_pos,
&double1d_converter, &test_neg, &double1d_converter, &test_neg,
&double1d_converter, &test_pos, &double1d_converter, &test_pos,
&n_points, &n_points,
&is_sorted &is_sorted,
&thresholds
)) return 0; )) return 0;
//protects acquired resources through this scope //protects acquired resources through this scope
...@@ -89,7 +94,7 @@ BOB_TRY ...@@ -89,7 +94,7 @@ BOB_TRY
*PyBlitzArrayCxx_AsBlitz<double,1>(dev_pos), *PyBlitzArrayCxx_AsBlitz<double,1>(dev_pos),
*PyBlitzArrayCxx_AsBlitz<double,1>(test_neg), *PyBlitzArrayCxx_AsBlitz<double,1>(test_neg),
*PyBlitzArrayCxx_AsBlitz<double,1>(test_pos), *PyBlitzArrayCxx_AsBlitz<double,1>(test_pos),
n_points, PyObject_IsTrue(is_sorted)); n_points, PyObject_IsTrue(is_sorted), PyObject_IsTrue(thresholds));
return PyBlitzArrayCxx_AsNumpy(result); return PyBlitzArrayCxx_AsNumpy(result);
BOB_CATCH_FUNCTION("epc", 0) BOB_CATCH_FUNCTION("epc", 0)
......
...@@ -241,6 +241,9 @@ def test_plots(): ...@@ -241,6 +241,9 @@ def test_plots():
# save('nonsep-epc.hdf5', xy) # save('nonsep-epc.hdf5', xy)
xyref = bob.io.base.load(F('nonsep-epc.hdf5')) xyref = bob.io.base.load(F('nonsep-epc.hdf5'))
assert numpy.allclose(xy, xyref, atol=1e-15) assert numpy.allclose(xy, xyref, atol=1e-15)
xy = epc(dev_negatives, dev_positives,
test_negatives, test_positives, 100, False, True)
assert numpy.allclose(xy[:2], xyref, atol=1e-15)
def test_rocch(): def test_rocch():
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment