Commit 8c873885 authored by André Anjos

[utils.metric] Rename as measure as some of the values are not real metrics; Add tests for AUC and improve its implementation
parent acbd24f8
@@ -15,7 +15,7 @@ import torchvision.transforms.functional as VF
 import h5py

-from ..utils.metric import base_metrics
+from ..utils.measure import base_measures

 import logging
@@ -106,7 +106,7 @@ def _sample_metrics(pred, gt, bins):
             accuracy,
             jaccard,
             f1_score,
-        ) = base_metrics(tp_count, fp_count, tn_count, fn_count)
+        ) = base_measures(tp_count, fp_count, tn_count, fn_count)

         data.append(
             [
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import unittest
from bob.ip.binseg.utils.metric import base_metrics
import random


class Tester(unittest.TestCase):
    """
    Unit test for base metrics
    """

    def setUp(self):
        self.tp = random.randint(1, 100)
        self.fp = random.randint(1, 100)
        self.tn = random.randint(1, 100)
        self.fn = random.randint(1, 100)

    def test_precision(self):
        precision = base_metrics(self.tp, self.fp, self.tn, self.fn)[0]
        self.assertEqual((self.tp) / (self.tp + self.fp), precision)

    def test_recall(self):
        recall = base_metrics(self.tp, self.fp, self.tn, self.fn)[1]
        self.assertEqual((self.tp) / (self.tp + self.fn), recall)

    def test_specificity(self):
        specificity = base_metrics(self.tp, self.fp, self.tn, self.fn)[2]
        self.assertEqual((self.tn) / (self.tn + self.fp), specificity)

    def test_accuracy(self):
        accuracy = base_metrics(self.tp, self.fp, self.tn, self.fn)[3]
        self.assertEqual(
            (self.tp + self.tn) / (self.tp + self.tn + self.fp + self.fn),
            accuracy,
        )

    def test_jaccard(self):
        jaccard = base_metrics(self.tp, self.fp, self.tn, self.fn)[4]
        self.assertEqual(self.tp / (self.tp + self.fp + self.fn), jaccard)

    def test_f1(self):
        f1 = base_metrics(self.tp, self.fp, self.tn, self.fn)[5]
        self.assertEqual((2.0 * self.tp) / (2.0 * self.tp + self.fp + self.fn), f1)


if __name__ == "__main__":
    unittest.main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import random
import unittest
import math

import nose.tools

from ..utils.measure import base_measures, auc


class Tester(unittest.TestCase):
    """
    Unit test for base measures
    """

    def setUp(self):
        self.tp = random.randint(1, 100)
        self.fp = random.randint(1, 100)
        self.tn = random.randint(1, 100)
        self.fn = random.randint(1, 100)

    def test_precision(self):
        precision = base_measures(self.tp, self.fp, self.tn, self.fn)[0]
        self.assertEqual((self.tp) / (self.tp + self.fp), precision)

    def test_recall(self):
        recall = base_measures(self.tp, self.fp, self.tn, self.fn)[1]
        self.assertEqual((self.tp) / (self.tp + self.fn), recall)

    def test_specificity(self):
        specificity = base_measures(self.tp, self.fp, self.tn, self.fn)[2]
        self.assertEqual((self.tn) / (self.tn + self.fp), specificity)

    def test_accuracy(self):
        accuracy = base_measures(self.tp, self.fp, self.tn, self.fn)[3]
        self.assertEqual(
            (self.tp + self.tn) / (self.tp + self.tn + self.fp + self.fn),
            accuracy,
        )

    def test_jaccard(self):
        jaccard = base_measures(self.tp, self.fp, self.tn, self.fn)[4]
        self.assertEqual(self.tp / (self.tp + self.fp + self.fn), jaccard)

    def test_f1(self):
        p, r, s, a, j, f1 = base_measures(self.tp, self.fp, self.tn, self.fn)
        self.assertEqual(
            (2.0 * self.tp) / (2.0 * self.tp + self.fp + self.fn), f1
        )
        self.assertAlmostEqual((2 * p * r) / (p + r), f1)  # base definition


def test_auc():
    # basic tests
    assert math.isclose(auc([0.0, 0.5, 1.0], [1.0, 1.0, 1.0]), 1.0)
    assert math.isclose(
        auc([0.0, 0.5, 1.0], [1.0, 0.5, 0.0]), 0.5, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0], [0.0, 0.0, 0.0]), 0.0, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0], [0.0, 1.0, 0.0]), 0.5, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0], [0.0, 0.5, 0.0]), 0.25, rel_tol=0.001
    )

    # reversing the x and y sequences must give the same areas
    assert math.isclose(auc([0.0, 0.5, 1.0][::-1], [1.0, 1.0, 1.0][::-1]), 1.0)
    assert math.isclose(
        auc([0.0, 0.5, 1.0][::-1], [1.0, 0.5, 0.0][::-1]), 0.5, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0][::-1], [0.0, 0.0, 0.0][::-1]), 0.0, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0][::-1], [0.0, 1.0, 0.0][::-1]), 0.5, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0][::-1], [0.0, 0.5, 0.0][::-1]), 0.25, rel_tol=0.001
    )


@nose.tools.raises(ValueError)
def test_auc_raises_value_error():
    # x is **not** monotonically increasing or decreasing
    auc([0.0, 0.5, 0.0], [1.0, 1.0, 1.0])


@nose.tools.raises(AssertionError)
def test_auc_raises_assertion_error():
    # x is **not** the same size as y
    auc([0.0, 0.5, 1.0], [1.0, 1.0])
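The expected values in test_auc can be confirmed by hand: the polyline through (0, 0), (0.5, 1) and (1, 0), for instance, encloses a triangle of area base * height / 2 = 0.5. The rectangle-sum scheme of the replacement auc() (see the utils.measure diff that follows) reproduces this; the snippet below is an illustrative re-derivation in plain numpy, not the library code itself:

    import numpy

    # triangle through (0, 0), (0.5, 1), (1, 0): exact enclosed area is 0.5
    x = [0.0, 0.5, 1.0]
    y = [0.0, 1.0, 0.0]

    # resample y on a fine, evenly spaced grid and add up rectangle areas,
    # exactly as the new implementation does
    grid = numpy.arange(0, 1, 0.001)
    approx = numpy.interp(grid, x, y, left=1.0, right=0.0).sum() * 0.001
    print(approx)  # ~0.5, within the 0.1% tolerance the tests allow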
@@ -28,33 +28,62 @@ class SmoothedValue:
         return d.mean().item()


-def base_metrics(tp, fp, tn, fn):
+def base_measures(tp, fp, tn, fn):
     """
-    Calculates Precision, Recall (=Sensitivity), Specificity, Accuracy, Jaccard and F1-score (Dice)
+    Calculates a set of measures from true/false positive and negative counts
+
+    This function returns standard machine learning measures computed from
+    the counts of true/false positives and negatives.
+
+    For a thorough look into these and alternate names for the returned
+    values, please check Wikipedia's entry on `Precision and Recall`_.

     Parameters
     ----------

-    tp : float
-        True positives
+    tp : int
+        True positive count, AKA "hit"

-    fp : float
-        False positives
+    fp : int
+        False positive count, AKA "false alarm", or "Type I error"

-    tn : float
-        True negatives
+    tn : int
+        True negative count, AKA "correct rejection"

-    fn : float
-        False Negatives
+    fn : int
+        False negative count, AKA "miss", or "Type II error"


     Returns
     -------

-    metrics : list
+    precision : float
+        P, AKA positive predictive value (PPV),
+        :math:`\frac{tp}{tp+fp}`
+
+    recall : float
+        R, AKA sensitivity, hit rate, or true positive rate (TPR),
+        :math:`\frac{tp}{p} = \frac{tp}{tp+fn}`
+
+    specificity : float
+        S, AKA selectivity or true negative rate (TNR),
+        :math:`\frac{tn}{n} = \frac{tn}{tn+fp}`
+
+    accuracy : float
+        A, :math:`\frac{tp + tn}{p + n} = \frac{tp + tn}{tp + fp + tn + fn}`
+
+    jaccard : float
+        J, :math:`\frac{tp}{tp+fp+fn}`, see `Jaccard Index`_
+
+    f1_score : float
+        F1, :math:`\frac{2 P R}{P + R} = \frac{2tp}{2tp + fp + fn}`, see
+        `F1-score`_

     """

+    tp = float(tp)
+    tn = float(tn)
     precision = tp / (tp + fp + ((tp + fp) == 0))
     recall = tp / (tp + fn + ((tp + fn) == 0))
     specificity = tn / (fp + tn + ((fp + tn) == 0))
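The guard terms such as ((tp + fp) == 0) in the denominators above evaluate to 1 exactly when the denominator would otherwise be zero, so the corresponding measure degrades to 0.0 instead of raising ZeroDivisionError. A minimal usage sketch follows; the absolute import path assumes the renamed utils.measure module, and the counts are made up for illustration:

    from bob.ip.binseg.utils.measure import base_measures

    # e.g. 90 true positives, 10 false alarms, 880 true negatives, 20 misses
    precision, recall, specificity, accuracy, jaccard, f1 = base_measures(
        90, 10, 880, 20
    )
    assert precision == 90 / 100  # tp / (tp + fp)
    assert recall == 90 / 110     # tp / (tp + fn)
    assert f1 == 180 / 210        # 2*tp / (2*tp + fp + fn)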
@@ -87,7 +116,10 @@ def auc(x, y):
     """

-    assert len(x) == len(y)
+    x = numpy.array(x)
+    y = numpy.array(y)
+
+    assert len(x) == len(y), "x and y sequences must have the same length"

     dx = numpy.diff(x)
     if numpy.any(dx < 0):
@@ -99,18 +131,11 @@ def auc(x, y):
            raise ValueError("x is neither increasing nor decreasing "
                             ": {}.".format(x))

-    # avoids repeated sums for every y
-    y_unique, y_unique_ndx = numpy.unique(y, return_index=True)
-    x_unique = x[y_unique_ndx]
-    if y_unique.shape[0] > 1:
-        x_interp = numpy.interp(
-            numpy.arange(0, 1, 0.001),
-            y_unique,
-            x_unique,
-            left=0.0,
-            right=0.0,
-        )
-        return x_interp.sum() * 0.001
-    return 0.0
+    y_interp = numpy.interp(
+        numpy.arange(0, 1, 0.001),
+        numpy.array(x),
+        numpy.array(y),
+        left=1.0,
+        right=0.0,
+    )
+    return y_interp.sum() * 0.001
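The replacement integrates y as a function of x rather than the reverse: it resamples the curve at 1000 evenly spaced abscissae on [0, 1) and sums rectangle areas of width 0.001 (left=1.0 presumably extrapolates precision-recall style curves that do not reach x = 0). A short usage sketch, again assuming the renamed module path:

    from bob.ip.binseg.utils.measure import auc

    # symmetric triangle peaking at (0.5, 1.0): the enclosed area is 0.5
    print(auc([0.0, 0.5, 1.0], [0.0, 1.0, 0.0]))  # ~0.5

    # monotonically decreasing x is accepted as well; as the tests above
    # show, the result is the same as for the flipped sequences
    print(auc([1.0, 0.5, 0.0], [0.0, 1.0, 0.0]))  # ~0.5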