diff --git a/bob/ip/binseg/engine/evaluator.py b/bob/ip/binseg/engine/evaluator.py
index 1bac73519c91419d3fb908375092468a7547767c..6e957cadca8867205188ea6026286c51d8b4f60a 100644
--- a/bob/ip/binseg/engine/evaluator.py
+++ b/bob/ip/binseg/engine/evaluator.py
@@ -15,7 +15,7 @@ import torchvision.transforms.functional as VF
 
 import h5py
 
-from ..utils.metric import base_metrics
+from ..utils.measure import base_measures
 
 import logging
 
@@ -106,7 +106,7 @@ def _sample_metrics(pred, gt, bins):
             accuracy,
             jaccard,
             f1_score,
-        ) = base_metrics(tp_count, fp_count, tn_count, fn_count)
+        ) = base_measures(tp_count, fp_count, tn_count, fn_count)
 
         data.append(
             [
diff --git a/bob/ip/binseg/test/test_basemetrics.py b/bob/ip/binseg/test/test_basemetrics.py
deleted file mode 100644
index 969894f5e453bfdf6fc86fe07448d8e1c8f7ece2..0000000000000000000000000000000000000000
--- a/bob/ip/binseg/test/test_basemetrics.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import unittest
-from bob.ip.binseg.utils.metric import base_metrics
-import random
-
-
-class Tester(unittest.TestCase):
-    """
-    Unit test for base metrics
-    """
-
-    def setUp(self):
-        self.tp = random.randint(1, 100)
-        self.fp = random.randint(1, 100)
-        self.tn = random.randint(1, 100)
-        self.fn = random.randint(1, 100)
-
-    def test_precision(self):
-        precision = base_metrics(self.tp, self.fp, self.tn, self.fn)[0]
-        self.assertEqual((self.tp) / (self.tp + self.fp), precision)
-
-    def test_recall(self):
-        recall = base_metrics(self.tp, self.fp, self.tn, self.fn)[1]
-        self.assertEqual((self.tp) / (self.tp + self.fn), recall)
-
-    def test_specificity(self):
-        specificity = base_metrics(self.tp, self.fp, self.tn, self.fn)[2]
-        self.assertEqual((self.tn) / (self.tn + self.fp), specificity)
-
-    def test_accuracy(self):
-        accuracy = base_metrics(self.tp, self.fp, self.tn, self.fn)[3]
-        self.assertEqual(
-            (self.tp + self.tn) / (self.tp + self.tn + self.fp + self.fn), accuracy
-        )
-
-    def test_jaccard(self):
-        jaccard = base_metrics(self.tp, self.fp, self.tn, self.fn)[4]
-        self.assertEqual(self.tp / (self.tp + self.fp + self.fn), jaccard)
-
-    def test_f1(self):
-        f1 = base_metrics(self.tp, self.fp, self.tn, self.fn)[5]
-        self.assertEqual((2.0 * self.tp) / (2.0 * self.tp + self.fp + self.fn), f1)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/bob/ip/binseg/test/test_measures.py b/bob/ip/binseg/test/test_measures.py
new file mode 100644
index 0000000000000000000000000000000000000000..71c2b241f10569284da2e20ad183a95841e74db4
--- /dev/null
+++ b/bob/ip/binseg/test/test_measures.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import random
+import unittest
+
+import math
+import nose.tools
+
+from ..utils.measure import base_measures, auc
+
+
+class Tester(unittest.TestCase):
+    """
+    Unit test for base measures
+    """
+
+    def setUp(self):
+        self.tp = random.randint(1, 100)
+        self.fp = random.randint(1, 100)
+        self.tn = random.randint(1, 100)
+        self.fn = random.randint(1, 100)
+
+    def test_precision(self):
+        precision = base_measures(self.tp, self.fp, self.tn, self.fn)[0]
+        self.assertEqual((self.tp) / (self.tp + self.fp), precision)
+
+    def test_recall(self):
+        recall = base_measures(self.tp, self.fp, self.tn, self.fn)[1]
+        self.assertEqual((self.tp) / (self.tp + self.fn), recall)
+
+    def test_specificity(self):
+        specificity = base_measures(self.tp, self.fp, self.tn, self.fn)[2]
+        self.assertEqual((self.tn) / (self.tn + self.fp), specificity)
+
+    def test_accuracy(self):
+        accuracy = base_measures(self.tp, self.fp, self.tn, self.fn)[3]
+        self.assertEqual(
+            (self.tp + self.tn) / (self.tp + self.tn + self.fp + self.fn),
+            accuracy,
+        )
+
+    def test_jaccard(self):
+        jaccard = base_measures(self.tp, self.fp, self.tn, self.fn)[4]
+        self.assertEqual(self.tp / (self.tp + self.fp + self.fn), jaccard)
+
+    def test_f1(self):
+        p, r, s, a, j, f1 = base_measures(self.tp, self.fp, self.tn, self.fn)
+        self.assertEqual(
+            (2.0 * self.tp) / (2.0 * self.tp + self.fp + self.fn), f1
+        )
+        self.assertAlmostEqual((2 * p * r) / (p + r), f1)  # base definition
+
+
+def test_auc():
+
+    # basic tests
+    assert math.isclose(auc([0.0, 0.5, 1.0], [1.0, 1.0, 1.0]), 1.0)
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0], [1.0, 0.5, 0.0]), 0.5, rel_tol=0.001
+    )
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0], [0.0, 0.0, 0.0]), 0.0, rel_tol=0.001
+    )
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0], [0.0, 1.0, 0.0]), 0.5, rel_tol=0.001
+    )
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0], [0.0, 0.5, 0.0]), 0.25, rel_tol=0.001
+    )
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0], [0.0, 0.5, 0.0]), 0.25, rel_tol=0.001
+    )
+
+    # the same holds when x and y are reversed
+    assert math.isclose(auc([0.0, 0.5, 1.0][::-1], [1.0, 1.0, 1.0][::-1]), 1.0)
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0][::-1], [1.0, 0.5, 0.0][::-1]), 0.5, rel_tol=0.001
+    )
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0][::-1], [0.0, 0.0, 0.0][::-1]), 0.0, rel_tol=0.001
+    )
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0][::-1], [0.0, 1.0, 0.0][::-1]), 0.5, rel_tol=0.001
+    )
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0][::-1], [0.0, 0.5, 0.0][::-1]), 0.25, rel_tol=0.001
+    )
+    assert math.isclose(
+        auc([0.0, 0.5, 1.0][::-1], [0.0, 0.5, 0.0][::-1]), 0.25, rel_tol=0.001
+    )
+
+
+@nose.tools.raises(ValueError)
+def test_auc_raises_value_error():
+
+    # x is **not** monotonically increasing or decreasing
+    assert math.isclose(auc([0.0, 0.5, 0.0], [1.0, 1.0, 1.0]), 1.0)
+
+
+@nose.tools.raises(AssertionError)
+def test_auc_raises_assertion_error():
+
+    # x is **not** the same size as y
+    assert math.isclose(auc([0.0, 0.5, 1.0], [1.0, 1.0]), 1.0)
diff --git a/bob/ip/binseg/utils/metric.py b/bob/ip/binseg/utils/measure.py
similarity index 55%
rename from bob/ip/binseg/utils/metric.py
rename to bob/ip/binseg/utils/measure.py
index b49f4ede3265fc18a38125178622f2dfe288c59d..881ac7c8ce7488db55fe93f2bdee5763e0008ecd 100644
--- a/bob/ip/binseg/utils/metric.py
+++ b/bob/ip/binseg/utils/measure.py
@@ -28,33 +28,62 @@ class SmoothedValue:
         return d.mean().item()
 
 
-def base_metrics(tp, fp, tn, fn):
+def base_measures(tp, fp, tn, fn):
     """
-    Calculates Precision, Recall (=Sensitivity), Specificity, Accuracy, Jaccard and F1-score (Dice)
+    Calculates a bunch of measures from true/false positive and negative counts
+
+    This function returns standard machine learning measures computed from
+    counts of true and false positives and negatives.
+
+    For a thorough look into these and alternate names for the returned values,
+    please check Wikipedia's entry on `Precision and Recall`_.
 
     Parameters
     ----------
 
-    tp : float
-        True positives
+    tp : int
+        True positive count, AKA "hit"
 
-    fp : float
-        False positives
+    fp : int
+        False positive count, AKA "false alarm", or "Type I error"
 
-    tn : float
-        True negatives
+    tn : int
+        True negative count, AKA "correct rejection"
 
-    fn : float
-        False Negatives
+    fn : int
+        False negative count, AKA "miss", or "Type II error"
 
     Returns
    -------
 
-    metrics : list
+    precision : float
+        P, AKA positive predictive value (PPV)
+        :math:`\frac{tp}{tp+fp}`
+
+    recall : float
+        R, AKA sensitivity, hit rate, or true positive rate (TPR)
+        :math:`\frac{tp}{p} = \frac{tp}{tp+fn}`
+
+    specificity : float
+        S, AKA selectivity or true negative rate (TNR).
+        :math:`\frac{tn}{n} = \frac{tn}{tn+fp}`
+
+    accuracy : float
+        A, :math:`\frac{tp + tn}{p + n} = \frac{tp + tn}{tp + fp + tn + fn}`
+
+    jaccard : float
+        J, :math:`\frac{tp}{tp+fp+fn}`, see `Jaccard Index`_
+
+    f1_score : float
+        F1, :math:`\frac{2 P R}{P + R} = \frac{2tp}{2tp + fp + fn}`, see
+        `F1-score`_
 
     """
+
+    tp = float(tp)
+    tn = float(tn)
     precision = tp / (tp + fp + ((tp + fp) == 0))
     recall = tp / (tp + fn + ((tp + fn) == 0))
     specificity = tn / (fp + tn + ((fp + tn) == 0))
@@ -87,7 +116,10 @@ def auc(x, y):
 
     """
 
-    assert len(x) == len(y)
+    x = numpy.array(x)
+    y = numpy.array(y)
+
+    assert len(x) == len(y), "x and y sequences must have the same length"
 
     dx = numpy.diff(x)
     if numpy.any(dx < 0):
@@ -99,18 +131,11 @@
             raise ValueError("x is neither increasing nor decreasing "
                              ": {}.".format(x))
 
-    # avoids repeated sums for every y
-    y_unique, y_unique_ndx = numpy.unique(y, return_index=True)
-    x_unique = x[y_unique_ndx]
-
-    if y_unique.shape[0] > 1:
-        x_interp = numpy.interp(
-            numpy.arange(0, 1, 0.001),
-            y_unique,
-            x_unique,
-            left=0.0,
-            right=0.0,
-        )
-        return x_interp.sum() * 0.001
-
-    return 0.0
+    y_interp = numpy.interp(
+        numpy.arange(0, 1, 0.001),
+        numpy.array(x),
+        numpy.array(y),
+        left=1.0,
+        right=0.0,
+    )
+    return y_interp.sum() * 0.001
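
For reference, a minimal, hypothetical usage sketch of the renamed helpers (not part of the patch itself; it assumes the package is importable as `bob.ip.binseg` and uses made-up confusion-matrix counts):

    from bob.ip.binseg.utils.measure import base_measures, auc

    # made-up counts for a single thresholded prediction (illustration only)
    precision, recall, specificity, accuracy, jaccard, f1 = base_measures(
        tp=90, fp=10, tn=880, fn=20
    )

    # area under a curve sampled at three points; x must be monotonically
    # increasing or decreasing, otherwise auc() raises ValueError
    area = auc([0.0, 0.5, 1.0], [1.0, 0.8, 0.6])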