Commit 8c873885 authored by André Anjos

[utils.metric] Rename as measure as some of the values are not real metrics; Add tests for AUC and improve its implementation
parent acbd24f8
@@ -15,7 +15,7 @@ import torchvision.transforms.functional as VF
 import h5py

-from ..utils.metric import base_metrics
+from ..utils.measure import base_measures

 import logging
@@ -106,7 +106,7 @@ def _sample_metrics(pred, gt, bins):
             accuracy,
             jaccard,
             f1_score,
-        ) = base_metrics(tp_count, fp_count, tn_count, fn_count)
+        ) = base_measures(tp_count, fp_count, tn_count, fn_count)

         data.append(
             [
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import unittest
from bob.ip.binseg.utils.metric import base_metrics
import random


class Tester(unittest.TestCase):
    """
    Unit test for base metrics
    """

    def setUp(self):
        self.tp = random.randint(1, 100)
        self.fp = random.randint(1, 100)
        self.tn = random.randint(1, 100)
        self.fn = random.randint(1, 100)

    def test_precision(self):
        precision = base_metrics(self.tp, self.fp, self.tn, self.fn)[0]
        self.assertEqual((self.tp) / (self.tp + self.fp), precision)

    def test_recall(self):
        recall = base_metrics(self.tp, self.fp, self.tn, self.fn)[1]
        self.assertEqual((self.tp) / (self.tp + self.fn), recall)

    def test_specificity(self):
        specificity = base_metrics(self.tp, self.fp, self.tn, self.fn)[2]
        self.assertEqual((self.tn) / (self.tn + self.fp), specificity)

    def test_accuracy(self):
        accuracy = base_metrics(self.tp, self.fp, self.tn, self.fn)[3]
        self.assertEqual(
            (self.tp + self.tn) / (self.tp + self.tn + self.fp + self.fn),
            accuracy,
        )

    def test_jaccard(self):
        jaccard = base_metrics(self.tp, self.fp, self.tn, self.fn)[4]
        self.assertEqual(self.tp / (self.tp + self.fp + self.fn), jaccard)

    def test_f1(self):
        f1 = base_metrics(self.tp, self.fp, self.tn, self.fn)[5]
        self.assertEqual((2.0 * self.tp) / (2.0 * self.tp + self.fp + self.fn), f1)


if __name__ == "__main__":
    unittest.main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import random
import unittest
import math

import nose.tools

from ..utils.measure import base_measures, auc


class Tester(unittest.TestCase):
    """
    Unit test for base measures
    """

    def setUp(self):
        self.tp = random.randint(1, 100)
        self.fp = random.randint(1, 100)
        self.tn = random.randint(1, 100)
        self.fn = random.randint(1, 100)

    def test_precision(self):
        precision = base_measures(self.tp, self.fp, self.tn, self.fn)[0]
        self.assertEqual((self.tp) / (self.tp + self.fp), precision)

    def test_recall(self):
        recall = base_measures(self.tp, self.fp, self.tn, self.fn)[1]
        self.assertEqual((self.tp) / (self.tp + self.fn), recall)

    def test_specificity(self):
        specificity = base_measures(self.tp, self.fp, self.tn, self.fn)[2]
        self.assertEqual((self.tn) / (self.tn + self.fp), specificity)

    def test_accuracy(self):
        accuracy = base_measures(self.tp, self.fp, self.tn, self.fn)[3]
        self.assertEqual(
            (self.tp + self.tn) / (self.tp + self.tn + self.fp + self.fn),
            accuracy,
        )

    def test_jaccard(self):
        jaccard = base_measures(self.tp, self.fp, self.tn, self.fn)[4]
        self.assertEqual(self.tp / (self.tp + self.fp + self.fn), jaccard)

    def test_f1(self):
        p, r, s, a, j, f1 = base_measures(self.tp, self.fp, self.tn, self.fn)
        self.assertEqual(
            (2.0 * self.tp) / (2.0 * self.tp + self.fp + self.fn), f1
        )
        self.assertAlmostEqual((2 * p * r) / (p + r), f1)  # base definition


def test_auc():
    # basic tests
    assert math.isclose(auc([0.0, 0.5, 1.0], [1.0, 1.0, 1.0]), 1.0)
    assert math.isclose(
        auc([0.0, 0.5, 1.0], [1.0, 0.5, 0.0]), 0.5, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0], [0.0, 0.0, 0.0]), 0.0, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0], [0.0, 1.0, 0.0]), 0.5, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0], [0.0, 0.5, 0.0]), 0.25, rel_tol=0.001
    )

    # reversing the x and y sequences must give the same areas
    assert math.isclose(auc([0.0, 0.5, 1.0][::-1], [1.0, 1.0, 1.0][::-1]), 1.0)
    assert math.isclose(
        auc([0.0, 0.5, 1.0][::-1], [1.0, 0.5, 0.0][::-1]), 0.5, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0][::-1], [0.0, 0.0, 0.0][::-1]), 0.0, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0][::-1], [0.0, 1.0, 0.0][::-1]), 0.5, rel_tol=0.001
    )
    assert math.isclose(
        auc([0.0, 0.5, 1.0][::-1], [0.0, 0.5, 0.0][::-1]), 0.25, rel_tol=0.001
    )


@nose.tools.raises(ValueError)
def test_auc_raises_value_error():
    # x is **not** monotonically increasing or decreasing
    auc([0.0, 0.5, 0.0], [1.0, 1.0, 1.0])


@nose.tools.raises(AssertionError)
def test_auc_raises_assertion_error():
    # x is **not** the same size as y
    auc([0.0, 0.5, 1.0], [1.0, 1.0])
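The expected values in test_auc can be confirmed by hand: the polyline through (0, 0), (0.5, 1) and (1, 0), for instance, encloses a triangle of area base * height / 2 = 0.5. The rectangle-sum scheme of the replacement auc() (see the utils.measure diff that follows) reproduces this; the snippet below is an illustrative re-derivation in plain numpy, not the library code itself:

    import numpy

    # triangle through (0, 0), (0.5, 1), (1, 0): exact enclosed area is 0.5
    x = [0.0, 0.5, 1.0]
    y = [0.0, 1.0, 0.0]

    # resample y on a fine, evenly spaced grid and add up rectangle areas,
    # exactly as the new implementation does
    grid = numpy.arange(0, 1, 0.001)
    approx = numpy.interp(grid, x, y, left=1.0, right=0.0).sum() * 0.001
    print(approx)  # ~0.5, within the 0.1% tolerance the tests allow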
@@ -28,33 +28,62 @@ class SmoothedValue:
         return d.mean().item()


-def base_metrics(tp, fp, tn, fn):
+def base_measures(tp, fp, tn, fn):
     """
-    Calculates Precision, Recall (=Sensitivity), Specificity, Accuracy, Jaccard and F1-score (Dice)
+    Calculates a set of measures from true/false positive and negative counts
+
+    This function returns standard machine learning measures computed from
+    the counts of true/false positives and negatives.
+
+    For a thorough look into these and alternate names for the returned
+    values, please check Wikipedia's entry on `Precision and Recall`_.

     Parameters
     ----------

-    tp : float
-        True positives
+    tp : int
+        True positive count, AKA "hit"

-    fp : float
-        False positives
+    fp : int
+        False positive count, AKA "false alarm", or "Type I error"

-    tn : float
-        True negatives
+    tn : int
+        True negative count, AKA "correct rejection"

-    fn : float
-        False Negatives
+    fn : int
+        False negative count, AKA "miss", or "Type II error"


     Returns
     -------

-    metrics : list
+    precision : float
+        P, AKA positive predictive value (PPV),
+        :math:`\frac{tp}{tp+fp}`
+
+    recall : float
+        R, AKA sensitivity, hit rate, or true positive rate (TPR),
+        :math:`\frac{tp}{p} = \frac{tp}{tp+fn}`
+
+    specificity : float
+        S, AKA selectivity or true negative rate (TNR),
+        :math:`\frac{tn}{n} = \frac{tn}{tn+fp}`
+
+    accuracy : float
+        A, :math:`\frac{tp + tn}{p + n} = \frac{tp + tn}{tp + fp + tn + fn}`
+
+    jaccard : float
+        J, :math:`\frac{tp}{tp+fp+fn}`, see `Jaccard Index`_
+
+    f1_score : float
+        F1, :math:`\frac{2 P R}{P + R} = \frac{2tp}{2tp + fp + fn}`, see
+        `F1-score`_

     """

+    tp = float(tp)
+    tn = float(tn)
     precision = tp / (tp + fp + ((tp + fp) == 0))
     recall = tp / (tp + fn + ((tp + fn) == 0))
     specificity = tn / (fp + tn + ((fp + tn) == 0))
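The guard terms such as ((tp + fp) == 0) in the denominators above evaluate to 1 exactly when the denominator would otherwise be zero, so the corresponding measure degrades to 0.0 instead of raising ZeroDivisionError. A minimal usage sketch follows; the absolute import path assumes the renamed utils.measure module, and the counts are made up for illustration:

    from bob.ip.binseg.utils.measure import base_measures

    # e.g. 90 true positives, 10 false alarms, 880 true negatives, 20 misses
    precision, recall, specificity, accuracy, jaccard, f1 = base_measures(
        90, 10, 880, 20
    )
    assert precision == 90 / 100  # tp / (tp + fp)
    assert recall == 90 / 110     # tp / (tp + fn)
    assert f1 == 180 / 210        # 2*tp / (2*tp + fp + fn)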
@@ -87,7 +116,10 @@ def auc(x, y):
     """

-    assert len(x) == len(y)
+    x = numpy.array(x)
+    y = numpy.array(y)
+
+    assert len(x) == len(y), "x and y sequences must have the same length"

     dx = numpy.diff(x)
     if numpy.any(dx < 0):
@@ -99,18 +131,11 @@ def auc(x, y):
            raise ValueError("x is neither increasing nor decreasing "
                             ": {}.".format(x))

-    # avoids repeated sums for every y
-    y_unique, y_unique_ndx = numpy.unique(y, return_index=True)
-    x_unique = x[y_unique_ndx]
-    if y_unique.shape[0] > 1:
-        x_interp = numpy.interp(
-            numpy.arange(0, 1, 0.001),
-            y_unique,
-            x_unique,
-            left=0.0,
-            right=0.0,
-        )
-        return x_interp.sum() * 0.001
-    return 0.0
+    y_interp = numpy.interp(
+        numpy.arange(0, 1, 0.001),
+        numpy.array(x),
+        numpy.array(y),
+        left=1.0,
+        right=0.0,
+    )
+    return y_interp.sum() * 0.001
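The replacement integrates y as a function of x rather than the reverse: it resamples the curve at 1000 evenly spaced abscissae on [0, 1) and sums rectangle areas of width 0.001 (left=1.0 presumably extrapolates precision-recall style curves that do not reach x = 0). A short usage sketch, again assuming the renamed module path:

    from bob.ip.binseg.utils.measure import auc

    # symmetric triangle peaking at (0.5, 1.0): the enclosed area is 0.5
    print(auc([0.0, 0.5, 1.0], [0.0, 1.0, 0.0]))  # ~0.5

    # monotonically decreasing x is accepted as well; as the tests above
    # show, the result is the same as for the flipped sequences
    print(auc([1.0, 0.5, 0.0], [0.0, 1.0, 0.0]))  # ~0.5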