From a52c03afee4995d4a81ce7a57bd2b78f100d509c Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Wed, 15 Apr 2020 09:59:50 +0200
Subject: [PATCH] [data] Streamline test to make it easy to assess thresholds
 and visualize test samples

---
 bob/ip/binseg/data/chasedb1/test.py   | 18 ++++++++-----
 bob/ip/binseg/data/drishtigs1/test.py | 37 ++++++++++++++-------------
 bob/ip/binseg/data/drive/test.py      | 34 ++++++++++++++----------
 bob/ip/binseg/data/hrf/test.py        | 28 ++++++++++++--------
 bob/ip/binseg/data/iostar/test.py     | 37 +++++++++++++++++----------
 bob/ip/binseg/data/refuge/test.py     | 28 ++++++++++----------
 bob/ip/binseg/data/stare/test.py      | 18 ++++++++-----
 7 files changed, 119 insertions(+), 81 deletions(-)

diff --git a/bob/ip/binseg/data/chasedb1/test.py b/bob/ip/binseg/data/chasedb1/test.py
index eab83e67..4841ea7a 100644
--- a/bob/ip/binseg/data/chasedb1/test.py
+++ b/bob/ip/binseg/data/chasedb1/test.py
@@ -47,9 +47,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (999, 960)
-    bw_threshold_label = 0.10  #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label):
 
         data = s.data
         assert isinstance(data, dict)
@@ -79,14 +78,21 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.08) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.10) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.09) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.09) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.chasedb1.datadir')
diff --git a/bob/ip/binseg/data/drishtigs1/test.py b/bob/ip/binseg/data/drishtigs1/test.py
index 4873a97e..e7dbf657 100644
--- a/bob/ip/binseg/data/drishtigs1/test.py
+++ b/bob/ip/binseg/data/drishtigs1/test.py
@@ -82,32 +82,33 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
-    subset = dataset.subsets("optic-cup-all")
+        return w/b
+
     limit = None
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.03)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.04)
+    subset = dataset.subsets("optic-cup-all")
+    proportions = [_check_sample(s, 0.027) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.035) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-disc-all")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.05)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.06)
+    proportions = [_check_sample(s, 0.045) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.055) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-cup-any")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.04)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.05)
+    proportions = [_check_sample(s, 0.034) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.047) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-disc-any")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.055)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.06)
+    proportions = [_check_sample(s, 0.052) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.060) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set("bob.ip.binseg.drishtigs1.datadir")
diff --git a/bob/ip/binseg/data/drive/test.py b/bob/ip/binseg/data/drive/test.py
index 3fac542a..4b0ec336 100644
--- a/bob/ip/binseg/data/drive/test.py
+++ b/bob/ip/binseg/data/drive/test.py
@@ -42,9 +42,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (565, 584)
-    bw_threshold_label = 0.14  #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -62,7 +61,7 @@ def test_loading():
                 f"Counts of black + white ({b}+{w}) do not add up to total " \
                 f"image size ({numpy.prod(image_size)}) at '{s.key}':label"
         assert (w/b) < bw_threshold_label, \
-                f"The proportion between black and white pixels " \
+                f"The proportion between black and white pixels in labels " \
                 f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \
                 f"of {bw_threshold_label} at '{s.key}':label - this could " \
                 f"indicate a loading problem!"
@@ -70,13 +69,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-                f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+                f"Counts of black + white ({bm}+{wm}) do not add up to total " \
                 f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-                f"The proportion between white and black pixels " \
-                f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+                f"The proportion between black and white pixels in masks " \
+                f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+                f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
                 f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -87,14 +87,22 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b, wm/bm
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.14, 2.14) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.12, 2.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.12, 2.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.drive.datadir')
diff --git a/bob/ip/binseg/data/hrf/test.py b/bob/ip/binseg/data/hrf/test.py
index 52207abc..ac928ecd 100644
--- a/bob/ip/binseg/data/hrf/test.py
+++ b/bob/ip/binseg/data/hrf/test.py
@@ -34,9 +34,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (3504, 2336)
-    bw_threshold_label = 0.12  #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -62,13 +61,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-                f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+                f"Counts of black + white ({bm}+{wm}) do not add up to total " \
                 f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-                f"The proportion between white and black pixels " \
-                f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+                f"The proportion between black and white pixels in masks " \
+                f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+                f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
                 f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -79,11 +79,17 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b, wm/bm
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.12, 5.42) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.12, 5.41) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.hrf.datadir')
diff --git a/bob/ip/binseg/data/iostar/test.py b/bob/ip/binseg/data/iostar/test.py
index 128b9b4c..46553005 100644
--- a/bob/ip/binseg/data/iostar/test.py
+++ b/bob/ip/binseg/data/iostar/test.py
@@ -48,7 +48,7 @@ def test_loading():
     from ..utils import count_bw
     image_size = (1024, 1024)
 
-    def _check_sample(s, bw_threshold_label):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -74,13 +74,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-                f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+                f"Counts of black + white ({bm}+{wm}) do not add up to total " \
                 f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-                f"The proportion between white and black pixels " \
-                f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+                f"The proportion between black and white pixels in masks " \
+                f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+                f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
                 f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -91,17 +92,25 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b, wm/bm
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("vessel")
-    bw_threshold_label = 0.11  #(vessels to background proportion limit)
-    for s in subset["train"]: _check_sample(s, bw_threshold_label)
-    for s in subset["test"]: _check_sample(s, bw_threshold_label)
+    proportions = [_check_sample(s, 0.11, 3.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.10, 3.27) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
     subset = dataset.subsets("optic-disc")
-    bw_threshold_label = 0.04  #(optic-disc to background proportion limit)
-    for s in subset["train"]: _check_sample(s, bw_threshold_label)
-    for s in subset["test"]: _check_sample(s, bw_threshold_label)
+    proportions = [_check_sample(s, 0.023, 3.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.033, 3.27) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 @rc_variable_set('bob.ip.binseg.iostar.datadir')
 def test_check():
diff --git a/bob/ip/binseg/data/refuge/test.py b/bob/ip/binseg/data/refuge/test.py
index 9c6a31e6..a69334c0 100644
--- a/bob/ip/binseg/data/refuge/test.py
+++ b/bob/ip/binseg/data/refuge/test.py
@@ -81,23 +81,25 @@ def test_loading():
         # display = overlayed_image(data["data"], data["label"])
         # display.show()
         # import ipdb; ipdb.set_trace()
-        # pass
 
+        return w/b
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("optic-disc")
-    for s in subset["train"]:
-        _check_sample(s, (2124, 2056), True, 3, 0.03)
-    for s in subset["validation"]:
-        _check_sample(s, (1634, 1634), False, 2, 0.045)
-    for s in subset["test"]:
-        _check_sample(s, (1634, 1634), True, 3, 0.03)
+    proportions = [_check_sample(s, (2124, 2056), True, 3, 0.029) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), False, 2, 0.043) for s in subset["validation"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), True, 3, 0.026) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-cup")
-    for s in subset["train"]:
-        _check_sample(s, (2124, 2056), True, 3, 0.018)
-    for s in subset["validation"]:
-        _check_sample(s, (1634, 1634), False, 2, 0.035)
-    for s in subset["test"]:
-        _check_sample(s, (1634, 1634), True, 3, 0.018)
+    proportions = [_check_sample(s, (2124, 2056), True, 3, 0.018) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), False, 2, 0.030) for s in subset["validation"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), True, 3, 0.017) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set("bob.ip.binseg.refuge.datadir")
diff --git a/bob/ip/binseg/data/stare/test.py b/bob/ip/binseg/data/stare/test.py
index 29d71b96..05358720 100644
--- a/bob/ip/binseg/data/stare/test.py
+++ b/bob/ip/binseg/data/stare/test.py
@@ -47,9 +47,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (700, 605)
-    bw_threshold_label = 0.19  #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label):
 
         data = s.data
         assert isinstance(data, dict)
@@ -79,14 +78,21 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.10) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.18) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.stare.datadir')
-- 
GitLab