diff --git a/bob/ip/binseg/data/chasedb1/test.py b/bob/ip/binseg/data/chasedb1/test.py
index eab83e67843830f8b2c3adf0b45792dfc5c2bcb9..4841ea7ab35ca9f110c457276b5952ea34e553f2 100644
--- a/bob/ip/binseg/data/chasedb1/test.py
+++ b/bob/ip/binseg/data/chasedb1/test.py
@@ -47,9 +47,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (999, 960)
-    bw_threshold_label = 0.10  #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label):
 
         data = s.data
         assert isinstance(data, dict)
@@ -79,14 +78,21 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.08) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.10) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.09) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.09) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.chasedb1.datadir')
diff --git a/bob/ip/binseg/data/drishtigs1/test.py b/bob/ip/binseg/data/drishtigs1/test.py
index 4873a97e3c8000523a9190411970c69e75589146..e7dbf65753a3861ceef1a4b733bd9b79c45025a9 100644
--- a/bob/ip/binseg/data/drishtigs1/test.py
+++ b/bob/ip/binseg/data/drishtigs1/test.py
@@ -82,32 +82,33 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
-    subset = dataset.subsets("optic-cup-all")
+        return w/b
+
     limit = None
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.03)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.04)
+    subset = dataset.subsets("optic-cup-all")
+    proportions = [_check_sample(s, 0.027) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.035) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-disc-all")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.05)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.06)
+    proportions = [_check_sample(s, 0.045) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.055) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-cup-any")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.04)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.05)
+    proportions = [_check_sample(s, 0.034) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.047) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-disc-any")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.055)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.06)
+    proportions = [_check_sample(s, 0.052) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.060) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set("bob.ip.binseg.drishtigs1.datadir")
diff --git a/bob/ip/binseg/data/drive/test.py b/bob/ip/binseg/data/drive/test.py
index 3fac542af22c5f43287e8c5ad82d9bd409bea907..4b0ec336a4f5980cf3b676302f043ad35c6da0e6 100644
--- a/bob/ip/binseg/data/drive/test.py
+++ b/bob/ip/binseg/data/drive/test.py
@@ -42,9 +42,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (565, 584)
-    bw_threshold_label = 0.14  #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -62,7 +61,7 @@ def test_loading():
                 f"Counts of black + white ({b}+{w}) do not add up to total " \
                 f"image size ({numpy.prod(image_size)}) at '{s.key}':label"
         assert (w/b) < bw_threshold_label, \
-                f"The proportion between black and white pixels " \
+                f"The proportion between black and white pixels in labels " \
                 f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \
                 f"of {bw_threshold_label} at '{s.key}':label - this could " \
                 f"indicate a loading problem!"
@@ -70,13 +69,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-                f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+                f"Counts of black + white ({bm}+{wm}) do not add up to total " \
                 f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-                f"The proportion between white and black pixels " \
-                f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+                f"The proportion between black and white pixels in masks " \
+                f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+                f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
                 f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -87,14 +87,22 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b, wm/bm
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.14, 2.14) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.12, 2.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.12, 2.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.drive.datadir')
diff --git a/bob/ip/binseg/data/hrf/test.py b/bob/ip/binseg/data/hrf/test.py
index 52207abc16c6336b03089302cbec3e0b8caff591..ac928ecddc6e42ccd9b67d3d08cb1800ab2ce961 100644
--- a/bob/ip/binseg/data/hrf/test.py
+++ b/bob/ip/binseg/data/hrf/test.py
@@ -34,9 +34,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (3504, 2336)
-    bw_threshold_label = 0.12  #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -62,13 +61,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-                f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+                f"Counts of black + white ({bm}+{wm}) do not add up to total " \
                 f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-                f"The proportion between white and black pixels " \
-                f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+                f"The proportion between black and white pixels in masks " \
+                f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+                f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
                 f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -79,11 +79,17 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b, wm/bm
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.12, 5.42) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.12, 5.41) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.hrf.datadir')
diff --git a/bob/ip/binseg/data/iostar/test.py b/bob/ip/binseg/data/iostar/test.py
index 128b9b4cfeb49c17a2509d8f3d195d65e3d0597e..4655300592422193779518dfd2d7a9eae8aeddbf 100644
--- a/bob/ip/binseg/data/iostar/test.py
+++ b/bob/ip/binseg/data/iostar/test.py
@@ -48,7 +48,7 @@ def test_loading():
     from ..utils import count_bw
     image_size = (1024, 1024)
 
-    def _check_sample(s, bw_threshold_label):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -74,13 +74,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-                f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+                f"Counts of black + white ({bm}+{wm}) do not add up to total " \
                 f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-                f"The proportion between white and black pixels " \
-                f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+                f"The proportion between black and white pixels in masks " \
+                f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+                f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
                 f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -91,17 +92,25 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b, wm/bm
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("vessel")
-    bw_threshold_label = 0.11  #(vessels to background proportion limit)
-    for s in subset["train"]: _check_sample(s, bw_threshold_label)
-    for s in subset["test"]: _check_sample(s, bw_threshold_label)
+    proportions = [_check_sample(s, 0.11, 3.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.10, 3.27) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
     subset = dataset.subsets("optic-disc")
-    bw_threshold_label = 0.04  #(optic-disc to background proportion limit)
-    for s in subset["train"]: _check_sample(s, bw_threshold_label)
-    for s in subset["test"]: _check_sample(s, bw_threshold_label)
+    proportions = [_check_sample(s, 0.023, 3.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.033, 3.27) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 @rc_variable_set('bob.ip.binseg.iostar.datadir')
 def test_check():
diff --git a/bob/ip/binseg/data/refuge/test.py b/bob/ip/binseg/data/refuge/test.py
index 9c6a31e6df792722ed7a33864e41e60288f6475c..a69334c08e204a76e63077fc1eece5e06862d477 100644
--- a/bob/ip/binseg/data/refuge/test.py
+++ b/bob/ip/binseg/data/refuge/test.py
@@ -81,23 +81,25 @@ def test_loading():
         # display = overlayed_image(data["data"], data["label"])
         # display.show()
         # import ipdb; ipdb.set_trace()
-        # pass
 
+        return w/b
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("optic-disc")
-    for s in subset["train"]:
-        _check_sample(s, (2124, 2056), True, 3, 0.03)
-    for s in subset["validation"]:
-        _check_sample(s, (1634, 1634), False, 2, 0.045)
-    for s in subset["test"]:
-        _check_sample(s, (1634, 1634), True, 3, 0.03)
+    proportions = [_check_sample(s, (2124, 2056), True, 3, 0.029) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), False, 2, 0.043) for s in subset["validation"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), True, 3, 0.026) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-cup")
-    for s in subset["train"]:
-        _check_sample(s, (2124, 2056), True, 3, 0.018)
-    for s in subset["validation"]:
-        _check_sample(s, (1634, 1634), False, 2, 0.035)
-    for s in subset["test"]:
-        _check_sample(s, (1634, 1634), True, 3, 0.018)
+    proportions = [_check_sample(s, (2124, 2056), True, 3, 0.018) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), False, 2, 0.030) for s in subset["validation"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), True, 3, 0.017) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set("bob.ip.binseg.refuge.datadir")
diff --git a/bob/ip/binseg/data/stare/test.py b/bob/ip/binseg/data/stare/test.py
index 29d71b9627ea2061f826d3557d5bb8e010fe4044..05358720004eac6f8984f28beca6a8881d885c55 100644
--- a/bob/ip/binseg/data/stare/test.py
+++ b/bob/ip/binseg/data/stare/test.py
@@ -47,9 +47,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (700, 605)
-    bw_threshold_label = 0.19  #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label):
 
         data = s.data
         assert isinstance(data, dict)
@@ -79,14 +78,21 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
 
+        return w/b
+
+    limit = None  #use this to limit testing to first images only
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.10) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.18) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.stare.datadir')