From a52c03afee4995d4a81ce7a57bd2b78f100d509c Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Wed, 15 Apr 2020 09:59:50 +0200
Subject: [PATCH] [data] Streamline test to make it easy to assess thresholds
 and visualize test samples

---
Review notes (free text in this area is ignored by `git am`):
 * Every `_check_sample()` now receives its thresholds as arguments and
   returns the measured white/black proportion(s), so the commented-out
   print() lines can be enabled to re-tune the thresholds per subset.
 * drive: the `[:limit]` slice of the "test" subset is applied to the sample
   list (inside the comprehension) like everywhere else, so that `limit`
   really restricts how many samples are loaded and checked.
 * drive/hrf/iostar: the mask-proportion assertion message reports
   `'<key>':mask` (it used to say `:label` although it checks the mask).
 * The `index` blob ids below are those of the original patch.

 bob/ip/binseg/data/chasedb1/test.py   | 18 ++++++++-----
 bob/ip/binseg/data/drishtigs1/test.py | 37 ++++++++++++++-------------
 bob/ip/binseg/data/drive/test.py      | 34 ++++++++++++++----------
 bob/ip/binseg/data/hrf/test.py        | 28 ++++++++++++--------
 bob/ip/binseg/data/iostar/test.py     | 37 +++++++++++++++++----------
 bob/ip/binseg/data/refuge/test.py     | 28 ++++++++++----------
 bob/ip/binseg/data/stare/test.py      | 18 ++++++++-----
 7 files changed, 119 insertions(+), 81 deletions(-)

diff --git a/bob/ip/binseg/data/chasedb1/test.py b/bob/ip/binseg/data/chasedb1/test.py
index eab83e67..4841ea7a 100644
--- a/bob/ip/binseg/data/chasedb1/test.py
+++ b/bob/ip/binseg/data/chasedb1/test.py
@@ -47,9 +47,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (999, 960)
-    bw_threshold_label = 0.10 #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label):
 
         data = s.data
         assert isinstance(data, dict)
@@ -79,14 +78,21 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
+        return w/b
+
+    limit = None #use this to limit testing to first images only
 
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.08) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.10) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.09) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.09) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.chasedb1.datadir')
diff --git a/bob/ip/binseg/data/drishtigs1/test.py b/bob/ip/binseg/data/drishtigs1/test.py
index 4873a97e..e7dbf657 100644
--- a/bob/ip/binseg/data/drishtigs1/test.py
+++ b/bob/ip/binseg/data/drishtigs1/test.py
@@ -82,32 +82,33 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
-    subset = dataset.subsets("optic-cup-all")
+        return w/b
+
     limit = None
 
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.03)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.04)
+    subset = dataset.subsets("optic-cup-all")
+    proportions = [_check_sample(s, 0.027) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.035) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-disc-all")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.05)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.06)
+    proportions = [_check_sample(s, 0.045) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.055) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-cup-any")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.04)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.05)
+    proportions = [_check_sample(s, 0.034) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.047) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-disc-any")
-    for s in subset["train"][:limit]:
-        _check_sample(s, 0.055)
-    for s in subset["test"][:limit]:
-        _check_sample(s, 0.06)
+    proportions = [_check_sample(s, 0.052) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.060) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set("bob.ip.binseg.drishtigs1.datadir")
diff --git a/bob/ip/binseg/data/drive/test.py b/bob/ip/binseg/data/drive/test.py
index 3fac542a..4b0ec336 100644
--- a/bob/ip/binseg/data/drive/test.py
+++ b/bob/ip/binseg/data/drive/test.py
@@ -42,9 +42,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (565, 584)
-    bw_threshold_label = 0.14 #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -62,7 +61,7 @@ def test_loading():
             f"Counts of black + white ({b}+{w}) do not add up to total " \
             f"image size ({numpy.prod(image_size)}) at '{s.key}':label"
         assert (w/b) < bw_threshold_label, \
-            f"The proportion between black and white pixels " \
+            f"The proportion between black and white pixels in labels " \
             f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \
             f"of {bw_threshold_label} at '{s.key}':label - this could " \
             f"indicate a loading problem!"
@@ -70,13 +69,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-            f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+            f"Counts of black + white ({bm}+{wm}) do not add up to total " \
             f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-            f"The proportion between white and black pixels " \
-            f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+            f"The proportion between black and white pixels in masks " \
+            f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+            f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
             f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -87,14 +87,22 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
+        return w/b, wm/bm
+
+    limit = None #use this to limit testing to first images only
 
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.14, 2.14) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.12, 2.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.12, 2.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.drive.datadir')
diff --git a/bob/ip/binseg/data/hrf/test.py b/bob/ip/binseg/data/hrf/test.py
index 52207abc..ac928ecd 100644
--- a/bob/ip/binseg/data/hrf/test.py
+++ b/bob/ip/binseg/data/hrf/test.py
@@ -34,9 +34,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (3504, 2336)
-    bw_threshold_label = 0.12 #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -62,13 +61,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-            f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+            f"Counts of black + white ({bm}+{wm}) do not add up to total " \
             f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-            f"The proportion between white and black pixels " \
-            f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+            f"The proportion between black and white pixels in masks " \
+            f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+            f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
             f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -79,11 +79,17 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
+        return w/b, wm/bm
+
+    limit = None #use this to limit testing to first images only
 
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.12, 5.42) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.12, 5.41) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.hrf.datadir')
diff --git a/bob/ip/binseg/data/iostar/test.py b/bob/ip/binseg/data/iostar/test.py
index 128b9b4c..46553005 100644
--- a/bob/ip/binseg/data/iostar/test.py
+++ b/bob/ip/binseg/data/iostar/test.py
@@ -48,7 +48,7 @@ def test_loading():
     from ..utils import count_bw
     image_size = (1024, 1024)
 
-    def _check_sample(s, bw_threshold_label):
+    def _check_sample(s, bw_threshold_label, bw_threshold_mask):
 
         data = s.data
         assert isinstance(data, dict)
@@ -74,13 +74,14 @@ def test_loading():
         assert "mask" in data
         nose.tools.eq_(data["mask"].size, image_size)
         nose.tools.eq_(data["mask"].mode, "1")
-        b, w = count_bw(data["mask"])
-        assert (b+w) == numpy.prod(image_size), \
-            f"Counts of black + white ({b}+{w}) do not add up to total " \
+        bm, wm = count_bw(data["mask"])
+        assert (bm+wm) == numpy.prod(image_size), \
+            f"Counts of black + white ({bm}+{wm}) do not add up to total " \
             f"image size ({numpy.prod(image_size)}) at '{s.key}':mask"
-        assert w > b, \
-            f"The proportion between white and black pixels " \
-            f"({w} > {b}?) is not respected at '{s.key}':mask - " \
+        assert (wm/bm) > bw_threshold_mask, \
+            f"The proportion between black and white pixels in masks " \
+            f"({wm}/{bm}={wm/bm:.2f}) is smaller than the allowed " \
+            f"threshold of {bw_threshold_mask} at '{s.key}':mask - " \
             f"this could indicate a loading problem!"
 
         # to visualize images, uncomment the folowing code
@@ -91,17 +92,25 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"], data["mask"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
+        return w/b, wm/bm
+
+    limit = None #use this to limit testing to first images only
 
     subset = dataset.subsets("vessel")
-    bw_threshold_label = 0.11 #(vessels to background proportion limit)
-    for s in subset["train"]: _check_sample(s, bw_threshold_label)
-    for s in subset["test"]: _check_sample(s, bw_threshold_label)
+    proportions = [_check_sample(s, 0.11, 3.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.10, 3.27) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
     subset = dataset.subsets("optic-disc")
-    bw_threshold_label = 0.04 #(optic-disc to background proportion limit)
-    for s in subset["train"]: _check_sample(s, bw_threshold_label)
-    for s in subset["test"]: _check_sample(s, bw_threshold_label)
+    proportions = [_check_sample(s, 0.023, 3.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
+    proportions = [_check_sample(s, 0.033, 3.27) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(k[0] for k in proportions)}")
+    #print(f"min mask proportions = {min(k[1] for k in proportions)}")
 
 @rc_variable_set('bob.ip.binseg.iostar.datadir')
 def test_check():
diff --git a/bob/ip/binseg/data/refuge/test.py b/bob/ip/binseg/data/refuge/test.py
index 9c6a31e6..a69334c0 100644
--- a/bob/ip/binseg/data/refuge/test.py
+++ b/bob/ip/binseg/data/refuge/test.py
@@ -81,23 +81,25 @@ def test_loading():
         # display = overlayed_image(data["data"], data["label"])
         # display.show()
         # import ipdb; ipdb.set_trace()
-        # pass
+        return w/b
+
+    limit = None #use this to limit testing to first images only
 
     subset = dataset.subsets("optic-disc")
-    for s in subset["train"]:
-        _check_sample(s, (2124, 2056), True, 3, 0.03)
-    for s in subset["validation"]:
-        _check_sample(s, (1634, 1634), False, 2, 0.045)
-    for s in subset["test"]:
-        _check_sample(s, (1634, 1634), True, 3, 0.03)
+    proportions = [_check_sample(s, (2124, 2056), True, 3, 0.029) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), False, 2, 0.043) for s in subset["validation"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), True, 3, 0.026) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("optic-cup")
-    for s in subset["train"]:
-        _check_sample(s, (2124, 2056), True, 3, 0.018)
-    for s in subset["validation"]:
-        _check_sample(s, (1634, 1634), False, 2, 0.035)
-    for s in subset["test"]:
-        _check_sample(s, (1634, 1634), True, 3, 0.018)
+    proportions = [_check_sample(s, (2124, 2056), True, 3, 0.018) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), False, 2, 0.030) for s in subset["validation"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, (1634, 1634), True, 3, 0.017) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set("bob.ip.binseg.refuge.datadir")
diff --git a/bob/ip/binseg/data/stare/test.py b/bob/ip/binseg/data/stare/test.py
index 29d71b96..05358720 100644
--- a/bob/ip/binseg/data/stare/test.py
+++ b/bob/ip/binseg/data/stare/test.py
@@ -47,9 +47,8 @@ def test_loading():
 
     from ..utils import count_bw
     image_size = (700, 605)
-    bw_threshold_label = 0.19 #(vessels to background proportion limit)
 
-    def _check_sample(s):
+    def _check_sample(s, bw_threshold_label):
 
         data = s.data
         assert isinstance(data, dict)
@@ -79,14 +78,21 @@ def test_loading():
         #display = overlayed_image(data["data"], data["label"])
         #display.show()
         #import ipdb; ipdb.set_trace()
-        #pass
+        return w/b
+
+    limit = None #use this to limit testing to first images only
 
     subset = dataset.subsets("default")
-    for s in subset["train"]: _check_sample(s)
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.10) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.12) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
     subset = dataset.subsets("second-annotation")
-    for s in subset["test"]: _check_sample(s)
+    proportions = [_check_sample(s, 0.19) for s in subset["train"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
+    proportions = [_check_sample(s, 0.18) for s in subset["test"][:limit]]
+    #print(f"max label proportions = {max(proportions)}")
 
 
 @rc_variable_set('bob.ip.binseg.stare.datadir')
-- 
GitLab