From 1a2b5356877cabf521c53500bd9ccf2353dca6f1 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Mon, 13 Apr 2020 13:06:03 +0200 Subject: [PATCH] [data] Test correctness of images/label/masks returned by implemented datasets; Allow special context-dependent procedures for loading --- bob/ip/binseg/data/chasedb1/__init__.py | 7 +- bob/ip/binseg/data/chasedb1/test.py | 31 +++- bob/ip/binseg/data/drive/__init__.py | 9 +- bob/ip/binseg/data/drive/test.py | 43 ++++- bob/ip/binseg/data/hrf/__init__.py | 9 +- bob/ip/binseg/data/hrf/test.py | 43 ++++- bob/ip/binseg/data/iostar/__init__.py | 22 ++- bob/ip/binseg/data/iostar/default.json | 231 ------------------------ bob/ip/binseg/data/iostar/test.py | 102 ++++++++++- bob/ip/binseg/data/jsondataset.py | 25 ++- bob/ip/binseg/data/stare/__init__.py | 7 +- bob/ip/binseg/data/stare/test.py | 31 +++- bob/ip/binseg/data/utils.py | 114 ++++++++++++ 13 files changed, 401 insertions(+), 273 deletions(-) delete mode 100644 bob/ip/binseg/data/iostar/default.json diff --git a/bob/ip/binseg/data/chasedb1/__init__.py b/bob/ip/binseg/data/chasedb1/__init__.py index 3ab9e993..22286f79 100644 --- a/bob/ip/binseg/data/chasedb1/__init__.py +++ b/bob/ip/binseg/data/chasedb1/__init__.py @@ -47,10 +47,11 @@ _protocols = [ _root_path = bob.extension.rc.get('bob.ip.binseg.chasedb1.datadir', os.path.realpath(os.curdir)) -def _loader(s): +def _loader(context, sample): + #"context" is ignore in this case - database is homogeneous return dict( - data=load_pil_rgb(s["data"]), - label=load_pil_1(s["label"]), + data=load_pil_rgb(sample["data"]), + label=load_pil_1(sample["label"]), ) dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) diff --git a/bob/ip/binseg/data/chasedb1/test.py b/bob/ip/binseg/data/chasedb1/test.py index 90f196dd..a1a4af73 100644 --- a/bob/ip/binseg/data/chasedb1/test.py +++ b/bob/ip/binseg/data/chasedb1/test.py @@ -5,6 +5,8 @@ """Tests for CHASE-DB1""" import os + +import numpy import nose.tools from . import dataset @@ -43,16 +45,41 @@ def test_protocol_consitency(): @rc_variable_set('bob.ip.binseg.chasedb1.datadir') def test_loading(): + from ..utils import count_bw + image_size = (999, 960) + bw_threshold_label = 0.10 #(vessels to background proportion limit) + def _check_sample(s): + data = s.data assert isinstance(data, dict) nose.tools.eq_(len(data), 2) + assert "data" in data - nose.tools.eq_(data["data"].size, (999, 960)) + nose.tools.eq_(data["data"].size, image_size) nose.tools.eq_(data["data"].mode, "RGB") + assert "label" in data - nose.tools.eq_(data["label"].size, (999, 960)) + nose.tools.eq_(data["label"].size, image_size) nose.tools.eq_(data["label"].mode, "1") + b, w = count_bw(data["label"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':label" + assert (w/b) < bw_threshold_label, \ + f"The proportion between black and white pixels " \ + f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \ + f"of {bw_threshold_label} at '{s.key}':label - this could " \ + f"indicate a loading problem!" + + # to visualize images, uncomment the folowing code + # it should display an image with a faded background representing the + # original data, blended with green labels. 
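For reference, the loader contract introduced by this patch takes a (context, sample) pair instead of only a sample dictionary. A minimal sketch of an equivalent, context-ignoring loader written directly against PIL (the conversions only approximate what load_pil_rgb/load_pil_1 are assumed to do, and the paths in the usage comment are placeholders):

    import PIL.Image

    def example_loader(context, sample):
        # "context" is {"protocol": ..., "subset": ...}; homogeneous datasets
        # such as CHASE-DB1 can simply ignore it.
        # The two conversions below approximate what load_pil_rgb/load_pil_1
        # are assumed to do: an RGB image and a binary (mode "1") label.
        return dict(
            data=PIL.Image.open(sample["data"]).convert("RGB"),
            label=PIL.Image.open(sample["label"]).convert(mode="1", dither=None),
        )

    # hypothetical call (paths are placeholders, not real CHASE-DB1 file names):
    # example_loader({"protocol": "default", "subset": "train"},
    #                {"data": "/path/to/image.jpg", "label": "/path/to/label.png"})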
+ #from ..utils import overlayed_image + #display = overlayed_image(data["data"], data["label"]) + #display.show() + #import ipdb; ipdb.set_trace() + #pass subset = dataset.subsets("default") for s in subset["train"]: _check_sample(s) diff --git a/bob/ip/binseg/data/drive/__init__.py b/bob/ip/binseg/data/drive/__init__.py index f5531603..5298e661 100644 --- a/bob/ip/binseg/data/drive/__init__.py +++ b/bob/ip/binseg/data/drive/__init__.py @@ -36,11 +36,12 @@ _protocols = [ _root_path = bob.extension.rc.get('bob.ip.binseg.drive.datadir', os.path.realpath(os.curdir)) -def _loader(s): +def _loader(context, sample): + #"context" is ignore in this case - database is homogeneous return dict( - data=load_pil_rgb(s["data"]), - label=load_pil_1(s["label"]), - mask=load_pil_1(s["mask"]), + data=load_pil_rgb(sample["data"]), + label=load_pil_1(sample["label"]), + mask=load_pil_1(sample["mask"]), ) dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) diff --git a/bob/ip/binseg/data/drive/test.py b/bob/ip/binseg/data/drive/test.py index b7b291ff..f66027ef 100644 --- a/bob/ip/binseg/data/drive/test.py +++ b/bob/ip/binseg/data/drive/test.py @@ -5,6 +5,8 @@ """Tests for DRIVE""" import os + +import numpy import nose.tools from . import dataset @@ -38,19 +40,54 @@ def test_protocol_consitency(): @rc_variable_set('bob.ip.binseg.drive.datadir') def test_loading(): + from ..utils import count_bw + image_size = (565, 584) + bw_threshold_label = 0.14 #(vessels to background proportion limit) + def _check_sample(s): + data = s.data assert isinstance(data, dict) nose.tools.eq_(len(data), 3) + assert "data" in data - nose.tools.eq_(data["data"].size, (565, 584)) + nose.tools.eq_(data["data"].size, image_size) nose.tools.eq_(data["data"].mode, "RGB") + assert "label" in data - nose.tools.eq_(data["label"].size, (565, 584)) + nose.tools.eq_(data["label"].size, image_size) nose.tools.eq_(data["label"].mode, "1") + b, w = count_bw(data["label"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':label" + assert (w/b) < bw_threshold_label, \ + f"The proportion between black and white pixels " \ + f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \ + f"of {bw_threshold_label} at '{s.key}':label - this could " \ + f"indicate a loading problem!" + assert "mask" in data - nose.tools.eq_(data["mask"].size, (565, 584)) + nose.tools.eq_(data["mask"].size, image_size) nose.tools.eq_(data["mask"].mode, "1") + b, w = count_bw(data["mask"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':mask" + assert w > b, \ + f"The proportion between white and black pixels " \ + f"({w} > {b}?) is not respected at '{s.key}':mask - " \ + f"this could indicate a loading problem!" + + # to visualize images, uncomment the folowing code + # it should display an image with a faded background representing the + # original data, blended with green labels and blue area indicating the + # parts to be masked out. 
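All of the label/mask assertions added in these tests follow the same count-and-compare pattern. A condensed sketch of that pattern as a reusable helper, using the count_bw utility added later in this patch (the helper name and the default threshold are illustrative only):

    import numpy

    from bob.ip.binseg.data.utils import count_bw

    def assert_sane_binary_image(img, key, max_white_ratio=0.14,
            expect_white_majority=False):
        # replicates the pattern used by the tests above: totals must add up
        # to the image size and the white/black proportion must be plausible
        black, white = count_bw(img)
        total = numpy.prod(img.size)
        assert (black + white) == total, \
            f"black + white ({black}+{white}) != image size ({total}) at '{key}'"
        if expect_white_majority:  # masks: the valid (white) region dominates
            assert white > black, \
                f"mask at '{key}' has fewer white than black pixels"
        else:  # labels: sparse foreground (e.g. vessels)
            assert (white / black) < max_white_ratio, \
                f"label at '{key}' has too high a white/black ratio ({white}/{black})"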
+ #from ..utils import overlayed_image + #display = overlayed_image(data["data"], data["label"], data["mask"]) + #display.show() + #import ipdb; ipdb.set_trace() + #pass subset = dataset.subsets("default") for s in subset["train"]: _check_sample(s) diff --git a/bob/ip/binseg/data/hrf/__init__.py b/bob/ip/binseg/data/hrf/__init__.py index b3d82263..8f0a387b 100644 --- a/bob/ip/binseg/data/hrf/__init__.py +++ b/bob/ip/binseg/data/hrf/__init__.py @@ -34,11 +34,12 @@ _protocols = [ _root_path = bob.extension.rc.get('bob.ip.binseg.hrf.datadir', os.path.realpath(os.curdir)) -def _loader(s): +def _loader(context, sample): + #"context" is ignore in this case - database is homogeneous return dict( - data=load_pil_rgb(s["data"]), - label=load_pil_1(s["label"]), - mask=load_pil_1(s["mask"]), + data=load_pil_rgb(sample["data"]), + label=load_pil_1(sample["label"]), + mask=load_pil_1(sample["mask"]), ) dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) diff --git a/bob/ip/binseg/data/hrf/test.py b/bob/ip/binseg/data/hrf/test.py index 9a463776..c6e44ccd 100644 --- a/bob/ip/binseg/data/hrf/test.py +++ b/bob/ip/binseg/data/hrf/test.py @@ -5,6 +5,8 @@ """Tests for HRF""" import os + +import numpy import nose.tools from . import dataset @@ -30,19 +32,54 @@ def test_protocol_consitency(): @rc_variable_set('bob.ip.binseg.hrf.datadir') def test_loading(): + from ..utils import count_bw + image_size = (3504, 2336) + bw_threshold_label = 0.12 #(vessels to background proportion limit) + def _check_sample(s): + data = s.data assert isinstance(data, dict) nose.tools.eq_(len(data), 3) + assert "data" in data - nose.tools.eq_(data["data"].size, (3504, 2336)) + nose.tools.eq_(data["data"].size, image_size) nose.tools.eq_(data["data"].mode, "RGB") + assert "label" in data - nose.tools.eq_(data["label"].size, (3504, 2336)) + nose.tools.eq_(data["label"].size, image_size) nose.tools.eq_(data["label"].mode, "1") + b, w = count_bw(data["label"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':label" + assert (w/b) < bw_threshold_label, \ + f"The proportion between black and white pixels " \ + f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \ + f"of {bw_threshold_label} at '{s.key}':label - this could " \ + f"indicate a loading problem!" + assert "mask" in data - nose.tools.eq_(data["mask"].size, (3504, 2336)) + nose.tools.eq_(data["mask"].size, image_size) nose.tools.eq_(data["mask"].mode, "1") + b, w = count_bw(data["mask"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':mask" + assert w > b, \ + f"The proportion between white and black pixels " \ + f"({w} > {b}?) is not respected at '{s.key}':mask - " \ + f"this could indicate a loading problem!" + + # to visualize images, uncomment the folowing code + # it should display an image with a faded background representing the + # original data, blended with green labels and blue area indicating the + # parts to be masked out. 
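Each of these tests drives its dataset the same way: request a protocol's subsets and access each sample lazily. A hedged usage sketch, using HRF and the values its test expects:

    # assumes the HRF raw data location is configured under the
    # "bob.ip.binseg.hrf.datadir" rc variable (otherwise the current
    # working directory is used, as in the module above)
    from bob.ip.binseg.data.hrf import dataset

    subsets = dataset.subsets("default")   # {"train": [...], "test": [...]}
    sample = subsets["train"][0]           # a DelayedSample - nothing loaded yet
    print(sample.key)                      # identifier used in the test messages
    data = sample.data                     # the loader runs here, lazily
    print(data["data"].size, data["data"].mode)   # (3504, 2336), "RGB" for HRF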
+ #from ..utils import overlayed_image + #display = overlayed_image(data["data"], data["label"], data["mask"]) + #display.show() + #import ipdb; ipdb.set_trace() + #pass subset = dataset.subsets("default") for s in subset["train"]: _check_sample(s) diff --git a/bob/ip/binseg/data/iostar/__init__.py b/bob/ip/binseg/data/iostar/__init__.py index b8e23eb0..1654cbfd 100644 --- a/bob/ip/binseg/data/iostar/__init__.py +++ b/bob/ip/binseg/data/iostar/__init__.py @@ -29,6 +29,7 @@ import bob.extension from ..jsondataset import JSONDataset from ..loader import load_pil_rgb, load_pil_1 +from ..utils import invert_mode1_image, subtract_mode1_images _protocols = [ pkg_resources.resource_filename(__name__, "vessel.json"), @@ -38,12 +39,23 @@ _protocols = [ _root_path = bob.extension.rc.get('bob.ip.binseg.iostar.datadir', os.path.realpath(os.curdir)) -def _loader(s): - return dict( - data=load_pil_rgb(s["data"]), - label=load_pil_1(s["label"]), - mask=load_pil_1(s["mask"]), +def _loader(context, sample): + retval = dict( + data=load_pil_rgb(sample["data"]), + label=load_pil_1(sample["label"]), + mask=load_pil_1(sample["mask"]), ) + if context["protocol"] == "optic-disc": + # For optic-disc analysis, the label provided by IOSTAR raw data is the + # "inverted" (negative) label, and does not consider the mask region, + # which must be subtracted. We do this special manipulation here. + retval["label"] = subtract_mode1_images( + invert_mode1_image(retval["label"]), + invert_mode1_image(retval["mask"])) + return retval + elif context["protocol"] == "vessel": + return retval + raise RuntimeError(f"Unknown protocol {context['protocol']}") dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) """IOSTAR dataset object""" diff --git a/bob/ip/binseg/data/iostar/default.json b/bob/ip/binseg/data/iostar/default.json deleted file mode 100644 index 87f29ad5..00000000 --- a/bob/ip/binseg/data/iostar/default.json +++ /dev/null @@ -1,231 +0,0 @@ -{ - "train": [ - [ - "images/01_dr.JPG", - "manual1/01_dr.tif", - "mask/01_dr_mask.tif" - ], - [ - "images/02_dr.JPG", - "manual1/02_dr.tif", - "mask/02_dr_mask.tif" - ], - [ - "images/03_dr.JPG", - "manual1/03_dr.tif", - "mask/03_dr_mask.tif" - ], - [ - "images/04_dr.JPG", - "manual1/04_dr.tif", - "mask/04_dr_mask.tif" - ], - [ - "images/05_dr.JPG", - "manual1/05_dr.tif", - "mask/05_dr_mask.tif" - ], - [ - "images/01_g.jpg", - "manual1/01_g.tif", - "mask/01_g_mask.tif" - ], - [ - "images/02_g.jpg", - "manual1/02_g.tif", - "mask/02_g_mask.tif" - ], - [ - "images/03_g.jpg", - "manual1/03_g.tif", - "mask/03_g_mask.tif" - ], - [ - "images/04_g.jpg", - "manual1/04_g.tif", - "mask/04_g_mask.tif" - ], - [ - "images/05_g.jpg", - "manual1/05_g.tif", - "mask/05_g_mask.tif" - ], - [ - "images/01_h.jpg", - "manual1/01_h.tif", - "mask/01_h_mask.tif" - ], - [ - "images/02_h.jpg", - "manual1/02_h.tif", - "mask/02_h_mask.tif" - ], - [ - "images/03_h.jpg", - "manual1/03_h.tif", - "mask/03_h_mask.tif" - ], - [ - "images/04_h.jpg", - "manual1/04_h.tif", - "mask/04_h_mask.tif" - ], - [ - "images/05_h.jpg", - "manual1/05_h.tif", - "mask/05_h_mask.tif" - ] - ], - "test": [ - [ - "images/06_dr.JPG", - "manual1/06_dr.tif", - "mask/06_dr_mask.tif" - ], - [ - "images/07_dr.JPG", - "manual1/07_dr.tif", - "mask/07_dr_mask.tif" - ], - [ - "images/08_dr.JPG", - "manual1/08_dr.tif", - "mask/08_dr_mask.tif" - ], - [ - "images/09_dr.JPG", - "manual1/09_dr.tif", - "mask/09_dr_mask.tif" - ], - [ - "images/10_dr.JPG", - "manual1/10_dr.tif", - "mask/10_dr_mask.tif" - ], - [ - 
"images/11_dr.JPG", - "manual1/11_dr.tif", - "mask/11_dr_mask.tif" - ], - [ - "images/12_dr.JPG", - "manual1/12_dr.tif", - "mask/12_dr_mask.tif" - ], - [ - "images/13_dr.JPG", - "manual1/13_dr.tif", - "mask/13_dr_mask.tif" - ], - [ - "images/14_dr.JPG", - "manual1/14_dr.tif", - "mask/14_dr_mask.tif" - ], - [ - "images/15_dr.JPG", - "manual1/15_dr.tif", - "mask/15_dr_mask.tif" - ], - [ - "images/06_g.jpg", - "manual1/06_g.tif", - "mask/06_g_mask.tif" - ], - [ - "images/07_g.jpg", - "manual1/07_g.tif", - "mask/07_g_mask.tif" - ], - [ - "images/08_g.jpg", - "manual1/08_g.tif", - "mask/08_g_mask.tif" - ], - [ - "images/09_g.jpg", - "manual1/09_g.tif", - "mask/09_g_mask.tif" - ], - [ - "images/10_g.jpg", - "manual1/10_g.tif", - "mask/10_g_mask.tif" - ], - [ - "images/11_g.jpg", - "manual1/11_g.tif", - "mask/11_g_mask.tif" - ], - [ - "images/12_g.jpg", - "manual1/12_g.tif", - "mask/12_g_mask.tif" - ], - [ - "images/13_g.jpg", - "manual1/13_g.tif", - "mask/13_g_mask.tif" - ], - [ - "images/14_g.jpg", - "manual1/14_g.tif", - "mask/14_g_mask.tif" - ], - [ - "images/15_g.jpg", - "manual1/15_g.tif", - "mask/15_g_mask.tif" - ], - [ - "images/06_h.jpg", - "manual1/06_h.tif", - "mask/06_h_mask.tif" - ], - [ - "images/07_h.jpg", - "manual1/07_h.tif", - "mask/07_h_mask.tif" - ], - [ - "images/08_h.jpg", - "manual1/08_h.tif", - "mask/08_h_mask.tif" - ], - [ - "images/09_h.jpg", - "manual1/09_h.tif", - "mask/09_h_mask.tif" - ], - [ - "images/10_h.jpg", - "manual1/10_h.tif", - "mask/10_h_mask.tif" - ], - [ - "images/11_h.jpg", - "manual1/11_h.tif", - "mask/11_h_mask.tif" - ], - [ - "images/12_h.jpg", - "manual1/12_h.tif", - "mask/12_h_mask.tif" - ], - [ - "images/13_h.jpg", - "manual1/13_h.tif", - "mask/13_h_mask.tif" - ], - [ - "images/14_h.jpg", - "manual1/14_h.tif", - "mask/14_h_mask.tif" - ], - [ - "images/15_h.jpg", - "manual1/15_h.tif", - "mask/15_h_mask.tif" - ] - ] -} \ No newline at end of file diff --git a/bob/ip/binseg/data/iostar/test.py b/bob/ip/binseg/data/iostar/test.py index ecedb60c..865d2eb7 100644 --- a/bob/ip/binseg/data/iostar/test.py +++ b/bob/ip/binseg/data/iostar/test.py @@ -5,6 +5,8 @@ """Tests for IOSTAR""" import os + +import numpy import nose.tools from . import dataset @@ -41,26 +43,114 @@ def test_protocol_consitency(): @rc_variable_set('bob.ip.binseg.iostar.datadir') -def test_loading(): +def test_loading_vessel(): + + from ..utils import count_bw + image_size = (1024, 1024) + bw_threshold_label = 0.11 #(vessels to background proportion limit) def _check_sample(s): + data = s.data assert isinstance(data, dict) nose.tools.eq_(len(data), 3) + assert "data" in data - nose.tools.eq_(data["data"].size, (1024, 1024)) + nose.tools.eq_(data["data"].size, image_size) nose.tools.eq_(data["data"].mode, "RGB") + assert "label" in data - nose.tools.eq_(data["label"].size, (1024, 1024)) - nose.tools.eq_(data["label"].mode, "1") - assert "label" in data - nose.tools.eq_(data["label"].size, (1024, 1024)) + nose.tools.eq_(data["label"].size, image_size) nose.tools.eq_(data["label"].mode, "1") + b, w = count_bw(data["label"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':label" + assert (w/b) < bw_threshold_label, \ + f"The proportion between black and white pixels " \ + f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \ + f"of {bw_threshold_label} at '{s.key}':label - this could " \ + f"indicate a loading problem!" 
+ + assert "mask" in data + nose.tools.eq_(data["mask"].size, image_size) + nose.tools.eq_(data["mask"].mode, "1") + b, w = count_bw(data["mask"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':mask" + assert w > b, \ + f"The proportion between white and black pixels " \ + f"({w} > {b}?) is not respected at '{s.key}':mask - " \ + f"this could indicate a loading problem!" + + # to visualize images, uncomment the folowing code + # it should display an image with a faded background representing the + # original data, blended with green labels and blue area indicating the + # parts to be masked out. + #from ..utils import overlayed_image + #display = overlayed_image(data["data"], data["label"], data["mask"]) + #display.show() + #import ipdb; ipdb.set_trace() + #pass subset = dataset.subsets("vessel") for s in subset["train"]: _check_sample(s) for s in subset["test"]: _check_sample(s) + +@rc_variable_set('bob.ip.binseg.iostar.datadir') +def test_loading_optic_disc(): + + from ..utils import count_bw + image_size = (1024, 1024) + bw_threshold_label = 0.04 #(optic-disc to background proportion limit) + + def _check_sample(s): + + data = s.data + assert isinstance(data, dict) + nose.tools.eq_(len(data), 3) + + assert "data" in data + nose.tools.eq_(data["data"].size, image_size) + nose.tools.eq_(data["data"].mode, "RGB") + + assert "label" in data + nose.tools.eq_(data["label"].size, image_size) + nose.tools.eq_(data["label"].mode, "1") + b, w = count_bw(data["label"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':label" + assert (w/b) < bw_threshold_label, \ + f"The proportion between black and white pixels " \ + f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \ + f"of {bw_threshold_label} at '{s.key}':label - this could " \ + f"indicate a loading problem!" + + assert "mask" in data + nose.tools.eq_(data["mask"].size, image_size) + nose.tools.eq_(data["mask"].mode, "1") + b, w = count_bw(data["mask"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':mask" + assert w > b, \ + f"The proportion between white and black pixels " \ + f"({w} > {b}?) is not respected at '{s.key}':mask - " \ + f"this could indicate a loading problem!" + + # to visualize images, uncomment the folowing code + # it should display an image with a faded background representing the + # original data, blended with green labels and blue area indicating the + # parts to be masked out. 
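The IOSTAR optic-disc protocol is the only place where the loader is context-dependent: the raw annotation is the negative of the optic disc and still contains the region outside the field-of-view mask. A standalone sketch of the same manipulation, assuming a binary annotation/mask pair is already at hand (file names are placeholders):

    import PIL.Image

    from bob.ip.binseg.data.utils import invert_mode1_image, subtract_mode1_images

    # placeholder inputs - any binary (mode "1") annotation/mask pair will do
    raw_label = PIL.Image.open("raw_annotation.tif").convert(mode="1", dither=None)
    mask = PIL.Image.open("fov_mask.tif").convert(mode="1", dither=None)

    # invert the raw annotation so white marks the optic disc itself, then
    # remove everything outside the field of view by subtracting the
    # inverted mask, exactly as the IOSTAR loader does for "optic-disc"
    label = subtract_mode1_images(invert_mode1_image(raw_label),
            invert_mode1_image(mask))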
+ #from ..utils import overlayed_image + #display = overlayed_image(data["data"], data["label"], data["mask"]) + #display.show() + #import ipdb; ipdb.set_trace() + #pass + subset = dataset.subsets("optic-disc") for s in subset["train"]: _check_sample(s) for s in subset["test"]: _check_sample(s) diff --git a/bob/ip/binseg/data/jsondataset.py b/bob/ip/binseg/data/jsondataset.py index 6ce86a20..01121ec1 100644 --- a/bob/ip/binseg/data/jsondataset.py +++ b/bob/ip/binseg/data/jsondataset.py @@ -14,7 +14,7 @@ from .sample import DelayedSample class JSONDataset: """ - Generic multi-protocol filelist dataset + Generic multi-protocol filelist dataset that yields samples To create a new dataset, you need to provide one or more JSON formatted filelists (one per protocol) with the following contents: @@ -66,11 +66,18 @@ class JSONDataset: Where: - * ``data``: absolute or relative path leading to original image + * ``data``: absolute or relative path leading to original image, in RGB + format * ``label``: (optional) absolute or relative path with manual segmentation - information + information. This image will be converted to a binary image. This + dataset shall always yield label images in which white pixels (value=1) + indicate the **presence** of the object, and black pixels (value=0), its + absence. * ``mask``: (optional) absolute or relative path with a mask that indicates - valid regions in the image where automatic segmentation should occur + valid regions in the image where automatic segmentation should occur. + This image will be converted to a binary image. This dataset shall + always yield mask images in which white pixels (value=1) indicate the + **valid** regions of the mask, and black pixels (value=0), invalid parts. Relative paths are interpreted with respect to the location where the JSON file is or to an optional ``root_path`` parameter, that can be provided. @@ -97,8 +104,11 @@ class JSONDataset: relative paths. loader : object - A function that receives, as input, a dictionary with ``{key: path}`` - entries, and returns a dictionary with the loaded data + A function that receives, as input, a context dictionary (with a + "protocol" and "subset" keys indicating which protocol and subset are + being served), and a dictionary with ``{key: path}`` entries, and + returns a dictionary with the loaded data. It shall respect the + loading principles of data, label and mask objects as stated above. 
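Concretely, a dataset module built on this class only needs the protocol JSON files, a root path and a loader honouring the (context, sample) contract, as the modules updated in this patch do. A hedged sketch with placeholder resource and rc-variable names:

    import os
    import pkg_resources

    import bob.extension

    from bob.ip.binseg.data.jsondataset import JSONDataset
    from bob.ip.binseg.data.loader import load_pil_rgb, load_pil_1

    # "myprotocol.json" and the rc variable name below are placeholders
    _protocols = [pkg_resources.resource_filename(__name__, "myprotocol.json")]
    _root_path = bob.extension.rc.get("bob.ip.binseg.mydataset.datadir",
            os.path.realpath(os.curdir))

    def _loader(context, sample):
        # context == {"protocol": ..., "subset": ...}; branch on it only when
        # the raw data needs protocol-specific fix-ups (as IOSTAR does for
        # its optic-disc protocol)
        return dict(
            data=load_pil_rgb(sample["data"]),
            label=load_pil_1(sample["label"]),
        )

    dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader)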
""" @@ -173,6 +183,7 @@ class JSONDataset: for subset, samples in data.items(): delayeds = [] + context = dict(protocol=protocol, subset=subset) for k in samples: if isinstance(k, dict): @@ -194,7 +205,7 @@ class JSONDataset: if not os.path.isabs(v): abs_item[k] = os.path.join(self.root_path, v) - load = functools.partial(self.loader, abs_item) + load = functools.partial(self.loader, context, abs_item) delayeds.append(DelayedSample(load, key=key)) retval[subset] = delayeds diff --git a/bob/ip/binseg/data/stare/__init__.py b/bob/ip/binseg/data/stare/__init__.py index 3aa38840..6885fc4d 100644 --- a/bob/ip/binseg/data/stare/__init__.py +++ b/bob/ip/binseg/data/stare/__init__.py @@ -40,10 +40,11 @@ _protocols = [ _root_path = bob.extension.rc.get('bob.ip.binseg.stare.datadir', os.path.realpath(os.curdir)) -def _loader(s): +def _loader(context, sample): + #"context" is ignore in this case - database is homogeneous return dict( - data=load_pil_rgb(s["data"]), - label=load_pil_1(s["label"]), + data=load_pil_rgb(sample["data"]), + label=load_pil_1(sample["label"]), ) dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) diff --git a/bob/ip/binseg/data/stare/test.py b/bob/ip/binseg/data/stare/test.py index 0a60cd60..4db5d97f 100644 --- a/bob/ip/binseg/data/stare/test.py +++ b/bob/ip/binseg/data/stare/test.py @@ -5,6 +5,8 @@ """Tests for STARE""" import os + +import numpy import nose.tools from . import dataset @@ -43,16 +45,41 @@ def test_protocol_consitency(): @rc_variable_set('bob.ip.binseg.stare.datadir') def test_loading(): + from ..utils import count_bw + image_size = (700, 605) + bw_threshold_label = 0.19 #(vessels to background proportion limit) + def _check_sample(s): + data = s.data assert isinstance(data, dict) nose.tools.eq_(len(data), 2) + assert "data" in data - nose.tools.eq_(data["data"].size, (700, 605)) + nose.tools.eq_(data["data"].size, image_size) nose.tools.eq_(data["data"].mode, "RGB") + assert "label" in data - nose.tools.eq_(data["label"].size, (700, 605)) + nose.tools.eq_(data["label"].size, image_size) nose.tools.eq_(data["label"].mode, "1") + b, w = count_bw(data["label"]) + assert (b+w) == numpy.prod(image_size), \ + f"Counts of black + white ({b}+{w}) do not add up to total " \ + f"image size ({numpy.prod(image_size)}) at '{s.key}':label" + assert (w/b) < bw_threshold_label, \ + f"The proportion between black and white pixels " \ + f"({w}/{b}={w/b:.2f}) is larger than the allowed threshold " \ + f"of {bw_threshold_label} at '{s.key}':label - this could " \ + f"indicate a loading problem!" + + # to visualize images, uncomment the folowing code + # it should display an image with a faded background representing the + # original data, blended with green labels. 
+ #from ..utils import overlayed_image + #display = overlayed_image(data["data"], data["label"]) + #display.show() + #import ipdb; ipdb.set_trace() + #pass subset = dataset.subsets("default") for s in subset["train"]: _check_sample(s) diff --git a/bob/ip/binseg/data/utils.py b/bob/ip/binseg/data/utils.py index 0e1c8842..d8ab2299 100644 --- a/bob/ip/binseg/data/utils.py +++ b/bob/ip/binseg/data/utils.py @@ -4,6 +4,10 @@ """Common utilities""" +import numpy +import PIL.Image +import PIL.ImageOps +import PIL.ImageChops import torch import torch.utils.data @@ -11,6 +15,116 @@ import torch.utils.data from .transforms import Compose, ToTensor +def count_bw(b): + """Calculates totals of black and white pixels in a binary image + + + Parameters + ---------- + + b : PIL.Image.Image + A PIL image in mode "1" to be used for calculating positives and + negatives + + Returns + ------- + + black : int + Number of black pixels in the binary image + + white : int + Number of white pixels in the binary image + """ + + boolean_array = numpy.array(b) + white = boolean_array.sum() + return (boolean_array.size-white), white + + +def invert_mode1_image(img): + """Inverts a binary PIL image (mode == ``"1"``)""" + + return PIL.ImageOps.invert(img.convert("RGB")).convert(mode="1", + dither=None) + + +def subtract_mode1_images(img1, img2): + """Returns a new image that represents ``img1 - img2``""" + + return PIL.ImageChops.subtract(img1, img2) + + +def overlayed_image(img, label, mask=None, label_color=(0, 255, 0), + mask_color=(0, 0, 255), alpha=0.4): + """Creates an image showing existing labels and masko + + This function creates a new representation of the input image ``img`` + overlaying a green mask for labelled objects, and a red mask for parts of + the image that should be ignored (negative mask). By looking at this + representation, it shall be possible to verify if the dataset/loader is + yielding images correctly. + + + Parameters + ---------- + + img : PIL.Image.Image + An RGB PIL image that represents the original image for analysis + + label : PIL.Image.Image + A PIL image in mode "1" that represents the labelled elements in the + image. White pixels represent the labelled object. Black pixels + represent background. + + mask : py:class:`PIL.Image.Image`, Optional + A PIL image in mode "1" that represents the mask for the image. White + pixels indicate where content should be used, black pixels, content to + be ignored. + + label_color : py:class:`tuple`, Optional + A tuple with three integer entries indicating the RGB color to be used + for labels + + mask_color : py:class:`tuple`, Optional + A tuple with three integer entries indicating the RGB color to be used + for the mask-negative (black parts in the original mask). + + alpha : py:class:`float`, Optional + A float that indicates how much of blending should be performed between + the label, mask and the original image. + + + Returns + ------- + + image : PIL.Image.Image + A new image overlaying the original image, object labels (in green) and + what is to be considered parts to be **masked-out** (i.e. a + representation of a negative of the mask). + + """ + + # creates a representation of labels with the right color + label_colored = PIL.ImageOps.colorize(label.convert("L"), (0, 0, 0), + label_color) + + # blend image and label together - first blend to get vessels drawn with a + # slight "label_color" tone on top, then composite with original image, not + # to loose brightness. 
+ retval = PIL.Image.blend(img, label_colored, alpha) + retval = PIL.Image.composite(img, retval, + invert_mode1_image(label)) + + # creates a representation of the mask negative with the right color + if mask is not None: + antimask_colored = PIL.ImageOps.colorize(mask.convert("L"), mask_color, + (0, 0, 0)) + tmp = PIL.Image.blend(retval, antimask_colored, alpha) + retval = PIL.Image.composite(retval, tmp, mask) + + return retval + + class SampleList2TorchDataset(torch.utils.data.Dataset): """PyTorch dataset wrapper around Sample lists -- GitLab
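Taken together, the utilities added here support both automated checks and quick manual inspection. A hedged end-to-end sketch (DRIVE and the "default" protocol are chosen only for illustration, and require the corresponding datadir rc variable to be configured):

    from bob.ip.binseg.data.drive import dataset
    from bob.ip.binseg.data.utils import count_bw, overlayed_image

    sample = dataset.subsets("default")["train"][0]
    data = sample.data                 # {"data": ..., "label": ..., "mask": ...}

    black, white = count_bw(data["label"])
    print(f"{sample.key}: {white} vessel and {black} background pixels")

    # labels are blended in green and the negative of the mask in blue
    # (the default label_color/mask_color values above)
    display = overlayed_image(data["data"], data["label"], data["mask"])
    display.show()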