diff --git a/MANIFEST.in b/MANIFEST.in index 9f9ab6b6edc5c72b48a8a4002ffb71f210e95bd4..70f6ad2ea295cdf025eab2310e578179315433f1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ include README.rst buildout.cfg COPYING version.txt requirements.txt recursive-include doc *.sh *.rst *.png *.pdf *.ico *.txt -recursive-include bob *.json *.png +recursive-include bob *.json *.png *.csv *.jpg diff --git a/bob/ip/binseg/configs/datasets/csv.py b/bob/ip/binseg/configs/datasets/csv.py index c33bc62ce1478ef067cd5242fc21db115b09a32a..3b62ec89e4f373d811e674a5e471f79e24cd9ced 100644 --- a/bob/ip/binseg/configs/datasets/csv.py +++ b/bob/ip/binseg/configs/datasets/csv.py @@ -58,15 +58,18 @@ More information: """ +import os + # First, define how to access and load the raw data. Our package provides some # stock loaders we use for other datasets. You may have a look at the # documentation of that module for details. from bob.ip.binseg.data.loader import ( load_pil_rgb, load_pil_1, - data_path_keymaker, ) +from bob.ip.binseg.data.sample import Sample + # How we use the loaders - "sample" is a dictionary where keys are defined # below and map to the columns of the CSV files you input. This one is # configured to load images and labels using PIL. @@ -79,20 +82,23 @@ def _loader(context, sample): # the CSV file to contain only relative paths and is, therefore, more # compact. Of course, you can make those paths absolute and then simplify # it here. - import os - root_path = "/path/where/raw/files/sit" - return dict( - data=load_pil_rgb(os.path.join(root_path, sample["data"])), - label=load_pil_1(os.path.join(root_path, sample["label"])), - ) + data=load_pil_rgb(os.path.join(root_path, sample["data"])) + label=load_pil_1(os.path.join(root_path, sample["label"])) + + # You may also return DelayedSample to avoid data loading to take place + # as the sample object itself is created. Take a look at our own datasets + # for examples. 
+ return Sample( + key=os.path.splitext(sample["data"])[0], + data=dict(data=data, label=label), + ) # This is just a class that puts everything together: the CSV file, how to load -# each sample defined in the dataset, names for the various columns of the CSV -# file and how to make unique keys for each sample (keymaker). Once created, -# this object can be called to generate sample lists. +# each sample defined in the dataset, and names for the various columns of the +# CSV file. Once created, this object can be called to generate sample lists. from bob.ip.binseg.data.dataset import CSVDataset _raw_dataset = CSVDataset( @@ -109,7 +115,6 @@ _raw_dataset = CSVDataset( }, fieldnames=("data", "label"), # these are the column names loader=_loader, - keymaker=data_path_keymaker, ) # Finally, we build a connector to passes our dataset to the pytorch framework diff --git a/bob/ip/binseg/data/chasedb1/__init__.py b/bob/ip/binseg/data/chasedb1/__init__.py index 213cb16cae4eb19c28c505528176b5ac8977fcae..08a59459c8c137c89d1a64787c85cc72ddaf31d1 100644 --- a/bob/ip/binseg/data/chasedb1/__init__.py +++ b/bob/ip/binseg/data/chasedb1/__init__.py @@ -37,23 +37,32 @@ import pkg_resources import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker +from ..loader import load_pil_rgb, load_pil_1, make_delayed _protocols = [ - pkg_resources.resource_filename(__name__, "first-annotator.json"), - pkg_resources.resource_filename(__name__, "second-annotator.json"), - ] + pkg_resources.resource_filename(__name__, "first-annotator.json"), + pkg_resources.resource_filename(__name__, "second-annotator.json"), +] -_root_path = bob.extension.rc.get('bob.ip.binseg.chasedb1.datadir', - os.path.realpath(os.curdir)) +_root_path = bob.extension.rc.get( + "bob.ip.binseg.chasedb1.datadir", os.path.realpath(os.curdir) +) -def _loader(context, sample): - #"context" is ignored in this case - database is homogeneous + +def 
_raw_data_loader(sample): return dict( - data=load_pil_rgb(os.path.join(_root_path, sample["data"])), - label=load_pil_1(os.path.join(_root_path, sample["label"])), - ) + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_1(os.path.join(_root_path, sample["label"])), + ) + + +def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we returned delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader) + -dataset = JSONDataset(protocols=_protocols, fieldnames=("data", "label"), - loader=_loader, keymaker=data_path_keymaker) +dataset = JSONDataset( + protocols=_protocols, fieldnames=("data", "label"), loader=_loader +) """CHASE-DB1 dataset object""" diff --git a/bob/ip/binseg/data/dataset.py b/bob/ip/binseg/data/dataset.py index 92599aa8986d1838b3e8b32540b524476ac77761..7756b01de1b4f57ecce4764e027544fc742ac937 100644 --- a/bob/ip/binseg/data/dataset.py +++ b/bob/ip/binseg/data/dataset.py @@ -6,14 +6,11 @@ import csv import copy import json import pathlib -import functools import logging logger = logging.getLogger(__name__) -from .sample import DelayedSample - class JSONDataset: """ @@ -63,33 +60,33 @@ class JSONDataset: loader : object A function that receives as input, a context dictionary (with at least a "protocol" and "subset" keys indicating which protocol and subset are - being served), and a dictionary with ``{key: path}`` entries, and - returns a dictionary with the loaded data. + being served), and a dictionary with ``{fieldname: value}`` entries, + and returns an object with at least 2 attributes: - keymaker : object - A function that receives as input the same input from the ``loader``, - but outputs a single string that uniquely identifies a sample within - a given protocol. It is typically the path, without extension, of one - of the file entries for the sample, but you can tune it as you like. 
+ * ``key``: which must be a unique string for every sample across + subsets in a protocol, and + * ``data``: which contains the data associated with this sample """ - def __init__(self, protocols, fieldnames, loader, keymaker): + def __init__(self, protocols, fieldnames, loader): if isinstance(protocols, dict): - self.protocols = protocols + self._protocols = protocols else: - self.protocols = dict( - (os.path.splitext(os.path.basename(k))[0], k) - for k in protocols + self._protocols = dict( + (os.path.splitext(os.path.basename(k))[0], k) for k in protocols ) self.fieldnames = fieldnames - self.loader = loader - self.keymaker = keymaker + self._loader = loader def check(self, limit=0): """For each protocol, check if all data can be correctly accessed + This function assumes each sample has a ``data`` and a ``key`` + attribute. The ``key`` attribute should be a string, or representable + as such. + Parameters ---------- @@ -110,43 +107,34 @@ class JSONDataset: logger.info(f"Checking dataset...") errors = 0 - for proto in self.protocols: + for proto in self._protocols: logger.info(f"Checking protocol '{proto}'...") for name, samples in self.subsets(proto).items(): logger.info(f"Checking subset '{name}'...") if limit: logger.info(f"Checking at most first '{limit}' samples...") samples = samples[:limit] - for sample in samples: + for pos, sample in enumerate(samples): try: - sample.data # triggers loading + assert len(sample) == len(self.fieldnames), ( + f"Entry {pos} in subset {name} of protocol " + f"{proto} has {len(sample)} entries instead of " + f"{len(self.fieldnames)} (expected). 
Fix file " + f"'{self._protocols[proto]}'" + ) + sample.data # check data can be loaded logger.info(f"{sample.key}: OK") except Exception as e: logger.error(f"{sample.key}: {e}") errors += 1 return errors - def _make_delayed(self, pos, sample, context): - """Checks consistence and builds a delayed loading sample - """ - assert len(sample) == len(self.fieldnames), ( - f"Entry {k} in subset {context['subset']} of protocol " - f"{context['protocol']} has {len(sample)} entries instead of " - f"{len(self.fieldnames)} (expected). Fix file " - f"{self.protocols[context['protocol']]}" - ) - item = dict(zip(self.fieldnames, sample)) - return DelayedSample( - functools.partial(self.loader, context, item), - key=self.keymaker(context, item), - ) - def subsets(self, protocol): """Returns all subsets in a protocol This method will load JSON information for a given protocol and return - all subsets of the given protocol after converting each entry into a - :py:class:`bob.ip.binseg.data.sample.DelayedSample`. + all subsets of the given protocol after converting each entry through + the loader function. Parameters ---------- @@ -159,19 +147,14 @@ class JSONDataset: ------- subsets : dict - A dictionary mapping subset names to lists of - :py:class:`bob.ip.binseg.data.sample.DelayedSample` objects, with - the proper loading implemented. Each delayed sample also carries a - ``key`` parameter, that contains the output of the sample - contextual data after passing through the ``keymaker``. This - parameter can be used for recording sample transforms during - check-pointing. + A dictionary mapping subset names to lists of objects (respecting + the ``key``, ``data`` interface). 
""" - fileobj = self.protocols[protocol] + fileobj = self._protocols[protocol] if isinstance(fileobj, (str, bytes, pathlib.Path)): - with open(self.protocols[protocol], "r") as f: + with open(self._protocols[protocol], "r") as f: data = json.load(f) else: data = json.load(f) @@ -179,16 +162,20 @@ class JSONDataset: retval = {} for subset, samples in data.items(): - context = dict(protocol=protocol, subset=subset) retval[subset] = [ - self._make_delayed(k, v, context) for (k, v) in enumerate(samples) + self._loader( + dict(protocol=protocol, subset=subset, order=n), + dict(zip(self.fieldnames, k)) + ) + for n, k in enumerate(samples) ] + return retval class CSVDataset: """ - Generic single subset filelist dataset that yields samples + Generic multi-subset filelist dataset that yields samples To create a new dataset, you only need to provide a CSV formatted filelist using any separator (e.g. comma, space, semi-colon) with the following @@ -222,30 +209,26 @@ class CSVDataset: dictionary with ``{key: path}`` entries, and returns a dictionary with the loaded data. - keymaker : object - A function that receives as input the same input from the ``loader``, - but outputs a single string that uniquely identifies a sample within - a given protocol. It is typically the path, without extension, of one - of the file entries for the sample, but you can tune it as you like. - """ - def __init__(self, subsets, fieldnames, loader, keymaker): + def __init__(self, subsets, fieldnames, loader): if isinstance(subsets, dict): self._subsets = subsets else: self._subsets = dict( - (os.path.splitext(os.path.basename(k))[0], k) - for k in subsets + (os.path.splitext(os.path.basename(k))[0], k) for k in subsets ) self.fieldnames = fieldnames - self.loader = loader - self.keymaker = keymaker + self._loader = loader def check(self, limit=0): """For each subset, check if all data can be correctly accessed + This function assumes each sample has a ``data`` and a ``key`` + attribute. 
The ``key`` attribute should be a string, or representable + as such. + Parameters ---------- @@ -272,8 +255,13 @@ class CSVDataset: if limit: logger.info(f"Checking at most first '{limit}' samples...") samples = samples[:limit] - for sample in samples: + for pos, sample in enumerate(samples): try: + assert len(sample) == len(self.fieldnames), ( + f"Entry {pos} in subset {name} has {len(sample)} " + f"entries instead of {len(self.fieldnames)} " + f"(expected). Fix file '{self._subsets[name]}'" + ) sample.data # triggers loading logger.info(f"{sample.key}: OK") except Exception as e: @@ -281,20 +269,6 @@ class CSVDataset: errors += 1 return errors - def _make_delayed(self, pos, sample, context): - """Checks consistence and builds a delayed loading sample - """ - assert len(sample) == len(self.fieldnames), ( - f"Entry {k} in subset {context['subset']} has {len(sample)} " - f"entries instead of {len(self.fieldnames)} (expected). Fix " - f"file {self._subsets[context['subset']]}" - ) - item = dict(zip(self.fieldnames, sample)) - return DelayedSample( - functools.partial(self.loader, context, item), - key=self.keymaker(context, item), - ) - def subsets(self): """Returns all available subsets at once @@ -302,13 +276,8 @@ class CSVDataset: ------- subsets : dict - A dictionary mapping subset names to lists of - :py:class:`bob.ip.binseg.data.sample.DelayedSample` objects, with - the proper loading implemented. Each delayed sample also carries a - ``key`` parameter, that contains the output of the sample - contextual data after passing through the ``keymaker``. This - parameter can be used for recording sample transforms during - check-pointing. + A dictionary mapping subset names to lists of objects (respecting + the ``key``, ``data`` interface). 
""" @@ -318,8 +287,8 @@ class CSVDataset: """Returns all samples in a subset This method will load CSV information for a given subset and return - all samples of the given subset after converting each entry into a - :py:class:`bob.ip.binseg.data.sample.DelayedSample`. + all samples of the given subset after passing each entry through the + loading function. Parameters @@ -333,12 +302,7 @@ class CSVDataset: ------- subset : list - A list of :py:class:`bob.ip.binseg.data.sample.DelayedSample` - objects, with the proper loading implemented. Each delayed sample - also carries a ``key`` parameter, that contains the output of the - sample contextual data after passing through the ``keymaker``. - This parameter can be used for recording sample transforms during - check-pointing. + A lists of objects (respecting the ``key``, ``data`` interface). """ @@ -352,5 +316,9 @@ class CSVDataset: samples = [k for k in cf] fileobj.seek(0) - context = dict(subset=subset) - return [self._make_delayed(k, v, context) for (k, v) in enumerate(samples)] + return [ + self._loader( + dict(subset=subset, order=n), dict(zip(self.fieldnames, k)) + ) + for n, k in enumerate(samples) + ] diff --git a/bob/ip/binseg/data/drionsdb/__init__.py b/bob/ip/binseg/data/drionsdb/__init__.py index e9bdabd6f23211b09d0efa35912b8a70745c9606..92e345e3a21ec2309aa23c8f6c42e72bb4b1b4ee 100644 --- a/bob/ip/binseg/data/drionsdb/__init__.py +++ b/bob/ip/binseg/data/drionsdb/__init__.py @@ -31,49 +31,60 @@ import PIL.ImageDraw import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, data_path_keymaker +from ..loader import load_pil_rgb, make_delayed _protocols = [ - pkg_resources.resource_filename(__name__, "expert1.json"), - pkg_resources.resource_filename(__name__, "expert2.json"), - ] + pkg_resources.resource_filename(__name__, "expert1.json"), + pkg_resources.resource_filename(__name__, "expert2.json"), +] -_root_path = bob.extension.rc.get('bob.ip.binseg.drionsdb.datadir', - 
os.path.realpath(os.curdir)) +_root_path = bob.extension.rc.get( + "bob.ip.binseg.drionsdb.datadir", os.path.realpath(os.curdir) +) def _txt_to_pil_1(fname, size): """Converts DRIONS-DB annotations to image format""" - with open(fname,'r') as f: - rows = csv.reader(f,delimiter=',',quoting=csv.QUOTE_NONNUMERIC) - data = list(map(tuple,rows)) + with open(fname, "r") as f: + rows = csv.reader(f, delimiter=",", quoting=csv.QUOTE_NONNUMERIC) + data = list(map(tuple, rows)) - retval = PIL.Image.new('1', size) + retval = PIL.Image.new("1", size) draw = PIL.ImageDraw.ImageDraw(retval) - draw.polygon(data, fill='white') + draw.polygon(data, fill="white") del draw return retval def _pad_right(img): """Pads image on the right by one pixel, respects mode""" - retval = PIL.Image.new(img.mode, (img.size[0]+1, img.size[1]), 'black') - retval.paste(img, (0, 0)+img.size) #top-left pasting + retval = PIL.Image.new(img.mode, (img.size[0] + 1, img.size[1]), "black") + retval.paste(img, (0, 0) + img.size) # top-left pasting return retval -def _loader(context, sample): - data = load_pil_rgb(os.path.join(_root_path, sample["data"])) +def _raw_data_loader(sample): + data = load_pil_rgb(os.path.join(_root_path, sample["data"])) label = _txt_to_pil_1(os.path.join(_root_path, sample["label"]), data.size) + return dict(data=data, label=label,) + + +def _sample_101_loader(sample): + # pads the image on the right side to account for a difference in + # resolution to other images in the dataset + retval = _raw_data_loader(sample) + retval["data"] = _pad_right(retval["data"]) + retval["label"] = _pad_right(retval["label"]) + return retval + +def _loader(context, sample): if sample["data"].endswith("_101.jpg"): - # pads the image on the right side to account for a difference in - # resolution to other images in the dataset - data = _pad_right(data) - label = _pad_right(label) + return make_delayed(sample, _sample_101_loader) + return make_delayed(sample, _raw_data_loader) - return dict(data=data, 
label=label) -dataset = JSONDataset(protocols=_protocols, fieldnames=("data", "label"), - loader=_loader, keymaker=data_path_keymaker) +dataset = JSONDataset( + protocols=_protocols, fieldnames=("data", "label"), loader=_loader +) """DRIONSDB dataset object""" diff --git a/bob/ip/binseg/data/drishtigs1/__init__.py b/bob/ip/binseg/data/drishtigs1/__init__.py index 538139232ed83b82eb4ca4d64521c714dbb08c86..c4ec018ec819f3221b7c0e3c6278cd11341cda4f 100644 --- a/bob/ip/binseg/data/drishtigs1/__init__.py +++ b/bob/ip/binseg/data/drishtigs1/__init__.py @@ -28,7 +28,7 @@ import pkg_resources import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, data_path_keymaker +from ..loader import load_pil_rgb, make_delayed _protocols = { "optic-disc-all": pkg_resources.resource_filename( @@ -50,26 +50,40 @@ _root_path = bob.extension.rc.get( ) -def _loader(context, sample): +def _raw_data_loader_all(sample): retval = dict( data=load_pil_rgb(os.path.join(_root_path, sample["data"])), label=load_pil_rgb(os.path.join(_root_path, sample["label"])).convert( "L" ), ) + retval["label"] = retval["label"].point(lambda p: p > 254, mode="1") + return retval + + +def _raw_data_loader_any(sample): + retval = dict( + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_rgb(os.path.join(_root_path, sample["label"])).convert( + "L" + ), + ) + retval["label"] = retval["label"].point(lambda p: p > 0, mode="1") + return retval + + +def _loader(context, sample): # Drishti-GS provides softmaps of multiple annotators # we threshold to get gt where all/any of the annotators overlap if context["protocol"].endswith("-all"): - retval["label"] = retval["label"].point(lambda p: p > 254, mode="1") + return make_delayed(sample, _raw_data_loader_all) elif context["protocol"].endswith("-any"): - retval["label"] = retval["label"].point(lambda p: p > 0, mode="1") + return make_delayed(sample, _raw_data_loader_any) else: raise RuntimeError(f"Unknown protocol 
{context['protocol']}") - return retval dataset = JSONDataset( - protocols=_protocols, fieldnames=("data", "label"), loader=_loader, - keymaker=data_path_keymaker + protocols=_protocols, fieldnames=("data", "label"), loader=_loader ) """Drishti-GS1 dataset object""" diff --git a/bob/ip/binseg/data/drive/__init__.py b/bob/ip/binseg/data/drive/__init__.py index 6ae0bb842ffd284f2776ef729c741dfd72020167..59e6f44f38c98b587cd73f8891c014c077d6fa1d 100644 --- a/bob/ip/binseg/data/drive/__init__.py +++ b/bob/ip/binseg/data/drive/__init__.py @@ -26,7 +26,7 @@ import pkg_resources import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker +from ..loader import load_pil_rgb, load_pil_1, make_delayed _protocols = [ pkg_resources.resource_filename(__name__, "default.json"), @@ -38,8 +38,7 @@ _root_path = bob.extension.rc.get( ) -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous +def _raw_data_loader(sample): return dict( data=load_pil_rgb(os.path.join(_root_path, sample["data"])), label=load_pil_1(os.path.join(_root_path, sample["label"])), @@ -47,10 +46,15 @@ def _loader(context, sample): ) +def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we returned delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader) + + dataset = JSONDataset( protocols=_protocols, fieldnames=("data", "label", "mask"), loader=_loader, - keymaker=data_path_keymaker, ) """DRIVE dataset object""" diff --git a/bob/ip/binseg/data/hrf/__init__.py b/bob/ip/binseg/data/hrf/__init__.py index 1ab2919c8ee77d85560711bfbcb2a5c15ba24512..dd9f8da453f3863fb20b2d1e3af58cdde958e68f 100644 --- a/bob/ip/binseg/data/hrf/__init__.py +++ b/bob/ip/binseg/data/hrf/__init__.py @@ -25,7 +25,7 @@ import pkg_resources import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker 
+from ..loader import load_pil_rgb, load_pil_1, make_delayed _protocols = [ pkg_resources.resource_filename(__name__, "default.json"), @@ -36,8 +36,7 @@ _root_path = bob.extension.rc.get( ) -def _loader(context, sample): - # "context" is ignore in this case - database is homogeneous +def _raw_data_loader(sample): return dict( data=load_pil_rgb(os.path.join(_root_path, sample["data"])), label=load_pil_1(os.path.join(_root_path, sample["label"])), @@ -45,10 +44,13 @@ def _loader(context, sample): ) +def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we returned delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader) + + dataset = JSONDataset( - protocols=_protocols, - fieldnames=("data", "label", "mask"), - loader=_loader, - keymaker=data_path_keymaker, + protocols=_protocols, fieldnames=("data", "label", "mask"), loader=_loader, ) """HRF dataset object""" diff --git a/bob/ip/binseg/data/iostar/__init__.py b/bob/ip/binseg/data/iostar/__init__.py index 0ce427195777310966b1cd4e9579dce0571ad23f..e5b762d9d714837f25d60062cc08b3c9be2a1e7d 100644 --- a/bob/ip/binseg/data/iostar/__init__.py +++ b/bob/ip/binseg/data/iostar/__init__.py @@ -28,7 +28,7 @@ import pkg_resources import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker +from ..loader import load_pil_rgb, load_pil_1, make_delayed from ..utils import invert_mode1_image, subtract_mode1_images _protocols = [ @@ -41,30 +41,36 @@ _root_path = bob.extension.rc.get( ) -def _loader(context, sample): - retval = dict( +def _vessel_loader(sample): + return dict( data=load_pil_rgb(os.path.join(_root_path, sample["data"])), label=load_pil_1(os.path.join(_root_path, sample["label"])), mask=load_pil_1(os.path.join(_root_path, sample["mask"])), ) + + +def _disc_loader(sample): + # For optic-disc analysis, the label provided by IOSTAR raw data is the + # "inverted" (negative) 
label, and does not consider the mask region, which + # must be subtracted. We do this special manipulation here. + data = load_pil_rgb(os.path.join(_root_path, sample["data"])) + label = load_pil_1(os.path.join(_root_path, sample["label"])) + mask = load_pil_1(os.path.join(_root_path, sample["mask"])) + label = subtract_mode1_images( + invert_mode1_image(label), invert_mode1_image(mask) + ) + return dict(data=data, label=label, mask=mask) + + +def _loader(context, sample): if context["protocol"] == "optic-disc": - # For optic-disc analysis, the label provided by IOSTAR raw data is the - # "inverted" (negative) label, and does not consider the mask region, - # which must be subtracted. We do this special manipulation here. - retval["label"] = subtract_mode1_images( - invert_mode1_image(retval["label"]), - invert_mode1_image(retval["mask"]), - ) - return retval + return make_delayed(sample, _disc_loader) elif context["protocol"] == "vessel": - return retval + return make_delayed(sample, _vessel_loader) raise RuntimeError(f"Unknown protocol {context['protocol']}") dataset = JSONDataset( - protocols=_protocols, - fieldnames=("data", "label", "mask"), - loader=_loader, - keymaker=data_path_keymaker, + protocols=_protocols, fieldnames=("data", "label", "mask"), loader=_loader, ) """IOSTAR dataset object""" diff --git a/bob/ip/binseg/data/loader.py b/bob/ip/binseg/data/loader.py index 84928b7d39376bc180fd89d3bb89bc175b7f7774..aa2adc416065fb65e25a4b10c5b364758442e8b7 100644 --- a/bob/ip/binseg/data/loader.py +++ b/bob/ip/binseg/data/loader.py @@ -6,8 +6,12 @@ import os +import functools + import PIL.Image +from .sample import DelayedSample + def load_pil_rgb(path): """Loads a sample data @@ -51,32 +55,35 @@ def load_pil_1(path): return PIL.Image.open(path).convert(mode="1", dither=None) -def data_path_keymaker(context, sample): - """Returns a path without extension as a key - - This method assumes ``sample`` contains at least one entry named ``path``, - that contains a 
path to the sample raw data, without extension. It will - return the said path without its extension. - +def make_delayed(sample, loader, key=None): + """Returns a delayed-loading Sample object Parameters ---------- - context : dict - Context dictionary with entries (``protocol``, ``subset``), depending - on the context - sample : dict - A dictionary that maps field names to sample entries from the original - dataset. + A dictionary that maps field names to sample data values (e.g. paths) + + loader : object + A function that inputs ``sample`` dictionaries and returns the loaded + data. + + key : str + A unique key identifier for this sample. If not provided, assumes + ``sample`` is a dictionary with a ``data`` entry and uses its path as + key. Returns ------- - key : str - A string that uniquely identifies the sample within a given context + sample : bob.ip.binseg.data.sample.DelayedSample + In which ``key`` is as provided and ``data`` can be accessed to trigger + sample loading. """ - return os.path.splitext(sample["data"])[0] + return DelayedSample( + functools.partial(loader, sample), + key=key or os.path.splitext(sample["data"])[0], + ) diff --git a/bob/ip/binseg/data/refuge/__init__.py b/bob/ip/binseg/data/refuge/__init__.py index 328dec359fdbaed8fcd19545a95c5e78676941e9..addcfca9f368bafb68efd0a97a4232f88a5a9d8e 100644 --- a/bob/ip/binseg/data/refuge/__init__.py +++ b/bob/ip/binseg/data/refuge/__init__.py @@ -38,27 +38,48 @@ import pkg_resources import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, data_path_keymaker +from ..loader import load_pil_rgb, make_delayed _protocols = { - "optic-disc": pkg_resources.resource_filename(__name__, "default.json"), - "optic-cup": pkg_resources.resource_filename(__name__, "default.json"), - } + "optic-disc": pkg_resources.resource_filename(__name__, "default.json"), + "optic-cup": pkg_resources.resource_filename(__name__, "default.json"), +} -_root_path = 
bob.extension.rc.get('bob.ip.binseg.refuge.datadir', - os.path.realpath(os.curdir)) +_root_path = bob.extension.rc.get( + "bob.ip.binseg.refuge.datadir", os.path.realpath(os.curdir) +) -def _loader(context, sample): + +def _disc_loader(sample): retval = dict( - data=load_pil_rgb(os.path.join(_root_path, sample["data"])), - label=load_pil_rgb(os.path.join(_root_path, sample["label"])), - ) + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_rgb(os.path.join(_root_path, sample["label"])), + glaucoma=sample["glaucoma"], + ) + retval["label"] = retval["label"].convert("L") + retval["label"] = retval["label"].point(lambda p: p <= 150, mode="1") + return retval + +def _cup_loader(sample): + retval = dict( + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_rgb(os.path.join(_root_path, sample["label"])), + glaucoma=sample["glaucoma"], + ) + retval["label"] = retval["label"].convert("L") + retval["label"] = retval["label"].point(lambda p: p <= 100, mode="1") + return retval + + +def _loader(context, sample): + + sample["glaucoma"] = False if context["subset"] == "train": # adds binary metadata for glaucoma/non-glaucoma patients - retval["glaucoma"] = os.path.basename(sample["label"]).startswith("g") + sample["glaucoma"] = os.path.basename(sample["label"]).startswith("g") elif context["subset"] == "test": - retval["glaucoma"] = sample["label"].split(os.sep)[-2] == "G" + sample["glaucoma"] = (sample["label"].split(os.sep)[-2] == "G") elif context["subset"] == "validation": pass else: @@ -67,16 +88,16 @@ def _loader(context, sample): # optic disc is drawn with gray == 128 and includes the cup, drawn with # black == 0. The rest is white == 255. 
if context["protocol"] == "optic-disc": - retval["label"] = retval["label"].convert("L") - retval["label"] = retval["label"].point(lambda p: p<=150, mode="1") + return make_delayed(sample, _disc_loader) elif context["protocol"] == "optic-cup": - retval["label"] = retval["label"].convert("L") - retval["label"] = retval["label"].point(lambda p: p<=100, mode="1") + return make_delayed(sample, _cup_loader) else: raise RuntimeError(f"Unknown protocol {context['protocol']}") - return retval -dataset = JSONDataset(protocols=_protocols, fieldnames=("data", "label"), - loader=_loader, keymaker=data_path_keymaker) +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("data", "label"), + loader=_loader, +) """REFUGE dataset object""" diff --git a/bob/ip/binseg/data/rimoner3/__init__.py b/bob/ip/binseg/data/rimoner3/__init__.py index b60faddab0f3e9747d6544a447cc915d546e1653..5e3eca89794709c4cce482b3c2d16565014b8dc1 100644 --- a/bob/ip/binseg/data/rimoner3/__init__.py +++ b/bob/ip/binseg/data/rimoner3/__init__.py @@ -26,7 +26,7 @@ import pkg_resources import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker +from ..loader import load_pil_rgb, load_pil_1, make_delayed _protocols = [ pkg_resources.resource_filename(__name__, "optic-disc-exp1.json"), @@ -42,7 +42,7 @@ _root_path = bob.extension.rc.get( ) -def _loader(context, sample): +def _raw_data_loader(sample): # RIM-ONE r3 provides stereo images - we clip them here to get only the # left part of the image, which is also annotated return dict( @@ -55,10 +55,15 @@ def _loader(context, sample): ) +def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we returned delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader) + + dataset = JSONDataset( protocols=_protocols, fieldnames=("data", "label"), loader=_loader, - keymaker=data_path_keymaker, ) """RIM-ONE r3 dataset 
object""" diff --git a/bob/ip/binseg/data/stare/__init__.py b/bob/ip/binseg/data/stare/__init__.py index faeac3896ad6a1b9982bd39d7b7d37a27c9dac16..753c98feb92884f4f3ad11de3cc781a99d1bdb9e 100644 --- a/bob/ip/binseg/data/stare/__init__.py +++ b/bob/ip/binseg/data/stare/__init__.py @@ -30,7 +30,7 @@ import pkg_resources import bob.extension from ..dataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker +from ..loader import load_pil_rgb, load_pil_1, make_delayed _protocols = [ pkg_resources.resource_filename(__name__, "ah.json"), @@ -45,14 +45,19 @@ _root_path = bob.extension.rc.get( def _make_loader(root_path): + #hack to get testing on the CI working fine for this dataset - def _loader(context, sample): - # "context" is ignore in this case - database is homogeneous + def _raw_data_loader(sample): return dict( data=load_pil_rgb(os.path.join(root_path, sample["data"])), label=load_pil_1(os.path.join(root_path, sample["label"])), ) + def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we returned delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader) + return _loader @@ -62,7 +67,6 @@ def _make_dataset(root_path): protocols=_protocols, fieldnames=_fieldnames, loader=_make_loader(root_path), - keymaker=data_path_keymaker, ) dataset = _make_dataset(_root_path) diff --git a/bob/ip/binseg/test/testimg-16bit.png b/bob/ip/binseg/test/data/img-16bit.png similarity index 100% rename from bob/ip/binseg/test/testimg-16bit.png rename to bob/ip/binseg/test/data/img-16bit.png diff --git a/bob/ip/binseg/test/data/iris-test.csv b/bob/ip/binseg/test/data/iris-test.csv new file mode 100644 index 0000000000000000000000000000000000000000..27d1b05a7aa70667844b74778504f3b51c624884 --- /dev/null +++ b/bob/ip/binseg/test/data/iris-test.csv @@ -0,0 +1,75 @@ +5,3,1.6,0.2,Iris-setosa +5,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa 
+4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5,3.3,1.4,0.2,Iris-setosa +6.6,3,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3,5,1.7,Iris-versicolor +6,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6,2.7,5.1,1.6,Iris-versicolor +5.4,3,4.5,1.5,Iris-versicolor +6,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3,4.1,1.3,Iris-versicolor +5.5,2.5,4,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3,4.6,1.4,Iris-versicolor +5.8,2.6,4,1.2,Iris-versicolor +5,2.3,3.3,1,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +7.2,3.2,6,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3,6.1,2.3,Iris-virginica +6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6,3,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica 
+6.7,3.3,5.7,2.5,Iris-virginica +6.7,3,5.2,2.3,Iris-virginica +6.3,2.5,5,1.9,Iris-virginica +6.5,3,5.2,2,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3,5.1,1.8,Iris-virginica diff --git a/bob/ip/binseg/test/data/iris-train.csv b/bob/ip/binseg/test/data/iris-train.csv new file mode 100644 index 0000000000000000000000000000000000000000..82d5b134803975463f070aebe6847e7c742749d2 --- /dev/null +++ b/bob/ip/binseg/test/data/iris-train.csv @@ -0,0 +1,75 @@ +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3,1.4,0.1,Iris-setosa +4.3,3,1.1,0.1,Iris-setosa +5.8,4,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +7,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5,2,3.5,1,Iris-versicolor +5.9,3,4.2,1.5,Iris-versicolor +6,2.2,4,1,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor 
+6.3,3.3,6,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3,5.8,2.2,Iris-virginica +7.6,3,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3,5.5,2.1,Iris-virginica +5.7,2.5,5,2,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6,2.2,5,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2,Iris-virginica +7.7,2.8,6.7,2,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica diff --git a/bob/ip/binseg/test/data/iris.json b/bob/ip/binseg/test/data/iris.json new file mode 100644 index 0000000000000000000000000000000000000000..1777efc361562880096b9a21b2011218c152ecac --- /dev/null +++ b/bob/ip/binseg/test/data/iris.json @@ -0,0 +1,156 @@ +{ + "train": [ + [5.1,3.5,1.4,0.2,"Iris-setosa"], + [4.9,3,1.4,0.2,"Iris-setosa"], + [4.7,3.2,1.3,0.2,"Iris-setosa"], + [4.6,3.1,1.5,0.2,"Iris-setosa"], + [5,3.6,1.4,0.2,"Iris-setosa"], + [5.4,3.9,1.7,0.4,"Iris-setosa"], + [4.6,3.4,1.4,0.3,"Iris-setosa"], + [5,3.4,1.5,0.2,"Iris-setosa"], + [4.4,2.9,1.4,0.2,"Iris-setosa"], + [4.9,3.1,1.5,0.1,"Iris-setosa"], + [5.4,3.7,1.5,0.2,"Iris-setosa"], + [4.8,3.4,1.6,0.2,"Iris-setosa"], + [4.8,3,1.4,0.1,"Iris-setosa"], + [4.3,3,1.1,0.1,"Iris-setosa"], + [5.8,4,1.2,0.2,"Iris-setosa"], + [5.7,4.4,1.5,0.4,"Iris-setosa"], + [5.4,3.9,1.3,0.4,"Iris-setosa"], + [5.1,3.5,1.4,0.3,"Iris-setosa"], + [5.7,3.8,1.7,0.3,"Iris-setosa"], + [5.1,3.8,1.5,0.3,"Iris-setosa"], + [5.4,3.4,1.7,0.2,"Iris-setosa"], + [5.1,3.7,1.5,0.4,"Iris-setosa"], + [4.6,3.6,1,0.2,"Iris-setosa"], + [5.1,3.3,1.7,0.5,"Iris-setosa"], + [4.8,3.4,1.9,0.2,"Iris-setosa"], + [7,3.2,4.7,1.4,"Iris-versicolor"], + [6.4,3.2,4.5,1.5,"Iris-versicolor"], + 
[6.9,3.1,4.9,1.5,"Iris-versicolor"], + [5.5,2.3,4,1.3,"Iris-versicolor"], + [6.5,2.8,4.6,1.5,"Iris-versicolor"], + [5.7,2.8,4.5,1.3,"Iris-versicolor"], + [6.3,3.3,4.7,1.6,"Iris-versicolor"], + [4.9,2.4,3.3,1,"Iris-versicolor"], + [6.6,2.9,4.6,1.3,"Iris-versicolor"], + [5.2,2.7,3.9,1.4,"Iris-versicolor"], + [5,2,3.5,1,"Iris-versicolor"], + [5.9,3,4.2,1.5,"Iris-versicolor"], + [6,2.2,4,1,"Iris-versicolor"], + [6.1,2.9,4.7,1.4,"Iris-versicolor"], + [5.6,2.9,3.6,1.3,"Iris-versicolor"], + [6.7,3.1,4.4,1.4,"Iris-versicolor"], + [5.6,3,4.5,1.5,"Iris-versicolor"], + [5.8,2.7,4.1,1,"Iris-versicolor"], + [6.2,2.2,4.5,1.5,"Iris-versicolor"], + [5.6,2.5,3.9,1.1,"Iris-versicolor"], + [5.9,3.2,4.8,1.8,"Iris-versicolor"], + [6.1,2.8,4,1.3,"Iris-versicolor"], + [6.3,2.5,4.9,1.5,"Iris-versicolor"], + [6.1,2.8,4.7,1.2,"Iris-versicolor"], + [6.4,2.9,4.3,1.3,"Iris-versicolor"], + [6.3,3.3,6,2.5,"Iris-virginica"], + [5.8,2.7,5.1,1.9,"Iris-virginica"], + [7.1,3,5.9,2.1,"Iris-virginica"], + [6.3,2.9,5.6,1.8,"Iris-virginica"], + [6.5,3,5.8,2.2,"Iris-virginica"], + [7.6,3,6.6,2.1,"Iris-virginica"], + [4.9,2.5,4.5,1.7,"Iris-virginica"], + [7.3,2.9,6.3,1.8,"Iris-virginica"], + [6.7,2.5,5.8,1.8,"Iris-virginica"], + [7.2,3.6,6.1,2.5,"Iris-virginica"], + [6.5,3.2,5.1,2,"Iris-virginica"], + [6.4,2.7,5.3,1.9,"Iris-virginica"], + [6.8,3,5.5,2.1,"Iris-virginica"], + [5.7,2.5,5,2,"Iris-virginica"], + [5.8,2.8,5.1,2.4,"Iris-virginica"], + [6.4,3.2,5.3,2.3,"Iris-virginica"], + [6.5,3,5.5,1.8,"Iris-virginica"], + [7.7,3.8,6.7,2.2,"Iris-virginica"], + [7.7,2.6,6.9,2.3,"Iris-virginica"], + [6,2.2,5,1.5,"Iris-virginica"], + [6.9,3.2,5.7,2.3,"Iris-virginica"], + [5.6,2.8,4.9,2,"Iris-virginica"], + [7.7,2.8,6.7,2,"Iris-virginica"], + [6.3,2.7,4.9,1.8,"Iris-virginica"], + [6.7,3.3,5.7,2.1,"Iris-virginica"] + ], + "test": [ + [5,3,1.6,0.2,"Iris-setosa"], + [5,3.4,1.6,0.4,"Iris-setosa"], + [5.2,3.5,1.5,0.2,"Iris-setosa"], + [5.2,3.4,1.4,0.2,"Iris-setosa"], + [4.7,3.2,1.6,0.2,"Iris-setosa"], + 
[4.8,3.1,1.6,0.2,"Iris-setosa"], + [5.4,3.4,1.5,0.4,"Iris-setosa"], + [5.2,4.1,1.5,0.1,"Iris-setosa"], + [5.5,4.2,1.4,0.2,"Iris-setosa"], + [4.9,3.1,1.5,0.1,"Iris-setosa"], + [5,3.2,1.2,0.2,"Iris-setosa"], + [5.5,3.5,1.3,0.2,"Iris-setosa"], + [4.9,3.1,1.5,0.1,"Iris-setosa"], + [4.4,3,1.3,0.2,"Iris-setosa"], + [5.1,3.4,1.5,0.2,"Iris-setosa"], + [5,3.5,1.3,0.3,"Iris-setosa"], + [4.5,2.3,1.3,0.3,"Iris-setosa"], + [4.4,3.2,1.3,0.2,"Iris-setosa"], + [5,3.5,1.6,0.6,"Iris-setosa"], + [5.1,3.8,1.9,0.4,"Iris-setosa"], + [4.8,3,1.4,0.3,"Iris-setosa"], + [5.1,3.8,1.6,0.2,"Iris-setosa"], + [4.6,3.2,1.4,0.2,"Iris-setosa"], + [5.3,3.7,1.5,0.2,"Iris-setosa"], + [5,3.3,1.4,0.2,"Iris-setosa"], + [6.6,3,4.4,1.4,"Iris-versicolor"], + [6.8,2.8,4.8,1.4,"Iris-versicolor"], + [6.7,3,5,1.7,"Iris-versicolor"], + [6,2.9,4.5,1.5,"Iris-versicolor"], + [5.7,2.6,3.5,1,"Iris-versicolor"], + [5.5,2.4,3.8,1.1,"Iris-versicolor"], + [5.5,2.4,3.7,1,"Iris-versicolor"], + [5.8,2.7,3.9,1.2,"Iris-versicolor"], + [6,2.7,5.1,1.6,"Iris-versicolor"], + [5.4,3,4.5,1.5,"Iris-versicolor"], + [6,3.4,4.5,1.6,"Iris-versicolor"], + [6.7,3.1,4.7,1.5,"Iris-versicolor"], + [6.3,2.3,4.4,1.3,"Iris-versicolor"], + [5.6,3,4.1,1.3,"Iris-versicolor"], + [5.5,2.5,4,1.3,"Iris-versicolor"], + [5.5,2.6,4.4,1.2,"Iris-versicolor"], + [6.1,3,4.6,1.4,"Iris-versicolor"], + [5.8,2.6,4,1.2,"Iris-versicolor"], + [5,2.3,3.3,1,"Iris-versicolor"], + [5.6,2.7,4.2,1.3,"Iris-versicolor"], + [5.7,3,4.2,1.2,"Iris-versicolor"], + [5.7,2.9,4.2,1.3,"Iris-versicolor"], + [6.2,2.9,4.3,1.3,"Iris-versicolor"], + [5.1,2.5,3,1.1,"Iris-versicolor"], + [5.7,2.8,4.1,1.3,"Iris-versicolor"], + [7.2,3.2,6,1.8,"Iris-virginica"], + [6.2,2.8,4.8,1.8,"Iris-virginica"], + [6.1,3,4.9,1.8,"Iris-virginica"], + [6.4,2.8,5.6,2.1,"Iris-virginica"], + [7.2,3,5.8,1.6,"Iris-virginica"], + [7.4,2.8,6.1,1.9,"Iris-virginica"], + [7.9,3.8,6.4,2,"Iris-virginica"], + [6.4,2.8,5.6,2.2,"Iris-virginica"], + [6.3,2.8,5.1,1.5,"Iris-virginica"], + [6.1,2.6,5.6,1.4,"Iris-virginica"], 
+ [7.7,3,6.1,2.3,"Iris-virginica"], + [6.3,3.4,5.6,2.4,"Iris-virginica"], + [6.4,3.1,5.5,1.8,"Iris-virginica"], + [6,3,4.8,1.8,"Iris-virginica"], + [6.9,3.1,5.4,2.1,"Iris-virginica"], + [6.7,3.1,5.6,2.4,"Iris-virginica"], + [6.9,3.1,5.1,2.3,"Iris-virginica"], + [5.8,2.7,5.1,1.9,"Iris-virginica"], + [6.8,3.2,5.9,2.3,"Iris-virginica"], + [6.7,3.3,5.7,2.5,"Iris-virginica"], + [6.7,3,5.2,2.3,"Iris-virginica"], + [6.3,2.5,5,1.9,"Iris-virginica"], + [6.5,3,5.2,2,"Iris-virginica"], + [6.2,3.4,5.4,2.3,"Iris-virginica"], + [5.9,3,5.1,1.8,"Iris-virginica"] + ] +} diff --git a/bob/ip/binseg/test/test_csv.py b/bob/ip/binseg/test/test_csv.py index 482a0fc81a0d2dd10bab85183b51ce744b812ba9..1d8e22fcd2075cf0ce8bc58cc884cf506a79010f 100644 --- a/bob/ip/binseg/test/test_csv.py +++ b/bob/ip/binseg/test/test_csv.py @@ -1,75 +1,96 @@ #!/usr/bin/env python # coding=utf-8 -"""Unit tests for the CSV dataset""" - -import io +"""Test code for datasets""" +import os +import pkg_resources import nose.tools -from ..data.dataset import CSVDataset -from ..data import stare - -## special trick for CI builds -from . 
import mock_dataset, TESTDB_TMPDIR - -datadir, json_dataset, rc_variable_set = mock_dataset() - - -## definition of stare subsets for "default" protocol -default = { - "train": io.StringIO( - """\ -stare-images/im0001.ppm,labels-ah/im0001.ah.ppm -stare-images/im0002.ppm,labels-ah/im0002.ah.ppm -stare-images/im0003.ppm,labels-ah/im0003.ah.ppm -stare-images/im0004.ppm,labels-ah/im0004.ah.ppm -stare-images/im0005.ppm,labels-ah/im0005.ah.ppm -stare-images/im0044.ppm,labels-ah/im0044.ah.ppm -stare-images/im0077.ppm,labels-ah/im0077.ah.ppm -stare-images/im0081.ppm,labels-ah/im0081.ah.ppm -stare-images/im0082.ppm,labels-ah/im0082.ah.ppm -stare-images/im0139.ppm,labels-ah/im0139.ah.ppm""" - ), - "test": io.StringIO( - """\ -stare-images/im0162.ppm,labels-ah/im0162.ah.ppm -stare-images/im0163.ppm,labels-ah/im0163.ah.ppm -stare-images/im0235.ppm,labels-ah/im0235.ah.ppm -stare-images/im0236.ppm,labels-ah/im0236.ah.ppm -stare-images/im0239.ppm,labels-ah/im0239.ah.ppm -stare-images/im0240.ppm,labels-ah/im0240.ah.ppm -stare-images/im0255.ppm,labels-ah/im0255.ah.ppm -stare-images/im0291.ppm,labels-ah/im0291.ah.ppm -stare-images/im0319.ppm,labels-ah/im0319.ah.ppm -stare-images/im0324.ppm,labels-ah/im0324.ah.ppm""" - ), -} - - -@rc_variable_set("bob.ip.binseg.stare.datadir") -def test_compare_to_json(): - - test_dataset = CSVDataset( - default, - stare._fieldnames, - stare._make_loader(datadir), - stare.data_path_keymaker, - ) - - for subset in ("train", "test"): - for t1, t2 in zip( - test_dataset.samples(subset), - json_dataset.subsets("ah")[subset], - ): - nose.tools.eq_(t1.key, t2.key) - nose.tools.eq_(t1.data, t2.data) - - subsets = test_dataset.subsets() - for subset in subsets.keys(): - for t1, t2 in zip( - subsets[subset], - json_dataset.subsets("ah")[subset], - ): - nose.tools.eq_(t1.key, t2.key) - nose.tools.eq_(t1.data, t2.data) +from ..data.dataset import CSVDataset, JSONDataset +from ..data.sample import Sample + + +def _data_file(f): + return 
pkg_resources.resource_filename(__name__, os.path.join("data", f)) + + +def _raw_data_loader(context, d): + return Sample( + data=[ + float(d["sepal_length"]), + float(d["sepal_width"]), + float(d["petal_length"]), + float(d["petal_width"]), + d["species"][5:], + ], + key=(context["subset"] + str(context["order"])) + ) + + +def test_csv_loading(): + + # tests if we can build a simple CSV loader for the Iris Flower dataset + subsets = { + "train": _data_file("iris-train.csv"), + "test": _data_file("iris-test.csv") + } + + fieldnames = ( + "sepal_length", + "sepal_width", + "petal_length", + "petal_width", + "species", + ) + + dataset = CSVDataset(subsets, fieldnames, _raw_data_loader) + + data = dataset.subsets() + + nose.tools.eq_(len(data["train"]), 75) + for k in data["train"]: + for f in range(4): + nose.tools.eq_(type(k.data[f]), float) + nose.tools.eq_(type(k.data[4]), str) + nose.tools.eq_(type(k.key), str) + + nose.tools.eq_(len(data["test"]), 75) + for k in data["test"]: + for f in range(4): + nose.tools.eq_(type(k.data[f]), float) + nose.tools.eq_(type(k.data[4]), str) + assert k.data[4] in ("setosa", "versicolor", "virginica") + nose.tools.eq_(type(k.key), str) + + +def test_json_loading(): + + # tests if we can build a simple JSON loader for the Iris Flower dataset + protocols = {"default": _data_file("iris.json")} + + fieldnames = ( + "sepal_length", + "sepal_width", + "petal_length", + "petal_width", + "species", + ) + + dataset = JSONDataset(protocols, fieldnames, _raw_data_loader) + + data = dataset.subsets("default") + + nose.tools.eq_(len(data["train"]), 75) + for k in data["train"]: + for f in range(4): + nose.tools.eq_(type(k.data[f]), float) + nose.tools.eq_(type(k.data[4]), str) + nose.tools.eq_(type(k.key), str) + + nose.tools.eq_(len(data["test"]), 75) + for k in data["test"]: + for f in range(4): + nose.tools.eq_(type(k.data[f]), float) + nose.tools.eq_(type(k.data[4]), str) + nose.tools.eq_(type(k.key), str) diff --git 
a/bob/ip/binseg/test/test_transforms.py b/bob/ip/binseg/test/test_transforms.py index 698c0a0e17fbfc6c6873a5c8c9c3146aef54c244..e71a4a4927f1598a49037dc09a672de91845dd11 100644 --- a/bob/ip/binseg/test/test_transforms.py +++ b/bob/ip/binseg/test/test_transforms.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +import os import random import nose.tools @@ -348,7 +349,8 @@ def test_compose(): def test_16bit_autolevel(): - path = pkg_resources.resource_filename(__name__, "testimg-16bit.png") + path = pkg_resources.resource_filename(__name__, os.path.join("data", + "img-16bit.png")) # the way to load a 16-bit PNG image correctly, according to: # https://stackoverflow.com/questions/32622658/read-16-bit-png-image-file-using-python # https://github.com/python-pillow/Pillow/issues/3011