diff --git a/bob/ip/binseg/configs/datasets/__init__.py b/bob/ip/binseg/configs/datasets/__init__.py index 436796ffb94e196dc4cfe3e4af2164f0c6fca3c2..cd6d9b34db1ac30aff072a947465a016d413b37f 100644 --- a/bob/ip/binseg/configs/datasets/__init__.py +++ b/bob/ip/binseg/configs/datasets/__init__.py @@ -20,9 +20,17 @@ RANDOM_FLIP_JITTER = [_hflip(), _vflip(), _jitter()] """Shared data augmentation transforms without random rotation""" -def make_subset(l, transforms, prefixes, suffixes): +def make_subset(l, transforms, prefixes=[], suffixes=[]): """Creates a new data set, applying transforms + .. note:: + + This is a convenience function for our own dataset definitions inside + this module, guaranteeing homogeneity between dataset definitions + provided in this package. It assumes certain strategies for data + augmentation that may not be translatable to other applications. + + Parameters ---------- @@ -44,24 +52,31 @@ def make_subset(l, transforms, prefixes, suffixes): Returns ------- - subset : :py:class:`torch.utils.data.Dataset` + subset : :py:class:`bob.ip.binseg.data.utils.SampleListDataset` A pre-formatted dataset that can be fed to one of our engines """ - from ...data.utils import SampleList2TorchDataset as wrapper + from ...data.utils import SampleListDataset as wrapper - return wrapper(l, transforms, prefixes, suffixes) + return wrapper(l, prefixes + transforms + suffixes) def make_trainset(l, transforms, rotation_before=False): - """Creates a new training set, with data augmentation + """Creates a new training set, **with data augmentation** Typically, the transforms are chained to a default set of data augmentation operations (random rotation, horizontal and vertical flips, and color jitter), but flag allows prefixing the rotation specially (useful for some COVD training sets). + .. note:: + + This is a convenience function for our own dataset definitions inside + this module, guaranteeing homogeneity between dataset definitions + provided in this package. 
It assumes certain strategies for data + augmentation that may not be translatable to other applications. + Parameters ---------- @@ -76,7 +91,7 @@ def make_trainset(l, transforms, rotation_before=False): Returns ------- - subset : :py:class:`torch.utils.data.Dataset` + subset : :py:class:`bob.ip.binseg.data.utils.SampleListDataset` A pre-formatted dataset that can be fed to one of our engines """ @@ -91,8 +106,7 @@ def make_trainset(l, transforms, rotation_before=False): return make_subset( l, - transforms, - prefixes=[], + transforms=transforms, suffixes=(RANDOM_ROTATION + RANDOM_FLIP_JITTER), ) @@ -101,13 +115,32 @@ def make_dataset(subsets, transforms): """Creates a new configuration dataset from dictionary and transforms This function takes as input a dictionary as those that can be returned by - :py:meth:`bob.ip.binseg.data.dataset.JSONDataset.subsets`, mapping protocol + :py:meth:`bob.ip.binseg.data.dataset.JSONDataset.subsets`, or + :py:meth:`bob.ip.binseg.data.dataset.CSVDataset.subsets`, mapping protocol names (such as ``train``, ``dev`` and ``test``) to :py:class:`bob.ip.binseg.data.sample.DelayedSample` lists, and a set of transforms, and returns a dictionary applying - :py:class:`bob.ip.binseg.data.utils.SampleList2TorchDataset` to these + :py:class:`bob.ip.binseg.data.utils.SampleListDataset` to these lists, and our standard data augmentation if a ``train`` set exists. + For example, if ``subsets`` is composed of two sets named ``train`` and + ``test``, this function will yield a dictionary with the following entries: + + * ``__train__``: Wraps the ``train`` subset, includes data augmentation + (note: datasets with names starting with ``_`` (underscore) are excluded + from prediction and evaluation by default, as they contain data + augmentation transformations.) + * ``train``: Wraps the ``train`` subset, **without** data augmentation + * ``test``: Wraps the ``test`` subset, **without** data augmentation + + .. 
note:: + + This is a convenience function for our own dataset definitions inside + this module, guaranteeing homogeneity between dataset definitions + provided in this package. It assumes certain strategies for data + augmentation that may not be translatable to other applications. + + Parameters ---------- @@ -126,20 +159,18 @@ def make_dataset(subsets, transforms): dataset : dict A pre-formatted dataset that can be fed to one of our engines. It maps - string names to :py:class:`torch.utils.data.Dataset`'s. + string names to + :py:class:`bob.ip.binseg.data.utils.SampleListDataset`'s. """ retval = {} for key in subsets.keys(): + retval[key] = make_subset(subsets[key], transforms=transforms) if key == "train": - retval[key] = make_trainset( + retval["__train__"] = make_trainset( subsets[key], transforms=transforms, rotation_before=False ) - else: - retval[key] = make_subset( - subsets[key], transforms=transforms, prefixes=[], suffixes=[] - ) return retval diff --git a/bob/ip/binseg/configs/datasets/chasedb1/covd.py b/bob/ip/binseg/configs/datasets/chasedb1/covd.py index e9cd82e20f6d2faa2189833ca34b7105e716cf83..ed8c37aff14036c88fc5949820efb733e802e4ae 100644 --- a/bob/ip/binseg/configs/datasets/chasedb1/covd.py +++ b/bob/ip/binseg/configs/datasets/chasedb1/covd.py @@ -20,6 +20,7 @@ from bob.ip.binseg.data.transforms import CenterCrop, Pad, Resize from bob.ip.binseg.configs.datasets import make_trainset as _maker from bob.ip.binseg.data.drive import dataset as _raw_drive + _drive = _maker( _raw_drive.subsets("default")["train"], [CenterCrop((544, 544)), Resize(960)], @@ -27,6 +28,7 @@ _drive = _maker( ) from bob.ip.binseg.data.stare import dataset as _raw_stare + # n.b.: not the best fit, but what was there for Tim's work _stare = _maker( _raw_stare.subsets("ah")["train"], @@ -35,20 +37,21 @@ _stare = _maker( ) from bob.ip.binseg.data.hrf import dataset as _raw_hrf + _hrf = _maker( _raw_hrf.subsets("default")["train"], [Pad((0, 584, 0, 584)), Resize(960)], ) from 
bob.ip.binseg.data.iostar import dataset as _raw_iostar + # n.b.: not the best fit, but what was there for Tim's work _iostar = _maker(_raw_iostar.subsets("vessel")["train"], [Resize(960)]) from torch.utils.data import ConcatDataset from bob.ip.binseg.configs.datasets.chasedb1.first_annotator import ( - dataset as _baselines, + dataset as _baseline, ) -dataset = { - "train": ConcatDataset([_drive, _stare, _hrf, _iostar]), - "test": _baselines["test"], # use the same test set always -} +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_baseline) +dataset["__train__"] = ConcatDataset([_drive, _stare, _hrf, _iostar]) diff --git a/bob/ip/binseg/configs/datasets/chasedb1/ssl.py b/bob/ip/binseg/configs/datasets/chasedb1/ssl.py index d65408e7520a667c41a025b90593eac51ebd4d7a..8bd97ddad2e8a468ec78d20c99b79240deec4f11 100644 --- a/bob/ip/binseg/configs/datasets/chasedb1/ssl.py +++ b/bob/ip/binseg/configs/datasets/chasedb1/ssl.py @@ -19,13 +19,12 @@ For details on datasets, consult: * :py:mod:`bob.ip.binseg.data.hrf` """ -from bob.ip.binseg.configs.datasets.chasedb1.covd import dataset as _labelled +from bob.ip.binseg.configs.datasets.chasedb1.covd import dataset as _covd from bob.ip.binseg.configs.datasets.chasedb1.first_annotator import ( - dataset as _baselines, + dataset as _baseline, ) from bob.ip.binseg.data.utils import SSLDataset -dataset = { - "train": SSLDataset(_labelled["train"], _baselines["train"]), - "test": _baselines["test"], # use always the same test set -} +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_covd) +dataset["__train__"] = SSLDataset(_covd["__train__"], _baseline["__train__"]) diff --git a/bob/ip/binseg/configs/datasets/drive/covd.py b/bob/ip/binseg/configs/datasets/drive/covd.py index 20b4c45718bbd5efa623de13e31d52f43abd10d1..494ca9a9960d82fb3508d6a31238ef7810b58e60 100644 --- a/bob/ip/binseg/configs/datasets/drive/covd.py +++ b/bob/ip/binseg/configs/datasets/drive/covd.py @@ 
-20,34 +20,33 @@ from bob.ip.binseg.data.transforms import Resize, Pad, Crop from bob.ip.binseg.configs.datasets import make_trainset as _maker from bob.ip.binseg.data.stare import dataset as _raw_stare + _stare = _maker( - _raw_stare.subsets("ah")["train"], - [Resize(471), Pad((0, 37, 0, 36))], - rotation_before=True, - ) + _raw_stare.subsets("ah")["train"], + [Resize(471), Pad((0, 37, 0, 36))], + rotation_before=True, +) from bob.ip.binseg.data.chasedb1 import dataset as _raw_chase + _chase = _maker( - _raw_chase.subsets("first-annotator")["train"], - [Resize(544), Crop(0, 12, 544, 544)], - ) + _raw_chase.subsets("first-annotator")["train"], + [Resize(544), Crop(0, 12, 544, 544)], +) from bob.ip.binseg.data.iostar import dataset as _raw_iostar -_iostar = _maker( - _raw_iostar.subsets("vessel")["train"], - [Resize(544)], - ) + +_iostar = _maker(_raw_iostar.subsets("vessel")["train"], [Resize(544)],) from bob.ip.binseg.data.hrf import dataset as _raw_hrf + _hrf = _maker( - _raw_hrf.subsets("default")["train"], - [Resize((363)), Pad((0, 90, 0, 91))], - ) + _raw_hrf.subsets("default")["train"], [Resize((363)), Pad((0, 90, 0, 91))], +) from torch.utils.data import ConcatDataset -from bob.ip.binseg.configs.datasets.drive.default import dataset as _baselines +from bob.ip.binseg.configs.datasets.drive.default import dataset as _baseline -dataset = { - "train": ConcatDataset([_stare, _chase, _iostar, _hrf]), - "test": _baselines["test"], #use the same test set always - } +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_baseline) +dataset["__train__"] = ConcatDataset([_stare, _chase, _iostar, _hrf]) diff --git a/bob/ip/binseg/configs/datasets/drive/ssl.py b/bob/ip/binseg/configs/datasets/drive/ssl.py index edb76e6ae26baffaedd0ce46cba81880c2952c20..23af544342f1a48a8e83f87d9041d638f58ed6cf 100644 --- a/bob/ip/binseg/configs/datasets/drive/ssl.py +++ b/bob/ip/binseg/configs/datasets/drive/ssl.py @@ -18,11 +18,10 @@ For details on datasets, 
consult: * :py:mod:`bob.ip.binseg.data.hrf` """ -from bob.ip.binseg.configs.datasets.drive.covd import dataset as _labelled -from bob.ip.binseg.configs.datasets.drive.default import dataset as _baselines +from bob.ip.binseg.configs.datasets.drive.covd import dataset as _covd +from bob.ip.binseg.configs.datasets.drive.default import dataset as _baseline from bob.ip.binseg.data.utils import SSLDataset -dataset = { - "train": SSLDataset(_labelled["train"], _baselines["train"]), - "test": _baselines["test"], #use always the same test set - } +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_covd) +dataset["__train__"] = SSLDataset(_covd["__train__"], _baseline["__train__"]) diff --git a/bob/ip/binseg/configs/datasets/hrf/covd.py b/bob/ip/binseg/configs/datasets/hrf/covd.py index 06610544b85216ec4a49e362a03b38edd1072b43..792a005b6c08fee3b0dbab1cea07e1379c7cf056 100644 --- a/bob/ip/binseg/configs/datasets/hrf/covd.py +++ b/bob/ip/binseg/configs/datasets/hrf/covd.py @@ -20,37 +20,40 @@ from bob.ip.binseg.data.transforms import Crop, Pad, Resize from bob.ip.binseg.configs.datasets import make_trainset as _maker from bob.ip.binseg.data.drive import dataset as _raw_drive + _drive = _maker( - _raw_drive.subsets("default")["train"], - [Crop(75, 10, 416, 544), Pad((21, 0, 22, 0)), Resize(1168)], - rotation_before=True, - ) + _raw_drive.subsets("default")["train"], + [Crop(75, 10, 416, 544), Pad((21, 0, 22, 0)), Resize(1168)], + rotation_before=True, +) from bob.ip.binseg.data.stare import dataset as _raw_stare + _stare = _maker( - _raw_stare.subsets("ah")["train"], - [Crop(50, 0, 500, 705), Resize(1168), Pad((1, 0, 1, 0))], - rotation_before=True, - ) + _raw_stare.subsets("ah")["train"], + [Crop(50, 0, 500, 705), Resize(1168), Pad((1, 0, 1, 0))], + rotation_before=True, +) from bob.ip.binseg.data.chasedb1 import dataset as _raw_chase + _chase = _maker( - _raw_chase.subsets("first-annotator")["train"], - [Crop(140, 18, 680, 960), Resize(1168)], - 
rotation_before=True, - ) + _raw_chase.subsets("first-annotator")["train"], + [Crop(140, 18, 680, 960), Resize(1168)], + rotation_before=True, +) from bob.ip.binseg.data.iostar import dataset as _raw_iostar + _iostar = _maker( - _raw_iostar.subsets("vessel")["train"], - [Crop(144, 0, 768, 1024), Pad((30, 0, 30, 0)), Resize(1168)], - rotation_before=True, - ) + _raw_iostar.subsets("vessel")["train"], + [Crop(144, 0, 768, 1024), Pad((30, 0, 30, 0)), Resize(1168)], + rotation_before=True, +) from torch.utils.data import ConcatDataset -from bob.ip.binseg.configs.datasets.hrf.default import dataset as _baselines +from bob.ip.binseg.configs.datasets.hrf.default import dataset as _baseline -dataset = { - "train": ConcatDataset([_drive, _stare, _chase, _iostar]), - "test": _baselines["test"], #use the same test set always - } +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_baseline) +dataset["__train__"] = ConcatDataset([_drive, _stare, _chase, _iostar]) diff --git a/bob/ip/binseg/configs/datasets/hrf/ssl.py b/bob/ip/binseg/configs/datasets/hrf/ssl.py index dea93d248109d355e1025e3195b6e16a83e314fb..7f6f369e2510bb39acee083c2caa3ffa74855fa6 100644 --- a/bob/ip/binseg/configs/datasets/hrf/ssl.py +++ b/bob/ip/binseg/configs/datasets/hrf/ssl.py @@ -18,11 +18,10 @@ For details on datasets, consult: * :py:mod:`bob.ip.binseg.data.hrf` """ -from bob.ip.binseg.configs.datasets.hrf.covd import dataset as _labelled -from bob.ip.binseg.configs.datasets.hrf.default import dataset as _baselines +from bob.ip.binseg.configs.datasets.hrf.covd import dataset as _covd +from bob.ip.binseg.configs.datasets.hrf.default import dataset as _baseline from bob.ip.binseg.data.utils import SSLDataset -dataset = { - "train": SSLDataset(_labelled["train"], _baselines["train"]), - "test": _baselines["test"], # use always the same test set -} +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_covd) +dataset["__train__"] = 
SSLDataset(_covd["__train__"], _baseline["__train__"]) diff --git a/bob/ip/binseg/configs/datasets/iostar/covd.py b/bob/ip/binseg/configs/datasets/iostar/covd.py index e2a1b90b9b773d4ae0576698736bc3a4f351fcd6..e2f054feaa64222cce354265969f9e7937638be0 100644 --- a/bob/ip/binseg/configs/datasets/iostar/covd.py +++ b/bob/ip/binseg/configs/datasets/iostar/covd.py @@ -20,36 +20,38 @@ from bob.ip.binseg.data.transforms import CenterCrop, Crop, Pad, Resize from bob.ip.binseg.configs.datasets import make_trainset as _maker from bob.ip.binseg.data.drive import dataset as _raw_drive + _drive = _maker( - _raw_drive.subsets("default")["train"], - [CenterCrop((540, 540)), Resize(1024)], - rotation_before=True, - ) + _raw_drive.subsets("default")["train"], + [CenterCrop((540, 540)), Resize(1024)], + rotation_before=True, +) from bob.ip.binseg.data.stare import dataset as _raw_stare + _stare = _maker( - _raw_stare.subsets("ah")["train"], - [Pad((0, 32, 0, 32)), Resize(1024), CenterCrop(1024)], - rotation_before=True, - ) + _raw_stare.subsets("ah")["train"], + [Pad((0, 32, 0, 32)), Resize(1024), CenterCrop(1024)], + rotation_before=True, +) from bob.ip.binseg.data.hrf import dataset as _raw_hrf + _hrf = _maker( - _raw_hrf.subsets("default")["train"], - [Pad((0, 584, 0, 584)), Resize(1024)], - ) + _raw_hrf.subsets("default")["train"], [Pad((0, 584, 0, 584)), Resize(1024)], +) from bob.ip.binseg.data.chasedb1 import dataset as _raw_chase + _chase = _maker( - _raw_chase.subsets("first-annotator")["train"], - [Crop(0, 18, 960, 960), Resize(1024)], - rotation_before=True, - ) + _raw_chase.subsets("first-annotator")["train"], + [Crop(0, 18, 960, 960), Resize(1024)], + rotation_before=True, +) from torch.utils.data import ConcatDataset -from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _baselines +from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _baseline -dataset = { - "train": ConcatDataset([_drive, _stare, _hrf, _chase]), - "test": 
_baselines["test"], #use the same test set always - } +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_baseline) +dataset["__train__"] = ConcatDataset([_drive, _stare, _hrf, _chase]) diff --git a/bob/ip/binseg/configs/datasets/iostar/ssl.py b/bob/ip/binseg/configs/datasets/iostar/ssl.py index f8666c01c3bca498321d2758fd5aef65449a25f6..2635552ee87704cd8c370c56a22431f5faa6b151 100644 --- a/bob/ip/binseg/configs/datasets/iostar/ssl.py +++ b/bob/ip/binseg/configs/datasets/iostar/ssl.py @@ -18,11 +18,10 @@ For details on datasets, consult: * :py:mod:`bob.ip.binseg.data.iostar` """ -from bob.ip.binseg.configs.datasets.iostar.covd import dataset as _labelled -from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _baselines +from bob.ip.binseg.configs.datasets.iostar.covd import dataset as _covd +from bob.ip.binseg.configs.datasets.iostar.vessel import dataset as _baseline from bob.ip.binseg.data.utils import SSLDataset -dataset = { - "train": SSLDataset(_labelled["train"], _baselines["train"]), - "test": _baselines["test"], # use always the same test set -} +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_covd) +dataset["__train__"] = SSLDataset(_covd["__train__"], _baseline["__train__"]) diff --git a/bob/ip/binseg/configs/datasets/stare/covd.py b/bob/ip/binseg/configs/datasets/stare/covd.py index ffd402288e7b811247d4b5b74bd61133a9d7658b..0abbf93441a7f70036073841786ac06b61ca6528 100644 --- a/bob/ip/binseg/configs/datasets/stare/covd.py +++ b/bob/ip/binseg/configs/datasets/stare/covd.py @@ -20,36 +20,38 @@ from bob.ip.binseg.data.transforms import CenterCrop, Pad, Resize from bob.ip.binseg.configs.datasets import make_trainset as _maker from bob.ip.binseg.data.drive import dataset as _raw_drive + _drive = _maker( - _raw_drive.subsets("default")["train"], - [CenterCrop((470, 544)), Pad((10, 9, 10, 8)), Resize(608)], - rotation_before=True, - ) + _raw_drive.subsets("default")["train"], + 
[CenterCrop((470, 544)), Pad((10, 9, 10, 8)), Resize(608)], + rotation_before=True, +) from bob.ip.binseg.data.chasedb1 import dataset as _raw_chase + _chase = _maker( - _raw_chase.subsets("first-annotator")["train"], - [CenterCrop((829, 960)), Resize(608)], - rotation_before=True, - ) + _raw_chase.subsets("first-annotator")["train"], + [CenterCrop((829, 960)), Resize(608)], + rotation_before=True, +) from bob.ip.binseg.data.iostar import dataset as _raw_iostar + _iostar = _maker( - _raw_iostar.subsets("vessel")["train"], - # n.b.: not the best fit, but what was there for Tim's work - [Pad((81, 0, 81, 0)), Resize(608)], - ) + _raw_iostar.subsets("vessel")["train"], + # n.b.: not the best fit, but what was there for Tim's work + [Pad((81, 0, 81, 0)), Resize(608)], +) from bob.ip.binseg.data.hrf import dataset as _raw_hrf + _hrf = _maker( - _raw_hrf.subsets("default")["train"], - [Pad((0, 345, 0, 345)), Resize(608)], - ) + _raw_hrf.subsets("default")["train"], [Pad((0, 345, 0, 345)), Resize(608)], +) from torch.utils.data import ConcatDataset -from bob.ip.binseg.configs.datasets.stare.ah import dataset as _baselines +from bob.ip.binseg.configs.datasets.stare.ah import dataset as _baseline -dataset = { - "train": ConcatDataset([_drive, _chase, _iostar, _hrf]), - "test": _baselines["test"], #use the same test set always - } +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_baseline) +dataset["__train__"] = ConcatDataset([_drive, _chase, _iostar, _hrf]) diff --git a/bob/ip/binseg/configs/datasets/stare/ssl.py b/bob/ip/binseg/configs/datasets/stare/ssl.py index e3047254ec12397eb83a2062bf4c7c401cc69ba0..10440efd9bb35db7b499c860772c272b01bcc1f6 100644 --- a/bob/ip/binseg/configs/datasets/stare/ssl.py +++ b/bob/ip/binseg/configs/datasets/stare/ssl.py @@ -18,11 +18,10 @@ For details on datasets, consult: * :py:mod:`bob.ip.binseg.data.hrf` """ -from bob.ip.binseg.configs.datasets.stare.covd import dataset as _labelled -from 
bob.ip.binseg.configs.datasets.stare.ah import dataset as _baselines +from bob.ip.binseg.configs.datasets.stare.covd import dataset as _covd +from bob.ip.binseg.configs.datasets.stare.ah import dataset as _baseline from bob.ip.binseg.data.utils import SSLDataset -dataset = { - "train": SSLDataset(_labelled["train"], _baselines["train"]), - "test": _baselines["test"], #use always the same test set - } +# copy dictionary and replace only the augmented train dataset +dataset = dict(**_covd) +dataset["__train__"] = SSLDataset(_covd["__train__"], _baseline["__train__"]) diff --git a/bob/ip/binseg/data/utils.py b/bob/ip/binseg/data/utils.py index 17332979a780978423dcadf0fbbb2cff2dfc7216..3d77e9c37e4888adcaa852ea31a7085648437140 100644 --- a/bob/ip/binseg/data/utils.py +++ b/bob/ip/binseg/data/utils.py @@ -116,7 +116,7 @@ def overlayed_image( return retval -class SampleList2TorchDataset(torch.utils.data.Dataset): +class SampleListDataset(torch.utils.data.Dataset): """PyTorch dataset wrapper around Sample lists A transform object can be passed that will be applied to the image, ground @@ -125,13 +125,6 @@ class SampleList2TorchDataset(torch.utils.data.Dataset): It supports indexing such that dataset[i] can be used to get ith sample. - Attributes - ---------- - - augmented : bool - Tells if this set has data augmentation prefixes or suffixes installed. - - Parameters ---------- @@ -143,28 +136,12 @@ class SampleList2TorchDataset(torch.utils.data.Dataset): ground-truth data. Notice a last transform (:py:class:`bob.ip.binseg.data.transforms.ToTensor`) is always applied. - prefixes : :py:class:`list`, Optional - a list of data augmentation transformations to be applied to **both** - image and ground-truth data and **before** ``transforms`` above. - Notice that transforms like - :py:class:`bob.ip.binseg.data.transforms.ColorJitter` are only applied - to the input image. 
- - suffixes : :py:class:`list`, Optional - a list of data augmentation transformations to be applied to **both** - image and ground-truth data and **after** ``transforms`` above. - Notice that transforms like - :py:class:`bob.ip.binseg.data.transforms.ColorJitter` are only applied - to the input image. - """ - def __init__(self, samples, transforms=[], prefixes=[], suffixes=[]): + def __init__(self, samples, transforms=[]): self._samples = samples - self._middle = transforms - self._transforms = Compose(prefixes + transforms + suffixes + [ToTensor()]) - self.augmented = bool(prefixes or suffixes) + self._transforms = Compose(transforms + [ToTensor()]) def __len__(self): """ @@ -178,18 +155,6 @@ class SampleList2TorchDataset(torch.utils.data.Dataset): """ return len(self._samples) - @contextlib.contextmanager - def not_augmented(self): - """Context to avoid data augmentation to be applied to self""" - - backup = (self.augmented, self._transforms) - self.augmented = False - self._transforms = Compose(self._middle + [ToTensor()]) - try: - yield self - finally: - self.augmented, self._transforms = backup - def __getitem__(self, key): """ diff --git a/bob/ip/binseg/engine/ssltrainer.py b/bob/ip/binseg/engine/ssltrainer.py index d7310ed057de296eadaffd6833891dd296290257..2448782cc6b1b00965a4974af13f58e36a0dd0fc 100644 --- a/bob/ip/binseg/engine/ssltrainer.py +++ b/bob/ip/binseg/engine/ssltrainer.py @@ -335,7 +335,6 @@ def run( logwriter.writerow(dict(k for k in logdata)) logger.info("|".join([f"{k}: {v}" for (k, v) in logdata])) - logger.info("End of training") total_training_time = time.time() - start_training_time logger.info( f"Total training time: {datetime.timedelta(seconds=total_training_time)} ({(total_training_time/max_epoch):.4f}s in average per epoch)" diff --git a/bob/ip/binseg/engine/trainer.py b/bob/ip/binseg/engine/trainer.py index dee2d6287dad0f4481b69ede9b25f0dfd52236b1..783d5dcb1643301fbcc862f7a5d3fcf4d1107f2f 100644 --- a/bob/ip/binseg/engine/trainer.py 
+++ b/bob/ip/binseg/engine/trainer.py @@ -176,7 +176,6 @@ def run( logwriter.writerow(dict(k for k in logdata)) logger.info("|".join([f"{k}: {v}" for (k, v) in logdata])) - logger.info("End of training") total_training_time = time.time() - start_training_time logger.info( f"Total training time: {datetime.timedelta(seconds=total_training_time)} ({(total_training_time/max_epoch):.4f}s in average per epoch)" diff --git a/bob/ip/binseg/script/evaluate.py b/bob/ip/binseg/script/evaluate.py index 8c8e575fcb65e85ab588c7a622bf7f03d13ec360..5a27eaee91d28ae4e23ddea28e4ea06e79e2ff92 100644 --- a/bob/ip/binseg/script/evaluate.py +++ b/bob/ip/binseg/script/evaluate.py @@ -59,13 +59,11 @@ logger = logging.getLogger(__name__) @click.option( "--dataset", "-d", - help="A bob.ip.binseg.data.utils.SampleList2TorchDataset instance " - "implementing a dataset to be used for evaluation purposes, possibly " - "including all pre-processing pipelines required or, optionally, a " - "dictionary mapping string keys to " - "bob.ip.binseg.data.utils.SampleList2TorchDataset's. In such a case, " - "all datasets will be used for evaluation. Data augmentation " - "operations are excluded automatically in this case", + help="A torch.utils.data.dataset.Dataset instance implementing a dataset " + "to be used for evaluation purposes, possibly including all pre-processing " + "pipelines required or, optionally, a dictionary mapping string keys to " + "torch.utils.data.dataset.Dataset instances. All keys that do not start " + "with an underscore (_) will be processed.", required=True, cls=ResourceOption, ) @@ -74,7 +72,8 @@ logger = logging.getLogger(__name__) "-S", help="A dataset or dictionary, like in --dataset, with the same " "sample keys, but with annotations from a different annotator that is " - "going to be compared to the one in --dataset", + "going to be compared to the one in --dataset. 
The same rules regarding " + "dataset naming conventions apply", required=False, default=None, cls=ResourceOption, @@ -145,6 +144,9 @@ def evaluate( } else: for k, v in dataset.items(): + if k.startswith("_"): + logger.info(f"Skipping dataset '{k}' (not to be evaluated)") + continue config[k] = { "dataset": v, "output_folder": os.path.join(output_folder, k), @@ -155,16 +157,17 @@ def evaluate( } for k, v in config.items(): - with v["dataset"].not_augmented() as d: - run( - d, - predictions_folder, - v["output_folder"], - overlayed, - overlay_threshold, + run( + v["dataset"], + predictions_folder, + v["output_folder"], + overlayed, + overlay_threshold, + ) + if v["second_annotator"] is not None: + compare_annotators( + v["dataset"], + v["second_annotator"], + v["second_annotator_folder"], + os.path.join(overlayed, "second-annotator"), ) - if v["second_annotator"] is not None: - with v["second_annotator"].not_augmented() as d2: - compare_annotators( - d, d2, v["second_annotator_folder"], overlayed - ) diff --git a/bob/ip/binseg/script/experiment.py b/bob/ip/binseg/script/experiment.py index 6882d5b931bf69399d48bc83956ce78b459c4de0..a4c74d4594095d19d538bf6e916e3e879b2b139a 100644 --- a/bob/ip/binseg/script/experiment.py +++ b/bob/ip/binseg/script/experiment.py @@ -16,6 +16,38 @@ import logging logger = logging.getLogger(__name__) +def _save_sh_command(destfile): + """Records command-line to reproduce this experiment""" + + import sys + import time + import pkg_resources + + dirname = os.path.dirname(destfile) + + if not os.path.exists(dirname): + os.makedirs(dirname) + + logger.info(f"Writing command-line for reproduction at '{destfile}'...") + + with open(destfile, "wt") as f: + f.write("#!/usr/bin/env sh\n") + f.write(f"# date: {time.asctime()}\n") + version = pkg_resources.require('bob.ip.binseg')[0].version + f.write(f"# version: {version} (bob.ip.binseg)\n") + f.write(f"# platform: {sys.platform}\n") + f.write("\n") + args = [] + for k in sys.argv: + if " " in k: 
args.append(f'"{k}"') + else: args.append(k) + if os.environ.get('CONDA_DEFAULT_ENV') is not None: + f.write(f"#conda activate {os.environ['CONDA_DEFAULT_ENV']}\n") + f.write(f"#cd {os.path.realpath(os.curdir)}\n") + f.write(" ".join(args) + "\n") + os.chmod(destfile, 0o755) + + @click.command( entry_point_group="bob.ip.binseg.config", cls=ConfigCommand, @@ -248,12 +280,15 @@ def experiment( """ + _save_sh_command(os.path.join(output_folder, "command.sh")) + ## Training logger.info("Started training") from .train import train train_output_folder = os.path.join(output_folder, "model") + ctx.invoke( train, model=model, @@ -283,7 +318,7 @@ def experiment( model_file = os.path.join(train_output_folder, "model_final.pth") predictions_folder = os.path.join(output_folder, "predictions") overlayed_folder = ( - os.path.join(output_folder, "overlayed", "probabilities") + os.path.join(output_folder, "overlayed", "predictions") if overlayed else None ) @@ -336,9 +371,15 @@ def experiment( systems = [] for k, v in dataset.items(): + if k.startswith("_"): + logger.info(f"Skipping dataset '{k}' (not to be compared)") + continue systems += [k, os.path.join(analysis_folder, k, "metrics.csv")] if second_annotator is not None: for k, v in second_annotator.items(): + if k.startswith("_"): + logger.info(f"Skipping dataset '{k}' (not to be compared)") + continue systems += [f"{k} (2nd. 
annot.)", os.path.join(second_annotator_folder, k, "metrics.csv")] output_pdf = os.path.join(output_folder, "comparison.pdf") diff --git a/bob/ip/binseg/script/predict.py b/bob/ip/binseg/script/predict.py index 41419ece1c3a32f41b46310e65d31b3f58ad8f5e..bf988ec4a61f8c62cb73d88d347fabd3e7608c54 100644 --- a/bob/ip/binseg/script/predict.py +++ b/bob/ip/binseg/script/predict.py @@ -61,13 +61,11 @@ logger = logging.getLogger(__name__) @click.option( "--dataset", "-d", - help="A bob.ip.binseg.data.utils.SampleList2TorchDataset instance " - "implementing a dataset to be used for running prediction, possibly " - "including all pre-processing pipelines required or, optionally, a " - "dictionary mapping string keys to " - "bob.ip.binseg.data.utils.SampleList2TorchDataset's. In such a case, " - "all datasets will be used for running prediction. Data augmentation " - "operations are excluded automatically for prediction purposes", + help="A torch.utils.data.dataset.Dataset instance implementing a dataset " + "to be used for running prediction, possibly including all pre-processing " + "pipelines required or, optionally, a dictionary mapping string keys to " + "torch.utils.data.dataset.Dataset instances. 
All keys that do not start " + "with an underscore (_) will be processed.", required=True, cls=ResourceOption, ) @@ -129,11 +127,15 @@ def predict(output_folder, model, dataset, batch_size, device, weight, overlayed = overlayed.strip() for k,v in dataset.items(): - with v.not_augmented() as d: # we remove any data augmentation - data_loader = DataLoader( - dataset=d, - batch_size=batch_size, - shuffle=False, - pin_memory=torch.cuda.is_available(), - ) - run(model, data_loader, device, output_folder, overlayed) + + if k.startswith("_"): + logger.info(f"Skipping dataset '{k}' (not to be evaluated)") + continue + + data_loader = DataLoader( + dataset=v, + batch_size=batch_size, + shuffle=False, + pin_memory=torch.cuda.is_available(), + ) + run(model, data_loader, device, output_folder, overlayed) diff --git a/bob/ip/binseg/script/train.py b/bob/ip/binseg/script/train.py index 5df8ccfb9a6a5391761dcc490415f67931832a9f..3076aae4c9796fedb8ed009aea4ee6afb89edc85 100644 --- a/bob/ip/binseg/script/train.py +++ b/bob/ip/binseg/script/train.py @@ -66,10 +66,12 @@ logger = logging.getLogger(__name__) help="A torch.utils.data.dataset.Dataset instance implementing a dataset " "to be used for training the model, possibly including all pre-processing " "pipelines required or, optionally, a dictionary mapping string keys to " - "bob.ip.binseg.data.utils.SampleList2TorchDataset's. At least one key " - "named 'train' must be available. This dataset will be used for training " - "the network model. The dataset description include all required " - "pre-processing, including eventual data augmentation", + "torch.utils.data.dataset.Dataset instances. At least one key " + "named ``train`` must be available. This dataset will be used for " + "training the network model. The dataset description must include all " + "required pre-processing, including eventual data augmentation. 
If a " + "dataset named ``__train__`` is available, it is used prioritarily for " + "training instead of ``train``.", required=True, cls=ResourceOption, ) @@ -224,9 +226,17 @@ def train( torch.manual_seed(seed) + use_dataset = dataset + if isinstance(dataset, dict): + if "__train__" in dataset: + logger.info("Found (dedicated) '__train__' set for training") + use_dataset = dataset["__train__"] + else: + use_dataset = dataset["train"] + # PyTorch dataloader data_loader = DataLoader( - dataset=dataset["train"] if isinstance(dataset, dict) else dataset, + dataset=use_dataset, batch_size=batch_size, shuffle=True, drop_last=drop_incomplete_batch, diff --git a/bob/ip/binseg/test/test_cli.py b/bob/ip/binseg/test/test_cli.py index cbaeea2849a67dcf7160d071b0d965c2e2da1155..8979509553dca6bfa003a9c7246a92832e485c93 100644 --- a/bob/ip/binseg/test/test_cli.py +++ b/bob/ip/binseg/test/test_cli.py @@ -3,10 +3,14 @@ """Tests for our CLI applications""" +import os import re +import fnmatch import tempfile import contextlib +import nose.tools + from click.testing import CliRunner from . 
import mock_dataset @@ -36,7 +40,7 @@ def _assert_exit_0(result): assert ( result.exit_code == 0 - ), f"Exit code != 0 ({result.exit_code}); Output:\n{result.output}" + ), f"Exit code {result.exit_code} != 0 -- Output:\n{result.output}" def _check_help(entry_point): @@ -60,17 +64,18 @@ def test_experiment_help(): def _str_counter(substr, s): - return sum(1 for _ in re.finditer(r"\b%s\b" % re.escape(substr), s)) + return sum(1 for _ in re.finditer(r"%s" % re.escape(substr), s)) @rc_variable_set("bob.ip.binseg.stare.datadir") def test_experiment_stare(): + from ..script.experiment import experiment runner = CliRunner() - with runner.isolated_filesystem(), \ - stdout_logging() as buf, \ - tempfile.NamedTemporaryFile(mode="wt") as config: + with runner.isolated_filesystem(), stdout_logging() as buf, tempfile.NamedTemporaryFile( + mode="wt" + ) as config: # re-write STARE dataset configuration for test config.write("from bob.ip.binseg.data.stare import _make_dataset\n") @@ -82,16 +87,73 @@ def test_experiment_stare(): config.write("second_annotator = _maker('vk', _raw)\n") config.flush() + output_folder = "results" result = runner.invoke( experiment, - ["m2unet", config.name, "-vv", "--epochs=1", "--batch-size=1", - "--overlayed"], + [ + "m2unet", + config.name, + "-vv", + "--epochs=1", + "--batch-size=1", + "--overlayed", + f"--output-folder={output_folder}", + ], ) _assert_exit_0(result) + + # check command-line + assert os.path.exists(os.path.join(output_folder, "command.sh")) + + # check model was saved + train_folder = os.path.join(output_folder, "model") + assert os.path.exists(os.path.join(train_folder, "model_final.pth")) + assert os.path.exists(os.path.join(train_folder, "last_checkpoint")) + assert os.path.exists(os.path.join(train_folder, "trainlog.csv")) + + # check predictions are there + predict_folder = os.path.join(output_folder, "predictions") + assert os.path.exists(os.path.join(predict_folder, "model-info.txt")) + basedir = 
os.path.join(predict_folder, "stare-images") + assert os.path.exists(basedir) + nose.tools.eq_(len(fnmatch.filter(os.listdir(basedir), "*.hdf5")), 20) + + # check overlayed images are there (since we requested them) + overlay_folder = os.path.join(output_folder, "overlayed", "predictions") + basedir = os.path.join(overlay_folder, "stare-images") + assert os.path.exists(basedir) + nose.tools.eq_(len(fnmatch.filter(os.listdir(basedir), "*.png")), 20) + + # check evaluation outputs + eval_folder = os.path.join(output_folder, "analysis") + second_folder = os.path.join(eval_folder, "second-annotator") + assert os.path.exists(os.path.join(eval_folder, "train", "metrics.csv")) + assert os.path.exists(os.path.join(eval_folder, "test", "metrics.csv")) + assert os.path.exists(os.path.join(second_folder, "train", "metrics.csv")) + assert os.path.exists(os.path.join(second_folder, "test", "metrics.csv")) + + # check overlayed images are there (since we requested them) + overlay_folder = os.path.join(output_folder, "overlayed", "analysis") + basedir = os.path.join(overlay_folder, "stare-images") + assert os.path.exists(basedir) + nose.tools.eq_(len(fnmatch.filter(os.listdir(basedir), "*.png")), 20) + + # check overlayed images from first-to-second annotator comparisons are + # there (since we requested them) + overlay_folder = os.path.join(output_folder, "overlayed", "analysis", + "second-annotator") + basedir = os.path.join(overlay_folder, "stare-images") + assert os.path.exists(basedir) + nose.tools.eq_(len(fnmatch.filter(os.listdir(basedir), "*.png")), 20) + + # check outcomes of the comparison phase + assert os.path.exists(os.path.join(output_folder, "comparison.pdf")) + keywords = { # from different logging systems "Started training": 1, # logging + "Found (dedicated) '__train__' set for training": 1, # logging "epoch: 1|total-time": 1, # logging - "Saving checkpoint to results/model/model_final.pth": 1, # logging + "Saving checkpoint": 1, # logging "Ended training": 1, # 
logging "Started prediction": 1, # logging "Loading checkpoint from": 2, # logging @@ -103,7 +165,7 @@ def test_experiment_stare(): # "Saving results/overlayed/analysis": 1, #tqdm.write "Ended evaluation": 1, # logging "Started comparison": 1, # logging - "Loading metrics from results/analysis": 4, # logging + "Loading metrics from": 4, # logging "Ended comparison": 1, # logging } buf.seek(0) @@ -120,6 +182,231 @@ def test_experiment_stare(): ) +def _check_train(runner): + + from ..script.train import train + + with tempfile.NamedTemporaryFile( + mode="wt" + ) as config, stdout_logging() as buf: + + # single training set configuration + config.write("from bob.ip.binseg.data.stare import _make_dataset\n") + config.write(f"_raw = _make_dataset('{stare_datadir}')\n") + config.write( + "from bob.ip.binseg.configs.datasets.stare import _maker\n" + ) + config.write("dataset = _maker('ah', _raw)['train']\n") + config.flush() + + output_folder = "results" + result = runner.invoke( + train, + ["m2unet", config.name, "-vv", "--epochs=1", "--batch-size=1", + f"--output-folder={output_folder}"], + ) + _assert_exit_0(result) + + assert os.path.exists(os.path.join(output_folder, "model_final.pth")) + assert os.path.exists(os.path.join(output_folder, "last_checkpoint")) + assert os.path.exists(os.path.join(output_folder, "trainlog.csv")) + + keywords = { # from different logging systems + "Continuing from epoch 0": 1, # logging + "epoch: 1|total-time": 1, # logging + f"Saving checkpoint to {output_folder}/model_final.pth": 1, # logging + "Total training time:": 1, # logging + } + buf.seek(0) + logging_output = buf.read() + + for k, v in keywords.items(): + # if _str_counter(k, logging_output) != v: + # print(f"Count for string '{k}' appeared " \ + # f"({_str_counter(k, result.output)}) " \ + # f"instead of the expected {v}") + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected 
{v}:\nOutput:\n{logging_output}" + ) + + +def _check_predict(runner): + + from ..script.predict import predict + + with tempfile.NamedTemporaryFile( + mode="wt" + ) as config, stdout_logging() as buf: + + # single training set configuration + config.write("from bob.ip.binseg.data.stare import _make_dataset\n") + config.write(f"_raw = _make_dataset('{stare_datadir}')\n") + config.write( + "from bob.ip.binseg.configs.datasets.stare import _maker\n" + ) + config.write("dataset = _maker('ah', _raw)['test']\n") + config.flush() + + output_folder = "predictions" + overlay_folder = os.path.join("overlayed", "predictions") + result = runner.invoke( + predict, + [ + "m2unet", + config.name, + "-vv", + "--batch-size=1", + "--weight=results/model_final.pth", + f"--output-folder={output_folder}", + f"--overlayed={overlay_folder}", + ], + ) + _assert_exit_0(result) + + # check predictions are there + assert os.path.exists(os.path.join(output_folder, "model-info.txt")) + basedir = os.path.join(output_folder, "stare-images") + assert os.path.exists(basedir) + nose.tools.eq_(len(fnmatch.filter(os.listdir(basedir), "*.hdf5")), 10) + + # check overlayed images are there (since we requested them) + basedir = os.path.join(overlay_folder, "stare-images") + assert os.path.exists(basedir) + nose.tools.eq_(len(fnmatch.filter(os.listdir(basedir), "*.png")), 10) + + keywords = { # from different logging systems + "Loading checkpoint from": 1, # logging + "Total time:": 1, # logging + } + buf.seek(0) + logging_output = buf.read() + + for k, v in keywords.items(): + # if _str_counter(k, logging_output) != v: + # print(f"Count for string '{k}' appeared " \ + # f"({_str_counter(k, result.output)}) " \ + # f"instead of the expected {v}") + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" + ) + + +def _check_evaluate(runner): + + from ..script.evaluate import 
evaluate + + with tempfile.NamedTemporaryFile( + mode="wt" + ) as config, stdout_logging() as buf: + + # single training set configuration + config.write("from bob.ip.binseg.data.stare import _make_dataset\n") + config.write(f"_raw = _make_dataset('{stare_datadir}')\n") + config.write( + "from bob.ip.binseg.configs.datasets.stare import _maker\n" + ) + config.write("dataset = _maker('ah', _raw)['test']\n") + config.write("second_annotator = _maker('vk', _raw)['test']\n") + config.flush() + + output_folder = "evaluations" + second_folder = "evaluations-2nd" + overlay_folder = os.path.join("overlayed", "analysis") + result = runner.invoke( + evaluate, + [ + config.name, + "-vv", + f"--output-folder={output_folder}", + "--predictions-folder=predictions", + f"--overlayed={overlay_folder}", + f"--second-annotator-folder={second_folder}", + ], + ) + _assert_exit_0(result) + + assert os.path.exists(os.path.join(output_folder, "metrics.csv")) + assert os.path.exists(os.path.join(second_folder, "metrics.csv")) + + # check overlayed images are there (since we requested them) + basedir = os.path.join(overlay_folder, "stare-images") + assert os.path.exists(basedir) + nose.tools.eq_(len(fnmatch.filter(os.listdir(basedir), "*.png")), 10) + + keywords = { # from different logging systems + "Skipping dataset '__train__'": 0, # logging + "Saving averages over all input images": 2, # logging + "Highest F1-score": 2, # logging + } + buf.seek(0) + logging_output = buf.read() + + for k, v in keywords.items(): + # if _str_counter(k, logging_output) != v: + # print(f"Count for string '{k}' appeared " \ + # f"({_str_counter(k, result.output)}) " \ + # f"instead of the expected {v}") + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" + ) + + +def _check_compare(runner): + + from ..script.compare import compare + + with stdout_logging() as buf: + + 
output_folder = "evaluations" + second_folder = "evaluations-2nd" + result = runner.invoke( + compare, + [ + "-vv", + # label - path to metrics + "test", os.path.join(output_folder, "metrics.csv"), + "test (2nd. human)", os.path.join(second_folder, "metrics.csv"), + ], + ) + _assert_exit_0(result) + + assert os.path.exists("comparison.pdf") + + keywords = { # from different logging systems + "Loading metrics from": 2, # logging + } + buf.seek(0) + logging_output = buf.read() + + for k, v in keywords.items(): + # if _str_counter(k, logging_output) != v: + # print(f"Count for string '{k}' appeared " \ + # f"({_str_counter(k, result.output)}) " \ + # f"instead of the expected {v}") + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" + ) + + +@rc_variable_set("bob.ip.binseg.stare.datadir") +def test_discrete_experiment_stare(): + + runner = CliRunner() + with runner.isolated_filesystem(): + _check_train(runner) + _check_predict(runner) + _check_evaluate(runner) + _check_compare(runner) + + def test_train_help(): from ..script.train import train diff --git a/bob/ip/binseg/test/test_config.py b/bob/ip/binseg/test/test_config.py index 84af4ed4dab532a088feaff95176eb26b7417d53..10d08e49e8fbb88fb024736842b94bd84326c968 100644 --- a/bob/ip/binseg/test/test_config.py +++ b/bob/ip/binseg/test/test_config.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # coding=utf-8 +import importlib + import nose.tools import torch @@ -16,347 +18,31 @@ N = 10 @rc_variable_set("bob.ip.binseg.drive.datadir") -def test_drive_default(): +def test_drive(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples: + nose.tools.eq_(len(s), 4) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 544, 544)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + nose.tools.eq_(s[2].shape, (1, 544, 544)) #planes, 
height, width + nose.tools.eq_(s[2].dtype, torch.float32) + nose.tools.eq_(s[3].shape, (1, 544, 544)) #planes, height, width + nose.tools.eq_(s[3].dtype, torch.float32) from ..configs.datasets.drive.default import dataset - nose.tools.eq_(len(dataset["train"]), 20) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 4) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 544, 544)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 544, 544)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - nose.tools.eq_(sample[3].shape, (1, 544, 544)) #planes, height, width - nose.tools.eq_(sample[3].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 20) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 4) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 544, 544)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 544, 544)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - nose.tools.eq_(sample[3].shape, (1, 544, 544)) #planes, height, width - nose.tools.eq_(sample[3].dtype, torch.float32) - - -@stare_variable_set("bob.ip.binseg.stare.datadir") -def test_stare_augmentation_manipulation(): - - # some tests to check our context management for dataset augmentation works - # adequately, with one example dataset - - # hack to allow testing on the CI - from ..configs.datasets.stare import _maker - dataset = _maker("ah", stare_dataset) - - nose.tools.eq_(dataset["train"].augmented, True) - nose.tools.eq_(dataset["test"].augmented, False) - nose.tools.eq_(len(dataset["train"]._transforms.transforms), - len(dataset["test"]._transforms.transforms) + 4) - - with dataset["train"].not_augmented() as d: - 
nose.tools.eq_(len(d._transforms.transforms), 2) - nose.tools.eq_(d.augmented, False) - nose.tools.eq_(dataset["train"].augmented, False) - nose.tools.eq_(dataset["test"].augmented, False) - - nose.tools.eq_(dataset["train"].augmented, True) - nose.tools.eq_(dataset["test"].augmented, False) - nose.tools.eq_(len(dataset["train"]._transforms.transforms), - len(dataset["test"]._transforms.transforms) + 4) - - -@stare_variable_set("bob.ip.binseg.stare.datadir") -def test_stare_ah(): - - # hack to allow testing on the CI - from ..configs.datasets.stare import _maker - dataset = _maker("ah", stare_dataset) - - nose.tools.eq_(len(dataset["train"]), 10) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 608, 704)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 608, 704)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 10) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 608, 704)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 608, 704)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - -@stare_variable_set("bob.ip.binseg.stare.datadir") -def test_stare_vk(): - - # hack to allow testing on the CI - from ..configs.datasets.stare import _maker - dataset = _maker("vk", stare_dataset) - - nose.tools.eq_(len(dataset["train"]), 10) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 608, 704)) #planes, height, width - 
nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 608, 704)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 10) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 608, 704)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 608, 704)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - -@rc_variable_set("bob.ip.binseg.chasedb1.datadir") -def test_chasedb1_first_annotator(): - - from ..configs.datasets.chasedb1.first_annotator import dataset - - nose.tools.eq_(len(dataset["train"]), 8) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 960, 960)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 960, 960)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 20) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 960, 960)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 960, 960)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - -@rc_variable_set("bob.ip.binseg.chasedb1.datadir") -def test_chasedb1_second_annotator(): - - from ..configs.datasets.chasedb1.second_annotator import dataset - nose.tools.eq_(len(dataset["train"]), 8) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 3) - 
assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 960, 960)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 960, 960)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) + nose.tools.eq_(len(dataset), 3) + _check_subset(dataset["__train__"], 20) + _check_subset(dataset["train"], 20) + _check_subset(dataset["test"], 20) - nose.tools.eq_(len(dataset["test"]), 20) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 960, 960)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 960, 960)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - -@rc_variable_set("bob.ip.binseg.hrf.datadir") -def test_hrf_default(): - - from ..configs.datasets.hrf.default import dataset + from ..configs.datasets.drive.second_annotator import dataset - nose.tools.eq_(len(dataset["train"]), 15) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 4) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1168, 1648)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1168, 1648)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - nose.tools.eq_(sample[3].shape, (1, 1168, 1648)) #planes, height, width - nose.tools.eq_(sample[3].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 30) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 4) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1168, 1648)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 
1168, 1648)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - nose.tools.eq_(sample[3].shape, (1, 1168, 1648)) #planes, height, width - nose.tools.eq_(sample[3].dtype, torch.float32) - - -@rc_variable_set("bob.ip.binseg.refuge.datadir") -def test_refuge_disc(): - - from ..configs.datasets.refuge.disc import dataset - - nose.tools.eq_(len(dataset["train"]), 400) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["validation"]), 400) - nose.tools.eq_(dataset["validation"].augmented, False) - for sample in dataset["validation"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 400) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - -@rc_variable_set("bob.ip.binseg.refuge.datadir") -def test_refuge_cup(): - - from ..configs.datasets.refuge.cup import dataset - - nose.tools.eq_(len(dataset["train"]), 400) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - 
nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["validation"]), 400) - nose.tools.eq_(dataset["validation"].augmented, False) - for sample in dataset["validation"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 400) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1632, 1632)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - -@rc_variable_set("bob.ip.binseg.drishtigs1.datadir") -def test_drishtigs1_disc_all(): - - from ..configs.datasets.drishtigs1.disc_all import dataset - - nose.tools.eq_(len(dataset["train"]), 50) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1760, 2048)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1760, 2048)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 51) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) 
- assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1760, 2048)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1760, 2048)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - -@rc_variable_set("bob.ip.binseg.drishtigs1.datadir") -def test_drishtigs1_cup_all(): - - from ..configs.datasets.drishtigs1.cup_all import dataset - - nose.tools.eq_(len(dataset["train"]), 50) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1760, 2048)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1760, 2048)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 51) - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 1760, 2048)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 1760, 2048)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - -@rc_variable_set("bob.ip.binseg.drionsdb.datadir") -def test_drionsdb_expert1(): - - from ..configs.datasets.drionsdb.expert1 import dataset - - nose.tools.eq_(len(dataset["train"]), 60) - nose.tools.eq_(dataset["train"].augmented, True) - for sample in dataset["train"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 416, 608)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 416, 608)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) - - nose.tools.eq_(len(dataset["test"]), 50) - 
nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["test"][:N]: - nose.tools.eq_(len(sample), 3) - assert isinstance(sample[0], str) - nose.tools.eq_(sample[1].shape, (3, 416, 608)) #planes, height, width - nose.tools.eq_(sample[1].dtype, torch.float32) - nose.tools.eq_(sample[2].shape, (1, 416, 608)) #planes, height, width - nose.tools.eq_(sample[2].dtype, torch.float32) + nose.tools.eq_(len(dataset), 1) + _check_subset(dataset["test"], 20) @rc_variable_set("bob.ip.binseg.stare.datadir") @@ -366,11 +52,16 @@ def test_drionsdb_expert1(): def test_drive_covd(): from ..configs.datasets.drive.covd import dataset + nose.tools.eq_(len(dataset), 3) + + from ..configs.datasets.drive.default import dataset as baseline + nose.tools.eq_(dataset["train"], baseline["train"]) + nose.tools.eq_(dataset["test"], baseline["test"]) - nose.tools.eq_(len(dataset["train"]), 53) - #nose.tools.eq_(dataset["train"].augmented, True) ##ConcatDataset - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["train"]: + # this is the only different set from the baseline + nose.tools.eq_(len(dataset["__train__"]), 53) + + for sample in dataset["__train__"]: assert 3 <= len(sample) <= 4 assert isinstance(sample[0], str) nose.tools.eq_(sample[1].shape, (3, 544, 544)) #planes, height, width @@ -390,11 +81,16 @@ def test_drive_covd(): def test_drive_ssl(): from ..configs.datasets.drive.ssl import dataset + nose.tools.eq_(len(dataset), 3) + + from ..configs.datasets.drive.default import dataset as baseline + nose.tools.eq_(dataset["train"], baseline["train"]) + nose.tools.eq_(dataset["test"], baseline["test"]) - nose.tools.eq_(len(dataset["train"]), 53) - #nose.tools.eq_(dataset["train"].augmented, True) ##ConcatDataset - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["train"]: + # this is the only different set from the baseline + nose.tools.eq_(len(dataset["__train__"]), 53) + + for sample in dataset["__train__"]: assert 5 <= 
len(sample) <= 6 assert isinstance(sample[0], str) nose.tools.eq_(sample[1].shape, (3, 544, 544)) #planes, height, width @@ -413,6 +109,47 @@ def test_drive_ssl(): nose.tools.eq_(sample[4].dtype, torch.float32) +@stare_variable_set("bob.ip.binseg.stare.datadir") +def test_stare_augmentation_manipulation(): + + # some tests to check our context management for dataset augmentation works + # adequately, with one example dataset + + # hack to allow testing on the CI + from ..configs.datasets.stare import _maker + dataset = _maker("ah", stare_dataset) + + nose.tools.eq_(len(dataset["__train__"]._transforms.transforms), + len(dataset["test"]._transforms.transforms) + 4) + + nose.tools.eq_(len(dataset["train"]._transforms.transforms), + len(dataset["test"]._transforms.transforms)) + + +@stare_variable_set("bob.ip.binseg.stare.datadir") +def test_stare(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples: + nose.tools.eq_(len(s), 3) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 608, 704)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + nose.tools.eq_(s[2].shape, (1, 608, 704)) #planes, height, width + nose.tools.eq_(s[2].dtype, torch.float32) + + # hack to allow testing on the CI + from ..configs.datasets.stare import _maker + + for protocol in "ah", "vk": + dataset = _maker(protocol, stare_dataset) + nose.tools.eq_(len(dataset), 3) + _check_subset(dataset["__train__"], 10) + _check_subset(dataset["train"], 10) + _check_subset(dataset["test"], 10) + + @rc_variable_set("bob.ip.binseg.drive.datadir") @rc_variable_set("bob.ip.binseg.chasedb1.datadir") @rc_variable_set("bob.ip.binseg.hrf.datadir") @@ -420,11 +157,15 @@ def test_drive_ssl(): def test_stare_covd(): from ..configs.datasets.stare.covd import dataset + nose.tools.eq_(len(dataset), 3) - nose.tools.eq_(len(dataset["train"]), 63) - #nose.tools.eq_(dataset["train"].augmented, True) ##ConcatDataset - 
nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["train"]: + from ..configs.datasets.stare.ah import dataset as baseline + nose.tools.eq_(dataset["train"], baseline["train"]) + nose.tools.eq_(dataset["test"], baseline["test"]) + + # this is the only different set from the baseline + nose.tools.eq_(len(dataset["__train__"]), 63) + for sample in dataset["__train__"]: assert 3 <= len(sample) <= 4 assert isinstance(sample[0], str) nose.tools.eq_(sample[1].shape, (3, 608, 704)) #planes, height, width @@ -436,6 +177,28 @@ def test_stare_covd(): nose.tools.eq_(sample[3].dtype, torch.float32) +@rc_variable_set("bob.ip.binseg.chasedb1.datadir") +def test_chasedb1(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples: + nose.tools.eq_(len(s), 3) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 960, 960)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + nose.tools.eq_(s[2].shape, (1, 960, 960)) #planes, height, width + nose.tools.eq_(s[2].dtype, torch.float32) + + for m in ("first_annotator", "second_annotator"): + d = importlib.import_module(f"...configs.datasets.chasedb1.{m}", + package=__name__).dataset + nose.tools.eq_(len(d), 3) + _check_subset(d["__train__"], 8) + _check_subset(d["train"], 8) + _check_subset(d["test"], 20) + + @rc_variable_set("bob.ip.binseg.drive.datadir") @rc_variable_set("bob.ip.binseg.stare.datadir") @rc_variable_set("bob.ip.binseg.hrf.datadir") @@ -443,11 +206,15 @@ def test_stare_covd(): def test_chasedb1_covd(): from ..configs.datasets.chasedb1.covd import dataset + nose.tools.eq_(len(dataset), 3) + + from ..configs.datasets.chasedb1.first_annotator import dataset as baseline + nose.tools.eq_(dataset["train"], baseline["train"]) + nose.tools.eq_(dataset["test"], baseline["test"]) - nose.tools.eq_(len(dataset["train"]), 65) - #nose.tools.eq_(dataset["train"].augmented, True) ##ConcatDataset - nose.tools.eq_(dataset["test"].augmented, False) - for 
sample in dataset["train"]: + # this is the only different set from the baseline + nose.tools.eq_(len(dataset["__train__"]), 65) + for sample in dataset["__train__"]: assert 3 <= len(sample) <= 4 assert isinstance(sample[0], str) nose.tools.eq_(sample[1].shape, (3, 960, 960)) #planes, height, width @@ -459,6 +226,28 @@ def test_chasedb1_covd(): nose.tools.eq_(sample[3].dtype, torch.float32) +@rc_variable_set("bob.ip.binseg.hrf.datadir") +def test_hrf(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples: + nose.tools.eq_(len(s), 4) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 1168, 1648)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + nose.tools.eq_(s[2].shape, (1, 1168, 1648)) #planes, height, width + nose.tools.eq_(s[2].dtype, torch.float32) + nose.tools.eq_(s[3].shape, (1, 1168, 1648)) #planes, height, width + nose.tools.eq_(s[3].dtype, torch.float32) + + from ..configs.datasets.hrf.default import dataset + nose.tools.eq_(len(dataset), 3) + _check_subset(dataset["__train__"], 15) + _check_subset(dataset["train"], 15) + _check_subset(dataset["test"], 30) + + @rc_variable_set("bob.ip.binseg.drive.datadir") @rc_variable_set("bob.ip.binseg.stare.datadir") @rc_variable_set("bob.ip.binseg.chasedb1.datadir") @@ -466,11 +255,15 @@ def test_chasedb1_covd(): def test_hrf_covd(): from ..configs.datasets.hrf.covd import dataset + nose.tools.eq_(len(dataset), 3) - nose.tools.eq_(len(dataset["train"]), 58) - #nose.tools.eq_(dataset["train"].augmented, True) ##ConcatDataset - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["train"]: + from ..configs.datasets.hrf.default import dataset as baseline + nose.tools.eq_(dataset["train"], baseline["train"]) + nose.tools.eq_(dataset["test"], baseline["test"]) + + # this is the only different set from the baseline + nose.tools.eq_(len(dataset["__train__"]), 58) + for sample in dataset["__train__"]: assert 3 <= len(sample) <= 4 
assert isinstance(sample[0], str) nose.tools.eq_(sample[1].shape, (3, 1168, 1648)) #planes, height, width @@ -482,6 +275,30 @@ def test_hrf_covd(): nose.tools.eq_(sample[3].dtype, torch.float32) +@rc_variable_set("bob.ip.binseg.iostar.datadir") +def test_iostar(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples: + nose.tools.eq_(len(s), 4) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 1024, 1024)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + nose.tools.eq_(s[2].shape, (1, 1024, 1024)) #planes, height, width + nose.tools.eq_(s[2].dtype, torch.float32) + nose.tools.eq_(s[3].shape, (1, 1024, 1024)) #planes, height, width + nose.tools.eq_(s[3].dtype, torch.float32) + + for m in ("vessel", "optic_disc"): + d = importlib.import_module(f"...configs.datasets.iostar.{m}", + package=__name__).dataset + nose.tools.eq_(len(d), 3) + _check_subset(d["__train__"], 20) + _check_subset(d["train"], 20) + _check_subset(d["test"], 10) + + @rc_variable_set("bob.ip.binseg.drive.datadir") @rc_variable_set("bob.ip.binseg.stare.datadir") @rc_variable_set("bob.ip.binseg.chasedb1.datadir") @@ -489,11 +306,15 @@ def test_hrf_covd(): def test_iostar_covd(): from ..configs.datasets.iostar.covd import dataset + nose.tools.eq_(len(dataset), 3) - nose.tools.eq_(len(dataset["train"]), 53) - #nose.tools.eq_(dataset["train"].augmented, True) ##ConcatDataset - nose.tools.eq_(dataset["test"].augmented, False) - for sample in dataset["train"]: + from ..configs.datasets.iostar.vessel import dataset as baseline + nose.tools.eq_(dataset["train"], baseline["train"]) + nose.tools.eq_(dataset["test"], baseline["test"]) + + # this is the only different set from the baseline + nose.tools.eq_(len(dataset["__train__"]), 53) + for sample in dataset["__train__"]: assert 3 <= len(sample) <= 4 assert isinstance(sample[0], str) nose.tools.eq_(sample[1].shape, (3, 1024, 1024)) #planes, height, width @@ -503,3 +324,92 @@ def 
test_iostar_covd(): if len(sample) == 4: nose.tools.eq_(sample[3].shape, (1, 1024, 1024)) nose.tools.eq_(sample[3].dtype, torch.float32) + + +@rc_variable_set("bob.ip.binseg.refuge.datadir") +def test_refuge(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples[:N]: + nose.tools.eq_(len(s), 3) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 1632, 1632)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + nose.tools.eq_(s[2].shape, (1, 1632, 1632)) #planes, height, width + nose.tools.eq_(s[2].dtype, torch.float32) + + for m in ("disc", "cup"): + d = importlib.import_module(f"...configs.datasets.refuge.{m}", + package=__name__).dataset + nose.tools.eq_(len(d), 4) + _check_subset(d["__train__"], 400) + _check_subset(d["train"], 400) + _check_subset(d["validation"], 400) + _check_subset(d["test"], 400) + + +@rc_variable_set("bob.ip.binseg.drishtigs1.datadir") +def test_drishtigs1(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples[:N]: + nose.tools.eq_(len(s), 3) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 1760, 2048)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + nose.tools.eq_(s[2].shape, (1, 1760, 2048)) #planes, height, width + nose.tools.eq_(s[2].dtype, torch.float32) + + for m in ("disc_all", "cup_all", "disc_any", "cup_any"): + d = importlib.import_module(f"...configs.datasets.drishtigs1.{m}", + package=__name__).dataset + nose.tools.eq_(len(d), 3) + _check_subset(d["__train__"], 50) + _check_subset(d["train"], 50) + _check_subset(d["test"], 51) + + +@rc_variable_set("bob.ip.binseg.rimoner3.datadir") +def test_rimoner3(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples[:N]: + nose.tools.eq_(len(s), 3) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 1440, 1088)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + 
nose.tools.eq_(s[2].shape, (1, 1440, 1088)) #planes, height, width + nose.tools.eq_(s[2].dtype, torch.float32) + + for m in ("disc_exp1", "cup_exp1", "disc_exp2", "cup_exp2"): + d = importlib.import_module(f"...configs.datasets.rimoner3.{m}", + package=__name__).dataset + nose.tools.eq_(len(d), 3) + _check_subset(d["__train__"], 99) + _check_subset(d["train"], 99) + _check_subset(d["test"], 60) + + +@rc_variable_set("bob.ip.binseg.drionsdb.datadir") +def test_drionsdb(): + + def _check_subset(samples, size): + nose.tools.eq_(len(samples), size) + for s in samples[:N]: + nose.tools.eq_(len(s), 3) + assert isinstance(s[0], str) + nose.tools.eq_(s[1].shape, (3, 416, 608)) #planes, height, width + nose.tools.eq_(s[1].dtype, torch.float32) + nose.tools.eq_(s[2].shape, (1, 416, 608)) #planes, height, width + nose.tools.eq_(s[2].dtype, torch.float32) + + for m in ("expert1", "expert2"): + d = importlib.import_module(f"...configs.datasets.drionsdb.{m}", + package=__name__).dataset + nose.tools.eq_(len(d), 3) + _check_subset(d["__train__"], 60) + _check_subset(d["train"], 60) + _check_subset(d["test"], 50) diff --git a/doc/cli.rst b/doc/cli.rst index 9315447d778ef1917a1f9404d677cd8cbb2e70d4..e5b261d6735221d6cfe40e35256ce734e29c4f9b 100644 --- a/doc/cli.rst +++ b/doc/cli.rst @@ -137,4 +137,15 @@ combined figures and tables that compare results of multiple systems. .. command-output:: bob binseg compare --help +.. _bob.ip.binseg.cli.experiment: + +Running Complete Experiments +---------------------------- + +This command can run training, prediction, evaluation and comparison from a +single, multi-step application. + +.. command-output:: bob binseg experiment --help + + .. 
include:: links.rst diff --git a/doc/evaluation.rst b/doc/evaluation.rst index 28a0bf65f0cffb9f3144f5990a3549f12de976e1..3646969173ddde58f56b87d2f08c718f5c34477c 100644 --- a/doc/evaluation.rst +++ b/doc/evaluation.rst @@ -24,8 +24,8 @@ point numbers indicating the vessel probability (``[0.0,1.0]``) for each pixel in the input image. -Inference on an existing datasets -================================= +Inference on an existing dataset +================================ To run inference, use the sub-command :ref:`predict <bob.ip.binseg.cli.predict>` to run prediction on an existing dataset: @@ -66,9 +66,9 @@ Evaluation In evaluation, we input an **annotated** dataset and predictions to generate performance figures that can help analysis of a trained model. Evaluation is -done using ``bob binseg evaluate`` followed by the model and the annotated -dataset configuration, and the path to the pretrained model via the -``--weight`` argument. +done using the :ref:`evaluate command <bob.ip.binseg.cli.evaluate>` followed +by the model and the annotated dataset configuration, and the path to the +pretrained weights via the ``--weight`` argument. Use ``bob binseg evaluate --help`` for more information. @@ -79,12 +79,15 @@ E.g. run inference on predictions from the DRIVE test set, do the following: # Point directly to saved model via -w argument: bob binseg evaluate -vv drive-test -p /predictions/folder -o /eval/results/folder +If available, you may use the option ``--second-annotator`` to compare results against a second set of annotations. + Comparing Systems ================= To compare multiple systems together and generate combined plots and tables, -use ``bob binseg compare``. Use ``--help`` for a quick guide. +use the :ref:`compare command <bob.ip.binseg.cli.compare>`. Use ``--help`` for +a quick guide. ..
code-block:: bash diff --git a/doc/experiment.rst b/doc/experiment.rst new file mode 100644 index 0000000000000000000000000000000000000000..4ef87f9004200a0888994de58b4ac086001c57f0 --- /dev/null +++ b/doc/experiment.rst @@ -0,0 +1,22 @@ +.. -*- coding: utf-8 -*- + +.. _bob.ip.binseg.experiment: + +============================== + Running complete experiments +============================== + +We provide an :ref:`aggregator command called "experiment" +<bob.ip.binseg.cli.experiment>` that runs training, followed by prediction, +evaluation and comparison. After running, you will be able to find results +from model fitting, prediction, evaluation and comparison under a single output +directory. + +For example, to train a Mobile V2 U-Net architecture on the STARE dataset, +evaluate both train and test set performances, output prediction maps and +overlay analysis, together with a performance curve, run the following: + +.. code-block:: sh + + $ bob binseg experiment -vv m2unet stare --batch-size=16 --overlayed + # check results in the "results" folder diff --git a/doc/training.rst b/doc/training.rst index 9a05c6685957115b8bd4bf231a1d717085f2e5ce..693561286fd8cf4a19ccddc0e75050743b155d7e 100644 --- a/doc/training.rst +++ b/doc/training.rst @@ -7,14 +7,10 @@ ========== To train a new FCN, use the command-line interface (CLI) application ``bob -binseg train``, available on your prompt. To use this CLI, you must define -the input dataset that will be used to train the FCN, as well as the type of -model that will be trained. You may issue ``bob binseg train --help`` for a -help message containing more detailed instructions. - -To replicate our results, use our main application ``bob binseg train`` -followed by the model configuration, and dataset configuration files, and/or -command-line options. Use ``bob binseg train --help`` for more information. +binseg train``, available on your prompt. 
To use this CLI, you must define the +input dataset that will be used to train the FCN, as well as the type of model +that will be trained. You may issue ``bob binseg train --help`` for a help +message containing more detailed instructions. .. tip:: @@ -75,14 +71,14 @@ card, for supervised training of baselines. Use it like this: customized dataset and model files. You may :ref:`copy any of the existing configuration resources <bob.ip.binseg.cli.config.copy>` and change them locally. Once you're happy, you may use the newly created files directly on - your training command line. For example, suppose you wanted to slightly - change the drive pre-processing pipeline. You could do the following: + your command line. For example, suppose you wanted to slightly change the + DRIVE pre-processing pipeline. You could do the following: .. code-block:: bash $ bob binseg config copy drive my_drive_remix.py # edit my_drive_remix.py to your needs - $ bob binseg train -vv <model> ./my_drive_remix.py --batch-size=<see-table> --device="cuda:0" + $ bob binseg train -vv <model> ./my_drive_remix.py .. _bob.ip.binseg.gridtk-tip: @@ -94,14 +90,14 @@ card, for supervised training of baselines. Use it like this: .. code-block:: sh - $ jman submit --queue=gpu --memory=24G --name=m2unet-drive -- bob binseg train --device='cuda:0' ... #paste the rest of the command-line + $ jman submit --queue=gpu --memory=24G --name=myjob -- bob binseg train --device='cuda:0' ... #paste the rest of the command-line Combined Vessel Dataset (COVD) ============================== -The following table describes recommended batch sizes for 24Gb of RAM GPU -card, for supervised training of COVD- systems. Use it like this: +The following table describes recommended batch sizes for 24Gb of RAM GPU card, +for supervised training of COVD- systems. Use it like this: .. 
code-block:: sh @@ -167,11 +163,11 @@ Using your own dataset ====================== To use your own dataset, we recommend you read our instructions at -:py:mod:`bob.ip.binseg.configs.datasets.csv`, and setup a CSV file describing -input data and ground-truth (segmentation maps). Then, prepare a configuration -file by copying our configuration example and edit it to apply the required -transforms to your input data. Once you are happy with the result, use it in -place of one of our datasets: +:py:mod:`bob.ip.binseg.configs.datasets.csv`, and setup one or more CSV files +describing input data and ground-truth (segmentation maps). Then, prepare a +configuration file by copying our configuration example and edit it to apply +the required transforms to your input data. Once you are happy with the +result, use it in place of one of our datasets: .. code-block:: sh diff --git a/doc/usage.rst b/doc/usage.rst index be0b7f893a4804b4caea5466f2e60c0a6503ad0d..c9967139f77c1e64bd634b641bf795ffdea73c55 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -49,6 +49,7 @@ modifying one of our configuration resources. training models evaluation + experiment .. include:: links.rst