diff --git a/bob/ip/binseg/configs/datasets/csv.py b/bob/ip/binseg/configs/datasets/csv.py index 9da9bd1d2140c9061f6b9db820cad2578b0e4762..14c394de88a321c179cf51309a4884fccddaf000 100644 --- a/bob/ip/binseg/configs/datasets/csv.py +++ b/bob/ip/binseg/configs/datasets/csv.py @@ -4,15 +4,12 @@ """Example CSV-based filelist dataset In case you have your own dataset that is organized on your filesystem, this -configuration shows an example setup so you can feed such files and -ground-truth data to train one of the available network models or to evaluate +configuration shows an example setup so you can feed such files (and +ground-truth data) to train one of the available network models or to evaluate it. You must write CSV based file (e.g. using comma as separator) that describes -the image and ground-truth locations for each image pair on your dataset. -Relative paths are considered with respect to the location of the CSV file -itself by default, also pass the ``root_path`` parameter to the -:py:class:`bob.ip.binseg.data.csvdataset.CSVDataset` object constructor. So, +the image and ground-truth locations for each image pair on your dataset. So, for example, if you have a structure like this: .. code-block:: text @@ -26,8 +23,7 @@ for example, if you have a structure like this: ├── ... └── gt_n.png -Then create a file in the same level of ``images`` and ``ground-truth`` with -the following contents: +Then create a file with the following contents: .. code-block:: text @@ -38,13 +34,13 @@ the following contents: To create a dataset without ground-truth (e.g., for prediction purposes), then omit the second column on the CSV file. -Use the path leading to the CSV file and replace ``<path.csv>`` on the example -code for this configuration, that you must copy locally to make changes: +Use the path leading to the CSV file and carefully read the comments in this +configuration. **Copy it locally to make changes**: .. code-block:: sh $ bob binseg config copy csv-dataset-example mydataset.py - # edit mydataset.py as explained here + # edit mydataset.py as explained here, follow the comments Fine-tune the transformations for your particular purpose: @@ -58,20 +54,79 @@ certain restrictions (input dimensions, image centering, etc.). Check the configuration that was used to train models and try to match it as well as possible. -See: +Finally, you must create a connector that will act as a "dataset" for pytorch. +The connector make a list of samples, returned by your raw dataset, look like +something our pytorch setup can digest (tuples of data with a certain +organisation). -* :py:class:`bob.ip.binseg.data.csvdataset.CSVDataset` for operational details. -* :py:class:`bob.ip.binseg.data.folderdataset.FolderDataset` for an alternative - implementation of an easier to generate **prediction** dataset. +More information: + +* :py:class:`bob.ip.binseg.data.dataset.CSVDataset` for operational details. +* :py:class:`bob.ip.binseg.data.dataset.JSONDataset` for an alternative for + multi-protocol datasets (all of our supported raw datasets are implemented + using this) +* :py:class:`bob.ip.binseg.data.utils.SampleList2TorchDataset` for extra + information on the sample list to pytorch connector """ -# add your transforms below - these are just examples +# First, define how to access and load the raw data. Our package provides some +# stock loaders we use for other datasets. You may have a look at the +# documentation of that module for details. 
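+#
+# For reference, these stock loaders are the thin PIL wrappers defined in
+# ``bob.ip.binseg.data.loader``.  A rough, illustrative sketch of what they do,
+# using made-up file names:
+#
+#     import PIL.Image
+#     image = PIL.Image.open("images/image_1.png").convert(mode="RGB")
+#     label = PIL.Image.open("ground-truth/gt_1.png").convert(mode="1", dither=None)
+#
+# ``load_pil_rgb`` and ``load_pil_1`` perform this kind of conversion, while
+# ``data_path_keymaker`` derives a unique key from the "data" path of a sample.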
+from bob.ip.binseg.data.loaders import ( + load_pil_rgb, + load_pil_1, + data_path_keymaker, +) + +# How we use the loaders - "sample" is a dictionary where keys are defined +# below and map to the columns of the CSV files you input. +def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # it is a dictionary that passes e.g., the name of the subset + # being loaded, so you can take contextual decisions on the loading + + # Using the path leading to the various data files stored in disk allows + # the CSV file to contain only relative paths and is, therefore, more + # compact. Of course, you can make those paths absolute and then simplify + # it here. + import os + root_path = "/path/where/raw/files/sit" + + return dict( + data=load_pil_rgb(os.path.join(root_path, sample["data"])), + label=load_pil_1(os.path.join(root_path, sample["label"])), + ) + +# This is just a class that puts everything together: the CSV file, how to load +# each sample defined in the dataset, names for the various columns of the CSV +# file and how to make unique keys for each sample (keymaker). Once created, +# this object can be called to generate sample lists. +from bob.ip.binseg.data.dataset import CSVDataset +raw_dataset = CSVDataset( + # path to the CSV file(s) - you may add as many subsets as you want, each + # with an unique name, you'll use later to generate sample lists + subsets=dict(data="<path/to/train.csv>"), + fieldnames=("data", "label"), #these are the column names + loader=_loader, + keymaker=data_path_keymaker, +) + +# Finally, we build a connector to passes our dataset to the pytorch framework +# so we can, for example, evaluate a trained pytorch model + +# Add/tune your transforms below - these are just examples compatible with a +# model that requires image inputs of 544 x 544 pixels. from bob.ip.binseg.data.transforms import CenterCrop -#from bob.ip.binseg.configs.datasets.utils import DATA_AUGMENTATION as _DA -_transforms = [ - CenterCrop((544, 544)), - ] # + _DA -from bob.ip.binseg.data.csvdataset import CSVDataset -#dataset = CSVDataset("<path.csv>", check_available=False, transforms=_transforms) +# from bob.ip.binseg.configs.datasets.utils import DATA_AUGMENTATION as _DA +_transforms = [ + CenterCrop((544, 544)), +] # + _DA + +# This class will simply trigger data loading and re-arrange the data so that +# data is fed in the right order to pytorch: (key, image[, label[, mask]]). +# This class also inherits from pytorch Dataset and respect its required API. +# See the documentation for details. 
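+#
+# Once built, the object below can be used like any other
+# :py:class:`torch.utils.data.Dataset`.  A minimal, illustrative sketch of how
+# it could be consumed (the batch size is arbitrary):
+#
+#     from torch.utils.data import DataLoader
+#     data_loader = DataLoader(dataset, batch_size=4, shuffle=False)
+#     for batch in data_loader:
+#         ...  # each batch carries (key, image, label) data, in this order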
+from bob.ip.binseg.data.utils import SampleList2TorchDataset +dataset = SampleList2TorchDataset(raw_dataset.subset("data"), _transforms) diff --git a/bob/ip/binseg/data/chasedb1/__init__.py b/bob/ip/binseg/data/chasedb1/__init__.py index 22286f791cbd660560b0a209a62bc889ee1544bb..0f91b3bd5f945196ed202a2115151dfda78e6e53 100644 --- a/bob/ip/binseg/data/chasedb1/__init__.py +++ b/bob/ip/binseg/data/chasedb1/__init__.py @@ -36,8 +36,8 @@ import pkg_resources import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1 +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker _protocols = [ pkg_resources.resource_filename(__name__, "default.json"), @@ -48,11 +48,12 @@ _root_path = bob.extension.rc.get('bob.ip.binseg.chasedb1.datadir', os.path.realpath(os.curdir)) def _loader(context, sample): - #"context" is ignore in this case - database is homogeneous + #"context" is ignored in this case - database is homogeneous return dict( - data=load_pil_rgb(sample["data"]), - label=load_pil_1(sample["label"]), + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_1(os.path.join(_root_path, sample["label"])), ) -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) +dataset = JSONDataset(protocols=_protocols, fieldnames=("data", "label"), + loader=_loader, keymaker=data_path_keymaker) """CHASE-DB1 dataset object""" diff --git a/bob/ip/binseg/data/csvdataset.py b/bob/ip/binseg/data/csvdataset.py deleted file mode 100644 index 43699faf9e41721573bbbc55649f5dd9ddcce8ae..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/data/csvdataset.py +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 - -import os -import csv - -from PIL import Image - -from torch.utils.data import Dataset -import torch -import torchvision.transforms.functional as VF - -import bob.io.base - -from .transforms import Compose, ToTensor - -import logging - -logger = logging.getLogger(__name__) - - -class CSVDataset(Dataset): - """ - Generic filelist dataset - - To create a new dataset, you only need to provide a CSV formatted filelist - using any separator (e.g. comma, space, semi-colon) with the following - information: - - .. code-block:: text - - image[,label[,mask]] - - Where: - - * ``image``: absolute or relative path leading to original image - * ``label``: (optional) absolute or relative path with manual segmentation - information - * ``mask``: (optional) absolute or relative path with a mask that indicates - valid regions in the image where automatic segmentation should occur - - Relative paths are interpreted with respect to the location where the CSV - file is or to an optional ``root_path`` parameter, that can be provided. - - There are no requirements concerning image or ground-truth homogenity. - Anything that can be loaded by our image and data loaders is OK. Use - a non-white character as separator. Example - - .. code-block:: text - - image1.jpg,gt1.tif,mask1.png - image2.png,gt2.png,mask2.png - ... - - - Notice that all rows must have the same number of entries. - - .. important:: - - Images are converted to RGB after readout via PIL. Ground-truth data is - loaded using the same technique, but converted to mode ``1`` instead of - ``RGB``. If ground-truth data is encoded as an HDF5 file, we use - instead :py:func:`bob.io.base.load`, and then converted it to 32-bit - float data. - - To generate a dataset without ground-truth (e.g. 
for prediction tasks), - then omit the second and third columns. - - - Parameters - ---------- - path : str - Full path to the file containing the dataset description, in CSV - format as described above - - root_path : :py:class:`str`, Optional - Path to a common filesystem root where files with relative paths should - be sitting. If not set, then we use the absolute path leading to the - CSV file as ``root_path`` - - check_available : :py:class:`bool`, Optional - If set to ``True``, then checks if files in the file list are - available. Otherwise does not. - - transforms : :py:class:`list`, Optional - a list of transformations to be applied to **both** image and - ground-truth data. Notice that image changing transformations such as - :py:class:`.transforms.ColorJitter` are only applied to the image and - **not** to ground-truth. Also notice a last transform - (:py:class:`bob.ip.binseg.data.transforms.ToTensor`) is always applied. - - """ - - def __init__( - self, path, root_path=None, check_available=True, transforms=[] - ): - - self.root_path = root_path or os.path.dirname(path) - self.transform = Compose(transforms + [ToTensor()]) - - def _make_abs_path(root, s): - retval = [] - for p in s: - if not os.path.isabs(p): - retval.append(os.path.join(root, p)) - return retval - - with open(path, newline="") as f: - reader = csv.reader(f) - self.data = [_make_abs_path(self.root_path, k) for k in reader] - - # check if all files are readable, warn otherwise - if check_available: - errors = 0 - for s in self.data: - for p in s: - if not os.path.exists(p): - errors += 1 - logger.error(f"Cannot find {p}") - assert errors == 0, ( - f"There {errors} files which cannot be " - f"found on your filelist ({path}) dataset" - ) - - # check all data entries have the same size - assert all(len(k) == len(self.data[0]) for k in self.data), ( - f"There is an inconsistence on your dataset - not all " - f"entries have length=={len(self.data[0])}" - ) - - def __len__(self): - """ - - Returns - ------- - - length : int - size of the dataset - """ - - return len(self.data) - - def __getitem__(self, index): - """ - - Parameters - ---------- - index : int - - Returns - ------- - sample : list - ``[name, img, gt, mask]``, ``[name, img, gt]`` or ``[name, img]`` - depending on whether this dataset has or not ground-truth - annotations and masks. The value of ``name`` is relative to - ``root_path``, in cases it starts with ``root_path``. 
- """ - - sample_paths = self.data[index] - - img_path = sample_paths[0] - meta_data = sample_paths[1:] - - # images are converted to RGB mode automatically - sample = [Image.open(img_path).convert(mode="RGB")] - - # ground-truth annotations and masks are treated the same - for path in meta_data: - if path is not None: - if path.endswith(".hdf5"): - data = bob.io.base.load(str(path)).astype("float32") - # a bit hackish, but will get what we need - data = VF.to_pil_image(torch.from_numpy(data)) - else: - data = Image.open(path) - sample += [data.convert(mode="1", dither=None)] - - if self.transform: - sample = self.transform(*sample) - - # make paths relative if necessary - stem = img_path - if stem.startswith(self.root_path): - stem = os.path.relpath(stem, self.root_path) - elif stem.startswith(os.pathsep): - stem = stem[len(os.pathsep) :] - - return [stem] + sample diff --git a/bob/ip/binseg/data/dataset.py b/bob/ip/binseg/data/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..687a7d0bab474f774ffe2a6bfca6d703f0acf7da --- /dev/null +++ b/bob/ip/binseg/data/dataset.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python +# coding=utf-8 + +import os +import copy +import json +import functools + +import logging + +logger = logging.getLogger(__name__) + +from .sample import DelayedSample + + +class JSONDataset: + """ + Generic multi-protocol/subset filelist dataset that yields samples + + To create a new dataset, you need to provide one or more JSON formatted + filelists (one per protocol) with the following contents: + + .. code-block:: json + + { + "subset1": [ + [ + "value1", + "value2", + "value3" + ], + [ + "value4", + "value5", + "value6" + ] + ], + "subset2": [ + ] + } + + Your dataset many contain any number of subsets, but all sample entries + must contain the same number of fields. + + + Parameters + ---------- + + protocols : list, dict + Paths to one or more JSON formatted files containing the various + protocols to be recognized by this dataset, or a dictionary, mapping + protocol names to paths of JSON files. Internally, we save a + dictionary where keys default to the basename of paths. + + fieldnames : list, tuple + An iterable over the field names (strings) to assign to each entry in + the JSON file. It should have as many items as fields in each entry of + the JSON file. + + loader : object + A function that receives as input, a context dictionary (with a + "protocol" and "subset" keys indicating which protocol and subset are + being served), and a dictionary with ``{key: path}`` entries, and + returns a dictionary with the loaded data. + + keymaker : object + A function that receives as input the same input from the ``loader``, + but outputs a single string that uniquely identifies a sample within + a given protocol. It is typically the path, without extension, of one + of the file entries for the sample, but you can tune it as you like. 
+ + """ + + def __init__(self, protocols, fieldnames, loader, keymaker): + + if isinstance(protocols, dict): + self.protocols = dict( + (k, os.path.realpath(v)) for k, v in protocols.items() + ) + else: + self.protocols = dict( + (os.path.splitext(os.path.basename(k))[0], os.path.realpath(k)) + for k in protocols + ) + self.fieldnames = fieldnames + self.loader = loader + self.keymaker = keymaker + + def check(self): + """For each protocol, check if all data can be correctly accessed + + Returns + ------- + + errors : int + Number of errors found + + """ + + logger.info(f"Checking dataset...") + errors = 0 + for proto in self.protocols: + logger.info(f"Checking protocol '{proto}'...") + for name, samples in self.subsets(proto).items(): + logger.info(f"Checking subset '{name}'...") + for sample in samples: + try: + sample.data # triggers loading + logger.info(f"{sample.key}: OK") + except Exception as e: + logger.error(f"{sample.key}: {e}") + errors += 1 + return errors + + def _make_delayed(self, pos, sample, context): + """Checks consistence and builds a delayed loading sample + """ + assert len(sample) == len(self.fieldnames), ( + f"Entry {k} in subset {context['subset']} of protocol " + f"{context['protocol']} has {len(sample)} entries instead of " + f"{len(self.fieldnames)} (expected). Fix file " + f"{self.protocols[context['protocol']]}" + ) + item = dict(zip(self.fieldnames, sample)) + return DelayedSample( + functools.partial(self.loader, context, item), + key=self.keymaker(context, item), + ) + + def subsets(self, protocol): + """Returns all subsets in a protocol + + This method will load JSON information for a given protocol and return + all subsets of the given protocol after converting each entry into a + :py:class:`bob.ip.binseg.data.sample.DelayedSample`. + + Parameters + ---------- + + protocol : str + Name of the protocol data to load + + + Returns + ------- + + subsets : dict + A dictionary mapping subset names to lists of + :py:class:`bob.ip.binseg.data.sample.DelayedSample` objects, with + the proper loading implemented. Each delayed sample also carries a + ``key`` parameter, that contains the output of the sample + contextual data after passing through the ``keymaker``. This + parameter can be used for recording sample transforms during + check-pointing. + + """ + + with open(self.protocols[protocol], "r") as f: + data = json.load(f) + + retval = {} + for subset, samples in data.items(): + context = dict(protocol=protocol, subset=subset) + retval[subset] = [ + self._make_delayed(k, v, context) for (k, v) in enumerate(samples) + ] + return retval + + +class CSVDataset: + """ + Generic single subset filelist dataset that yields samples + + To create a new dataset, you only need to provide a CSV formatted filelist + using any separator (e.g. comma, space, semi-colon) with the following + information: + + .. code-block:: text + + value1,value2,value3 + value4,value5,value6 + ... + + Notice that all rows must have the same number of entries. + + Parameters + ---------- + + subsets : list, dict + Paths to one or more CSV formatted files containing the various + subsets to be recognized by this dataset, or a dictionary, mapping + subset names to paths of CSV files. Internally, we save a + dictionary where keys default to the basename of paths. + + fieldnames : list, tuple + An iterable over the field names (strings) to assign to each column in + the CSV file. It should have as many items as fields in each row of + the CSV file(s). 
+
+    loader : object
+        A function that receives, as input, a dictionary with ``{name: value}``
+        entries (for each header in the input CSV file), and returns a
+        dictionary with the loaded data.
+
+    keymaker : object
+        A function that receives as input the same input from the ``loader``,
+        but outputs a single string that uniquely identifies a sample within
+        the dataset.  It is typically the path, without extension, of one
+        of the file entries for the sample, but you can tune it as you like.
+
+    """
+
+    def __init__(self, subsets, fieldnames, loader, keymaker):
+
+        if isinstance(subsets, dict):
+            self.subsets = dict(
+                (k, os.path.realpath(v)) for k, v in subsets.items()
+            )
+        else:
+            self.subsets = dict(
+                (os.path.splitext(os.path.basename(k))[0], os.path.realpath(k))
+                for k in subsets
+            )
+        self.fieldnames = fieldnames
+        self.loader = loader
+        self.keymaker = keymaker
+
+    def check(self):
+        """For each subset, check if all data can be correctly accessed
+
+        Returns
+        -------
+
+        errors : int
+            Number of errors found
+
+        """
+
+        logger.info(f"Checking dataset...")
+        errors = 0
+        for name in self.subsets.keys():
+            logger.info(f"Checking subset '{name}'...")
+            for sample in self.samples(name):
+                try:
+                    sample.data  # triggers loading
+                    logger.info(f"{sample.key}: OK")
+                except Exception as e:
+                    logger.error(f"{sample.key}: {e}")
+                    errors += 1
+        return errors
+
+    def _make_delayed(self, pos, sample, context):
+        """Checks consistency and builds a delayed loading sample
+        """
+        assert len(sample) == len(self.fieldnames), (
+            f"Entry {pos} in subset {context['subset']} has {len(sample)} "
+            f"entries instead of {len(self.fieldnames)} (expected).  Fix "
+            f"file {self.subsets[context['subset']]}"
+        )
+        item = dict(zip(self.fieldnames, sample))
+        return DelayedSample(
+            functools.partial(self.loader, context, item),
+            key=self.keymaker(context, item),
+        )
+
+    def samples(self, subset):
+        """Returns all samples in a subset
+
+        This method will load CSV information for a given subset and return
+        all samples of the given subset after converting each entry into a
+        :py:class:`bob.ip.binseg.data.sample.DelayedSample`.
+
+
+        Parameters
+        ----------
+
+        subset : str
+            Name of the subset data to load
+
+
+        Returns
+        -------
+
+        subset : list
+            A list of :py:class:`bob.ip.binseg.data.sample.DelayedSample`
+            objects, with the proper loading implemented.  Each delayed sample
+            also carries a ``key`` parameter, that contains the output of the
+            sample contextual data after passing through the ``keymaker``.
+            This parameter can be used for recording sample transforms during
+            check-pointing.
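+
+        Examples
+        --------
+
+        For illustration, assuming this object was created with a subset
+        registered under the (hypothetical) name ``data``:
+
+        .. code-block:: python
+
+           samples = dataset.samples("data")
+           print(samples[0].key)  # unique key produced by the ``keymaker``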
+ + """ + + with open(self.subsets[subset], newline="") as f: + cf = csv.reader(f) + samples = [k for k in cf] + + context = dict(subset=subset) + return [self._make_delayed(k, v, context) for (k, v) in enumerate(samples)] diff --git a/bob/ip/binseg/data/drionsdb/__init__.py b/bob/ip/binseg/data/drionsdb/__init__.py index 3e1b5798c45636ebdc7f6169f2f91cdf33288644..33aea1ab9cb0ba19f793920e4857361d89248f10 100644 --- a/bob/ip/binseg/data/drionsdb/__init__.py +++ b/bob/ip/binseg/data/drionsdb/__init__.py @@ -30,8 +30,8 @@ import PIL.ImageDraw import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, data_path_keymaker _protocols = [ pkg_resources.resource_filename(__name__, "default.json"), @@ -63,8 +63,8 @@ def _pad_right(img): def _loader(context, sample): - data = load_pil_rgb(sample["data"]) - label = _txt_to_pil_1(sample["label"], data.size) + data = load_pil_rgb(os.path.join(_root_path, sample["data"])) + label = _txt_to_pil_1(os.path.join(_root_path, sample["label"]), data.size) if sample["data"].endswith("_101.jpg"): # pads the image on the right side to account for a difference in @@ -74,5 +74,6 @@ def _loader(context, sample): return dict(data=data, label=label) -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) +dataset = JSONDataset(protocols=_protocols, fieldnames=("data", "label"), + loader=_loader, keymaker=data_path_keymaker) """DRIONSDB dataset object""" diff --git a/bob/ip/binseg/data/drishtigs1/__init__.py b/bob/ip/binseg/data/drishtigs1/__init__.py index 7ab1543cfdb2dd5e32a4296c6b4801c4c9b44cdb..538139232ed83b82eb4ca4d64521c714dbb08c86 100644 --- a/bob/ip/binseg/data/drishtigs1/__init__.py +++ b/bob/ip/binseg/data/drishtigs1/__init__.py @@ -27,33 +27,49 @@ import pkg_resources import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, data_path_keymaker _protocols = { - "optic-disc-all": pkg_resources.resource_filename(__name__, "optic-disc.json"), - "optic-cup-all": pkg_resources.resource_filename(__name__, "optic-cup.json"), - "optic-disc-any": pkg_resources.resource_filename(__name__, "optic-disc.json"), - "optic-cup-any": pkg_resources.resource_filename(__name__, "optic-cup.json"), - } + "optic-disc-all": pkg_resources.resource_filename( + __name__, "optic-disc.json" + ), + "optic-cup-all": pkg_resources.resource_filename( + __name__, "optic-cup.json" + ), + "optic-disc-any": pkg_resources.resource_filename( + __name__, "optic-disc.json" + ), + "optic-cup-any": pkg_resources.resource_filename( + __name__, "optic-cup.json" + ), +} + +_root_path = bob.extension.rc.get( + "bob.ip.binseg.drishtigs1.datadir", os.path.realpath(os.curdir) +) -_root_path = bob.extension.rc.get('bob.ip.binseg.drishtigs1.datadir', - os.path.realpath(os.curdir)) def _loader(context, sample): retval = dict( - data=load_pil_rgb(sample["data"]), - label=load_pil_rgb(sample["label"]).convert("L"), - ) + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_rgb(os.path.join(_root_path, sample["label"])).convert( + "L" + ), + ) # Drishti-GS provides softmaps of multiple annotators # we threshold to get gt where all/any of the annotators overlap if context["protocol"].endswith("-all"): - retval["label"] = retval["label"].point(lambda p: p>254, mode="1") + retval["label"] = retval["label"].point(lambda p: p > 254, mode="1") elif 
context["protocol"].endswith("-any"): - retval["label"] = retval["label"].point(lambda p: p>0, mode="1") + retval["label"] = retval["label"].point(lambda p: p > 0, mode="1") else: raise RuntimeError(f"Unknown protocol {context['protocol']}") return retval -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) + +dataset = JSONDataset( + protocols=_protocols, fieldnames=("data", "label"), loader=_loader, + keymaker=data_path_keymaker +) """Drishti-GS1 dataset object""" diff --git a/bob/ip/binseg/data/drive/__init__.py b/bob/ip/binseg/data/drive/__init__.py index 5298e66131ced5a2a0e428c76d21098c1d1c7903..2253bcbecd7c15db4b4cacd850f6e3d655d15ca7 100644 --- a/bob/ip/binseg/data/drive/__init__.py +++ b/bob/ip/binseg/data/drive/__init__.py @@ -25,24 +25,32 @@ import pkg_resources import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1 +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker _protocols = [ - pkg_resources.resource_filename(__name__, "default.json"), - pkg_resources.resource_filename(__name__, "second-annotation.json"), - ] + pkg_resources.resource_filename(__name__, "default.json"), + pkg_resources.resource_filename(__name__, "second-annotation.json"), +] + +_root_path = bob.extension.rc.get( + "bob.ip.binseg.drive.datadir", os.path.realpath(os.curdir) +) -_root_path = bob.extension.rc.get('bob.ip.binseg.drive.datadir', - os.path.realpath(os.curdir)) def _loader(context, sample): - #"context" is ignore in this case - database is homogeneous + # "context" is ignored in this case - database is homogeneous return dict( - data=load_pil_rgb(sample["data"]), - label=load_pil_1(sample["label"]), - mask=load_pil_1(sample["mask"]), - ) - -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_1(os.path.join(_root_path, sample["label"])), + mask=load_pil_1(os.path.join(_root_path, sample["mask"])), + ) + + +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("data", "label", "mask"), + loader=_loader, + keymaker=data_path_keymaker, +) """DRIVE dataset object""" diff --git a/bob/ip/binseg/data/folderdataset.py b/bob/ip/binseg/data/folderdataset.py deleted file mode 100644 index 2566fd77ca64dd357e9e3f8998bb830f10ec0c97..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/data/folderdataset.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 - -from pathlib import Path - -from PIL import Image - -from torch.utils.data import Dataset - -from .transforms import Compose, ToTensor - - -def _find_files(data_path, glob): - """ - Recursively retrieves file lists from a given path, matching a given glob - - This function will use :py:meth:`pathlib.Path.rglob`, together with the - provided glob pattern to search for anything the desired filename. - """ - - data_path = Path(data_path) - return sorted(list(data_path.rglob(glob))) - - -class FolderDataset(Dataset): - """ - Generic image folder containing images for prediction - - .. important:: - - This implementation, contrary to its sister - :py:class:`.csvdataset.CSVDataset`, does not *automatically* convert - the input image to RGB, before passing it to the transforms, so it is - possible to accomodate a wider range of input types (e.g. 16-bit PNG - images). 
- - Parameters - ---------- - - path : str - full path to root of dataset - - glob : str - glob that can be used to filter-down files to be loaded on the provided - path - - transforms : :py:class:`list`, Optional - a list of transformations to be applied to **both** image and - ground-truth data. Notice that image changing transformations such as - :py:class:`.transforms.ColorJitter` are only applied to the image and - **not** to ground-truth. Also notice a last transform - (:py:class:`bob.ip.binseg.data.transforms.ToTensor`) is always applied. - - """ - - def __init__(self, path, glob="*", transforms=[]): - self.transform = Compose(transforms + [ToTensor()]) - self.path = path - self.data = _find_files(path, glob) - - def __len__(self): - """ - Returns - ------- - int - size of the dataset - """ - - return len(self.data) - - def __getitem__(self, index): - """ - Parameters - ---------- - index : int - - Returns - ------- - sample : list - [name, img] - """ - - sample = [Image.open(self.data[index])] - if self.transform: - sample = self.transform(*sample) - return [self.data[index].relative_to(self.path).as_posix()] + sample diff --git a/bob/ip/binseg/data/hrf/__init__.py b/bob/ip/binseg/data/hrf/__init__.py index 8f0a387b5026e73750dd1ec6b44de6b751688cfb..1ab2919c8ee77d85560711bfbcb2a5c15ba24512 100644 --- a/bob/ip/binseg/data/hrf/__init__.py +++ b/bob/ip/binseg/data/hrf/__init__.py @@ -24,23 +24,31 @@ import pkg_resources import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1 +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker _protocols = [ - pkg_resources.resource_filename(__name__, "default.json"), - ] + pkg_resources.resource_filename(__name__, "default.json"), +] + +_root_path = bob.extension.rc.get( + "bob.ip.binseg.hrf.datadir", os.path.realpath(os.curdir) +) -_root_path = bob.extension.rc.get('bob.ip.binseg.hrf.datadir', - os.path.realpath(os.curdir)) def _loader(context, sample): - #"context" is ignore in this case - database is homogeneous + # "context" is ignore in this case - database is homogeneous return dict( - data=load_pil_rgb(sample["data"]), - label=load_pil_1(sample["label"]), - mask=load_pil_1(sample["mask"]), - ) - -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_1(os.path.join(_root_path, sample["label"])), + mask=load_pil_1(os.path.join(_root_path, sample["mask"])), + ) + + +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("data", "label", "mask"), + loader=_loader, + keymaker=data_path_keymaker, +) """HRF dataset object""" diff --git a/bob/ip/binseg/data/iostar/__init__.py b/bob/ip/binseg/data/iostar/__init__.py index 1654cbfda75eca139a07734f31ed3e0b14b4b095..0ce427195777310966b1cd4e9579dce0571ad23f 100644 --- a/bob/ip/binseg/data/iostar/__init__.py +++ b/bob/ip/binseg/data/iostar/__init__.py @@ -27,35 +27,44 @@ import pkg_resources import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1 +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker from ..utils import invert_mode1_image, subtract_mode1_images _protocols = [ - pkg_resources.resource_filename(__name__, "vessel.json"), - pkg_resources.resource_filename(__name__, "optic-disc.json"), - ] + pkg_resources.resource_filename(__name__, "vessel.json"), + 
pkg_resources.resource_filename(__name__, "optic-disc.json"), +] + +_root_path = bob.extension.rc.get( + "bob.ip.binseg.iostar.datadir", os.path.realpath(os.curdir) +) -_root_path = bob.extension.rc.get('bob.ip.binseg.iostar.datadir', - os.path.realpath(os.curdir)) def _loader(context, sample): retval = dict( - data=load_pil_rgb(sample["data"]), - label=load_pil_1(sample["label"]), - mask=load_pil_1(sample["mask"]), - ) + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_1(os.path.join(_root_path, sample["label"])), + mask=load_pil_1(os.path.join(_root_path, sample["mask"])), + ) if context["protocol"] == "optic-disc": # For optic-disc analysis, the label provided by IOSTAR raw data is the # "inverted" (negative) label, and does not consider the mask region, # which must be subtracted. We do this special manipulation here. retval["label"] = subtract_mode1_images( - invert_mode1_image(retval["label"]), - invert_mode1_image(retval["mask"])) + invert_mode1_image(retval["label"]), + invert_mode1_image(retval["mask"]), + ) return retval elif context["protocol"] == "vessel": return retval raise RuntimeError(f"Unknown protocol {context['protocol']}") -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) + +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("data", "label", "mask"), + loader=_loader, + keymaker=data_path_keymaker, +) """IOSTAR dataset object""" diff --git a/bob/ip/binseg/data/jsondataset.py b/bob/ip/binseg/data/jsondataset.py deleted file mode 100644 index 427bd6e8de7caa8f197c1348403f5392d9d837a3..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/data/jsondataset.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 - -import os -import copy -import json -import functools - -import logging -logger = logging.getLogger(__name__) - -from .sample import DelayedSample - - -class JSONDataset: - """ - Generic multi-protocol filelist dataset that yields samples - - To create a new dataset, you need to provide one or more JSON formatted - filelists (one per protocol) with the following contents: - - .. code-block:: json - - { - "subset1": [ - { - "data": "path/to/data", - "label": "path/to/optional/label", - "mask": "path/to/optional/mask" - } - ], - "subset2": [ - ] - } - - Optionally, you may also format your JSON file like this, where each sample - is described as a list of up to 3 elements: - - .. code-block:: json - - { - "subset1": [ - [ - "path/to/data", - "path/to/optional/label", - "path/to/optional/mask" - ] - ], - "subset2": [ - ] - } - - If your dataset does not have labels or masks, you may also represent it - like this: - - .. code-block:: json - - { - "subset1": [ - "path/to/data1", - "path/to/data2" - ], - "subset2": [ - ] - } - - Where: - - * ``data``: absolute or relative path leading to original image, in RGB - format - * ``label``: (optional) absolute or relative path with manual segmentation - information. This image will be converted to a binary image. This - dataset shall always yield label images in which white pixels (value=1) - indicate the **presence** of the object, and black pixels (value=0), its - absence. - * ``mask``: (optional) absolute or relative path with a mask that indicates - valid regions in the image where automatic segmentation should occur. - This image will be converted to a binary image. 
This dataset shall - always yield mask images in which white pixels (value=1) indicate the - **valid** regions of the mask, and black pixels (value=0), invalid parts. - - Relative paths are interpreted with respect to the location where the JSON - file is or to an optional ``root_path`` parameter, that can be provided. - - There are no requirements concerning image or ground-truth homogenity. - Anything that can be loaded by our image and data loaders is OK. - - Notice that all rows must have the same number of entries. - - To generate a dataset without ground-truth (e.g. for prediction tasks), - then omit the ``label`` and ``mask`` entries. - - - Parameters - ---------- - - protocols : list, dict - Paths to one or more JSON formatted files containing the various - protocols to be recognized by this dataset, or a dictionary, mapping - protocol names to paths of JSON files. Internally, we save a - dictionary where keys default to the basename of paths. - - root_path : str - Path to a common filesystem root where files with relative paths should - be sitting. If not set, then we use the current directory to resolve - relative paths. - - loader : object - A function that receives, as input, a context dictionary (with a - "protocol" and "subset" keys indicating which protocol and subset are - being served), and a dictionary with ``{key: path}`` entries, and - returns a dictionary with the loaded data. It shall respect the - loading principles of data, label and mask objects as stated above. - - """ - - def __init__(self, protocols, root_path, loader): - - if isinstance(protocols, dict): - self.protocols = dict((k,os.path.realpath(v)) for k,v in - protocols.items()) - else: - self.protocols = dict( - (os.path.splitext(os.path.basename(k))[0], os.path.realpath(k)) - for k in protocols - ) - self.root_path = root_path - self.loader = loader - - def check(self): - """For each protocol, check all files are available on the filesystem - - Returns - ------- - - errors : int - Number of errors found - - """ - - logger.info(f"Checking dataset at '{self.root_path}'...") - - errors = 0 - for proto in self.protocols: - logger.info(f"Checking protocol '{proto}'...") - for name, samples in self.subsets(proto).items(): - logger.info(f"Checking subset '{name}'...") - for sample in samples: - try: - sample.data # triggers loading - logger.info(f"{sample.key}: OK") - except Exception as e: - logger.error(f"{sample.key}: {e}") - errors += 1 - return errors - - def subsets(self, protocol): - """Returns all subsets in a protocol - - This method will load JSON information for a given protocol and return - all subsets of the given protocol after converting each entry into a - :py:class:`bob.ip.binseg.data.sample.DelayedSample`. - - Parameters - ---------- - - protocol : str - Name of the protocol data to load - - - Returns - ------- - - subsets : dict - A dictionary mapping subset names to lists of - :py:class:`bob.ip.binseg.data.sample.DelayedSample` objects, with - the proper loading implemented. Each delayed sample also carries a - ``key`` parameter, that contains the relative path of the sample, - without its extension. This parameter can be used for recording - sample transforms during check-pointing. 
- - """ - - with open(self.protocols[protocol], "r") as f: - data = json.load(f) - - # returns a fixed sample representations as a DelayedSamples - retval = {} - - for subset, samples in data.items(): - delayeds = [] - context = dict(protocol=protocol, subset=subset) - for k in samples: - - if isinstance(k, dict): - item = k - - elif isinstance(k, list): - item = {"data": k[0]} - if len(k) > 1: item["label"] = k[1] - if len(k) > 2: item["mask"] = k[2] - - elif isinstance(k, str): - item = {"data": k} - - key = os.path.splitext(item["data"])[0] - - # make paths absolute - abs_item = copy.deepcopy(item) - for k,v in item.items(): - if not os.path.isabs(v): - abs_item[k] = os.path.join(self.root_path, v) - - load = functools.partial(self.loader, context, abs_item) - delayeds.append(DelayedSample(load, key=key)) - - retval[subset] = delayeds - - return retval diff --git a/bob/ip/binseg/data/loader.py b/bob/ip/binseg/data/loader.py index c5a235ceb24a46d80bdc95237824a05cacb75639..84928b7d39376bc180fd89d3bb89bc175b7f7774 100644 --- a/bob/ip/binseg/data/loader.py +++ b/bob/ip/binseg/data/loader.py @@ -5,6 +5,7 @@ """Data loading code""" +import os import PIL.Image @@ -48,3 +49,34 @@ def load_pil_1(path): """ return PIL.Image.open(path).convert(mode="1", dither=None) + + +def data_path_keymaker(context, sample): + """Returns a path without extension as a key + + This method assumes ``sample`` contains at least one entry named ``path``, + that contains a path to the sample raw data, without extension. It will + return the said path without its extension. + + + Parameters + ---------- + + context : dict + Context dictionary with entries (``protocol``, ``subset``), depending + on the context + + sample : dict + A dictionary that maps field names to sample entries from the original + dataset. 
+ + + Returns + ------- + + key : str + A string that uniquely identifies the sample within a given context + + """ + + return os.path.splitext(sample["data"])[0] diff --git a/bob/ip/binseg/data/refuge/__init__.py b/bob/ip/binseg/data/refuge/__init__.py index 766d0d0eeceb482ae46bf8c2c47de843b7e55f07..328dec359fdbaed8fcd19545a95c5e78676941e9 100644 --- a/bob/ip/binseg/data/refuge/__init__.py +++ b/bob/ip/binseg/data/refuge/__init__.py @@ -37,8 +37,8 @@ import pkg_resources import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, data_path_keymaker _protocols = { "optic-disc": pkg_resources.resource_filename(__name__, "default.json"), @@ -50,8 +50,8 @@ _root_path = bob.extension.rc.get('bob.ip.binseg.refuge.datadir', def _loader(context, sample): retval = dict( - data=load_pil_rgb(sample["data"]), - label=load_pil_rgb(sample["label"]), + data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + label=load_pil_rgb(os.path.join(_root_path, sample["label"])), ) if context["subset"] == "train": @@ -77,5 +77,6 @@ def _loader(context, sample): return retval -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) +dataset = JSONDataset(protocols=_protocols, fieldnames=("data", "label"), + loader=_loader, keymaker=data_path_keymaker) """REFUGE dataset object""" diff --git a/bob/ip/binseg/data/rimoner3/__init__.py b/bob/ip/binseg/data/rimoner3/__init__.py index d251bc4f0d27a66eebfc452017fbac3da79865aa..b60faddab0f3e9747d6544a447cc915d546e1653 100644 --- a/bob/ip/binseg/data/rimoner3/__init__.py +++ b/bob/ip/binseg/data/rimoner3/__init__.py @@ -25,28 +25,40 @@ import pkg_resources import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1 +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker _protocols = [ - pkg_resources.resource_filename(__name__, "optic-disc-exp1.json"), - pkg_resources.resource_filename(__name__, "optic-cup-exp1.json"), - pkg_resources.resource_filename(__name__, "optic-disc-exp2.json"), - pkg_resources.resource_filename(__name__, "optic-cup-exp2.json"), - pkg_resources.resource_filename(__name__, "optic-disc-avg.json"), - pkg_resources.resource_filename(__name__, "optic-cup-avg.json"), - ] + pkg_resources.resource_filename(__name__, "optic-disc-exp1.json"), + pkg_resources.resource_filename(__name__, "optic-cup-exp1.json"), + pkg_resources.resource_filename(__name__, "optic-disc-exp2.json"), + pkg_resources.resource_filename(__name__, "optic-cup-exp2.json"), + pkg_resources.resource_filename(__name__, "optic-disc-avg.json"), + pkg_resources.resource_filename(__name__, "optic-cup-avg.json"), +] + +_root_path = bob.extension.rc.get( + "bob.ip.binseg.rimoner3.datadir", os.path.realpath(os.curdir) +) -_root_path = bob.extension.rc.get('bob.ip.binseg.rimoner3.datadir', - os.path.realpath(os.curdir)) def _loader(context, sample): # RIM-ONE r3 provides stereo images - we clip them here to get only the # left part of the image, which is also annotated return dict( - data=load_pil_rgb(sample["data"]).crop((0, 0, 1072, 1424)), - label=load_pil_1(sample["label"]).crop((0, 0, 1072, 1424)), - ) + data=load_pil_rgb(os.path.join(_root_path, sample["data"])).crop( + (0, 0, 1072, 1424) + ), + label=load_pil_1(os.path.join(_root_path, sample["label"])).crop( + (0, 0, 1072, 1424) + ), + ) + -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, 
loader=_loader) +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("data", "label"), + loader=_loader, + keymaker=data_path_keymaker, +) """RIM-ONE r3 dataset object""" diff --git a/bob/ip/binseg/data/stare/__init__.py b/bob/ip/binseg/data/stare/__init__.py index 6885fc4d8821386cee0129c3b233467b68a6ab95..1ad911552da3afc6044bc3ac9d7feededdd0c9f3 100644 --- a/bob/ip/binseg/data/stare/__init__.py +++ b/bob/ip/binseg/data/stare/__init__.py @@ -29,23 +29,35 @@ import pkg_resources import bob.extension -from ..jsondataset import JSONDataset -from ..loader import load_pil_rgb, load_pil_1 +from ..dataset import JSONDataset +from ..loader import load_pil_rgb, load_pil_1, data_path_keymaker _protocols = [ - pkg_resources.resource_filename(__name__, "default.json"), - pkg_resources.resource_filename(__name__, "second-annotation.json"), - ] + pkg_resources.resource_filename(__name__, "default.json"), + pkg_resources.resource_filename(__name__, "second-annotation.json"), +] -_root_path = bob.extension.rc.get('bob.ip.binseg.stare.datadir', - os.path.realpath(os.curdir)) +_fieldnames = ("data", "label") -def _loader(context, sample): - #"context" is ignore in this case - database is homogeneous - return dict( - data=load_pil_rgb(sample["data"]), - label=load_pil_1(sample["label"]), - ) +_root_path = bob.extension.rc.get( + "bob.ip.binseg.stare.datadir", os.path.realpath(os.curdir) +) -dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) +def _make_loader(root_path): + + def _loader(context, sample): + # "context" is ignore in this case - database is homogeneous + return dict( + data=load_pil_rgb(os.path.join(root_path, sample["data"])), + label=load_pil_1(os.path.join(root_path, sample["label"])), + ) + return _loader + + +dataset = JSONDataset( + protocols=_protocols, + fieldnames=_fieldnames, + loader=_make_loader(_root_path), + keymaker=data_path_keymaker, +) """STARE dataset object""" diff --git a/bob/ip/binseg/test/__init__.py b/bob/ip/binseg/test/__init__.py index 428bd154ba23e7516809b9bc147f2caf2db953a5..2e507ed77cc08ad5f73b8c579defe79d5759b202 100644 --- a/bob/ip/binseg/test/__init__.py +++ b/bob/ip/binseg/test/__init__.py @@ -5,10 +5,13 @@ import tempfile import logging + logger = logging.getLogger(__name__) TESTDB_TMPDIR = None -_URL = "http://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/_testdb.zip" +_URL = ( + "http://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/_testdb.zip" +) _RCKEY = "bob.ip.binseg.stare.datadir" @@ -24,40 +27,54 @@ def _mock_test_skipper(name): Dummary decorator that does nothing """ import functools + def wrapped_function(test): @functools.wraps(test) def wrapper(*args, **kwargs): return test(*args, **kwargs) + return wrapper + return wrapped_function def mock_dataset(): global TESTDB_TMPDIR from bob.extension import rc + if (TESTDB_TMPDIR is not None) or (_RCKEY in rc): logger.info("Test database already set up - not downloading") else: logger.info("Test database not available, downloading...") import zipfile import urllib.request + # Download the file from `url` and save it locally under `file_name`: with urllib.request.urlopen(_URL) as r, tempfile.TemporaryFile() as f: f.write(r.read()) f.flush() f.seek(0) - TESTDB_TMPDIR = \ - tempfile.TemporaryDirectory(prefix=__name__ + '-') + TESTDB_TMPDIR = tempfile.TemporaryDirectory(prefix=__name__ + "-") print(f"Creating test database at {TESTDB_TMPDIR.name}...") logger.info(f"Creating test database at {TESTDB_TMPDIR.name}...") - with zipfile.ZipFile(f) as zf: 
zf.extractall(TESTDB_TMPDIR.name) + with zipfile.ZipFile(f) as zf: + zf.extractall(TESTDB_TMPDIR.name) from ..data import stare + if TESTDB_TMPDIR is None: # if the user has the STARE directory ready, then we do a normal return from .utils import rc_variable_set + return stare.dataset, rc_variable_set # else, we do a "mock" return - return stare.JSONDataset(stare._protocols, TESTDB_TMPDIR.name, - stare._loader), _mock_test_skipper + return ( + stare.JSONDataset( + stare._protocols, + stare._fieldnames, + stare._make_loader(TESTDB_TMPDIR.name), + stare.data_path_keymaker, + ), + _mock_test_skipper, + ) diff --git a/doc/api.rst b/doc/api.rst index 4e70a851f03a4af60faa1e24fbbb1360190088ca..0062dc3fb263282391799916ecc8e9898354007a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -15,9 +15,7 @@ Data Manipulation .. autosummary:: :toctree: api/data - bob.ip.binseg.data.folderdataset - bob.ip.binseg.data.csvdataset - bob.ip.binseg.data.jsondataset + bob.ip.binseg.data.dataset bob.ip.binseg.data.loader bob.ip.binseg.data.sample bob.ip.binseg.data.utils @@ -133,7 +131,6 @@ Datasets :template: config.rst bob.ip.binseg.configs.datasets.csv - bob.ip.binseg.configs.datasets.folder bob.ip.binseg.configs.datasets.utils bob.ip.binseg.configs.datasets.chasedb1 diff --git a/setup.py b/setup.py index 269925af9ff99faecbeeb48a0bc96f0eea00d3d8..c97566905eadcd5c4c9d312f8c88c8ff197d1451 100644 --- a/setup.py +++ b/setup.py @@ -53,9 +53,8 @@ setup( "unet = bob.ip.binseg.configs.models.unet", "resunet = bob.ip.binseg.configs.models.resunet", - # datasets + # example datasets "csv-dataset-example = bob.ip.binseg.configs.datasets.csv", - "folder-dataset-example = bob.ip.binseg.configs.datasets.folder", # drive dataset "drive = bob.ip.binseg.configs.datasets.drive",
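With these changes, all supported raw datasets expose the same sample-list
interface.  The sketch below illustrates the access pattern using the DRIVE
module converted above; the subset name ``train`` is an assumption (it depends
on the contents of ``default.json``), everything else follows the code in this
patch:

.. code-block:: python

   from bob.ip.binseg.data.drive import dataset

   subsets = dataset.subsets("default")  # protocol name defaults to the JSON basename
   sample = subsets["train"][0]          # "train" assumed to exist in default.json
   print(sample.key)                     # path-derived key from data_path_keymaker
   image = sample.data["data"]           # PIL RGB image, loaded lazily on first access
   label = sample.data["label"]          # PIL mode "1" ground-truth image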