diff --git a/bob/bio/base/config/examples/lda_atnt_legacy.py b/bob/bio/base/config/examples/lda_atnt_legacy.py index a23b93d859d81cbbd10656fb9003b62488ba217b..8903a324d5d3d9fb38b7112425eec2150c6c9803 100644 --- a/bob/bio/base/config/examples/lda_atnt_legacy.py +++ b/bob/bio/base/config/examples/lda_atnt_legacy.py @@ -1,22 +1,26 @@ -# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor - -import bob.bio.base -import bob.bio.face - -## DATABASE +from bob.bio.face.database import AtntBioDatabase +from bob.bio.base.algorithm import LDA +from bob.bio.face.preprocessor import FaceCrop +from sklearn.pipeline import make_pipeline +from bob.bio.base.mixins.legacy import ( + LegacyPreprocessor, + LegacyAlgorithmAsTransformer, +) +from bob.pipelines.transformers import CheckpointSampleLinearize +from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyDatabaseConnector +import functools +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + CheckpointDistance, +) -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector -database = DatabaseConnector(bob.bio.face.database.AtntBioDatabase(original_directory="./atnt"), protocol="Default") +# DATABASE +database = LegacyDatabaseConnector( + AtntBioDatabase(original_directory="./atnt", protocol="Default"), +) -from sklearn.pipeline import Pipeline, make_pipeline -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin -from bob.bio.base.transformers import CheckpointSampleLinearize, CheckpointSamplePCA - -#### PREPROCESSOR LEGACY ### -import functools +# PREPROCESSOR LEGACY # Cropping CROPPED_IMAGE_HEIGHT = 80 @@ -31,45 +35,36 @@ LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) # I JUST MADE UP THESE NUMBERS FIXED_RIGHT_EYE_POS = (30, 30) FIXED_LEFT_EYE_POS = (20, 50) -import bob.bio.face face_cropper = functools.partial( - bob.bio.face.preprocessor.FaceCrop, + FaceCrop, cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, ) -from bob.pipelines.mixins import mix_me_up -preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin) - -#### ALGORITHM LEGACY ##### +# ALGORITHM LEGACY -algorithm = functools.partial(bob.bio.base.algorithm.LDA, use_pinv=True, pca_subspace_dimension=0.90) +lda = functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90) -from bob.pipelines.mixins import dask_it -extractor = Pipeline( - steps=[ - ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")), - ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")), - ( - "2", - LegacyAlgorithmMixin( - callable=algorithm, features_dir="./example/extractor2", model_path="./example/" - ), - ), - ] +transformer = make_pipeline( + LegacyPreprocessor(callable=face_cropper, features_dir="./example/transformer0"), + CheckpointSampleLinearize(features_dir="./example/transformer1"), + LegacyAlgorithmAsTransformer( + callable=lda, features_dir="./example/transformer2", model_path="./example/" + ), ) -#extractor = dask_it(extractor) +algorithm = CheckpointDistance(features_dir="./example/") + + +# comment out the code below to disable dask +from bob.pipelines.mixins import estimator_dask_it, mix_me_up from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( - 
Distance, - BiometricAlgorithmCheckpointMixin, + BioAlgDaskMixin, ) - -class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): pass -algorithm = CheckpointDistance(features_dir="./example/") -# algorithm = Distance() +transformer = estimator_dask_it(transformer) +algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py b/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py index 0b9d601e3840d24d76835b49aa2dfbfbd0dc17e7..469e9c9552388ac814c6671c4d2518fda17078e4 100644 --- a/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py +++ b/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py @@ -1,23 +1,24 @@ -# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor - -import bob.bio.base -import bob.bio.face - -## DATABASE +from bob.bio.face.database import AtntBioDatabase +from bob.bio.base.algorithm import LDA +from bob.bio.face.preprocessor import FaceCrop +from sklearn.pipeline import make_pipeline +from bob.bio.base.mixins.legacy import ( + LegacyPreprocessor, + LegacyAlgorithmAsTransformer, + LegacyAlgorithmAsBioAlg, +) +from bob.pipelines.transformers import CheckpointSampleLinearize +from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyDatabaseConnector +import functools -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector -database = DatabaseConnector(bob.bio.face.database.AtntBioDatabase(original_directory="./atnt"), protocol="Default") +# DATABASE +database = LegacyDatabaseConnector( + AtntBioDatabase(original_directory="./atnt", protocol="Default"), +) -from sklearn.pipeline import Pipeline, make_pipeline -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin -from bob.bio.base.transformers import CheckpointSampleLinearize, CheckpointSamplePCA -from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyBiometricAlgorithm - -#### PREPROCESSOR LEGACY ### -import functools +# PREPROCESSOR LEGACY # Cropping CROPPED_IMAGE_HEIGHT = 80 @@ -32,37 +33,35 @@ LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) # I JUST MADE UP THESE NUMBERS FIXED_RIGHT_EYE_POS = (30, 30) FIXED_LEFT_EYE_POS = (20, 50) -import bob.bio.face face_cropper = functools.partial( - bob.bio.face.preprocessor.FaceCrop, + FaceCrop, cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, ) -from bob.pipelines.mixins import mix_me_up -preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin) - -#### ALGORITHM LEGACY ##### +# ALGORITHM LEGACY -algorithm_estimator = functools.partial(bob.bio.base.algorithm.LDA, use_pinv=True, pca_subspace_dimension=0.90) +lda = functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90) -from bob.pipelines.mixins import dask_it -extractor = Pipeline( - steps=[ - ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")), - ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")), - ( - "2", - LegacyAlgorithmMixin( - callable=algorithm_estimator, features_dir="./example/extractor2", model_path="./example/" - ), - ), - ] +transformer = make_pipeline( + LegacyPreprocessor(callable=face_cropper, features_dir="./example/transformer0"), + 
CheckpointSampleLinearize(features_dir="./example/transformer1"), + LegacyAlgorithmAsTransformer( + callable=lda, features_dir="./example/transformer2", model_path="./example/" + ), ) -extractor = dask_it(extractor) +algorithm = LegacyAlgorithmAsBioAlg(callable=lda, features_dir="./example/") + + +# comment out the code below to disable dask +from bob.pipelines.mixins import estimator_dask_it, mix_me_up +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + BioAlgDaskMixin, +) -algorithm = LegacyBiometricAlgorithm(callable=algorithm_estimator, features_dir="./example/") +transformer = estimator_dask_it(transformer) +algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/config/examples/pca_atnt.py b/bob/bio/base/config/examples/pca_atnt.py index a72d9577873c8d7fe76502223867ca64b1b0b46b..76015a797b36ac5ee1336af3ee5ef8e1b25fca12 100644 --- a/bob/bio/base/config/examples/pca_atnt.py +++ b/bob/bio/base/config/examples/pca_atnt.py @@ -1,20 +1,28 @@ -#from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor - -import bob.bio.face from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector -database = DatabaseConnector(bob.bio.face.database.AtntBioDatabase(original_directory="./atnt"), protocol="Default") +from sklearn.pipeline import make_pipeline +from bob.pipelines.transformers import CheckpointSampleLinearize, CheckpointSamplePCA +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + CheckpointDistance, +) +from bob.bio.face.database import AtntBioDatabase -from sklearn.pipeline import Pipeline, make_pipeline -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.transformers import CheckpointSampleLinearize, CheckpointSamplePCA +database = DatabaseConnector( + AtntBioDatabase(original_directory="./atnt"), protocol="Default" +) +transformer = make_pipeline( + CheckpointSampleLinearize(features_dir="./example/extractor0"), + CheckpointSamplePCA( + features_dir="./example/extractor1", model_path="./example/pca.pkl" + ), +) +algorithm = CheckpointDistance(features_dir="./example/") -from bob.pipelines.mixins import dask_it -extractor = Pipeline(steps=[('0',CheckpointSampleLinearize(features_dir="./example/extractor0")), - ('1',CheckpointSamplePCA(features_dir="./example/extractor1", model_path="./example/pca.pkl"))]) -#extractor = dask_it(extractor) +# comment out the code below to disable dask +from bob.pipelines.mixins import estimator_dask_it, mix_me_up +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + BioAlgDaskMixin, +) -from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance, BiometricAlgorithmCheckpointMixin -class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): pass -algorithm = CheckpointDistance(features_dir="./example/") -#algorithm = Distance() +transformer = estimator_dask_it(transformer) +algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/config/examples/pca_atnt_legacy.py b/bob/bio/base/config/examples/pca_atnt_legacy.py deleted file mode 100644 index b895eba8d35869cd22ecb50ed08668e3dd51e604..0000000000000000000000000000000000000000 --- a/bob/bio/base/config/examples/pca_atnt_legacy.py +++ /dev/null @@ -1,70 +0,0 @@ -# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor - - -### DATABASE -import bob.bio.face -from bob.bio.base.pipelines.vanilla_biometrics.legacy import 
DatabaseConnector -database = DatabaseConnector(bob.bio.face.database.AtntBioDatabase(original_directory="./atnt"), protocol="Default") - - -from sklearn.pipeline import Pipeline, make_pipeline -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin -from bob.bio.base.transformers import CheckpointSampleLinearize, CheckpointSamplePCA - - -#### PREPROCESSOR LEGACY ### -import functools - -# Cropping -CROPPED_IMAGE_HEIGHT = 80 -CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 - -# eye positions for frontal images -RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) -LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) - - -# RANDOM EYES POSITIONS -# I JUST MADE UP THESE NUMBERS -FIXED_RIGHT_EYE_POS = (30, 30) -FIXED_LEFT_EYE_POS = (20, 50) -import bob.bio.face - -face_cropper = functools.partial( - bob.bio.face.preprocessor.FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, - fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, -) - - -from bob.pipelines.mixins import mix_me_up -preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin) - -from bob.pipelines.mixins import dask_it - -extractor = Pipeline( - steps=[ - ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")), - ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")), - ( - "2", - CheckpointSamplePCA( - features_dir="./example/extractor2", model_path="./example/pca.pkl" - ), - ), - ] -) -# extractor = dask_it(extractor) - -from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( - Distance, - BiometricAlgorithmCheckpointMixin, -) - - -class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): - pass -algorithm = CheckpointDistance(features_dir="./example/") -# algorithm = Distance() diff --git a/bob/bio/base/config/examples/pca_mobio-male.py b/bob/bio/base/config/examples/pca_mobio-male.py index 440d4e97d1c46e4e41c52f49f84ebaff768a8fdf..61393f38dd0ac2c93b8e579c097a76ad44028f57 100644 --- a/bob/bio/base/config/examples/pca_mobio-male.py +++ b/bob/bio/base/config/examples/pca_mobio-male.py @@ -1,69 +1,54 @@ - -import functools -import bob.db.atnt -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, DatabaseConnectorAnnotated +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + CheckpointDistance, +) +from bob.bio.base.pipelines.vanilla_biometrics.legacy import ( + LegacyDatabaseConnector, + LegacyPreprocessor, +) +from bob.bio.face.database.mobio import MobioBioDatabase +from bob.bio.face.preprocessor import FaceCrop from bob.extension import rc -import bob.bio.face - -from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin -from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyBiometricAlgorithm -from bob.bio.base.transformers import CheckpointSamplePCA - -import os -#base_dir = "/idiap/temp/tpereira/mobio/pca" -base_dir = "./example" - - -### DATABASE - -original_directory=rc['bob.db.mobio.directory'] -annotation_directory=rc['bob.db.mobio.annotation_directory'] -database = DatabaseConnectorAnnotated(bob.bio.face.database.mobio.MobioBioDatabase( - original_directory=original_directory, - annotation_directory=annotation_directory, - original_extension=".png" - ), - protocol="mobile0-male") - -from 
sklearn.pipeline import Pipeline, make_pipeline -from sklearn.decomposition import PCA - -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.transformers import CheckpointSampleLinearize - - +from bob.pipelines.transformers import CheckpointSampleLinearize, CheckpointSamplePCA +from sklearn.pipeline import make_pipeline +import functools -#### PREPROCESSOR LEGACY ### +database = LegacyDatabaseConnector( + MobioBioDatabase( + original_directory=rc["bob.db.mobio.directory"], + annotation_directory=rc["bob.db.mobio.annotation_directory"], + original_extension=".png", + protocol="mobile0-male", + ) +) # Using face crop CROPPED_IMAGE_HEIGHT = 80 CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 - -## eye positions for frontal images +# eye positions for frontal images RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) - -original_preprocessor = functools.partial( - bob.bio.face.preprocessor.FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, - ) - - -from bob.pipelines.mixins import mix_me_up -preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin) -#class preprocessor(CheckpointMixin, SampleMixin, LegacyProcessorMixin): pass - -from bob.pipelines.mixins import dask_it -extractor = Pipeline(steps=[ - ('0', preprocessor(callable=original_preprocessor, features_dir=os.path.join(base_dir,"extractor0"))), - ('1',CheckpointSampleLinearize(features_dir=os.path.join(base_dir,"extractor1"))), - ('2',CheckpointSamplePCA(features_dir=os.path.join(base_dir,"extractor2"), model_path=os.path.join(base_dir,"pca.pkl"))) - ]) -#extractor = dask_it(extractor, npartitions=48) - -from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance, BiometricAlgorithmCheckpointMixin - -class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): pass -algorithm = CheckpointDistance(features_dir=base_dir) +# FaceCrop +preprocessor = functools.partial( + FaceCrop, + cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), + cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, +) + +transformer = make_pipeline( + LegacyPreprocessor(preprocessor), + CheckpointSampleLinearize(features_dir="./example/extractor0"), + CheckpointSamplePCA( + features_dir="./example/extractor1", model_path="./example/pca.pkl" + ), +) +algorithm = CheckpointDistance(features_dir="./example/") + +# comment out the code below to disable dask +from bob.pipelines.mixins import estimator_dask_it, mix_me_up +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + BioAlgDaskMixin, +) + +transformer = estimator_dask_it(transformer) +algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/mixins/__init__.py b/bob/bio/base/mixins/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/bob/bio/base/mixins/legacy.py b/bob/bio/base/mixins/legacy.py deleted file mode 100644 index 3cf0d70f3b3448b49379af0bf6b22b2c58aa1899..0000000000000000000000000000000000000000 --- a/bob/bio/base/mixins/legacy.py +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> - - -""" -Mixins to handle legacy components -""" - -from bob.pipelines.mixins import CheckpointMixin, 
SampleMixin -from sklearn.base import TransformerMixin, BaseEstimator -from sklearn.utils.validation import check_array -from bob.pipelines.sample import Sample, DelayedSample, SampleSet -from bob.pipelines.utils import is_picklable -import numpy -import logging -import os -import bob.io.base -import functools - -logger = logging.getLogger(__name__) - - -def scikit_to_bob_supervised(X, Y): - """ - Given an input data ready for :py:method:`scikit.estimator.BaseEstimator.fit`, - convert for :py:class:`bob.bio.base.algorithm.Algorithm.train_projector` when - `performs_projection=True` - """ - - # TODO: THIS IS VERY INNEFICI - logger.warning( - "INEFFICIENCY WARNING. HERE YOU ARE USING A HACK FOR USING BOB ALGORITHMS IN SCIKIT LEARN PIPELINES. \ - WE RECOMMEND YOU TO PORT THIS ALGORITHM. DON'T BE LAZY :-)" - ) - - bob_output = dict() - for x, y in zip(X, Y): - if y in bob_output: - bob_output[y] = numpy.vstack((bob_output[y], x.data)) - else: - bob_output[y] = x.data - - return [bob_output[k] for k in bob_output] - - -class LegacyProcessorMixin(TransformerMixin): - """Class that wraps :py:class:`bob.bio.base.preprocessor.Preprocessor` and - :py:class:`bob.bio.base.extractor.Extractors` - - - Example - ------- - - Wrapping preprocessor with functtools - >>> from bob.bio.base.mixins.legacy import LegacyProcessorMixin - >>> from bob.bio.face.preprocessor import FaceCrop - >>> import functools - >>> transformer = LegacyProcessorMixin(functools.partial(FaceCrop, cropped_image_size=(10,10))) - - Example - ------- - Wrapping extractor - >>> from bob.bio.base.mixins.legacy import LegacyProcessorMixin - >>> from bob.bio.face.extractor import Linearize - >>> transformer = LegacyProcessorMixin(Linearize) - - - Parameters - ---------- - callable: callable - Calleble function that instantiates the scikit estimator - - """ - - def __init__(self, callable=None, **kwargs): - super().__init__(**kwargs) - self.callable = callable - self.instance = None - - def transform(self, X): - - # Instantiates and do the "real" transform - if self.instance is None: - self.instance = self.callable() - if isinstance(X[0], dict): - # Handling annotations if it's the case - retval = [] - for x in X: - data = x["data"] - annotations = x["annotations"] - - retval.append(self.instance(data, annotations=annotations)) - return retval - - else: - X = check_array(X, allow_nd=True) - return [self.instance(x) for x in X] - - def __setstate__(self): - # Handling unpicklable objects - self.instance = None - - def __getstate__(self): - # Handling unpicklable objects - self.instance = None - - -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -class LegacyAlgorithmMixin(CheckpointMixin, SampleMixin, BaseEstimator): - """Class that wraps :py:class:`bob.bio.base.algorithm.Algoritm` - - :py:method:`LegacyAlgorithmrMixin.fit` maps to :py:method:`bob.bio.base.algorithm.Algoritm.train_projector` - - :py:method:`LegacyAlgorithmrMixin.transform` maps :py:method:`bob.bio.base.algorithm.Algoritm.project` - - .. 
warning THIS HAS TO BE SAMPABLE AND CHECKPOINTABLE - - - Example - ------- - - Wrapping LDA algorithm with functtools - >>> from bob.bio.base.mixins.legacy import LegacyAlgorithmMixin - >>> from bob.bio.base.algorithm import LDA - >>> import functools - >>> transformer = LegacyAlgorithmMixin(functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90)) - - - - Parameters - ---------- - callable: callable - Calleble function that instantiates the scikit estimator - - """ - - def __init__(self, callable=None, **kwargs): - super().__init__(**kwargs) - self.callable = callable - self.instance = None - self.projector_file = None - - def fit(self, X, y=None, **fit_params): - - self.projector_file = os.path.join(self.model_path, "Projector.hdf5") - if os.path.exists(self.projector_file): - return self - - # Instantiates and do the "real" fit - if self.instance is None: - self.instance = self.callable() - - if self.instance.performs_projection: - # Organizing the date by class - bob_X = scikit_to_bob_supervised(X, y) - self.instance.train_projector(bob_X, self.projector_file) - - # Deleting the instance, so it's picklable - self.instance = None - - return self - - def transform(self, X): - def _project_save_sample(sample): - # Project - projected_data = self.instance.project(sample.data) - - # Checkpointing - path = self.make_path(sample) - bob.io.base.create_directories_safe(os.path.dirname(path)) - f = bob.io.base.HDF5File(path, "w") - - self.instance.write_feature(projected_data, f) - reader = get_reader(self.instance.read_feature, path) - - return DelayedSample(reader, parent=sample) - - self.projector_file = os.path.join(self.model_path, "Projector.hdf5") - if not isinstance(X, list): - raise ValueError("It's expected a list, not %s" % type(X)) - - # Instantiates and do the "real" transform - if self.instance is None: - self.instance = self.callable() - self.instance.load_projector(self.projector_file) - - if isinstance(X[0], Sample) or isinstance(X[0], DelayedSample): - samples = [] - for sample in X: - samples.append(_project_save_sample(sample)) - return samples - - elif isinstance(X[0], SampleSet): - # Projecting and checkpointing sampleset - sample_sets = [] - for sset in X: - samples = [] - for sample in sset.samples: - samples.append(_project_save_sample(sample)) - sample_sets.append(SampleSet(samples=samples, parent=sset)) - return sample_sets - - else: - raise ValueError("Type not allowed %s" % type(X[0])) - - def __setstate__(self): - # Handling unpicklable objects - self.instance = None - - def __getstate__(self): - # Handling unpicklable objects - self.instance = None - - -def get_reader(reader, path): - if is_picklable(reader): - return functools.partial(reader, path) - else: - logger.warning( - f"The method {reader} is not picklable. Shiping its unbounded method to `DelayedSample`." 
- ) - reader = reader.__func__ # The reader object might not be picklable - return functools.partial(reader, None, path) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..1fbfce94c8bd30451fae052346b160272e4e3cfe --- /dev/null +++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py @@ -0,0 +1,213 @@ +from abc import ABCMeta, abstractmethod +from bob.pipelines.sample import Sample, SampleSet, DelayedSample + + +class BioAlgorithm(metaclass=ABCMeta): + """Describes a base biometric comparator for the Vanilla Biometrics Pipeline :ref:`_bob.bio.base.struct_bio_rec_sys`_. + + biometric model enrollement, via ``enroll()`` and scoring, with + ``score()``. + + """ + + def enroll_samples(self, biometric_references): + """This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`. + + It handles the creation of biometric references + + Parameters + ---------- + biometric_references : list + A list of :py:class:`SampleSet` objects to be used for + creating biometric references. The sets must be identified + with a unique id and a path, for eventual checkpointing. + """ + + retval = [] + for k in biometric_references: + # compute on-the-fly + retval.append(self._enroll_sample_set(k)) + + return retval + + def _enroll_sample_set(self, sampleset): + + # Unpack the sampleset + data = [s.data for s in sampleset.samples] + + # Enroll + return Sample(self.enroll(data), parent=sampleset) + + @abstractmethod + def enroll(self, data): + """ + It handles the creation of ONE biometric reference for the vanilla ppipeline + + Parameters + ---------- + + data: + Data used for the creation of ONE BIOMETRIC REFERENCE + + """ + pass + + def score_samples(self, probe_features, biometric_references): + """Scores a new sample against multiple (potential) references + + Parameters + ---------- + + probes : list + A list of :py:class:`SampleSet` objects to be used for + scoring the input references + + biometric_references : list + A list of :py:class:`Sample` objects to be used for + scoring the input probes, must have an ``id`` attribute that + will be used to cross-reference which probes need to be scored. + + Returns + ------- + + scores : list + For each sample in a probe, returns as many scores as there are + samples in the probe, together with the probe's and the + relevant reference's subject identifiers. + + """ + + retval = [] + for p in probe_features: + retval.append(self._score_sample_set(p, biometric_references)) + return retval + + def _score_sample_set(self, sampleset, biometric_references): + """Given a sampleset for probing, compute the scores and retures a sample set with the scores + """ + + # Stacking the samples from a sampleset + data = [s.data for s in sampleset.samples] + + # Compute scores for each sample inside of the sample set + # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) + # We should add an agregator function here so we can properlly agregate samples from + # a sampleset either after or before scoring. 
+ # To be honest, this should be the default behaviour + retval = [] + for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)): + # Creating one sample per comparison + subprobe_scores = [] + for ref in [ + r for r in biometric_references if r.key in sampleset.references + ]: + subprobe_scores.append(Sample(self.score(ref.data, s), parent=ref)) + + # Creating one sampleset per probe + subprobe = SampleSet(subprobe_scores, parent=sampleset) + subprobe.subprobe_id = subprobe_id + retval.append(subprobe) + + return retval + + @abstractmethod + def score(self, biometric_reference, data): + """It handles the score computation for one sample + + Parameters + ---------- + + biometric_reference : list + Biometric reference to be compared + + data : list + Data to be compared + + Returns + ------- + + scores : list + For each sample in a probe, returns as many scores as there are + samples in the probe, together with the probe's and the + relevant reference's subject identifiers. + + """ + pass + + +class Database(metaclass=ABCMeta): + """Base class for Vanilla Biometric pipeline + """ + + @abstractmethod + def background_model_samples(self): + """Returns :py:class:`Sample`'s to train a background model + + + Returns + ------- + samples : list + List of samples for background model training. + + """ + pass + + @abstractmethod + def references(self, group="dev"): + """Returns :py:class:`Reference`'s to enroll biometric references + + + Parameters + ---------- + group : :py:class:`str`, optional + Limits samples to this group + + + Returns + ------- + references : list + List of samples for the creation of biometric references. + + """ + pass + + @abstractmethod + def probes(self, group): + """Returns :py:class:`Probe`'s to score biometric references + + + Parameters + ---------- + group : str + Limits samples to this group + + + Returns + ------- + probes : list + List of samples for the creation of biometric probes. + + """ + pass + + +def save_scores_four_columns(path, probe): + """ + Write scores in the four columns format + """ + + with open(path, "w") as f: + for biometric_reference in probe.samples: + line = "{0} {1} {2} {3}\n".format( + biometric_reference.subject, + probe.subject, + probe.key, + biometric_reference.data, + ) + f.write(line) + + def load(): + with open(path) as f: + return [float(line.split()[-1]) for line in f] + + return DelayedSample(load, parent=probe) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py b/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py deleted file mode 100644 index 5853582d87a43e61b06926bd817546ebf15b192e..0000000000000000000000000000000000000000 --- a/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> -# @author: Andre Anjos <andre.anjos@idiap.ch> - -from bob.pipelines.sample import Sample, SampleSet, DelayedSample -import numpy -import bob.io.base -import os -import functools - - -class BiometricAlgorithm(object): - """Describes a base biometric comparator for the Vanilla Biometrics Pipeline :ref:`_bob.bio.base.struct_bio_rec_sys`_. - - biometric model enrollement, via ``enroll()`` and scoring, with - ``score()``. 
- - """ - - def __init__(self): - pass - - def _enroll_samples( - self, biometric_references, extractor=None, checkpoint=None, *args, **kwargs - ): - """This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`. - - It handles the creation of biometric references - - Parameters - ---------- - biometric_references : list - A list of :py:class:`SampleSet` objects to be used for - creating biometric references. The sets must be identified - with a unique id and a path, for eventual checkpointing. - - background_model : - Object containing the background model - - checkpoint : str, None - If passed and not ``None``, then it is considered to be the - path of a directory containing possible cached values for each - of the references in this experiment. If that is the case, the - values are loaded from there and not recomputed. - - *args, **kwargs : - Extra parameters that can be used to hook-up processing graph - dependencies, but are currently ignored - - """ - - retval = [] - for k in biometric_references: - # compute on-the-fly - retval.append(self._enroll_sample_set(k)) - - return retval - - def _enroll_sample_set(self, sampleset): - - # Unpack the sampleset - data = [s.data for s in sampleset.samples] - - # Enroll - return Sample(self.enroll(data), parent=sampleset) - - def enroll(self, data, extractor=None, **kwargs): - """ - It handles the creation of ONE biometric reference for the vanilla ppipeline - - Parameters - ---------- - - data: - Data used for the creation of ONE BIOMETRIC REFERENCE - - """ - - raise NotImplemented("Please, implement me") - - def _score_samples( - self, probes, biometric_references, extractor=None, *args, **kwargs - ): - """Scores a new sample against multiple (potential) references - - Parameters - ---------- - - probes : list - A list of :py:class:`SampleSet` objects to be used for - scoring the input references - - biometric_references : list - A list of :py:class:`Sample` objects to be used for - scoring the input probes, must have an ``id`` attribute that - will be used to cross-reference which probes need to be scored. - - extractor : - Path pointing to stored model on disk - - *args, **kwargs : - Extra parameters that can be used to hook-up processing graph - dependencies, but are currently ignored - - - Returns - ------- - - scores : list - For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the - relevant reference's subject identifiers. - - """ - - retval = [] - for p in probes: - retval.append(self._score_sample_set(p, biometric_references, extractor)) - return retval - - def _score_sample_set(self, sampleset, biometric_references, extractor): - """Given a sampleset for probing, compute the scores and retures a sample set with the scores - """ - - # Stacking the samples from a sampleset - data = [s.data for s in sampleset.samples] - - # Compute scores for each sample inside of the sample set - # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) - # We should add an agregator function here so we can properlly agregate samples from - # a sampleset either after or before scoring. 
- # To be honest, this should be the default behaviour - retval = [] - for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)): - # Creating one sample per comparison - subprobe_scores = [] - for ref in [ - r for r in biometric_references if r.key in sampleset.references - ]: - subprobe_scores.append( - Sample(self.score(ref.data, s, extractor), parent=ref) - ) - - # Creating one sampleset per probe - subprobe = SampleSet(subprobe_scores, parent=sampleset) - subprobe.subprobe_id = subprobe_id - retval.append(subprobe) - - return retval - - def score(self, biometric_reference, data, extractor=None, **kwargs): - """It handles the score computation for one sample - - Parameters - ---------- - - biometric_reference : list - Biometric reference to be compared - - data : list - Data to be compared - - Returns - ------- - - scores : list - For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the - relevant reference's subject identifiers. - - """ - raise NotImplemented("Please, implement me") - - -from bob.pipelines.mixins import CheckpointMixin - - -class BiometricAlgorithmCheckpointMixin(CheckpointMixin): - """Mixing used to checkpoint Enrolled and Scoring samples. - - Examples - -------- - - >>> from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import BiometricAlgorithmCheckpointMixin, Distance - >>> class DistanceCheckpoint(BiometricAlgorithmCheckpointMixin, Distance) pass: - >>> biometric_algorithm = DistanceCheckpoint(features_dir="./") - >>> biometric_algorithm.enroll(sample) - - It's possible to use it as with the :py:func:`bob.pipelines.mixins.mix_me_up` - - >>> from bob.pipelines.mixins import mix_me_up - >>> biometric_algorithm = mix_me_up([BiometricAlgorithmCheckpointMixin], Distance)(features_dir="./") - >>> biometric_algorithm.enroll(sample) - - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.biometric_reference_dir = os.path.join( - self.features_dir, "biometric_references" - ) - self.score_dir = os.path.join(self.features_dir, "scores") - - def save(self, sample, path): - return bob.io.base.save(sample.data, path, create_directories=True) - - def _enroll_sample_set(self, sampleset): - """ - Enroll a sample set with checkpointing - """ - - # Amending `models` directory - path = os.path.join( - self.biometric_reference_dir, str(sampleset.key) + self.extension - ) - if path is None or not os.path.isfile(path): - - # Enrolling the sample - enrolled_sample = super()._enroll_sample_set(sampleset) - - # saving the new sample - self.save(enrolled_sample, path) - - # Dealaying it. 
- # This seems inefficient, but it's crucial for large datasets - delayed_enrolled_sample = DelayedSample( - functools.partial(bob.io.base.load, path), enrolled_sample - ) - - else: - # If sample already there, just load - delayed_enrolled_sample = self.load(path) - delayed_enrolled_sample.key = sampleset.key - - return delayed_enrolled_sample - - def _score_sample_set(self, sampleset, biometric_references, extractor): - """Given a sampleset for probing, compute the scores and retures a sample set with the scores - """ - # Computing score - scored_sample_set = super()._score_sample_set( - sampleset, biometric_references, extractor - ) - - for s in scored_sample_set: - # Checkpointing score - path = os.path.join(self.score_dir, str(s.path) + ".txt") - bob.io.base.create_directories_safe(os.path.dirname(path)) - - delayed_scored_sample = save_scores_four_columns(path, s) - s.samples = [delayed_scored_sample] - - return scored_sample_set - - -import scipy.spatial.distance -from sklearn.utils.validation import check_array - - -class Distance(BiometricAlgorithm): - def __init__(self, distance_function=scipy.spatial.distance.euclidean, factor=-1): - - self.distance_function = distance_function - self.factor = factor - - def enroll(self, enroll_features, **kwargs): - """enroll(enroll_features) -> model - - Enrolls the model by storing all given input vectors. - - Parameters: - ----------- - - ``enroll_features`` : [:py:class:`numpy.ndarray`] - The list of projected features to enroll the model from. - - Returns: - -------- - - ``model`` : 2D :py:class:`numpy.ndarray` - The enrolled model. - """ - - enroll_features = check_array(enroll_features, allow_nd=True) - - return numpy.mean(enroll_features, axis=0) - - def score(self, model, probe, extractor=None, **kwargs): - """score(model, probe) -> float - - Computes the distance of the model to the probe using the distance function specified in the constructor. 
- - Parameters: - ----------- - - ``model`` : 2D :py:class:`numpy.ndarray` - The model storing all enrollment features - - ``probe`` : :py:class:`numpy.ndarray` - The probe feature vector - - Returns: - -------- - - ``score`` : float - A similarity value between ``model`` and ``probe`` - """ - - probe = probe.flatten() - # return the negative distance (as a similarity measure) - return self.factor * self.distance_function(model, probe) - - -def save_scores_four_columns(path, probe): - """ - Write scores in the four columns format - """ - - with open(path, "w") as f: - for biometric_reference in probe.samples: - line = "{0} {1} {2} {3}\n".format( - biometric_reference.key, probe.key, probe.path, biometric_reference.data - ) - f.write(line) - - return DelayedSample(functools.partial(open, path)) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/implemented.py b/bob/bio/base/pipelines/vanilla_biometrics/implemented.py new file mode 100644 index 0000000000000000000000000000000000000000..f9bcd6c9a94c9fce0ba6f59cb68e1c71b396c155 --- /dev/null +++ b/bob/bio/base/pipelines/vanilla_biometrics/implemented.py @@ -0,0 +1,63 @@ +import scipy.spatial.distance +from sklearn.utils.validation import check_array +import numpy +from .abstract_classes import BioAlgorithm +from .mixins import BioAlgCheckpointMixin + + +class Distance(BioAlgorithm): + def __init__(self, distance_function=scipy.spatial.distance.euclidean, factor=-1): + + self.distance_function = distance_function + self.factor = factor + + def enroll(self, enroll_features): + """enroll(enroll_features) -> model + + Enrolls the model by storing all given input vectors. + + Parameters: + ----------- + + ``enroll_features`` : [:py:class:`numpy.ndarray`] + The list of projected features to enroll the model from. + + Returns: + -------- + + ``model`` : 2D :py:class:`numpy.ndarray` + The enrolled model. + """ + + enroll_features = check_array(enroll_features, allow_nd=True) + + return numpy.mean(enroll_features, axis=0) + + def score(self, model, probe): + """score(model, probe) -> float + + Computes the distance of the model to the probe using the distance function specified in the constructor. + + Parameters: + ----------- + + ``model`` : 2D :py:class:`numpy.ndarray` + The model storing all enrollment features + + ``probe`` : :py:class:`numpy.ndarray` + The probe feature vector + + Returns: + -------- + + ``score`` : float + A similarity value between ``model`` and ``probe`` + """ + + probe = probe.flatten() + # return the negative distance (as a similarity measure) + return self.factor * self.distance_function(model, probe) + + +class CheckpointDistance(BioAlgCheckpointMixin, Distance): + pass diff --git a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py index e2a700fba760d4f77cf1c18d23dba83ea48345ff..6e629ad0da99bdf4829da73d747a55bb4c651b86 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py @@ -4,23 +4,33 @@ """Re-usable blocks for legacy bob.bio.base algorithms""" import os -import copy import functools +from collections import defaultdict -import bob.io.base +from .... 
import utils +from .abstract_classes import BioAlgorithm, Database, save_scores_four_columns +from bob.io.base import HDF5File +from bob.pipelines.mixins import SampleMixin, CheckpointMixin from bob.pipelines.sample import DelayedSample, SampleSet, Sample -import numpy +from bob.pipelines.utils import is_picklable +from sklearn.base import TransformerMixin import logging -import dask -import sys -import pickle -from bob.bio.base.mixins.legacy import get_reader -from .biometric_algorithm import save_scores_four_columns logger = logging.getLogger("bob.bio.base") -class DatabaseConnector: +def _biofile_to_delayed_sample(biofile, database): + return DelayedSample( + load=functools.partial( + biofile.load, database.original_directory, database.original_extension, + ), + key=biofile.path, + path=biofile.path, + annotations=database.annotations(biofile), + ) + + +class LegacyDatabaseConnector(Database): """Wraps a bob.bio.base database and generates conforming samples This connector allows wrapping generic bob.bio.base datasets and generate @@ -40,12 +50,9 @@ class DatabaseConnector: """ - def __init__(self, database, protocol): + def __init__(self, database, **kwargs): + super().__init__(**kwargs) self.database = database - self.protocol = protocol - self.directory = database.original_directory - self.extension = database.original_extension - def background_model_samples(self): """Returns :py:class:`Sample`'s to train a background model (group @@ -61,28 +68,9 @@ class DatabaseConnector: """ - # TODO: This should be organized by client - retval = [] + objects = self.database.training_files() - objects = self.database.objects(protocol=self.protocol, groups="world") - - return [ - SampleSet( - [ - DelayedSample( - load=functools.partial( - k.load, - self.database.original_directory, - self.database.original_extension, - ), - key=k.path, - path=k.path, - ) - ], - key=str(k.client_id), - ) - for k in objects - ] + return [_biofile_to_delayed_sample(k, self.database) for k in objects] def references(self, group="dev"): """Returns :py:class:`Reference`'s to enroll biometric references @@ -106,26 +94,13 @@ class DatabaseConnector: """ retval = [] - for m in self.database.model_ids_with_protocol(protocol=self.protocol, groups=group): + for m in self.database.model_ids(groups=group): - objects = self.database.objects( - protocol=self.protocol, groups=group, model_ids=(m,), purposes="enroll" - ) + objects = self.database.enroll_files(groups=group, model_id=m) retval.append( SampleSet( - [ - DelayedSample( - load=functools.partial( - k.load, - self.database.original_directory, - self.database.original_extension, - ), - key=k.path, - path=k.path, - ) - for k in objects - ], + [_biofile_to_delayed_sample(k, self.database) for k in objects], key=str(m), path=str(m), subject=str(objects[0].client_id), @@ -157,29 +132,17 @@ class DatabaseConnector: probes = dict() - for m in self.database.model_ids_with_protocol(protocol=self.protocol, groups=group): + for m in self.database.model_ids(groups=group): # Getting all the probe objects from a particular biometric # reference - objects = self.database.objects( - protocol=self.protocol, groups=group, model_ids=(m,), purposes="probe" - ) + objects = self.database.probe_files(group=group, model_id=m) # Creating probe samples for o in objects: if o.id not in probes: probes[o.id] = SampleSet( - [ - DelayedSample( - load=functools.partial( - o.load, - self.database.original_directory, - self.database.original_extension, - ), - key=o.path, - path=o.path, - ) - ], + 
[_biofile_to_delayed_sample(o, self.database)], key=str(o.client_id), path=o.path, subject=str(o.client_id), @@ -191,214 +154,201 @@ class DatabaseConnector: return list(probes.values()) +class _NonPickableWrapper: + def __init__(self, callable, **kwargs): + super().__init__(**kwargs) + self.callable = callable + self._instance = None -def _load_data_and_annotations(bio_file, annotations, original_directory, original_extension): - """ - Return a tuple (data, annotations) given a :py:class:`bob.bio.base.database.BioFile` as input + @property + def instance(self): + if self._instance is None: + self._instance = self.callable() + return self._instance - Parameters - ---------- + def __setstate__(self, d): + # Handling unpicklable objects + self._instance = None + return super().__setstate__(d) - bio_file: :py:class:`bob.bio.base.database.BioFile` - Input bio file + def __getstate__(self): + # Handling unpicklable objects + self._instance = None + return super().__getstate__() - Returns - ------- - (data, annotations): A dictionary containing the raw data + annotations - """ +class _Preprocessor(_NonPickableWrapper, TransformerMixin): + def transform(self, X, annotations): + return [self.instance(data, annot) for data, annot in zip(X, annotations)] - data = bio_file.load(original_directory, original_extension) + def _more_tags(self): + return {"stateless": True} - # I know it sounds stupid to return the the annotations here without any transformation - # but I can't do `database.annotations(bio_file)`, SQLAlcheamy session is not picklable - return {"data": data, "annotations": annotations} +def _get_pickable_method(method): + if not is_picklable(method): + logger.warning( + f"The method {method} is not picklable. Returning its unbounded method" + ) + method = functools.partial(method.__func__, None) + return method + + +class LegacyPreprocessor(CheckpointMixin, SampleMixin, _Preprocessor): + def __init__(self, callable, **kwargs): + instance = callable() + super().__init__( + callable=callable, + transform_extra_arguments=(("annotations", "annotations"),), + load_func=_get_pickable_method(instance.read_data), + save_func=_get_pickable_method(instance.write_data), + **kwargs, + ) -class DatabaseConnectorAnnotated(DatabaseConnector): - """Wraps a bob.bio.base database and generates conforming samples for datasets - that has annotations - This connector allows wrapping generic bob.bio.base datasets and generate - samples that conform to the specifications of biometric pipelines defined - in this package. +def _split_X_by_y(X, y): + training_data = defaultdict(list) + for x1, y1 in zip(X, y): + training_data[y1].append(x1) + training_data = training_data.values() + return training_data - Parameters - ---------- +class _Extractor(_NonPickableWrapper, TransformerMixin): + def transform(self, X, metadata=None): + if self.requires_metadata: + return [self.instance(data, metadata=m) for data, m in zip(X, metadata)] + else: + return [self.instance(data) for data in X] - database : object - An instantiated version of a bob.bio.base.Database object + def fit(self, X, y=None): + if not self.instance.requires_training: + return self - protocol : str - The name of the protocol to generate samples from. - To be plugged at :py:method:`bob.db.base.Database.objects`. 
+ training_data = X + if self.instance.split_training_data_by_client: + training_data = _split_X_by_y(X, y) - """ + self.instance.train(self, training_data, self.model_path) + return self - def __init__(self, database, protocol): - super(DatabaseConnectorAnnotated, self).__init__(database, protocol) + def _more_tags(self): + return {"requires_fit": self.instance.requires_training} - def background_model_samples(self): - """Returns :py:class:`Sample`'s to train a background model (group - ``world``). +class LegacyExtractor(CheckpointMixin, SampleMixin, _Extractor): + def __init__(self, callable, model_path, **kwargs): + instance = callable() + transform_extra_arguments = None + self.requires_metadata = False + if utils.is_argument_available("metadata", instance.__call__): + transform_extra_arguments = (("metadata", "metadata"),) + self.requires_metadata = True - Returns - ------- + fit_extra_arguments = None + if instance.requires_training and instance.split_training_data_by_client: + fit_extra_arguments = (("y", "subject"),) - samples : list - List of samples conforming the pipeline API for background - model training. See, e.g., :py:func:`.pipelines.first`. + super().__init__( + callable=callable, + transform_extra_arguments=transform_extra_arguments, + fit_extra_arguments=fit_extra_arguments, + load_func=_get_pickable_method(instance.read_feature), + save_func=_get_pickable_method(instance.write_feature), + model_path=model_path, + **kwargs, + ) - """ + def load_model(self): + self.instance.load(self.model_path) + return self - # TODO: This should be organized by client - retval = [] + def save_model(self): + # we have already saved the model in .fit() + return self - objects = self.database.objects(protocol=self.protocol, groups="world") - return [ - SampleSet( - [ - DelayedSample( - load=functools.partial( - _load_data_and_annotations, k, self.database.annotations(k), self.database.original_directory, self.database.original_extension - ), - key=k.path, - path=k.path, - annotations=self.database.annotations(k), - ) - ], - key=str(k.client_id), - ) - for k in objects - ] - def references(self, group="dev"): - """Returns :py:class:`Reference`'s to enroll biometric references +class _AlgorithmTransformer(_NonPickableWrapper, TransformerMixin): + def transform(self, X): + return [self.instance.project(feature) for feature in X] + def fit(self, X, y=None): + if not self.instance.requires_projector_training: + return self - Parameters - ---------- + training_data = X + if self.instance.split_training_features_by_client: + training_data = _split_X_by_y(X, y) - group : :py:class:`str`, optional - A ``group`` to be plugged at - :py:meth:`bob.db.base.Database.objects` + self.instance.train_projector(self, training_data, self.model_path) + return self + def _more_tags(self): + return {"requires_fit": self.instance.requires_projector_training} - Returns - ------- - - references : list - List of samples conforming the pipeline API for the creation of - biometric references. See, e.g., :py:func:`.pipelines.first`. 
- """ +class LegacyAlgorithmAsTransformer(CheckpointMixin, SampleMixin, _AlgorithmTransformer): + """Class that wraps :py:class:`bob.bio.base.algorithm.Algoritm` - retval = [] + :py:method:`LegacyAlgorithmrMixin.fit` maps to :py:method:`bob.bio.base.algorithm.Algoritm.train_projector` - for m in self.database.model_ids_with_protocol( - protocol=self.protocol, groups=group - ): + :py:method:`LegacyAlgorithmrMixin.transform` maps :py:method:`bob.bio.base.algorithm.Algoritm.project` - objects = self.database.objects( - protocol=self.protocol, groups=group, model_ids=(m,), purposes="enroll" - ) - - retval.append( - SampleSet( - [ - DelayedSample( - load=functools.partial( - _load_data_and_annotations, k, self.database.annotations(k), self.database.original_directory, self.database.original_extension - ), - key=k.path, - path=k.path, - subject=str(objects[0].client_id), - annotations=self.database.annotations(k), - ) - for k in objects - ], - key=str(m), - path=str(m), - subject=objects[0].client_id, - ) - ) - - return retval - - def probes(self, group): - """Returns :py:class:`Probe`'s to score biometric references - - - Parameters - ---------- + Example + ------- - group : str - A ``group`` to be plugged at - :py:meth:`bob.db.base.Database.objects` + Wrapping LDA algorithm with functtools + >>> from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyAlgorithmAsTransformer + >>> from bob.bio.base.algorithm import LDA + >>> import functools + >>> transformer = LegacyAlgorithmAsTransformer(functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90)) - Returns - ------- - probes : list - List of samples conforming the pipeline API for the creation of - biometric probes. See, e.g., :py:func:`.pipelines.first`. + Parameters + ---------- + callable: callable + Calleble function that instantiates the bob.bio.base.algorithm.Algorithm - """ + """ - probes = dict() + def __init__(self, callable, model_path, **kwargs): + instance = callable() - for m in self.database.model_ids_with_protocol( - protocol=self.protocol, groups=group + fit_extra_arguments = None + if ( + instance.requires_projector_training + and instance.split_training_features_by_client ): + fit_extra_arguments = (("y", "subject"),) + + super().__init__( + callable=callable, + fit_extra_arguments=fit_extra_arguments, + load_func=_get_pickable_method(instance.read_feature), + save_func=_get_pickable_method(instance.write_feature), + model_path=model_path, + **kwargs, + ) - # Getting all the probe objects from a particular biometric - # reference - objects = self.database.objects( - protocol=self.protocol, groups=group, model_ids=(m,), purposes="probe" - ) - - # Creating probe samples - for o in objects: - if o.id not in probes: - probes[o.id] = SampleSet( - [ - DelayedSample( - load=functools.partial( - _load_data_and_annotations, o, self.database.annotations(o), self.database.original_directory, self.database.original_extension - ), - key=o.path, - path=o.path, - annotations=self.database.annotations(o), - ) - ], - key=str(o.client_id), - path=o.path, - subject=o.client_id, - references=[str(m)], - ) - else: - probes[o.id].references.append(str(m)) - - return list(probes.values()) - + def load_model(self): + self.instance.load_projector(self.model_path) + return self -from .biometric_algorithm import BiometricAlgorithm + def save_model(self): + # we have already saved the model in .fit() + return self -class LegacyBiometricAlgorithm(BiometricAlgorithm): +class LegacyAlgorithmAsBioAlg(BioAlgorithm, _NonPickableWrapper): 
"""Biometric Algorithm that handles legacy :py:class:`bob.bio.base.algorithm.Algorithm` - :py:method:`BiometricAlgorithm.enroll` maps to :py:method:`bob.bio.base.algorithm.Algoritm.enroll` - - :py:method:`BiometricAlgorithm.score` maps :py:method:`bob.bio.base.algorithm.Algoritm.score` + :py:method:`BioAlgorithm.enroll` maps to :py:method:`bob.bio.base.algorithm.Algoritm.enroll` + :py:method:`BioAlgorithm.score` maps :py:method:`bob.bio.base.algorithm.Algoritm.score` - THIS CODE HAS TO BE CHECKPOINTABLE IN A SPECIAL WAY Example ------- @@ -407,33 +357,27 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): Parameters ---------- callable: callable - Calleble function that instantiates the scikit estimator + Calleble function that instantiates the bob.bio.base.algorithm.Algorithm """ - def __init__(self, callable=None, features_dir=None, **kwargs): - super().__init__(**kwargs) - self.callable = callable - self.instance = None - self.projector_file = None + def __init__(self, callable, features_dir, extension=".hdf5", **kwargs): + super().__init__(callable, **kwargs) self.features_dir = features_dir self.biometric_reference_dir = os.path.join( self.features_dir, "biometric_references" ) self.score_dir = os.path.join(self.features_dir, "scores") - self.extension = ".hdf5" + self.extension = extension def _enroll_sample_set(self, sampleset): # Enroll return self.enroll(sampleset) - def _score_sample_set(self, sampleset, biometric_references, extractor): + def _score_sample_set(self, sampleset, biometric_references): """Given a sampleset for probing, compute the scores and retures a sample set with the scores """ - # Stacking the samples from a sampleset - data = [s for s in sampleset.samples] - # Compute scores for each sample inside of the sample set # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) # We should add an agregator function here so we can properlly agregate samples from @@ -447,10 +391,7 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): for ref in [ r for r in biometric_references if r.key in sampleset.references ]: - # subprobe_scores.append(self.score(ref.data, s, extractor)) - subprobe_scores.append( - Sample(self.score(ref.data, s.data, extractor), parent=ref) - ) + subprobe_scores.append(Sample(self.score(ref.data, s.data), parent=ref)) # Creating one sampleset per probe subprobe = SampleSet(subprobe_scores, parent=sampleset) @@ -458,7 +399,7 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): # Checkpointing score MANDATORY FOR LEGACY path = os.path.join(self.score_dir, str(subprobe.path) + ".txt") - bob.io.base.create_directories_safe(os.path.dirname(path)) + os.makedirs(os.path.dirname(path), exist_ok=True) delayed_scored_sample = save_scores_four_columns(path, subprobe) subprobe.samples = [delayed_scored_sample] @@ -474,10 +415,6 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): f"`enroll_features` should be the type SampleSet, not {enroll_features}" ) - # Instantiates and do the "real" fit - if self.instance is None: - self.instance = self.callable() - path = os.path.join( self.biometric_reference_dir, str(enroll_features.key) + self.extension ) @@ -488,17 +425,12 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): model = self.instance.enroll(data) # Checkpointing - bob.io.base.create_directories_safe(os.path.dirname(path)) - hdf5 = bob.io.base.HDF5File(path, "w") + os.makedirs(os.path.dirname(path), exist_ok=True) + hdf5 = HDF5File(path, "w") self.instance.write_model(model, hdf5) - reader = get_reader(self.instance.read_model, 
-            return DelayedSample(reader, parent=enroll_features)
-
-    def score(self, model, probe, extractor=None, **kwargs):
-
-        # Instantiates and do the "real" fit
-        if self.instance is None:
-            self.instance = self.callable()
+            reader = _get_pickable_method(self.instance.read_model)
+            return DelayedSample(functools.partial(reader, path), parent=enroll_features)
 
+    def score(self, model, probe, **kwargs):
         return self.instance.score(model, probe)
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/mixins.py b/bob/bio/base/pipelines/vanilla_biometrics/mixins.py
new file mode 100644
index 0000000000000000000000000000000000000000..5655e0df19f6cef969a8def0c2e14233581c57d1
--- /dev/null
+++ b/bob/bio/base/pipelines/vanilla_biometrics/mixins.py
@@ -0,0 +1,105 @@
+from bob.pipelines.mixins import CheckpointMixin
+from bob.pipelines.sample import DelayedSample
+import bob.io.base
+import os
+import functools
+import dask
+from .abstract_classes import save_scores_four_columns
+
+
+class BioAlgCheckpointMixin(CheckpointMixin):
+    """Mixin used to checkpoint enrolled and scored samples.
+
+    Examples
+    --------
+
+    >>> from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import BioAlgCheckpointMixin, Distance
+    >>> class DistanceCheckpoint(BioAlgCheckpointMixin, Distance): pass
+    >>> biometric_algorithm = DistanceCheckpoint(features_dir="./")
+    >>> biometric_algorithm.enroll(sample)
+
+    It's also possible to build it with :py:func:`bob.pipelines.mixins.mix_me_up`
+
+    >>> from bob.pipelines.mixins import mix_me_up
+    >>> biometric_algorithm = mix_me_up([BioAlgCheckpointMixin], Distance)(features_dir="./")
+    >>> biometric_algorithm.enroll(sample)
+
+    """
+
+    def __init__(self, features_dir, **kwargs):
+        super().__init__(features_dir=features_dir, **kwargs)
+        self.biometric_reference_dir = os.path.join(
+            features_dir, "biometric_references"
+        )
+        self.score_dir = os.path.join(features_dir, "scores")
+
+    def save(self, sample, path):
+        return bob.io.base.save(sample.data, path, create_directories=True)
+
+    def _enroll_sample_set(self, sampleset):
+        """
+        Enroll a sample set with checkpointing
+        """
+
+        # Amending `models` directory
+        path = os.path.join(
+            self.biometric_reference_dir, str(sampleset.key) + self.extension
+        )
+        if path is None or not os.path.isfile(path):
+
+            # Enrolling the sample
+            enrolled_sample = super()._enroll_sample_set(sampleset)
+
+            # saving the new sample
+            self.save(enrolled_sample, path)
+
+            # Delaying it:
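+            # only a loader bound to the checkpointed ``path`` is kept in memory,
+            # so the enrolled model is read back from disk when the sample data
+            # is actually requested.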
+ # This seems inefficient, but it's crucial for large datasets + delayed_enrolled_sample = DelayedSample( + functools.partial(bob.io.base.load, path), enrolled_sample + ) + + else: + # If sample already there, just load + delayed_enrolled_sample = self.load(path) + delayed_enrolled_sample.key = sampleset.key + + return delayed_enrolled_sample + + def _score_sample_set(self, sampleset, biometric_references): + """Given a sampleset for probing, compute the scores and retures a sample set with the scores + """ + # Computing score + scored_sample_set = super()._score_sample_set(sampleset, biometric_references) + + for s in scored_sample_set: + # Checkpointing score + path = os.path.join(self.score_dir, str(s.path) + ".txt") + bob.io.base.create_directories_safe(os.path.dirname(path)) + + delayed_scored_sample = save_scores_four_columns(path, s) + s.samples = [delayed_scored_sample] + + return scored_sample_set + + +class BioAlgDaskMixin: + def enroll_samples(self, biometric_reference_features): + biometric_references = biometric_reference_features.map_partitions( + self.enroll_samples + ) + return biometric_references + + def score_samples(self, probe_features, biometric_references): + + # TODO: Here, we are sending all computed biometric references to all + # probes. It would be more efficient if only the models related to each + # probe are sent to the probing split. An option would be to use caching + # and allow the ``score`` function above to load the required data from + # the disk, directly. A second option would be to generate named delays + # for each model and then associate them here. + + all_references = dask.delayed(list)(biometric_references) + + scores = probe_features.map_partitions(self.score_samples, all_references) + return scores diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py index 9b32ae55f66ca498b1d0cbf88692b842fc9e6406..9ac00cbb0a63ae2762d936439a438726987fa30b 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py @@ -8,11 +8,8 @@ This file contains simple processing blocks meant to be used for bob.bio experiments """ -import dask.bag -import dask.delayed -from bob.pipelines.sample import samplesets_to_samples - import logging + logger = logging.getLogger(__name__) @@ -20,94 +17,63 @@ def biometric_pipeline( background_model_samples, biometric_reference_samples, probe_samples, - extractor, + transformer, biometric_algorithm, ): - logger.info(f" >> Vanilla Biometrics: Training background model with pipeline {extractor}") + logger.info( + f" >> Vanilla Biometrics: Training background model with pipeline {transformer}" + ) - ## Training background model (fit will return even if samples is ``None``, - ## in which case we suppose the algorithm is not trainable in any way) - extractor = train_background_model(background_model_samples, extractor) + # Training background model (fit will return even if samples is ``None``, + # in which case we suppose the algorithm is not trainable in any way) + transformer = train_background_model(background_model_samples, transformer) - logger.info(f" >> Creating biometric references with the biometric algorithm {biometric_algorithm}") + logger.info( + f" >> Creating biometric references with the biometric algorithm {biometric_algorithm}" + ) - ## Create biometric samples + # Create biometric samples biometric_references = create_biometric_reference( - biometric_reference_samples, extractor, 
biometric_algorithm + biometric_reference_samples, transformer, biometric_algorithm ) - logger.info(f" >> Computing scores with the biometric algorithm {biometric_algorithm}") + logger.info( + f" >> Computing scores with the biometric algorithm {biometric_algorithm}" + ) - ## Scores all probes + # Scores all probes return compute_scores( - probe_samples, biometric_references, extractor, biometric_algorithm + probe_samples, biometric_references, transformer, biometric_algorithm ) -def train_background_model(background_model_samples, extractor): - - X, y = samplesets_to_samples(background_model_samples) - - extractor = extractor.fit(X, y=y) - - return extractor +def train_background_model(background_model_samples, transformer): + # background_model_samples is a list of Samples + transformer = transformer.fit(background_model_samples) + return transformer def create_biometric_reference( - biometric_reference_samples, extractor, biometric_algorithm + biometric_reference_samples, transformer, biometric_algorithm ): - biometric_reference_features = extractor.transform(biometric_reference_samples) - - # TODO: I KNOW THIS LOOKS UGLY, BUT THIS `MAP_PARTITIONS` HAS TO APPEAR SOMEWHERE - # I COULD WORK OUT A MIXIN FOR IT, BUT THE USER WOULD NEED TO SET THAT SOMETWHERE - # HERE'S ALREADY SETTING ONCE (for the pipeline) AND I DON'T WANT TO MAKE - # THEM SET IN ANOTHER PLACE - # LET'S DISCUSS THIS ON SLACK - - if isinstance(biometric_reference_features, dask.bag.core.Bag): - # ASSUMING THAT IS A DASK THING IS COMMING - biometric_references = biometric_reference_features.map_partitions( - biometric_algorithm._enroll_samples - ) - else: - biometric_references = biometric_algorithm._enroll_samples( - biometric_reference_features - ) + biometric_reference_features = transformer.transform(biometric_reference_samples) + + biometric_references = biometric_algorithm.enroll_samples( + biometric_reference_features + ) # models is a list of Samples return biometric_references -def compute_scores(probe_samples, biometric_references, extractor, biometric_algorithm): +def compute_scores( + probe_samples, biometric_references, transformer, biometric_algorithm +): # probes is a list of SampleSets - probe_features = extractor.transform(probe_samples) - - # TODO: I KNOW THIS LOOKS UGLY, BUT THIS `MAP_PARTITIONS` HAS TO APPEAR SOMEWHERE - # I COULD WORK OUT A MIXIN FOR IT, BUT THE USER WOULD NEED TO SET THAT SOMETWHERE - # HERE'S ALREADY SETTING ONCE (for the pipeline) AND I DON'T WANT TO MAKE - # THEM SET IN ANOTHER PLACE - # LET'S DISCUSS THIS ON SLACK - if isinstance(probe_features, dask.bag.core.Bag): - # ASSUMING THAT IS A DASK THING IS COMMING - - ## TODO: Here, we are sending all computed biometric references to all - ## probes. It would be more efficient if only the models related to each - ## probe are sent to the probing split. An option would be to use caching - ## and allow the ``score`` function above to load the required data from - ## the disk, directly. A second option would be to generate named delays - ## for each model and then associate them here. 
- - all_references = dask.delayed(list)(biometric_references) - - scores = probe_features.map_partitions( - biometric_algorithm._score_samples, all_references, extractor - ) - - else: - scores = biometric_algorithm._score_samples( - probe_features, biometric_references, extractor - ) + probe_features = transformer.transform(probe_samples) + + scores = biometric_algorithm.score_samples(probe_features, biometric_references) # scores is a list of Samples return scores diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py index b58204d2bf9fcbe4a51386fb993ee7352dbb6dd1..c0cba089f0027d35280a2466c309bd7a670ca679 100644 --- a/bob/bio/base/script/vanilla_biometrics.py +++ b/bob/bio/base/script/vanilla_biometrics.py @@ -5,26 +5,26 @@ """Executes biometric pipeline""" -import os -import functools - import click -from bob.extension.scripts.click_helper import verbosity_option, ResourceOption, ConfigCommand -from bob.pipelines.sample import DelayedSample, Sample +from bob.extension.scripts.click_helper import ( + verbosity_option, + ResourceOption, + ConfigCommand, +) import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger(__name__) EPILOG = """\b - + Command line examples\n ----------------------- - + $ bob pipelines vanilla-biometrics my_experiment.py -vv @@ -34,7 +34,7 @@ EPILOG = """\b >>> extractor = my_extractor() \n >>> algorithm = my_algorithm() \n >>> checkpoints = EXPLAIN CHECKPOINTING \n - + \b @@ -54,15 +54,14 @@ TODO: Work out this help @click.command( - entry_point_group='bob.pipelines.config', cls=ConfigCommand, - epilog=EPILOG, + entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG, ) @click.option( - "--extractor", + "--transformer", "-e", required=True, cls=ResourceOption, - entry_point_group="bob.bio.extractor", # This should be linked to bob.bio.base + entry_point_group="bob.pipelines.transformer", help="Feature extraction algorithm", ) @click.option( @@ -92,6 +91,7 @@ TODO: Work out this help @click.option( "--group", "-g", + "groups", type=click.Choice(["dev", "eval"]), multiple=True, default=("dev",), @@ -106,13 +106,7 @@ TODO: Work out this help ) @verbosity_option(cls=ResourceOption) def vanilla_biometrics( - extractor, - algorithm, - database, - dask_client, - group, - output, - **kwargs + transformer, algorithm, database, dask_client, groups, output, **kwargs ): """Runs the simplest biometrics pipeline. @@ -121,7 +115,7 @@ def vanilla_biometrics( Sub-pipeline 1:\n --------------- - + Training background model. Some biometric algorithms demands the training of background model, for instance, PCA/LDA matrix or a Neural networks. This sub-pipeline handles that and it consists of 3 steps: \b @@ -133,13 +127,13 @@ def vanilla_biometrics( Sub-pipeline 2:\n --------------- - + Creation of biometric references: This is a standard step in a biometric pipelines. Given a set of samples of one identity, create a biometric reference (a.k.a template) for sub identity. This sub-pipeline handles that in 3 steps and they are the following: \b raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference - + Note that this sub-pipeline depends on the previous one @@ -150,99 +144,59 @@ def vanilla_biometrics( Probing: This is another standard step in biometric pipelines. Given one sample and one biometric reference, computes a score. Such score has different meanings depending on the scoring method your biometric algorithm uses. 
It's out of scope to explain in a help message to explain what scoring is for different biometric algorithms.
-    
+
     raw_data --> preprocessing >> feature extraction >> probe(biometric_reference, background_model) --> score
 
     Note that this sub-pipeline depends on the two previous ones
 
 
     """
-
-    # Always turn-on the checkpointing
-    checkpointing = True
-
     # Chooses the pipeline to run
     from bob.bio.base.pipelines.vanilla_biometrics.pipeline import biometric_pipeline
+    import dask.bag
+    import itertools
+    import os
+    from bob.pipelines.sample import Sample
 
     if not os.path.exists(output):
-        os.makedirs(output)
-
-    for g in group:
+        os.makedirs(output, exist_ok=True)
+
+    for group in groups:
+
+        with open(os.path.join(output, f"scores-{group}"), "w") as f:
+            biometric_references = database.references(group=group)
 
-        with open(os.path.join(output,f"scores-{g}"), "w") as f:
-            biometric_references = database.references(group=g)
-
-            logger.info(f"Running vanilla biometrics for group {g}")
+            logger.info(f"Running vanilla biometrics for group {group}")
 
             result = biometric_pipeline(
                 database.background_model_samples(),
                 biometric_references,
-                database.probes(group=g),
-                extractor,
+                database.probes(group=group),
+                transformer,
                 algorithm,
-            )
-
-            import dask.bag
+            )
+
             if isinstance(result, dask.bag.core.Bag):
                 if dask_client is not None:
                     result = result.compute(scheduler=dask_client)
                 else:
-                    logger.warning("`dask_client` not set. Your pipeline will run locally")
+                    logger.warning(
+                        "`dask_client` not set. Your pipeline will run locally"
+                    )
                     result = result.compute()
 
             # Flatting out the list
-            import itertools
-            result = list(itertools.chain(*result))
+            result = itertools.chain(*result)
 
             for probe in result:
                 for sample in probe.samples:
-                    
+
                     if isinstance(sample, Sample):
-                        line = "{0} {1} {2} {3}\n".format(sample.key, probe.key, probe.path, sample.data)
+                        line = "{0} {1} {2} {3}\n".format(
+                            sample.key, probe.key, probe.path, sample.data
+                        )
                         f.write(line)
-                    elif isinstance(sample, DelayedSample):
-                        lines = sample.load().readlines()
-                        f.writelines(lines)
                     else:
                         raise TypeError("The output of the pipeline is not writeble")
 
     if dask_client is not None:
         dask_client.shutdown()
-
-
-@click.command()
-@click.argument("output-file")
-@verbosity_option(cls=ResourceOption)
-def vanilla_biometrics_template(output_file, **kwargs):
-    """
-    Generate an template configuration file for the vanilla biometrics pipeline
-    """
-
-    import bob.io.base
-
-    path = os.path.dirname(output_file)
-    logger.info(f"Writting template configuration file in {path}")
-    bob.io.base.create_directories_safe(path)
-
-    template = '''
-
-# Client dask. Look at https://gitlab.idiap.ch/bob/bob.pipelines/tree/master/bob/pipelines/config/distributed to find proper dask clients.
-# You don't need to necessary instantiate a dask client yourself.
You can simply pipe those config files - -dask_client = my_client - - -preprocessor = my_preprocessor - - -extractor = my_extractor - - -algorithm = my_algorithm - - -database = my_database - -''' - - open(output_file, "w").write(template) diff --git a/bob/bio/base/test/test_transformers.py b/bob/bio/base/test/test_transformers.py index 97d290756620dc2e1689c7aadf72c96a0f6071b5..5fa1b098d1f4d92475d570fa098917bd6a5ee6ac 100644 --- a/bob/bio/base/test/test_transformers.py +++ b/bob/bio/base/test/test_transformers.py @@ -8,9 +8,9 @@ import numpy import tempfile from sklearn.utils.validation import check_is_fitted -from bob.bio.base.transformers import Linearize, SampleLinearize, CheckpointSampleLinearize +from bob.pipelines.transformers import Linearize, SampleLinearize, CheckpointSampleLinearize def test_linearize_processor(): - + ## Test the transformer only transformer = Linearize() X = numpy.zeros(shape=(10,10)) @@ -24,7 +24,7 @@ def test_linearize_processor(): X_tr = transformer.transform([sample]) assert X_tr[0].data.shape == (100,) - ## Test checkpoint + ## Test checkpoint with tempfile.TemporaryDirectory() as d: transformer = CheckpointSampleLinearize(features_dir=d) X_tr = transformer.transform([sample]) @@ -32,9 +32,9 @@ def test_linearize_processor(): assert os.path.exists(os.path.join(d, "1.h5")) -from bob.bio.base.transformers import SamplePCA, CheckpointSamplePCA +from bob.pipelines.transformers import SamplePCA, CheckpointSamplePCA def test_pca_processor(): - + ## Test wrapped in to a Sample X = numpy.random.rand(100,10) samples = [Sample(data, key=str(i)) for i, data in enumerate(X)] @@ -43,17 +43,17 @@ def test_pca_processor(): n_components = 2 estimator = SamplePCA(n_components=n_components) estimator = estimator.fit(samples) - + # https://scikit-learn.org/stable/modules/generated/sklearn.utils.validation.check_is_fitted.html assert check_is_fitted(estimator, "n_components_") is None - + # transform samples_tr = estimator.transform(samples) assert samples_tr[0].data.shape == (n_components,) - + ## Test Checkpoining - with tempfile.TemporaryDirectory() as d: + with tempfile.TemporaryDirectory() as d: model_path = os.path.join(d, "model.pkl") estimator = CheckpointSamplePCA(n_components=n_components, features_dir=d, model_path=model_path) @@ -61,8 +61,8 @@ def test_pca_processor(): estimator = estimator.fit(samples) assert check_is_fitted(estimator, "n_components_") is None assert os.path.exists(model_path) - + # transform samples_tr = estimator.transform(samples) - assert samples_tr[0].data.shape == (n_components,) + assert samples_tr[0].data.shape == (n_components,) assert os.path.exists(os.path.join(d, samples_tr[0].key+".h5")) diff --git a/bob/bio/base/test/test_vanilla_biometrics.py b/bob/bio/base/test/test_vanilla_biometrics.py index 60e0ac4027c6e7535496db6a4b88b02d284974df..75d9354860926ed739c382eb590252dc87ecf0ed 100644 --- a/bob/bio/base/test/test_vanilla_biometrics.py +++ b/bob/bio/base/test/test_vanilla_biometrics.py @@ -19,10 +19,10 @@ class DummyDatabase: self.one_d = one_d - def _create_random_1dsamples(self, n_samples, offset, dim): + def _create_random_1dsamples(self, n_samples, offset, dim): return [ Sample(numpy.random.rand(dim), key=i) for i in range(offset,offset+n_samples) ] - def _create_random_2dsamples(self, n_samples, offset, dim): + def _create_random_2dsamples(self, n_samples, offset, dim): return [ Sample(numpy.random.rand(dim, dim), key=i) for i in range(offset,offset+n_samples) ] def _create_random_sample_set(self, n_sample_set=10, n_samples=2): @@ 
-30,7 +30,7 @@ class DummyDatabase: # Just generate random samples sample_set = [SampleSet(samples=[], key=i) for i in range(n_sample_set)] - offset = 0 + offset = 0 for s in sample_set: if self.one_d: s.samples = self._create_random_1dsamples(n_samples, offset, self.dim) @@ -61,22 +61,22 @@ class DummyDatabase: from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance import itertools def test_distance_comparator(): - + n_references = 10 dim = 10 n_probes = 10 database = DummyDatabase(delayed=False, n_references=n_references, n_probes=n_probes, dim=10, one_d = True) - references = database.references() + references = database.references() probes = database.probes() - + comparator = Distance() - references = comparator._enroll_samples(references) + references = comparator.enroll_samples(references) assert len(references)== n_references assert references[0].data.shape == (dim,) probes = database.probes() - scores = comparator._score_samples(probes, references) + scores = comparator.score_samples(probes, references) scores = list(itertools.chain(*scores)) - + assert len(scores) == n_probes*n_references assert len(scores[0].samples)==n_references diff --git a/bob/bio/base/transformers/__init__.py b/bob/bio/base/transformers/__init__.py deleted file mode 100644 index 729af155f8f3f72cd1e3805f4a6efe40e2787dd7..0000000000000000000000000000000000000000 --- a/bob/bio/base/transformers/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .linearize import Linearize, SampleLinearize, CheckpointSampleLinearize -from .pca import CheckpointSamplePCA, SamplePCA diff --git a/bob/bio/base/transformers/linearize.py b/bob/bio/base/transformers/linearize.py deleted file mode 100644 index 03d079567018361610c8573031f6bbffd9bfbacb..0000000000000000000000000000000000000000 --- a/bob/bio/base/transformers/linearize.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> - - -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from sklearn.base import TransformerMixin -from sklearn.utils.validation import check_array -import numpy - - -class Linearize(TransformerMixin): - """Extracts features by simply concatenating all elements of the data into one long vector. - - If a ``dtype`` is specified in the contructor, it is assured that the resulting - """ - - def transform(self, X): - - """__call__(data) -> data - - Takes data of arbitrary dimensions and linearizes it into a 1D vector; enforcing the data type, if desired. - - Parameters: - ----------- - - data : :py:class:`numpy.ndarray` - The preprocessed data to be transformed into one vector. - - Returns: - -------- - - data : 1D :py:class:`numpy.ndarray` - The extracted feature vector, of the desired ``dtype`` (if specified). 
- """ - - X = check_array(X, allow_nd=True) - - if X.ndim == 2: - return numpy.reshape(X, X.size) - else: - # Reshaping n-dimensional arrays assuming that the - # first axis corresponds to the number of samples - return numpy.reshape(X, (X.shape[0], numpy.prod(X.shape[1:]))) - - -class SampleLinearize(SampleMixin, Linearize): - pass - - -class CheckpointSampleLinearize(CheckpointMixin, SampleMixin, Linearize): - pass diff --git a/bob/bio/base/transformers/pca.py b/bob/bio/base/transformers/pca.py deleted file mode 100644 index 1412188f66aa8b111d39b741093486cf5bf780c8..0000000000000000000000000000000000000000 --- a/bob/bio/base/transformers/pca.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> - - -""" -TODO: This should be deployed in bob.pipelines -""" - -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from sklearn.base import TransformerMixin -from sklearn.decomposition import PCA -import numpy - -""" -Wraps the -""" - - -class SamplePCA(SampleMixin, PCA): - """ - Enables SAMPLE handling for https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html - """ - - pass - - -class CheckpointSamplePCA(CheckpointMixin, SampleMixin, PCA): - """ - Enables SAMPLE and CHECKPOINTIN handling for https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html - """ - - pass