Commit 92748386 authored by Tiago de Freitas Pereira


Wrapped the vanilla_biometric pipeline in a class called VanillaBiometrics. In this way the user has to dask_it only once instead of twice as before, making the user code cleaner.
parent 16ea9865
Merge request !180: [dask] Preparing bob.bio.base for dask pipelines
Pipeline #38590 failed
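
The user-facing effect of this change, as a minimal before/after sketch (all names are taken from the configuration diffs below; `transformer` and `algorithm` are assumed to be already defined):

# Before: the transformer and the biometric algorithm had to be dask-wrapped separately
from bob.pipelines.mixins import estimator_dask_it, mix_me_up
from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin
transformer = estimator_dask_it(transformer)
algorithm = mix_me_up([BioAlgDaskMixin], algorithm)

# After: a single wrap of the whole pipeline
from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))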
@@ -26,6 +26,7 @@ database = DatabaseConnector(
         protocol="mobile0-male",
     )
 )
+database.allow_score_multiple_references = True

 # Using face crop
 CROPPED_IMAGE_HEIGHT = 80
@@ -74,9 +74,8 @@ algorithm = AlgorithmAsBioAlg(callable=gabor_jet, features_dir=base_dir, allow_s
 #algorithm = AlgorithmAsBioAlg(callable=gabor_jet, features_dir=base_dir)
 # comment out the code below to disable dask
-from bob.pipelines.mixins import estimator_dask_it, mix_me_up
-from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+#pipeline = VanillaBiometrics(transformer, algorithm)
+pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48)
@@ -59,11 +59,7 @@ algorithm = CheckpointDistance(features_dir="./example/")
 # algorithm = Distance()
 # comment out the code below to disable dask
-from bob.pipelines.mixins import estimator_dask_it, mix_me_up
-from bob.bio.base.pipelines.vanilla_biometrics.mixins import (
-    BioAlgDaskMixin,
-)
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+#pipeline = VanillaBiometrics(transformer, algorithm)
+pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))
@@ -58,10 +58,7 @@ transformer = make_pipeline(
 algorithm = AlgorithmAsBioAlg(callable=lda, features_dir="./example/")
-from bob.pipelines.mixins import estimator_dask_it, mix_me_up
-from bob.bio.base.pipelines.vanilla_biometrics.mixins import (
-    BioAlgDaskMixin,
-)
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+#pipeline = VanillaBiometrics(transformer, algorithm)
+pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))
@@ -11,6 +11,7 @@ import os
 base_dir = "example"
 database = DatabaseConnector(AtntBioDatabase(original_directory="./atnt", protocol="Default"))
+database.allow_scoring_with_all_biometric_references = True
 transformer = make_pipeline(
     CheckpointSampleLinearize(features_dir=os.path.join(base_dir, "linearize")),
@@ -26,6 +27,7 @@ from bob.bio.base.pipelines.vanilla_biometrics.mixins import (
     BioAlgDaskMixin,
 )
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+pipeline = VanillaBiometrics(transformer, algorithm)
+#pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))
@@ -44,12 +44,7 @@ transformer = make_pipeline(
 )
 algorithm = CheckpointDistance(features_dir="./example/")
 # comment out the code below to disable dask
-from bob.pipelines.mixins import estimator_dask_it, mix_me_up
-from bob.bio.base.pipelines.vanilla_biometrics.mixins import (
-    BioAlgDaskMixin,
-)
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+#pipeline = VanillaBiometrics(transformer, algorithm)
+pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48)
 # see https://docs.python.org/3/library/pkgutil.html
 from pkgutil import extend_path
+from .pipeline import VanillaBiometrics, dask_vanilla_biometrics
 __path__ = extend_path(__path__, __name__)
@@ -14,89 +14,160 @@ import numpy
 logger = logging.getLogger(__name__)

-def biometric_pipeline(
-    background_model_samples,
-    biometric_reference_samples,
-    probe_samples,
-    transformer,
-    biometric_algorithm,
-    allow_scoring_with_all_biometric_references=False,
-):
-    logger.info(
-        f" >> Vanilla Biometrics: Training background model with pipeline {transformer}"
-    )
-
-    # Training background model (fit will return even if samples is ``None``,
-    # in which case we suppose the algorithm is not trainable in any way)
-    transformer = train_background_model(background_model_samples, transformer)
-
-    logger.info(
-        f" >> Creating biometric references with the biometric algorithm {biometric_algorithm}"
-    )
-
-    # Create biometric samples
-    biometric_references = create_biometric_reference(
-        biometric_reference_samples, transformer, biometric_algorithm
-    )
-
-    logger.info(
-        f" >> Computing scores with the biometric algorithm {biometric_algorithm}"
-    )
-
-    # Scores all probes
-    return compute_scores(
-        probe_samples,
-        biometric_references,
-        transformer,
-        biometric_algorithm,
-        allow_scoring_with_all_biometric_references,
-    )
-
-
-def train_background_model(background_model_samples, transformer):
-    # background_model_samples is a list of Samples
-
-    # We might have algorithms that have no data for training
-    if len(background_model_samples) <= 0:
-        logger.warning(
-            "There's no data to train background model."
-            "For the rest of the execution it will be assumed that the pipeline is stateless."
-        )
-        return transformer
-
-    transformer = transformer.fit(background_model_samples)
-    return transformer
-
-
-def create_biometric_reference(
-    biometric_reference_samples, transformer, biometric_algorithm
-):
-    biometric_reference_features = transformer.transform(biometric_reference_samples)
-
-    biometric_references = biometric_algorithm.enroll_samples(
-        biometric_reference_features
-    )
-
-    # models is a list of Samples
-    return biometric_references
-
-
-def compute_scores(
-    probe_samples,
-    biometric_references,
-    transformer,
-    biometric_algorithm,
-    allow_scoring_with_all_biometric_references=False,
-):
-    # probes is a list of SampleSets
-    probe_features = transformer.transform(probe_samples)
-
-    scores = biometric_algorithm.score_samples(
-        probe_features,
-        biometric_references,
-        allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
-    )
-
-    # scores is a list of Samples
-    return scores
+class VanillaBiometrics(object):
+    """
+    Vanilla Biometrics Pipeline
+
+    This is the backbone of most biometric recognition systems.
+    It is composed of three sub-pipelines:
+
+    - :py:meth:`VanillaBiometrics.train_background_model`: initializes or trains your transformer.
+      It will run :py:meth:`sklearn.base.BaseEstimator.fit`.
+
+    - :py:meth:`VanillaBiometrics.create_biometric_reference`: creates biometric references.
+      It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
+      :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.enroll`.
+
+    - :py:meth:`VanillaBiometrics.compute_scores`: computes scores.
+      It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
+      :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.score`.
+
+    Example
+    -------
+    >>> from sklearn.pipeline import make_pipeline
+    >>> from bob.bio.base.pipelines.vanilla_biometrics.implemented import Distance
+    >>> transformer = make_pipeline(estimator_1, estimator_2)
+    >>> biometric_algorithm = Distance()
+    >>> pipeline = VanillaBiometrics(transformer, biometric_algorithm)
+    >>> pipeline(samples_for_training_background_model, samplesets_for_enroll, samplesets_for_scoring)
+
+    To run this pipeline using Dask, use the function :py:func:`dask_vanilla_biometrics`.
+
+    Example
+    -------
+    >>> pipeline = VanillaBiometrics(transformer, biometric_algorithm)
+    >>> pipeline = dask_vanilla_biometrics(pipeline)
+    >>> pipeline(samples_for_training_background_model, samplesets_for_enroll, samplesets_for_scoring).compute()
+
+    Parameters
+    ----------
+    transformer: :py:class:`sklearn.pipeline.Pipeline` or a :py:class:`sklearn.base.BaseEstimator`
+        Transformer that will preprocess your data.
+
+    biometric_algorithm: :py:class:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm`
+        Biometric algorithm object that implements the `enroll` and `score` methods.
+    """
+
+    def __init__(self, transformer, biometric_algorithm):
+        self.transformer = transformer
+        self.biometric_algorithm = biometric_algorithm
+
+    def __call__(
+        self,
+        background_model_samples,
+        biometric_reference_samples,
+        probe_samples,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+        logger.info(
+            f" >> Vanilla Biometrics: Training background model with pipeline {self.transformer}"
+        )
+
+        # Training background model (fit will return even if samples is ``None``,
+        # in which case we suppose the algorithm is not trainable in any way)
+        self.transformer = self.train_background_model(background_model_samples)
+
+        logger.info(
+            f" >> Creating biometric references with the biometric algorithm {self.biometric_algorithm}"
+        )
+
+        # Create biometric samples
+        biometric_references = self.create_biometric_reference(
+            biometric_reference_samples
+        )
+
+        logger.info(
+            f" >> Computing scores with the biometric algorithm {self.biometric_algorithm}"
+        )
+
+        # Scores all probes
+        return self.compute_scores(
+            probe_samples,
+            biometric_references,
+            allow_scoring_with_all_biometric_references,
+        )
+
+    def train_background_model(self, background_model_samples):
+        # background_model_samples is a list of Samples
+
+        # We might have algorithms that have no data for training
+        if len(background_model_samples) <= 0:
+            logger.warning(
+                "There's no data to train background model."
+                "For the rest of the execution it will be assumed that the pipeline is stateless."
+            )
+            return self.transformer
+
+        return self.transformer.fit(background_model_samples)
+
+    def create_biometric_reference(self, biometric_reference_samples):
+        biometric_reference_features = self.transformer.transform(
+            biometric_reference_samples
+        )
+
+        biometric_references = self.biometric_algorithm.enroll_samples(
+            biometric_reference_features
+        )
+
+        # models is a list of Samples
+        return biometric_references
+
+    def compute_scores(
+        self,
+        probe_samples,
+        biometric_references,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+        # probes is a list of SampleSets
+        probe_features = self.transformer.transform(probe_samples)
+
+        scores = self.biometric_algorithm.score_samples(
+            probe_features,
+            biometric_references,
+            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
+        )
+
+        # scores is a list of Samples
+        return scores
+
+
+def dask_vanilla_biometrics(pipeline, npartitions=None):
+    """
+    Given a :py:class:`VanillaBiometrics`, wraps its :py:attr:`VanillaBiometrics.transformer` and
+    :py:attr:`VanillaBiometrics.biometric_algorithm` with Dask delayeds.
+
+    Parameters
+    ----------
+    pipeline: :py:class:`VanillaBiometrics`
+        Vanilla Biometrics based pipeline to be dasked.
+
+    npartitions: int
+        Number of partitions for the initial :py:class:`dask.bag.Bag`.
+    """
+    from bob.pipelines.mixins import estimator_dask_it, mix_me_up
+    from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin
+
+    transformer = estimator_dask_it(pipeline.transformer, npartitions=npartitions)
+    biometric_algorithm = mix_me_up([BioAlgDaskMixin], pipeline.biometric_algorithm)
+
+    return VanillaBiometrics(transformer, biometric_algorithm)
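
Since dask_vanilla_biometrics returns another VanillaBiometrics, the dasked pipeline is called exactly like the plain one. A minimal usage sketch, assuming `transformer`, `algorithm` and the sample lists are defined as in the docstring examples above:

pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48)

# The call only builds a dask graph (a dask bag); nothing runs yet.
result = pipeline(
    samples_for_training_background_model,
    samplesets_for_enroll,
    samplesets_for_scoring,
)

# Materialize the scores. If a dask.distributed Client has been created,
# compute() uses it as the default scheduler; otherwise it runs locally.
scores = result.compute()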
@@ -57,21 +57,13 @@ TODO: Work out this help
     entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG,
 )
 @click.option(
-    "--transformer",
-    "-e",
+    "--pipeline",
+    "-p",
     required=True,
     cls=ResourceOption,
-    entry_point_group="bob.pipelines.transformer",
+    entry_point_group="bob.pipelines.pipeline",
     help="Feature extraction algorithm",
 )
-@click.option(
-    "--algorithm",
-    "-a",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.bio.algorithm",  # This should be linked to bob.bio.base
-    help="Biometric Algorithm (class that implements the methods: `fit`, `enroll` and `score`)",
-)
 @click.option(
     "--database",
     "-d",
@@ -106,7 +98,7 @@ TODO: Work out this help
 )
 @verbosity_option(cls=ResourceOption)
 def vanilla_biometrics(
-    transformer, algorithm, database, dask_client, groups, output, **kwargs
+    pipeline, database, dask_client, groups, output, **kwargs
 ):
     """Runs the simplest biometrics pipeline.
@@ -152,7 +144,7 @@ def vanilla_biometrics(
     """
-    from bob.bio.base.pipelines.vanilla_biometrics.pipeline import biometric_pipeline
+    from bob.bio.base.pipelines.vanilla_biometrics.pipeline import VanillaBiometrics
     import dask.bag
     import itertools
     import os
@@ -174,14 +166,11 @@ def vanilla_biometrics(
             else False
         )
-        result = biometric_pipeline(
-            database.background_model_samples(),
-            biometric_references,
-            database.probes(group=group),
-            transformer,
-            algorithm,
-            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references
-        )
+        result = pipeline(database.background_model_samples(),
+            biometric_references,
+            database.probes(group=group),
+            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references
+        )

         if isinstance(result, dask.bag.core.Bag):
             if dask_client is not None:
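
With the two --transformer/--algorithm options collapsed into a single --pipeline resource, a configuration module now only has to expose one `pipeline` variable (plus `database`). A minimal sketch of such a config, assuming `transformer` and `algorithm` are built as in the example configurations above (the file name is hypothetical, not part of this commit):

# vanilla_config.py -- hypothetical config module for the new --pipeline option
from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics

# Build `database`, `transformer` and `algorithm` as in the example configs
# above, then expose a single `pipeline` entry for the CLI to pick up:
pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))

The command is then pointed at this file with -p and at a database resource with -d; only those two flags appear in this diff, and the exact command name is not shown here.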