Commit 92748386 authored by Tiago de Freitas Pereira


Wrapped the vanilla_biometric pipeline in a class called VanillaBiometrics. In this way the user has to dask_it only once instead of twice as before, making the user code cleaner.
parent 16ea9865
Merge request !180: [dask] Preparing bob.bio.base for dask pipelines
Pipeline #38590 failed
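
The user-facing effect of this change, as a minimal before/after sketch (all names are taken from the configuration diffs below; `transformer` and `algorithm` are assumed to be already defined):

# Before: the transformer and the biometric algorithm had to be dask-wrapped separately
from bob.pipelines.mixins import estimator_dask_it, mix_me_up
from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin
transformer = estimator_dask_it(transformer)
algorithm = mix_me_up([BioAlgDaskMixin], algorithm)

# After: a single wrap of the whole pipeline
from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))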
@@ -26,6 +26,7 @@ database = DatabaseConnector(
         protocol="mobile0-male",
     )
 )
+database.allow_score_multiple_references = True

 # Using face crop
 CROPPED_IMAGE_HEIGHT = 80
@@ -74,9 +74,8 @@ algorithm = AlgorithmAsBioAlg(callable=gabor_jet, features_dir=base_dir, allow_s
 #algorithm = AlgorithmAsBioAlg(callable=gabor_jet, features_dir=base_dir)
 # comment out the code below to disable dask
-from bob.pipelines.mixins import estimator_dask_it, mix_me_up
-from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+#pipeline = VanillaBiometrics(transformer, algorithm)
+pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48)
@@ -59,11 +59,7 @@ algorithm = CheckpointDistance(features_dir="./example/")
 # algorithm = Distance()
 # comment out the code below to disable dask
-from bob.pipelines.mixins import estimator_dask_it, mix_me_up
-from bob.bio.base.pipelines.vanilla_biometrics.mixins import (
-    BioAlgDaskMixin,
-)
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+#pipeline = VanillaBiometrics(transformer, algorithm)
+pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))
@@ -58,10 +58,7 @@ transformer = make_pipeline(
 algorithm = AlgorithmAsBioAlg(callable=lda, features_dir="./example/")
-from bob.pipelines.mixins import estimator_dask_it, mix_me_up
-from bob.bio.base.pipelines.vanilla_biometrics.mixins import (
-    BioAlgDaskMixin,
-)
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+#pipeline = VanillaBiometrics(transformer, algorithm)
+pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))
@@ -11,6 +11,7 @@ import os
 base_dir = "example"
 database = DatabaseConnector(AtntBioDatabase(original_directory="./atnt", protocol="Default"))
+database.allow_scoring_with_all_biometric_references = True
 transformer = make_pipeline(
     CheckpointSampleLinearize(features_dir=os.path.join(base_dir, "linearize")),
@@ -26,6 +27,7 @@ from bob.bio.base.pipelines.vanilla_biometrics.mixins import (
     BioAlgDaskMixin,
 )
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+pipeline = VanillaBiometrics(transformer, algorithm)
+#pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))
@@ -44,12 +44,7 @@ transformer = make_pipeline(
 )
 algorithm = CheckpointDistance(features_dir="./example/")
 # comment out the code below to disable dask
-from bob.pipelines.mixins import estimator_dask_it, mix_me_up
-from bob.bio.base.pipelines.vanilla_biometrics.mixins import (
-    BioAlgDaskMixin,
-)
-transformer = estimator_dask_it(transformer)
-algorithm = mix_me_up([BioAlgDaskMixin], algorithm)
+from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics
+#pipeline = VanillaBiometrics(transformer, algorithm)
+pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48)
 # see https://docs.python.org/3/library/pkgutil.html
 from pkgutil import extend_path
+from .pipeline import VanillaBiometrics, dask_vanilla_biometrics
 __path__ = extend_path(__path__, __name__)
@@ -14,89 +14,160 @@ import numpy
 logger = logging.getLogger(__name__)

-def biometric_pipeline(
-    background_model_samples,
-    biometric_reference_samples,
-    probe_samples,
-    transformer,
-    biometric_algorithm,
-    allow_scoring_with_all_biometric_references=False,
-):
-    logger.info(
-        f" >> Vanilla Biometrics: Training background model with pipeline {transformer}"
-    )
-
-    # Training background model (fit will return even if samples is ``None``,
-    # in which case we suppose the algorithm is not trainable in any way)
-    transformer = train_background_model(background_model_samples, transformer)
-
-    logger.info(
-        f" >> Creating biometric references with the biometric algorithm {biometric_algorithm}"
-    )
-
-    # Create biometric samples
-    biometric_references = create_biometric_reference(
-        biometric_reference_samples, transformer, biometric_algorithm
-    )
-
-    logger.info(
-        f" >> Computing scores with the biometric algorithm {biometric_algorithm}"
-    )
-
-    # Scores all probes
-    return compute_scores(
-        probe_samples,
-        biometric_references,
-        transformer,
-        biometric_algorithm,
-        allow_scoring_with_all_biometric_references,
-    )
-
-
-def train_background_model(background_model_samples, transformer):
-    # background_model_samples is a list of Samples
-
-    # We might have algorithms that have no data for training
-    if len(background_model_samples) <= 0:
-        logger.warning(
-            "There's no data to train background model."
-            "For the rest of the execution it will be assumed that the pipeline is stateless."
-        )
-        return transformer
-
-    transformer = transformer.fit(background_model_samples)
-    return transformer
-
-
-def create_biometric_reference(
-    biometric_reference_samples, transformer, biometric_algorithm
-):
-    biometric_reference_features = transformer.transform(biometric_reference_samples)
-
-    biometric_references = biometric_algorithm.enroll_samples(
-        biometric_reference_features
-    )
-
-    # models is a list of Samples
-    return biometric_references
-
-
-def compute_scores(
-    probe_samples,
-    biometric_references,
-    transformer,
-    biometric_algorithm,
-    allow_scoring_with_all_biometric_references=False,
-):
-    # probes is a list of SampleSets
-    probe_features = transformer.transform(probe_samples)
-
-    scores = biometric_algorithm.score_samples(
-        probe_features,
-        biometric_references,
-        allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
-    )
-
-    # scores is a list of Samples
-    return scores
+class VanillaBiometrics(object):
+    """
+    Vanilla Biometrics Pipeline
+
+    This is the backbone of most biometric recognition systems.
+    It is composed of three sub-pipelines:
+
+    - :py:meth:`VanillaBiometrics.train_background_model`: initializes or trains your transformer.
+      It will run :py:meth:`sklearn.base.BaseEstimator.fit`.
+
+    - :py:meth:`VanillaBiometrics.create_biometric_reference`: creates biometric references.
+      It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
+      :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.enroll`.
+
+    - :py:meth:`VanillaBiometrics.compute_scores`: computes scores.
+      It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
+      :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.score`.
+
+    Example
+    -------
+    >>> from sklearn.pipeline import make_pipeline
+    >>> from bob.bio.base.pipelines.vanilla_biometrics.implemented import Distance
+    >>> transformer = make_pipeline(estimator_1, estimator_2)
+    >>> biometric_algorithm = Distance()
+    >>> pipeline = VanillaBiometrics(transformer, biometric_algorithm)
+    >>> pipeline(samples_for_training_background_model, samplesets_for_enroll, samplesets_for_scoring)
+
+    To run this pipeline using Dask, use the function :py:func:`dask_vanilla_biometrics`.
+
+    Example
+    -------
+    >>> pipeline = VanillaBiometrics(transformer, biometric_algorithm)
+    >>> pipeline = dask_vanilla_biometrics(pipeline)
+    >>> pipeline(samples_for_training_background_model, samplesets_for_enroll, samplesets_for_scoring).compute()
+
+    Parameters
+    ----------
+    transformer: :py:class:`sklearn.pipeline.Pipeline` or a :py:class:`sklearn.base.BaseEstimator`
+        Transformer that will preprocess your data.
+
+    biometric_algorithm: :py:class:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm`
+        Biometric algorithm object that implements the `enroll` and `score` methods.
+    """
+
+    def __init__(self, transformer, biometric_algorithm):
+        self.transformer = transformer
+        self.biometric_algorithm = biometric_algorithm
+
+    def __call__(
+        self,
+        background_model_samples,
+        biometric_reference_samples,
+        probe_samples,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+        logger.info(
+            f" >> Vanilla Biometrics: Training background model with pipeline {self.transformer}"
+        )
+
+        # Training background model (fit will return even if samples is ``None``,
+        # in which case we suppose the algorithm is not trainable in any way)
+        self.transformer = self.train_background_model(background_model_samples)
+
+        logger.info(
+            f" >> Creating biometric references with the biometric algorithm {self.biometric_algorithm}"
+        )
+
+        # Create biometric samples
+        biometric_references = self.create_biometric_reference(
+            biometric_reference_samples
+        )
+
+        logger.info(
+            f" >> Computing scores with the biometric algorithm {self.biometric_algorithm}"
+        )
+
+        # Scores all probes
+        return self.compute_scores(
+            probe_samples,
+            biometric_references,
+            allow_scoring_with_all_biometric_references,
+        )
+
+    def train_background_model(self, background_model_samples):
+        # background_model_samples is a list of Samples
+
+        # We might have algorithms that have no data for training
+        if len(background_model_samples) <= 0:
+            logger.warning(
+                "There's no data to train background model."
+                "For the rest of the execution it will be assumed that the pipeline is stateless."
+            )
+            return self.transformer
+
+        return self.transformer.fit(background_model_samples)
+
+    def create_biometric_reference(self, biometric_reference_samples):
+        biometric_reference_features = self.transformer.transform(
+            biometric_reference_samples
+        )
+
+        biometric_references = self.biometric_algorithm.enroll_samples(
+            biometric_reference_features
+        )
+
+        # models is a list of Samples
+        return biometric_references
+
+    def compute_scores(
+        self,
+        probe_samples,
+        biometric_references,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+        # probes is a list of SampleSets
+        probe_features = self.transformer.transform(probe_samples)
+
+        scores = self.biometric_algorithm.score_samples(
+            probe_features,
+            biometric_references,
+            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
+        )
+
+        # scores is a list of Samples
+        return scores
+
+
+def dask_vanilla_biometrics(pipeline, npartitions=None):
+    """
+    Given a :py:class:`VanillaBiometrics`, wraps its :py:attr:`VanillaBiometrics.transformer` and
+    :py:attr:`VanillaBiometrics.biometric_algorithm` with Dask delayeds.
+
+    Parameters
+    ----------
+    pipeline: :py:class:`VanillaBiometrics`
+        Vanilla Biometrics based pipeline to be dasked.
+
+    npartitions: int
+        Number of partitions for the initial :py:class:`dask.bag.Bag`.
+    """
+    from bob.pipelines.mixins import estimator_dask_it, mix_me_up
+    from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin
+
+    transformer = estimator_dask_it(pipeline.transformer, npartitions=npartitions)
+    biometric_algorithm = mix_me_up([BioAlgDaskMixin], pipeline.biometric_algorithm)
+
+    return VanillaBiometrics(transformer, biometric_algorithm)
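
Since dask_vanilla_biometrics returns another VanillaBiometrics, the dasked pipeline is called exactly like the plain one. A minimal usage sketch, assuming `transformer`, `algorithm` and the sample lists are defined as in the docstring examples above:

pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48)

# The call only builds a dask graph (a dask bag); nothing runs yet.
result = pipeline(
    samples_for_training_background_model,
    samplesets_for_enroll,
    samplesets_for_scoring,
)

# Materialize the scores. If a dask.distributed Client has been created,
# compute() uses it as the default scheduler; otherwise it runs locally.
scores = result.compute()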
@@ -57,21 +57,13 @@ TODO: Work out this help
     entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG,
 )
 @click.option(
-    "--transformer",
-    "-e",
+    "--pipeline",
+    "-p",
     required=True,
     cls=ResourceOption,
-    entry_point_group="bob.pipelines.transformer",
+    entry_point_group="bob.pipelines.pipeline",
     help="Feature extraction algorithm",
 )
-@click.option(
-    "--algorithm",
-    "-a",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.bio.algorithm",  # This should be linked to bob.bio.base
-    help="Biometric Algorithm (class that implements the methods: `fit`, `enroll` and `score`)",
-)
 @click.option(
     "--database",
     "-d",
@@ -106,7 +98,7 @@ TODO: Work out this help
 )
 @verbosity_option(cls=ResourceOption)
 def vanilla_biometrics(
-    transformer, algorithm, database, dask_client, groups, output, **kwargs
+    pipeline, database, dask_client, groups, output, **kwargs
 ):
     """Runs the simplest biometrics pipeline.
@@ -152,7 +144,7 @@ def vanilla_biometrics(
     """
-    from bob.bio.base.pipelines.vanilla_biometrics.pipeline import biometric_pipeline
+    from bob.bio.base.pipelines.vanilla_biometrics.pipeline import VanillaBiometrics
     import dask.bag
     import itertools
     import os
@@ -174,14 +166,11 @@ def vanilla_biometrics(
             else False
         )
-        result = biometric_pipeline(
-            database.background_model_samples(),
-            biometric_references,
-            database.probes(group=group),
-            transformer,
-            algorithm,
-            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references
-        )
+        result = pipeline(database.background_model_samples(),
+            biometric_references,
+            database.probes(group=group),
+            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references
+        )

         if isinstance(result, dask.bag.core.Bag):
             if dask_client is not None:
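
With the two --transformer/--algorithm options collapsed into a single --pipeline resource, a configuration module now only has to expose one `pipeline` variable (plus `database`). A minimal sketch of such a config, assuming `transformer` and `algorithm` are built as in the example configurations above (the file name is hypothetical, not part of this commit):

# vanilla_config.py -- hypothetical config module for the new --pipeline option
from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics

# Build `database`, `transformer` and `algorithm` as in the example configs
# above, then expose a single `pipeline` entry for the CLI to pick up:
pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm))

The command is then pointed at this file with -p and at a database resource with -d; only those two flags appear in this diff, and the exact command name is not shown here.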