diff --git a/bob/bio/base/config/examples/gabor_mobio-male.py b/bob/bio/base/config/examples/gabor_mobio-male.py index a20453ed28611697c0c063162fbab8f54f74ed92..8336c510b5cbd927647abd607fb6339f99bcbcc7 100644 --- a/bob/bio/base/config/examples/gabor_mobio-male.py +++ b/bob/bio/base/config/examples/gabor_mobio-male.py @@ -26,6 +26,7 @@ database = DatabaseConnector( protocol="mobile0-male", ) ) +database.allow_score_multiple_references = True # Using face crop CROPPED_IMAGE_HEIGHT = 80 @@ -74,9 +75,8 @@ algorithm = AlgorithmAsBioAlg(callable=gabor_jet, features_dir=base_dir, allow_s #algorithm = AlgorithmAsBioAlg(callable=gabor_jet, features_dir=base_dir) -# comment out the code below to disable dask -from bob.pipelines.mixins import estimator_dask_it, mix_me_up -from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin +from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics + +#pipeline = VanillaBiometrics(transformer, algorithm) +pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48) -transformer = estimator_dask_it(transformer) -algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/config/examples/lda_atnt_legacy.py b/bob/bio/base/config/examples/lda_atnt_legacy.py index 4ad30b856f56079a5037b029c1f7f2ef9152be22..dd6f93edc46fb041870d2dea18ab66f02fd5257a 100644 --- a/bob/bio/base/config/examples/lda_atnt_legacy.py +++ b/bob/bio/base/config/examples/lda_atnt_legacy.py @@ -59,11 +59,7 @@ algorithm = CheckpointDistance(features_dir="./example/") # algorithm = Distance() -# comment out the code below to disable dask -from bob.pipelines.mixins import estimator_dask_it, mix_me_up -from bob.bio.base.pipelines.vanilla_biometrics.mixins import ( - BioAlgDaskMixin, -) +from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics -transformer = estimator_dask_it(transformer) -algorithm = mix_me_up([BioAlgDaskMixin], 
algorithm) +#pipeline = VanillaBiometrics(transformer, algorithm) +pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm)) diff --git a/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py b/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py index 6bedb447b6efd21de7be6022a81ed633a8874e38..d9736430ea82c0623137ee53ae3bf6919e40242e 100644 --- a/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py +++ b/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py @@ -58,10 +58,7 @@ transformer = make_pipeline( algorithm = AlgorithmAsBioAlg(callable=lda, features_dir="./example/") -from bob.pipelines.mixins import estimator_dask_it, mix_me_up -from bob.bio.base.pipelines.vanilla_biometrics.mixins import ( - BioAlgDaskMixin, -) +from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics -transformer = estimator_dask_it(transformer) -algorithm = mix_me_up([BioAlgDaskMixin], algorithm) +#pipeline = VanillaBiometrics(transformer, algorithm) +pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm)) diff --git a/bob/bio/base/config/examples/pca_atnt.py b/bob/bio/base/config/examples/pca_atnt.py index 3cd159b1d621d98a71e25e8b76d2a03d9ef05330..e75daf2752badb3930f34a10c62943455fa38970 100644 --- a/bob/bio/base/config/examples/pca_atnt.py +++ b/bob/bio/base/config/examples/pca_atnt.py @@ -11,6 +11,7 @@ import os base_dir = "example" database = DatabaseConnector(AtntBioDatabase(original_directory="./atnt", protocol="Default")) +database.allow_scoring_with_all_biometric_references = True transformer = make_pipeline( CheckpointSampleLinearize(features_dir=os.path.join(base_dir, "linearize")), @@ -26,6 +27,7 @@ from bob.bio.base.pipelines.vanilla_biometrics.mixins import ( BioAlgDaskMixin, ) -transformer = estimator_dask_it(transformer) -algorithm = mix_me_up([BioAlgDaskMixin], algorithm) +from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics 
+pipeline = VanillaBiometrics(transformer, algorithm) +#pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm)) diff --git a/bob/bio/base/config/examples/pca_mobio-male.py b/bob/bio/base/config/examples/pca_mobio-male.py index 45fc2049b45226045d17db9de5f5c500eb454b6c..668a1e6f7cbdc34dbae1f1ac5bcf40299db07bcf 100644 --- a/bob/bio/base/config/examples/pca_mobio-male.py +++ b/bob/bio/base/config/examples/pca_mobio-male.py @@ -44,12 +44,7 @@ transformer = make_pipeline( ) algorithm = CheckpointDistance(features_dir="./example/") -# comment out the code below to disable dask -from bob.pipelines.mixins import estimator_dask_it, mix_me_up -from bob.bio.base.pipelines.vanilla_biometrics.mixins import ( - BioAlgDaskMixin, -) - -transformer = estimator_dask_it(transformer) -algorithm = mix_me_up([BioAlgDaskMixin], algorithm) +from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometrics, dask_vanilla_biometrics +#pipeline = VanillaBiometrics(transformer, algorithm) +pipeline = dask_vanilla_biometrics(VanillaBiometrics(transformer, algorithm), npartitions=48) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py index edbb4090fca046b19d22d3982711084621bff3be..d67835735a076de0842a1f9467eaf54eb53a4fb7 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py @@ -1,4 +1,6 @@ # see https://docs.python.org/3/library/pkgutil.html from pkgutil import extend_path +from .pipeline import VanillaBiometrics, dask_vanilla_biometrics + __path__ = extend_path(__path__, __name__) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py index 4df6aa8975f834925f17496e7a5ec6e552a72af7..c5d0cbf487c708d2b8d6a8e5ef01cfecbc8fa897 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py @@ -14,89 
+14,160 @@ import numpy logger = logging.getLogger(__name__) -def biometric_pipeline( - background_model_samples, - biometric_reference_samples, - probe_samples, - transformer, - biometric_algorithm, - allow_scoring_with_all_biometric_references=False, -): - logger.info( - f" >> Vanilla Biometrics: Training background model with pipeline {transformer}" - ) - - # Training background model (fit will return even if samples is ``None``, - # in which case we suppose the algorithm is not trainable in any way) - transformer = train_background_model(background_model_samples, transformer) - - logger.info( - f" >> Creating biometric references with the biometric algorithm {biometric_algorithm}" - ) - - # Create biometric samples - biometric_references = create_biometric_reference( - biometric_reference_samples, transformer, biometric_algorithm - ) - - logger.info( - f" >> Computing scores with the biometric algorithm {biometric_algorithm}" - ) - - # Scores all probes - return compute_scores( +class VanillaBiometrics(object): + """ + Vanilla Biometrics Pipeline + + This is the backbone of most biometric recognition systems. + It implements three subpipelines and they are the following: + + - :py:class:`VanillaBiometrics.train_background_model`: Initializes or trains your transformer. 
+ It will run :py:meth:`sklearn.base.BaseEstimator.fit` + + - :py:class:`VanillaBiometrics.create_biometric_reference`: Creates biometric references + It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of + :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.enroll` + + - :py:class:`VanillaBiometrics.compute_scores`: Computes scores + It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of + :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.score` + + + Example + ------- + >>> from sklearn.pipeline import make_pipeline + >>> from bob.bio.base.pipelines.vanilla_biometrics.implemented import Distance + >>> transformer = make_pipeline(estimator_1, estimator_2) + >>> biometric_algoritm = Distance() + >>> pipeline = VanillaBiometrics(transformer, biometric_algoritm) + >>> pipeline(samples_for_training_back_ground_model, samplesets_for_enroll, samplesets_for_scoring) + + + To run this pipeline using Dask, use the function :py:func:`dask_vanilla_biometrics`. 
+ + Example + ------- + >>> pipeline = VanillaBiometrics(transformer, biometric_algoritm) + >>> pipeline = dask_vanilla_biometrics(pipeline) + >>> pipeline(samples_for_training_back_ground_model, samplesets_for_enroll, samplesets_for_scoring).compute() + + + Parameters + ---------- + + transformer: :py:class:`sklearn.pipeline.Pipeline` or a :py:class:`sklearn.base.BaseEstimator` + Transformer that will preprocess your data + + biometric_algorithm: :py:class:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm` + Biometrics algorithm object that implements the `enroll` and `score` methods + + + """ + + def __init__(self, transformer, biometric_algorithm): + self.transformer = transformer + self.biometric_algorithm = biometric_algorithm + + def __call__( + self, + background_model_samples, + biometric_reference_samples, probe_samples, - biometric_references, - transformer, - biometric_algorithm, - allow_scoring_with_all_biometric_references, - ) + allow_scoring_with_all_biometric_references=False, + ): + logger.info( + f" >> Vanilla Biometrics: Training background model with pipeline {self.transformer}" + ) + # Training background model (fit will return even if samples is ``None``, + # in which case we suppose the algorithm is not trainable in any way) + self.transformer = self.train_background_model(background_model_samples) -def train_background_model(background_model_samples, transformer): - # background_model_samples is a list of Samples + logger.info( + f" >> Creating biometric references with the biometric algorithm {self.biometric_algorithm}" + ) - # We might have algorithms that has no data for training - if len(background_model_samples) <= 0: - logger.warning( - "There's no data to train background model." - "For the rest of the execution it will be assumed that the pipeline is stateless." 
+ # Create biometric samples + biometric_references = self.create_biometric_reference( + biometric_reference_samples ) - return transformer - transformer = transformer.fit(background_model_samples) - return transformer + logger.info( + f" >> Computing scores with the biometric algorithm {self.biometric_algorithm}" + ) + # Scores all probes + return self.compute_scores( + probe_samples, + biometric_references, + allow_scoring_with_all_biometric_references, + ) -def create_biometric_reference( - biometric_reference_samples, transformer, biometric_algorithm -): - biometric_reference_features = transformer.transform(biometric_reference_samples) + def train_background_model(self, background_model_samples): + # background_model_samples is a list of Samples - biometric_references = biometric_algorithm.enroll_samples( - biometric_reference_features - ) + # We might have algorithms that has no data for training + if len(background_model_samples) <= 0: + logger.warning( + "There's no data to train background model." + "For the rest of the execution it will be assumed that the pipeline is stateless." 
+ ) + return self.transformer - # models is a list of Samples - return biometric_references + return self.transformer.fit(background_model_samples) + def create_biometric_reference(self, biometric_reference_samples): + biometric_reference_features = self.transformer.transform( + biometric_reference_samples + ) -def compute_scores( - probe_samples, - biometric_references, - transformer, - biometric_algorithm, - allow_scoring_with_all_biometric_references=False, -): + biometric_references = self.biometric_algorithm.enroll_samples( + biometric_reference_features + ) - # probes is a list of SampleSets - probe_features = transformer.transform(probe_samples) + # models is a list of Samples + return biometric_references - scores = biometric_algorithm.score_samples( - probe_features, + def compute_scores( + self, + probe_samples, biometric_references, - allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, - ) + allow_scoring_with_all_biometric_references=False, + ): + + # probes is a list of SampleSets + probe_features = self.transformer.transform(probe_samples) + + scores = self.biometric_algorithm.score_samples( + probe_features, + biometric_references, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + + # scores is a list of Samples + return scores + + +def dask_vanilla_biometrics(pipeline, npartitions=None): + """ + Given a :py:class:`VanillaBiometrics`, wraps :py:meth:`VanillaBiometrics.transformer` and + :py:class:`VanillaBiometrics.biometric_algorithm` with Dask delayeds + + Parameters + ---------- + + pipeline: :py:class:`VanillaBiometrics` + Vanilla Biometrics based pipeline to be dasked + + + npartitions: int + Number of partitions for the initial `Dask.bag` + """ + + from bob.pipelines.mixins import estimator_dask_it, mix_me_up + from bob.bio.base.pipelines.vanilla_biometrics.mixins import BioAlgDaskMixin + + transformer = estimator_dask_it(pipeline.transformer, 
npartitions=npartitions) + biometric_algorithm = mix_me_up([BioAlgDaskMixin], pipeline.biometric_algorithm) - # scores is a list of Samples - return scores + return VanillaBiometrics(transformer, biometric_algorithm) diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py index 14b910352c26ad1713b4e6329caa1c7ac62446e6..ea0ad30ec52ce3ea7f6937b890ba2c47f0f97f43 100644 --- a/bob/bio/base/script/vanilla_biometrics.py +++ b/bob/bio/base/script/vanilla_biometrics.py @@ -57,21 +57,13 @@ TODO: Work out this help entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG, ) @click.option( - "--transformer", - "-e", + "--pipeline", + "-p", required=True, cls=ResourceOption, - entry_point_group="bob.pipelines.transformer", + entry_point_group="bob.pipelines.pipeline", help="Feature extraction algorithm", ) -@click.option( - "--algorithm", - "-a", - required=True, - cls=ResourceOption, - entry_point_group="bob.bio.algorithm", # This should be linked to bob.bio.base - help="Biometric Algorithm (class that implements the methods: `fit`, `enroll` and `score`)", -) @click.option( "--database", "-d", @@ -106,7 +98,7 @@ TODO: Work out this help ) @verbosity_option(cls=ResourceOption) def vanilla_biometrics( - transformer, algorithm, database, dask_client, groups, output, **kwargs + pipeline, database, dask_client, groups, output, **kwargs ): """Runs the simplest biometrics pipeline. 
@@ -152,7 +144,7 @@ def vanilla_biometrics( """ - from bob.bio.base.pipelines.vanilla_biometrics.pipeline import biometric_pipeline + from bob.bio.base.pipelines.vanilla_biometrics.pipeline import VanillaBiometrics import dask.bag import itertools import os @@ -174,14 +166,11 @@ def vanilla_biometrics( else False ) - result = biometric_pipeline( - database.background_model_samples(), - biometric_references, - database.probes(group=group), - transformer, - algorithm, - allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references - ) + result = pipeline(database.background_model_samples(), + biometric_references, + database.probes(group=group), + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references + ) if isinstance(result, dask.bag.core.Bag): if dask_client is not None: