diff --git a/bob/bio/base/config/examples/lda_atnt_legacy.py b/bob/bio/base/config/examples/lda_atnt_legacy.py index a23b93d859d81cbbd10656fb9003b62488ba217b..8903a324d5d3d9fb38b7112425eec2150c6c9803 100644 --- a/bob/bio/base/config/examples/lda_atnt_legacy.py +++ b/bob/bio/base/config/examples/lda_atnt_legacy.py @@ -1,22 +1,26 @@ -# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor - -import bob.bio.base -import bob.bio.face - -## DATABASE +from bob.bio.face.database import AtntBioDatabase +from bob.bio.base.algorithm import LDA +from bob.bio.face.preprocessor import FaceCrop +from sklearn.pipeline import make_pipeline +from bob.bio.base.mixins.legacy import ( + LegacyPreprocessor, + LegacyAlgorithmAsTransformer, +) +from bob.pipelines.transformers import CheckpointSampleLinearize +from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyDatabaseConnector +import functools +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + CheckpointDistance, +) -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector -database = DatabaseConnector(bob.bio.face.database.AtntBioDatabase(original_directory="./atnt"), protocol="Default") +# DATABASE +database = LegacyDatabaseConnector( + AtntBioDatabase(original_directory="./atnt", protocol="Default"), +) -from sklearn.pipeline import Pipeline, make_pipeline -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin -from bob.bio.base.transformers import CheckpointSampleLinearize, CheckpointSamplePCA - -#### PREPROCESSOR LEGACY ### -import functools +# PREPROCESSOR LEGACY # Cropping CROPPED_IMAGE_HEIGHT = 80 @@ -31,45 +35,36 @@ LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) # I JUST MADE UP THESE NUMBERS FIXED_RIGHT_EYE_POS = (30, 30) FIXED_LEFT_EYE_POS = (20, 50) -import bob.bio.face face_cropper = functools.partial( - bob.bio.face.preprocessor.FaceCrop, + FaceCrop, cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, ) -from bob.pipelines.mixins import mix_me_up -preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin) - -#### ALGORITHM LEGACY ##### +# ALGORITHM LEGACY -algorithm = functools.partial(bob.bio.base.algorithm.LDA, use_pinv=True, pca_subspace_dimension=0.90) +lda = functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90) -from bob.pipelines.mixins import dask_it -extractor = Pipeline( - steps=[ - ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")), - ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")), - ( - "2", - LegacyAlgorithmMixin( - callable=algorithm, features_dir="./example/extractor2", model_path="./example/" - ), - ), - ] +transformer = make_pipeline( + LegacyPreprocessor(callable=face_cropper, features_dir="./example/transformer0"), + CheckpointSampleLinearize(features_dir="./example/transformer1"), + LegacyAlgorithmAsTransformer( + callable=lda, features_dir="./example/transformer2", model_path="./example/" + ), ) -#extractor = dask_it(extractor) +algorithm = CheckpointDistance(features_dir="./example/") + + +# comment out the code below to disable dask +from bob.pipelines.mixins import estimator_dask_it, mix_me_up from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( - 
Distance, - BiometricAlgorithmCheckpointMixin, + BioAlgDaskMixin, ) - -class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): pass -algorithm = CheckpointDistance(features_dir="./example/") -# algorithm = Distance() +transformer = estimator_dask_it(transformer) +algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py b/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py index 0b9d601e3840d24d76835b49aa2dfbfbd0dc17e7..469e9c9552388ac814c6671c4d2518fda17078e4 100644 --- a/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py +++ b/bob/bio/base/config/examples/lda_atnt_legacy_all_legacy.py @@ -1,23 +1,24 @@ -# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor - -import bob.bio.base -import bob.bio.face - -## DATABASE +from bob.bio.face.database import AtntBioDatabase +from bob.bio.base.algorithm import LDA +from bob.bio.face.preprocessor import FaceCrop +from sklearn.pipeline import make_pipeline +from bob.bio.base.mixins.legacy import ( + LegacyPreprocessor, + LegacyAlgorithmAsTransformer, + LegacyAlgorithmAsBioAlg, +) +from bob.pipelines.transformers import CheckpointSampleLinearize +from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyDatabaseConnector +import functools -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector -database = DatabaseConnector(bob.bio.face.database.AtntBioDatabase(original_directory="./atnt"), protocol="Default") +# DATABASE +database = LegacyDatabaseConnector( + AtntBioDatabase(original_directory="./atnt", protocol="Default"), +) -from sklearn.pipeline import Pipeline, make_pipeline -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin -from bob.bio.base.transformers import CheckpointSampleLinearize, CheckpointSamplePCA -from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyBiometricAlgorithm - -#### PREPROCESSOR LEGACY ### -import functools +# PREPROCESSOR LEGACY # Cropping CROPPED_IMAGE_HEIGHT = 80 @@ -32,37 +33,35 @@ LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) # I JUST MADE UP THESE NUMBERS FIXED_RIGHT_EYE_POS = (30, 30) FIXED_LEFT_EYE_POS = (20, 50) -import bob.bio.face face_cropper = functools.partial( - bob.bio.face.preprocessor.FaceCrop, + FaceCrop, cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, ) -from bob.pipelines.mixins import mix_me_up -preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin) - -#### ALGORITHM LEGACY ##### +# ALGORITHM LEGACY -algorithm_estimator = functools.partial(bob.bio.base.algorithm.LDA, use_pinv=True, pca_subspace_dimension=0.90) +lda = functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90) -from bob.pipelines.mixins import dask_it -extractor = Pipeline( - steps=[ - ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")), - ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")), - ( - "2", - LegacyAlgorithmMixin( - callable=algorithm_estimator, features_dir="./example/extractor2", model_path="./example/" - ), - ), - ] +transformer = make_pipeline( + LegacyPreprocessor(callable=face_cropper, features_dir="./example/transformer0"), + 
CheckpointSampleLinearize(features_dir="./example/transformer1"), + LegacyAlgorithmAsTransformer( + callable=lda, features_dir="./example/transformer2", model_path="./example/" + ), ) -extractor = dask_it(extractor) +algorithm = LegacyAlgorithmAsBioAlg(callable=lda, features_dir="./example/") + + +# comment out the code below to disable dask +from bob.pipelines.mixins import estimator_dask_it, mix_me_up +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + BioAlgDaskMixin, +) -algorithm = LegacyBiometricAlgorithm(callable=algorithm_estimator, features_dir="./example/") +transformer = estimator_dask_it(transformer) +algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/config/examples/pca_atnt.py b/bob/bio/base/config/examples/pca_atnt.py index a72d9577873c8d7fe76502223867ca64b1b0b46b..76015a797b36ac5ee1336af3ee5ef8e1b25fca12 100644 --- a/bob/bio/base/config/examples/pca_atnt.py +++ b/bob/bio/base/config/examples/pca_atnt.py @@ -1,20 +1,28 @@ -#from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor - -import bob.bio.face from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector -database = DatabaseConnector(bob.bio.face.database.AtntBioDatabase(original_directory="./atnt"), protocol="Default") +from sklearn.pipeline import make_pipeline +from bob.pipelines.transformers import CheckpointSampleLinearize, CheckpointSamplePCA +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + CheckpointDistance, +) +from bob.bio.face.database import AtntBioDatabase -from sklearn.pipeline import Pipeline, make_pipeline -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.transformers import CheckpointSampleLinearize, CheckpointSamplePCA +database = DatabaseConnector( + AtntBioDatabase(original_directory="./atnt"), protocol="Default" +) +transformer = make_pipeline( + CheckpointSampleLinearize(features_dir="./example/extractor0"), + CheckpointSamplePCA( + features_dir="./example/extractor1", model_path="./example/pca.pkl" + ), +) +algorithm = CheckpointDistance(features_dir="./example/") -from bob.pipelines.mixins import dask_it -extractor = Pipeline(steps=[('0',CheckpointSampleLinearize(features_dir="./example/extractor0")), - ('1',CheckpointSamplePCA(features_dir="./example/extractor1", model_path="./example/pca.pkl"))]) -#extractor = dask_it(extractor) +# comment out the code below to disable dask +from bob.pipelines.mixins import estimator_dask_it, mix_me_up +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + BioAlgDaskMixin, +) -from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance, BiometricAlgorithmCheckpointMixin -class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): pass -algorithm = CheckpointDistance(features_dir="./example/") -#algorithm = Distance() +transformer = estimator_dask_it(transformer) +algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/config/examples/pca_atnt_legacy.py b/bob/bio/base/config/examples/pca_atnt_legacy.py deleted file mode 100644 index b895eba8d35869cd22ecb50ed08668e3dd51e604..0000000000000000000000000000000000000000 --- a/bob/bio/base/config/examples/pca_atnt_legacy.py +++ /dev/null @@ -1,70 +0,0 @@ -# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor - - -### DATABASE -import bob.bio.face -from bob.bio.base.pipelines.vanilla_biometrics.legacy import 
DatabaseConnector -database = DatabaseConnector(bob.bio.face.database.AtntBioDatabase(original_directory="./atnt"), protocol="Default") - - -from sklearn.pipeline import Pipeline, make_pipeline -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin -from bob.bio.base.transformers import CheckpointSampleLinearize, CheckpointSamplePCA - - -#### PREPROCESSOR LEGACY ### -import functools - -# Cropping -CROPPED_IMAGE_HEIGHT = 80 -CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 - -# eye positions for frontal images -RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) -LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) - - -# RANDOM EYES POSITIONS -# I JUST MADE UP THESE NUMBERS -FIXED_RIGHT_EYE_POS = (30, 30) -FIXED_LEFT_EYE_POS = (20, 50) -import bob.bio.face - -face_cropper = functools.partial( - bob.bio.face.preprocessor.FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, - fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS}, -) - - -from bob.pipelines.mixins import mix_me_up -preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin) - -from bob.pipelines.mixins import dask_it - -extractor = Pipeline( - steps=[ - ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")), - ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")), - ( - "2", - CheckpointSamplePCA( - features_dir="./example/extractor2", model_path="./example/pca.pkl" - ), - ), - ] -) -# extractor = dask_it(extractor) - -from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( - Distance, - BiometricAlgorithmCheckpointMixin, -) - - -class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): - pass -algorithm = CheckpointDistance(features_dir="./example/") -# algorithm = Distance() diff --git a/bob/bio/base/config/examples/pca_mobio-male.py b/bob/bio/base/config/examples/pca_mobio-male.py index 440d4e97d1c46e4e41c52f49f84ebaff768a8fdf..61393f38dd0ac2c93b8e579c097a76ad44028f57 100644 --- a/bob/bio/base/config/examples/pca_mobio-male.py +++ b/bob/bio/base/config/examples/pca_mobio-male.py @@ -1,69 +1,54 @@ - -import functools -import bob.db.atnt -from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, DatabaseConnectorAnnotated +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + CheckpointDistance, +) +from bob.bio.base.pipelines.vanilla_biometrics.legacy import ( + LegacyDatabaseConnector, + LegacyPreprocessor, +) +from bob.bio.face.database.mobio import MobioBioDatabase +from bob.bio.face.preprocessor import FaceCrop from bob.extension import rc -import bob.bio.face - -from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin -from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyBiometricAlgorithm -from bob.bio.base.transformers import CheckpointSamplePCA - -import os -#base_dir = "/idiap/temp/tpereira/mobio/pca" -base_dir = "./example" - - -### DATABASE - -original_directory=rc['bob.db.mobio.directory'] -annotation_directory=rc['bob.db.mobio.annotation_directory'] -database = DatabaseConnectorAnnotated(bob.bio.face.database.mobio.MobioBioDatabase( - original_directory=original_directory, - annotation_directory=annotation_directory, - original_extension=".png" - ), - protocol="mobile0-male") - -from 
sklearn.pipeline import Pipeline, make_pipeline -from sklearn.decomposition import PCA - -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from bob.bio.base.transformers import CheckpointSampleLinearize - - +from bob.pipelines.transformers import CheckpointSampleLinearize, CheckpointSamplePCA +from sklearn.pipeline import make_pipeline +import functools -#### PREPROCESSOR LEGACY ### +database = LegacyDatabaseConnector( + MobioBioDatabase( + original_directory=rc["bob.db.mobio.directory"], + annotation_directory=rc["bob.db.mobio.annotation_directory"], + original_extension=".png", + protocol="mobile0-male", + ) +) # Using face crop CROPPED_IMAGE_HEIGHT = 80 CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5 - -## eye positions for frontal images +# eye positions for frontal images RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1) LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3) - -original_preprocessor = functools.partial( - bob.bio.face.preprocessor.FaceCrop, - cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), - cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, - ) - - -from bob.pipelines.mixins import mix_me_up -preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin) -#class preprocessor(CheckpointMixin, SampleMixin, LegacyProcessorMixin): pass - -from bob.pipelines.mixins import dask_it -extractor = Pipeline(steps=[ - ('0', preprocessor(callable=original_preprocessor, features_dir=os.path.join(base_dir,"extractor0"))), - ('1',CheckpointSampleLinearize(features_dir=os.path.join(base_dir,"extractor1"))), - ('2',CheckpointSamplePCA(features_dir=os.path.join(base_dir,"extractor2"), model_path=os.path.join(base_dir,"pca.pkl"))) - ]) -#extractor = dask_it(extractor, npartitions=48) - -from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance, BiometricAlgorithmCheckpointMixin - -class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): pass -algorithm = CheckpointDistance(features_dir=base_dir) +# FaceCrop +preprocessor = functools.partial( + FaceCrop, + cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH), + cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS}, +) + +transformer = make_pipeline( + LegacyPreprocessor(preprocessor), + CheckpointSampleLinearize(features_dir="./example/extractor0"), + CheckpointSamplePCA( + features_dir="./example/extractor1", model_path="./example/pca.pkl" + ), +) +algorithm = CheckpointDistance(features_dir="./example/") + +# comment out the code below to disable dask +from bob.pipelines.mixins import estimator_dask_it, mix_me_up +from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import ( + BioAlgDaskMixin, +) + +transformer = estimator_dask_it(transformer) +algorithm = mix_me_up([BioAlgDaskMixin], algorithm) diff --git a/bob/bio/base/mixins/__init__.py b/bob/bio/base/mixins/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/bob/bio/base/mixins/legacy.py b/bob/bio/base/mixins/legacy.py deleted file mode 100644 index 3cf0d70f3b3448b49379af0bf6b22b2c58aa1899..0000000000000000000000000000000000000000 --- a/bob/bio/base/mixins/legacy.py +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> - - -""" -Mixins to handle legacy components -""" - -from bob.pipelines.mixins import CheckpointMixin, 
SampleMixin -from sklearn.base import TransformerMixin, BaseEstimator -from sklearn.utils.validation import check_array -from bob.pipelines.sample import Sample, DelayedSample, SampleSet -from bob.pipelines.utils import is_picklable -import numpy -import logging -import os -import bob.io.base -import functools - -logger = logging.getLogger(__name__) - - -def scikit_to_bob_supervised(X, Y): - """ - Given an input data ready for :py:method:`scikit.estimator.BaseEstimator.fit`, - convert for :py:class:`bob.bio.base.algorithm.Algorithm.train_projector` when - `performs_projection=True` - """ - - # TODO: THIS IS VERY INNEFICI - logger.warning( - "INEFFICIENCY WARNING. HERE YOU ARE USING A HACK FOR USING BOB ALGORITHMS IN SCIKIT LEARN PIPELINES. \ - WE RECOMMEND YOU TO PORT THIS ALGORITHM. DON'T BE LAZY :-)" - ) - - bob_output = dict() - for x, y in zip(X, Y): - if y in bob_output: - bob_output[y] = numpy.vstack((bob_output[y], x.data)) - else: - bob_output[y] = x.data - - return [bob_output[k] for k in bob_output] - - -class LegacyProcessorMixin(TransformerMixin): - """Class that wraps :py:class:`bob.bio.base.preprocessor.Preprocessor` and - :py:class:`bob.bio.base.extractor.Extractors` - - - Example - ------- - - Wrapping preprocessor with functtools - >>> from bob.bio.base.mixins.legacy import LegacyProcessorMixin - >>> from bob.bio.face.preprocessor import FaceCrop - >>> import functools - >>> transformer = LegacyProcessorMixin(functools.partial(FaceCrop, cropped_image_size=(10,10))) - - Example - ------- - Wrapping extractor - >>> from bob.bio.base.mixins.legacy import LegacyProcessorMixin - >>> from bob.bio.face.extractor import Linearize - >>> transformer = LegacyProcessorMixin(Linearize) - - - Parameters - ---------- - callable: callable - Calleble function that instantiates the scikit estimator - - """ - - def __init__(self, callable=None, **kwargs): - super().__init__(**kwargs) - self.callable = callable - self.instance = None - - def transform(self, X): - - # Instantiates and do the "real" transform - if self.instance is None: - self.instance = self.callable() - if isinstance(X[0], dict): - # Handling annotations if it's the case - retval = [] - for x in X: - data = x["data"] - annotations = x["annotations"] - - retval.append(self.instance(data, annotations=annotations)) - return retval - - else: - X = check_array(X, allow_nd=True) - return [self.instance(x) for x in X] - - def __setstate__(self): - # Handling unpicklable objects - self.instance = None - - def __getstate__(self): - # Handling unpicklable objects - self.instance = None - - -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -class LegacyAlgorithmMixin(CheckpointMixin, SampleMixin, BaseEstimator): - """Class that wraps :py:class:`bob.bio.base.algorithm.Algoritm` - - :py:method:`LegacyAlgorithmrMixin.fit` maps to :py:method:`bob.bio.base.algorithm.Algoritm.train_projector` - - :py:method:`LegacyAlgorithmrMixin.transform` maps :py:method:`bob.bio.base.algorithm.Algoritm.project` - - .. 
warning THIS HAS TO BE SAMPABLE AND CHECKPOINTABLE - - - Example - ------- - - Wrapping LDA algorithm with functtools - >>> from bob.bio.base.mixins.legacy import LegacyAlgorithmMixin - >>> from bob.bio.base.algorithm import LDA - >>> import functools - >>> transformer = LegacyAlgorithmMixin(functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90)) - - - - Parameters - ---------- - callable: callable - Calleble function that instantiates the scikit estimator - - """ - - def __init__(self, callable=None, **kwargs): - super().__init__(**kwargs) - self.callable = callable - self.instance = None - self.projector_file = None - - def fit(self, X, y=None, **fit_params): - - self.projector_file = os.path.join(self.model_path, "Projector.hdf5") - if os.path.exists(self.projector_file): - return self - - # Instantiates and do the "real" fit - if self.instance is None: - self.instance = self.callable() - - if self.instance.performs_projection: - # Organizing the date by class - bob_X = scikit_to_bob_supervised(X, y) - self.instance.train_projector(bob_X, self.projector_file) - - # Deleting the instance, so it's picklable - self.instance = None - - return self - - def transform(self, X): - def _project_save_sample(sample): - # Project - projected_data = self.instance.project(sample.data) - - # Checkpointing - path = self.make_path(sample) - bob.io.base.create_directories_safe(os.path.dirname(path)) - f = bob.io.base.HDF5File(path, "w") - - self.instance.write_feature(projected_data, f) - reader = get_reader(self.instance.read_feature, path) - - return DelayedSample(reader, parent=sample) - - self.projector_file = os.path.join(self.model_path, "Projector.hdf5") - if not isinstance(X, list): - raise ValueError("It's expected a list, not %s" % type(X)) - - # Instantiates and do the "real" transform - if self.instance is None: - self.instance = self.callable() - self.instance.load_projector(self.projector_file) - - if isinstance(X[0], Sample) or isinstance(X[0], DelayedSample): - samples = [] - for sample in X: - samples.append(_project_save_sample(sample)) - return samples - - elif isinstance(X[0], SampleSet): - # Projecting and checkpointing sampleset - sample_sets = [] - for sset in X: - samples = [] - for sample in sset.samples: - samples.append(_project_save_sample(sample)) - sample_sets.append(SampleSet(samples=samples, parent=sset)) - return sample_sets - - else: - raise ValueError("Type not allowed %s" % type(X[0])) - - def __setstate__(self): - # Handling unpicklable objects - self.instance = None - - def __getstate__(self): - # Handling unpicklable objects - self.instance = None - - -def get_reader(reader, path): - if is_picklable(reader): - return functools.partial(reader, path) - else: - logger.warning( - f"The method {reader} is not picklable. Shiping its unbounded method to `DelayedSample`." 
- ) - reader = reader.__func__ # The reader object might not be picklable - return functools.partial(reader, None, path) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..1fbfce94c8bd30451fae052346b160272e4e3cfe --- /dev/null +++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py @@ -0,0 +1,213 @@ +from abc import ABCMeta, abstractmethod +from bob.pipelines.sample import Sample, SampleSet, DelayedSample + + +class BioAlgorithm(metaclass=ABCMeta): + """Describes a base biometric comparator for the Vanilla Biometrics Pipeline :ref:`_bob.bio.base.struct_bio_rec_sys`_. + + biometric model enrollement, via ``enroll()`` and scoring, with + ``score()``. + + """ + + def enroll_samples(self, biometric_references): + """This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`. + + It handles the creation of biometric references + + Parameters + ---------- + biometric_references : list + A list of :py:class:`SampleSet` objects to be used for + creating biometric references. The sets must be identified + with a unique id and a path, for eventual checkpointing. + """ + + retval = [] + for k in biometric_references: + # compute on-the-fly + retval.append(self._enroll_sample_set(k)) + + return retval + + def _enroll_sample_set(self, sampleset): + + # Unpack the sampleset + data = [s.data for s in sampleset.samples] + + # Enroll + return Sample(self.enroll(data), parent=sampleset) + + @abstractmethod + def enroll(self, data): + """ + It handles the creation of ONE biometric reference for the vanilla ppipeline + + Parameters + ---------- + + data: + Data used for the creation of ONE BIOMETRIC REFERENCE + + """ + pass + + def score_samples(self, probe_features, biometric_references): + """Scores a new sample against multiple (potential) references + + Parameters + ---------- + + probes : list + A list of :py:class:`SampleSet` objects to be used for + scoring the input references + + biometric_references : list + A list of :py:class:`Sample` objects to be used for + scoring the input probes, must have an ``id`` attribute that + will be used to cross-reference which probes need to be scored. + + Returns + ------- + + scores : list + For each sample in a probe, returns as many scores as there are + samples in the probe, together with the probe's and the + relevant reference's subject identifiers. + + """ + + retval = [] + for p in probe_features: + retval.append(self._score_sample_set(p, biometric_references)) + return retval + + def _score_sample_set(self, sampleset, biometric_references): + """Given a sampleset for probing, compute the scores and retures a sample set with the scores + """ + + # Stacking the samples from a sampleset + data = [s.data for s in sampleset.samples] + + # Compute scores for each sample inside of the sample set + # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) + # We should add an agregator function here so we can properlly agregate samples from + # a sampleset either after or before scoring. 
+ # To be honest, this should be the default behaviour + retval = [] + for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)): + # Creating one sample per comparison + subprobe_scores = [] + for ref in [ + r for r in biometric_references if r.key in sampleset.references + ]: + subprobe_scores.append(Sample(self.score(ref.data, s), parent=ref)) + + # Creating one sampleset per probe + subprobe = SampleSet(subprobe_scores, parent=sampleset) + subprobe.subprobe_id = subprobe_id + retval.append(subprobe) + + return retval + + @abstractmethod + def score(self, biometric_reference, data): + """It handles the score computation for one sample + + Parameters + ---------- + + biometric_reference : list + Biometric reference to be compared + + data : list + Data to be compared + + Returns + ------- + + scores : list + For each sample in a probe, returns as many scores as there are + samples in the probe, together with the probe's and the + relevant reference's subject identifiers. + + """ + pass + + +class Database(metaclass=ABCMeta): + """Base class for Vanilla Biometric pipeline + """ + + @abstractmethod + def background_model_samples(self): + """Returns :py:class:`Sample`'s to train a background model + + + Returns + ------- + samples : list + List of samples for background model training. + + """ + pass + + @abstractmethod + def references(self, group="dev"): + """Returns :py:class:`Reference`'s to enroll biometric references + + + Parameters + ---------- + group : :py:class:`str`, optional + Limits samples to this group + + + Returns + ------- + references : list + List of samples for the creation of biometric references. + + """ + pass + + @abstractmethod + def probes(self, group): + """Returns :py:class:`Probe`'s to score biometric references + + + Parameters + ---------- + group : str + Limits samples to this group + + + Returns + ------- + probes : list + List of samples for the creation of biometric probes. + + """ + pass + + +def save_scores_four_columns(path, probe): + """ + Write scores in the four columns format + """ + + with open(path, "w") as f: + for biometric_reference in probe.samples: + line = "{0} {1} {2} {3}\n".format( + biometric_reference.subject, + probe.subject, + probe.key, + biometric_reference.data, + ) + f.write(line) + + def load(): + with open(path) as f: + return [float(line.split()[-1]) for line in f] + + return DelayedSample(load, parent=probe) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py b/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py deleted file mode 100644 index 5853582d87a43e61b06926bd817546ebf15b192e..0000000000000000000000000000000000000000 --- a/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py +++ /dev/null @@ -1,327 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> -# @author: Andre Anjos <andre.anjos@idiap.ch> - -from bob.pipelines.sample import Sample, SampleSet, DelayedSample -import numpy -import bob.io.base -import os -import functools - - -class BiometricAlgorithm(object): - """Describes a base biometric comparator for the Vanilla Biometrics Pipeline :ref:`_bob.bio.base.struct_bio_rec_sys`_. - - biometric model enrollement, via ``enroll()`` and scoring, with - ``score()``. 
- - """ - - def __init__(self): - pass - - def _enroll_samples( - self, biometric_references, extractor=None, checkpoint=None, *args, **kwargs - ): - """This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`. - - It handles the creation of biometric references - - Parameters - ---------- - biometric_references : list - A list of :py:class:`SampleSet` objects to be used for - creating biometric references. The sets must be identified - with a unique id and a path, for eventual checkpointing. - - background_model : - Object containing the background model - - checkpoint : str, None - If passed and not ``None``, then it is considered to be the - path of a directory containing possible cached values for each - of the references in this experiment. If that is the case, the - values are loaded from there and not recomputed. - - *args, **kwargs : - Extra parameters that can be used to hook-up processing graph - dependencies, but are currently ignored - - """ - - retval = [] - for k in biometric_references: - # compute on-the-fly - retval.append(self._enroll_sample_set(k)) - - return retval - - def _enroll_sample_set(self, sampleset): - - # Unpack the sampleset - data = [s.data for s in sampleset.samples] - - # Enroll - return Sample(self.enroll(data), parent=sampleset) - - def enroll(self, data, extractor=None, **kwargs): - """ - It handles the creation of ONE biometric reference for the vanilla ppipeline - - Parameters - ---------- - - data: - Data used for the creation of ONE BIOMETRIC REFERENCE - - """ - - raise NotImplemented("Please, implement me") - - def _score_samples( - self, probes, biometric_references, extractor=None, *args, **kwargs - ): - """Scores a new sample against multiple (potential) references - - Parameters - ---------- - - probes : list - A list of :py:class:`SampleSet` objects to be used for - scoring the input references - - biometric_references : list - A list of :py:class:`Sample` objects to be used for - scoring the input probes, must have an ``id`` attribute that - will be used to cross-reference which probes need to be scored. - - extractor : - Path pointing to stored model on disk - - *args, **kwargs : - Extra parameters that can be used to hook-up processing graph - dependencies, but are currently ignored - - - Returns - ------- - - scores : list - For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the - relevant reference's subject identifiers. - - """ - - retval = [] - for p in probes: - retval.append(self._score_sample_set(p, biometric_references, extractor)) - return retval - - def _score_sample_set(self, sampleset, biometric_references, extractor): - """Given a sampleset for probing, compute the scores and retures a sample set with the scores - """ - - # Stacking the samples from a sampleset - data = [s.data for s in sampleset.samples] - - # Compute scores for each sample inside of the sample set - # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) - # We should add an agregator function here so we can properlly agregate samples from - # a sampleset either after or before scoring. 
- # To be honest, this should be the default behaviour - retval = [] - for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)): - # Creating one sample per comparison - subprobe_scores = [] - for ref in [ - r for r in biometric_references if r.key in sampleset.references - ]: - subprobe_scores.append( - Sample(self.score(ref.data, s, extractor), parent=ref) - ) - - # Creating one sampleset per probe - subprobe = SampleSet(subprobe_scores, parent=sampleset) - subprobe.subprobe_id = subprobe_id - retval.append(subprobe) - - return retval - - def score(self, biometric_reference, data, extractor=None, **kwargs): - """It handles the score computation for one sample - - Parameters - ---------- - - biometric_reference : list - Biometric reference to be compared - - data : list - Data to be compared - - Returns - ------- - - scores : list - For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the - relevant reference's subject identifiers. - - """ - raise NotImplemented("Please, implement me") - - -from bob.pipelines.mixins import CheckpointMixin - - -class BiometricAlgorithmCheckpointMixin(CheckpointMixin): - """Mixing used to checkpoint Enrolled and Scoring samples. - - Examples - -------- - - >>> from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import BiometricAlgorithmCheckpointMixin, Distance - >>> class DistanceCheckpoint(BiometricAlgorithmCheckpointMixin, Distance) pass: - >>> biometric_algorithm = DistanceCheckpoint(features_dir="./") - >>> biometric_algorithm.enroll(sample) - - It's possible to use it as with the :py:func:`bob.pipelines.mixins.mix_me_up` - - >>> from bob.pipelines.mixins import mix_me_up - >>> biometric_algorithm = mix_me_up([BiometricAlgorithmCheckpointMixin], Distance)(features_dir="./") - >>> biometric_algorithm.enroll(sample) - - """ - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.biometric_reference_dir = os.path.join( - self.features_dir, "biometric_references" - ) - self.score_dir = os.path.join(self.features_dir, "scores") - - def save(self, sample, path): - return bob.io.base.save(sample.data, path, create_directories=True) - - def _enroll_sample_set(self, sampleset): - """ - Enroll a sample set with checkpointing - """ - - # Amending `models` directory - path = os.path.join( - self.biometric_reference_dir, str(sampleset.key) + self.extension - ) - if path is None or not os.path.isfile(path): - - # Enrolling the sample - enrolled_sample = super()._enroll_sample_set(sampleset) - - # saving the new sample - self.save(enrolled_sample, path) - - # Dealaying it. 
- # This seems inefficient, but it's crucial for large datasets - delayed_enrolled_sample = DelayedSample( - functools.partial(bob.io.base.load, path), enrolled_sample - ) - - else: - # If sample already there, just load - delayed_enrolled_sample = self.load(path) - delayed_enrolled_sample.key = sampleset.key - - return delayed_enrolled_sample - - def _score_sample_set(self, sampleset, biometric_references, extractor): - """Given a sampleset for probing, compute the scores and retures a sample set with the scores - """ - # Computing score - scored_sample_set = super()._score_sample_set( - sampleset, biometric_references, extractor - ) - - for s in scored_sample_set: - # Checkpointing score - path = os.path.join(self.score_dir, str(s.path) + ".txt") - bob.io.base.create_directories_safe(os.path.dirname(path)) - - delayed_scored_sample = save_scores_four_columns(path, s) - s.samples = [delayed_scored_sample] - - return scored_sample_set - - -import scipy.spatial.distance -from sklearn.utils.validation import check_array - - -class Distance(BiometricAlgorithm): - def __init__(self, distance_function=scipy.spatial.distance.euclidean, factor=-1): - - self.distance_function = distance_function - self.factor = factor - - def enroll(self, enroll_features, **kwargs): - """enroll(enroll_features) -> model - - Enrolls the model by storing all given input vectors. - - Parameters: - ----------- - - ``enroll_features`` : [:py:class:`numpy.ndarray`] - The list of projected features to enroll the model from. - - Returns: - -------- - - ``model`` : 2D :py:class:`numpy.ndarray` - The enrolled model. - """ - - enroll_features = check_array(enroll_features, allow_nd=True) - - return numpy.mean(enroll_features, axis=0) - - def score(self, model, probe, extractor=None, **kwargs): - """score(model, probe) -> float - - Computes the distance of the model to the probe using the distance function specified in the constructor. 
- - Parameters: - ----------- - - ``model`` : 2D :py:class:`numpy.ndarray` - The model storing all enrollment features - - ``probe`` : :py:class:`numpy.ndarray` - The probe feature vector - - Returns: - -------- - - ``score`` : float - A similarity value between ``model`` and ``probe`` - """ - - probe = probe.flatten() - # return the negative distance (as a similarity measure) - return self.factor * self.distance_function(model, probe) - - -def save_scores_four_columns(path, probe): - """ - Write scores in the four columns format - """ - - with open(path, "w") as f: - for biometric_reference in probe.samples: - line = "{0} {1} {2} {3}\n".format( - biometric_reference.key, probe.key, probe.path, biometric_reference.data - ) - f.write(line) - - return DelayedSample(functools.partial(open, path)) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/implemented.py b/bob/bio/base/pipelines/vanilla_biometrics/implemented.py new file mode 100644 index 0000000000000000000000000000000000000000..f9bcd6c9a94c9fce0ba6f59cb68e1c71b396c155 --- /dev/null +++ b/bob/bio/base/pipelines/vanilla_biometrics/implemented.py @@ -0,0 +1,63 @@ +import scipy.spatial.distance +from sklearn.utils.validation import check_array +import numpy +from .abstract_classes import BioAlgorithm +from .mixins import BioAlgCheckpointMixin + + +class Distance(BioAlgorithm): + def __init__(self, distance_function=scipy.spatial.distance.euclidean, factor=-1): + + self.distance_function = distance_function + self.factor = factor + + def enroll(self, enroll_features): + """enroll(enroll_features) -> model + + Enrolls the model by storing all given input vectors. + + Parameters: + ----------- + + ``enroll_features`` : [:py:class:`numpy.ndarray`] + The list of projected features to enroll the model from. + + Returns: + -------- + + ``model`` : 2D :py:class:`numpy.ndarray` + The enrolled model. + """ + + enroll_features = check_array(enroll_features, allow_nd=True) + + return numpy.mean(enroll_features, axis=0) + + def score(self, model, probe): + """score(model, probe) -> float + + Computes the distance of the model to the probe using the distance function specified in the constructor. + + Parameters: + ----------- + + ``model`` : 2D :py:class:`numpy.ndarray` + The model storing all enrollment features + + ``probe`` : :py:class:`numpy.ndarray` + The probe feature vector + + Returns: + -------- + + ``score`` : float + A similarity value between ``model`` and ``probe`` + """ + + probe = probe.flatten() + # return the negative distance (as a similarity measure) + return self.factor * self.distance_function(model, probe) + + +class CheckpointDistance(BioAlgCheckpointMixin, Distance): + pass diff --git a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py index e2a700fba760d4f77cf1c18d23dba83ea48345ff..6e629ad0da99bdf4829da73d747a55bb4c651b86 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py @@ -4,23 +4,33 @@ """Re-usable blocks for legacy bob.bio.base algorithms""" import os -import copy import functools +from collections import defaultdict -import bob.io.base +from .... 
import utils +from .abstract_classes import BioAlgorithm, Database, save_scores_four_columns +from bob.io.base import HDF5File +from bob.pipelines.mixins import SampleMixin, CheckpointMixin from bob.pipelines.sample import DelayedSample, SampleSet, Sample -import numpy +from bob.pipelines.utils import is_picklable +from sklearn.base import TransformerMixin import logging -import dask -import sys -import pickle -from bob.bio.base.mixins.legacy import get_reader -from .biometric_algorithm import save_scores_four_columns logger = logging.getLogger("bob.bio.base") -class DatabaseConnector: +def _biofile_to_delayed_sample(biofile, database): + return DelayedSample( + load=functools.partial( + biofile.load, database.original_directory, database.original_extension, + ), + key=biofile.path, + path=biofile.path, + annotations=database.annotations(biofile), + ) + + +class LegacyDatabaseConnector(Database): """Wraps a bob.bio.base database and generates conforming samples This connector allows wrapping generic bob.bio.base datasets and generate @@ -40,12 +50,9 @@ class DatabaseConnector: """ - def __init__(self, database, protocol): + def __init__(self, database, **kwargs): + super().__init__(**kwargs) self.database = database - self.protocol = protocol - self.directory = database.original_directory - self.extension = database.original_extension - def background_model_samples(self): """Returns :py:class:`Sample`'s to train a background model (group @@ -61,28 +68,9 @@ class DatabaseConnector: """ - # TODO: This should be organized by client - retval = [] + objects = self.database.training_files() - objects = self.database.objects(protocol=self.protocol, groups="world") - - return [ - SampleSet( - [ - DelayedSample( - load=functools.partial( - k.load, - self.database.original_directory, - self.database.original_extension, - ), - key=k.path, - path=k.path, - ) - ], - key=str(k.client_id), - ) - for k in objects - ] + return [_biofile_to_delayed_sample(k, self.database) for k in objects] def references(self, group="dev"): """Returns :py:class:`Reference`'s to enroll biometric references @@ -106,26 +94,13 @@ class DatabaseConnector: """ retval = [] - for m in self.database.model_ids_with_protocol(protocol=self.protocol, groups=group): + for m in self.database.model_ids(groups=group): - objects = self.database.objects( - protocol=self.protocol, groups=group, model_ids=(m,), purposes="enroll" - ) + objects = self.database.enroll_files(groups=group, model_id=m) retval.append( SampleSet( - [ - DelayedSample( - load=functools.partial( - k.load, - self.database.original_directory, - self.database.original_extension, - ), - key=k.path, - path=k.path, - ) - for k in objects - ], + [_biofile_to_delayed_sample(k, self.database) for k in objects], key=str(m), path=str(m), subject=str(objects[0].client_id), @@ -157,29 +132,17 @@ class DatabaseConnector: probes = dict() - for m in self.database.model_ids_with_protocol(protocol=self.protocol, groups=group): + for m in self.database.model_ids(groups=group): # Getting all the probe objects from a particular biometric # reference - objects = self.database.objects( - protocol=self.protocol, groups=group, model_ids=(m,), purposes="probe" - ) + objects = self.database.probe_files(group=group, model_id=m) # Creating probe samples for o in objects: if o.id not in probes: probes[o.id] = SampleSet( - [ - DelayedSample( - load=functools.partial( - o.load, - self.database.original_directory, - self.database.original_extension, - ), - key=o.path, - path=o.path, - ) - ], + 
[_biofile_to_delayed_sample(o, self.database)], key=str(o.client_id), path=o.path, subject=str(o.client_id), @@ -191,214 +154,201 @@ class DatabaseConnector: return list(probes.values()) +class _NonPickableWrapper: + def __init__(self, callable, **kwargs): + super().__init__(**kwargs) + self.callable = callable + self._instance = None -def _load_data_and_annotations(bio_file, annotations, original_directory, original_extension): - """ - Return a tuple (data, annotations) given a :py:class:`bob.bio.base.database.BioFile` as input + @property + def instance(self): + if self._instance is None: + self._instance = self.callable() + return self._instance - Parameters - ---------- + def __setstate__(self, d): + # Handling unpicklable objects + self._instance = None + return super().__setstate__(d) - bio_file: :py:class:`bob.bio.base.database.BioFile` - Input bio file + def __getstate__(self): + # Handling unpicklable objects + self._instance = None + return super().__getstate__() - Returns - ------- - (data, annotations): A dictionary containing the raw data + annotations - """ +class _Preprocessor(_NonPickableWrapper, TransformerMixin): + def transform(self, X, annotations): + return [self.instance(data, annot) for data, annot in zip(X, annotations)] - data = bio_file.load(original_directory, original_extension) + def _more_tags(self): + return {"stateless": True} - # I know it sounds stupid to return the the annotations here without any transformation - # but I can't do `database.annotations(bio_file)`, SQLAlcheamy session is not picklable - return {"data": data, "annotations": annotations} +def _get_pickable_method(method): + if not is_picklable(method): + logger.warning( + f"The method {method} is not picklable. Returning its unbounded method" + ) + method = functools.partial(method.__func__, None) + return method + + +class LegacyPreprocessor(CheckpointMixin, SampleMixin, _Preprocessor): + def __init__(self, callable, **kwargs): + instance = callable() + super().__init__( + callable=callable, + transform_extra_arguments=(("annotations", "annotations"),), + load_func=_get_pickable_method(instance.read_data), + save_func=_get_pickable_method(instance.write_data), + **kwargs, + ) -class DatabaseConnectorAnnotated(DatabaseConnector): - """Wraps a bob.bio.base database and generates conforming samples for datasets - that has annotations - This connector allows wrapping generic bob.bio.base datasets and generate - samples that conform to the specifications of biometric pipelines defined - in this package. +def _split_X_by_y(X, y): + training_data = defaultdict(list) + for x1, y1 in zip(X, y): + training_data[y1].append(x1) + training_data = training_data.values() + return training_data - Parameters - ---------- +class _Extractor(_NonPickableWrapper, TransformerMixin): + def transform(self, X, metadata=None): + if self.requires_metadata: + return [self.instance(data, metadata=m) for data, m in zip(X, metadata)] + else: + return [self.instance(data) for data in X] - database : object - An instantiated version of a bob.bio.base.Database object + def fit(self, X, y=None): + if not self.instance.requires_training: + return self - protocol : str - The name of the protocol to generate samples from. - To be plugged at :py:method:`bob.db.base.Database.objects`. 
+ training_data = X + if self.instance.split_training_data_by_client: + training_data = _split_X_by_y(X, y) - """ + self.instance.train(self, training_data, self.model_path) + return self - def __init__(self, database, protocol): - super(DatabaseConnectorAnnotated, self).__init__(database, protocol) + def _more_tags(self): + return {"requires_fit": self.instance.requires_training} - def background_model_samples(self): - """Returns :py:class:`Sample`'s to train a background model (group - ``world``). +class LegacyExtractor(CheckpointMixin, SampleMixin, _Extractor): + def __init__(self, callable, model_path, **kwargs): + instance = callable() + transform_extra_arguments = None + self.requires_metadata = False + if utils.is_argument_available("metadata", instance.__call__): + transform_extra_arguments = (("metadata", "metadata"),) + self.requires_metadata = True - Returns - ------- + fit_extra_arguments = None + if instance.requires_training and instance.split_training_data_by_client: + fit_extra_arguments = (("y", "subject"),) - samples : list - List of samples conforming the pipeline API for background - model training. See, e.g., :py:func:`.pipelines.first`. + super().__init__( + callable=callable, + transform_extra_arguments=transform_extra_arguments, + fit_extra_arguments=fit_extra_arguments, + load_func=_get_pickable_method(instance.read_feature), + save_func=_get_pickable_method(instance.write_feature), + model_path=model_path, + **kwargs, + ) - """ + def load_model(self): + self.instance.load(self.model_path) + return self - # TODO: This should be organized by client - retval = [] + def save_model(self): + # we have already saved the model in .fit() + return self - objects = self.database.objects(protocol=self.protocol, groups="world") - return [ - SampleSet( - [ - DelayedSample( - load=functools.partial( - _load_data_and_annotations, k, self.database.annotations(k), self.database.original_directory, self.database.original_extension - ), - key=k.path, - path=k.path, - annotations=self.database.annotations(k), - ) - ], - key=str(k.client_id), - ) - for k in objects - ] - def references(self, group="dev"): - """Returns :py:class:`Reference`'s to enroll biometric references +class _AlgorithmTransformer(_NonPickableWrapper, TransformerMixin): + def transform(self, X): + return [self.instance.project(feature) for feature in X] + def fit(self, X, y=None): + if not self.instance.requires_projector_training: + return self - Parameters - ---------- + training_data = X + if self.instance.split_training_features_by_client: + training_data = _split_X_by_y(X, y) - group : :py:class:`str`, optional - A ``group`` to be plugged at - :py:meth:`bob.db.base.Database.objects` + self.instance.train_projector(self, training_data, self.model_path) + return self + def _more_tags(self): + return {"requires_fit": self.instance.requires_projector_training} - Returns - ------- - - references : list - List of samples conforming the pipeline API for the creation of - biometric references. See, e.g., :py:func:`.pipelines.first`. 
- """ +class LegacyAlgorithmAsTransformer(CheckpointMixin, SampleMixin, _AlgorithmTransformer): + """Class that wraps :py:class:`bob.bio.base.algorithm.Algoritm` - retval = [] + :py:method:`LegacyAlgorithmrMixin.fit` maps to :py:method:`bob.bio.base.algorithm.Algoritm.train_projector` - for m in self.database.model_ids_with_protocol( - protocol=self.protocol, groups=group - ): + :py:method:`LegacyAlgorithmrMixin.transform` maps :py:method:`bob.bio.base.algorithm.Algoritm.project` - objects = self.database.objects( - protocol=self.protocol, groups=group, model_ids=(m,), purposes="enroll" - ) - - retval.append( - SampleSet( - [ - DelayedSample( - load=functools.partial( - _load_data_and_annotations, k, self.database.annotations(k), self.database.original_directory, self.database.original_extension - ), - key=k.path, - path=k.path, - subject=str(objects[0].client_id), - annotations=self.database.annotations(k), - ) - for k in objects - ], - key=str(m), - path=str(m), - subject=objects[0].client_id, - ) - ) - - return retval - - def probes(self, group): - """Returns :py:class:`Probe`'s to score biometric references - - - Parameters - ---------- + Example + ------- - group : str - A ``group`` to be plugged at - :py:meth:`bob.db.base.Database.objects` + Wrapping LDA algorithm with functtools + >>> from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyAlgorithmAsTransformer + >>> from bob.bio.base.algorithm import LDA + >>> import functools + >>> transformer = LegacyAlgorithmAsTransformer(functools.partial(LDA, use_pinv=True, pca_subspace_dimension=0.90)) - Returns - ------- - probes : list - List of samples conforming the pipeline API for the creation of - biometric probes. See, e.g., :py:func:`.pipelines.first`. + Parameters + ---------- + callable: callable + Calleble function that instantiates the bob.bio.base.algorithm.Algorithm - """ + """ - probes = dict() + def __init__(self, callable, model_path, **kwargs): + instance = callable() - for m in self.database.model_ids_with_protocol( - protocol=self.protocol, groups=group + fit_extra_arguments = None + if ( + instance.requires_projector_training + and instance.split_training_features_by_client ): + fit_extra_arguments = (("y", "subject"),) + + super().__init__( + callable=callable, + fit_extra_arguments=fit_extra_arguments, + load_func=_get_pickable_method(instance.read_feature), + save_func=_get_pickable_method(instance.write_feature), + model_path=model_path, + **kwargs, + ) - # Getting all the probe objects from a particular biometric - # reference - objects = self.database.objects( - protocol=self.protocol, groups=group, model_ids=(m,), purposes="probe" - ) - - # Creating probe samples - for o in objects: - if o.id not in probes: - probes[o.id] = SampleSet( - [ - DelayedSample( - load=functools.partial( - _load_data_and_annotations, o, self.database.annotations(o), self.database.original_directory, self.database.original_extension - ), - key=o.path, - path=o.path, - annotations=self.database.annotations(o), - ) - ], - key=str(o.client_id), - path=o.path, - subject=o.client_id, - references=[str(m)], - ) - else: - probes[o.id].references.append(str(m)) - - return list(probes.values()) - + def load_model(self): + self.instance.load_projector(self.model_path) + return self -from .biometric_algorithm import BiometricAlgorithm + def save_model(self): + # we have already saved the model in .fit() + return self -class LegacyBiometricAlgorithm(BiometricAlgorithm): +class LegacyAlgorithmAsBioAlg(BioAlgorithm, _NonPickableWrapper): 
"""Biometric Algorithm that handles legacy :py:class:`bob.bio.base.algorithm.Algorithm` - :py:method:`BiometricAlgorithm.enroll` maps to :py:method:`bob.bio.base.algorithm.Algoritm.enroll` - - :py:method:`BiometricAlgorithm.score` maps :py:method:`bob.bio.base.algorithm.Algoritm.score` + :py:method:`BioAlgorithm.enroll` maps to :py:method:`bob.bio.base.algorithm.Algoritm.enroll` + :py:method:`BioAlgorithm.score` maps :py:method:`bob.bio.base.algorithm.Algoritm.score` - THIS CODE HAS TO BE CHECKPOINTABLE IN A SPECIAL WAY Example ------- @@ -407,33 +357,27 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): Parameters ---------- callable: callable - Calleble function that instantiates the scikit estimator + Calleble function that instantiates the bob.bio.base.algorithm.Algorithm """ - def __init__(self, callable=None, features_dir=None, **kwargs): - super().__init__(**kwargs) - self.callable = callable - self.instance = None - self.projector_file = None + def __init__(self, callable, features_dir, extension=".hdf5", **kwargs): + super().__init__(callable, **kwargs) self.features_dir = features_dir self.biometric_reference_dir = os.path.join( self.features_dir, "biometric_references" ) self.score_dir = os.path.join(self.features_dir, "scores") - self.extension = ".hdf5" + self.extension = extension def _enroll_sample_set(self, sampleset): # Enroll return self.enroll(sampleset) - def _score_sample_set(self, sampleset, biometric_references, extractor): + def _score_sample_set(self, sampleset, biometric_references): """Given a sampleset for probing, compute the scores and retures a sample set with the scores """ - # Stacking the samples from a sampleset - data = [s for s in sampleset.samples] - # Compute scores for each sample inside of the sample set # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) # We should add an agregator function here so we can properlly agregate samples from @@ -447,10 +391,7 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): for ref in [ r for r in biometric_references if r.key in sampleset.references ]: - # subprobe_scores.append(self.score(ref.data, s, extractor)) - subprobe_scores.append( - Sample(self.score(ref.data, s.data, extractor), parent=ref) - ) + subprobe_scores.append(Sample(self.score(ref.data, s.data), parent=ref)) # Creating one sampleset per probe subprobe = SampleSet(subprobe_scores, parent=sampleset) @@ -458,7 +399,7 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): # Checkpointing score MANDATORY FOR LEGACY path = os.path.join(self.score_dir, str(subprobe.path) + ".txt") - bob.io.base.create_directories_safe(os.path.dirname(path)) + os.makedirs(os.path.dirname(path), exist_ok=True) delayed_scored_sample = save_scores_four_columns(path, subprobe) subprobe.samples = [delayed_scored_sample] @@ -474,10 +415,6 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): f"`enroll_features` should be the type SampleSet, not {enroll_features}" ) - # Instantiates and do the "real" fit - if self.instance is None: - self.instance = self.callable() - path = os.path.join( self.biometric_reference_dir, str(enroll_features.key) + self.extension ) @@ -488,17 +425,12 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): model = self.instance.enroll(data) # Checkpointing - bob.io.base.create_directories_safe(os.path.dirname(path)) - hdf5 = bob.io.base.HDF5File(path, "w") + os.makedirs(os.path.dirname(path), exist_ok=True) + hdf5 = HDF5File(path, "w") self.instance.write_model(model, hdf5) - reader = get_reader(self.instance.read_model, 
-            return DelayedSample(reader, parent=enroll_features)
-
-    def score(self, model, probe, extractor=None, **kwargs):
-
-        # Instantiates and do the "real" fit
-        if self.instance is None:
-            self.instance = self.callable()
+            reader = _get_pickable_method(self.instance.read_model)
+            return DelayedSample(functools.partial(reader, path), parent=enroll_features)
 
+    def score(self, model, probe, **kwargs):
         return self.instance.score(model, probe)
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/mixins.py b/bob/bio/base/pipelines/vanilla_biometrics/mixins.py
new file mode 100644
index 0000000000000000000000000000000000000000..5655e0df19f6cef969a8def0c2e14233581c57d1
--- /dev/null
+++ b/bob/bio/base/pipelines/vanilla_biometrics/mixins.py
@@ -0,0 +1,105 @@
+from bob.pipelines.mixins import CheckpointMixin
+from bob.pipelines.sample import DelayedSample
+import bob.io.base
+import os
+import functools
+import dask
+from .abstract_classes import save_scores_four_columns
+
+
+class BioAlgCheckpointMixin(CheckpointMixin):
+    """Mixin used to checkpoint enrolled and scored samples.
+
+    Examples
+    --------
+
+    >>> from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import BioAlgCheckpointMixin, Distance
+    >>> class DistanceCheckpoint(BioAlgCheckpointMixin, Distance): pass
+    >>> biometric_algorithm = DistanceCheckpoint(features_dir="./")
+    >>> biometric_algorithm.enroll(sample)
+
+    It's also possible to build it with :py:func:`bob.pipelines.mixins.mix_me_up`
+
+    >>> from bob.pipelines.mixins import mix_me_up
+    >>> biometric_algorithm = mix_me_up([BioAlgCheckpointMixin], Distance)(features_dir="./")
+    >>> biometric_algorithm.enroll(sample)
+
+    """
+
+    def __init__(self, features_dir, **kwargs):
+        super().__init__(features_dir=features_dir, **kwargs)
+        self.biometric_reference_dir = os.path.join(
+            features_dir, "biometric_references"
+        )
+        self.score_dir = os.path.join(features_dir, "scores")
+
+    def save(self, sample, path):
+        return bob.io.base.save(sample.data, path, create_directories=True)
+
+    def _enroll_sample_set(self, sampleset):
+        """
+        Enroll a sample set with checkpointing
+        """
+
+        # Amending `models` directory
+        path = os.path.join(
+            self.biometric_reference_dir, str(sampleset.key) + self.extension
+        )
+        if path is None or not os.path.isfile(path):
+
+            # Enrolling the sample
+            enrolled_sample = super()._enroll_sample_set(sampleset)
+
+            # saving the new sample
+            self.save(enrolled_sample, path)
+
+            # Delaying it:
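+            # only a loader bound to the checkpointed ``path`` is kept in memory,
+            # so the enrolled model is read back from disk when the sample data
+            # is actually requested.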
+ # This seems inefficient, but it's crucial for large datasets + delayed_enrolled_sample = DelayedSample( + functools.partial(bob.io.base.load, path), enrolled_sample + ) + + else: + # If sample already there, just load + delayed_enrolled_sample = self.load(path) + delayed_enrolled_sample.key = sampleset.key + + return delayed_enrolled_sample + + def _score_sample_set(self, sampleset, biometric_references): + """Given a sampleset for probing, compute the scores and retures a sample set with the scores + """ + # Computing score + scored_sample_set = super()._score_sample_set(sampleset, biometric_references) + + for s in scored_sample_set: + # Checkpointing score + path = os.path.join(self.score_dir, str(s.path) + ".txt") + bob.io.base.create_directories_safe(os.path.dirname(path)) + + delayed_scored_sample = save_scores_four_columns(path, s) + s.samples = [delayed_scored_sample] + + return scored_sample_set + + +class BioAlgDaskMixin: + def enroll_samples(self, biometric_reference_features): + biometric_references = biometric_reference_features.map_partitions( + self.enroll_samples + ) + return biometric_references + + def score_samples(self, probe_features, biometric_references): + + # TODO: Here, we are sending all computed biometric references to all + # probes. It would be more efficient if only the models related to each + # probe are sent to the probing split. An option would be to use caching + # and allow the ``score`` function above to load the required data from + # the disk, directly. A second option would be to generate named delays + # for each model and then associate them here. + + all_references = dask.delayed(list)(biometric_references) + + scores = probe_features.map_partitions(self.score_samples, all_references) + return scores diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py index 9b32ae55f66ca498b1d0cbf88692b842fc9e6406..9ac00cbb0a63ae2762d936439a438726987fa30b 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py @@ -8,11 +8,8 @@ This file contains simple processing blocks meant to be used for bob.bio experiments """ -import dask.bag -import dask.delayed -from bob.pipelines.sample import samplesets_to_samples - import logging + logger = logging.getLogger(__name__) @@ -20,94 +17,63 @@ def biometric_pipeline( background_model_samples, biometric_reference_samples, probe_samples, - extractor, + transformer, biometric_algorithm, ): - logger.info(f" >> Vanilla Biometrics: Training background model with pipeline {extractor}") + logger.info( + f" >> Vanilla Biometrics: Training background model with pipeline {transformer}" + ) - ## Training background model (fit will return even if samples is ``None``, - ## in which case we suppose the algorithm is not trainable in any way) - extractor = train_background_model(background_model_samples, extractor) + # Training background model (fit will return even if samples is ``None``, + # in which case we suppose the algorithm is not trainable in any way) + transformer = train_background_model(background_model_samples, transformer) - logger.info(f" >> Creating biometric references with the biometric algorithm {biometric_algorithm}") + logger.info( + f" >> Creating biometric references with the biometric algorithm {biometric_algorithm}" + ) - ## Create biometric samples + # Create biometric samples biometric_references = create_biometric_reference( - biometric_reference_samples, extractor, 
biometric_algorithm + biometric_reference_samples, transformer, biometric_algorithm ) - logger.info(f" >> Computing scores with the biometric algorithm {biometric_algorithm}") + logger.info( + f" >> Computing scores with the biometric algorithm {biometric_algorithm}" + ) - ## Scores all probes + # Scores all probes return compute_scores( - probe_samples, biometric_references, extractor, biometric_algorithm + probe_samples, biometric_references, transformer, biometric_algorithm ) -def train_background_model(background_model_samples, extractor): - - X, y = samplesets_to_samples(background_model_samples) - - extractor = extractor.fit(X, y=y) - - return extractor +def train_background_model(background_model_samples, transformer): + # background_model_samples is a list of Samples + transformer = transformer.fit(background_model_samples) + return transformer def create_biometric_reference( - biometric_reference_samples, extractor, biometric_algorithm + biometric_reference_samples, transformer, biometric_algorithm ): - biometric_reference_features = extractor.transform(biometric_reference_samples) - - # TODO: I KNOW THIS LOOKS UGLY, BUT THIS `MAP_PARTITIONS` HAS TO APPEAR SOMEWHERE - # I COULD WORK OUT A MIXIN FOR IT, BUT THE USER WOULD NEED TO SET THAT SOMETWHERE - # HERE'S ALREADY SETTING ONCE (for the pipeline) AND I DON'T WANT TO MAKE - # THEM SET IN ANOTHER PLACE - # LET'S DISCUSS THIS ON SLACK - - if isinstance(biometric_reference_features, dask.bag.core.Bag): - # ASSUMING THAT IS A DASK THING IS COMMING - biometric_references = biometric_reference_features.map_partitions( - biometric_algorithm._enroll_samples - ) - else: - biometric_references = biometric_algorithm._enroll_samples( - biometric_reference_features - ) + biometric_reference_features = transformer.transform(biometric_reference_samples) + + biometric_references = biometric_algorithm.enroll_samples( + biometric_reference_features + ) # models is a list of Samples return biometric_references -def compute_scores(probe_samples, biometric_references, extractor, biometric_algorithm): +def compute_scores( + probe_samples, biometric_references, transformer, biometric_algorithm +): # probes is a list of SampleSets - probe_features = extractor.transform(probe_samples) - - # TODO: I KNOW THIS LOOKS UGLY, BUT THIS `MAP_PARTITIONS` HAS TO APPEAR SOMEWHERE - # I COULD WORK OUT A MIXIN FOR IT, BUT THE USER WOULD NEED TO SET THAT SOMETWHERE - # HERE'S ALREADY SETTING ONCE (for the pipeline) AND I DON'T WANT TO MAKE - # THEM SET IN ANOTHER PLACE - # LET'S DISCUSS THIS ON SLACK - if isinstance(probe_features, dask.bag.core.Bag): - # ASSUMING THAT IS A DASK THING IS COMMING - - ## TODO: Here, we are sending all computed biometric references to all - ## probes. It would be more efficient if only the models related to each - ## probe are sent to the probing split. An option would be to use caching - ## and allow the ``score`` function above to load the required data from - ## the disk, directly. A second option would be to generate named delays - ## for each model and then associate them here. 
- - all_references = dask.delayed(list)(biometric_references) - - scores = probe_features.map_partitions( - biometric_algorithm._score_samples, all_references, extractor - ) - - else: - scores = biometric_algorithm._score_samples( - probe_features, biometric_references, extractor - ) + probe_features = transformer.transform(probe_samples) + + scores = biometric_algorithm.score_samples(probe_features, biometric_references) # scores is a list of Samples return scores diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py index b58204d2bf9fcbe4a51386fb993ee7352dbb6dd1..c0cba089f0027d35280a2466c309bd7a670ca679 100644 --- a/bob/bio/base/script/vanilla_biometrics.py +++ b/bob/bio/base/script/vanilla_biometrics.py @@ -5,26 +5,26 @@ """Executes biometric pipeline""" -import os -import functools - import click -from bob.extension.scripts.click_helper import verbosity_option, ResourceOption, ConfigCommand -from bob.pipelines.sample import DelayedSample, Sample +from bob.extension.scripts.click_helper import ( + verbosity_option, + ResourceOption, + ConfigCommand, +) import logging -logger = logging.getLogger(__name__) +logger = logging.getLogger(__name__) EPILOG = """\b - + Command line examples\n ----------------------- - + $ bob pipelines vanilla-biometrics my_experiment.py -vv @@ -34,7 +34,7 @@ EPILOG = """\b >>> extractor = my_extractor() \n >>> algorithm = my_algorithm() \n >>> checkpoints = EXPLAIN CHECKPOINTING \n - + \b @@ -54,15 +54,14 @@ TODO: Work out this help @click.command( - entry_point_group='bob.pipelines.config', cls=ConfigCommand, - epilog=EPILOG, + entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG, ) @click.option( - "--extractor", + "--transformer", "-e", required=True, cls=ResourceOption, - entry_point_group="bob.bio.extractor", # This should be linked to bob.bio.base + entry_point_group="bob.pipelines.transformer", help="Feature extraction algorithm", ) @click.option( @@ -92,6 +91,7 @@ TODO: Work out this help @click.option( "--group", "-g", + "groups", type=click.Choice(["dev", "eval"]), multiple=True, default=("dev",), @@ -106,13 +106,7 @@ TODO: Work out this help ) @verbosity_option(cls=ResourceOption) def vanilla_biometrics( - extractor, - algorithm, - database, - dask_client, - group, - output, - **kwargs + transformer, algorithm, database, dask_client, groups, output, **kwargs ): """Runs the simplest biometrics pipeline. @@ -121,7 +115,7 @@ def vanilla_biometrics( Sub-pipeline 1:\n --------------- - + Training background model. Some biometric algorithms demands the training of background model, for instance, PCA/LDA matrix or a Neural networks. This sub-pipeline handles that and it consists of 3 steps: \b @@ -133,13 +127,13 @@ def vanilla_biometrics( Sub-pipeline 2:\n --------------- - + Creation of biometric references: This is a standard step in a biometric pipelines. Given a set of samples of one identity, create a biometric reference (a.k.a template) for sub identity. This sub-pipeline handles that in 3 steps and they are the following: \b raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference - + Note that this sub-pipeline depends on the previous one @@ -150,99 +144,59 @@ def vanilla_biometrics( Probing: This is another standard step in biometric pipelines. Given one sample and one biometric reference, computes a score. Such score has different meanings depending on the scoring method your biometric algorithm uses. 
It's out of scope to explain in a help message to explain what scoring is for different biometric algorithms.
-    
+
     raw_data --> preprocessing >> feature extraction >> probe(biometric_reference, background_model) --> score
 
     Note that this sub-pipeline depends on the two previous ones
 
 
     """
-
-    # Always turn-on the checkpointing
-    checkpointing = True
-
     # Chooses the pipeline to run
     from bob.bio.base.pipelines.vanilla_biometrics.pipeline import biometric_pipeline
+    import dask.bag
+    import itertools
+    import os
+    from bob.pipelines.sample import Sample
 
     if not os.path.exists(output):
-        os.makedirs(output)
-
-    for g in group:
+        os.makedirs(output, exist_ok=True)
+
+    for group in groups:
+
+        with open(os.path.join(output, f"scores-{group}"), "w") as f:
+            biometric_references = database.references(group=group)
 
-        with open(os.path.join(output,f"scores-{g}"), "w") as f:
-            biometric_references = database.references(group=g)
-
-            logger.info(f"Running vanilla biometrics for group {g}")
+            logger.info(f"Running vanilla biometrics for group {group}")
 
             result = biometric_pipeline(
                 database.background_model_samples(),
                 biometric_references,
-                database.probes(group=g),
-                extractor,
+                database.probes(group=group),
+                transformer,
                 algorithm,
-            )
-
-            import dask.bag
+            )
+
             if isinstance(result, dask.bag.core.Bag):
                 if dask_client is not None:
                     result = result.compute(scheduler=dask_client)
                 else:
-                    logger.warning("`dask_client` not set. Your pipeline will run locally")
+                    logger.warning(
+                        "`dask_client` not set. Your pipeline will run locally"
+                    )
                     result = result.compute()
 
             # Flatting out the list
-            import itertools
-            result = list(itertools.chain(*result))
+            result = itertools.chain(*result)
 
             for probe in result:
                 for sample in probe.samples:
-                    
+
                     if isinstance(sample, Sample):
-                        line = "{0} {1} {2} {3}\n".format(sample.key, probe.key, probe.path, sample.data)
+                        line = "{0} {1} {2} {3}\n".format(
+                            sample.key, probe.key, probe.path, sample.data
+                        )
                         f.write(line)
-                    elif isinstance(sample, DelayedSample):
-                        lines = sample.load().readlines()
-                        f.writelines(lines)
                     else:
                         raise TypeError("The output of the pipeline is not writeble")
 
     if dask_client is not None:
         dask_client.shutdown()
-
-
-@click.command()
-@click.argument("output-file")
-@verbosity_option(cls=ResourceOption)
-def vanilla_biometrics_template(output_file, **kwargs):
-    """
-    Generate an template configuration file for the vanilla biometrics pipeline
-    """
-
-    import bob.io.base
-
-    path = os.path.dirname(output_file)
-    logger.info(f"Writting template configuration file in {path}")
-    bob.io.base.create_directories_safe(path)
-
-    template = '''
-
-# Client dask. Look at https://gitlab.idiap.ch/bob/bob.pipelines/tree/master/bob/pipelines/config/distributed to find proper dask clients.
-# You don't need to necessary instantiate a dask client yourself.
You can simply pipe those config files - -dask_client = my_client - - -preprocessor = my_preprocessor - - -extractor = my_extractor - - -algorithm = my_algorithm - - -database = my_database - -''' - - open(output_file, "w").write(template) diff --git a/bob/bio/base/test/test_transformers.py b/bob/bio/base/test/test_transformers.py index 97d290756620dc2e1689c7aadf72c96a0f6071b5..5fa1b098d1f4d92475d570fa098917bd6a5ee6ac 100644 --- a/bob/bio/base/test/test_transformers.py +++ b/bob/bio/base/test/test_transformers.py @@ -8,9 +8,9 @@ import numpy import tempfile from sklearn.utils.validation import check_is_fitted -from bob.bio.base.transformers import Linearize, SampleLinearize, CheckpointSampleLinearize +from bob.pipelines.transformers import Linearize, SampleLinearize, CheckpointSampleLinearize def test_linearize_processor(): - + ## Test the transformer only transformer = Linearize() X = numpy.zeros(shape=(10,10)) @@ -24,7 +24,7 @@ def test_linearize_processor(): X_tr = transformer.transform([sample]) assert X_tr[0].data.shape == (100,) - ## Test checkpoint + ## Test checkpoint with tempfile.TemporaryDirectory() as d: transformer = CheckpointSampleLinearize(features_dir=d) X_tr = transformer.transform([sample]) @@ -32,9 +32,9 @@ def test_linearize_processor(): assert os.path.exists(os.path.join(d, "1.h5")) -from bob.bio.base.transformers import SamplePCA, CheckpointSamplePCA +from bob.pipelines.transformers import SamplePCA, CheckpointSamplePCA def test_pca_processor(): - + ## Test wrapped in to a Sample X = numpy.random.rand(100,10) samples = [Sample(data, key=str(i)) for i, data in enumerate(X)] @@ -43,17 +43,17 @@ def test_pca_processor(): n_components = 2 estimator = SamplePCA(n_components=n_components) estimator = estimator.fit(samples) - + # https://scikit-learn.org/stable/modules/generated/sklearn.utils.validation.check_is_fitted.html assert check_is_fitted(estimator, "n_components_") is None - + # transform samples_tr = estimator.transform(samples) assert samples_tr[0].data.shape == (n_components,) - + ## Test Checkpoining - with tempfile.TemporaryDirectory() as d: + with tempfile.TemporaryDirectory() as d: model_path = os.path.join(d, "model.pkl") estimator = CheckpointSamplePCA(n_components=n_components, features_dir=d, model_path=model_path) @@ -61,8 +61,8 @@ def test_pca_processor(): estimator = estimator.fit(samples) assert check_is_fitted(estimator, "n_components_") is None assert os.path.exists(model_path) - + # transform samples_tr = estimator.transform(samples) - assert samples_tr[0].data.shape == (n_components,) + assert samples_tr[0].data.shape == (n_components,) assert os.path.exists(os.path.join(d, samples_tr[0].key+".h5")) diff --git a/bob/bio/base/test/test_vanilla_biometrics.py b/bob/bio/base/test/test_vanilla_biometrics.py index 60e0ac4027c6e7535496db6a4b88b02d284974df..75d9354860926ed739c382eb590252dc87ecf0ed 100644 --- a/bob/bio/base/test/test_vanilla_biometrics.py +++ b/bob/bio/base/test/test_vanilla_biometrics.py @@ -19,10 +19,10 @@ class DummyDatabase: self.one_d = one_d - def _create_random_1dsamples(self, n_samples, offset, dim): + def _create_random_1dsamples(self, n_samples, offset, dim): return [ Sample(numpy.random.rand(dim), key=i) for i in range(offset,offset+n_samples) ] - def _create_random_2dsamples(self, n_samples, offset, dim): + def _create_random_2dsamples(self, n_samples, offset, dim): return [ Sample(numpy.random.rand(dim, dim), key=i) for i in range(offset,offset+n_samples) ] def _create_random_sample_set(self, n_sample_set=10, n_samples=2): @@ 
-30,7 +30,7 @@ class DummyDatabase: # Just generate random samples sample_set = [SampleSet(samples=[], key=i) for i in range(n_sample_set)] - offset = 0 + offset = 0 for s in sample_set: if self.one_d: s.samples = self._create_random_1dsamples(n_samples, offset, self.dim) @@ -61,22 +61,22 @@ class DummyDatabase: from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance import itertools def test_distance_comparator(): - + n_references = 10 dim = 10 n_probes = 10 database = DummyDatabase(delayed=False, n_references=n_references, n_probes=n_probes, dim=10, one_d = True) - references = database.references() + references = database.references() probes = database.probes() - + comparator = Distance() - references = comparator._enroll_samples(references) + references = comparator.enroll_samples(references) assert len(references)== n_references assert references[0].data.shape == (dim,) probes = database.probes() - scores = comparator._score_samples(probes, references) + scores = comparator.score_samples(probes, references) scores = list(itertools.chain(*scores)) - + assert len(scores) == n_probes*n_references assert len(scores[0].samples)==n_references diff --git a/bob/bio/base/transformers/__init__.py b/bob/bio/base/transformers/__init__.py deleted file mode 100644 index 729af155f8f3f72cd1e3805f4a6efe40e2787dd7..0000000000000000000000000000000000000000 --- a/bob/bio/base/transformers/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .linearize import Linearize, SampleLinearize, CheckpointSampleLinearize -from .pca import CheckpointSamplePCA, SamplePCA diff --git a/bob/bio/base/transformers/linearize.py b/bob/bio/base/transformers/linearize.py deleted file mode 100644 index 03d079567018361610c8573031f6bbffd9bfbacb..0000000000000000000000000000000000000000 --- a/bob/bio/base/transformers/linearize.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> - - -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from sklearn.base import TransformerMixin -from sklearn.utils.validation import check_array -import numpy - - -class Linearize(TransformerMixin): - """Extracts features by simply concatenating all elements of the data into one long vector. - - If a ``dtype`` is specified in the contructor, it is assured that the resulting - """ - - def transform(self, X): - - """__call__(data) -> data - - Takes data of arbitrary dimensions and linearizes it into a 1D vector; enforcing the data type, if desired. - - Parameters: - ----------- - - data : :py:class:`numpy.ndarray` - The preprocessed data to be transformed into one vector. - - Returns: - -------- - - data : 1D :py:class:`numpy.ndarray` - The extracted feature vector, of the desired ``dtype`` (if specified). 
- """ - - X = check_array(X, allow_nd=True) - - if X.ndim == 2: - return numpy.reshape(X, X.size) - else: - # Reshaping n-dimensional arrays assuming that the - # first axis corresponds to the number of samples - return numpy.reshape(X, (X.shape[0], numpy.prod(X.shape[1:]))) - - -class SampleLinearize(SampleMixin, Linearize): - pass - - -class CheckpointSampleLinearize(CheckpointMixin, SampleMixin, Linearize): - pass diff --git a/bob/bio/base/transformers/pca.py b/bob/bio/base/transformers/pca.py deleted file mode 100644 index 1412188f66aa8b111d39b741093486cf5bf780c8..0000000000000000000000000000000000000000 --- a/bob/bio/base/transformers/pca.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> - - -""" -TODO: This should be deployed in bob.pipelines -""" - -from bob.pipelines.mixins import CheckpointMixin, SampleMixin -from sklearn.base import TransformerMixin -from sklearn.decomposition import PCA -import numpy - -""" -Wraps the -""" - - -class SamplePCA(SampleMixin, PCA): - """ - Enables SAMPLE handling for https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html - """ - - pass - - -class CheckpointSamplePCA(CheckpointMixin, SampleMixin, PCA): - """ - Enables SAMPLE and CHECKPOINTIN handling for https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html - """ - - pass