diff --git a/bob/bio/base/config/baselines/lda_atnt_legacy.py b/bob/bio/base/config/baselines/lda_atnt_legacy.py
new file mode 100644
index 0000000000000000000000000000000000000000..76e859cdf72c7a1a84bc5099718c1d086c58106c
--- /dev/null
+++ b/bob/bio/base/config/baselines/lda_atnt_legacy.py
@@ -0,0 +1,83 @@
+# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor
+
+import bob.db.atnt
+from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector
+
+database = DatabaseConnector(bob.db.atnt.Database(), protocol="Default")
+
+from sklearn.pipeline import Pipeline, make_pipeline
+from sklearn.decomposition import PCA
+
+from bob.pipelines.mixins import CheckpointMixin, SampleMixin
+from bob.bio.base.mixins import CheckpointSampleLinearize
+from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin
+
+
+class CheckpointSamplePCA(CheckpointMixin, SampleMixin, PCA):
+    """
+    Enables SAMPLE and CHECKPOINTING handling for https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
+    """
+
+    pass
+
+
+#### PREPROCESSOR LEGACY ###
+import functools
+
+# Cropping
+CROPPED_IMAGE_HEIGHT = 80
+CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5
+
+# eye positions for frontal images
+RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1)
+LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3)
+
+
+# RANDOM EYES POSITIONS
+# I JUST MADE UP THESE NUMBERS
+FIXED_RIGHT_EYE_POS = (30, 30)
+FIXED_LEFT_EYE_POS = (20, 50)
+import bob.bio.face
+
+face_cropper = functools.partial(
+    bob.bio.face.preprocessor.FaceCrop,
+    cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH),
+    cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS},
+    fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS},
+)
+
+from bob.pipelines.mixins import mix_me_up
+preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin)
+
+#### ALGORITHM LEGACY #####
+
+algorithm = functools.partial(bob.bio.base.algorithm.LDA, use_pinv=True, pca_subspace_dimension=0.90)
+
+from bob.pipelines.mixins import dask_it
+
+extractor = Pipeline(
+    steps=[
+        ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")),
+        ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")),
+        (
+            "2",
+            LegacyAlgorithmMixin(
+                callable=algorithm, features_dir="./example/extractor2", model_path="./example/"
+            ),
+        ),
+    ]
+)
+# extractor = dask_it(extractor)
+
+from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import (
+    Distance,
+    BiometricAlgorithmCheckpointMixin,
+)
+
+
+class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance):
+    pass
+
+
+algorithm = CheckpointDistance(features_dir="./example/")
+# algorithm = Distance()
diff --git a/bob/bio/base/mixins/legacy.py b/bob/bio/base/mixins/legacy.py
index 3c3cc47002bd84a0eb265c1a24d1b7625ab88876..6f34efc65fc92e38e63811e4f50fbe1fd01cacdb 100644
--- a/bob/bio/base/mixins/legacy.py
+++ b/bob/bio/base/mixins/legacy.py
@@ -8,9 +8,50 @@ Mixins to handle legacy components
 """
 
 from bob.pipelines.mixins import CheckpointMixin, SampleMixin
-from sklearn.base import TransformerMixin
+from sklearn.base import TransformerMixin, BaseEstimator
 from sklearn.utils.validation import check_array
+from bob.pipelines.sample import Sample, DelayedSample, SampleSet
+import numpy
+import logging
+import os
+
+logger = logging.getLogger(__name__)
+
+
+def scikit_to_bob_supervised(X, Y):
+    """Group samples by label for ``Algorithm.train_projector``.
+
+    Converts input shaped for a scikit-learn ``fit(X, y)`` call into the
+    per-class list handed to
+    :py:meth:`bob.bio.base.algorithm.Algorithm.train_projector` when
+    ``performs_projection=True``.
+
+    Parameters
+    ----------
+    X : iterable
+        Samples; only the ``data`` attribute of each element is read.
+    Y : iterable
+        One hashable label per element of ``X``.
+
+    Returns
+    -------
+    list of numpy.ndarray
+        One vertically stacked array per distinct label, in dictionary
+        iteration order of the labels.
+    """
+
+    # TODO: port the wrapped algorithms so this conversion is not needed.
+    logger.warning("INEFFICIENCY WARNING. HERE YOU ARE USING A HACK FOR USING BOB ALGORITHMS IN SCIKIT LEARN PIPELINES. \
+        WE RECOMMEND YOU TO PORT THIS ALGORITHM. DON'T BE LAZY :-)")
+
+    # Gather the rows of each class and stack once per class; stacking inside
+    # the loop re-copies the growing array for every sample.
+    rows_per_class = dict()
+    for x, y in zip(X, Y):
+        rows_per_class.setdefault(y, []).append(x.data)
+
+    return [numpy.vstack(rows) for rows in rows_per_class.values()]
 
 
 class LegacyProcessorMixin(TransformerMixin):
     """Class that wraps :py:class:`bob.bio.base.preprocessor.Preprocessor` and
@@ -41,7 +82,8 @@ class LegacyProcessorMixin(TransformerMixin):
 
     """
 
-    def __init__(self, callable=None):
+    def __init__(self, callable=None, **kwargs):
+        super().__init__(**kwargs)
         self.callable = callable
         self.instance = None
 
@@ -56,3 +98,142 @@ class LegacyProcessorMixin(TransformerMixin):
         if self.instance is None:
             self.instance = self.callable()
         return [self.instance(x) for x in X]
+
+
+class LegacyAlgorithmMixin(CheckpointMixin, SampleMixin, BaseEstimator):
+    """Class that wraps :py:class:`bob.bio.base.algorithm.Algorithm`
+
+    :py:meth:`LegacyAlgorithmMixin.fit` maps to
+    :py:meth:`bob.bio.base.algorithm.Algorithm.train_projector`
+
+    :py:meth:`LegacyAlgorithmMixin.transform` maps to
+    :py:meth:`bob.bio.base.algorithm.Algorithm.project`
+
+    The projector file is ``Projector.hdf5`` inside ``model_path``;
+    :py:meth:`fit` returns immediately when that file already exists.
+
+    Example
+    -------
+
+        Wrapping an algorithm with functools
+        >>> from bob.bio.base.mixins.legacy import LegacyAlgorithmMixin
+        >>> from bob.bio.base.algorithm import LDA
+        >>> import functools
+        >>> transformer = LegacyAlgorithmMixin(
+        ...     callable=functools.partial(LDA, use_pinv=True),
+        ...     features_dir="./features",
+        ...     model_path="./model",
+        ... )
+
+    Parameters
+    ----------
+      callable: callable
+         Callable that instantiates the wrapped bob algorithm
+
+      kwargs:
+         Forwarded to the parent classes' ``__init__``; ``model_path`` is
+         read from the instance right after that call
+
+    """
+
+    def __init__(self, callable=None, **kwargs):
+        super().__init__(**kwargs)
+        self.callable = callable
+        # Created on demand by fit/transform
+        self.instance = None
+        self.projector_file = os.path.join(self.model_path, "Projector.hdf5")
+
+    def fit(self, X, y=None, **fit_params):
+        """Train the projector of the wrapped algorithm.
+
+        Does nothing when ``self.projector_file`` already exists.
+
+        Parameters
+        ----------
+          X: list
+             Training samples
+
+          y: iterable
+             One label per sample; used to group ``X`` by class when the
+             algorithm has ``performs_projection`` set
+
+          fit_params:
+             Passed to ``train_projector`` in the non-projecting branch only
+
+        Returns
+        -------
+          self
+        """
+        if os.path.exists(self.projector_file):
+            return self
+
+        # Instantiates and do the "real" fit
+        if self.instance is None:
+            self.instance = self.callable()
+
+        if self.instance.performs_projection:
+            # Organizing the data by class
+            bob_X = scikit_to_bob_supervised(X, y)
+            self.instance.train_projector(bob_X, self.projector_file)
+        else:
+            self.instance.train_projector(X, **fit_params)
+
+        # Deleting the instance, so it's picklable
+        self.instance = None
+
+        return self
+
+    def transform(self, X):
+        """Project every sample with the trained projector.
+
+        Parameters
+        ----------
+          X: list
+             ``Sample``/``DelayedSample`` objects, or ``SampleSet`` objects;
+             the type of the first element selects the branch
+
+        Returns
+        -------
+          list
+             One ``Sample`` per input sample, or one ``SampleSet`` of such
+             samples per input set; an empty list for empty input
+
+        Raises
+        ------
+          ValueError
+             If ``X`` is not a list or holds an unsupported element type
+        """
+        if not isinstance(X, list):
+            raise ValueError("It's expected a list, not %s" % type(X))
+
+        # Nothing to project; also keeps the ``X[0]`` probe below safe
+        if not X:
+            return []
+
+        # Instantiates and do the "real" transform
+        if self.instance is None:
+            self.instance = self.callable()
+            self.instance.load_projector(self.projector_file)
+
+        if isinstance(X[0], (Sample, DelayedSample)):
+            return [self._project_sample(sample) for sample in X]
+
+        if isinstance(X[0], SampleSet):
+            return [
+                SampleSet(
+                    [self._project_sample(sample) for sample in sset.samples],
+                    parent=sset,
+                )
+                for sset in X
+            ]
+
+        raise ValueError("Type not allowed %s" % type(X[0]))
+
+    def _project_sample(self, sample):
+        """Return a new ``Sample`` holding the projection of ``sample.data``."""
+        # NOTE(review): assumes ``Sample(data, parent=...)`` and
+        # ``SampleSet(samples, parent=...)`` constructors - confirm against
+        # bob.pipelines.
+        # TODO: checkpoint the projected features (``self.make_path`` plus
+        # ``write_feature``) and return ``DelayedSample`` objects instead.
+        return Sample(self.instance.project(sample.data), parent=sample)
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
index 06d891a883bd64c8ebf82937c5b726cbc405dbe3..5ce6e673c5026bbaa84dff1266781febab51f35c 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
@@ -54,7 +54,6 @@ def train_background_model(background_model_samples, extractor):
 def create_biometric_reference(
     biometric_reference_samples, extractor, biometric_algorithm
 ):
-
     biometric_reference_features = extractor.transform(biometric_reference_samples)
 
     # TODO: I KNOW THIS LOOKS UGLY, BUT THIS `MAP_PARTITIONS` HAS TO APPEAR SOMEWHERE