From de935b20e182c9ae53d365f4d02cb5a3b9a356f2 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Thu, 19 Mar 2020 18:49:36 +0100
Subject: [PATCH] Handling Legacy

---
 .../base/config/baselines/lda_atnt_legacy.py  |  83 +++++++++++
 bob/bio/base/mixins/legacy.py                 | 141 +++++++++++++++++-
 .../pipelines/vanilla_biometrics/pipeline.py  |   1 -
 3 files changed, 222 insertions(+), 3 deletions(-)
 create mode 100644 bob/bio/base/config/baselines/lda_atnt_legacy.py

diff --git a/bob/bio/base/config/baselines/lda_atnt_legacy.py b/bob/bio/base/config/baselines/lda_atnt_legacy.py
new file mode 100644
index 00000000..76e859cd
--- /dev/null
+++ b/bob/bio/base/config/baselines/lda_atnt_legacy.py
@@ -0,0 +1,83 @@
+# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor
+
+import bob.db.atnt
+from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector
+
+database = DatabaseConnector(bob.db.atnt.Database(), protocol="Default")
+
+from sklearn.pipeline import Pipeline, make_pipeline
+from sklearn.decomposition import PCA
+
+from bob.pipelines.mixins import CheckpointMixin, SampleMixin
+from bob.bio.base.mixins import CheckpointSampleLinearize
+from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin
+
+
+class CheckpointSamplePCA(CheckpointMixin, SampleMixin, PCA):
+    """
+    Enables SAMPLE and CHECKPOINTING handling for https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
+    """
+
+    pass
+
+
+#### PREPROCESSOR LEGACY ###
+import functools
+
+# Cropping
+CROPPED_IMAGE_HEIGHT = 80
+CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5
+
+# eye positions for frontal images
+RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1)
+LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3)
+
+
+# RANDOM EYES POSITIONS
+# I JUST MADE UP THESE NUMBERS
+FIXED_RIGHT_EYE_POS = (30, 30)
+FIXED_LEFT_EYE_POS = (20, 50)
+import bob.bio.face
+
+face_cropper = functools.partial(
+    bob.bio.face.preprocessor.FaceCrop,
+    cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH),
+    cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS},
+    fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS},
+)
+
+from bob.pipelines.mixins import mix_me_up
+preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin)
+
+#### ALGORITHM LEGACY #####
+
+algorithm = functools.partial(bob.bio.base.algorithm.LDA, use_pinv=True, pca_subspace_dimension=0.90)
+
+from bob.pipelines.mixins import dask_it
+
+extractor = Pipeline(
+    steps=[
+        ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")),
+        ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")),
+        (
+            "2",
+            LegacyAlgorithmMixin(
+                callable=algorithm, features_dir="./example/extractor2", model_path="./example/"
+            ),
+        ),
+    ]
+)
+# extractor = dask_it(extractor)
+
+from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import (
+    Distance,
+    BiometricAlgorithmCheckpointMixin,
+)
+
+
+class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance):
+    pass
+
+
+algorithm = CheckpointDistance(features_dir="./example/")
+# algorithm = Distance()
diff --git a/bob/bio/base/mixins/legacy.py b/bob/bio/base/mixins/legacy.py
index 3c3cc470..6f34efc6 100644
--- a/bob/bio/base/mixins/legacy.py
+++ b/bob/bio/base/mixins/legacy.py
@@ -8,9 +8,33 @@ Mixins to handle legacy components
 """
 
 from bob.pipelines.mixins import CheckpointMixin, SampleMixin
-from sklearn.base import TransformerMixin
+from sklearn.base import TransformerMixin, BaseEstimator
 from sklearn.utils.validation import check_array
+from bob.pipelines.sample import Sample, DelayedSample, SampleSet
+import numpy
+import logging
+import os
+logger = logging.getLogger(__name__)
 
+def scikit_to_bob_supervised(X, Y):
+    """
+    Given input data ready for :py:meth:`sklearn.base.BaseEstimator.fit`,
+    convert it for :py:meth:`bob.bio.base.algorithm.Algorithm.train_projector` when 
+    `performs_projection=True`
+    """
+
+    # TODO: THIS IS VERY INEFFICIENT
+    logger.warning("INEFFICIENCY WARNING. HERE YOU ARE USING A HACK FOR USING BOB ALGORITHMS IN SCIKIT LEARN PIPELINES. \
+                    WE RECOMMEND YOU TO PORT THIS ALGORITHM. DON'T BE LAZY :-)")
+
+    bob_output = dict()
+    for x,y in zip(X, Y):
+        if y in bob_output:
+            bob_output[y] = numpy.vstack((bob_output[y], x.data))
+        else:
+            bob_output[y] = x.data
+    
+    return [bob_output[k] for k in bob_output]
 
 class LegacyProcessorMixin(TransformerMixin):
     """Class that wraps :py:class:`bob.bio.base.preprocessor.Preprocessor` and
@@ -41,7 +65,8 @@ class LegacyProcessorMixin(TransformerMixin):
 
     """
 
-    def __init__(self, callable=None):
+    def __init__(self, callable=None, **kwargs):
+        super().__init__(**kwargs)
         self.callable = callable
         self.instance = None
 
@@ -56,3 +81,115 @@ class LegacyProcessorMixin(TransformerMixin):
         if self.instance is None:
             self.instance = self.callable()
         return [self.instance(x) for x in X]
+
+
+from bob.pipelines.mixins import CheckpointMixin, SampleMixin
+class LegacyAlgorithmMixin(CheckpointMixin,SampleMixin,BaseEstimator):
+    """Class that wraps :py:class:`bob.bio.base.algorithm.Algorithm` and
+    
+    LegacyAlgorithmMixin.fit maps :py:meth:`bob.bio.base.algorithm.Algorithm.train_projector`
+
+    LegacyAlgorithmMixin.transform maps :py:meth:`bob.bio.base.algorithm.Algorithm.project`
+
+    THIS HAS TO BE SAMPLABLE AND CHECKPOINTABLE
+
+
+    Example
+    -------
+
+        Wrapping preprocessor with functools
+        >>> from bob.bio.base.mixins.legacy import LegacyProcessorMixin
+        >>> from bob.bio.face.preprocessor import FaceCrop
+        >>> import functools
+        >>> transformer = LegacyProcessorMixin(functools.partial(FaceCrop, cropped_image_size=(10,10)))
+
+    Example
+    -------
+        Wrapping extractor 
+        >>> from bob.bio.base.mixins.legacy import LegacyProcessorMixin
+        >>> from bob.bio.face.extractor import Linearize
+        >>> transformer = LegacyProcessorMixin(Linearize)
+
+
+    Parameters
+    ----------
+      callable: callable
+         Callable function that instantiates the legacy bob algorithm
+
+    """
+
+    def __init__(self, callable=None, **kwargs):
+        super().__init__(**kwargs)
+        self.callable = callable
+        self.instance = None
+        self.projector_file = os.path.join(self.model_path, "Projector.hdf5")
+
+    def fit(self, X, y=None, **fit_params):
+        
+        if os.path.exists(self.projector_file):
+            return self
+
+        # Instantiate and run the "real" fit
+        if self.instance is None:
+            self.instance = self.callable()
+
+        if self.instance.performs_projection:
+            # Organizing the data by class
+            bob_X = scikit_to_bob_supervised(X, y)
+            self.instance.train_projector(bob_X, self.projector_file)
+        else:
+            self.instance.train_projector(X, **fit_params)
+
+        # Deleting the instance, so it's picklable
+        self.instance = None
+
+        return self
+
+    def transform(self, X):
+
+        if not isinstance(X, list):
+            raise ValueError("It's expected a list, not %s" % type(X))
+
+        # Instantiate and run the "real" transform
+        if self.instance is None:
+            self.instance = self.callable()
+        self.instance.load_projector(self.projector_file)
+
+        import ipdb; ipdb.set_trace()
+
+        if isinstance(X[0], Sample) or isinstance(X[0], DelayedSample):
+            #samples = []
+            for s in X:
+                projected_data = self.instance.project(s.data)
+        
+            #raw_X = [s.data for s in X]
+        elif isinstance(X[0], SampleSet):
+
+            sample_sets = []
+            for sset in X:
+
+                samples = []
+                for sample in sset.samples:
+
+                    # Project
+                    projected_data = self.instance.project(sample.data)
+
+                    #Checkpointing
+                    path = self.make_path(sample)
+                    self.instance.write_feature(path)
+
+                    samples.append(DelayedSample())
+
+
+                    pass
+                    #bob.io.base.save(projected_data)
+
+
+
+
+            #raw_X = [x.data for s in X for x in s.samples]
+        else:
+            raise ValueError("Type not allowed %s" % type(X[0]))
+
+
+        return self.instance.project(raw_X)
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
index 06d891a8..5ce6e673 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
@@ -54,7 +54,6 @@ def train_background_model(background_model_samples, extractor):
 def create_biometric_reference(
     biometric_reference_samples, extractor, biometric_algorithm
 ):
-    
     biometric_reference_features = extractor.transform(biometric_reference_samples)
 
     # TODO: I KNOW THIS LOOKS UGLY, BUT THIS `MAP_PARTITIONS` HAS TO APPEAR SOMEWHERE
-- 
GitLab