diff --git a/bob/bio/base/config/baselines/lda_atnt_legacy.py b/bob/bio/base/config/baselines/lda_atnt_legacy.py
index a60c2cd696755687fb71d10036cfa83ac42f1d8b..aecb53fb4ba978cd9edbb07ead7c75e85bab1c50 100644
--- a/bob/bio/base/config/baselines/lda_atnt_legacy.py
+++ b/bob/bio/base/config/baselines/lda_atnt_legacy.py
@@ -76,9 +76,6 @@ from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import (
 )
 
 
-class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance):
-    pass
-
-
+class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): pass
 algorithm = CheckpointDistance(features_dir="./example/")
 # algorithm = Distance()
diff --git a/bob/bio/base/config/baselines/lda_atnt_legacy_all_legacy.py b/bob/bio/base/config/baselines/lda_atnt_legacy_all_legacy.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e12c1a971c0042e85c80124dab950357cba33b8
--- /dev/null
+++ b/bob/bio/base/config/baselines/lda_atnt_legacy_all_legacy.py
@@ -0,0 +1,74 @@
+# from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector, AlgorithmAdaptor
+
+import bob.db.atnt
+from bob.bio.base.pipelines.vanilla_biometrics.legacy import DatabaseConnector
+
+database = DatabaseConnector(bob.db.atnt.Database(), protocol="Default")
+
+from sklearn.pipeline import Pipeline, make_pipeline
+from sklearn.decomposition import PCA
+
+from bob.pipelines.mixins import CheckpointMixin, SampleMixin
+from bob.bio.base.mixins import CheckpointSampleLinearize
+from bob.bio.base.mixins.legacy import LegacyProcessorMixin, LegacyAlgorithmMixin
+from bob.bio.base.pipelines.vanilla_biometrics.legacy import LegacyBiometricAlgorithm
+
+
+class CheckpointSamplePCA(CheckpointMixin, SampleMixin, PCA):
+    """
+    Enables SAMPLE and CHECKPOINTING handling for https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
+    """
+
+    pass
+
+
+#### PREPROCESSOR LEGACY ###
+import functools
+
+# Cropping
+CROPPED_IMAGE_HEIGHT = 80
+CROPPED_IMAGE_WIDTH = CROPPED_IMAGE_HEIGHT * 4 // 5
+
+# eye positions for frontal images
+RIGHT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 - 1)
+LEFT_EYE_POS = (CROPPED_IMAGE_HEIGHT // 5, CROPPED_IMAGE_WIDTH // 4 * 3)
+
+
+# RANDOM EYES POSITIONS
+# I JUST MADE UP THESE NUMBERS
+FIXED_RIGHT_EYE_POS = (30, 30)
+FIXED_LEFT_EYE_POS = (20, 50)
+import bob.bio.face
+import bob.bio.base.algorithm  # needed for bob.bio.base.algorithm.LDA below
+
+face_cropper = functools.partial(
+    bob.bio.face.preprocessor.FaceCrop,
+    cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH),
+    cropped_positions={"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS},
+    fixed_positions={"leye": FIXED_LEFT_EYE_POS, "reye": FIXED_RIGHT_EYE_POS},
+)
+
+from bob.pipelines.mixins import mix_me_up
+preprocessor = mix_me_up((CheckpointMixin, SampleMixin), LegacyProcessorMixin)
+
+#### ALGORITHM LEGACY #####
+
+algorithm_estimator = functools.partial(bob.bio.base.algorithm.LDA, use_pinv=True, pca_subspace_dimension=0.90)
+
+from bob.pipelines.mixins import dask_it
+
+extractor = Pipeline(
+    steps=[
+        ("0", preprocessor(callable=face_cropper, features_dir="./example/extractor0")),
+        ("1", CheckpointSampleLinearize(features_dir="./example/extractor1")),
+        (
+            "2",
+            LegacyAlgorithmMixin(
+                callable=algorithm_estimator, features_dir="./example/extractor2", model_path="./example/"
+            ),
+        ),
+    ]
+)
+
+extractor = dask_it(extractor)
+
+algorithm = LegacyBiometricAlgorithm(callable=algorithm_estimator, features_dir="./example/")
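+
+# Usage sketch (an assumption, not taken from this patch): this baseline is
+# meant to be consumed by the vanilla-biometrics command implemented in
+# bob/bio/base/script/vanilla_biometrics.py, roughly:
+#
+#   bob pipelines vanilla-biometrics ./lda_atnt_legacy_all_legacy.py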
index 9ce0446474190933d3c8e1b7e9970b3255532f0c..68c055124ee31e3dad9b9f027884ccdc38808c34 100644
--- a/bob/bio/base/config/baselines/pca_atnt.py
+++ b/bob/bio/base/config/baselines/pca_atnt.py
@@ -20,7 +20,7 @@ class CheckpointSamplePCA(CheckpointMixin, SampleMixin, PCA):
 from bob.pipelines.mixins import dask_it
 extractor = Pipeline(steps=[('0',CheckpointSampleLinearize(features_dir="./example/extractor0")), ('1',CheckpointSamplePCA(features_dir="./example/extractor1", model_path="./example/pca.pkl"))])
-extractor = dask_it(extractor)
+#extractor = dask_it(extractor)
 
 from bob.bio.base.pipelines.vanilla_biometrics.biometric_algorithm import Distance, BiometricAlgorithmCheckpointMixin
 class CheckpointDistance(BiometricAlgorithmCheckpointMixin, Distance): pass
diff --git a/bob/bio/base/mixins/legacy.py b/bob/bio/base/mixins/legacy.py
index 63e03b90fbcb55a535b817b948cb7187d46fdd83..1114d2d5ce0b2cec560b1c2e0badc7344555c1cc 100644
--- a/bob/bio/base/mixins/legacy.py
+++ b/bob/bio/base/mixins/legacy.py
@@ -119,7 +119,7 @@ class LegacyAlgorithmMixin(CheckpointMixin,SampleMixin,BaseEstimator):
     def __init__(self, callable=None, **kwargs):
         super().__init__(**kwargs)
         self.callable = callable
-        self.instance = None
+        self.instance = None
         self.projector_file = None
 
@@ -157,7 +157,7 @@ class LegacyAlgorithmMixin(CheckpointMixin,SampleMixin,BaseEstimator):
         f = bob.io.base.HDF5File(path, "w")
         self.instance.write_feature(projected_data, f)
 
-        reader = self._get_reader(self.instance.read_feature, path)
+        reader = get_reader(self.instance.read_feature, path)
 
         return DelayedSample(reader, parent=sample)
 
@@ -190,13 +190,12 @@ class LegacyAlgorithmMixin(CheckpointMixin,SampleMixin,BaseEstimator):
             raise ValueError("Type not allowed %s" % type(X[0]))
 
-    def _get_reader(self, reader, path):
-        if(is_picklable(self.instance.read_feature)):
-            return functools.partial(reader, path)
-        else:
-            logger.warning(
-                f"The method {reader} is not picklable. Shiping its unbounded method to `DelayedSample`."
-            )
-            reader = reader.__func__ # The reader object might not be picklable
-            return functools.partial(reader, None, path)
-
+def get_reader(reader, path):
+    if is_picklable(reader):
+        return functools.partial(reader, path)
+    else:
+        logger.warning(
+            f"The method {reader} is not picklable. Shipping its unbound function to `DelayedSample`."
+        )
+        reader = reader.__func__  # The reader object might not be picklable
+        return functools.partial(reader, None, path)
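+
+# Sketch of why get_reader is now module-level: a bound method can drag its
+# whole (possibly unpicklable) instance into the serialized dask graph. The
+# names below are illustrative only, not part of this patch:
+#
+#   reader = get_reader(algorithm.read_feature, "/path/to/feature.hdf5")
+#   sample = DelayedSample(reader)  # reader is a picklable functools.partial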
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py b/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py
index 893562b6faeaebcf1d6550728230844972150083..b6c9c589bc8cb408210c281efa486235d1bf2b38 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py
@@ -132,6 +132,7 @@ class BiometricAlgorithm(object):
         # We should add an agregator function here so we can properlly agregate samples from
         # a sampleset either after or before scoring.
         # To be honest, this should be the default behaviour
+        retval = []
         for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)):
             # Creating one sample per comparison
             subprobe_scores = []
@@ -140,11 +141,13 @@ class BiometricAlgorithm(object):
                 subprobe_scores.append(
                     Sample(self.score(ref.data, s, extractor), parent=ref)
                 )
+
+            # Creating one sampleset per probe
             subprobe = SampleSet(subprobe_scores, parent=sampleset)
             subprobe.subprobe_id = subprobe_id
+            retval.append(subprobe)
 
-        return subprobe
+        return retval
 
 
     def score(self, biometric_reference, data, extractor=None, **kwargs):
@@ -235,12 +238,13 @@ class BiometricAlgorithmCheckpointMixin(CheckpointMixin):
         # Computing score
         scored_sample_set = super()._score_sample_set(sampleset, biometric_references, extractor)
 
-        # Checkpointing score
-        path = os.path.join(self.score_dir, str(sampleset.path) + ".txt")
-        bob.io.base.create_directories_safe(os.path.dirname(path))
+        for s in scored_sample_set:
+            # Checkpointing score
+            path = os.path.join(self.score_dir, str(s.path) + ".txt")
+            bob.io.base.create_directories_safe(os.path.dirname(path))
 
-        delayed_scored_sample = save_scores_four_columns(path, scored_sample_set)
-        scored_sample_set.samples = [delayed_scored_sample]
+            delayed_scored_sample = save_scores_four_columns(path, s)
+            s.samples = [delayed_scored_sample]
 
         return scored_sample_set
 
@@ -304,6 +308,7 @@ class Distance(BiometricAlgorithm):
 
         return self.factor * self.distance_function(model, probe)
 
+
 def save_scores_four_columns(path, probe):
     """
     Write scores in the four columns format
@@ -315,4 +320,3 @@ def save_scores_four_columns(path, probe):
         f.write(line)
 
     return DelayedSample(functools.partial(open, path))
-
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py
index 791111b96ab5cb946e2e9def98c017aa27348358..267b2292edaccead53d617e1cc3337366292af14 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py
@@ -12,9 +12,11 @@ from bob.pipelines.sample import DelayedSample, SampleSet, Sample
 import numpy
 import logging
 import dask
-
 import sys
 import pickle
+from bob.bio.base.mixins.legacy import get_reader
+from .biometric_algorithm import save_scores_four_columns
+
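+# get_reader and save_scores_four_columns are reused below so that models and
+# scores produced by legacy algorithms are checkpointed to disk and passed
+# around as picklable DelayedSamples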
 logger = logging.getLogger("bob.bio.base")
 
 
@@ -189,264 +191,113 @@ class DatabaseConnector:
         return list(probes.values())
 
-class AlgorithmAdaptor:
-    """Describes a biometric model based on :py:class:`bob.bio.base.algorithm.Algorithm`'s
+from .biometric_algorithm import BiometricAlgorithm
+class LegacyBiometricAlgorithm(BiometricAlgorithm):
+    """Biometric Algorithm that handles legacy :py:class:`bob.bio.base.algorithm.Algorithm`
 
-    The model can be fitted (optionally). Otherwise, it can only execute
-    biometric model enrollement, via ``enroll()`` and scoring, with
-    ``score()``.
-    Parameters
-    ----------
+    :py:meth:`BiometricAlgorithm.enroll` maps to :py:meth:`bob.bio.base.algorithm.Algorithm.enroll`
 
-    algorithm : object
-        An object that can be initialized by default and posseses the
-        following attributes and methods:
-
-        * attribute ``requires_projector_training``: indicating if the
-          model is fittable or not
-        * method ``train_projector(samples, path)``: receives a list of
-          objects produced by the equivalent ``Sample.data`` object, fed
-          **after** sample loading by the equivalent pipeline, and records
-          the model to an on-disk file
-        * method ``load_projector(path)``: loads the model state from a file
-        * method ``project(sample)``: projects the data to an embedding
-          from a single sample
-        * method ``enroll(samples)``: creates a scorable biometric
-          reference from a set of input samples
-        * method ``score(model, probe)``: scores a single probe, given the
-          input model, which can be obtained by a simple
-          ``project(sample)``
-
-        If the algorithm cannot be initialized by default, pass the result
-        of :py:func:`functools.partial` instead.
-
-    path : string
-        A path leading to a place where to save the fitted model or, in
-        case this model is not fittable (``not is_fitable == False``), then
-        name of the model to load for running prediction and scoring.
+    :py:meth:`BiometricAlgorithm.score` maps to :py:meth:`bob.bio.base.algorithm.Algorithm.score`
 
-    """
-    def __init__(self, algorithm):
-        self.algorithm = algorithm
-        self.extension = ".hdf5"
+    THIS CODE HAS TO BE CHECKPOINTABLE IN A SPECIAL WAY
 
-    def fit(self, samplesets, checkpoint):
-        """Fits this model, if it is fittable
+    Example
+    -------
 
-        Parameters
-        ----------
-
-        samplesets : list
-            A list of :py:class:`SampleSet`s to be used for fitting this
-            model
-
-        checkpoint : str
-            If provided, must the path leading to a location where this
-            model should be saved at (complete path without extension) -
-            currently, it needs to be provided because of existing
-            serialization requirements (see bob/bob.io.base#106), but
-            checkpointing will still work as expected.
-
-
-        Returns
-        -------
-
-        model : str
-            A path leading to the fitted model
-
-        """
-
-        self.path = checkpoint + self.extension
-        if not os.path.exists(self.path):  # needs training
-            model = self.algorithm()
-            bob.io.base.create_directories_safe(os.path.dirname(self.path))
-            if model.requires_projector_training:
-                alldata = [
-                    sample.data
-                    for sampleset in samplesets
-                    for sample in sampleset.samples
-                ]
-                model.train_projector(alldata, self.path)
-
-        return self.path
-
-    def enroll(self, references, path, checkpoint, *args, **kwargs):
-        """Runs prediction on multiple input samples
-
-        This method is optimized to deal with multiple reference biometric
-        samples at once, organized in partitions
-
-
-        Parameters
-        ----------
-
-        references : list
-            A list of :py:class:`SampleSet` objects to be used for
-            creating biometric references. The sets must be identified
-            with a unique id and a path, for eventual checkpointing.
+    Parameters
+    ----------
+        callable: callable
+            Callable that instantiates the legacy algorithm
 
-        path : str
-            Path pointing to stored model on disk
+    """
 
-        checkpoint : str, None
-            If passed and not ``None``, then it is considered to be the
-            path of a directory containing possible cached values for each
-            of the references in this experiment. If that is the case, the
-            values are loaded from there and not recomputed.
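+    # Usage sketch for the Example section above, mirroring the
+    # lda_atnt_legacy_all_legacy.py baseline (the LDA parameters come from that
+    # config, not from this class):
+    #
+    #   import functools
+    #   import bob.bio.base.algorithm
+    #   algorithm_estimator = functools.partial(
+    #       bob.bio.base.algorithm.LDA, use_pinv=True, pca_subspace_dimension=0.90
+    #   )
+    #   algorithm = LegacyBiometricAlgorithm(
+    #       callable=algorithm_estimator, features_dir="./example/"
+    #   )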
+    def __init__(self, callable=None, features_dir=None, **kwargs):
+        super().__init__(**kwargs)
+        self.callable = callable
+        self.instance = None
+        self.projector_file = None
+        self.features_dir = features_dir
+        self.biometric_reference_dir = os.path.join(self.features_dir, "biometric_references")
+        self.score_dir = os.path.join(self.features_dir, "scores")
+        self.extension = ".hdf5"
 
-        *args, **kwargs :
-            Extra parameters that can be used to hook-up processing graph
-            dependencies, but are currently ignored
 
-        Returns
-        -------
+    def _enroll_sample_set(self, sampleset):
+        # Enroll
+        return self.enroll(sampleset)
 
-        references : list
-            A list of :py:class:`.samples.Reference` objects that can be
-            used in scoring
+    def _score_sample_set(self, sampleset, biometric_references, extractor):
+        """Given a sampleset for probing, compute the scores and return a sample set with the scores"""
 
-        """
-
-        class _CachedModel:
-            def __init__(self, algorithm, path):
-                self.model = algorithm()
-                self.loaded = False
-                self.path = path
-
-            def load(self):
-                if not self.loaded:
-                    self.model.load_projector(self.path)
-                    self.loaded = True
-
-            def enroll(self, k):
-                self.load()
-                if self.model.requires_projector_training:
-                    return self.model.enroll(
-                        numpy.array([self.model.project(s.data) for s in k.samples])
-                    )
-                else:
-                    return self.model.enroll(numpy.array([s.data for s in k.samples]))
-
-            def write_enrolled(self, k, path):
-                self.model.write_model(k, path)
-
-        model = _CachedModel(self.algorithm, path)
-
-        retval = []
-        for k in references:
-            if checkpoint is not None:
-                candidate = os.path.join(os.path.join(checkpoint, k.path + ".hdf5"))
-                if not os.path.exists(candidate):
-                    # create new checkpoint
-                    bob.io.base.create_directories_safe(os.path.dirname(candidate))
-                    enrolled = model.enroll(k)
-                    model.model.write_model(enrolled, candidate)
-                retval.append(
-                    DelayedSample(
-                        functools.partial(model.model.read_model, candidate), parent=k
-                    )
-                )
-            else:
-                # compute on-the-fly
-                retval.append(Sample(model.enroll(k), parent=k))
-        return retval
-
-    def score(self, probes, references, path, checkpoint, *args, **kwargs):
-        """Scores a new sample against multiple (potential) references
+        # Stacking the samples from a sampleset
+        data = [s for s in sampleset.samples]
 
-        Parameters
-        ----------
+        # Compute scores for each sample inside of the sample set
+        # TODO: In some cases we want to compute 1 score per sampleset (IJB-C)
+        # We should add an aggregator function here so we can properly aggregate samples from
+        # a sampleset either after or before scoring.
+        # To be honest, this should be the default behaviour
+        retval = []
+        for subprobe_id, s in enumerate(sampleset.samples):
+            # Creating one sample per comparison
+            subprobe_scores = []
+
+            for ref in [r for r in biometric_references if r.key in sampleset.references]:
+                # subprobe_scores.append(self.score(ref.data, s, extractor))
+                subprobe_scores.append(
+                    Sample(self.score(ref.data, s.data, extractor), parent=ref)
+                )
 
-        probes : list
-            A list of :py:class:`SampleSet` objects to be used for
-            scoring the input references
+            # Creating one sampleset per probe
+            subprobe = SampleSet(subprobe_scores, parent=sampleset)
+            subprobe.subprobe_id = subprobe_id
 
-        references : list
-            A list of :py:class:`Sample` objects to be used for
-            scoring the input probes, must have an ``id`` attribute that
-            will be used to cross-reference which probes need to be scored.
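+            # The subprobe scores are written out as a four-column text file and
+            # replaced by a single DelayedSample reading that file back, so raw
+            # legacy score objects never travel through the pipeline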
+            # Checkpointing score MANDATORY FOR LEGACY
+            path = os.path.join(self.score_dir, str(subprobe.path) + ".txt")
+            bob.io.base.create_directories_safe(os.path.dirname(path))
 
-        path : str
-            Path pointing to stored model on disk
+            delayed_scored_sample = save_scores_four_columns(path, subprobe)
+            subprobe.samples = [delayed_scored_sample]
 
-        *args, **kwargs :
-            Extra parameters that can be used to hook-up processing graph
-            dependencies, but are currently ignored
+            retval.append(subprobe)
+        return retval
 
-        Returns
-        -------
-
-        scores : list
-            For each sample in a probe, returns as many scores as there are
-            samples in the probe, together with the probe's and the
-            relevant reference's subject identifiers.
+    def enroll(self, enroll_features, **kwargs):
 
-        """
+        if not isinstance(enroll_features, SampleSet):
+            raise ValueError(f"`enroll_features` should be of type SampleSet, not {enroll_features}")
 
-        model = self.algorithm()
-        model.load_projector(path)
-
-        score_sample_sets = []
-
-        # TODO: temporary optimization
-        optimize = True
-        references_stacked = None
-        ###############
-
-        for i,p in enumerate(probes):
-            if model.requires_projector_training:
-                data = [model.project(s.data) for s in p.samples]
-            else:
-                data = [s.data for s in p.samples]
-
-            for subprobe_id, (s, parent) in enumerate(zip(data, p.samples)):
-
-                # each sub-probe in the probe needs to be checked
-                subprobe_scores = []
-
-                # Temporary optimization
-                if optimize:
-                    # TODO: THIS IS JUST FOR CITER PROJECT
-                    # GIVE ME A BREAK AND LOOK SOMEWHERE ELSE
-                    if references_stacked is None:
-                        references_stacked = numpy.vstack([r.data for r in references if r.id in p.references])
-                    from scipy.spatial.distance import cdist
-                    scores = -1*cdist(references_stacked, s.reshape(1,-1), 'cosine')
-                    for ref, score in zip([r for r in references if r.id in p.references], scores):
-                        subprobe_scores.append(Sample(score[0], parent=ref))
-                else:
-                    def _compute_score(ref, probe_sample):
-                        return Sample(model.score(ref.data, probe_sample), parent=ref)
+        # Lazily instantiate the legacy algorithm
+        if self.instance is None:
+            self.instance = self.callable()
 
-                    # Parellelizing the scoring
-                    #subprobe_scores_delayed = []
-                    for ref in [r for r in references if r.id in p.references]:
-                        subprobe_scores.append(_compute_score(ref, s))
+        path = os.path.join(self.biometric_reference_dir, str(enroll_features.key) + self.extension)
+        if path is None or not os.path.isfile(path):
 
-                # Delaying the computation of a single score.
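+            # No checkpoint for this biometric reference yet: enroll from the
+            # raw sample data and persist the model through the legacy
+            # write_model/HDF5 interface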
-                subprobe = SampleSet(subprobe_scores, parent=parent)
-                subprobe.subprobe_id = subprobe_id
+            # Enrolling
+            data = [s.data for s in enroll_features.samples]
+            model = self.instance.enroll(data)
 
-                # Chekpointing if necessary
-                if checkpoint is not None:
-                    candidate = os.path.join(os.path.join(checkpoint, parent.path + ".txt"))
-                    bob.io.base.create_directories_safe(os.path.dirname(candidate))
-                    delayed_samples_subprobe = _save_scores_four_columns(candidate, subprobe)
-                    subprobe.samples = [delayed_samples_subprobe]
+            # Checkpointing
+            bob.io.base.create_directories_safe(os.path.dirname(path))
+            hdf5 = bob.io.base.HDF5File(path, "w")
+            self.instance.write_model(model, hdf5)
 
-                score_sample_sets.append(subprobe)
+        reader = get_reader(self.instance.read_model, path)
+        return DelayedSample(reader, parent=enroll_features)
 
-        return score_sample_sets
+    def score(self, model, probe, extractor=None, **kwargs):
 
-def _save_scores_four_columns(path, probe):
-
-    with open(path, "w") as f:
-        for biometric_reference in probe.samples:
-            line = "{0} {1} {2} {3}\n".format(biometric_reference.subject, probe.subject, probe.path, biometric_reference.data)
-            f.write(line)
+        # Lazily instantiate the legacy algorithm
+        if self.instance is None:
+            self.instance = self.callable()
 
-    return DelayedSample(functools.partial(open, path))
\ No newline at end of file
+        return self.instance.score(model, probe)
diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py
index 744df43001101859b11868b5d9a006c90350b780..f8e17b31f5ec48d7e7ae3c480dcc8d8787d84b9f 100644
--- a/bob/bio/base/script/vanilla_biometrics.py
+++ b/bob/bio/base/script/vanilla_biometrics.py
@@ -189,6 +189,9 @@ def vanilla_biometrics(
         logger.warning("`dask_client` not set. Your pipeline will run locally")
         result = result.compute()
 
+    # Flattening the per-probe lists of scored sample sets
+    import itertools
+    result = list(itertools.chain(*result))
     for probe in result:
         for sample in probe.samples: