#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""Re-usable blocks for legacy bob.bio.base algorithms"""

import copy
import functools
import logging
import os
import pickle
from collections import defaultdict

from bob.bio.base import utils
from bob.bio.base.algorithm import Algorithm
from bob.io.base import HDF5File
from bob.pipelines import DelayedSample, Sample, SampleSet

from .abstract_classes import BioAlgorithm, Database
from .score_writers import FourColumnsScoreWriter

logger = logging.getLogger("bob.bio.base")


def _biofile_to_delayed_sample(biofile, database):
    """Wrap a legacy ``BioFile`` in a lazily-loaded :any:`DelayedSample`.

    The actual data is only read (via ``biofile.load``) when the sample's
    ``data`` attribute is first accessed.

    Parameters
    ----------
    biofile : object
        A legacy ``bob.bio.base.database.BioFile``-like object.
    database : object
        The legacy database the file belongs to; provides the original
        directory/extension and per-file annotations.

    Returns
    -------
    DelayedSample
        A sample carrying ``subject``, ``key``, ``path`` and ``annotations``.
    """
    return DelayedSample(
        load=functools.partial(
            biofile.load,
            database.original_directory,
            database.original_extension,
        ),
        subject=str(biofile.client_id),
        key=biofile.path,
        path=biofile.path,
        annotations=database.annotations(biofile),
    )


class DatabaseConnector(Database):
    """Wraps a bob.bio.base database and generates conforming samples

    This connector allows wrapping generic bob.bio.base datasets and generate
    samples that conform to the specifications of biometric pipelines defined
    in this package.

    Parameters
    ----------
    database : object
        An instantiated version of a bob.bio.base.Database object
    protocol : str
        The name of the protocol to generate samples from.
        To be plugged at :py:method:`bob.db.base.Database.objects`.
    allow_scoring_with_all_biometric_references : bool
        If True will allow the scoring function to be performed in one shot
        with multiple probes.  This optimization is useful when all probes
        needs to be compared with all biometric references AND your scoring
        function allows this broadcast computation.
    annotation_type : str
        Type of the annotations that the database provide.
        Allowed types are: `eyes-center` and `bounding-box`
    fixed_positions : dict
        In case database contains one single annotation for all samples.
        This is useful for registered databases.
    """

    def __init__(
        self,
        database,
        allow_scoring_with_all_biometric_references=True,
        annotation_type="eyes-center",
        fixed_positions=None,
        **kwargs,
    ):
        self.database = database
        self.allow_scoring_with_all_biometric_references = (
            allow_scoring_with_all_biometric_references
        )
        self.annotation_type = annotation_type
        self.fixed_positions = fixed_positions

    def background_model_samples(self):
        """Returns :py:class:`Sample`'s to train a background model (group
        ``world``).

        Returns
        -------
        samples : list
            List of samples conforming the pipeline API for background model
            training.  See, e.g., :py:func:`.pipelines.first`.
        """
        objects = self.database.training_files()
        return [_biofile_to_delayed_sample(k, self.database) for k in objects]

    def references(self, group="dev"):
        """Returns :py:class:`Reference`'s to enroll biometric references

        Parameters
        ----------
        group : :py:class:`str`, optional
            A ``group`` to be plugged at
            :py:meth:`bob.db.base.Database.objects`

        Returns
        -------
        references : list
            List of samples conforming the pipeline API for the creation of
            biometric references.  See, e.g., :py:func:`.pipelines.first`.
        """
        retval = []
        for m in self.database.model_ids(groups=group):
            # One SampleSet per model: all enrollment files of that model,
            # keyed by the model id.
            objects = self.database.enroll_files(group=group, model_id=m)
            retval.append(
                SampleSet(
                    [
                        _biofile_to_delayed_sample(k, self.database)
                        for k in objects
                    ],
                    key=str(m),
                    path=str(m),
                    # All enroll files of a model share the same client.
                    subject=str(objects[0].client_id),
                )
            )
        return retval

    def probes(self, group):
        """Returns :py:class:`Probe`'s to score biometric references

        Parameters
        ----------
        group : str
            A ``group`` to be plugged at
            :py:meth:`bob.db.base.Database.objects`

        Returns
        -------
        probes : list
            List of samples conforming the pipeline API for the creation of
            biometric probes.  See, e.g., :py:func:`.pipelines.first`.
        """
        probes = dict()
        for m in self.database.model_ids(groups=group):
            # Getting all the probe objects from a particular biometric
            # reference
            objects = self.database.probe_files(group=group, model_id=m)

            # Creating probe samples.  A probe file may be shared by several
            # models: the first sighting creates the SampleSet, subsequent
            # ones only extend its ``references`` list.
            for o in objects:
                if o.id not in probes:
                    probes[o.id] = SampleSet(
                        [_biofile_to_delayed_sample(o, self.database)],
                        key=str(o.client_id),
                        path=o.path,
                        subject=str(o.client_id),
                        references=[str(m)],
                    )
                else:
                    probes[o.id].references.append(str(m))

        return list(probes.values())


class BioAlgorithmLegacy(BioAlgorithm):
    """Biometric Algorithm that handles :any:`bob.bio.base.algorithm.Algorithm`

    In this design, :any:`BioAlgorithm.enroll` maps to
    :any:`bob.bio.base.algorithm.Algorithm.enroll` and
    :any:`BioAlgorithm.score` maps
    :any:`bob.bio.base.algorithm.Algorithm.score`

    .. note::
        Legacy algorithms are always checkpointable

    Parameters
    ----------
    instance : object
        An instance of :any:`bob.bio.base.algorithm.Algorithm`

    Example
    -------
        >>> from bob.bio.base.pipelines.vanilla_biometrics import BioAlgorithmLegacy
        >>> from bob.bio.base.algorithm import PCA
        >>> biometric_algorithm = BioAlgorithmLegacy(PCA())
    """

    def __init__(
        self,
        instance,
        base_dir,
        force=False,
        projector_file=None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        if not isinstance(instance, Algorithm):
            raise ValueError(
                f"Only `bob.bio.base.Algorithm` supported, not `{instance}`"
            )
        logger.info(f"Using `bob.bio.base` legacy algorithm {instance}")

        # A projector file is mandatory whenever the wrapped algorithm
        # needs a trained background model.
        if instance.requires_projector_training and projector_file is None:
            raise ValueError(f"{instance} requires a `projector_file` to be set")

        self.instance = instance
        self.is_background_model_loaded = False

        self.projector_file = projector_file
        self.biometric_reference_dir = os.path.join(
            base_dir, "biometric_references"
        )
        self._biometric_reference_extension = ".hdf5"
        self.score_dir = os.path.join(base_dir, "scores")
        self.force = force

    def load_legacy_background_model(self):
        # Loading background model lazily, at most once per instance.
        if not self.is_background_model_loaded:
            self.instance.load_projector(self.projector_file)
            self.is_background_model_loaded = True

    def enroll(self, enroll_features, **kwargs):
        """Enroll a biometric reference via the legacy algorithm."""
        self.load_legacy_background_model()
        return self.instance.enroll(enroll_features)

    def score(self, biometric_reference, data, **kwargs):
        """Score one probe against one biometric reference.

        If the legacy algorithm returns a list of scores (multiple probe
        features), they are fused with its ``probe_fusion_function``.
        """
        self.load_legacy_background_model()
        scores = self.instance.score(biometric_reference, data)
        if isinstance(scores, list):
            scores = self.instance.probe_fusion_function(scores)
        return scores

    def score_multiple_biometric_references(
        self, biometric_references, data, **kwargs
    ):
        """Score one probe against several references in a single call."""
        scores = self.instance.score_for_multiple_models(
            biometric_references, data
        )
        return scores

    def write_biometric_reference(self, sample, path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        self.instance.write_model(sample.data, path)

    def _enroll_sample_set(self, sampleset):
        """
        Enroll a sample set with checkpointing
        """
        # Amending `models` directory
        path = os.path.join(
            self.biometric_reference_dir,
            str(sampleset.key) + self._biometric_reference_extension,
        )

        if self.force or not os.path.exists(path):
            enrolled_sample = super()._enroll_sample_set(sampleset)

            # saving the new sample
            self.write_biometric_reference(enrolled_sample, path)

        # Always return a delayed sample that reads the checkpointed model,
        # so downstream stages load it lazily.
        delayed_enrolled_sample = DelayedSample(
            functools.partial(self.instance.read_model, path), parent=sampleset
        )
        return delayed_enrolled_sample

    def write_scores(self, samples, path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Use a context manager so the file handle is closed even on error
        # (the original left it to the garbage collector).
        with open(path, "wb") as f:
            pickle.dump(samples, f)

    def _score_sample_set(
        self,
        sampleset,
        biometric_references,
        allow_scoring_with_all_biometric_references=False,
    ):
        """Score a probe sample set against references, with checkpointing.

        Scores are cached as pickle files under ``self.score_dir``; cached
        results are reused unless ``self.force`` is set.
        """

        def _load(path):
            # Context-managed read of a previously checkpointed score list.
            with open(path, "rb") as f:
                return pickle.load(f)

        def _make_name(sampleset, biometric_references):
            # The score file name is composed by sampleset key and the
            # first 3 biometric_references
            name = str(sampleset.key)
            suffix = "_".join([str(s.key) for s in biometric_references[0:3]])
            return name + suffix

        path = os.path.join(
            self.score_dir, _make_name(sampleset, biometric_references) + ".pkl"
        )

        if self.force or not os.path.exists(path):
            # Computing score
            scored_sample_set = super()._score_sample_set(
                sampleset,
                biometric_references,
                allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
            )
            self.write_scores(scored_sample_set.samples, path)

            scored_sample_set = SampleSet(
                [DelayedSample(functools.partial(_load, path), parent=sampleset)],
                parent=sampleset,
            )
        else:
            scored_sample_set = SampleSet(_load(path), parent=sampleset)

        return scored_sample_set