diff --git a/bob/bio/base/pipelines/vanilla_biometrics/annotated_legacy.py b/bob/bio/base/pipelines/vanilla_biometrics/annotated_legacy.py deleted file mode 100644 index 91f8d4877bfdcce451b4495ffec265dc61cf104a..0000000000000000000000000000000000000000 --- a/bob/bio/base/pipelines/vanilla_biometrics/annotated_legacy.py +++ /dev/null @@ -1,328 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : - - -"""Re-usable blocks for legacy bob.bio.base algorithms""" - - -import os -import copy -import functools - -import bob.io.base - - -from .legacy import DatabaseConnector -from .blocks import SampleLoader -from bob.pipelines.sample.sample import SampleSet, DelayedSample, Sample -from bob.pipelines.utils import is_picklable - -import logging -logger = logging.getLogger("bob.bio.base") - - -class DatabaseConnectorAnnotated(DatabaseConnector): - """Wraps a bob.bio.base database and generates conforming samples for datasets - that has annotations - - This connector allows wrapping generic bob.bio.base datasets and generate - samples that conform to the specifications of biometric pipelines defined - in this package. - - - Parameters - ---------- - - database : object - An instantiated version of a bob.bio.base.Database object - - protocol : str - The name of the protocol to generate samples from. - To be plugged at :py:method:`bob.db.base.Database.objects`. - - """ - - def __init__(self, database, protocol): - super(DatabaseConnectorAnnotated, self).__init__(database, protocol) - - - def background_model_samples(self): - """Returns :py:class:`Sample`'s to train a background model (group - ``world``). - - - Returns - ------- - - samples : list - List of samples conforming the pipeline API for background - model training. See, e.g., :py:func:`.pipelines.first`. - - """ - - # TODO: This should be organized by client - retval = [] - - objects = self.database.objects(protocol=self.protocol, groups="world") - - return [ - SampleSet( - [ - DelayedSample( - load=functools.partial( - k.load, - self.database.original_directory, - self.database.original_extension, - ), - id=k.id, - path=k.path, - annotations=self.database.annotations(k) - ) - ] - ) - for k in objects - ] - - def references(self, group="dev"): - """Returns :py:class:`Reference`'s to enroll biometric references - - - Parameters - ---------- - - group : :py:class:`str`, optional - A ``group`` to be plugged at - :py:meth:`bob.db.base.Database.objects` - - - Returns - ------- - - references : list - List of samples conforming the pipeline API for the creation of - biometric references. See, e.g., :py:func:`.pipelines.first`. - - """ - - retval = [] - - for m in self.database.model_ids_with_protocol(protocol=self.protocol, groups=group): - - objects = self.database.objects( - protocol=self.protocol, - groups=group, - model_ids=(m,), - purposes="enroll", - ) - - retval.append( - SampleSet( - [ - DelayedSample( - load=functools.partial( - k.load, - self.database.original_directory, - self.database.original_extension, - ), - id=k.id, - path=k.path, - annotations=self.database.annotations(k) - ) - for k in objects - ], - id=m, - path=str(m), - subject=objects[0].client_id, - ) - ) - - return retval - - def probes(self, group): - """Returns :py:class:`Probe`'s to score biometric references - - - Parameters - ---------- - - group : str - A ``group`` to be plugged at - :py:meth:`bob.db.base.Database.objects` - - - Returns - ------- - - probes : list - List of samples conforming the pipeline API for the creation of - biometric probes. 
See, e.g., :py:func:`.pipelines.first`. - - """ - - probes = dict() - - for m in self.database.model_ids_with_protocol(protocol=self.protocol, groups=group): - - # Getting all the probe objects from a particular biometric - # reference - objects = self.database.objects( - protocol=self.protocol, - groups=group, - model_ids=(m,), - purposes="probe", - ) - - # Creating probe samples - for o in objects: - if o.id not in probes: - probes[o.id] = SampleSet( - [ - DelayedSample( - load=functools.partial( - o.load, - self.database.original_directory, - self.database.original_extension, - ), - id=o.id, - path=o.path, - annotations=self.database.annotations(o) - ) - ], - id=o.id, - path=o.path, - subject=o.client_id, - references=[m], - ) - else: - probes[o.id].references.append(m) - - return list(probes.values()) - - -class SampleLoaderAnnotated(SampleLoader): - """Adaptor for loading, preprocessing and feature extracting samples that uses annotations - - This adaptor class wraps around sample: - - .. code-block:: text - - [loading [-> preprocessing [-> extraction]]] - - The input sample object must obbey the following (minimal) API: - - * attribute ``id``: Contains an unique (string-fiable) identifier for - processed samples - * attribute ``data``: Contains the data for this sample - - Optional checkpointing is also implemented for each of the states, - independently. You may check-point just the preprocessing, feature - extraction or both. - - - Parameters - ---------- - - pipeline : :py:class:`list` of (:py:class:`str`, callable) - A list of doubles in which the first entry are names of each processing - step in the pipeline and second entry must be default-constructible - :py:class:`bob.bio.base.preprocessor.Preprocessor` or - :py:class:`bob.bio.base.preprocessor.Extractor` in any order. Each - of these objects must be a python type, that can be instantiated and - used through its ``__call__()`` interface to process a single entry of - a sample. For python types that you may want to plug-in, but do not - offer a default constructor that you like, pass the result of - :py:func:`functools.partial` instead. - - """ - - def __init__(self, pipeline): - super(SampleLoaderAnnotated, self).__init__(pipeline) - - - def _handle_step(self, sset, func, checkpoint): - """Handles a single step in the pipeline, with optional checkpointing - - Parameters - ---------- - - sset : SampleSet - The original sample set to be processed (delayed or pre-loaded) - - func : callable - The processing function to call for processing **each** sample in - the set, if needs be - - checkpoint : str, None - An optional string that may point to a directory that will be used - for checkpointing the processing phase in question - - - Returns - ------- - - r : SampleSet - The prototype processed sample. If no checkpointing required, this - will be of type :py:class:`Sample`. 
Otherwise, it will be a - :py:class:`DelayedSample` - - """ - - if checkpoint is not None: - samples = [] # processed samples - for s in sset.samples: - # there can be a checkpoint for the data to be processed - candidate = os.path.join(checkpoint, s.path + ".hdf5") - if not os.path.exists(candidate): - - # TODO: Fix this on bob.bio.base - try: - # preprocessing is required, and checkpointing, do it now - data = func(s.data, annotations=s.annotations) - except: - data = func(s.data) - - # notice this can be called in parallel w/o failing - bob.io.base.create_directories_safe( - os.path.dirname(candidate) - ) - # bob.bio.base standard interface for preprocessor - # has a read/write_data methods - writer = ( - getattr(func, "write_data") - if hasattr(func, "write_data") - else getattr(func, "write_feature") - ) - writer(data, candidate) - - # because we are checkpointing, we return a DelayedSample - # instead of normal (preloaded) sample. This allows the next - # phase to avoid loading it would it be unnecessary (e.g. next - # phase is already check-pointed) - reader = ( - getattr(func, "read_data") - if hasattr(func, "read_data") - else getattr(func, "read_feature") - ) - if is_picklable(reader): - samples.append( - DelayedSample( - functools.partial(reader, candidate), parent=s - ) - ) - else: - logger.warning(f"The method {reader} is not picklable. Shiping its unbounded method to `DelayedSample`.") - reader = reader.__func__ # The reader object might not be picklable - - samples.append( - DelayedSample( - functools.partial(reader, None, candidate), parent=s - ) - ) - - else: - # if checkpointing is not required, load the data and preprocess it - # as we would normally do - samples = [Sample(func(s.data), parent=s) for s in sset.samples] - - r = SampleSet(samples, parent=sset) - return r diff --git a/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py b/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py index b6c9c589bc8cb408210c281efa486235d1bf2b38..735e7699d587464125fd5e8eb6dae25a81592fad 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/biometric_algorithm.py @@ -52,7 +52,7 @@ class BiometricAlgorithm(object): retval = [] for k in biometric_references: - # compute on-the-fly + # compute on-the-fly retval.append(self._enroll_sample_set(k)) return retval @@ -65,7 +65,6 @@ class BiometricAlgorithm(object): # Enroll return Sample(self.enroll(data), parent=sampleset) - def enroll(self, data, extractor=None, **kwargs): """ It handles the creation of ONE biometric reference for the vanilla ppipeline @@ -80,8 +79,9 @@ class BiometricAlgorithm(object): raise NotImplemented("Please, implement me") - - def _score_samples(self, probes, biometric_references, extractor=None, *args, **kwargs): + def _score_samples( + self, probes, biometric_references, extractor=None, *args, **kwargs + ): """Scores a new sample against multiple (potential) references Parameters @@ -119,17 +119,16 @@ class BiometricAlgorithm(object): retval.append(self._score_sample_set(p, biometric_references, extractor)) return retval - def _score_sample_set(self, sampleset, biometric_references, extractor): """Given a sampleset for probing, compute the scores and retures a sample set with the scores """ - # Stacking the samples from a sampleset + # Stacking the samples from a sampleset data = [s.data for s in sampleset.samples] # Compute scores for each sample inside of the sample set # TODO: In some cases we want to compute 1 
score per sampleset (IJB-C) - # We should add an agregator function here so we can properlly agregate samples from + # We should add an agregator function here so we can properlly agregate samples from # a sampleset either after or before scoring. # To be honest, this should be the default behaviour retval = [] @@ -137,7 +136,9 @@ class BiometricAlgorithm(object): # Creating one sample per comparison subprobe_scores = [] - for ref in [r for r in biometric_references if r.key in sampleset.references]: + for ref in [ + r for r in biometric_references if r.key in sampleset.references + ]: subprobe_scores.append( Sample(self.score(ref.data, s, extractor), parent=ref) ) @@ -149,8 +150,7 @@ class BiometricAlgorithm(object): return retval - - def score(self, biometric_reference, data, extractor=None, **kwargs): + def score(self, biometric_reference, data, extractor=None, **kwargs): """It handles the score computation for one sample Parameters @@ -175,6 +175,8 @@ class BiometricAlgorithm(object): from bob.pipelines.mixins import CheckpointMixin + + class BiometricAlgorithmCheckpointMixin(CheckpointMixin): """Mixing used to checkpoint Enrolled and Scoring samples. @@ -196,21 +198,23 @@ class BiometricAlgorithmCheckpointMixin(CheckpointMixin): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.biometric_reference_dir = os.path.join(self.features_dir, "biometric_references") + self.biometric_reference_dir = os.path.join( + self.features_dir, "biometric_references" + ) self.score_dir = os.path.join(self.features_dir, "scores") - def save(self, sample, path): return bob.io.base.save(sample.data, path, create_directories=True) - def _enroll_sample_set(self, sampleset): """ Enroll a sample set with checkpointing """ # Amending `models` directory - path = os.path.join(self.biometric_reference_dir, str(sampleset.key) + self.extension) + path = os.path.join( + self.biometric_reference_dir, str(sampleset.key) + self.extension + ) if path is None or not os.path.isfile(path): # Enrolling the sample @@ -221,22 +225,24 @@ class BiometricAlgorithmCheckpointMixin(CheckpointMixin): # Dealaying it. 
# This seems inefficient, but it's crucial for large datasets - delayed_enrolled_sample = DelayedSample(functools.partial(bob.io.base.load, path), enrolled_sample) + delayed_enrolled_sample = DelayedSample( + functools.partial(bob.io.base.load, path), enrolled_sample + ) else: # If sample already there, just load delayed_enrolled_sample = self.load(path) delayed_enrolled_sample.key = sampleset.key - return delayed_enrolled_sample - def _score_sample_set(self, sampleset, biometric_references, extractor): """Given a sampleset for probing, compute the scores and retures a sample set with the scores """ # Computing score - scored_sample_set = super()._score_sample_set(sampleset, biometric_references, extractor) + scored_sample_set = super()._score_sample_set( + sampleset, biometric_references, extractor + ) for s in scored_sample_set: # Checkpointing score @@ -251,15 +257,15 @@ class BiometricAlgorithmCheckpointMixin(CheckpointMixin): import scipy.spatial.distance from sklearn.utils.validation import check_array -class Distance(BiometricAlgorithm): - def __init__(self,distance_function = scipy.spatial.distance.euclidean,factor=-1): + +class Distance(BiometricAlgorithm): + def __init__(self, distance_function=scipy.spatial.distance.euclidean, factor=-1): self.distance_function = distance_function self.factor = factor - - def enroll(self, enroll_features, **kwargs): + def enroll(self, enroll_features, **kwargs): """enroll(enroll_features) -> model Enrolls the model by storing all given input vectors. @@ -281,7 +287,6 @@ class Distance(BiometricAlgorithm): return numpy.mean(enroll_features, axis=0) - def score(self, model, probe, extractor=None, **kwargs): """score(model, probe) -> float @@ -308,15 +313,16 @@ class Distance(BiometricAlgorithm): return self.factor * self.distance_function(model, probe) - def save_scores_four_columns(path, probe): """ Write scores in the four columns format """ - + with open(path, "w") as f: for biometric_reference in probe.samples: - line = "{0} {1} {2} {3}\n".format(biometric_reference.key, probe.key, probe.path, biometric_reference.data) + line = "{0} {1} {2} {3}\n".format( + biometric_reference.key, probe.key, probe.path, biometric_reference.data + ) f.write(line) return DelayedSample(functools.partial(open, path)) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/blocks.py b/bob/bio/base/pipelines/vanilla_biometrics/blocks.py deleted file mode 100644 index 88390c13323b78ed7d69cddd5c9abcb7833b3fbd..0000000000000000000000000000000000000000 --- a/bob/bio/base/pipelines/vanilla_biometrics/blocks.py +++ /dev/null @@ -1,466 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : - -import copy -import functools -import numpy -import os -import bob.io.base -from bob.pipelines.sample.sample import DelayedSample, SampleSet, Sample -from bob.pipelines.utils import is_picklable - -"""Re-usable blocks for legacy bob.bio.base algorithms""" -import logging -logger = logging.getLogger("bob.bio.base") - - -class SampleLoader: - """Adaptor for loading, preprocessing and feature extracting samples - - This adaptor class wraps around sample: - - .. code-block:: text - - [loading [-> preprocessing [-> extraction]]] - - The input sample object must obbey the following (minimal) API: - - * attribute ``id``: Contains an unique (string-fiable) identifier for - processed samples - * attribute ``data``: Contains the data for this sample - - Optional checkpointing is also implemented for each of the states, - independently. 
You may check-point just the preprocessing, feature - extraction or both. - - - Parameters - ---------- - - pipeline : :py:class:`list` of (:py:class:`str`, callable) - A list of doubles in which the first entry are names of each processing - step in the pipeline and second entry must be default-constructible - :py:class:`bob.bio.base.preprocessor.Preprocessor` or - :py:class:`bob.bio.base.preprocessor.Extractor` in any order. Each - of these objects must be a python type, that can be instantiated and - used through its ``__call__()`` interface to process a single entry of - a sample. For python types that you may want to plug-in, but do not - offer a default constructor that you like, pass the result of - :py:func:`functools.partial` instead. - - """ - - def __init__(self, pipeline): - self.pipeline = copy.deepcopy(pipeline) - - def _handle_step(self, sset, func, checkpoint): - """Handles a single step in the pipeline, with optional checkpointing - - Parameters - ---------- - - sset : SampleSet - The original sample set to be processed (delayed or pre-loaded) - - func : callable - The processing function to call for processing **each** sample in - the set, if needs be - - checkpoint : str, None - An optional string that may point to a directory that will be used - for checkpointing the processing phase in question - - - Returns - ------- - - r : SampleSet - The prototype processed sample. If no checkpointing required, this - will be of type :py:class:`Sample`. Otherwise, it will be a - :py:class:`DelayedSample` - - """ - - if checkpoint is not None: - samples = [] # processed samples - for s in sset.samples: - # there can be a checkpoint for the data to be processed - candidate = os.path.join(checkpoint, s.path + ".hdf5") - if not os.path.exists(candidate): - # preprocessing is required, and checkpointing, do it now - data = func(s.data) - - # notice this can be called in parallel w/o failing - bob.io.base.create_directories_safe(os.path.dirname(candidate)) - # bob.bio.base standard interface for preprocessor - # has a read/write_data methods - writer = ( - getattr(func, "write_data") - if hasattr(func, "write_data") - else getattr(func, "write_feature") - ) - writer(data, candidate) - - # because we are checkpointing, we return a DelayedSample - # instead of normal (preloaded) sample. This allows the next - # phase to avoid loading it would it be unnecessary (e.g. next - # phase is already check-pointed) - reader = ( - getattr(func, "read_data") - if hasattr(func, "read_data") - else getattr(func, "read_feature") - ) - if is_picklable(reader): - samples.append( - DelayedSample( - functools.partial(reader, candidate), parent=s - ) - ) - else: - logger.warning(f"The method {func} is not picklable. 
Shiping its unbounded method to `DelayedSample`.") - reader = reader.__func__ # The reader object might not be picklable - - samples.append( - DelayedSample( - functools.partial(reader, None, candidate), parent=s - ) - ) - else: - # if checkpointing is not required, load the data and preprocess it - # as we would normally do - samples = [Sample(func(s.data), parent=s) for s in sset.samples] - - r = SampleSet(samples, parent=sset) - return r - - def _handle_sample(self, sset, pipeline): - """Handles a single sampleset through a pipelien - - Parameters - ---------- - - sset : SampleSet - The original sample set to be processed (delayed or pre-loaded) - - pipeline : :py:class:`list` of :py:class:`tuple` - A list of tuples, each comprising of one processing function and - one checkpoint directory (:py:class:`str` or ``None``, to avoid - checkpointing that phase), respectively - - - Returns - ------- - - r : Sample - The processed sample - - """ - - r = sset - for func, checkpoint in pipeline: - r = r if func is None else self._handle_step(r, func, checkpoint) - return r - - def __call__(self, samples, checkpoints): - """Applies the pipeline chaining with optional checkpointing - - Our implementation is optimized to minimize disk I/O to the most. It - yields :py:class:`DelayedSample`'s instead of :py:class:`Sample` if - checkpointing is enabled. - - - Parameters - ---------- - - samples : list - List of :py:class:`SampleSet` to be treated by this pipeline - - checkpoints : dict - A dictionary (with any number of entries) that may contain as many - keys as those defined when you constructed this class with the - pipeline tuple list. Upon execution, the existance of an entry - that defines checkpointing, this phase of the pipeline will be - checkpointed. Notice that you are in the control of checkpointing. - If you miss an intermediary step, it will trigger this loader to - load the relevant sample, even if the next phase is supposed to be - checkpointed. This strategy keeps the implementation as simple as - possible. - - - Returns - ------- - - samplesets : list - Loaded samplesets, after optional preprocessing and extraction - - """ - - pipe = [(v(), checkpoints.get(k)) for k, v in self.pipeline] - return [self._handle_sample(k, pipe) for k in samples] - - -class VanillaBiometricsAlgoritm(object): - """Describes a base biometric algorithm for the Vanilla Biometrics Pipeline :ref:`_bob.bio.base.struct_bio_rec_sys`_. - - The model can be fitted (optionally). Otherwise, it can only execute - biometric model enrollement, via ``enroll()`` and scoring, with - ``score()``. 
- - """ - - def __init__(self, performs_projection=False): - self.performs_projection = performs_projection - pass - - def _stack_samples_2_ndarray(self, samplesets, stack_per_sampleset=False): - """ - Stack a set of :py:class:`bob.pipelines.sample.sample.SampleSet` - and convert them to :py:class:`numpy.ndarray` - - Parameters - ---------- - - samplesets: :py:class:`bob.pipelines.sample.sample.SampleSet` - Set of samples to be stackted - - stack_per_sampleset: bool - If true will return a list of :py:class:`numpy.ndarray`, each one for a sample set - - """ - - if stack_per_sampleset: - # TODO: Make it more efficient - all_data = [] - for sampleset in samplesets: - all_data.append( - numpy.array([sample.data for sample in sampleset.samples]) - ) - return all_data - else: - return numpy.array( - [ - sample.data - for sampleset in samplesets - for sample in sampleset.samples - ] - ) - - def fit(self, samplesets, checkpoint): - """ - This method should implement the sub-pipeline 0 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-0`. - - It represents the training of background models that an algorithm may need. - - Parameters - ---------- - - samplesets: :py:class:`bob.pipelines.sample.sample.SampleSet` - Set of samples used to train a background model - - - checkpoint: str - If provided, must the path leading to a location where this - model should be saved at (complete path without extension) - - currently, it needs to be provided because of existing - serialization requirements (see bob/bob.io.base#106), but - checkpointing will still work as expected. - - """ - raise NotImplemented("Please implement me") - - def enroll( - self, references, background_model=None, checkpoint=None, *args, **kwargs - ): - """This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`. - - It handles the creation of biometric references - - Parameters - ---------- - references : list - A list of :py:class:`SampleSet` objects to be used for - creating biometric references. The sets must be identified - with a unique id and a path, for eventual checkpointing. - - background_model : - Object containing the background model - - checkpoint : str, None - If passed and not ``None``, then it is considered to be the - path of a directory containing possible cached values for each - of the references in this experiment. If that is the case, the - values are loaded from there and not recomputed. - - *args, **kwargs : - Extra parameters that can be used to hook-up processing graph - dependencies, but are currently ignored - - """ - - def _project(k): - return ( - self.project_one_sample(background_model, k.data) - if self.performs_projection - else k.data - ) - - retval = [] - for k in references: - if checkpoint is not None: - candidate = os.path.join(os.path.join(checkpoint, k.path + ".hdf5")) - if not os.path.exists(candidate): - # create new checkpoint - bob.io.base.create_directories_safe(os.path.dirname(candidate)) - data = numpy.vstack([_project(s) for s in k.samples]) - enrolled = self.enroll_one_sample(data) - self.write_biometric_reference(enrolled, candidate) - - retval.append( - DelayedSample( - functools.partial(self.read_biometric_reference, candidate), - parent=k, - ) - ) - else: - # compute on-the-fly - data = _project(k) - retval.append(Sample(model.enroll_one_sample(data), parent=k)) - - return retval - - def write_biometric_reference(self, biometric_reference, filename): - """Writes the enrolled model to the given file. 
- In this base class implementation: - - - If the given model has a 'save' attribute, it calls ``model.save(bob.io.base.HDF5File(model_file), 'w')``. - In this case, the given model_file might be either a file name or a :py:class:`bob.io.base.HDF5File`. - - Otherwise, it uses :py:func:`bob.io.base.save` to do that. - - If you have a different format, please overwrite this function. - - **Parameters:** - - model : object - A model as returned by the :py:meth:`enroll` function, which should be written. - - model_file : str or :py:class:`bob.io.base.HDF5File` - The file open for writing, or the file name to write to. - """ - import h5py - - with h5py.File(filename, "w") as f: - f.create_dataset("biometric_reference", data=biometric_reference) - - def read_biometric_reference(self, filename): - import h5py - - with h5py.File(filename, "r") as f: - data = f["biometric_reference"].value - return data - - def enroll_one_sample(self, data): - """ - It handles the creation of ONE biometric reference for the vanilla ppipeline - - Parameters - ---------- - - data: - Data used for the creation of ONE BIOMETRIC REFERENCE - - """ - - raise NotImplemented("Please, implement me") - - def project_one_sample(self, data): - """ - If your method performs projection, it runs the projecttion - - Parameters - ---------- - - data: - Data used for the projection of ONE BIOMETRIC REFERENCE - - """ - - raise NotImplemented("Please, implement me") - - def score(self, probes, references, background_model=None, *args, **kwargs): - """Scores a new sample against multiple (potential) references - - Parameters - ---------- - - probes : list - A list of :py:class:`SampleSet` objects to be used for - scoring the input references - - references : list - A list of :py:class:`Sample` objects to be used for - scoring the input probes, must have an ``id`` attribute that - will be used to cross-reference which probes need to be scored. - - background_model : - Path pointing to stored model on disk - - *args, **kwargs : - Extra parameters that can be used to hook-up processing graph - dependencies, but are currently ignored - - - Returns - ------- - - scores : list - For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the - relevant reference's subject identifiers. - - """ - - def _project(k): - return ( - self.project_one_sample(background_model, k.data) - if self.performs_projection - else k.data - ) - - retval = [] - for p in probes: - data = numpy.vstack([_project(s) for s in p.samples]) - - for subprobe_id, (s, parent) in enumerate(zip(data, p.samples)): - # each sub-probe in the probe needs to be checked - subprobe_scores = [] - for ref in [r for r in references if r.id in p.references]: - subprobe_scores.append( - Sample(self.score_one_sample(ref.data, s), parent=ref) - ) - subprobe = SampleSet(subprobe_scores, parent=p) - subprobe.subprobe_id = subprobe_id - retval.append(subprobe) - return retval - - def score_one_sample(self, biometric_reference, data): - """It handles the score computation for one sample - - Parameters - ---------- - - biometric_reference : list - Biometric reference to be compared - - data : list - Data to be compared - - Returns - ------- - - scores : list - For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the - relevant reference's subject identifiers. 
- - """ - raise NotImplemented("Please, implement me") diff --git a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py index 267b2292edaccead53d617e1cc3337366292af14..f79e47c826c9406d1a7e4c71092b11a906a5f7ee 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/legacy.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/legacy.py @@ -78,7 +78,7 @@ class DatabaseConnector: path=k.path, ) ], - key=str(k.client_id) + key=str(k.client_id), ) for k in objects ] @@ -191,7 +191,184 @@ class DatabaseConnector: return list(probes.values()) +class DatabaseConnectorAnnotated(DatabaseConnector): + """Wraps a bob.bio.base database and generates conforming samples for datasets + that has annotations + + This connector allows wrapping generic bob.bio.base datasets and generate + samples that conform to the specifications of biometric pipelines defined + in this package. + + + Parameters + ---------- + + database : object + An instantiated version of a bob.bio.base.Database object + + protocol : str + The name of the protocol to generate samples from. + To be plugged at :py:method:`bob.db.base.Database.objects`. + + """ + + def __init__(self, database, protocol): + super(DatabaseConnectorAnnotated, self).__init__(database, protocol) + + def background_model_samples(self): + """Returns :py:class:`Sample`'s to train a background model (group + ``world``). + + + Returns + ------- + + samples : list + List of samples conforming the pipeline API for background + model training. See, e.g., :py:func:`.pipelines.first`. + + """ + + # TODO: This should be organized by client + retval = [] + + objects = self.database.objects(protocol=self.protocol, groups="world") + + return [ + SampleSet( + [ + DelayedSample( + load=functools.partial( + k.load, + self.database.original_directory, + self.database.original_extension, + ), + id=k.id, + path=k.path, + annotations=self.database.annotations(k), + ) + ] + ) + for k in objects + ] + + def references(self, group="dev"): + """Returns :py:class:`Reference`'s to enroll biometric references + + + Parameters + ---------- + + group : :py:class:`str`, optional + A ``group`` to be plugged at + :py:meth:`bob.db.base.Database.objects` + + + Returns + ------- + + references : list + List of samples conforming the pipeline API for the creation of + biometric references. See, e.g., :py:func:`.pipelines.first`. + + """ + + retval = [] + + for m in self.database.model_ids_with_protocol( + protocol=self.protocol, groups=group + ): + + objects = self.database.objects( + protocol=self.protocol, groups=group, model_ids=(m,), purposes="enroll" + ) + + retval.append( + SampleSet( + [ + DelayedSample( + load=functools.partial( + k.load, + self.database.original_directory, + self.database.original_extension, + ), + id=k.id, + path=k.path, + annotations=self.database.annotations(k), + ) + for k in objects + ], + id=m, + path=str(m), + subject=objects[0].client_id, + ) + ) + + return retval + + def probes(self, group): + """Returns :py:class:`Probe`'s to score biometric references + + + Parameters + ---------- + + group : str + A ``group`` to be plugged at + :py:meth:`bob.db.base.Database.objects` + + + Returns + ------- + + probes : list + List of samples conforming the pipeline API for the creation of + biometric probes. See, e.g., :py:func:`.pipelines.first`. 
+ + """ + + probes = dict() + + for m in self.database.model_ids_with_protocol( + protocol=self.protocol, groups=group + ): + + # Getting all the probe objects from a particular biometric + # reference + objects = self.database.objects( + protocol=self.protocol, groups=group, model_ids=(m,), purposes="probe" + ) + + # Creating probe samples + for o in objects: + if o.id not in probes: + probes[o.id] = SampleSet( + [ + DelayedSample( + load=functools.partial( + o.load, + self.database.original_directory, + self.database.original_extension, + ), + id=o.id, + path=o.path, + annotations=self.database.annotations(o), + ) + ], + id=o.id, + path=o.path, + subject=o.client_id, + references=[m], + ) + else: + probes[o.id].references.append(m) + + return list(probes.values()) + + from .biometric_algorithm import BiometricAlgorithm + + class LegacyBiometricAlgorithm(BiometricAlgorithm): """Biometric Algorithm that handles legacy :py:class:`bob.bio.base.algorithm.Algorithm` @@ -219,27 +396,27 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): self.callable = callable self.instance = None self.projector_file = None - self.features_dir = features_dir - self.biometric_reference_dir = os.path.join(self.features_dir, "biometric_references") + self.features_dir = features_dir + self.biometric_reference_dir = os.path.join( + self.features_dir, "biometric_references" + ) self.score_dir = os.path.join(self.features_dir, "scores") - self.extension=".hdf5" - + self.extension = ".hdf5" def _enroll_sample_set(self, sampleset): # Enroll return self.enroll(sampleset) - def _score_sample_set(self, sampleset, biometric_references, extractor): """Given a sampleset for probing, compute the scores and retures a sample set with the scores """ - # Stacking the samples from a sampleset + # Stacking the samples from a sampleset data = [s for s in sampleset.samples] # Compute scores for each sample inside of the sample set # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) - # We should add an agregator function here so we can properlly agregate samples from + # We should add an agregator function here so we can properlly agregate samples from # a sampleset either after or before scoring. 
# To be honest, this should be the default behaviour retval = [] @@ -247,8 +424,10 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): # Creating one sample per comparison subprobe_scores = [] - for ref in [r for r in biometric_references if r.key in sampleset.references]: - #subprobe_scores.append(self.score(ref.data, s, extractor)) + for ref in [ + r for r in biometric_references if r.key in sampleset.references + ]: + # subprobe_scores.append(self.score(ref.data, s, extractor)) subprobe_scores.append( Sample(self.score(ref.data, s.data, extractor), parent=ref) ) @@ -268,17 +447,20 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): return retval - - def enroll(self, enroll_features, **kwargs): + def enroll(self, enroll_features, **kwargs): if not isinstance(enroll_features, SampleSet): - raise ValueError(f"`enroll_features` should be the type SampleSet, not {enroll_features}") + raise ValueError( + f"`enroll_features` should be the type SampleSet, not {enroll_features}" + ) # Instantiates and do the "real" fit if self.instance is None: self.instance = self.callable() - path = os.path.join(self.biometric_reference_dir, str(enroll_features.key) + self.extension) + path = os.path.join( + self.biometric_reference_dir, str(enroll_features.key) + self.extension + ) if path is None or not os.path.isfile(path): # Enrolling @@ -293,7 +475,6 @@ class LegacyBiometricAlgorithm(BiometricAlgorithm): reader = get_reader(self.instance.read_model, path) return DelayedSample(reader, parent=enroll_features) - def score(self, model, probe, extractor=None, **kwargs): # Instantiates and do the "real" fit diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py index 5ce6e673c5026bbaa84dff1266781febab51f35c..fe346a6aa4e0cf20649cbebb24e1367174ffd4d5 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py @@ -12,6 +12,7 @@ import dask.bag import dask.delayed from bob.pipelines.sample import samplesets_to_samples + def biometric_pipeline( background_model_samples, biometric_reference_samples, @@ -22,10 +23,7 @@ def biometric_pipeline( ## Training background model (fit will return even if samples is ``None``, ## in which case we suppose the algorithm is not trainable in any way) - extractor = train_background_model( - background_model_samples, - extractor - ) + extractor = train_background_model(background_model_samples, extractor) ## Create biometric samples biometric_references = create_biometric_reference( @@ -34,10 +32,7 @@ def biometric_pipeline( ## Scores all probes return compute_scores( - probe_samples, - biometric_references, - extractor, - biometric_algorithm, + probe_samples, biometric_references, extractor, biometric_algorithm ) @@ -64,10 +59,14 @@ def create_biometric_reference( if isinstance(biometric_reference_features, dask.bag.core.Bag): # ASSUMING THAT IS A DASK THING IS COMMING - biometric_references = biometric_reference_features.map_partitions(biometric_algorithm._enroll_samples) + biometric_references = biometric_reference_features.map_partitions( + biometric_algorithm._enroll_samples + ) else: - biometric_references = biometric_algorithm._enroll_samples(biometric_reference_features) - + biometric_references = biometric_algorithm._enroll_samples( + biometric_reference_features + ) + # models is a list of Samples return biometric_references @@ -94,10 +93,14 @@ def compute_scores(probe_samples, biometric_references, extractor, biometric_alg 
         all_references = dask.delayed(list)(biometric_references)
-        scores = probe_features.map_partitions(biometric_algorithm._score_samples, all_references, extractor)
+        scores = probe_features.map_partitions(
+            biometric_algorithm._score_samples, all_references, extractor
+        )
     else:
-        scores = biometric_algorithm._score_samples(probe_features, biometric_references, extractor)
+        scores = biometric_algorithm._score_samples(
+            probe_features, biometric_references, extractor
+        )
     # scores is a list of Samples
     return scores
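
For readers of this diff, here is a minimal standalone sketch of the behaviour the ``Distance`` algorithm above implements -- ``enroll`` averages the enrollment features and ``score`` returns the negated distance between model and probe -- together with the four-column line written by ``save_scores_four_columns``. The keys, paths and feature values below are invented for illustration; only ``numpy``/``scipy`` are used, not the bob classes.

.. code-block:: python

    # Minimal standalone sketch (numpy/scipy only; keys and values are made up).
    import numpy
    import scipy.spatial.distance

    def enroll(enroll_features):
        # Distance.enroll: the biometric reference is the mean of the enrollment features.
        return numpy.mean(numpy.asarray(enroll_features), axis=0)

    def score(model, probe, distance_function=scipy.spatial.distance.euclidean, factor=-1):
        # Distance.score: negated distance, so larger scores mean "more similar".
        return factor * distance_function(model, probe)

    model = enroll([[1.0, 2.0, 3.0], [1.2, 1.8, 3.1]])  # hypothetical extracted features
    probe = [1.1, 2.0, 2.9]
    s = score(model, probe)

    # Four-column format produced by save_scores_four_columns:
    # <reference key> <probe key> <probe path> <score>
    print("subject-001 probe-042 probes/042.hdf5 {0}".format(s))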
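
Several pieces above (``BiometricAlgorithmCheckpointMixin._enroll_sample_set``, ``SampleLoader._handle_step``, ``LegacyBiometricAlgorithm.enroll``) share one checkpoint-then-delay pattern: compute a result only if its checkpoint file is missing, write it to disk, and hand back a lazy loader instead of the in-memory value. A rough sketch of that pattern, where ``compute``, ``save`` and ``load`` are hypothetical stand-ins for the preprocessor/algorithm I/O and ``DelayedSample`` is approximated by a plain ``functools.partial``:

.. code-block:: python

    # Checkpoint-then-delay sketch; compute/save/load are hypothetical stand-ins.
    import functools
    import os
    import pickle

    def compute(raw):
        return [x * 2 for x in raw]  # stands in for preprocessing/enrollment

    def save(data, path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "wb") as f:
            pickle.dump(data, f)

    def load(path):
        with open(path, "rb") as f:
            return pickle.load(f)

    def handle(sample_key, raw, checkpoint_dir):
        candidate = os.path.join(checkpoint_dir, sample_key + ".pkl")
        if not os.path.exists(candidate):
            # only compute when the checkpoint is missing, as in _handle_step
            save(compute(raw), candidate)
        # return a lazy loader instead of the data, mirroring
        # DelayedSample(functools.partial(reader, candidate), parent=s)
        return functools.partial(load, candidate)

    delayed = handle("sample-001", [1, 2, 3], "/tmp/checkpoints")
    print(delayed())  # data is only read back from disk when actually needed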
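
Finally, ``create_biometric_reference`` and ``compute_scores`` dispatch on whether their inputs arrive as a ``dask.bag.Bag``; when they do, the reference list is wrapped in ``dask.delayed(list)`` so every partition scores against the full set. The toy scoring function and data below are invented; only the ``dask.bag``/``dask.delayed`` calls mirror the ones used in the diff.

.. code-block:: python

    # Sketch of the dask.bag dispatch used in compute_scores (toy data and scorer).
    import dask
    import dask.bag

    def score_partition(probes, references):
        # receives one partition (a plain list) of probes, like _score_samples does
        return [(p, r, -abs(p - r)) for p in probes for r in references]

    references = [0.5, 1.5]
    probes = [0.4, 1.0, 1.6, 2.0]

    probe_bag = dask.bag.from_sequence(probes, npartitions=2)
    if isinstance(probe_bag, dask.bag.core.Bag):
        # delay the reference list so every partition sees the full set
        all_references = dask.delayed(list)(references)
        scores = probe_bag.map_partitions(score_partition, all_references).compute()
    else:
        scores = score_partition(probes, references)

    print(scores)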