-
Tiago de Freitas Pereira authoredTiago de Freitas Pereira authored
legacy.py 9.97 KiB
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
"""Re-usable blocks for legacy bob.bio.base algorithms"""
import os
import functools
from collections import defaultdict
from bob.bio.base import utils
from .abstract_classes import (
BioAlgorithm,
Database,
)
from bob.io.base import HDF5File
from bob.pipelines import DelayedSample, SampleSet, Sample
import logging
import copy
import pickle
from .score_writers import FourColumnsScoreWriter
from bob.bio.base.algorithm import Algorithm
logger = logging.getLogger("bob.bio.base")
def _biofile_to_delayed_sample(biofile, database):
    """Wrap a legacy ``biofile`` into a lazily-loaded :py:class:`DelayedSample`.

    Parameters
    ----------

    biofile : object
        Legacy file object; its ``load``, ``client_id`` and ``path``
        attributes are used.

    database : object
        Legacy database providing ``original_directory``,
        ``original_extension`` and ``annotations(biofile)``.

    Returns
    -------

    sample : :py:class:`DelayedSample`
        Sample whose ``load`` callable is ``biofile.load`` bound to the
        database directory and extension; the data itself is not read here.
    """
    # Bind the database location now; the actual read happens only when the
    # delayed sample is asked for its data.
    loader = functools.partial(
        biofile.load, database.original_directory, database.original_extension
    )

    metadata = dict(
        subject=str(biofile.client_id),
        key=biofile.path,
        path=biofile.path,
        annotations=database.annotations(biofile),
    )
    return DelayedSample(load=loader, **metadata)
class DatabaseConnector(Database):
    """Wraps a bob.bio.base database and generates conforming samples

    This connector allows wrapping generic bob.bio.base datasets and generate
    samples that conform to the specifications of biometric pipelines defined
    in this package.


    Parameters
    ----------

    database : object
        An instantiated version of a bob.bio.base.Database object

    allow_scoring_with_all_biometric_references: bool
        If True will allow the scoring function to be performed in one shot
        with multiple probes. This optimization is useful when all probes
        need to be compared with all biometric references AND your scoring
        function allows this broadcast computation.

    annotation_type: str
        Type of the annotations that the database provide.
        Allowed types are: `eyes-center` and `bounding-box`

    fixed_positions: dict
        In case database contains one single annotation for all samples.
        This is useful for registered databases.

    **kwargs
        Extra keyword arguments (e.g. ``protocol``) are accepted for call
        compatibility but are not read by this constructor; the wrapped
        ``database`` is queried exactly as it was configured by the caller.
    """

    def __init__(
        self,
        database,
        allow_scoring_with_all_biometric_references=True,
        annotation_type="eyes-center",
        fixed_positions=None,
        **kwargs,
    ):
        self.database = database
        self.allow_scoring_with_all_biometric_references = (
            allow_scoring_with_all_biometric_references
        )
        self.annotation_type = annotation_type
        self.fixed_positions = fixed_positions

    def background_model_samples(self):
        """Returns :py:class:`Sample`'s to train a background model

        The samples are built from the files returned by
        ``database.training_files()``.

        Returns
        -------

        samples : list
            List of delayed samples conforming the pipeline API for
            background model training.
        """
        objects = self.database.training_files()
        return [_biofile_to_delayed_sample(k, self.database) for k in objects]

    def references(self, group="dev"):
        """Returns :py:class:`Reference`'s to enroll biometric references

        Parameters
        ----------

        group : :py:class:`str`, optional
            A ``group`` to be plugged at
            :py:meth:`bob.db.base.Database.objects`

        Returns
        -------

        references : list
            One :py:class:`SampleSet` per model id of ``group``, holding the
            delayed enrollment samples of that model. ``key`` and ``path``
            are the stringified model id.
        """
        retval = []
        for m in self.database.model_ids(groups=group):
            objects = self.database.enroll_files(group=group, model_id=m)

            retval.append(
                SampleSet(
                    [_biofile_to_delayed_sample(k, self.database) for k in objects],
                    key=str(m),
                    path=str(m),
                    # The subject is taken from the first enrollment file,
                    # so every model is assumed to have at least one file.
                    subject=str(objects[0].client_id),
                )
            )

        return retval

    def probes(self, group="dev"):
        """Returns :py:class:`Probe`'s to score biometric references

        Parameters
        ----------

        group : :py:class:`str`, optional
            A ``group`` to be plugged at
            :py:meth:`bob.db.base.Database.objects`. Defaults to ``"dev"``,
            matching :py:meth:`references`.

        Returns
        -------

        probes : list
            One :py:class:`SampleSet` per distinct probe file (keyed by the
            file ``id``). Each set carries a ``references`` list with the
            stringified model ids the probe has to be compared against.
        """
        probes = dict()
        for m in self.database.model_ids(groups=group):
            # Getting all the probe objects from a particular biometric
            # reference
            objects = self.database.probe_files(group=group, model_id=m)

            # Creating probe samples
            for o in objects:
                if o.id not in probes:
                    probes[o.id] = SampleSet(
                        [_biofile_to_delayed_sample(o, self.database)],
                        key=str(o.client_id),
                        path=o.path,
                        subject=str(o.client_id),
                        references=[str(m)],
                    )
                else:
                    # Same probe file listed for another model: only extend
                    # the list of references it must be scored against.
                    probes[o.id].references.append(str(m))

        return list(probes.values())
class BioAlgorithmLegacy(BioAlgorithm):
    """Biometric Algorithm that handles :any:`bob.bio.base.algorithm.Algorithm`

    In this design, :any:`BioAlgorithm.enroll` maps to
    :any:`bob.bio.base.algorithm.Algorithm.enroll` and
    :any:`BioAlgorithm.score` maps :any:`bob.bio.base.algorithm.Algorithm.score`

    .. note::
        Legacy algorithms are always checkpointable


    Parameters
    ----------

    instance: object
        An instance of :any:`bob.bio.base.algorithm.Algorithm`

    base_dir: str
        Root directory of the checkpoints. Biometric references are stored
        under ``<base_dir>/biometric_references`` and scores under
        ``<base_dir>/scores``.

    force: bool
        If True, enrollment and scoring are recomputed (and their
        checkpoints rewritten) even when a checkpoint file already exists.

    projector_file: str
        Path handed to ``instance.load_projector``. Mandatory when
        ``instance.requires_projector_training`` is true.

    **kwargs
        Forwarded to the :any:`BioAlgorithm` constructor.

    Raises
    ------

    ValueError
        If ``instance`` is not a :any:`bob.bio.base.algorithm.Algorithm`, or
        if it requires projector training and no ``projector_file`` is given.


    Example
    -------
        >>> from bob.bio.base.pipelines.vanilla_biometrics import BioAlgorithmLegacy
        >>> from bob.bio.base.algorithm import PCA
        >>> biometric_algorithm = BioAlgorithmLegacy(PCA(subspace_dimension=0.99), base_dir="./", projector_file="Projector.hdf5")

    """

    def __init__(
        self, instance, base_dir, force=False, projector_file=None, **kwargs,
    ):
        super().__init__(**kwargs)

        if not isinstance(instance, Algorithm):
            raise ValueError(
                f"Only `bob.bio.base.Algorithm` supported, not `{instance}`"
            )
        logger.info(f"Using `bob.bio.base` legacy algorithm {instance}")

        if instance.requires_projector_training and projector_file is None:
            raise ValueError(f"{instance} requires a `projector_file` to be set")

        self.instance = instance
        # The projector is loaded lazily, on the first enroll/score call.
        self.is_background_model_loaded = False

        self.projector_file = projector_file
        self.biometric_reference_dir = os.path.join(base_dir, "biometric_references")
        self._biometric_reference_extension = ".hdf5"
        self.score_dir = os.path.join(base_dir, "scores")
        self.force = force

    def load_legacy_background_model(self):
        """Load the projector of the wrapped algorithm once per instance."""
        # Loading background model
        if not self.is_background_model_loaded:
            self.instance.load_projector(self.projector_file)
            self.is_background_model_loaded = True

    def enroll(self, enroll_features, **kwargs):
        """Enroll ``enroll_features`` with the wrapped legacy algorithm."""
        self.load_legacy_background_model()
        return self.instance.enroll(enroll_features)

    def score(self, biometric_reference, data, **kwargs):
        """Score ``data`` against one ``biometric_reference``.

        When the legacy algorithm returns a list of scores, they are fused
        into a single value with ``instance.probe_fusion_function``.
        """
        self.load_legacy_background_model()
        scores = self.instance.score(biometric_reference, data)
        if isinstance(scores, list):
            scores = self.instance.probe_fusion_function(scores)
        return scores

    def score_multiple_biometric_references(self, biometric_references, data, **kwargs):
        """Score ``data`` against several ``biometric_references`` at once."""
        # Like `enroll` and `score`, make sure the projector is available:
        # this may be the first method called on this instance (e.g. when
        # all biometric references were restored from checkpoints).
        self.load_legacy_background_model()
        scores = self.instance.score_for_multiple_models(biometric_references, data)
        return scores

    def write_biometric_reference(self, sample, path):
        """Write ``sample.data`` to ``path`` using ``instance.write_model``."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        self.instance.write_model(sample.data, path)

    def _enroll_sample_set(self, sampleset):
        """
        Enroll a sample set with checkpointing

        The biometric reference is computed and written only when its file
        does not exist yet (or ``force`` is set). In all cases a
        :py:class:`DelayedSample` that reads the model back from disk is
        returned.
        """

        # Amending `models` directory
        path = os.path.join(
            self.biometric_reference_dir,
            str(sampleset.key) + self._biometric_reference_extension,
        )
        if self.force or not os.path.exists(path):

            enrolled_sample = super()._enroll_sample_set(sampleset)

            # saving the new sample
            self.write_biometric_reference(enrolled_sample, path)

        delayed_enrolled_sample = DelayedSample(
            functools.partial(self.instance.read_model, path), parent=sampleset
        )

        return delayed_enrolled_sample

    def write_scores(self, samples, path):
        """Pickle ``samples`` to ``path``, creating parent directories."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Context manager guarantees the handle is flushed and closed.
        with open(path, "wb") as f:
            f.write(pickle.dumps(samples))

    def _score_sample_set(
        self,
        sampleset,
        biometric_references,
        allow_scoring_with_all_biometric_references=False,
    ):
        """Score a probe sample set with checkpointing

        Scores are computed and pickled only when the score file does not
        exist yet (or ``force`` is set); otherwise they are read back from
        the existing file.
        """

        def _load(path):
            # NOTE: unpickling is only safe for files written by this class;
            # never point `base_dir` at untrusted content.
            with open(path, "rb") as f:
                return pickle.loads(f.read())

        def _make_name(sampleset, biometric_references):
            # The score file name is composed by sampleset key and the
            # first 3 biometric_references
            # NOTE(review): key and suffix are concatenated without a
            # separator; kept as is so existing checkpoints remain valid.
            name = str(sampleset.key)
            suffix = "_".join([str(s.key) for s in biometric_references[0:3]])
            return name + suffix

        path = os.path.join(
            self.score_dir, _make_name(sampleset, biometric_references) + ".pkl"
        )

        if self.force or not os.path.exists(path):

            # Computing score
            scored_sample_set = super()._score_sample_set(
                sampleset,
                biometric_references,
                allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
            )

            self.write_scores(scored_sample_set.samples, path)

            # NOTE(review): this branch wraps the pickled list in a single
            # DelayedSample, whereas the cached branch below puts the loaded
            # samples directly in the SampleSet - confirm callers handle both.
            scored_sample_set = SampleSet(
                [DelayedSample(functools.partial(_load, path), parent=sampleset)],
                parent=sampleset,
            )
        else:
            scored_sample_set = SampleSet(_load(path), parent=sampleset)

        return scored_sample_set