pipelines.py 7.98 KB
Newer Older
1
2
3
4
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :

"""
5
Implementation of the Vanilla Biometrics pipeline using Dask (see :ref:`bob.bio.base.struct_bio_rec_sys`)
6
7
8
9
10

This file contains simple processing blocks meant to be used
for bob.bio experiments
"""

Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
11
import logging
12
import numpy
13
from .score_writers import FourColumnsScoreWriter
14
15
16
17
18
from bob.pipelines.utils import isinstance_nested
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator
from bob.pipelines import SampleWrapper, wrap
from bob.bio.base.pipelines.vanilla_biometrics.abstract_classes import BioAlgorithm
19

Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
20
logger = logging.getLogger(__name__)
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
21
22
import tempfile
import os
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
23

Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
24

25
class VanillaBiometricsPipeline(object):
    """
    Vanilla Biometrics Pipeline

    This is the backbone of most biometric recognition systems.
    It implements three subpipelines and they are the following:

     - :py:meth:`VanillaBiometricsPipeline.train_background_model`: Initializes or trains your transformer.
        It will run :py:meth:`sklearn.base.BaseEstimator.fit`

     - :py:meth:`VanillaBiometricsPipeline.create_biometric_reference`: Creates biometric references
        It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
        :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.enroll`

     - :py:meth:`VanillaBiometricsPipeline.compute_scores`: Computes scores
        It will run :py:meth:`sklearn.base.BaseEstimator.transform` followed by a sequence of
        :py:meth:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm.score`


    Example
    -------
       >>> from bob.pipelines.transformers import Linearize
       >>> from sklearn.pipeline import make_pipeline
       >>> from bob.bio.base.pipelines.vanilla_biometrics import Distance, VanillaBiometricsPipeline
       >>> estimator_1 = Linearize()
       >>> transformer = make_pipeline(estimator_1)
       >>> biometric_algorithm = Distance()
       >>> pipeline = VanillaBiometricsPipeline(transformer, biometric_algorithm)
       >>> pipeline(samples_for_training_back_ground_model, samplesets_for_enroll, samplesets_for_scoring)  # doctest: +SKIP


    To run this pipeline using Dask, use the function :py:func:`dask_vanilla_biometrics`.

    Example
    -------
       >>> from bob.bio.base.pipelines.vanilla_biometrics import dask_vanilla_biometrics
       >>> pipeline = VanillaBiometricsPipeline(transformer, biometric_algorithm)
       >>> pipeline = dask_vanilla_biometrics(pipeline)
       >>> pipeline(samples_for_training_back_ground_model, samplesets_for_enroll, samplesets_for_scoring).compute()  # doctest: +SKIP


    Parameters
    ----------

      transformer: :py:class:`sklearn.pipeline.Pipeline` or a :py:class:`sklearn.base.BaseEstimator`
        Transformer that will preprocess your data

      biometric_algorithm: :py:class:`bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm`
        Biometrics algorithm object that implements the `enroll` and `score` methods

      score_writer: :any:`bob.bio.base.pipelines.vanilla_biometrics.ScoreWriter`
          Format to write scores. Defaults to a
          :any:`bob.bio.base.pipelines.vanilla_biometrics.FourColumnsScoreWriter`
          writing into a temporary directory that lives as long as this pipeline object.

    """

    def __init__(
        self, transformer, biometric_algorithm, score_writer=None,
    ):
        self.transformer = transformer
        self.biometric_algorithm = biometric_algorithm
        self.score_writer = score_writer

        # Only set when the default score writer is created below.
        self._tempdir = None

        if self.score_writer is None:
            # A ``TemporaryDirectory`` removes its directory as soon as the
            # object is garbage-collected, so a reference is kept on the
            # pipeline; a local variable would be dropped when ``__init__``
            # returns, leaving the writer with a path that no longer exists.
            self._tempdir = tempfile.TemporaryDirectory()
            self.score_writer = FourColumnsScoreWriter(self._tempdir.name)

        # Raises ``ValueError`` if transformer/biometric_algorithm are invalid
        check_valid_pipeline(self)

    def __call__(
        self,
        background_model_samples,
        biometric_reference_samples,
        probe_samples,
        allow_scoring_with_all_biometric_references=True,
    ):
        """
        Run the three subpipelines in sequence and return the scores.

        Parameters
        ----------

          background_model_samples:
            Samples handed to :py:meth:`train_background_model`
            (may be empty or ``None``)

          biometric_reference_samples:
            Samples handed to :py:meth:`create_biometric_reference`

          probe_samples:
            Samples handed to :py:meth:`compute_scores`

          allow_scoring_with_all_biometric_references: bool
            Forwarded unchanged to :py:meth:`compute_scores`

        Returns
        -------

          The scores returned by :py:meth:`compute_scores` (the probe features
          that method also returns are discarded).

        Note that ``self.transformer`` is replaced by whatever
        :py:meth:`train_background_model` returns.
        """
        logger.info(
            f" >> Vanilla Biometrics: Training background model with pipeline {self.transformer}"
        )

        # Training background model (fit will return even if samples is ``None``,
        # in which case we suppose the algorithm is not trainable in any way)
        self.transformer = self.train_background_model(background_model_samples)

        logger.info(
            f" >> Creating biometric references with the biometric algorithm {self.biometric_algorithm}"
        )

        # Create biometric samples
        biometric_references = self.create_biometric_reference(
            biometric_reference_samples
        )

        logger.info(
            f" >> Computing scores with the biometric algorithm {self.biometric_algorithm}"
        )

        # Scores all probes
        scores, _ = self.compute_scores(
            probe_samples,
            biometric_references,
            allow_scoring_with_all_biometric_references,
        )

        return scores

    def train_background_model(self, background_model_samples):
        """
        Fit ``self.transformer`` on the background model samples.

        When ``background_model_samples`` is ``None`` or empty, nothing is
        fitted, a warning is logged and the current transformer is returned
        as is. Otherwise the result of ``self.transformer.fit(...)`` is
        returned.
        """
        # background_model_samples is a list of Samples

        # We might have algorithms that has no data for training
        if background_model_samples is None or len(background_model_samples) == 0:
            logger.warning(
                "There's no data to train background model. "
                "For the rest of the execution it will be assumed that the pipeline is stateless."
            )
            return self.transformer

        return self.transformer.fit(background_model_samples)

    def create_biometric_reference(self, biometric_reference_samples):
        """
        Transform the enrollment samples with ``self.transformer`` and pass the
        result to ``self.biometric_algorithm.enroll_samples``, whose return
        value is returned.
        """
        biometric_reference_features = self.transformer.transform(
            biometric_reference_samples
        )

        biometric_references = self.biometric_algorithm.enroll_samples(
            biometric_reference_features
        )

        # models is a list of Samples
        return biometric_references

    def compute_scores(
        self,
        probe_samples,
        biometric_references,
        allow_scoring_with_all_biometric_references=True,
    ):
        """
        Transform the probe samples with ``self.transformer`` and score them
        against ``biometric_references`` with
        ``self.biometric_algorithm.score_samples``.

        Returns
        -------

          A tuple ``(scores, probe_features)``: the value returned by
          ``score_samples`` and the transformed probes.
        """

        # probes is a list of SampleSets
        probe_features = self.transformer.transform(probe_samples)
        scores = self.biometric_algorithm.score_samples(
            probe_features,
            biometric_references,
            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
        )

        # scores is a list of Samples
        return scores, probe_features

    def write_scores(self, scores):
        """
        Delegate to ``self.score_writer.write(scores)`` and return its result.

        Raises
        ------

          ValueError: if ``self.score_writer`` is ``None``
        """
        if self.score_writer is None:
            raise ValueError("No score writer defined in the pipeline")
        return self.score_writer.write(scores)

    def post_process(self, score_paths, filename):
        """
        Delegate to ``self.score_writer.post_process(score_paths, filename)``
        and return its result.

        Raises
        ------

          ValueError: if ``self.score_writer`` is ``None``
        """
        if self.score_writer is None:
            raise ValueError("No score writer defined in the pipeline")

        return self.score_writer.post_process(score_paths, filename)
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206


def check_valid_pipeline(vanilla_pipeline):
    """
    Apply sanity checks to a vanilla biometrics pipeline.

    Parameters
    ----------

      vanilla_pipeline:
        Object exposing the attributes ``transformer`` and
        ``biometric_algorithm`` (e.g. a ``VanillaBiometricsPipeline``)

    Returns
    -------

      ``True`` when every check passes.

    Raises
    ------

      ValueError: if ``transformer`` is neither a
        :py:class:`sklearn.pipeline.Pipeline` nor accepted by the nested
        ``BaseEstimator`` check, or if ``biometric_algorithm`` is not a
        ``BioAlgorithm`` instance.
    """
    transformer = vanilla_pipeline.transformer

    ## CHECKING THE TRANSFORMER
    # Checking if it's a Scikit Pipeline or a estimator
    if isinstance(transformer, Pipeline):

        # Checking if all steps are wrapped as samples, if not, we should wrap them
        for step in transformer:
            if not isinstance_nested(step, "estimator", SampleWrapper):
                # NOTE(review): the value returned by ``wrap`` is discarded. If
                # ``wrap`` returns a new wrapped object instead of mutating
                # ``step`` in place, this call has no effect on the pipeline --
                # confirm against bob.pipelines before relying on it.
                wrap(["sample"], step)

    # In this case it can be a simple estimator.
    # NOTE(review): the original condition evaluated this exact check twice
    # (``A and A``). The second operand was presumably meant to test for
    # ``SampleWrapper`` -- confirm the intent; behaviour is kept as it was.
    elif isinstance_nested(transformer, "estimator", BaseEstimator):
        # NOTE(review): return value of ``wrap`` is discarded here as well.
        wrap(["sample"], transformer)
    else:
        raise ValueError(
            "VanillaBiometricsPipeline.transformer should be instance of either `sklearn.pipeline.Pipeline` or "
            f"sklearn.base.BaseEstimator, not {transformer}"
        )

    ## Checking the Biometric algorithm
    if not isinstance(vanilla_pipeline.biometric_algorithm, BioAlgorithm):
        raise ValueError(
            "VanillaBiometricsPipeline.biometric_algorithm should be instance of `BioAlgorithm` "
            f"not {vanilla_pipeline.biometric_algorithm}"
        )

    return True