#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Yu Linghu & Xinyi Zhang <yu.linghu@uzh.ch, xinyi.zhang@uzh.ch>
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>

import bob.bio.base
import numpy as np
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn.utils import check_array
import os
from bob.extension.download import get_file
from bob.bio.face.utils import (
    dnn_default_cropping,
    embedding_transformer,
)

from bob.bio.base.pipelines.vanilla_biometrics import (
    Distance,
    VanillaBiometricsPipeline,
)


class OpenCVTransformer(TransformerMixin, BaseEstimator):
    """
    Base Transformer using the OpenCV DNN interface
    (https://docs.opencv.org/master/d2/d58/tutorial_table_of_content_dnn.html).

    .. note::
       This class supports Caffe ``.caffemodel``, TensorFlow ``.pb``,
       Torch ``.t7`` and ``.net``, Darknet ``.weights``, DLDT ``.bin``,
       and ONNX ``.onnx`` model formats.


    Parameters
    ----------

    checkpoint_path: str
        Path to the model checkpoint.

    config: str
        Path to the model configuration file (e.g. ``.json``, ``.prototxt``).

    preprocessor:
        A function applied to the data right before the forward pass.
        The default is ``lambda x: x / 255``.
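
    Example
    -------

    A minimal usage sketch (the checkpoint and config paths below are
    placeholders, not files shipped with this package)::

        >>> import numpy as np
        >>> transformer = OpenCVTransformer(
        ...     checkpoint_path="model.caffemodel", config="deploy.prototxt"
        ... )  # doctest: +SKIP
        >>> blob = np.random.rand(1, 3, 224, 224)  # N x C x H x W
        >>> features = transformer.transform(blob)  # doctest: +SKIP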
    """

    def __init__(
        self,
        checkpoint_path=None,
        config=None,
        preprocessor=lambda x: x / 255,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.checkpoint_path = checkpoint_path
        self.config = config
        self.model = None
        self.preprocessor = preprocessor

    def _load_model(self):
        import cv2

        net = cv2.dnn.readNet(self.checkpoint_path, self.config)
        self.model = net

    def transform(self, X):
        """
        Extracts features from the given batch of images.

        Parameters
        ----------

        X : 4D :py:class:`numpy.ndarray` (floats)
            The batch of images (``N x C x H x W``) to extract the features
            from.

        Returns
        -------

        feature : 2D or 3D :py:class:`numpy.ndarray` (floats)
            The features extracted from the images.
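
        Example
        -------

        A usage sketch (``transformer`` stands for an already configured
        :py:class:`OpenCVTransformer`)::

            >>> blob = np.random.rand(1, 3, 224, 224)
            >>> features = transformer.transform(blob)  # doctest: +SKIP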
        """

        import cv2

        if self.model is None:
            self._load_model()

        X = check_array(X, allow_nd=True)

        X = self.preprocessor(X)

        self.model.setInput(X)

        return self.model.forward()

    def __getstate__(self):
        # cv2 networks are not picklable: drop the model here so it is
        # lazily re-loaded after unpickling

        d = self.__dict__.copy()
        d["model"] = None
        return d

    def _more_tags(self):
        return {"stateless": True, "requires_fit": False}


class VGG16_Oxford(OpenCVTransformer):
    """
    Original VGG16 face recognition model from the paper:
    https://www.robots.ox.ac.uk/~vgg/publications/2015/Parkhi15/parkhi15.pdf
    (Parkhi et al., "Deep Face Recognition", BMVC 2015)

    Parameters
    ----------

    embedding_layer: str
        Name of the layer whose output is used as the embedding
        (default: ``fc7``).

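    Example
    -------

    A minimal sketch (the model is downloaded and cached on first use)::

        >>> import numpy as np
        >>> extractor = VGG16_Oxford()  # doctest: +SKIP
        >>> blob = np.random.rand(1, 3, 224, 224)  # N x C x H x W, RGB
        >>> embedding = extractor.transform(blob)  # doctest: +SKIP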
    """

    def __init__(self, embedding_layer="fc7"):
        urls = [
            "https://www.robots.ox.ac.uk/~vgg/software/vgg_face/src/vgg_face_caffe.tar.gz",
            "http://bobconda.lab.idiap.ch/public-upload/data/bob/bob.bio.face/master/caffe/vgg_face_caffe.tar.gz",
        ]

        filename = get_file(
            "vgg_face_caffe.tar.gz",
            urls,
            cache_subdir="data/caffe/vgg_face_caffe",
            file_hash="ee707ac6e890bc148cb155adeaad12be",
            extract=True,
        )
        path = os.path.dirname(filename)
        config = os.path.join(path, "vgg_face_caffe", "VGG_FACE_deploy.prototxt")
        checkpoint_path = os.path.join(path, "vgg_face_caffe", "VGG_FACE.caffemodel")

        caffe_average_img = [129.1863, 104.7624, 93.5940]
        self.embedding_layer = embedding_layer

        def preprocessor(X):
            """
            Normalize using the average image from the original Caffe model.

            Caffe expects ``N x C x H x W`` blobs in BGR channel order, so
            the channels are flipped before the per-channel mean is
            subtracted.
            """
            # To BGR (the pipeline feeds RGB images)
            X = X[:, ::-1, :, :].astype("float32")

            # Subtract the average image; the means above are stored in RGB
            # order, so they are indexed in reverse for the BGR layout
            X[:, 0, :, :] -= caffe_average_img[2]
            X[:, 1, :, :] -= caffe_average_img[1]
            X[:, 2, :, :] -= caffe_average_img[0]

            return X

        super().__init__(checkpoint_path, config, preprocessor)

    def transform(self, X):
        """Extracts embeddings from the ``embedding_layer`` of the network."""
        import cv2

        if self.model is None:
            self._load_model()

        X = check_array(X, allow_nd=True)

        X = self.preprocessor(X)

        self.model.setInput(X)

        return self.model.forward(self.embedding_layer)


def vgg16_oxford_baseline(annotation_type, fixed_positions=None):
    """
    Get the VGG16 pipeline, which crops the face to :math:`224 \\times 224`
    pixels and uses :py:class:`VGG16_Oxford` to extract features.

    Parameters
    ----------

    annotation_type: str
        Type of the annotations (e.g. ``eyes-center``).

    fixed_positions: dict
        Set this if the faces in your images are registered to fixed
        positions.
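
    Example
    -------

    A minimal sketch (``database`` stands for any vanilla-biometrics database
    object and is not defined here)::

        >>> pipeline = vgg16_oxford_baseline("eyes-center")  # doctest: +SKIP
        >>> scores = pipeline(
        ...     database.background_model_samples(),
        ...     database.references(),
        ...     database.probes(),
        ... )  # doctest: +SKIP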
    """

    # DEFINE CROPPING
    cropped_image_size = (224, 224)

    if annotation_type == "eyes-center":
        # Hard-coding eye positions for backward compatibility
        cropped_positions = {"leye": (65, 144), "reye": (65, 80)}
    else:
        cropped_positions = dnn_default_cropping(cropped_image_size, annotation_type)

    transformer = embedding_transformer(
        cropped_image_size=cropped_image_size,
        embedding=VGG16_Oxford(),
        cropped_positions=cropped_positions,
        fixed_positions=fixed_positions,
        color_channel="rgb",
        annotator="mtcnn",
    )

    algorithm = Distance()

    return VanillaBiometricsPipeline(transformer, algorithm)