diff --git a/bob/bio/face/embeddings/mxnet.py b/bob/bio/face/embeddings/mxnet.py index 2db98ff88eb32580905420a69d84581e3f68980a..b88bf858da3574c6e4757066645d65e90b69634b 100644 --- a/bob/bio/face/embeddings/mxnet.py +++ b/bob/bio/face/embeddings/mxnet.py @@ -8,6 +8,7 @@ from sklearn.utils import check_array from bob.extension.download import get_file import numpy as np import os +from bob.bio.face.annotator import BobIpMTCNN class MxNetTransformer(TransformerMixin, BaseEstimator): @@ -163,20 +164,27 @@ from bob.bio.base.pipelines.vanilla_biometrics import ( def arcface_template(embedding, annotation_type, fixed_positions=None): # DEFINE CROPPING cropped_image_size = (112, 112) - if annotation_type == "eyes-center": + if annotation_type == "eyes-center" or annotation_type == "bounding-box": # Hard coding eye positions for backward consistency # cropped_positions = { cropped_positions = cropped_positions_arcface() + if annotation_type == "bounding-box": + # This will allow us to use `BoundingBoxAnnotatorCrop` + cropped_positions.update( + {"topleft": (0, 0), "bottomright": cropped_image_size} + ) + else: cropped_positions = dnn_default_cropping(cropped_image_size, annotation_type) + annotator = BobIpMTCNN(min_size=40, factor=0.709, thresholds=(0.1, 0.2, 0.2)) transformer = embedding_transformer( cropped_image_size=cropped_image_size, embedding=embedding, cropped_positions=cropped_positions, fixed_positions=fixed_positions, color_channel="rgb", - annotator="mtcnn", + annotator=annotator, ) algorithm = Distance() diff --git a/bob/bio/face/embeddings/opencv.py b/bob/bio/face/embeddings/opencv.py index 8a5d55f8e47be1b7cb126a0f07607cd78690ab64..fc4dd69e86b5a6f9490645bdf0ea394ea6d5cc3c 100644 --- a/bob/bio/face/embeddings/opencv.py +++ b/bob/bio/face/embeddings/opencv.py @@ -18,6 +18,7 @@ from bob.bio.base.pipelines.vanilla_biometrics import ( Distance, VanillaBiometricsPipeline, ) +from bob.bio.face.annotator import BobIpMTCNN class OpenCVTransformer(TransformerMixin, BaseEstimator): @@ -187,12 +188,19 @@ def vgg16_oxford_baseline(annotation_type, fixed_positions=None): # DEFINE CROPPING cropped_image_size = (224, 224) - if annotation_type == "eyes-center": + if annotation_type == "eyes-center" or annotation_type == "bounding-box": # Hard coding eye positions for backward consistency + # cropped_positions = { cropped_positions = {"reye": (112, 82), "leye": (112, 142)} + if annotation_type == "bounding-box": + # This will allow us to use `BoundingBoxAnnotatorCrop` + cropped_positions.update( + {"topleft": (0, 0), "bottomright": cropped_image_size} + ) else: cropped_positions = dnn_default_cropping(cropped_image_size, annotation_type) + annotator = BobIpMTCNN(min_size=40, factor=0.709, thresholds=(0.1, 0.2, 0.2)) transformer = embedding_transformer( cropped_image_size=cropped_image_size, embedding=VGG16_Oxford(), diff --git a/bob/bio/face/embeddings/pytorch.py b/bob/bio/face/embeddings/pytorch.py index fb0b4c6df191c465a32cde2d776dd9fb9db0a32b..d5dfabc815ce13aa05ccf666783592889eb6df69 100644 --- a/bob/bio/face/embeddings/pytorch.py +++ b/bob/bio/face/embeddings/pytorch.py @@ -16,6 +16,7 @@ from bob.extension.download import get_file from sklearn.base import BaseEstimator from sklearn.base import TransformerMixin from sklearn.utils import check_array +from bob.bio.face.annotator import BobIpMTCNN class PyTorchModel(TransformerMixin, BaseEstimator): @@ -281,12 +282,20 @@ class IResnet100(PyTorchModel): def iresnet_template(embedding, annotation_type, fixed_positions=None): # DEFINE CROPPING cropped_image_size = (112, 112) - if annotation_type == "eyes-center": + if annotation_type == "eyes-center" or annotation_type == "bounding-box": # Hard coding eye positions for backward consistency + # cropped_positions = { cropped_positions = cropped_positions_arcface() + if annotation_type == "bounding-box": + # This will allow us to use `BoundingBoxAnnotatorCrop` + cropped_positions.update( + {"topleft": (0, 0), "bottomright": cropped_image_size} + ) + else: cropped_positions = dnn_default_cropping(cropped_image_size, annotation_type) + annotator = BobIpMTCNN(min_size=40, factor=0.709, thresholds=(0.1, 0.2, 0.2)) transformer = embedding_transformer( cropped_image_size=cropped_image_size, embedding=embedding, diff --git a/bob/bio/face/embeddings/tensorflow.py b/bob/bio/face/embeddings/tensorflow.py index 9182c19d1a9fe7fb04765e3afc909b4761d221c8..76d1c1a1f59f51247418b65fc84703e08d8dc4c2 100644 --- a/bob/bio/face/embeddings/tensorflow.py +++ b/bob/bio/face/embeddings/tensorflow.py @@ -24,6 +24,7 @@ from bob.bio.base.pipelines.vanilla_biometrics import ( Distance, VanillaBiometricsPipeline, ) +from bob.bio.face.annotator import BobIpMTCNN def sanderberg_rescaling(): @@ -747,14 +748,29 @@ def facenet_template(embedding, annotation_type, fixed_positions=None): def resnet_template(embedding, annotation_type, fixed_positions=None): + # DEFINE CROPPING + # cropped_image_size = (112, 112) + # if annotation_type == "eyes-center": + # # Hard coding eye positions for backward consistency + # cropped_positions = cropped_positions_arcface() + # else: + # cropped_positions = dnn_default_cropping(cropped_image_size, annotation_type) # DEFINE CROPPING cropped_image_size = (112, 112) - if annotation_type == "eyes-center": + if annotation_type == "eyes-center" or annotation_type == "bounding-box": # Hard coding eye positions for backward consistency + # cropped_positions = { cropped_positions = cropped_positions_arcface() + if annotation_type == "bounding-box": + # This will allow us to use `BoundingBoxAnnotatorCrop` + cropped_positions.update( + {"topleft": (0, 0), "bottomright": cropped_image_size} + ) + else: cropped_positions = dnn_default_cropping(cropped_image_size, annotation_type) + annotator = BobIpMTCNN(min_size=40, factor=0.709, thresholds=(0.1, 0.2, 0.2)) transformer = embedding_transformer( cropped_image_size=cropped_image_size, embedding=embedding, diff --git a/bob/bio/face/preprocessor/FaceCrop.py b/bob/bio/face/preprocessor/FaceCrop.py index 21d23b15a58fb0b333fc6d1639cb52975e418d31..a19eea65c9ac4ad1881377e4ba73d0a2c21c559c 100644 --- a/bob/bio/face/preprocessor/FaceCrop.py +++ b/bob/bio/face/preprocessor/FaceCrop.py @@ -7,6 +7,7 @@ from sklearn.base import TransformerMixin, BaseEstimator logger = logging.getLogger("bob.bio.face") from bob.bio.base import load_resource +from .Scale import scale class FaceCrop(Base): @@ -446,3 +447,181 @@ class MultiFaceCrop(Base): def fit(self, X, y=None): return self + + +class BoundingBoxAnnotatorCrop(Base): + """ + This face cropper uses a 2 stage strategy to crop and align faces in case `annotation_type` has a bounding-box. + In the first stage, it crops the face using the {`topleft`, `bottomright`} parameters and expands them using a `margin` factor. + In the second stage, it uses the `annotator` to estimate {`leye` and `reye`} to make the crop using :py:class:`bob.ip.base.FaceEyesNorm`. + In case the annotator doesn't work, it returnds the cropped face using the `bounding-box` coordinates. + + + .. warning:: + `cropped_positions` must be set with `leye`, `reye`, `topleft` and `bottomright` positions + + + Parameters + ---------- + + cropped_image_size : (int, int) + The resolution of the cropped image, in order (HEIGHT,WIDTH); if not given, + no face cropping will be performed + + cropped_positions : dict + The coordinates in the cropped image, where the annotated points should be + put to. This parameter is a dictionary with usually two elements, e.g., + ``{'reye':(RIGHT_EYE_Y, RIGHT_EYE_X) , 'leye':(LEFT_EYE_Y, LEFT_EYE_X)}``. + However, also other parameters, such as ``{'topleft' : ..., 'bottomright' : + ...}`` are supported, as long as the ``annotations`` in the `__call__` + function are present. + + fixed_positions : dict or None + If specified, ignore the annotations from the database and use these fixed + positions throughout. + + mask_sigma : float or None + Fill the area outside of image boundaries with random pixels from the + border, by adding noise to the pixel values. To disable extrapolation, set + this value to ``None``. To disable adding random noise, set it to a + negative value or 0. + + mask_neighbors : int + The number of neighbors used during mask extrapolation. See + :py:func:`bob.ip.base.extrapolate_mask` for details. + + mask_seed : int or None + The random seed to apply for mask extrapolation. + + .. warning:: + + When run in parallel, the same random seed will be applied to all + parallel processes. Hence, results of parallel execution will differ + from the results in serial execution. + + allow_upside_down_normalized_faces: bool, optional + If ``False`` (default), a ValueError is raised when normalized faces are going to be + upside down compared to input image. This allows you to catch wrong annotations in + your database easily. If you are sure about your input, you can set this flag to + ``True``. + + annotator : :any:`bob.bio.base.annotator.Annotator` + If provided, the annotator will be used if the required annotations are + missing. + + margin: float + The cropped face will be scaled to this factor (proportionally to the bouding-box width and height). Default to `0.5`. + + + """ + + def __init__( + self, + cropped_image_size, + cropped_positions, + annotator, + mask_sigma=None, + mask_neighbors=5, + mask_seed=None, + allow_upside_down_normalized_faces=False, + color_channel="rgb", + margin=0.5, + **kwargs, + ): + + if annotator is None: + raise ValueError(f"A valid annotator needs to be set.") + + if isinstance(annotator, str): + annotator = load_resource(annotator, "annotator") + self.annotator = annotator + + # We need to have the four coordinates + assert "leye" in cropped_positions + assert "reye" in cropped_positions + assert "topleft" in cropped_positions + assert "bottomright" in cropped_positions + + # copy parameters (sklearn convention : each explicit __init__ argument *has* to become an attribute of the estimator) + self.cropped_image_size = cropped_image_size + self.cropped_positions = cropped_positions + self.mask_sigma = mask_sigma + self.mask_neighbors = mask_neighbors + self.mask_seed = mask_seed + self.allow_upside_down_normalized_faces = allow_upside_down_normalized_faces + self.color_channel = color_channel + + ## Eyes cropper + eyes_position = dict() + eyes_position["leye"] = cropped_positions["leye"] + eyes_position["reye"] = cropped_positions["reye"] + self.eyes_cropper = FaceCrop( + cropped_image_size, + eyes_position, + fixed_positions=None, + mask_sigma=mask_sigma, + mask_neighbors=mask_neighbors, + mask_seed=mask_seed, + allow_upside_down_normalized_faces=allow_upside_down_normalized_faces, + color_channel=color_channel, + ) + self.margin = margin + + def transform(self, X, annotations=None): + faces = [] + + for x, annot in zip(X, annotations): + + # If it's grayscaled, expand dims + if x.ndim == 2: + logger.warning( + "Gray-scaled image. Expanding the channels before detection" + ) + x = numpy.repeat(numpy.expand_dims(x, 0), 3, axis=0) + + top = annot["topleft"][0] + left = annot["topleft"][1] + + bottom = annot["bottomright"][0] + right = annot["bottomright"][1] + + width = right - left + height = bottom - top + + # Expanding the borders + top_expanded = int(numpy.maximum(top - self.margin * height, 0)) + left_expanded = int(numpy.maximum(left - self.margin * width, 0)) + + bottom_expanded = int( + numpy.minimum(bottom + self.margin * height, x.shape[1]) + ) + right_expanded = int(numpy.minimum(right + self.margin * width, x.shape[2])) + + face_crop = x[ + :, top_expanded:bottom_expanded, left_expanded:right_expanded, + ] + + # get the coordinates with the annotator + annotator_annotations = self.annotator([face_crop])[0] + + # If nothing was detected OR if the annotations are swaped, return the cropped face + if ( + annotator_annotations is None + or annotator_annotations["reye"][1] > annotator_annotations["leye"][1] + ): + logger.warning( + f"Wrong annotations: {annotator_annotations}. Rescaling images" + ) + + face_crop = scale(face_crop, self.cropped_image_size) + faces.append(face_crop) + else: + + faces.append( + self.eyes_cropper.transform([face_crop], [annotator_annotations])[0] + ) + + return faces + + def fit(self, X, y=None): + return self diff --git a/bob/bio/face/preprocessor/Scale.py b/bob/bio/face/preprocessor/Scale.py index 53166f42f641dfee017c4011c14702c135f3dfe7..14d132bdb959c0f81ad771b771dde75a5762ca58 100644 --- a/bob/bio/face/preprocessor/Scale.py +++ b/bob/bio/face/preprocessor/Scale.py @@ -4,6 +4,7 @@ from sklearn.utils import check_array from bob.io.image import to_matplotlib, to_bob import numpy as np + def scale(images, target_img_size): """Scales a list of images to the target size @@ -24,7 +25,7 @@ def scale(images, target_img_size): # images are always batched images = check_array(images, allow_nd=True) - + output_shape = tuple(target_img_size) output_shape = tuple(images.shape[0:1]) + output_shape @@ -32,11 +33,11 @@ def scale(images, target_img_size): if images.ndim > 3: images = to_matplotlib(images) images = resize(images, output_shape=output_shape) - return to_bob(images) + return to_bob(images) * 255 else: # If it's Bob batched gray scaled images images = resize(images, output_shape=output_shape) - return images + return images * 255 def Scale(target_img_size): diff --git a/bob/bio/face/preprocessor/__init__.py b/bob/bio/face/preprocessor/__init__.py index 1a0c9b64a3e248993333f3161503a1d3b2eae1af..74ce05701d987d49645800c6c5da1d06dbda483e 100644 --- a/bob/bio/face/preprocessor/__init__.py +++ b/bob/bio/face/preprocessor/__init__.py @@ -1,5 +1,5 @@ from .Base import Base -from .FaceCrop import FaceCrop, MultiFaceCrop +from .FaceCrop import FaceCrop, MultiFaceCrop, BoundingBoxAnnotatorCrop from .TanTriggs import TanTriggs from .INormLBP import INormLBP @@ -19,16 +19,11 @@ def __appropriate__(*args): <https://github.com/sphinx-doc/sphinx/issues/3048>` """ - for obj in args: obj.__module__ = __name__ + for obj in args: + obj.__module__ = __name__ __appropriate__( - Base, - FaceCrop, - TanTriggs, - INormLBP, - HistogramEqualization, - SelfQuotientImage, - Scale + Base, FaceCrop, TanTriggs, INormLBP, HistogramEqualization, SelfQuotientImage, Scale ) -__all__ = [_ for _ in dir() if not _.startswith('_')] +__all__ = [_ for _ in dir() if not _.startswith("_")] diff --git a/bob/bio/face/test/test_baselines.py b/bob/bio/face/test/test_baselines.py index 6bdae469e8415d732e0737300567a912d0520a2a..ff3f568bec4130bed019dec6465dd89b4d6a89b4 100644 --- a/bob/bio/face/test/test_baselines.py +++ b/bob/bio/face/test/test_baselines.py @@ -78,7 +78,6 @@ def run_baseline(baseline, samples_for_training=[], target_scores=None): checkpoint_scores = checkpoint_pipeline([], biometric_references, probes) assert len(checkpoint_scores) == 1 assert len(checkpoint_scores[0]) == 1 - if target_scores is not None: assert np.allclose(target_scores, scores[0][0].data, atol=10e-5, rtol=10e-5) @@ -143,47 +142,43 @@ def test_inception_resnetv1_casiawebface(): @pytest.mark.slow @is_library_available("mxnet") def test_arcface_insightface(): - run_baseline("arcface-insightface", target_scores=-0.0005965275677296544) + run_baseline("arcface-insightface", target_scores=-0.8541907225411619) @pytest.mark.slow @is_library_available("tensorflow") def test_arcface_resnet50_msceleb_v1(): - run_baseline("resnet50-msceleb-arcface-2021", target_scores=-0.0008105830382632018) + run_baseline("resnet50-msceleb-arcface-2021", target_scores=-0.799834989589404) @pytest.mark.slow @is_library_available("tensorflow") def test_iresnet50_msceleb_idiap_20210623(): - run_baseline("iresnet50-msceleb-idiap-20210623", target_scores=-0.00045892492346155667) + run_baseline("iresnet50-msceleb-idiap-20210623", target_scores=-1.0540606303938558) @pytest.mark.slow @is_library_available("tensorflow") def test_iresnet100_msceleb_idiap_20210623(): - run_baseline("iresnet100-msceleb-idiap-20210623", target_scores=-0.00010635761699118174) + run_baseline("iresnet100-msceleb-idiap-20210623", target_scores=-1.0353392904250978) @pytest.mark.slow @is_library_available("tensorflow") def test_arcface_resnet50_vgg2_v1(): - run_baseline("resnet50-vgg2-arcface-2021", target_scores=-0.0035127080413503986) + run_baseline("resnet50-vgg2-arcface-2021", target_scores=-0.949471222980922) @pytest.mark.slow @is_library_available("tensorflow") def test_arcface_mobilenet_msceleb(): - run_baseline( - "mobilenetv2-msceleb-arcface-2021", target_scores=-9.430960384781972e-05 - ) + run_baseline("mobilenetv2-msceleb-arcface-2021", target_scores=-0.5688437955767786) @pytest.mark.slow @is_library_available("tensorflow") def test_arcface_resnet50_msceleb_20210521(): - run_baseline( - "resnet50-msceleb-arcface-20210521", target_scores=-0.001238845659379595 - ) + run_baseline("resnet50-msceleb-arcface-20210521", target_scores=-0.9628566738931277) def test_gabor_graph(): @@ -206,7 +201,7 @@ def test_afffe(): @is_library_available("torch") def test_iresnet34(): run_baseline( - "iresnet34", target_scores=-0.0003085132478504171, + "iresnet34", target_scores=-0.8302991105719331, ) @@ -214,7 +209,7 @@ def test_iresnet34(): @is_library_available("torch") def test_iresnet50(): run_baseline( - "iresnet50", target_scores=-0.0013965432856760662, + "iresnet50", target_scores=-0.8016123867448196, ) @@ -222,11 +217,11 @@ def test_iresnet50(): @is_library_available("torch") def test_iresnet100(): run_baseline( - "iresnet100", target_scores=-0.0002386926047015514, + "iresnet100", target_scores=-0.8541905958816157, ) @pytest.mark.slow @is_library_available("cv2") def test_vgg16_oxford(): - run_baseline("vgg16-oxford", target_scores=-0.0019032474437553626) + run_baseline("vgg16-oxford", target_scores=-0.4142390683487125) diff --git a/bob/bio/face/utils.py b/bob/bio/face/utils.py index 4bea8b40871753faccadc5c8e2b17096049268c5..1185e61c9663aaa9b8a2e75e51116140e406d0d5 100644 --- a/bob/bio/face/utils.py +++ b/bob/bio/face/utils.py @@ -1,7 +1,7 @@ import logging from .preprocessor import FaceCrop -from .preprocessor import MultiFaceCrop +from .preprocessor import MultiFaceCrop, BoundingBoxAnnotatorCrop from .preprocessor import Scale from bob.pipelines import wrap from sklearn.pipeline import make_pipeline @@ -348,14 +348,26 @@ def face_crop_solver( annotator=annotator, ) else: - return FaceCrop( - cropped_image_size=cropped_image_size, - cropped_positions=cropped_positions, - color_channel=color_channel, - fixed_positions=fixed_positions, - dtype=dtype, - annotator=annotator, - ) + # If the eyes annotations are provides + if "topleft" in cropped_positions or "bottomright" in cropped_positions: + return BoundingBoxAnnotatorCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel=color_channel, + fixed_positions=fixed_positions, + dtype=dtype, + annotator=annotator, + ) + + else: + return FaceCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel=color_channel, + fixed_positions=fixed_positions, + dtype=dtype, + annotator=annotator, + ) def get_default_cropped_positions(mode, cropped_image_size, annotation_type): diff --git a/doc/implemented.rst b/doc/implemented.rst index 7d7eca48710c53298d157b2c6ad226cf9ce07d0c..72a8c84a68bf3b38843bd437668b2c6a7b8125a9 100644 --- a/doc/implemented.rst +++ b/doc/implemented.rst @@ -87,6 +87,8 @@ Image Preprocessors .. autosummary:: bob.bio.face.preprocessor.Base bob.bio.face.preprocessor.FaceCrop + bob.bio.face.preprocessor.MultiFaceCrop + bob.bio.face.preprocessor.BoundingBoxAnnotatorCrop bob.bio.face.preprocessor.TanTriggs bob.bio.face.preprocessor.HistogramEqualization