import logging

import numpy

import bob.core
import bob.ip.base

from sklearn.base import TransformerMixin, BaseEstimator

from bob.bio.base import load_resource

from .Base import Base
from .Scale import scale

logger = logging.getLogger("bob.bio.face")

class FaceCrop(Base):
    """Crops the face according to the given annotations.

    This class is designed to perform a geometric normalization of the face based
    on the eye locations, using :py:class:`bob.ip.base.FaceEyesNorm`. Usually,
    when executing the :py:meth:`crop_face` function, the image and the eye
    locations have to be specified. There, the given image will be transformed
    such that the eye locations will be placed at specific locations in the
    resulting image. These locations, as well as the size of the cropped image,
    need to be specified in the constructor of this class, as
    ``cropped_positions`` and ``cropped_image_size``.

    Some image databases do not provide eye locations, but rather bounding boxes.
    This is not a problem at all.
    Simply define the coordinates where you want your ``cropped_positions`` to
    be in the cropped image, using the same keys in the dictionary that
    will be given as ``annotations`` to the :py:meth:`crop_face` function.

    .. note::

      These locations can even be outside of the cropped image boundary, i.e.,
      when the crop should be smaller than the annotated bounding boxes.

    Sometimes, databases provide pre-cropped faces, where the eyes are located
    at (almost) the same position in all images, but the cropping does not
    conform with the crop you want (e.g., the image resolution is wrong, or too
    much background is included). Since such databases usually do not provide
    eye locations (they are almost identical for all images), you can specify
    ``fixed_positions`` in the constructor, which will be used instead of the
    ``annotations`` inside the :py:meth:`crop_face` function (in which case the
    ``annotations`` are ignored).

    Sometimes, the crop of the face is outside of the original image boundaries.
    Usually, these pixels will simply be left black, resulting in sharp edges in
    the image. However, some feature extractors do not like these sharp edges. In
    this case, you can set the ``mask_sigma`` to copy pixels from the valid
    border of the image and add random noise (see
    :py:func:`bob.ip.base.extrapolate_mask`).


    Parameters
    ----------

    cropped_image_size : (int, int)
      The resolution of the cropped image, in order (HEIGHT,WIDTH); if not given,
      no face cropping will be performed

    cropped_positions : dict
      The coordinates in the cropped image, where the annotated points should be
      put to. This parameter is a dictionary with usually two elements, e.g.,
      ``{'reye':(RIGHT_EYE_Y, RIGHT_EYE_X) , 'leye':(LEFT_EYE_Y, LEFT_EYE_X)}``.
      However, other annotation points, such as ``{'topleft' : ..., 'bottomright' :
      ...}``, are also supported, as long as the corresponding keys are present
      in the ``annotations`` passed to the `__call__` function.

    fixed_positions : dict or None
      If specified, ignore the annotations from the database and use these fixed
      positions throughout.

    mask_sigma : float or None
      Fill the area outside of image boundaries with random pixels from the
      border, by adding noise to the pixel values. To disable extrapolation, set
      this value to ``None``. To disable adding random noise, set it to a
      negative value or 0.

    mask_neighbors : int
      The number of neighbors used during mask extrapolation. See
      :py:func:`bob.ip.base.extrapolate_mask` for details.

    mask_seed : int or None
      The random seed to apply for mask extrapolation.

      .. warning::

         When run in parallel, the same random seed will be applied to all
         parallel processes. Hence, results of parallel execution will differ
         from the results in serial execution.

    allow_upside_down_normalized_faces: bool, optional
      If ``False`` (default), a ValueError is raised when normalized faces would be
      upside down compared to the input image. This allows you to easily catch wrong
      annotations in your database. If you are sure about your input, you can set
      this flag to ``True``.

    annotator : :any:`bob.bio.base.annotator.Annotator`
      If provided, the annotator will be used if the required annotations are
      missing.

    kwargs
      Remaining keyword parameters passed to the :py:class:`Base` constructor,
      such as ``color_channel`` or ``dtype``.
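
    Examples
    --------
    A minimal usage sketch (the import path follows the usual ``bob.bio.face``
    layout; the eye coordinates are arbitrary illustration values):

    .. code-block:: python

        from bob.bio.face.preprocessor import FaceCrop

        cropper = FaceCrop(
            cropped_image_size=(80, 64),
            cropped_positions={"reye": (16, 15), "leye": (16, 48)},
        )
        # images : list of 2D/3D numpy arrays
        # annotations : list of dicts, e.g. {"reye": (y, x), "leye": (y, x)}
        # faces = cropper.transform(images, annotations)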
    """
    def __init__(
        self,
        cropped_image_size,
        cropped_positions,
        fixed_positions=None,
        mask_sigma=None,
        mask_neighbors=5,
        mask_seed=None,
        annotator=None,
        allow_upside_down_normalized_faces=False,
        **kwargs,
    ):
        # call base class constructor
        Base.__init__(self, **kwargs)

        if isinstance(cropped_image_size, int):
            cropped_image_size = (cropped_image_size, cropped_image_size)

        # check parameters
        assert len(cropped_positions) == 2
        if fixed_positions:
            assert len(fixed_positions) == 2

        # copy parameters
        self.cropped_image_size = cropped_image_size
        self.cropped_positions = cropped_positions
        self.cropped_keys = sorted(cropped_positions.keys())
        self.fixed_positions = fixed_positions
        self.mask_sigma = mask_sigma
        self.mask_neighbors = mask_neighbors
        self.mask_seed = mask_seed
        if isinstance(annotator, str):
            annotator = load_resource(annotator, "annotator")
        self.annotator = annotator
        self.allow_upside_down_normalized_faces = allow_upside_down_normalized_faces

        # create objects required for face cropping; the cropper itself is
        # (re-)built in _init_non_pickables()
        self.cropped_mask = numpy.ndarray(cropped_image_size, bool)

        self._init_non_pickables()

    def _init_non_pickables(self):
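        # The random number generator and the C++-backed FaceEyesNorm object
        # cannot be pickled; they are re-created here and in __setstate__().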
        self.mask_rng = (
            bob.core.random.mt19937(self.mask_seed)
            if self.mask_seed is not None
            else bob.core.random.mt19937()
        )
        self.cropper = bob.ip.base.FaceEyesNorm(
            crop_size=self.cropped_image_size,
            right_eye=self.cropped_positions[self.cropped_keys[0]],
            left_eye=self.cropped_positions[self.cropped_keys[1]],
        )

    def crop_face(self, image, annotations=None):
        """Crops the face.
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187
        Executes the face cropping on the given image and returns the cropped
        version of it.

        Parameters
        ----------
        image : 2D :py:class:`numpy.ndarray`
            The face image to be processed.

        annotations : dict or ``None``
            The annotations that fit to the given image. ``None`` is only accepted
            when ``fixed_positions`` were specified in the constructor.

        Returns
        -------
        face : 2D :py:class:`numpy.ndarray` (float)
            The cropped face.

        Raises
        ------
        ValueError
            If ``annotations`` is ``None`` and no ``fixed_positions`` were
            specified in the constructor.
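
        Examples
        --------
        A direct-call sketch (assuming ``cropper`` was constructed as in the
        class-level example; the coordinates are illustrative):

        .. code-block:: python

            import numpy

            image = numpy.random.rand(160, 160)
            face = cropper.crop_face(image, {"reye": (55, 60), "leye": (55, 100)})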
        """
        if self.fixed_positions is not None:
            annotations = self.fixed_positions
        if annotations is None:
            raise ValueError(
                "Cannot perform image cropping since annotations are not given, and "
                "no fixed annotations are specified."
            )

        assert isinstance(annotations, dict)
        if not all(k in annotations for k in self.cropped_keys):
            raise ValueError(
                "At least one of the expected annotations '%s' are not given "
                "in '%s'." % (self.cropped_keys, annotations.keys())
            )

        reye = self.cropped_keys[0]
        leye = self.cropped_keys[1]
        reye_desired_width = self.cropped_positions[reye][1]
        leye_desired_width = self.cropped_positions[leye][1]
        right_eye = annotations[reye]
        left_eye = annotations[leye]
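        # The normalized face would come out upside down if the horizontal
        # order of the annotated eyes contradicts the order of the desired eye
        # positions in the cropped image; reject that case unless allowed.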
        if not self.allow_upside_down_normalized_faces:
            if (
                reye_desired_width > leye_desired_width and right_eye[1] < left_eye[1]
            ) or (
                reye_desired_width < leye_desired_width and right_eye[1] > left_eye[1]
            ):
                raise ValueError(
                    "Looks like {leye} and {reye} in annotations: {annot} are swapped. "
                    "This will make the normalized face upside down (compared to the original "
                    "image). Most probably your annotations are wrong. Otherwise, you can set "
                    "the ``allow_upside_down_normalized_faces`` parameter to "
                    "True.".format(leye=leye, reye=reye, annot=annotations)
                )

        # create output
        mask = numpy.ones(image.shape[-2:], dtype=bool)
        shape = (
            self.cropped_image_size
            if image.ndim == 2
            else [image.shape[0]] + list(self.cropped_image_size)
        )
        cropped_image = numpy.zeros(shape)
        self.cropped_mask[:] = False

        # perform the cropping
        self.cropper(
            image,  # input image
            mask,  # full input mask
            cropped_image,  # cropped image
            self.cropped_mask,  # cropped mask
            # position of first annotation, usually right eye
            right_eye=right_eye,
            # position of second annotation, usually left eye
            left_eye=left_eye,
        )

        if self.mask_sigma is not None:
            # extrapolate the mask so that pixels outside of the original image
            # region are filled with border pixels
            if cropped_image.ndim == 2:
                bob.ip.base.extrapolate_mask(
                    self.cropped_mask,
                    cropped_image,
                    self.mask_sigma,
                    self.mask_neighbors,
                    self.mask_rng,
                )
            else:
                for cropped_image_channel in cropped_image:
                    bob.ip.base.extrapolate_mask(
                        self.cropped_mask,
                        cropped_image_channel,
                        self.mask_sigma,
                        self.mask_neighbors,
                        self.mask_rng,
                    )

        return cropped_image

    def is_annotations_valid(self, annotations):
        if not annotations:
            return False
        # check if the required keys are available
        return all(key in annotations for key in self.cropped_keys)

    def transform(self, X, annotations=None):
        """Aligns the given images according to the given annotations.

        First, the desired color channel is extracted from each image.
        Afterward, each face is cropped according to the given ``annotations``
        (or to ``fixed_positions``, see :py:meth:`crop_face`). Finally, the
        resulting faces are converted to the desired data type.

        Parameters
        ----------
        X : list of 2D or 3D :py:class:`numpy.ndarray`
            The face images to be processed.
        annotations : list of dict or ``None``
            The annotations that fit to the given images.

        Returns
        -------
        faces : list of 2D :py:class:`numpy.ndarray`
            The cropped faces.
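
        Examples
        --------
        ``transform`` operates on lists, with one annotation dictionary per
        image (a sketch; the variable names are illustrative):

        .. code-block:: python

            faces = cropper.transform(
                [image1, image2],
                annotations=[
                    {"reye": (55, 60), "leye": (55, 100)},
                    {"reye": (50, 62), "leye": (50, 98)},
                ],
            )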
        """

        def _crop(image, annot):
            # if annotations are missing and cannot do anything else return None.
            if (
                not self.is_annotations_valid(annot)
                and not self.fixed_positions
                and self.annotator is None
            ):
                logger.warning(
                    "Cannot crop face without valid annotations or "
                    "fixed_positions or an annotator. Returning None. "
                    "The annotations were: {}".format(annot)
                )
                return None

            # convert to the desired color channel
            image = self.change_color_channel(image)

            # annotate the image if annotations are missing
            if (
                not self.is_annotations_valid(annot)
                and not self.fixed_positions
                and self.annotator is not None
            ):
                annot = self.annotator([image], annotations=[annot])[0]
                if not self.is_annotations_valid(annot):
                    logger.warning(
                        "The annotator failed and the annotations are still "
                        "missing. Returning None."
                    )
                    return None

            # crop face
            return self.data_type(self.crop_face(image, annot))

        if annotations is None:
            return [_crop(data, None) for data in X]
        else:
            return [_crop(data, annot) for data, annot in zip(X, annotations)]

    def __getstate__(self):
        d = self.__dict__.copy()
        d.pop("mask_rng")
        d.pop("cropper")
        return d

    def __setstate__(self, d):
        self.__dict__ = d
        self._init_non_pickables()


class MultiFaceCrop(Base):
    """Wraps around FaceCrop to enable a dynamic cropper that can handle several annotation types.
    Initialization and usage are similar to FaceCrop, but the main difference here is that one specifies
    a *list* of cropped_positions, and optionally a *list* of associated fixed positions.

    For each set of cropped_positions in the list, a new FaceCrop will be instantiated that handles this
    exact set of annotations.
    When calling the *transform* method, the MultiFaceCrop matches each sample to its associated cropper
    based on the received annotations, then performs the cropping of each subset, and finally gathers the results.

    If the cropper selection is ambiguous (no cropper matches the received annotations, or several croppers
    match them), a ValueError is raised.
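
    Examples
    --------
    A sketch with one eye-based and one bounding-box-based cropper (the
    coordinates are illustrative):

    .. code-block:: python

        cropper = MultiFaceCrop(
            cropped_image_size=(80, 64),
            cropped_positions_list=[
                {"reye": (16, 15), "leye": (16, 48)},
                {"topleft": (0, 0), "bottomright": (80, 64)},
            ],
        )
        # Each sample is routed to the cropper whose position keys match the
        # keys of its annotations:
        # faces = cropper.transform(images, annotations)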

    """

    def __init__(
        self,
        cropped_image_size,
        cropped_positions_list,
        fixed_positions_list=None,
        mask_sigma=None,
        mask_neighbors=5,
        mask_seed=None,
        annotator=None,
        allow_upside_down_normalized_faces=False,
        **kwargs,
    ):
        # Check parameters
        assert isinstance(cropped_positions_list, list)
        if fixed_positions_list is None:
            fixed_positions_list = [None] * len(cropped_positions_list)
        assert isinstance(fixed_positions_list, list)

        # copy parameters (sklearn convention: each explicit __init__ argument *has* to become an attribute of the estimator)
        self.cropped_image_size = cropped_image_size
        self.cropped_positions_list = cropped_positions_list
        self.fixed_positions_list = fixed_positions_list
        self.mask_sigma = mask_sigma
        self.mask_neighbors = mask_neighbors
        self.mask_seed = mask_seed
        if isinstance(annotator, str):
            annotator = load_resource(annotator, "annotator")
        self.annotator = annotator
        self.allow_upside_down_normalized_faces = allow_upside_down_normalized_faces

        # Instantiate individual croppers
        self.croppers = {}
        for cropped_positions, fixed_positions in zip(
            self.cropped_positions_list, self.fixed_positions_list
        ):
            assert len(cropped_positions) == 2
            self.croppers[tuple(cropped_positions)] = FaceCrop(
                self.cropped_image_size,
                cropped_positions,
                fixed_positions,
                self.mask_sigma,
                self.mask_neighbors,
                self.mask_seed,
                self.annotator,
                self.allow_upside_down_normalized_faces,
                **kwargs,
            )

    def transform(self, X, annotations=None):
        # Annotations are required here, since they select the matching cropper
        if annotations is None:
            raise ValueError(
                "MultiFaceCrop requires annotations to select the matching cropper."
            )
        subsets = {k: {"X": [], "annotations": []} for k in self.croppers.keys()}

        def assign(X_elem, annotations_elem):
            # Assign a single sample to its matching cropper

            # Compare the received annotations keys to the cropped_positions keys of each cropper
            valid_keys = [
                k
                for k in self.croppers.keys()
                if set(k).issubset(set(annotations_elem.keys()))
            ]

            # Ensure exactly one cropper is a match
            if len(valid_keys) != 1:
                raise ValueError(
                    "Cropper selection from the annotations is ambiguous ({} valid croppers)".format(
                        len(valid_keys)
                    )
                )
            else:
                # Assign the sample to this particular cropper
                cropper_key = valid_keys[0]
                subsets[cropper_key]["X"].append(X_elem)
                subsets[cropper_key]["annotations"].append(annotations_elem)

        # Assign each sample to its matching cropper
        for X_elem, annotations_elem in zip(X, annotations):
            assign(X_elem, annotations_elem)

        # Call each FaceCrop on its sample subset
        transformed_subsets = {
            k: self.croppers[k].transform(**subsets[k]) for k in subsets.keys()
        }

        # Gather the results
        return [item for sublist in transformed_subsets.values() for item in sublist]

    def fit(self, X, y=None):
        return self


class BoundingBoxAnnotatorCrop(Base):
    """
    This face cropper uses a two-stage strategy to crop and align faces when `annotation_type` is a bounding-box.
    In the first stage, it crops the face using the {`topleft`, `bottomright`} annotations, expanded by a `margin` factor.
    In the second stage, it uses the `annotator` to estimate {`leye`, `reye`} and performs the crop using :py:class:`bob.ip.base.FaceEyesNorm`.
    In case the annotator fails, it returns the face cropped with the `bounding-box` coordinates.


    .. warning::
            `cropped_positions` must be set with `leye`, `reye`, `topleft` and `bottomright` positions


    Parameters
    ----------

    cropped_image_size : (int, int)
      The resolution of the cropped image, in order (HEIGHT,WIDTH); if not given,
      no face cropping will be performed

    cropped_positions : dict
      The coordinates in the cropped image, where the annotated points should be
      put to. This parameter is a dictionary with usually two elements, e.g.,
      ``{'reye':(RIGHT_EYE_Y, RIGHT_EYE_X) , 'leye':(LEFT_EYE_Y, LEFT_EYE_X)}``.
      However, other annotation points, such as ``{'topleft' : ..., 'bottomright' :
      ...}``, are also supported, as long as the corresponding keys are present
      in the ``annotations`` passed to the `__call__` function.

    mask_sigma : float or None
      Fill the area outside of image boundaries with random pixels from the
      border, by adding noise to the pixel values. To disable extrapolation, set
      this value to ``None``. To disable adding random noise, set it to a
      negative value or 0.

    mask_neighbors : int
      The number of neighbors used during mask extrapolation. See
      :py:func:`bob.ip.base.extrapolate_mask` for details.

    mask_seed : int or None
      The random seed to apply for mask extrapolation.

      .. warning::

         When run in parallel, the same random seed will be applied to all
         parallel processes. Hence, results of parallel execution will differ
         from the results in serial execution.

    allow_upside_down_normalized_faces: bool, optional
      If ``False`` (default), a ValueError is raised when normalized faces would be
      upside down compared to the input image. This allows you to easily catch wrong
      annotations in your database. If you are sure about your input, you can set
      this flag to ``True``.

    annotator : :any:`bob.bio.base.annotator.Annotator`
      If provided, the annotator will be used if the required annotations are
      missing.

    margin: float
       The bounding box will be expanded by this factor (proportionally to its
       width and height) before the annotator is applied. Defaults to ``0.5``.
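
    Examples
    --------
    A construction sketch (assuming an eye annotator resource is available;
    the resource name and the coordinates are illustrative):

    .. code-block:: python

        cropper = BoundingBoxAnnotatorCrop(
            cropped_image_size=(112, 112),
            cropped_positions={
                "reye": (32, 34),
                "leye": (32, 77),
                "topleft": (0, 0),
                "bottomright": (112, 112),
            },
            annotator="mtcnn",
        )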


    """

    def __init__(
        self,
        cropped_image_size,
        cropped_positions,
        annotator,
        mask_sigma=None,
        mask_neighbors=5,
        mask_seed=None,
        allow_upside_down_normalized_faces=False,
        color_channel="rgb",
        margin=0.5,
        **kwargs,
    ):

        if annotator is None:
            raise ValueError("A valid annotator needs to be set.")

        if isinstance(annotator, str):
            annotator = load_resource(annotator, "annotator")
        self.annotator = annotator

        # We need to have the four coordinates
        assert "leye" in cropped_positions
        assert "reye" in cropped_positions
        assert "topleft" in cropped_positions
        assert "bottomright" in cropped_positions

        # copy parameters (sklearn convention: each explicit __init__ argument *has* to become an attribute of the estimator)
        self.cropped_image_size = cropped_image_size
        self.cropped_positions = cropped_positions
        self.mask_sigma = mask_sigma
        self.mask_neighbors = mask_neighbors
        self.mask_seed = mask_seed
        self.allow_upside_down_normalized_faces = allow_upside_down_normalized_faces
        self.color_channel = color_channel

        # Eyes cropper
        eyes_position = dict()
        eyes_position["leye"] = cropped_positions["leye"]
        eyes_position["reye"] = cropped_positions["reye"]
        self.eyes_cropper = FaceCrop(
            cropped_image_size,
            eyes_position,
            fixed_positions=None,
            mask_sigma=mask_sigma,
            mask_neighbors=mask_neighbors,
            mask_seed=mask_seed,
            allow_upside_down_normalized_faces=allow_upside_down_normalized_faces,
            color_channel=color_channel,
        )
        self.margin = margin

    def transform(self, X, annotations=None):
        faces = []

        for x, annot in zip(X, annotations):

            # If the image is grayscale, expand to three channels before detection
            if x.ndim == 2:
                logger.warning(
                    "Grayscale image. Expanding the channels before detection"
                )
                x = numpy.repeat(numpy.expand_dims(x, 0), 3, axis=0)

            top = annot["topleft"][0]
            left = annot["topleft"][1]

            bottom = annot["bottomright"][0]
            right = annot["bottomright"][1]

            width = right - left
            height = bottom - top

            # Expanding the borders
            top_expanded = int(numpy.maximum(top - self.margin * height, 0))
            left_expanded = int(numpy.maximum(left - self.margin * width, 0))

            bottom_expanded = int(
                numpy.minimum(bottom + self.margin * height, x.shape[1])
            )
            right_expanded = int(numpy.minimum(right + self.margin * width, x.shape[2]))

            face_crop = x[
                :, top_expanded:bottom_expanded, left_expanded:right_expanded,
            ]

            # get the coordinates with the annotator
            annotator_annotations = self.annotator([face_crop])[0]

            # If nothing was detected OR if the annotations are swapped, return the cropped face
            if (
                annotator_annotations is None
                or annotator_annotations["reye"][1] > annotator_annotations["leye"][1]
            ):
                logger.warning(
                    f"Unable to detect face in bounding box. Got: {annotator_annotations}. Cropping will be only based on bounding-box."
                )

                face_crop = scale(face_crop, self.cropped_image_size)
                faces.append(face_crop)
            else:
                faces.append(
                    self.eyes_cropper.transform([face_crop], [annotator_annotations])[0]
                )

        return faces

    def fit(self, X, y=None):
        return self