From 34209d05fef0e574906f24fdff37975f1fb40e80 Mon Sep 17 00:00:00 2001
From: Olegs NIKISINS <onikisins@italix03.idiap.ch>
Date: Fri, 13 Oct 2017 14:06:21 +0200
Subject: [PATCH] Added a local face detection option to the VideoFaceCrop
 class

---
 .../config/preprocessor/video_face_crop.py | 31 +++++++
 bob/pad/face/preprocessor/VideoFaceCrop.py | 15 ++++
 bob/pad/face/utils/__init__.py             |  0
 bob/pad/face/utils/face_detection_utils.py | 87 +++++++++++++++++++
 setup.py                                   |  1 +
 5 files changed, 134 insertions(+)
 create mode 100644 bob/pad/face/config/preprocessor/video_face_crop.py
 create mode 100644 bob/pad/face/utils/__init__.py
 create mode 100644 bob/pad/face/utils/face_detection_utils.py

diff --git a/bob/pad/face/config/preprocessor/video_face_crop.py b/bob/pad/face/config/preprocessor/video_face_crop.py
new file mode 100644
index 00000000..ad7f963b
--- /dev/null
+++ b/bob/pad/face/config/preprocessor/video_face_crop.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+
+from bob.pad.face.preprocessor import VideoFaceCrop
+
+#=======================================================================================
+# Define instances here:
+
+CROPPED_IMAGE_SIZE = (64, 64) # The size of the resulting face crop
+CROPPED_POSITIONS = {'topleft' : (0, 0), 'bottomright' : CROPPED_IMAGE_SIZE}
+FIXED_POSITIONS = None
+MASK_SIGMA = None # The sigma of the random values filled in areas outside the image
+MASK_NEIGHBORS = 5 # The number of neighbors to consider while extrapolating
+MASK_SEED = None # The seed for generating random values during extrapolation
+CHECK_FACE_SIZE_FLAG = True # Check the size of the face
+MIN_FACE_SIZE = 50 # Minimal size of the face in pixels
+USE_LOCAL_CROPPER_FLAG = True # Use the local face cropping class (identical to Ivana's paper)
+RGB_OUTPUT_FLAG = True # Return the RGB cropped face using the local cropper
+DETECT_FACES_FLAG = True # Detect annotations locally, replacing the database annotations
+
+preprocessor_rgb_face_detect = VideoFaceCrop(cropped_image_size = CROPPED_IMAGE_SIZE,
+                                             cropped_positions = CROPPED_POSITIONS,
+                                             fixed_positions = FIXED_POSITIONS,
+                                             mask_sigma = MASK_SIGMA,
+                                             mask_neighbors = MASK_NEIGHBORS,
+                                             mask_seed = MASK_SEED,
+                                             check_face_size_flag = CHECK_FACE_SIZE_FLAG,
+                                             min_face_size = MIN_FACE_SIZE,
+                                             use_local_cropper_flag = USE_LOCAL_CROPPER_FLAG,
+                                             rgb_output_flag = RGB_OUTPUT_FLAG,
+                                             detect_faces_flag = DETECT_FACES_FLAG)

diff --git a/bob/pad/face/preprocessor/VideoFaceCrop.py b/bob/pad/face/preprocessor/VideoFaceCrop.py
index fd975676..97024f8d 100644
--- a/bob/pad/face/preprocessor/VideoFaceCrop.py
+++ b/bob/pad/face/preprocessor/VideoFaceCrop.py
@@ -18,6 +18,8 @@ import numpy as np
 
 from bob.pad.face.preprocessor.ImageFaceCrop import ImageFaceCrop
 
+from ..utils.face_detection_utils import detect_faces_in_video
+
 
 #==============================================================================
 # Main body:
@@ -76,6 +78,12 @@ class VideoFaceCrop(Preprocessor, object):
         returned. This flag is only valid when ``use_local_cropper_flag = True``.
         Default: ``False``.
 
+    ``detect_faces_flag`` : :py:class:`bool`
+        If set to ``True``, facial annotations will be generated using
+        face detection. Otherwise, the annotations provided by the database
+        are used for cropping.
+        Default: ``False``.
+
     ``kwargs``
         Remaining keyword parameters passed to the Base constructor, such as ``color_channel`` or ``dtype``.
""" @@ -92,6 +100,7 @@ class VideoFaceCrop(Preprocessor, object): min_face_size = 50, use_local_cropper_flag = False, rgb_output_flag = False, + detect_faces_flag = False, **kwargs): super(VideoFaceCrop, self).__init__(cropped_image_size = cropped_image_size, @@ -104,6 +113,7 @@ class VideoFaceCrop(Preprocessor, object): min_face_size = min_face_size, use_local_cropper_flag = use_local_cropper_flag, rgb_output_flag = rgb_output_flag, + detect_faces_flag = detect_faces_flag, **kwargs) self.cropped_image_size = cropped_image_size @@ -116,6 +126,7 @@ class VideoFaceCrop(Preprocessor, object): self.min_face_size = min_face_size self.use_local_cropper_flag = use_local_cropper_flag self.rgb_output_flag = rgb_output_flag + self.detect_faces_flag = detect_faces_flag # Save also the data stored in the kwargs: for (k, v) in kwargs.items(): @@ -263,6 +274,10 @@ class VideoFaceCrop(Preprocessor, object): Cropped faces stored in the FrameContainer. """ + if self.detect_faces_flag: + + annotations = detect_faces_in_video(frames) + if len(frames) != len(annotations): # if some annotations are missing ## Select only annotated frames: diff --git a/bob/pad/face/utils/__init__.py b/bob/pad/face/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bob/pad/face/utils/face_detection_utils.py b/bob/pad/face/utils/face_detection_utils.py new file mode 100644 index 00000000..2214958d --- /dev/null +++ b/bob/pad/face/utils/face_detection_utils.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +""" +This file contains face detection utils. +""" +#============================================================================== +# Import here: + +import bob.ip.dlib # for face detection functionality + + +#============================================================================== +def detect_face_in_image(image): + """ + This function detects a face in the input image. + + **Parameters:** + + ``image`` : 3D :py:class:`numpy.ndarray` + A color image to detect the face in. + + **Returns:** + + ``annotations`` : :py:class:`dict` + A dictionary containing annotations of the face bounding box. + Dictionary must be as follows ``{'topleft': (row, col), 'bottomright': (row, col)}``. + """ + + bounding_box, _ = bob.ip.dlib.FaceDetector().detect_single_face(image) + + annotations = {} + + if bounding_box is not None: + + annotations['topleft'] = bounding_box.topleft + + annotations['bottomright'] = bounding_box.bottomright + + else: + + annotations['topleft'] = (0, 0) + + annotations['bottomright'] = (0, 0) + + return annotations + + +#============================================================================== +def detect_faces_in_video(frame_container): + """ + This function detects a face in each farme of the input video. + + **Parameters:** + + ``frame_container`` : FrameContainer + FrameContainer containing the frames data. + + **Returns:** + + ``annotations`` : :py:class:`dict` + A dictionary containing the annotations for each frame in the video. + Dictionary structure: ``annotations = {'1': frame1_dict, '2': frame1_dict, ...}``. + Where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}`` + is the dictionary defining the coordinates of the face bounding box in frame N. 
+ """ + + annotations = {} + + for idx, frame in enumerate(frame_container): + + image = frame[1] + + frame_annotations = detect_face_in_image(image) + + annotations[str(idx)] = frame_annotations + + return annotations + + + + + + + + + diff --git a/setup.py b/setup.py index 228505a4..0a0578e9 100644 --- a/setup.py +++ b/setup.py @@ -103,6 +103,7 @@ setup( # registered preprocessors: 'bob.pad.preprocessor': [ 'empty-preprocessor = bob.pad.face.config.preprocessor.filename:empty_preprocessor', # no preprocessing + 'preprocessor-rgb-face-detect = bob.pad.face.config.preprocessor.video_face_crop:preprocessor_rgb_face_detect', # detect faces locally replacing database annotations ], # registered extractors: -- GitLab