diff --git a/bob/pad/face/config/preprocessor/video_face_crop.py b/bob/pad/face/config/preprocessor/video_face_crop.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad7f963b0fdb8f92319c02ad8210c899a44dbccc
--- /dev/null
+++ b/bob/pad/face/config/preprocessor/video_face_crop.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+
+from bob.pad.face.preprocessor import VideoFaceCrop
+
+#=======================================================================================
+# Define instances here:
+
+CROPPED_IMAGE_SIZE = (64, 64) # The size of the resulting face
+CROPPED_POSITIONS = {'topleft' : (0,0) , 'bottomright' : CROPPED_IMAGE_SIZE}
+FIXED_POSITIONS = None
+MASK_SIGMA = None # The sigma of the random values filling the areas outside the image
+MASK_NEIGHBORS = 5 # The number of neighbors to consider while extrapolating
+MASK_SEED = None # The seed for generating random values during extrapolation
+CHECK_FACE_SIZE_FLAG = True # Check the size of the face
+MIN_FACE_SIZE = 50
+USE_LOCAL_CROPPER_FLAG = True # Use the local face cropping class (identical to Ivana's paper)
+RGB_OUTPUT_FLAG = True # Return an RGB cropped face using the local cropper
+DETECT_FACES_FLAG = True # Find annotations locally, replacing the database annotations
+
+preprocessor_rgb_face_detect = VideoFaceCrop(cropped_image_size = CROPPED_IMAGE_SIZE,
+                                             cropped_positions = CROPPED_POSITIONS,
+                                             fixed_positions = FIXED_POSITIONS,
+                                             mask_sigma = MASK_SIGMA,
+                                             mask_neighbors = MASK_NEIGHBORS,
+                                             mask_seed = MASK_SEED,
+                                             check_face_size_flag = CHECK_FACE_SIZE_FLAG,
+                                             min_face_size = MIN_FACE_SIZE,
+                                             use_local_cropper_flag = USE_LOCAL_CROPPER_FLAG,
+                                             rgb_output_flag = RGB_OUTPUT_FLAG,
+                                             detect_faces_flag = DETECT_FACES_FLAG)
diff --git a/bob/pad/face/preprocessor/VideoFaceCrop.py b/bob/pad/face/preprocessor/VideoFaceCrop.py
index fd9756766aea41ad1de85a1f9ae398ac2809f7f3..97024f8daa0b7f282206025b4e7643020f24bdb1 100644
--- a/bob/pad/face/preprocessor/VideoFaceCrop.py
+++ b/bob/pad/face/preprocessor/VideoFaceCrop.py
@@ -18,6 +18,8 @@
 import numpy as np
 
 from bob.pad.face.preprocessor.ImageFaceCrop import ImageFaceCrop
 
+from ..utils.face_detection_utils import detect_faces_in_video
+
 #==============================================================================
 # Main body:
@@ -76,6 +78,12 @@ class VideoFaceCrop(Preprocessor, object):
         returned. This flag is only valid when ``use_local_cropper_flag = True``.
         Default: ``False``.
 
+    ``detect_faces_flag`` : :py:class:`bool`
+        If set to ``True``, the facial annotations will be generated using
+        face detection. Otherwise, the annotations from the database are used
+        for cropping.
+        Default: ``False``.
+
     ``kwargs``
         Remaining keyword parameters passed to the Base constructor, such as
         ``color_channel`` or ``dtype``.
     """
 
@@ -92,6 +100,7 @@ class VideoFaceCrop(Preprocessor, object):
                  min_face_size = 50,
                  use_local_cropper_flag = False,
                  rgb_output_flag = False,
+                 detect_faces_flag = False,
                  **kwargs):
 
         super(VideoFaceCrop, self).__init__(cropped_image_size = cropped_image_size,
@@ -104,6 +113,7 @@ class VideoFaceCrop(Preprocessor, object):
                                             min_face_size = min_face_size,
                                             use_local_cropper_flag = use_local_cropper_flag,
                                             rgb_output_flag = rgb_output_flag,
+                                            detect_faces_flag = detect_faces_flag,
                                             **kwargs)
 
         self.cropped_image_size = cropped_image_size
@@ -116,6 +126,7 @@ class VideoFaceCrop(Preprocessor, object):
         self.min_face_size = min_face_size
         self.use_local_cropper_flag = use_local_cropper_flag
         self.rgb_output_flag = rgb_output_flag
+        self.detect_faces_flag = detect_faces_flag
 
         # Save also the data stored in the kwargs:
         for (k, v) in kwargs.items():
@@ -263,6 +274,10 @@ class VideoFaceCrop(Preprocessor, object):
             Cropped faces stored in the FrameContainer.
         """
 
+        if self.detect_faces_flag:
+
+            annotations = detect_faces_in_video(frames)
+
         if len(frames) != len(annotations): # if some annotations are missing
 
             ## Select only annotated frames:
diff --git a/bob/pad/face/test/test.py b/bob/pad/face/test/test.py
index 160fbdc9600d8ebf60269ea99e33093e90a51b1c..06dcf820fc7a6c2ecdaac9b0f33dc430e0947c8a 100644
--- a/bob/pad/face/test/test.py
+++ b/bob/pad/face/test/test.py
@@ -29,8 +29,12 @@
 from ..extractor import FrameDiffFeatures
 
 from ..extractor import VideoLBPHistogram
 
+from ..extractor import VideoQualityMeasure
+
 from ..algorithm import VideoSvmPadAlgorithm
 
+from ..algorithm import VideoGmmPadAlgorithm
+
 import random
 
@@ -149,6 +153,43 @@ def test_video_face_crop():
     assert np.sum(faces[0][1]) == 429158
     assert np.sum(faces[-1][1]) == 429158
 
+    #==========================================================================
+    # Test another configuration of the VideoFaceCrop preprocessor:
+
+    CROPPED_IMAGE_SIZE = (64, 64) # The size of the resulting face
+    CROPPED_POSITIONS = {'topleft' : (0,0) , 'bottomright' : CROPPED_IMAGE_SIZE}
+    FIXED_POSITIONS = None
+    MASK_SIGMA = None # The sigma of the random values filling the areas outside the image
+    MASK_NEIGHBORS = 5 # The number of neighbors to consider while extrapolating
+    MASK_SEED = None # The seed for generating random values during extrapolation
+    CHECK_FACE_SIZE_FLAG = True # Check the size of the face
+    MIN_FACE_SIZE = 50
+    USE_LOCAL_CROPPER_FLAG = True # Use the local face cropping class (identical to Ivana's paper)
+    RGB_OUTPUT_FLAG = True # Return an RGB cropped face using the local cropper
+    DETECT_FACES_FLAG = True # Find annotations locally, replacing the database annotations
+
+    preprocessor = VideoFaceCrop(cropped_image_size = CROPPED_IMAGE_SIZE,
+                                 cropped_positions = CROPPED_POSITIONS,
+                                 fixed_positions = FIXED_POSITIONS,
+                                 mask_sigma = MASK_SIGMA,
+                                 mask_neighbors = MASK_NEIGHBORS,
+                                 mask_seed = MASK_SEED,
+                                 check_face_size_flag = CHECK_FACE_SIZE_FLAG,
+                                 min_face_size = MIN_FACE_SIZE,
+                                 use_local_cropper_flag = USE_LOCAL_CROPPER_FLAG,
+                                 rgb_output_flag = RGB_OUTPUT_FLAG,
+                                 detect_faces_flag = DETECT_FACES_FLAG)
+
+    video, _ = convert_image_to_video_data(image, annotations, 3)
+
+    faces = preprocessor(frames = video, annotations = annotations)
+
+    assert len(faces) == 3
+    assert faces[0][1].shape == (3, 64, 64)
+    assert faces[-1][1].shape == (3, 64, 64)
+    assert np.sum(faces[0][1]) == 1253048
+    assert np.sum(faces[-1][1]) == 1253048
+
 #==============================================================================
 def test_frame_difference():
@@ -261,6 +302,34 @@ def test_video_lbp_histogram():
     assert (lbp_histograms[0][1][-1] - 0.031737773152965658) < 0.000001
 
 
+#==============================================================================
+def test_video_quality_measure():
+    """
+    Test VideoQualityMeasure extractor.
+    """
+
+    image = load(datafile('test_image.png', 'bob.pad.face.test'))
+    annotations = {'topleft': (95, 155), 'bottomright': (215, 265)}
+
+    video, annotations = convert_image_to_video_data(image, annotations, 2)
+
+    GALBALLY=True
+    MSU=True
+    DTYPE=None
+
+    extractor = VideoQualityMeasure(galbally=GALBALLY,
+                                    msu=MSU,
+                                    dtype=DTYPE)
+
+    features = extractor(video)
+
+    assert len(features) == 2
+    assert len(features[0][1]) == 139
+    assert (features[0][1]==features[-1][1]).all()
+    assert (features[0][1][0] - 2.7748559659812599e-05) < 0.000001
+    assert (features[0][1][-1] - 0.16410418866596271) < 0.000001
+
+
 #==============================================================================
 def convert_array_to_list_of_frame_cont(data):
     """
@@ -350,6 +419,57 @@ def test_video_svm_pad_algorithm():
 
     assert precision > 0.99
 
+
+#==============================================================================
+def test_video_gmm_pad_algorithm():
+    """
+    Test the VideoGmmPadAlgorithm algorithm.
+    """
+
+    random.seed(7)
+
+    N = 1000
+    mu = 1
+    sigma = 1
+    real_array = np.transpose( np.vstack([[random.gauss(mu, sigma) for _ in range(N)], [random.gauss(mu, sigma) for _ in range(N)]]) )
+
+    mu = 5
+    sigma = 1
+    attack_array = np.transpose( np.vstack([[random.gauss(mu, sigma) for _ in range(N)], [random.gauss(mu, sigma) for _ in range(N)]]) )
+
+    real = convert_array_to_list_of_frame_cont(real_array)
+
+    N_COMPONENTS = 1
+    RANDOM_STATE = 3
+    FRAME_LEVEL_SCORES_FLAG = True
+
+    algorithm = VideoGmmPadAlgorithm(n_components = N_COMPONENTS,
+                                     random_state = RANDOM_STATE,
+                                     frame_level_scores_flag = FRAME_LEVEL_SCORES_FLAG)
+
+    # training_features[0] - training features for the REAL class.
+    real_array_converted = algorithm.convert_list_of_frame_cont_to_array(real) # output is array
+
+    assert (real_array == real_array_converted).all()
+
+    # Train the GMM machine and get normalizers:
+    machine, features_mean, features_std = algorithm.train_gmm(real = real_array_converted,
+                                                               n_components = algorithm.n_components,
+                                                               random_state = algorithm.random_state)
+
+    algorithm.machine = machine
+
+    algorithm.features_mean = features_mean
+
+    algorithm.features_std = features_std
+
+    scores_real = algorithm.project(real_array_converted)
+
+    scores_attack = algorithm.project(attack_array)
+
+    assert (np.min(scores_real) + 7.9423798970985917) < 0.000001
+    assert (np.max(scores_real) + 1.8380480068281055) < 0.000001
+    assert (np.min(scores_attack) + 38.831260843070098) < 0.000001
+    assert (np.max(scores_attack) + 5.3633030621521272) < 0.000001
diff --git a/bob/pad/face/utils/__init__.py b/bob/pad/face/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/bob/pad/face/utils/face_detection_utils.py b/bob/pad/face/utils/face_detection_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2214958d80a15c7730517b5a82b49d52ab4a0abe
--- /dev/null
+++ b/bob/pad/face/utils/face_detection_utils.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+
+"""
+This file contains face detection utils.
+"""
+#==============================================================================
+# Import here:
+
+import bob.ip.dlib # for face detection functionality
+
+
+#==============================================================================
+def detect_face_in_image(image):
+    """
+    This function detects a face in the input image.
+
+    **Parameters:**
+
+    ``image`` : 3D :py:class:`numpy.ndarray`
+        A color image to detect the face in.
+
+    **Returns:**
+
+    ``annotations`` : :py:class:`dict`
+        A dictionary containing the annotations of the face bounding box.
+        The dictionary structure is: ``{'topleft': (row, col), 'bottomright': (row, col)}``.
+    """
+
+    bounding_box, _ = bob.ip.dlib.FaceDetector().detect_single_face(image)
+
+    annotations = {}
+
+    if bounding_box is not None:
+
+        annotations['topleft'] = bounding_box.topleft
+
+        annotations['bottomright'] = bounding_box.bottomright
+
+    else:
+
+        annotations['topleft'] = (0, 0)
+
+        annotations['bottomright'] = (0, 0)
+
+    return annotations
+
+
+#==============================================================================
+def detect_faces_in_video(frame_container):
+    """
+    This function detects a face in each frame of the input video.
+
+    **Parameters:**
+
+    ``frame_container`` : FrameContainer
+        FrameContainer containing the frames data.
+
+    **Returns:**
+
+    ``annotations`` : :py:class:`dict`
+        A dictionary containing the annotations for each frame in the video.
+        Dictionary structure: ``annotations = {'0': frame0_dict, '1': frame1_dict, ...}``,
+        where ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}``
+        is the dictionary defining the coordinates of the face bounding box in frame N.
+    """
+
+    annotations = {}
+
+    for idx, frame in enumerate(frame_container):
+
+        image = frame[1]
+
+        frame_annotations = detect_face_in_image(image)
+
+        annotations[str(idx)] = frame_annotations
+
+    return annotations
+
+
+
+
+
+
+
+
diff --git a/requirements.txt b/requirements.txt
index 4eff3ad6e2749acdbb9af39bbd8e6ca322a2f2e1..6692bce0eb1efa0062578f7755eea27928de0e94 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,6 +12,7 @@ bob.bio.video
 bob.io.image
 bob.ip.color
 bob.ip.qualitymeasure
+bob.ip.dlib
 bob.learn.libsvm
 bob.learn.linear
 scikit-learn
diff --git a/setup.py b/setup.py
index 228505a442da3ff6ed99c33d09c2c447d25071ec..0a0578e9d4355fcdc12bd181768fb43c0c237ffe 100644
--- a/setup.py
+++ b/setup.py
@@ -103,6 +103,7 @@ setup(
         # registered preprocessors:
         'bob.pad.preprocessor': [
            'empty-preprocessor = bob.pad.face.config.preprocessor.filename:empty_preprocessor', # no preprocessing
+           'preprocessor-rgb-face-detect = bob.pad.face.config.preprocessor.video_face_crop:preprocessor_rgb_face_detect', # detect faces locally, replacing database annotations
            ],
 
         # registered extractors:
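For reference, a minimal usage sketch of the additions above: it exercises the new detect_faces_in_video() utility together with the registered preprocessor_rgb_face_detect configuration. It assumes the bob.bio.video.FrameContainer API (FrameContainer() plus its add() method) and reuses the test_image.png resource referenced by the tests in this patch; the variable names are only illustrative.

    import bob.bio.video
    from bob.io.base import load
    from bob.io.base.test_utils import datafile

    from bob.pad.face.utils.face_detection_utils import detect_faces_in_video
    from bob.pad.face.config.preprocessor.video_face_crop import preprocessor_rgb_face_detect

    # Replicate the RGB test image into a 3-frame FrameContainer, similar to
    # what convert_image_to_video_data() does in the unit tests above:
    image = load(datafile('test_image.png', 'bob.pad.face.test'))

    frames = bob.bio.video.FrameContainer()
    for idx in range(3):
        frames.add(idx, image)

    # Per-frame face detection; keys are the frame indices as strings ('0', '1', ...):
    annotations = detect_faces_in_video(frames)

    # Crop 64x64 RGB faces; since detect_faces_flag=True in this configuration,
    # the preprocessor recomputes the annotations internally via detect_faces_in_video(),
    # so the database annotations are not required:
    faces = preprocessor_rgb_face_detect(frames = frames, annotations = annotations)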