From a2b16a9cad9d8ad7531bf61fa40b8375d8bda8a0 Mon Sep 17 00:00:00 2001
From: Olegs NIKISINS <onikisins@italix03.idiap.ch>
Date: Mon, 25 Sep 2017 14:53:56 +0200
Subject: [PATCH] Added a part of VideoSparseCoding preprocessor

---
 .../face/preprocessor/VideoSparseCoding.py    | 549 ++++++++++++++++++
 1 file changed, 549 insertions(+)
 create mode 100644 bob/pad/face/preprocessor/VideoSparseCoding.py

diff --git a/bob/pad/face/preprocessor/VideoSparseCoding.py b/bob/pad/face/preprocessor/VideoSparseCoding.py
new file mode 100644
index 00000000..f4a0e968
--- /dev/null
+++ b/bob/pad/face/preprocessor/VideoSparseCoding.py
@@ -0,0 +1,549 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 22 2017
+
+@author: Olegs Nikisins
+"""
+
+#==============================================================================
+# Import what is needed here:
+
+from bob.bio.base.preprocessor import Preprocessor
+
+import bob.bio.video
+
+import bob.ip.base
+
+import bob.ip.color
+
+import numpy as np
+
+import random
+random.seed(7)
+
+
+#==============================================================================
+class VideoSparseCoding(Preprocessor, object):
+    """
+    This class is designed to compute "feature vectors" for all stacks of facial
+    images using sparse coding. The feature vector is computed for each stack
+    containing ``block_length`` images.
+
+    The maximum number of facial stacks per video is:
+    (``num_of_frames_in_video`` - ``block_length``).
+    However, the number of facial volumes might be less than the above,
+    because frames with small faces (< ``min_face_size``) are discarded.
+
+    The features are computed by extracting all possible 3D blocks from each
+    facial stack and vectorizing the frontal, central-horizontal and
+    central-vertical patches of every block.
+
+    **Parameters:**
+
+    ``block_size`` : :py:class:`int`
+        The spatial size of facial patches. Default: 5.
+
+    ``block_length`` : :py:class:`int`
+        The temporal length of the stack of facial images / number of frames
+        per stack. Default: 10.
+
+    ``min_face_size`` : :py:class:`int`
+        Discard frames with faces smaller than ``min_face_size``.
+        Default: 50.
+
+    ``norm_face_size`` : :py:class:`int`
+        The size of the face after normalization. Default: 64.
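+
+    **Example:**
+
+    A minimal construction sketch (the values below are just the defaults):
+
+    .. code-block:: python
+
+        preprocessor = VideoSparseCoding(block_size = 5,
+                                         block_length = 10,
+                                         min_face_size = 50,
+                                         norm_face_size = 64)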
+    """
+
+
+    #==========================================================================
+    def __init__(self,
+                 block_size = 5,
+                 block_length = 10,
+                 min_face_size = 50,
+                 norm_face_size = 64,
+                 **kwargs):
+
+        super(VideoSparseCoding, self).__init__(block_size = block_size,
+                                                block_length = block_length,
+                                                min_face_size = min_face_size,
+                                                norm_face_size = norm_face_size)
+
+        self.block_size = block_size
+        self.block_length = block_length
+        self.min_face_size = min_face_size
+        self.norm_face_size = norm_face_size
+
+        self.video_preprocessor = bob.bio.video.preprocessor.Wrapper()
+
+
+    #==========================================================================
+    def crop_norm_face_grayscale(self, image, annotations, face_size):
+        """
+        This function crops the face in the input Gray-scale image given
+        annotations defining the face bounding box. The size of the face is
+        also normalized to the pre-defined dimensions.
+
+        The cropping and normalization procedure follows the one used in the
+        following paper:
+        "On the Effectiveness of Local Binary Patterns in Face Anti-spoofing"
+
+        **Parameters:**
+
+        ``image`` : 2D :py:class:`numpy.ndarray`
+            Gray-scale input image.
+
+        ``annotations`` : :py:class:`dict`
+            A dictionary containing annotations of the face bounding box.
+            Dictionary must be as follows:
+            ``{'topleft': (row, col), 'bottomright': (row, col)}``
+
+        ``face_size`` : :py:class:`int`
+            The size of the face after normalization.
+
+        **Returns:**
+
+        ``normbbx`` : 2D :py:class:`numpy.ndarray`
+            Cropped facial image of the size (``face_size`` x ``face_size``).
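+
+        **Example:**
+
+        A minimal sketch (``image`` is a 2D gray-scale array; the bounding
+        box coordinates are illustrative):
+
+        .. code-block:: python
+
+            annotations = {'topleft': (10, 20), 'bottomright': (74, 84)}
+            face = preprocessor.crop_norm_face_grayscale(image, annotations, 64)
+            # face.shape == (64, 64), face.dtype == uint8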
+        """
+
+        # Crop the face given the bounding box annotations:
+        cutframe = image[annotations['topleft'][0]:annotations['bottomright'][0],
+                         annotations['topleft'][1]:annotations['bottomright'][1]]
+
+        # Scale the crop to the normalized face size:
+        tempbbx = np.ndarray((face_size, face_size), 'float64')
+        bob.ip.base.scale(cutframe, tempbbx)
+
+        # Round to the nearest integer and convert to uint8:
+        normbbx = np.cast['uint8'](np.floor(tempbbx + 0.5))
+
+        return normbbx
+
+
+    #==========================================================================
+    def crop_norm_faces_grayscale(self, images, annotations, face_size):
+        """
+        This function crops and normalizes faces in a stack of images given
+        annotations of the face bounding box for the first image in the stack.
+
+        **Parameters:**
+
+        ``images`` : 3D :py:class:`numpy.ndarray`
+            A stack of gray-scale input images. The size of the array is
+            (n_images x n_rows x n_cols).
+
+        ``annotations`` : :py:class:`dict`
+            A dictionary containing annotations of the face bounding box.
+            Dictionary must be as follows:
+            ``{'topleft': (row, col), 'bottomright': (row, col)}``
+
+        ``face_size`` : :py:class:`int`
+            The size of the face after normalization.
+
+        **Returns:**
+
+        ``normbbx`` : 3D :py:class:`numpy.ndarray`
+            A stack of normalized faces.
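+
+        **Example:**
+
+        A minimal sketch (``images`` is a hypothetical stack of 10 frames
+        sharing the annotations of the first frame):
+
+        .. code-block:: python
+
+            faces = preprocessor.crop_norm_faces_grayscale(images, annotations, 64)
+            # faces.shape == (10, 64, 64)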
+        """
+
+        normbbx = []
+
+        for image in images:
+
+            normbbx.append( self.crop_norm_face_grayscale(image, annotations, face_size) )
+
+        normbbx = np.stack(normbbx)
+
+        return normbbx
+
+
+    #==========================================================================
+    def select_all_blocks(self, images, block_size):
+        """
+        Extract all possible 3D blocks from a stack of images.
+
+        **Parameters:**
+
+        ``images`` : 3D :py:class:`numpy.ndarray`
+            A stack of gray-scale input images. The size of the array is
+            (``n_images`` x ``n_rows`` x ``n_cols``).
+
+        ``block_size`` : :py:class:`int`
+            The spatial size of patches. The size of extracted 3D blocks is:
+            (``n_images`` x ``block_size`` x ``block_size``).
+
+        **Returns:**
+
+        ``all_blocks`` : [3D :py:class:`numpy.ndarray`]
+            A list containing all extracted 3D blocks. The number of blocks is:
+            (``n_rows`` - ``block_size``) * (``n_cols`` - ``block_size``).
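+
+        **Example:**
+
+        A minimal sketch (the zero-filled stack below is purely illustrative):
+
+        .. code-block:: python
+
+            import numpy as np
+
+            images = np.zeros((10, 64, 64))
+            blocks = preprocessor.select_all_blocks(images, 5)
+            # len(blocks) == (64 - 5) * (64 - 5) == 3481
+            # blocks[0].shape == (10, 5, 5)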
+        """
+
+        (_, row_num, col_num) = images.shape
+
+        all_blocks = []
+
+        for row in range(row_num - block_size):
+
+            for col in range(col_num - block_size):
+
+                block = images[:, row:row+block_size, col:col+block_size]
+
+                all_blocks.append( block )
+
+        return all_blocks
+
+
+    #==========================================================================
+    def convert_frame_cont_to_grayscale_array(self, frame_cont):
+        """
+        Convert color video stored in the frame container into 3D array storing
+        gray-scale frames. The dimensions of the output array are:
+        (n_frames x n_rows x n_cols).
+
+        **Parameters:**
+
+        ``frame_cont`` : FrameContainer
+            Video data stored in the FrameContainer, see
+            ``bob.bio.video.utils.FrameContainer`` for further details.
+
+        **Returns:**
+
+        ``result_array`` : 3D :py:class:`numpy.ndarray`
+            A stack of gray-scale frames. The size of the array is
+            (n_frames x n_rows x n_cols).
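+
+        **Example:**
+
+        A minimal sketch (``frame_container`` is a hypothetical, already
+        loaded FrameContainer with RGB frames):
+
+        .. code-block:: python
+
+            video = preprocessor.convert_frame_cont_to_grayscale_array(frame_container)
+            # video.shape == (n_frames, n_rows, n_cols)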
+        """
+
+        result_array = []
+
+        for frame in frame_cont:
+
+            image = frame[1]
+
+            result_array.append( bob.ip.color.rgb_to_gray(image) )
+
+        result_array = np.stack(result_array)
+
+        return result_array
+
+
+    #==========================================================================
+    def get_all_blocks_from_color_channel(self, video, annotations, block_size, block_length, min_face_size, norm_face_size):
+        """
+        Extract all 3D blocks from facial region of the input 3D array.
+        Input 3D array represents one color channel of the video or a gray-scale
+        video. Blocks are extracted from all 3D facial volumes. Facial volumes
+        overlap with a shift of one frame.
+
+        The size of the facial volume is:
+        (``block_length`` x ``norm_face_size`` x ``norm_face_size``).
+
+        The maximum number of available facial volumes in the video:
+        (``num_of_frames_in_video`` - ``block_length``).
+        However, the final number of facial volumes might be less than the
+        above, because frames with small faces (< ``min_face_size``) are
+        discarded.
+
+        **Parameters:**
+
+        ``video`` : 3D :py:class:`numpy.ndarray`
+            A stack of gray-scale input images. The size of the array is
+            (n_images x n_rows x n_cols).
+
+        ``annotations`` : :py:class:`dict`
+            A dictionary containing the annotations for each frame in the video.
+            Dictionary structure:
+            ``annotations = {'1': frame1_dict, '2': frame2_dict, ...}``,
+            where
+            ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}``
+            is the dictionary defining the coordinates of the face bounding
+            box in frame N.
+
+        ``block_size`` : :py:class:`int`
+            The spatial size of facial patches.
+
+        ``block_length`` : :py:class:`int`
+            The temporal length of the stack of facial images / number of frames
+            per stack.
+
+        ``min_face_size`` : :py:class:`int`
+            Discard frames with face of the size less than ``min_face_size``.
+
+        ``norm_face_size`` : :py:class:`int`
+            The size of the face after normalization.
+
+        **Returns:**
+
+        ``all_blocks`` : [[3D :py:class:`numpy.ndarray`]]
+            Internal list contains all possible 3D blocks/volumes extracted from
+            a particular stack of facial images. The dimensions of each 3D block:
+            (block_length x block_size x block_size).
+            The number of possible blocks is: (norm_face_size - block_size)^2.
+
+            The length of the outer list is equal to the number of possible
+            facial stacks in the input video:
+            (``num_of_frames_in_video`` - ``block_length``).
+            However, the final number of facial volumes might be less than the
+            above, because frames with small faces (< ``min_face_size``) are
+            discarded.
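+
+        **Example:**
+
+        A minimal sketch (``video`` and ``annotations`` are assumed to be
+        available; the numeric arguments are the class defaults):
+
+        .. code-block:: python
+
+            all_blocks = preprocessor.get_all_blocks_from_color_channel(
+                video, annotations, 5, 10, 50, 64)
+            # len(all_blocks) <= len(video) - 10
+            # all_blocks[0][0].shape == (10, 5, 5)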
+        """
+
+        annotated_frames = annotations.keys()
+
+        all_blocks = []
+
+        for fn in range(len(video)-block_length):
+
+            if str(fn) in annotated_frames: # process if frame is annotated
+
+                frame_annotations = annotations[str(fn)]
+
+                face_size = np.min(np.array(frame_annotations['bottomright']) - np.array(frame_annotations['topleft']))
+
+                if face_size >= min_face_size: # process if face is large enough
+
+                    # Selected 3D stacks of images. Stack has ``block_length`` images.
+                    stack_of_images = video[fn:fn + block_length, :, :]
+
+                    # 3D stacks of normalized face images.
+                    faces = self.crop_norm_faces_grayscale(stack_of_images, frame_annotations, norm_face_size)
+
+                    # A list with all blocks per stack of facial images.
+                    list_all_blocks_per_stack = self.select_all_blocks(faces, block_size)
+
+                    all_blocks.append( list_all_blocks_per_stack )
+
+        return all_blocks
+
+
+    #==========================================================================
+    def extract_patches_from_blocks(self, all_blocks):
+        """
+        Extract frontal, central-horizontal and central-vertical patches from
+        all blocks returned by ``get_all_blocks_from_color_channel``
+        method of this class. The patches are returned in a vectorized form.
+
+        **Parameters:**
+
+        ``all_blocks`` : [[3D :py:class:`numpy.ndarray`]]
+            Internal list contains all possible 3D blocks/volumes extracted from
+            a particular stack of facial images. The dimensions of each 3D block:
+            (block_length x block_size x block_size).
+            The number of possible blocks is: (norm_face_size - block_size)^2.
+
+            The length of the outer list is equal to the number of possible
+            facial stacks in the input video:
+            (``num_of_frames_in_video`` - ``block_length``).
+            However, the final number of facial volumes might be less than the
+            above, because frames with small faces (< ``min_face_size``) are
+            discarded.
+
+        **Returns:**
+
+        ``frontal_patches`` : [2D :py:class:`numpy.ndarray`]
+            Each element in the list contains an array of vectorized frontal
+            patches for the particular stack of facial images.
+            The size of each array is:
+            ( (``norm_face_size`` - ``block_size``)^2 x ``block_size``^2 ).
+            The maximum length of the list is:
+            (``num_of_frames_in_video`` - ``block_length``)
+
+        ``horizontal_patches`` : [2D :py:class:`numpy.ndarray`]
+            Each element in the list contains an array of vectorized horizontal
+            patches for the particular stack of facial images.
+            The size of each array is:
+            ( (``norm_face_size`` - ``block_size``)^2 x ``block_length``*``block_size`` ).
+            The maximum length of the list is:
+            (``num_of_frames_in_video`` - ``block_length``)
+
+        ``vertical_patches`` : [2D :py:class:`numpy.ndarray`]
+            Each element in the list contains an array of vectorized vertical
+            patches for the particular stack of facial images.
+            The size of each array is:
+            ( (``norm_face_size`` - ``block_size``)^2 x ``block_length``*``block_size`` ).
+            The maximum length of the list is:
+            (``num_of_frames_in_video`` - ``block_length``)
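+
+        **Example:**
+
+        A minimal sketch (shapes assume the defaults ``block_size`` = 5,
+        ``block_length`` = 10 and ``norm_face_size`` = 64):
+
+        .. code-block:: python
+
+            f, h, v = preprocessor.extract_patches_from_blocks(all_blocks)
+            # f[0].shape == (3481, 25)    (64-5)^2 x 5^2
+            # h[0].shape == (3481, 50)    (64-5)^2 x 10*5
+            # v[0].shape == (3481, 50)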
+        """
+
+        length, row_num, col_num = all_blocks[0][0].shape
+
+        selected_row = row_num // 2
+
+        selected_col = col_num // 2
+
+        frontal_patches = []
+        horizontal_patches = []
+        vertical_patches = []
+
+        # volume - is a list of 3D blocks for a particular stack of facial images.
+        for volume in all_blocks:
+
+            volume_frontal_patches = []
+            volume_horizontal_patches = []
+            volume_vertical_patches = []
+
+            for block in volume:
+
+                frontal_patch = block[0, :, :] # the frontal patch of a block. Size: (row_num x col_num)
+                volume_frontal_patches.append(frontal_patch.flatten())
+
+                horizontal_patch = block[:, selected_row, :] # the central-horizontal patch of a block. Size: (length x col_num), where
+                # length = block_length, col_num = block_size.
+                volume_horizontal_patches.append(horizontal_patch.flatten())
+
+                vertical_patch = block[:, :, selected_col] # the central-vertical patch of a block. Size: (length x row_num)
+                volume_vertical_patches.append(vertical_patch.flatten())
+
+            frontal_patches.append( np.stack(volume_frontal_patches) )
+
+            horizontal_patches.append( np.stack(volume_horizontal_patches) )
+
+            vertical_patches.append( np.stack(volume_vertical_patches) )
+
+        return frontal_patches, horizontal_patches, vertical_patches
+
+
+    #==========================================================================
+    def __select_random_patches_single_list(self, patches, n_patches):
+        """
+        This method is called by ``select_random_patches`` method to process
+        all lists of patches.
+
+        **Parameters:**
+
+        ``patches`` : [2D :py:class:`numpy.ndarray`]
+            Each element in the list contains an array of vectorized
+            patches for the particular stack of facial images.
+            The size of each array is:
+            ( (``norm_face_size`` - ``block_size``)^2 x ``block_size``^2 ).
+            The maximum length of the list is:
+            (``num_of_frames_in_video`` - ``block_length``)
+
+        ``n_patches`` : :py:class:`int`
+            Number of randomly selected patches.
+
+        **Returns:**
+
+        ``selected_patches`` : 2D :py:class:`numpy.ndarray`
+            An array of selected patches. The dimensionality of the array:
+            (``n_patches`` x ``number_of_features``).
+        """
+
+        all_patches = np.vstack(patches)
+
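+        # indices of random patches; note: the selection is done with replacement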
+        idx = [random.randint( 0, len(all_patches) - 1 ) for _ in range(n_patches)]
+
+        selected_patches = all_patches[idx, :]
+
+        return selected_patches
+
+
+    #==========================================================================
+    def select_random_patches(self, frontal_patches, horizontal_patches, vertical_patches, n_patches):
+        """
+        Select random patches given lists of frontal, central-horizontal and
+        central-vertical patches, as returned by ``extract_patches_from_blocks``
+        method of this class.
+
+        **Parameters:**
+
+        ``frontal_patches`` : [2D :py:class:`numpy.ndarray`]
+            Each element in the list contains an array of vectorized frontal
+            patches for the particular stack of facial images.
+            The size of each array is:
+            ( (``norm_face_size`` - ``block_size``)^2 x ``block_size``^2 ).
+            The maximum length of the list is:
+            (``num_of_frames_in_video`` - ``block_length``)
+
+        ``horizontal_patches`` : [2D :py:class:`numpy.ndarray`]
+            Each element in the list contains an array of vectorized horizontal
+            patches for the particular stack of facial images.
+            The size of each array is:
+            ( (``norm_face_size`` - ``block_size``)^2 x ``block_length``*``block_size`` ).
+            The maximum length of the list is:
+            (``num_of_frames_in_video`` - ``block_length``)
+
+        ``vertical_patches`` : [2D :py:class:`numpy.ndarray`]
+            Each element in the list contains an array of vectorized vertical
+            patches for the particular stack of facial images.
+            The size of each array is:
+            ( (``norm_face_size`` - ``block_size``)^2 x ``block_length``*``block_size`` ).
+            The maximum length of the list is:
+            (``num_of_frames_in_video`` - ``block_length``)
+
+        ``n_patches`` : :py:class:`int`
+            Number of randomly selected patches.
+
+        **Returns:**
+
+        ``selected_frontal_patches`` : 2D :py:class:`numpy.ndarray`
+            An array of selected frontal patches of the size
+            (``n_patches`` x ``block_size``^2).
+
+        ``selected_horizontal_patches`` : 2D :py:class:`numpy.ndarray`
+            An array of selected horizontal patches of the size
+            (``n_patches`` x ``block_length``*``block_size``).
+
+        ``selected_vertical_patches`` : 2D :py:class:`numpy.ndarray`
+            An array of selected vertical patches of the size
+            (``n_patches`` x ``block_length``*``block_size``).
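+
+        **Example:**
+
+        A minimal sketch (``n_patches`` = 100 is illustrative; shapes assume
+        the default block settings):
+
+        .. code-block:: python
+
+            sf, sh, sv = preprocessor.select_random_patches(
+                frontal_patches, horizontal_patches, vertical_patches, 100)
+            # sf.shape == (100, 25); sh.shape == sv.shape == (100, 50)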
+        """
+
+        selected_frontal_patches = self.__select_random_patches_single_list(frontal_patches, n_patches)
+
+        selected_horizontal_patches = self.__select_random_patches_single_list(horizontal_patches, n_patches)
+
+        selected_vertical_patches = self.__select_random_patches_single_list(vertical_patches, n_patches)
+
+        return selected_frontal_patches, selected_horizontal_patches, selected_vertical_patches
+
+
+    #==========================================================================
+    def __call__(self, frames, annotations):
+        """
+        Extract vectorized frontal, central-horizontal and central-vertical
+        patches from all facial volumes of the input video.
+
+        **Parameters:**
+
+        ``frames`` : FrameContainer
+            Video data stored in the FrameContainer, see
+            ``bob.bio.video.utils.FrameContainer`` for further details.
+
+        ``annotations`` : :py:class:`dict`
+            A dictionary containing the annotations for each frame in the video.
+            Dictionary structure:
+            ``annotations = {'1': frame1_dict, '2': frame2_dict, ...}``,
+            where
+            ``frameN_dict = {'topleft': (row, col), 'bottomright': (row, col)}``
+            is the dictionary defining the coordinates of the face bounding
+            box in frame N.
+
+        **Returns:**
+
+        ``frontal_patches`` : [2D :py:class:`numpy.ndarray`]
+            Vectorized frontal patches, one array per stack of facial images.
+
+        ``horizontal_patches`` : [2D :py:class:`numpy.ndarray`]
+            Vectorized central-horizontal patches, one array per stack.
+
+        ``vertical_patches`` : [2D :py:class:`numpy.ndarray`]
+            Vectorized central-vertical patches, one array per stack.
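+
+        **Example:**
+
+        A minimal sketch (``frames`` and ``annotations`` as described above):
+
+        .. code-block:: python
+
+            frontal, horizontal, vertical = preprocessor(frames, annotations)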
+        """
+
+        # Convert frame container to 3D array:
+        video = self.convert_frame_cont_to_grayscale_array(frames)
+
+        # get all blocks from all possible facial stacks:
+        all_blocks = self.get_all_blocks_from_color_channel(video, annotations,
+                                                            self.block_size, self.block_length,
+                                                            self.min_face_size, self.norm_face_size)
+
+        frontal_patches, horizontal_patches, vertical_patches = self.extract_patches_from_blocks(all_blocks)
+
+        return frontal_patches, horizontal_patches, vertical_patches
+
+
+    #==========================================================================
+    def write_data( self, frames, file_name ):
+        """
+        Writes the given data (that has been generated using the __call__
+        function of this class) to file. This method overwrites the write_data()
+        method of the Preprocessor class.
+
+        **Parameters:**
+
+        ``frames`` :
+            data returned by the __call__ method of the class.
+
+        ``file_name`` : :py:class:`str`
+            name of the file.
+        """
+
+        self.video_preprocessor.write_data(frames, file_name)
+
+
+    #==========================================================================
+    def read_data( self, file_name ):
+        """
+        Reads the preprocessed data from file.
+        This method overwrites the read_data() method of the Preprocessor class.
+
+        **Parameters:**
+
+        ``file_name`` : :py:class:`str`
+            name of the file.
+
+        **Returns:**
+
+        ``frames`` : :py:class:`bob.bio.video.FrameContainer`
+            Frames stored in the frame container.
+        """
+
+        frames = self.video_preprocessor.read_data(file_name)
+
+        return frames
+
+
-- 
GitLab