From 0887a83b47f41b299a1c55ee98c669b9325a5024 Mon Sep 17 00:00:00 2001
From: Olegs NIKISINS <onikisins@italix03.idiap.ch>
Date: Mon, 9 Oct 2017 11:11:22 +0200
Subject: [PATCH] Optimized the VideoSparseCoding for memory usage

---
 .../preprocessor/video_sparse_coding.py       | 21 ++++-
 .../face/extractor/VideoHistOfSparseCodes.py  | 37 +++++++-
 .../face/preprocessor/VideoSparseCoding.py    | 94 ++++++++++++++-----
 setup.py                                      |  1 +
 4 files changed, 129 insertions(+), 24 deletions(-)

diff --git a/bob/pad/face/config/preprocessor/video_sparse_coding.py b/bob/pad/face/config/preprocessor/video_sparse_coding.py
index 905dd7b0..032e89bd 100644
--- a/bob/pad/face/config/preprocessor/video_sparse_coding.py
+++ b/bob/pad/face/config/preprocessor/video_sparse_coding.py
@@ -99,8 +99,27 @@ preprocessor_10_5_128 = VideoSparseCoding(gblock_size = BLOCK_SIZE,
                                          frame_step = FRAME_STEP,
                                          extract_histograms_flag = EXTRACT_HISTOGRAMS_FLAG)
 
+#=======================================================================================
 
 
+BLOCK_SIZE = 5
+BLOCK_LENGTH = 10
+MIN_FACE_SIZE = 50
+NORM_FACE_SIZE = 64
+DICTIONARY_FILE_NAMES = ["/idiap/user/onikisins/Projects/ODIN/Python/scripts/test_scripts/data/dictionary_front_10_5_64.hdf5",
+                         "/idiap/user/onikisins/Projects/ODIN/Python/scripts/test_scripts/data/dictionary_hor_10_5_64.hdf5",
+                         "/idiap/user/onikisins/Projects/ODIN/Python/scripts/test_scripts/data/dictionary_vert_10_5_64.hdf5"]
 
-
+FRAME_STEP = 50 # (!) a small number of feature vectors will be computed
+EXTRACT_HISTOGRAMS_FLAG = True
+COMP_RECONSTRUCT_ERR_FLAG = True
+
+preprocessor_10_5_64_rec_err = VideoSparseCoding(gblock_size = BLOCK_SIZE,
+                                                 block_length = BLOCK_LENGTH,
+                                                 min_face_size = MIN_FACE_SIZE,
+                                                 norm_face_size = NORM_FACE_SIZE,
+                                                 dictionary_file_names = DICTIONARY_FILE_NAMES,
+                                                 frame_step = FRAME_STEP,
+                                                 extract_histograms_flag = EXTRACT_HISTOGRAMS_FLAG,
+                                                 comp_reconstruct_err_flag = COMP_RECONSTRUCT_ERR_FLAG)
 
diff --git a/bob/pad/face/extractor/VideoHistOfSparseCodes.py b/bob/pad/face/extractor/VideoHistOfSparseCodes.py
index 5ef9c47a..9ef07094 100644
--- a/bob/pad/face/extractor/VideoHistOfSparseCodes.py
+++ b/bob/pad/face/extractor/VideoHistOfSparseCodes.py
@@ -117,6 +117,35 @@ class VideoHistOfSparseCodes(Extractor, object):
         return return_list
 
 
+    #==========================================================================
+    def select_reconstruction_vector(self, frames, sorted_flag):
+        """
+        Select either sorted or non-sorted reconstruction errors.
+        """
+
+        return_list = []
+
+        if sorted_flag:
+
+            for item in frames:
+
+                return_list.append( item[1][1,:] )
+
+        else:
+
+            for item in frames:
+
+                return_list.append( item[1][0,:] )
+
+#        return_list = []
+#
+#        for item in frames:
+#
+#            return_list.append( np.max(item[1], axis=1) )
+
+        return return_list
+
+
     #==========================================================================
     def __call__(self, frames):
         """
@@ -136,9 +165,13 @@ class VideoHistOfSparseCodes(Extractor, object):
 
 #        histograms = self.comp_hist_of_sparse_codes(frames, self.method)
 
-        histograms = self.reduce_features_number(frames)
+#        histograms = self.reduce_features_number(frames)
+
+        sorted_flag = False
+
+        list_of_error_vecs = self.select_reconstruction_vector(frames, sorted_flag)
 
-        frame_container = self.convert_sparse_codes_to_frame_container(histograms)
+        frame_container = self.convert_sparse_codes_to_frame_container(list_of_error_vecs)
 
         return frame_container
 
diff --git a/bob/pad/face/preprocessor/VideoSparseCoding.py b/bob/pad/face/preprocessor/VideoSparseCoding.py
index 016ada30..40dc41ed 100644
--- a/bob/pad/face/preprocessor/VideoSparseCoding.py
+++ b/bob/pad/face/preprocessor/VideoSparseCoding.py
@@ -723,20 +723,18 @@ class VideoSparseCoding(Preprocessor, object):
 
 
     #==========================================================================
-    def comp_hist_of_sparse_codes(self, frames, method):
+    def comp_hist_of_sparse_codes(self, sparse_codes, method):
         """
         Compute the histograms of sparse codes.
 
         **Parameters:**
 
-        ``frame_container`` : FrameContainer
-            FrameContainer containing the frames with sparse codes for the
-            frontal, horizontal and vertical patches. Each frame is a 3D array.
-            The dimensionality of array is:
-            (``3`` x ``n_samples`` x ``n_words_in_the_dictionary``).
-            First array [0,:,:] contains frontal sparse codes.
-            Second array [1,:,:] contains horizontal sparse codes.
-            Third array [2,:,:] contains vertical sparse codes.
+        ``sparse_codes`` : [[2D :py:class:`numpy.ndarray`]]
+            A list of lists of 2D arrays. Each 2D array contains sparse codes
+            of a particular stack of facial images. The length of internal lists
+            is equal to the number of processed frames. The outer list contains
+            the codes for frontal, horizontal and vertical patches, thus the
+            length of an outer list in the context of this class is 3.
 
         ``method`` : :py:class:`str`
             Name of the method to be used for combining the sparse codes into
@@ -759,9 +757,9 @@ class VideoSparseCoding(Preprocessor, object):
 
         histograms = []
 
-        for frame_data in frames:
+        for frontal_codes, horizontal_codes, vertical_codes in zip(sparse_codes[0], sparse_codes[1], sparse_codes[2]):
 
-            frame = frame_data[1]
+            frame = np.stack([frontal_codes, horizontal_codes, vertical_codes])
 
             if method == "mean":
 
@@ -807,12 +805,66 @@ class VideoSparseCoding(Preprocessor, object):
         return frame_container
 
 
+    #==========================================================================
+    def mean_std_normalize(self, features, features_mean= None, features_std = None):
+        """
+        The features in the input 2D array are mean-std normalized.
+        The rows are samples, the columns are features. If ``features_mean``
+        and ``features_std`` are provided, then these vectors will be used for
+        normalization. Otherwise, the mean and std of the features are
+        computed on the fly.
+
+        **Parameters:**
+
+        ``features`` : 2D :py:class:`numpy.ndarray`
+            Array of features to be normalized.
+
+        ``features_mean`` : 1D :py:class:`numpy.ndarray`
+            Mean of the features. Default: None.
+
+        ``features_std`` : 1D :py:class:`numpy.ndarray`
+            Standard deviation of the features. Default: None.
+
+        **Returns:**
+
+        ``features_norm`` : 2D :py:class:`numpy.ndarray`
+            Normalized array of features.
+
+        ``features_mean`` : 1D :py:class:`numpy.ndarray`
+            Mean of the features.
+
+        ``features_std`` : 1D :py:class:`numpy.ndarray`
+            Standard deviation of the features.
+        """
+
+        features = np.copy(features)
+
+        # Compute mean and std if not given:
+        if features_mean is None:
+
+            features_mean = np.mean(features, axis=0)
+
+            features_std = np.std(features, axis=0)
+
+        row_norm_list = []
+
+        for row in features: # row is a sample
+
+            row_norm = (row - features_mean) / features_std
+
+            row_norm_list.append(row_norm)
+
+        features_norm = np.vstack(row_norm_list)
+
+        return features_norm, features_mean, features_std
+
+
     #==========================================================================
     def compute_patches_mean_squared_errors(self, sparse_codes, original_data, dictionary):
         """
-        This function computes mean squared errors (MSE) for each feature (column)
-        in the reconstructed array of vectorized patches. The patches are reconstructed
-        given array of sparse codes and a dictionary.
+        This function computes normalized mean squared errors (MSE) for each
+        feature (column) in the reconstructed array of vectorized patches.
+        The patches are reconstructed given array of sparse codes and a dictionary.
 
         **Parameters:**
 
@@ -834,16 +886,14 @@ class VideoSparseCoding(Preprocessor, object):
         **Returns:**
 
         ``squared_errors`` : 1D :py:class:`numpy.ndarray`
-            MSE for each feature across all patches/samples.
+            Normalized MSE for each feature across all patches/samples.
             The dimensionality of the array:
             (``n_features_in_patch``, ).
         """
 
         recovered_data = np.dot(sparse_codes, dictionary)
 
-        n_samples = recovered_data.shape[0]
-
-        squared_error = 1.0 / n_samples * np.sum((recovered_data - original_data) ** 2, axis=0)
+        squared_error = 1.*np.sum((original_data - recovered_data) ** 2, axis=0) / np.sum(original_data**2, axis=0)
 
         return squared_error
 
@@ -1049,14 +1099,16 @@ class VideoSparseCoding(Preprocessor, object):
 
         else:
 
-            frame_container = self.convert_sparse_codes_to_frame_container([frontal_video_codes, horizontal_video_codes, vertical_video_codes])
-
             if self.extract_histograms_flag: # in this case histograms will be extracted in the preprocessor , no feature extraction is needed then
 
-                histograms = self.comp_hist_of_sparse_codes(frame_container, self.method)
+                histograms = self.comp_hist_of_sparse_codes([frontal_video_codes, horizontal_video_codes, vertical_video_codes], self.method)
 
                 frame_container = self.convert_arrays_to_frame_container(histograms)
 
+            else:
+
+                frame_container = self.convert_sparse_codes_to_frame_container([frontal_video_codes, horizontal_video_codes, vertical_video_codes])
+
         return frame_container
 
 
diff --git a/setup.py b/setup.py
index 474dcb05..2dff88ee 100644
--- a/setup.py
+++ b/setup.py
@@ -110,6 +110,7 @@ setup(
             'sparse-coding-preprocessor-10-5-32 = bob.pad.face.config.preprocessor.video_sparse_coding:preprocessor_10_5_32',
             'sparse-coding-preprocessor-10-5-64 = bob.pad.face.config.preprocessor.video_sparse_coding:preprocessor_10_5_64',
             'sparse-coding-preprocessor-10-5-128 = bob.pad.face.config.preprocessor.video_sparse_coding:preprocessor_10_5_128',
+            'sparse-coding-preprocessor-10-5-64-rec-err = bob.pad.face.config.preprocessor.video_sparse_coding:preprocessor_10_5_64_rec_err',
             ],
 
         # registered extractors:
-- 
GitLab