diff --git a/bob/pad/face/preprocessor/VideoSparseCoding.py b/bob/pad/face/preprocessor/VideoSparseCoding.py
index 6345548e436928527ab6e271e85b49349838176f..4168882ba7a5dfa321b1b25b10fc6459d77800d2 100644
--- a/bob/pad/face/preprocessor/VideoSparseCoding.py
+++ b/bob/pad/face/preprocessor/VideoSparseCoding.py
@@ -26,16 +26,13 @@ import bob.io.base
 #==============================================================================
 class VideoSparseCoding(Preprocessor, object):
     """
-    This class is designed to compute "feature vectors" for all stacks of facial
-    images using sparse coding. The feature vector is computed for each stack
-    containing ``block_length`` images.
-
-    The maximum number of facial stacks per video is:
+    This class is designed to compute sparse codes for spatial frontal,
+    spatio-temporal horizontal, and spatio-temporal vertical patches.
+    The codes are computed for all possible stacks of facial images.
+    The maximum possible number of stacks is:
     (``num_of_frames_in_video`` - ``block_length``).
-    However, the number of facial volumes might be less than above, because
-    frames with small faces ( < min_face_size ) are discarded.
-
-    The feature vector is computed as follows............
+    However, this number can be smaller, and is controlled by two arguments
+    of this class: ``min_face_size`` and ``frame_step``.
 
     **Parameters:**
 
@@ -573,7 +570,7 @@ class VideoSparseCoding(Preprocessor, object):
         **Returns:**
 
         ``video_codes`` : [2D :py:class:`numpy.ndarray`]
-            A list of arrays of reconstruction sparse codes for each patch.
+            A list of arrays with reconstruction sparse codes for each patch.
             The dimensionality of each array in the list is:
             (``n_samples`` x ``n_words_in_the_dictionary``).
         """
@@ -655,10 +652,55 @@ class VideoSparseCoding(Preprocessor, object):
         return dictionary_frontal, dictionary_horizontal, dictionary_vertical
 
 
+    #==========================================================================
+    def convert_sparse_codes_to_frame_container(self, sparse_codes):
+        """
+        Convert an input list of lists of 2D arrays / sparse codes into Frame
+        Container. Each frame in the output Frame Container is a 3D array which
+        stacks 3 2D arrays representing particular frame / stack of facial images.
+
+        **Parameters:**
+
+        ``sparse_codes`` : [[2D :py:class:`numpy.ndarray`]]
+            A list of lists of 2D arrays. Each 2D array contains sparse codes
+            of a particular stack of facial images. The length of internal lists
+            is equal to the number of processed frames. The outer list contains
+            the codes for frontal, horizontal and vertical patches, thus the
+            length of an outer list in the context of this class is 3.
+
+        **Returns:**
+
+        ``frame_container`` : FrameContainer
+            FrameContainer containing the frames with sparse codes for the
+            frontal, horizontal and vertical patches. Each frame is a 3D array.
+            The dimensionality of array is:
+            (``3`` x ``n_samples`` x ``n_words_in_the_dictionary``).
+        """
+
+        frame_container = bob.bio.video.FrameContainer() # initialize the FrameContainer
+
+        idx = 0
+
+        for frontal_codes, horizontal_codes, vertical_codes in zip(sparse_codes[0], sparse_codes[1], sparse_codes[2]):
+
+            frame_3d = np.stack([frontal_codes, horizontal_codes, vertical_codes])
+
+            frame_container.add(idx, frame_3d) # add frame to FrameContainer
+
+            idx = idx + 1
+
+        return frame_container
+
+
     #==========================================================================
     def __call__(self, frames, annotations):
         """
-        Do something....
+        Compute sparse codes for spatial frontal, spatio-temporal horizontal,
+        and spatio-temporal vertical patches. The codes are computed for all
+        possible stacks of facial images. The maximum possible number of stacks
+        is: (``num_of_frames_in_video`` - ``block_length``).
+        However, this number can be smaller, and is controlled by two arguments
+        of this class: ``min_face_size`` and ``frame_step``.
 
         **Parameters:**
 
@@ -677,8 +719,13 @@ class VideoSparseCoding(Preprocessor, object):
 
         **Returns:**
 
-        ``preprocessed_video`` : FrameContainer
-            ????????????????
+        ``frame_container`` : FrameContainer
+            FrameContainer containing the frames with sparse codes for the
+            frontal, horizontal and vertical patches. Each frame is a 3D array.
+            The dimensionality of array is:
+            (``3`` x ``n_samples`` x ``n_words_in_the_dictionary``).
+            The first slice in the 3D arrays corresponds to frontal sparse codes,
+            second slice to horizontal, and third to vertical codes.
         """
 
         # Convert frame container to 3D array:
@@ -700,7 +747,9 @@ class VideoSparseCoding(Preprocessor, object):
         horizontal_video_codes = self.get_sparse_codes_for_list_of_patches(horizontal_patches[::self.frame_step], dictionary_horizontal)
         vertical_video_codes = self.get_sparse_codes_for_list_of_patches(vertical_patches[::self.frame_step], dictionary_vertical)
 
-        return frontal_video_codes, horizontal_video_codes, vertical_video_codes
+        frame_container = self.convert_sparse_codes_to_frame_container([frontal_video_codes, horizontal_video_codes, vertical_video_codes])
+
+        return frame_container
 
 
     #==========================================================================
@@ -744,3 +793,9 @@ class VideoSparseCoding(Preprocessor, object):
         return frames
 
 
+
+
+
+
+
+