diff --git a/bob/pad/face/config/preprocessor/video_sparse_coding.py b/bob/pad/face/config/preprocessor/video_sparse_coding.py index 905dd7b01edaddd01584eec16ebe839e890c65ca..032e89bdc813713dfbcedd0106b2cd0c37eae614 100644 --- a/bob/pad/face/config/preprocessor/video_sparse_coding.py +++ b/bob/pad/face/config/preprocessor/video_sparse_coding.py @@ -99,8 +99,27 @@ preprocessor_10_5_128 = VideoSparseCoding(gblock_size = BLOCK_SIZE, frame_step = FRAME_STEP, extract_histograms_flag = EXTRACT_HISTOGRAMS_FLAG) +#======================================================================================= +BLOCK_SIZE = 5 +BLOCK_LENGTH = 10 +MIN_FACE_SIZE = 50 +NORM_FACE_SIZE = 64 +DICTIONARY_FILE_NAMES = ["/idiap/user/onikisins/Projects/ODIN/Python/scripts/test_scripts/data/dictionary_front_10_5_64.hdf5", + "/idiap/user/onikisins/Projects/ODIN/Python/scripts/test_scripts/data/dictionary_hor_10_5_64.hdf5", + "/idiap/user/onikisins/Projects/ODIN/Python/scripts/test_scripts/data/dictionary_vert_10_5_64.hdf5"] - +FRAME_STEP = 50 # (!) a small number of feature vectors will be computed +EXTRACT_HISTOGRAMS_FLAG = True +COMP_RECONSTRUCT_ERR_FLAG = True + +preprocessor_10_5_64_rec_err = VideoSparseCoding(gblock_size = BLOCK_SIZE, + block_length = BLOCK_LENGTH, + min_face_size = MIN_FACE_SIZE, + norm_face_size = NORM_FACE_SIZE, + dictionary_file_names = DICTIONARY_FILE_NAMES, + frame_step = FRAME_STEP, + extract_histograms_flag = EXTRACT_HISTOGRAMS_FLAG, + comp_reconstruct_err_flag = COMP_RECONSTRUCT_ERR_FLAG) diff --git a/bob/pad/face/extractor/VideoHistOfSparseCodes.py b/bob/pad/face/extractor/VideoHistOfSparseCodes.py index 61747358cf625ba3043c521c46bcd78d800f0d3d..9ef070943307d3417fca4d2518747aed5a0ac8d3 100644 --- a/bob/pad/face/extractor/VideoHistOfSparseCodes.py +++ b/bob/pad/face/extractor/VideoHistOfSparseCodes.py @@ -102,6 +102,50 @@ class VideoHistOfSparseCodes(Extractor, object): return frame_container + #========================================================================== + def reduce_features_number(self, list_of_arrays): + """ + Reduce the number of features. + """ + + return_list = [] + + for item in list_of_arrays: + + return_list.append( item[1][32:] ) + + return return_list + + + #========================================================================== + def select_reconstruction_vector(self, frames, sorted_flag): + """ + Select either sorted or non-sorted reconstruction errors. + """ + + return_list = [] + + if sorted_flag: + + for item in frames: + + return_list.append( item[1][1,:] ) + + else: + + for item in frames: + + return_list.append( item[1][0,:] ) + +# return_list = [] +# +# for item in frames: +# +# return_list.append( np.max(item[1], axis=1) ) + + return return_list + + #========================================================================== def __call__(self, frames): """ @@ -119,9 +163,15 @@ class VideoHistOfSparseCodes(Extractor, object): Histograms of sparse codes stored in the FrameContainer. """ - histograms = self.comp_hist_of_sparse_codes(frames, self.method) +# histograms = self.comp_hist_of_sparse_codes(frames, self.method) + +# histograms = self.reduce_features_number(frames) + + sorted_flag = False + + list_of_error_vecs = self.select_reconstruction_vector(frames, sorted_flag) - frame_container = self.convert_sparse_codes_to_frame_container(histograms) + frame_container = self.convert_sparse_codes_to_frame_container(list_of_error_vecs) return frame_container diff --git a/bob/pad/face/preprocessor/VideoSparseCoding.py b/bob/pad/face/preprocessor/VideoSparseCoding.py index 944ca82e30ae792dd269d8ae927b61bbbaa01ce5..40dc41ed87288b639557f7b65051b825d347f570 100644 --- a/bob/pad/face/preprocessor/VideoSparseCoding.py +++ b/bob/pad/face/preprocessor/VideoSparseCoding.py @@ -77,6 +77,11 @@ class VideoSparseCoding(Preprocessor, object): "mean" and "hist". This argument is valid only if ``extract_histograms_flag`` is set to ``True``. Default: "hist". + + ``comp_reconstruct_err_flag`` : :py:class:`bool` + If this flag is set to ``True`` resulting feature vector will be a + reconstruction error, not a histogram. + Default: ``False``. """ #========================================================================== @@ -89,6 +94,7 @@ class VideoSparseCoding(Preprocessor, object): frame_step = 1, extract_histograms_flag = False, method = "hist", + comp_reconstruct_err_flag = False, **kwargs): super(VideoSparseCoding, self).__init__(block_size = block_size, @@ -98,6 +104,7 @@ class VideoSparseCoding(Preprocessor, object): dictionary_file_names = dictionary_file_names, frame_step = frame_step, extract_histograms_flag = extract_histograms_flag, + comp_reconstruct_err_flag = comp_reconstruct_err_flag, method = method) self.block_size = block_size @@ -108,6 +115,7 @@ class VideoSparseCoding(Preprocessor, object): self.frame_step = frame_step self.extract_histograms_flag = extract_histograms_flag self.method = method + self.comp_reconstruct_err_flag = comp_reconstruct_err_flag self.video_preprocessor = bob.bio.video.preprocessor.Wrapper() @@ -715,20 +723,18 @@ class VideoSparseCoding(Preprocessor, object): #========================================================================== - def comp_hist_of_sparse_codes(self, frames, method): + def comp_hist_of_sparse_codes(self, sparse_codes, method): """ Compute the histograms of sparse codes. **Parameters:** - ``frame_container`` : FrameContainer - FrameContainer containing the frames with sparse codes for the - frontal, horizontal and vertical patches. Each frame is a 3D array. - The dimensionality of array is: - (``3`` x ``n_samples`` x ``n_words_in_the_dictionary``). - First array [0,:,:] contains frontal sparse codes. - Second array [1,:,:] contains horizontal sparse codes. - Third array [2,:,:] contains vertical sparse codes. + ``sparse_codes`` : [[2D :py:class:`numpy.ndarray`]] + A list of lists of 2D arrays. Each 2D array contains sparse codes + of a particular stack of facial images. The length of internal lists + is equal to the number of processed frames. The outer list contains + the codes for frontal, horizontal and vertical patches, thus the + length of an outer list in the context of this class is 3. ``method`` : :py:class:`str` Name of the method to be used for combining the sparse codes into @@ -751,9 +757,9 @@ class VideoSparseCoding(Preprocessor, object): histograms = [] - for frame_data in frames: + for frontal_codes, horizontal_codes, vertical_codes in zip(sparse_codes[0], sparse_codes[1], sparse_codes[2]): - frame = frame_data[1] + frame = np.stack([frontal_codes, horizontal_codes, vertical_codes]) if method == "mean": @@ -799,6 +805,216 @@ class VideoSparseCoding(Preprocessor, object): return frame_container + #========================================================================== + def mean_std_normalize(self, features, features_mean= None, features_std = None): + """ + The features in the input 2D array are mean-std normalized. + The rows are samples, the columns are features. If ``features_mean`` + and ``features_std`` are provided, then these vectors will be used for + normalization. Otherwise, the mean and std of the features is + computed on the fly. + + **Parameters:** + + ``features`` : 2D :py:class:`numpy.ndarray` + Array of features to be normalized. + + ``features_mean`` : 1D :py:class:`numpy.ndarray` + Mean of the features. Default: None. + + ``features_std`` : 2D :py:class:`numpy.ndarray` + Standart deviation of the features. Default: None. + + **Returns:** + + ``features_norm`` : 2D :py:class:`numpy.ndarray` + Normalized array of features. + + ``features_mean`` : 1D :py:class:`numpy.ndarray` + Mean of the features. + + ``features_std`` : 1D :py:class:`numpy.ndarray` + Standart deviation of the features. + """ + + features = np.copy(features) + + # Compute mean and std if not given: + if features_mean is None: + + features_mean = np.mean(features, axis=0) + + features_std = np.std(features, axis=0) + + row_norm_list = [] + + for row in features: # row is a sample + + row_norm = (row - features_mean) / features_std + + row_norm_list.append(row_norm) + + features_norm = np.vstack(row_norm_list) + + return features_norm, features_mean, features_std + + + #========================================================================== + def compute_patches_mean_squared_errors(self, sparse_codes, original_data, dictionary): + """ + This function computes normalized mean squared errors (MSE) for each + feature (column) in the reconstructed array of vectorized patches. + The patches are reconstructed given array of sparse codes and a dictionary. + + **Parameters:** + + ``sparse_codes`` : 2D :py:class:`numpy.ndarray` + An array of sparse codes. Each row contains a sparse code encoding a + vectorized patch. The dimensionality of the array: + (``n_samples`` x ``n_words_in_dictionary``). + + ``original_data`` : 2D :py:class:`numpy.ndarray` + An array with original vectorized patches. + The dimensionality of the array: + (``n_samples`` x ``n_features_in_patch``). + + ``dictionary`` : 2D :py:class:`numpy.ndarray` + A dictionary with vectorized visual words. + The dimensionality of the array: + (``n_words_in_dictionary`` x ``n_features_in_patch``). + + **Returns:** + + ``squared_errors`` : 1D :py:class:`numpy.ndarray` + Normalzied MSE for each feature across all patches/samples. + The dimensionality of the array: + (``n_features_in_patch``, ). + """ + + recovered_data = np.dot(sparse_codes, dictionary) + + squared_error = 1.*np.sum((original_data - recovered_data) ** 2, axis=0) / np.sum(original_data**2, axis=0) + + return squared_error + + + #========================================================================== + def compute_mse_for_all_patches_types(self, sparse_codes_list, original_data_list, dictionary_list): + """ + This function computes mean squared errors (MSE) for all types of patches: + frontal, horizontal, and vertical. In this case the function + ``compute_patches_mean_squared_errors`` is called in a loop for all + values in the input lists. + + **Parameters:** + + ``sparse_codes_list`` : [2D :py:class:`numpy.ndarray`] + A list with arrays of sparse codes. Each row in the arrays contains a + sparse code encoding a vectorized patch of particular type. + The dimensionality of the each array: + (``n_samples`` x ``n_words_in_dictionary``). + + ``original_data_list`` : [2D :py:class:`numpy.ndarray`] + A list of arrays with original vectorized patches of various types. + The dimensionality of the arrays might be different for various types + of the patches: + (``n_samples`` x ``n_features_in_patch_of_particular_type``). + + ``dictionary_list`` : [2D :py:class:`numpy.ndarray`] + A list of dictionaries with vectorized visual words of various types. + The dimensionality of the arrays might be different for various types + of the patches: + (``n_words_in_dictionary`` x ``n_features_in_patch_of_particular_type``). + + **Returns:** + + ``squared_errors`` : 2D :py:class:`numpy.ndarray` + First row: + MSE of features for various types of patches concatenated into a single + vector. + Second row: + The same as above but MSE are sorted for each type of patches. + The dimensionality of the array: + (2 x ``n_features_in_patch_of_all_types``). + """ + + squared_errors = [] + + squared_errors_sorted = [] + + for sparse_codes, original_data, dictionary in zip(sparse_codes_list, original_data_list, dictionary_list): + + squared_error = self.compute_patches_mean_squared_errors(sparse_codes, original_data, dictionary) + + squared_error_sorted = np.sort(squared_error) + + squared_errors.append(squared_error) + + squared_errors_sorted.append(squared_error_sorted) + + squared_errors = np.hstack(squared_errors) + + squared_errors_sorted = np.hstack(squared_errors_sorted) + + squared_errors = np.vstack([squared_errors, squared_errors_sorted]) + + return squared_errors + + + #========================================================================== + def compute_mse_for_all_stacks(self, video_codes_list, patches_list, dictionary_list): + """ + Call ``compute_mse_for_all_patches_types`` for data coming from all stacks + of facial images. + + **Parameters:** + + ``video_codes_list`` : [ [2D :py:class:`numpy.ndarray`] ] + A list with ``frontal_video_codes``, ``horizontal_video_codes``, and + ``vertical_video_codes`` as returned by ``get_sparse_codes_for_list_of_patches`` + method of this class. + + ``patches_list`` : [ [2D :py:class:`numpy.ndarray`] ] + A list with ``frontal_patches``, ``horizontal_patches``, and + ``vertical_patches`` as returned by ``extract_patches_from_blocks`` + method of this class. + + ``dictionary_list`` : [2D :py:class:`numpy.ndarray`] + A list of dictionaries with vectorized visual words of various types. + The dimensionality of the arrays might be different for various types + of the patches: + (``n_words_in_dictionary`` x ``n_features_in_patch_of_particular_type``). + + **Returns:** + + ``squared_errors_list`` : [2D :py:class:`numpy.ndarray`] + A list of ``squared_errors`` as returned by ``compute_mse_for_all_patches_types`` + method of this class. + """ + + fcs = video_codes_list[0] + hcs = video_codes_list[1] + vcs = video_codes_list[2] + + fps = patches_list[0] + hps = patches_list[1] + vps = patches_list[2] + + squared_errors_list = [] + + for fc, hc, vc, fp, hp, vp in zip(fcs, hcs, vcs, fps, hps, vps): + + sparse_codes_list = [fc, hc, vc] + + original_data_list = [fp, hp, vp] + + squared_errors = self.compute_mse_for_all_patches_types(sparse_codes_list, original_data_list, dictionary_list) + + squared_errors_list.append(squared_errors) + + return squared_errors_list + + #========================================================================== def __call__(self, frames, annotations): """ @@ -859,18 +1075,39 @@ class VideoSparseCoding(Preprocessor, object): # Download the dictionaries: dictionary_frontal, dictionary_horizontal, dictionary_vertical = self.load_the_dictionaries(self.dictionary_file_names) + # Select subset of patches if ``frame_step`` > 1: + frontal_patches_subset = frontal_patches[::self.frame_step] + horizontal_patches_subset = horizontal_patches[::self.frame_step] + vertical_patches_subset = vertical_patches[::self.frame_step] + # Compute sparse codes for all patches of all types: - frontal_video_codes = self.get_sparse_codes_for_list_of_patches(frontal_patches[::self.frame_step], dictionary_frontal) - horizontal_video_codes = self.get_sparse_codes_for_list_of_patches(horizontal_patches[::self.frame_step], dictionary_horizontal) - vertical_video_codes = self.get_sparse_codes_for_list_of_patches(vertical_patches[::self.frame_step], dictionary_vertical) + frontal_video_codes = self.get_sparse_codes_for_list_of_patches(frontal_patches_subset, dictionary_frontal) + horizontal_video_codes = self.get_sparse_codes_for_list_of_patches(horizontal_patches_subset, dictionary_horizontal) + vertical_video_codes = self.get_sparse_codes_for_list_of_patches(vertical_patches_subset, dictionary_vertical) + + if self.comp_reconstruct_err_flag: + + video_codes_list = [frontal_video_codes, horizontal_video_codes, vertical_video_codes] + + patches_list = [frontal_patches_subset, horizontal_patches_subset, vertical_patches_subset] + + dictionary_list = [dictionary_frontal, dictionary_horizontal, dictionary_vertical] + + squared_errors_list = self.compute_mse_for_all_stacks(video_codes_list, patches_list, dictionary_list) + + frame_container = self.convert_arrays_to_frame_container(squared_errors_list) + + else: + + if self.extract_histograms_flag: # in this case histograms will be extracted in the preprocessor , no feature extraction is needed then - frame_container = self.convert_sparse_codes_to_frame_container([frontal_video_codes, horizontal_video_codes, vertical_video_codes]) + histograms = self.comp_hist_of_sparse_codes([frontal_video_codes, horizontal_video_codes, vertical_video_codes], self.method) - if self.extract_histograms_flag: # in this case histograms will be extracted in the preprocessor , no feature extraction is needed then + frame_container = self.convert_arrays_to_frame_container(histograms) - histograms = self.comp_hist_of_sparse_codes(frame_container, self.method) + else: - frame_container = self.convert_arrays_to_frame_container(histograms) + frame_container = self.convert_sparse_codes_to_frame_container([frontal_video_codes, horizontal_video_codes, vertical_video_codes]) return frame_container diff --git a/setup.py b/setup.py index 474dcb0598e14cd18370fa74410b80362e8592e5..2dff88ee16c2a9173b0299e429c2d11e43276151 100644 --- a/setup.py +++ b/setup.py @@ -110,6 +110,7 @@ setup( 'sparse-coding-preprocessor-10-5-32 = bob.pad.face.config.preprocessor.video_sparse_coding:preprocessor_10_5_32', 'sparse-coding-preprocessor-10-5-64 = bob.pad.face.config.preprocessor.video_sparse_coding:preprocessor_10_5_64', 'sparse-coding-preprocessor-10-5-128 = bob.pad.face.config.preprocessor.video_sparse_coding:preprocessor_10_5_128', + 'sparse-coding-preprocessor-10-5-64-rec-err = bob.pad.face.config.preprocessor.video_sparse_coding:preprocessor_10_5_64_rec_err', ], # registered extractors: