Commit c94e1d3a authored by Olegs NIKISINS

Revised the mean-std normalization in MLPAlgorithm, updated test and config

parent c51586ca
1 merge request: !4 MLPAlgorithm PAD algorithm V1 version
Pipeline #26697 passed
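For context, the normalization added in this commit is the usual mean-std (z-score) scheme: per-feature statistics are computed on the real-class training features only, stored in the projector file, and applied to every feature vector before it is forwarded through the MLP. A minimal numpy sketch of the idea (an illustration that mirrors the call signature used below, not the actual ``bob.pad.base.utils.mean_std_normalize`` implementation):

    import numpy as np

    def mean_std_normalize_sketch(features, features_mean=None, features_std=None):
        # Z-score normalization; the statistics are computed from ``features``
        # when they are not supplied explicitly.
        features = np.atleast_2d(np.asarray(features, dtype=np.float64))
        if features_mean is None or features_std is None:
            features_mean = features.mean(axis=0)
            features_std = features.std(axis=0)
        features_norm = (features - features_mean) / features_std
        return features_norm, features_mean, features_std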
@@ -17,6 +17,12 @@ from bob.ip.pytorch_extractor.utils import transform_and_net_forward
from bob.ip.pytorch_extractor.utils import load_pretrained_model
from bob.pad.base.utils import mean_std_normalize
from bob.pad.base.utils import convert_list_of_frame_cont_to_array
import bob.io.base
# =============================================================================
# Main body :
@@ -74,6 +80,14 @@ class MLPAlgorithm(Algorithm):
frame_level_scores_flag : bool
Return scores for each frame individually if True. Otherwise, return a
single score per video. Default: ``True``.
mean_std_norm_flag : bool
Perform mean-std normalization of the data if set to ``True``.
Note: if this flag is set to ``True``, make sure the MLP was trained on
mean-std normalized features. The tutorial on MLP training in
``bob.learn.pytorch`` provides an example of how to train a network on
normalized features.
Default: ``True``.
"""
def __init__(self,
@@ -82,14 +96,18 @@ class MLPAlgorithm(Algorithm):
model_file = None,
url = None,
archive_extension = '.tar.gz',
frame_level_scores_flag = True):
frame_level_scores_flag = True,
mean_std_norm_flag = True):
super(MLPAlgorithm, self).__init__(config_file = config_file,
config_group = config_group,
model_file = model_file,
url = url,
archive_extension = archive_extension,
frame_level_scores_flag = frame_level_scores_flag)
frame_level_scores_flag = frame_level_scores_flag,
mean_std_norm_flag = mean_std_norm_flag,
performs_projection=True,
requires_projector_training=True)
self.config_file = config_file
self.config_group = config_group
@@ -97,6 +115,97 @@ class MLPAlgorithm(Algorithm):
self.url = url
self.archive_extension = archive_extension
self.frame_level_scores_flag = frame_level_scores_flag
self.mean_std_norm_flag = mean_std_norm_flag
self.features_mean = None # this attribute will hold the features mean
self.features_std = None # this attribute will hold the features std
# =========================================================================
def _save_mean_std(self,
projector_file,
features_mean,
features_std):
"""
Save the mean and std normalizers to the hdf5 file.
The absolute name of the file is given by the ``projector_file`` string.
Parameters
----------
projector_file : str
Absolute name of the file to save the data to, as returned by
the ``bob.pad.base`` framework.
features_mean : 1D :py:class:`numpy.ndarray`
Mean of the features.
features_std : 1D :py:class:`numpy.ndarray`
Standard deviation of the features.
"""
f = bob.io.base.HDF5File(projector_file, 'w') # open hdf5 file to save
f.set("features_mean", features_mean)
f.set("features_std", features_std)
del f
# =========================================================================
def train_projector(self, training_features, projector_file):
"""
Compute mean-std normalizers using samples of the real class only.
Parameters
----------
training_features : [[object], [object]]
A list containing two elements: [0] - a list of Frame Containers
with feature vectors for the real class;
[1] - a list of Frame Containers with feature vectors for the
attack class.
projector_file : str
The file to save the trained projector to, as returned by the
``bob.pad.base`` framework.
"""
# training_features[0] - training features for the REAL class.
# training_features[1] - training features for the ATTACK class.
real = convert_list_of_frame_cont_to_array(training_features[0])
# compute normalization params:
_, features_mean, features_std = mean_std_normalize(real)
# Save the normalizers:
self._save_mean_std(projector_file, features_mean, features_std)
# =========================================================================
def load_projector(self, projector_file):
"""
Load the features mean and std from the hdf5 file.
The absolute name of the file is given by the ``projector_file`` string.
Parameters
----------
projector_file : str
Absolute name of the file to load the trained projector from, as
returned by the ``bob.pad.base`` framework.
"""
f = bob.io.base.HDF5File(projector_file, 'r') # file to read
features_mean = f.read("features_mean")
features_std = f.read("features_std")
del f
self.features_mean = features_mean
self.features_std = features_std
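# Note: in the ``bob.pad.base`` tool chain ``load_projector`` is expected to be
# called by the framework before projection, so the mean and std restored here
# are the ones consumed by ``project`` below whenever ``mean_std_norm_flag`` is
# set.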
# =========================================================================
@@ -135,6 +244,12 @@ class MLPAlgorithm(Algorithm):
feature = feature.as_array()
if self.mean_std_norm_flag:
feature, _, _ = mean_std_normalize(feature,
self.features_mean,
self.features_std)
# kwargs for the transform_and_net_forward function:
function_kwargs = {}
function_kwargs["config_file"] = self.config_file
@@ -142,7 +257,8 @@ class MLPAlgorithm(Algorithm):
function_kwargs["model_file"] = self.model_file
function_kwargs["color_input_flag"] = False
scores = transform_and_net_forward(feature = feature, **function_kwargs)
scores = transform_and_net_forward(feature = feature,
**function_kwargs)
return scores
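Taken together, the new methods give ``MLPAlgorithm`` the standard ``bob.pad.base`` projector life cycle: ``train_projector`` computes the normalizers from the real-class features and writes them to HDF5, ``load_projector`` restores them, and ``project`` applies them before the network forward pass. A rough usage sketch (the import path, file names and constructor arguments are placeholders, not values taken from this commit):

    from bob.ip.pytorch_extractor import MLPAlgorithm  # import path assumed

    algorithm = MLPAlgorithm(config_file="mlp_config.py",   # placeholder config
                             model_file="mlp_model.pth",    # placeholder weights
                             mean_std_norm_flag=True)

    # training_features = [real_frame_containers, attack_frame_containers],
    # prepared elsewhere by the PAD framework.
    algorithm.train_projector(training_features, "projector.hdf5")

    # At scoring time the projector is loaded and features are projected:
    algorithm.load_projector("projector.hdf5")
    scores = algorithm.project(feature)  # feature: a Frame Container of 1D vectors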
#!/usr/bin/env python
# encoding: utf-8
import pkg_resources
import numpy
numpy.random.seed(10)
import os
def test_cnn8():
@@ -108,7 +105,7 @@ def test_mlp_algorithm():
# =========================================================================
# prepare the test data / feature vector:
features = numpy.random.randn(2, 100)
features = numpy.random.randn(2, 1296)
# =========================================================================
# test the extractor:
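In the test, the synthetic input becomes a 2 x 1296 matrix so that each row matches the updated ``in_features = 1296`` of the network configured below; the fixed seed (``numpy.random.seed(10)``) keeps the generated data, and therefore the scores, reproducible. A quick shape check (illustration only):

    import numpy
    numpy.random.seed(10)

    features = numpy.random.randn(2, 1296)  # two samples, 1296 features each
    assert features.shape == (2, 1296)      # second axis must match the MLP input size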
@@ -6,8 +6,6 @@
#==============================================================================
# Import here:
import numpy as np
import torch
@@ -16,17 +14,14 @@ import torch
"""
Transformations to be applied to the input 1D numpy arrays (feature vectors).
Here, for demonstrative purposes, the transformation is mean std-normalization,
where mean and std values are just numpy generated vectors. In real applications,
normalizers must be computed in a meaningful way. This config is just for
test purposes.
Only conversion to a Tensor and unsqueezing are needed to match the input of
the TwoLayerMLP network.
"""
def transform(x):
"""
Transformation function applying dummy mean-std normalization and converting
input numpy feature vectors to PyTorch tensors, making them compatible with
MLP.
Convert the input to a Tensor and unsqueeze it to match the input of the
TwoLayerMLP network.
Arguments
---------
@@ -39,15 +34,7 @@ def transform(x):
Torch tensor, transformed ``x`` to be used as MLP input.
"""
features_mean = np.zeros(x.shape)
features_std = np.ones(x.shape)
x_norm = (x - features_mean) / features_std
x_norm.squeeze()
return torch.Tensor(x_norm).unsqueeze(0)
return torch.Tensor(x).unsqueeze(0)
"""
@@ -62,7 +49,7 @@ from bob.learn.pytorch.architectures import TwoLayerMLP as Network
kwargs to be used for ``Network`` initialization. The name must be ``network_kwargs``.
"""
network_kwargs = {}
network_kwargs['in_features'] = 100
network_kwargs['in_features'] = 1296
network_kwargs['n_hidden_relu'] = 10
network_kwargs['apply_sigmoid'] = False # don't apply sigmoid, so the scores stay more evenly spread
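As a shape check for the updated configuration (an illustration assuming a 1296-dimensional feature vector, as in the updated test): ``transform`` turns a 1D numpy vector into a ``torch.Tensor`` with a leading batch dimension, which is what a network declared with ``in_features = 1296`` consumes.

    import numpy as np
    import torch

    x = np.random.randn(1296)             # one 1296-dimensional feature vector
    batch = torch.Tensor(x).unsqueeze(0)  # shape: (1, 1296)
    assert batch.shape == (1, 1296)       # matches network_kwargs['in_features']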