From 9fcb42b282d429e1f7211f658daddb38a605025c Mon Sep 17 00:00:00 2001
From: Pavel Korshunov <pavel.korshunov@idiap.ch>
Date: Tue, 13 Feb 2018 15:40:28 +0100
Subject: [PATCH] Moved the one-class GMM (OneClassGMM) algorithm from bob.pad.face

---
 bob/pad/base/algorithm/OneClassGMM.py  | 359 +++++++++++++++++++++++++
 bob/pad/base/algorithm/__init__.py     |   3 +-
 bob/pad/base/test/test_algorithms.py   |  67 ++++-
 bob/pad/base/utils/helper_functions.py |  54 ++++
 doc/index.rst                          |   4 +-
 5 files changed, 481 insertions(+), 6 deletions(-)
 create mode 100644 bob/pad/base/algorithm/OneClassGMM.py

diff --git a/bob/pad/base/algorithm/OneClassGMM.py b/bob/pad/base/algorithm/OneClassGMM.py
new file mode 100644
index 0000000..477d6bb
--- /dev/null
+++ b/bob/pad/base/algorithm/OneClassGMM.py
@@ -0,0 +1,359 @@
+#!/usr/bin/env python2
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Aug 28 16:47:47 2017
+
+@author: Olegs Nikisins
+"""
+
+# ==============================================================================
+# Import what is needed here:
+
+from bob.pad.base.algorithm import Algorithm
+from bob.bio.video.utils import FrameContainer
+
+import numpy as np
+
+import bob.io.base
+
+from sklearn import mixture
+
+from bob.pad.base.utils import convert_frame_cont_to_array, \
+    convert_list_of_frame_cont_to_array, mean_std_normalize
+
+# ==============================================================================
+# Main body :
+
+
+class OneClassGMM(Algorithm):
+    """
+    This class is designed to train a OneClassGMM-based PAD system. The OneClassGMM is
+    trained using data of one class (the real class) only. The procedure is the following:
+
+    1. First, the training data is mean-std normalized using the mean and std of the
+       real class only.
+
+    2. Second, a OneClassGMM with ``n_components`` Gaussians is trained using samples
+       of the real class.
+
+    3. At test time, the input features are classified using the pre-trained
+       OneClassGMM machine.
+
+    **Parameters:**
+
+    ``n_components`` : :py:class:`int`
+        Number of Gaussians in the OneClassGMM. Default: 1.
+
+    ``random_state`` : :py:class:`int`
+        A seed for the random number generator used in the initialization of
+        the OneClassGMM. Default: 3.
+
+    ``frame_level_scores_flag`` : :py:class:`bool`
+        Return scores for each frame individually if True. Otherwise, return a
+        single score per video. Default: False.
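+
+    **Example:**
+
+    A minimal usage sketch; the feature dimensions and parameter values below
+    are illustrative only:
+
+    .. code-block:: python
+
+        import numpy as np
+        from bob.pad.base.algorithm import OneClassGMM
+
+        algorithm = OneClassGMM(n_components=2, random_state=3)
+
+        # bona-fide training features of size (N_samples x N_features):
+        real = np.random.randn(100, 5)
+
+        # train the GMM and keep the normalizers:
+        machine, mean, std = algorithm.train_gmm(
+            real, algorithm.n_components, algorithm.random_state)
+        algorithm.machine = machine
+        algorithm.features_mean = mean
+        algorithm.features_std = std
+
+        # higher scores correspond to the real class:
+        scores = algorithm.project(real)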
+    """
+
+    def __init__(self,
+                 n_components=1,
+                 random_state=3,
+                 frame_level_scores_flag=False):
+
+        Algorithm.__init__(
+            self,
+            n_components=n_components,
+            random_state=random_state,
+            frame_level_scores_flag=frame_level_scores_flag,
+            performs_projection=True,
+            requires_projector_training=True)
+
+        self.n_components = n_components
+
+        self.random_state = random_state
+
+        self.frame_level_scores_flag = frame_level_scores_flag
+
+        self.machine = None  # this attribute will be set to the pre-trained OneClassGMM machine
+
+        self.features_mean = None  # this attribute will be set to the mean of the features
+
+        self.features_std = None  # this attribute will be set to the std of the features
+
+        # names of the attributes of the pre-trained OneClassGMM machine to be saved/loaded to/from the HDF5 file:
+        self.gmm_param_keys = [
+            "covariance_type", "covariances_", "lower_bound_", "means_",
+            "n_components", "weights_", "converged_", "precisions_",
+            "precisions_cholesky_"
+        ]
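+        # restoring these attributes (in particular ``precisions_cholesky_``)
+        # allows ``score_samples`` to be called on a machine loaded from file
+        # without re-fitting it; this relies on the attribute layout of
+        # sklearn's GaussianMixture.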
+
+    # ==========================================================================
+    def train_gmm(self, real, n_components, random_state):
+        """
+        Train the OneClassGMM classifier using real-class data only. Prior to training,
+        the data is mean-std normalized.
+
+        **Parameters:**
+
+        ``real`` : 2D :py:class:`numpy.ndarray`
+            Training features for the real class.
+
+        ``n_components`` : :py:class:`int`
+            Number of Gaussians in the OneClassGMM.
+
+        ``random_state`` : :py:class:`int`
+            A seed for the random number generator used in the initialization of
+            the OneClassGMM.
+
+        **Returns:**
+
+        ``machine`` : object
+            A trained OneClassGMM machine.
+
+        ``features_mean`` : 1D :py:class:`numpy.ndarray`
+            Mean of the features.
+
+        ``features_std`` : 1D :py:class:`numpy.ndarray`
+            Standard deviation of the features.
+        """
+
+        features_norm, features_mean, features_std = mean_std_normalize(
+            real)
+        # real is now mean-std normalized
+
+        machine = mixture.GaussianMixture(
+            n_components=n_components,
+            random_state=random_state,
+            covariance_type='full')
+
+        machine.fit(features_norm)
+
+        return machine, features_mean, features_std
+
+    # ==========================================================================
+    def save_gmm_machine_and_mean_std(self, projector_file, machine,
+                                      features_mean, features_std):
+        """
+        Saves the OneClassGMM machine, features mean and std to the hdf5 file.
+        The absolute name of the file is specified in ``projector_file`` string.
+
+        **Parameters:**
+
+        ``projector_file`` : :py:class:`str`
+            Absolute name of the file to save the data to, as returned by
+            ``bob.pad.base`` framework.
+
+        ``machine`` : object
+            The OneClassGMM machine to be saved, as returned by the
+            ``sklearn.mixture`` module.
+
+        ``features_mean`` : 1D :py:class:`numpy.ndarray`
+            Mean of the features.
+
+        ``features_std`` : 1D :py:class:`numpy.ndarray`
+            Standard deviation of the features.
+        """
+
+        f = bob.io.base.HDF5File(projector_file,
+                                 'w')  # open hdf5 file to save to
+
+        for key in self.gmm_param_keys:
+            data = getattr(machine, key)
+
+            f.set(key, data)
+
+        f.set("features_mean", features_mean)
+
+        f.set("features_std", features_std)
+
+        del f
+
+    # ==========================================================================
+    def train_projector(self, training_features, projector_file):
+        """
+        Train the OneClassGMM for feature projection and save it to file.
+        The ``requires_projector_training = True`` flag must be set in the
+        constructor to enable this function.
+
+        **Parameters:**
+
+        ``training_features`` : [[FrameContainer], [FrameContainer]]
+            A list containing two elements: [0] - a list of Frame Containers with
+            feature vectors for the real class; [1] - a list of Frame Containers with
+            feature vectors for the attack class.
+
+        ``projector_file`` : :py:class:`str`
+            The file to save the trained projector to, as returned by the
+            ``bob.pad.base`` framework.
+        """
+
+        # training_features[0] - training features for the REAL class.
+        real = convert_list_of_frame_cont_to_array(
+            training_features[0])  # output is array
+
+        # training_features[1] - training features for the ATTACK class are
+        # not used by this one-class algorithm.
+
+        # Train the OneClassGMM machine and get normalizers:
+        machine, features_mean, features_std = self.train_gmm(
+            real=real,
+            n_components=self.n_components,
+            random_state=self.random_state)
+
+        # Save the OneClassGMM machine and normalizers:
+        self.save_gmm_machine_and_mean_std(projector_file, machine,
+                                           features_mean, features_std)
+
+    # ==========================================================================
+    def load_gmm_machine_and_mean_std(self, projector_file):
+        """
+        Loads the machine, features mean and std from the hdf5 file.
+        The absolute name of the file is specified in ``projector_file`` string.
+
+        **Parameters:**
+
+        ``projector_file`` : :py:class:`str`
+            Absolute name of the file to load the trained projector from, as
+            returned by ``bob.pad.base`` framework.
+
+        **Returns:**
+
+        ``machine`` : object
+            The loaded OneClassGMM machine, as returned by the ``sklearn.mixture`` module.
+
+        ``features_mean`` : 1D :py:class:`numpy.ndarray`
+            Mean of the features.
+
+        ``features_std`` : 1D :py:class:`numpy.ndarray`
+            Standard deviation of the features.
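+
+        A round-trip sketch (``"projector.hdf5"`` below is an illustrative file
+        name, not one mandated by the framework):
+
+        .. code-block:: python
+
+            import numpy as np
+            from bob.pad.base.algorithm import OneClassGMM
+
+            algorithm = OneClassGMM(n_components=1, random_state=3)
+            machine, mean, std = algorithm.train_gmm(np.random.randn(50, 4), 1, 3)
+
+            algorithm.save_gmm_machine_and_mean_std(
+                "projector.hdf5", machine, mean, std)
+            machine2, mean2, std2 = algorithm.load_gmm_machine_and_mean_std(
+                "projector.hdf5")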
+        """
+
+        f = bob.io.base.HDF5File(projector_file,
+                                 'r')  # file to read the machine from
+
+        # initialize the machine:
+        machine = mixture.GaussianMixture()
+
+        # set the params of the machine:
+        for key in self.gmm_param_keys:
+            data = f.read(key)
+
+            setattr(machine, key, data)
+
+        features_mean = f.read("features_mean")
+
+        features_std = f.read("features_std")
+
+        del f
+
+        return machine, features_mean, features_std
+
+    # ==========================================================================
+    def load_projector(self, projector_file):
+        """
+        Loads the machine, features mean and std from the hdf5 file.
+        The absolute name of the file is specified in ``projector_file`` string.
+
+        This function sets the attributes ``self.machine``, ``self.features_mean``
+        and ``self.features_std`` of this class with the loaded data.
+
+        The function must be capable of reading the data saved with the
+        :py:meth:`train_projector` method of this class.
+
+        Please set ``performs_projection = True`` in the constructor to
+        enable this function.
+
+        **Parameters:**
+
+        ``projector_file`` : :py:class:`str`
+            The file to read the projector from, as returned by the
+            ``bob.pad.base`` framework. The actual loading is done by the
+            ``load_gmm_machine_and_mean_std`` method of this class.
+        """
+
+        machine, features_mean, features_std = self.load_gmm_machine_and_mean_std(
+            projector_file)
+
+        self.machine = machine
+
+        self.features_mean = features_mean
+
+        self.features_std = features_std
+
+    # ==========================================================================
+    def project(self, feature):
+        """
+        This function computes a vector of scores for each sample in the input
+        array of features. The following steps are applied:
+
+        1. First, the input data is mean-std normalized using mean and std of the
+           real class only.
+
+        2. The input features are next classified using the pre-trained OneClassGMM machine.
+
+        Set ``performs_projection = True`` in the constructor to enable this function.
+        It is guaranteed that :py:meth:`load_projector` is **called before**
+        ``project`` is executed.
+
+        **Parameters:**
+
+        ``feature`` : FrameContainer or 2D :py:class:`numpy.ndarray`
+            Two types of input are accepted.
+            Either a Frame Container containing the features of an individual,
+            see ``bob.bio.video.utils.FrameContainer``,
+            or a 2D feature array of the size (N_samples x N_features).
+
+        **Returns:**
+
+        ``scores`` : 1D :py:class:`numpy.ndarray`
+            Vector of scores. Scores for the real class are expected to be
+            higher than the scores of the negative / attack class.
+            In this case the scores are the weighted log probabilities
+            returned by ``score_samples``.
+        """
+
+        # 1. Convert input array to numpy array if necessary.
+        if isinstance(
+                feature,
+                FrameContainer):  # if FrameContainer convert to 2D numpy array
+
+            features_array = convert_frame_cont_to_array(feature)
+
+        else:
+
+            features_array = feature
+
+        features_array_norm, _, _ = mean_std_normalize(
+            features_array, self.features_mean, self.features_std)
+
+        scores = self.machine.score_samples(features_array_norm)
+
+        return scores
+
+    # ==========================================================================
+    def score(self, toscore):
+        """
+        Returns the score of a sample being from the real class.
+
+        **Parameters:**
+
+        ``toscore`` : 1D :py:class:`numpy.ndarray`
+            Vector with scores for each frame/sample, reflecting the likelihood
+            of the frame being a sample of the real class.
+
+        **Returns:**
+
+        ``score`` : [:py:class:`float`]
+            If ``frame_level_scores_flag = False``, a single score per video is
+            returned: the mean of the frame scores, wrapped in a list because
+            ``score`` must be an iterable.
+            If ``frame_level_scores_flag = True``, a list of scores is returned,
+            one score per frame/sample.
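+            For example, if ``toscore = [0.5, 1.5]``, the returned value is
+            ``[1.0]`` when ``frame_level_scores_flag = False`` and
+            ``[0.5, 1.5]`` when ``frame_level_scores_flag = True``.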
+        """
+
+        if self.frame_level_scores_flag:
+
+            score = list(toscore)
+
+        else:
+
+            score = [np.mean(toscore)]  # compute a single score per video
+
+        return score
diff --git a/bob/pad/base/algorithm/__init__.py b/bob/pad/base/algorithm/__init__.py
index 065a5b2..c146db0 100644
--- a/bob/pad/base/algorithm/__init__.py
+++ b/bob/pad/base/algorithm/__init__.py
@@ -1,6 +1,6 @@
 from .Algorithm import Algorithm
 from .SVM import SVM
-
+from .OneClassGMM import OneClassGMM
 
 def __appropriate__(*args):
     """Says object was actually declared here, and not in the import module.
@@ -23,5 +23,6 @@ def __appropriate__(*args):
 __appropriate__(
     Algorithm,
     SVM,
+    OneClassGMM,
 )
 __all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/pad/base/test/test_algorithms.py b/bob/pad/base/test/test_algorithms.py
index 6059fd0..6f78efc 100644
--- a/bob/pad/base/test/test_algorithms.py
+++ b/bob/pad/base/test/test_algorithms.py
@@ -14,14 +14,17 @@ import bob.bio.video
 import bob.pad.base
 
 from bob.pad.base.algorithm import SVM
+from bob.pad.base.algorithm import OneClassGMM
 
 import random
 
-from bob.pad.base.utils import convert_array_to_list_of_frame_cont, convert_frame_cont_to_array
+from bob.pad.base.utils import convert_array_to_list_of_frame_cont, convert_list_of_frame_cont_to_array, \
+    convert_frame_cont_to_array
+
 
 def test_video_svm_pad_algorithm():
     """
-    Test the VideoSvmPadAlgorithm algorithm.
+    Test the SVM PAD algorithm.
     """
 
     random.seed(7)
@@ -81,4 +84,62 @@ def test_video_svm_pad_algorithm():
     precision = algorithm.comp_prediction_precision(machine, real_array,
                                                     attack_array)
 
-    assert precision > 0.99
\ No newline at end of file
+    assert precision > 0.99
+
+
+def test_video_gmm_pad_algorithm():
+    """
+    Test the OneClassGMM PAD algorithm.
+    """
+
+    random.seed(7)
+
+    N = 1000
+    mu = 1
+    sigma = 1
+    real_array = np.transpose(
+        np.vstack([[random.gauss(mu, sigma) for _ in range(N)],
+                   [random.gauss(mu, sigma) for _ in range(N)]]))
+
+    mu = 5
+    sigma = 1
+    attack_array = np.transpose(
+        np.vstack([[random.gauss(mu, sigma) for _ in range(N)],
+                   [random.gauss(mu, sigma) for _ in range(N)]]))
+
+    real = convert_array_to_list_of_frame_cont(real_array)
+
+    N_COMPONENTS = 1
+    RANDOM_STATE = 3
+    FRAME_LEVEL_SCORES_FLAG = True
+
+    algorithm = OneClassGMM(
+        n_components=N_COMPONENTS,
+        random_state=RANDOM_STATE,
+        frame_level_scores_flag=FRAME_LEVEL_SCORES_FLAG)
+
+    # training_features[0] - training features for the REAL class.
+    real_array_converted = convert_list_of_frame_cont_to_array(real)  # output is array
+
+    assert (real_array == real_array_converted).all()
+
+    # Train the OneClassGMM machine and get normalizers:
+    machine, features_mean, features_std = algorithm.train_gmm(
+        real=real_array_converted,
+        n_components=algorithm.n_components,
+        random_state=algorithm.random_state)
+
+    algorithm.machine = machine
+
+    algorithm.features_mean = features_mean
+
+    algorithm.features_std = features_std
+
+    scores_real = algorithm.project(real_array_converted)
+
+    scores_attack = algorithm.project(attack_array)
+
+    assert abs(np.min(scores_real) + 7.9423798970985917) < 0.000001
+    assert abs(np.max(scores_real) + 1.8380480068281055) < 0.000001
+    assert abs(np.min(scores_attack) + 38.831260843070098) < 0.000001
+    assert abs(np.max(scores_attack) + 5.3633030621521272) < 0.000001
diff --git a/bob/pad/base/utils/helper_functions.py b/bob/pad/base/utils/helper_functions.py
index 1f19d59..cc371c2 100644
--- a/bob/pad/base/utils/helper_functions.py
+++ b/bob/pad/base/utils/helper_functions.py
@@ -225,3 +225,57 @@ def convert_array_to_list_of_frame_cont(data):
             frame_container)  # add current frame to FrameContainer
 
     return frame_container_list
+
+
+def mean_std_normalize(features,
+                       features_mean=None,
+                       features_std=None):
+    """
+    The features in the input 2D array are mean-std normalized.
+    The rows are samples, the columns are features. If ``features_mean``
+    and ``features_std`` are provided, then these vectors will be used for
+    normalization. Otherwise, the mean and std of the features are
+    computed on the fly.
+
+    **Parameters:**
+
+    ``features`` : 2D :py:class:`numpy.ndarray`
+        Array of features to be normalized.
+
+    ``features_mean`` : 1D :py:class:`numpy.ndarray`
+        Mean of the features. Default: None.
+
+    ``features_std`` : 1D :py:class:`numpy.ndarray`
+        Standard deviation of the features. Default: None.
+
+    **Returns:**
+
+    ``features_norm`` : 2D :py:class:`numpy.ndarray`
+        Normalized array of features.
+
+    ``features_mean`` : 1D :py:class:`numpy.ndarray`
+        Mean of the features.
+
+    ``features_std`` : 1D :py:class:`numpy.ndarray`
+        Standard deviation of the features.
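+
+    **Example:**
+
+    A small worked example (the numbers are illustrative only):
+
+    .. code-block:: python
+
+        import numpy as np
+        from bob.pad.base.utils import mean_std_normalize
+
+        x = np.array([[1., 2.], [3., 4.]])  # 2 samples, 2 features
+        norm, mean, std = mean_std_normalize(x)
+        # mean -> [2., 3.], std -> [1., 1.], norm -> [[-1., -1.], [1., 1.]]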
+    """
+
+    features = np.copy(features)
+
+    # Compute the mean and std if they are not given:
+    if features_mean is None:
+        features_mean = np.mean(features, axis=0)
+
+    if features_std is None:
+        features_std = np.std(features, axis=0)
+
+    # normalize each row (sample) using the per-feature mean and std:
+    features_norm = (features - features_mean) / features_std
+
+    return features_norm, features_mean, features_std
\ No newline at end of file
diff --git a/doc/index.rst b/doc/index.rst
index deb35dc..2aebc2b 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -34,8 +34,8 @@ The implementation of (most of) the tools is separated into other packages in th
 All these packages can be easily combined.
 Here is a growing list of derived packages:
 
-* `bob.pad.voice <http://pypi.python.org/pypi/bob.pad.voice>`__ Tools to run presentation attack detection experiments for speech, including several Cepstral-based features and LBP-based feature extraction, GMM-based and logistic regression based algorithms, as well as plot and score fusion scripts.
-* `bob.pad.face <http://pypi.python.org/pypi/bob.pad.face>`__ Tools to run presentation attack detection experiments for face, including face-related feature extraction, GMM, SVM, and logistic regression based algorithms, as well as plotting scripts.
+* `bob.pad.voice <http://pypi.python.org/pypi/bob.pad.voice>`__ Tools to run presentation attack detection experiments for speech, including several Cepstral-based features and LBP-based feature extraction, OneClassGMM-based and logistic regression based algorithms, as well as plot and score fusion scripts.
+* `bob.pad.face <http://pypi.python.org/pypi/bob.pad.face>`__ Tools to run presentation attack detection experiments for face, including face-related feature extraction, OneClassGMM, SVM, and logistic regression based algorithms, as well as plotting scripts.
 
 If you are interested, please continue reading:
 
-- 
GitLab