diff --git a/bob/learn/em/linear/__init__.py b/bob/learn/em/linear/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..051c3932f575f1eb9ef1a4623f7c76e5c9d11fa6
--- /dev/null
+++ b/bob/learn/em/linear/__init__.py
@@ -0,0 +1,2 @@
+from .wccn import WCCN
+from .whitening import Whitening
diff --git a/bob/learn/em/linear/wccn.py b/bob/learn/em/linear/wccn.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d0dbaa1e92d820258b6ea69d4a584a41fdeb131
--- /dev/null
+++ b/bob/learn/em/linear/wccn.py
@@ -0,0 +1,90 @@
+from sklearn.base import BaseEstimator
+from sklearn.base import TransformerMixin
+
+
+import dask
+
+# Dask doesn't have an implementation for `pinv`
+from scipy.linalg import pinv
+
+
+class WCCN(TransformerMixin, BaseEstimator):
+    """
+    Trains a linear machine to perform Within-Class Covariance Normalization (WCCN).
+    WCCN finds the projection matrix W that allows us to linearly project the data matrix X to another (sub) space such that:
+
+    .. math::
+        [(1/K) S_{w}]^{-1} = W W^T
+
+    where :math:`W` is a lower triangular matrix computed using the Cholesky decomposition:
+
+    .. math::
+        W = cholesky([(1/K) S_{w}]^{-1})
+
+
+    where:
+        - :math:`K` is the number of classes
+        - :math:`S_w` is the within-class scatter matrix; it has dimensions ``(X.shape[1], X.shape[1])`` and is defined as :math:`S_w = \\sum_{k=1}^K \\sum_{n \\in C_k} (x_n-m_k)(x_n-m_k)^T`, with :math:`C_k` being the set of all samples of class :math:`k`.
+        - :math:`m_k` is the empirical mean of class *k*, defined as :math:`m_k = \\frac{1}{N_k}\\sum_{n \\in C_k} x_n`
+
+
+    References:
+        - 1. Within-class covariance normalization for SVM-based speaker recognition, Andrew O. Hatch, Sachin Kajarekar, and Andreas Stolcke, In INTERSPEECH, 2006.
+        - 2. http://en.wikipedia.org/wiki/Cholesky_decomposition
+
+    """
+
+    def __init__(self, pinv=False):
+        self.pinv = pinv
+
+    def fit(self, X, y):
+
+        # CHECKING THE TYPES
+        if isinstance(X, dask.array.Array):
+            import dask.array as numerical_module
+            from dask.array.linalg import inv, cholesky
+        else:
+            import numpy as numerical_module
+            from scipy.linalg import inv, cholesky
+
+        possible_labels = sorted(set(y))  # sorted so the rows of `mu_l` line up with the loop below
+        y_ = numerical_module.array(y)
+
+        n_classes = len(possible_labels)
+
+        # 1. Compute the mean of each class
+        mu_l = numerical_module.array(
+            [
+                numerical_module.mean(X[numerical_module.where(y_ == l)[0]], axis=0)
+                for l in possible_labels
+            ]
+        )
+
+        # 2. Compute Sw
+        Sw = numerical_module.zeros((X.shape[1], X.shape[1]), dtype=float)
+
+        for i, l in enumerate(possible_labels):
+            indexes = numerical_module.where(y_ == l)[0]
+            X_l_mu_l = X[indexes] - mu_l[i]
+
+            Sw += X_l_mu_l.T @ X_l_mu_l
+
+        # 3. Compute the inverse of the scaled within-class scatter
+        scaled_Sw = (1 / n_classes) * Sw
+        inv_scaled_Sw = pinv(scaled_Sw) if self.pinv else inv(scaled_Sw)
+
+        # 4. Compute the Cholesky decomposition
+        self.weights = cholesky(
+            inv_scaled_Sw, lower=True
+        )  # Setting lower=True to keep the same convention as the previous implementation
+        self.input_subtract = 0
+        self.input_divide = 1.0
+
+        return self
+
+    def transform(self, X):
+
+        return ((X - self.input_subtract) / self.input_divide) @ self.weights
+
+    def _more_tags(self):
+        return {"stateless": False, "requires_fit": True}
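A minimal usage sketch of the new WCCN estimator (illustration only, not part of the patch): it assumes the package is installed so that ``bob.learn.em.linear`` resolves, and it reuses the toy data, labels and reference values from the test file further down in this diff.

    import numpy as np
    from bob.learn.em.linear import WCCN

    X = np.array(
        [
            [1.2622, -1.6443, 0.1889],
            [0.4286, -0.8922, 1.3020],
            [-0.6613, 0.0430, 0.6377],
            [-0.8718, -0.4788, 0.3988],
            [-0.0098, -0.3121, -0.1807],
            [0.4301, 0.4886, -0.1456],
        ]
    )
    y = [0, 0, 1, 1, 2, 2]

    # fit() stores the lower-triangular Cholesky factor of [(1/K) Sw]^{-1} in `weights`
    t = WCCN().fit(X, y)
    # transform() projects samples as X @ weights (WCCN does not subtract a mean)
    s = t.transform(np.array([1.0, 2.0, 3.0]))
    print(s)  # approximately [50.559, -0.833, 6.452], matching sample_wccn_ref in the tests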
diff --git a/bob/learn/em/linear/whitening.py b/bob/learn/em/linear/whitening.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd1046069f23ab33fc2e2f50640e0b8a026a919b
--- /dev/null
+++ b/bob/learn/em/linear/whitening.py
@@ -0,0 +1,75 @@
+from sklearn.base import BaseEstimator
+from sklearn.base import TransformerMixin
+import numpy as np
+from scipy.linalg import pinv
+import dask
+
+
+class Whitening(TransformerMixin, BaseEstimator):
+    """
+    Trains an estimator to perform Cholesky whitening.
+
+    The whitening transformation is a decorrelation method that converts the covariance matrix of a set of samples into the identity matrix :math:`I`.
+    It linearly transforms the input variables so that the resulting variables are uncorrelated and have unit variance.
+
+    This transformation is invertible.
+    The method is called the whitening transform because it transforms the input matrix :math:`X` closer towards white noise (let's call it :math:`\\tilde{X}`):
+
+    .. math::
+        Cov(\\tilde{X}) = I
+
+    with:
+
+    .. math::
+        \\tilde{X} = X W
+
+    where :math:`W` is the projection matrix that allows us to linearly project the data matrix :math:`X` to another (sub) space such that:
+
+    .. math::
+        [Cov(X)]^{-1} = W W^T
+
+
+    :math:`W` is computed using the Cholesky decomposition:
+
+    .. math::
+        W = cholesky([Cov(X)]^{-1})
+
+
+    References:
+        - 1. https://rtmath.net/help/html/e9c12dc0-e813-4ca9-aaa3-82340f1c5d24.htm
+        - 2. http://en.wikipedia.org/wiki/Cholesky_decomposition
+
+    """
+
+    def __init__(self, pinv: bool = False):
+        self.pinv = pinv
+
+    def fit(self, X, y=None):
+        # CHECKING THE TYPES
+        if isinstance(X, dask.array.Array):
+            import dask.array as numerical_module
+            from dask.array.linalg import inv, cholesky
+
+        else:
+            import numpy as numerical_module
+            from scipy.linalg import inv, cholesky
+
+        # 1. Computes the mean vector and the covariance matrix of the training set
+        mu = numerical_module.mean(X, axis=0)
+        cov = numerical_module.cov(X.T)
+
+        # 2. Computes the inverse of the covariance matrix
+        inv_cov = pinv(cov) if self.pinv else inv(cov)
+
+        # 3. Computes the Cholesky decomposition of the inverse covariance matrix
+        self.weights = cholesky(
+            inv_cov, lower=True
+        )  # Setting lower=True to keep the same convention as the previous implementation
+        self.input_subtract = mu
+        self.input_divide = 1.0
+
+        return self
+
+    def transform(self, X):
+        return ((X - self.input_subtract) / self.input_divide) @ self.weights
+
+    def _more_tags(self):
+        return {"stateless": False, "requires_fit": True}
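A corresponding usage sketch for the Whitening estimator (illustration only, not part of the patch), reusing the 6x3 toy matrix from the tests below; it checks the property stated in the docstring, namely that the covariance of the transformed data is close to the identity.

    import numpy as np
    from bob.learn.em.linear import Whitening

    data = np.array(
        [
            [1.2622, -1.6443, 0.1889],
            [0.4286, -0.8922, 1.3020],
            [-0.6613, 0.0430, 0.6377],
            [-0.8718, -0.4788, 0.3988],
            [-0.0098, -0.3121, -0.1807],
            [0.4301, 0.4886, -0.1456],
        ]
    )

    # fit() stores the training mean and the Cholesky factor of the inverse covariance
    t = Whitening().fit(data)
    whitened = t.transform(data)  # (data - mean) @ weights
    print(np.cov(whitened.T).round(6))  # approximately the 3x3 identity matrix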
diff --git a/bob/learn/em/test/test_linear.py b/bob/learn/em/test/test_linear.py
new file mode 100644
index 0000000000000000000000000000000000000000..98ad1fba33c151d5e3e9dd739ca583b26395f145
--- /dev/null
+++ b/bob/learn/em/test/test_linear.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Andre Anjos <andre.anjos@idiap.ch>
+# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+# Tue May 31 16:55:10 2011 +0200
+#
+# Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
+
+"""Tests on the machine infrastructure.
+"""
+
+import dask.array as da
+import numpy as np
+
+from bob.learn.em.linear import (
+    Whitening,
+    WCCN,
+)
+
+
+def run_whitening(with_dask):
+
+    # CHECKING THE TYPES
+    if with_dask:
+        import dask.array as numerical_module
+    else:
+        import numpy as numerical_module
+
+    # Tests our Whitening extractor.
+    data = numerical_module.array(
+        [
+            [1.2622, -1.6443, 0.1889],
+            [0.4286, -0.8922, 1.3020],
+            [-0.6613, 0.0430, 0.6377],
+            [-0.8718, -0.4788, 0.3988],
+            [-0.0098, -0.3121, -0.1807],
+            [0.4301, 0.4886, -0.1456],
+        ]
+    )
+    sample = numerical_module.array([1, 2, 3.0])
+
+    # Expected results (from Matlab)
+    mean_ref = numerical_module.array(
+        [0.096324163333333, -0.465965438333333, 0.366839091666667]
+    )
+    whit_ref = numerical_module.array(
+        [
+            [1.608410253685985, 0, 0],
+            [1.079813355720326, 1.411083365535711, 0],
+            [0.693459921529905, 0.571417184139332, 1.800117179839927],
+        ]
+    )
+    sample_whitened_ref = numerical_module.array(
+        [5.942255453628436, 4.984316201643742, 4.739998188373740]
+    )
+
+    # Runs whitening (first method)
+
+    t = Whitening()
+    t.fit(data)
+
+    s = t.transform(sample)
+
+    # Makes sure results are good
+    eps = 1e-4
+    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
+    assert np.allclose(t.weights, whit_ref, eps, eps)
+    assert np.allclose(s, sample_whitened_ref, eps, eps)
+
+    # Runs whitening (second method)
+    m2 = t.fit(data)
+    s2 = t.transform(sample)
+
+    # Makes sure results are good
+    eps = 1e-4
+    assert np.allclose(m2.input_subtract, mean_ref, eps, eps)
+    assert np.allclose(m2.weights, whit_ref, eps, eps)
+    assert np.allclose(s2, sample_whitened_ref, eps, eps)
+
+
+def run_wccn(with_dask):
+
+    # CHECKING THE TYPES
+    if with_dask:
+        import dask.array as numerical_module
+    else:
+        import numpy as numerical_module
+
+    # Tests our WCCN extractor.
+    X = numerical_module.array(
+        [
+            [1.2622, -1.6443, 0.1889],
+            [0.4286, -0.8922, 1.3020],
+            [-0.6613, 0.0430, 0.6377],
+            [-0.8718, -0.4788, 0.3988],
+            [-0.0098, -0.3121, -0.1807],
+            [0.4301, 0.4886, -0.1456],
+        ]
+    )
+    y = [0, 0, 1, 1, 2, 2]
+
+    sample = numerical_module.array([1, 2, 3.0])
+
+    # Expected results
+    mean_ref = numerical_module.array([0.0, 0.0, 0.0])
+    weight_ref = numerical_module.array(
+        [
+            [15.8455444, 0.0, 0.0],
+            [-10.7946764, 2.87942129, 0.0],
+            [18.76762201, -2.19719292, 2.1505817],
+        ]
+    )
+    sample_wccn_ref = numerical_module.array([50.55905765, -0.83273618, 6.45174511])
+
+    # Runs WCCN (first method)
+    t = WCCN()
+    t.fit(X, y=y)
+    s = t.transform(sample)
+
+    # Makes sure results are good
+    eps = 1e-4
+    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
+    assert np.allclose(t.weights, weight_ref, eps, eps)
+    assert np.allclose(s, sample_wccn_ref, eps, eps)
+
+    # Runs WCCN (second method)
+    m2 = t.fit(X, y)
+    s2 = t.transform(sample)
+
+    # Makes sure results are good
+    eps = 1e-4
+    assert np.allclose(m2.input_subtract, mean_ref, eps, eps)
+    assert np.allclose(m2.weights, weight_ref, eps, eps)
+    assert np.allclose(s2, sample_wccn_ref, eps, eps)
+
+
+def test_wccn_numpy():
+    run_wccn(with_dask=False)
+
+
+def test_wccn_dask():
+    run_wccn(with_dask=True)
+
+
+def test_whitening_numpy():
+    run_whitening(with_dask=False)
+
+
+def test_whitening_dask():
+    run_whitening(with_dask=True)
diff --git a/doc/index.rst b/doc/index.rst
index c32431d6ed2646a1f30bfe0225e3e14b2718630d..d81413146ca3255cce7f5462ff8d295c14cb7a03 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -17,6 +17,8 @@ includes implementations of the following algorithms:
   - Total Variability Modeling (iVectors)
   - Probabilistic Linear Discriminant Analysis (PLDA)
   - EM Principal Component Analysis (EM-PCA)
+  - Whitening
+  - Within-Class Covariance Normalization (WCCN)
 
 
 Documentation
diff --git a/doc/py_api.rst b/doc/py_api.rst
index 64695e03be4fbb3c60340ccbd01eb6693cd6a7c2..54a64cff4a51149eeef8c18fab4e66bd951e6f87 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -36,6 +36,8 @@ Machines
    bob.learn.em.cluster.KMeansMachine
    bob.learn.em.mixture.GMMStats
    bob.learn.em.mixture.GMMMachine
+   bob.learn.em.linear.WCCN
+   bob.learn.em.linear.Whitening
 
 
 .. TODO uncomment when implemented
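Both estimators dispatch on the input type, so the same API also runs on dask arrays. A rough sketch of that path (illustration only; it assumes ``dask[array]`` is installed, and the chunked random data is made up for this example):

    import dask.array as da
    import numpy as np
    from bob.learn.em.linear import Whitening

    rng = np.random.RandomState(0)
    data = da.from_array(rng.normal(size=(512, 3)), chunks=(128, 3))

    t = Whitening().fit(data)         # internally uses dask.array.linalg.inv / cholesky
    whitened = t.transform(data)      # still a lazy dask array
    print(np.cov(whitened.compute().T).round(3))  # close to the 3x3 identity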