From 665363a76b426732e20ec15978fd8b834e5b94a3 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Mon, 14 Feb 2022 16:04:25 +0100
Subject: [PATCH] Making linear models work with numpy and dask

---
 bob/learn/em/linear/wccn.py      | 33 ++++++++++++-------
 bob/learn/em/linear/whitening.py | 20 ++++++++----
 bob/learn/em/test/test_linear.py | 54 +++++++++++++++++++++++++-------
 3 files changed, 78 insertions(+), 29 deletions(-)

diff --git a/bob/learn/em/linear/wccn.py b/bob/learn/em/linear/wccn.py
index b0c86f4..5d0dbaa 100644
--- a/bob/learn/em/linear/wccn.py
+++ b/bob/learn/em/linear/wccn.py
@@ -1,11 +1,11 @@
 from sklearn.base import BaseEstimator
 from sklearn.base import TransformerMixin
 
-# import numpy as np
-from scipy.linalg import inv, cholesky
+
+import dask
+
+# Dask doesn't have an implementation for `pinv`
 from scipy.linalg import pinv
-import dask.array as da
-import numpy as np
 
 
 class WCCN(TransformerMixin, BaseEstimator):
@@ -37,23 +37,34 @@ class WCCN(TransformerMixin, BaseEstimator):
     def __init__(self, pinv=False):
         self.pinv = pinv
 
-    def fit(self, X: da.Array, y: list):
+    def fit(self, X, y):
+
+        # CHECKING THE TYPES
+        if isinstance(X, dask.array.Array):
+            import dask.array as numerical_module
+            from dask.array.linalg import inv, cholesky
+        else:
+            import numpy as numerical_module
+            from scipy.linalg import inv, cholesky
 
         possible_labels = set(y)
-        y_ = np.array(y)
+        y_ = numerical_module.array(y)
 
         n_classes = len(possible_labels)
 
         # 1. compute the means for each label
-        mu_l = da.array(
-            [da.mean(X[np.where(y_ == l)[0]], axis=0) for l in possible_labels]
+        mu_l = numerical_module.array(
+            [
+                numerical_module.mean(X[numerical_module.where(y_ == l)[0]], axis=0)
+                for l in possible_labels
+            ]
         )
 
         # 2. Compute Sw
-        Sw = da.zeros((X.shape[1], X.shape[1]), dtype=float)
+        Sw = numerical_module.zeros((X.shape[1], X.shape[1]), dtype=float)
 
         for l in possible_labels:
-            indexes = np.where(y_ == l)[0]
+            indexes = numerical_module.where(y_ == l)[0]
             X_l_mu_l = X[indexes] - mu_l[l]
 
             Sw += X_l_mu_l.T @ X_l_mu_l
@@ -71,7 +82,7 @@ class WCCN(TransformerMixin, BaseEstimator):
 
         return self
 
-    def transform(self, X: da.Array):
+    def transform(self, X):
 
         return ((X - self.input_subtract) / self.input_divide) @ self.weights
 
diff --git a/bob/learn/em/linear/whitening.py b/bob/learn/em/linear/whitening.py
index b44084b..dd10460 100644
--- a/bob/learn/em/linear/whitening.py
+++ b/bob/learn/em/linear/whitening.py
@@ -1,9 +1,8 @@
 from sklearn.base import BaseEstimator
 from sklearn.base import TransformerMixin
 import numpy as np
-from dask.array.linalg import inv, cholesky
 from scipy.linalg import pinv
-import dask.array as da
+import dask
 
 
 class Whitening(TransformerMixin, BaseEstimator):
@@ -43,10 +42,19 @@ class Whitening(TransformerMixin, BaseEstimator):
     def __init__(self, pinv: bool = False):
         self.pinv = pinv
 
-    def fit(self, X: da.Array, y=None):
+    def fit(self, X, y=None):
+        # CHECKING THE TYPES
+        if isinstance(X, dask.array.Array):
+            import dask.array as numerical_module
+            from dask.array.linalg import inv, cholesky
+
+        else:
+            import numpy as numerical_module
+            from scipy.linalg import inv, cholesky
+
         # 1. Computes the mean vector and the covariance matrix of the training set
-        mu = da.mean(X, axis=0)
-        cov = da.cov(X.T)
+        mu = numerical_module.mean(X, axis=0)
+        cov = numerical_module.cov(X.T)
 
         # 2. Computes the inverse of the covariance matrix
         inv_cov = pinv(cov) if self.pinv else inv(cov)
@@ -60,7 +68,7 @@ class Whitening(TransformerMixin, BaseEstimator):
         return self
 
-    def transform(self, X: da.Array):
+    def transform(self, X):
 
         return ((X - self.input_subtract) / self.input_divide) @ self.weights
 
     def _more_tags(self):
diff --git a/bob/learn/em/test/test_linear.py b/bob/learn/em/test/test_linear.py
index f858af0..98ad1fb 100644
--- a/bob/learn/em/test/test_linear.py
+++ b/bob/learn/em/test/test_linear.py
@@ -18,10 +18,16 @@ from bob.learn.em.linear import (
 )
 
 
-def test_whitening_py():
+def run_whitening(with_dask):
+
+    # CHECKING THE TYPES
+    if with_dask:
+        import dask.array as numerical_module
+    else:
+        import numpy as numerical_module
 
     # Tests our Whitening extractor.
-    data = da.array(
+    data = numerical_module.array(
         [
             [1.2622, -1.6443, 0.1889],
             [0.4286, -0.8922, 1.3020],
@@ -31,18 +37,20 @@ def test_whitening_py():
             [0.4301, 0.4886, -0.1456],
         ]
     )
-    sample = da.array([1, 2, 3.0])
+    sample = numerical_module.array([1, 2, 3.0])
 
     # Expected results (from matlab)
-    mean_ref = da.array([0.096324163333333, -0.465965438333333, 0.366839091666667])
-    whit_ref = da.array(
+    mean_ref = numerical_module.array(
+        [0.096324163333333, -0.465965438333333, 0.366839091666667]
+    )
+    whit_ref = numerical_module.array(
         [
             [1.608410253685985, 0, 0],
             [1.079813355720326, 1.411083365535711, 0],
             [0.693459921529905, 0.571417184139332, 1.800117179839927],
         ]
     )
-    sample_whitened_ref = da.array(
+    sample_whitened_ref = numerical_module.array(
         [5.942255453628436, 4.984316201643742, 4.739998188373740]
     )
 
@@ -70,10 +78,16 @@ def test_whitening_py():
     assert np.allclose(s2, sample_whitened_ref, eps, eps)
 
 
-def test_wccn_py():
+def run_wccn(with_dask):
+
+    # CHECKING THE TYPES
+    if with_dask:
+        import dask.array as numerical_module
+    else:
+        import numpy as numerical_module
 
     # Tests our Whitening extractor.
-    X = da.array(
+    X = numerical_module.array(
         [
             [1.2622, -1.6443, 0.1889],
             [0.4286, -0.8922, 1.3020],
@@ -85,18 +99,18 @@ def test_wccn_py():
         ]
     )
     y = [0, 0, 1, 1, 2, 2]
-    sample = da.array([1, 2, 3.0])
+    sample = numerical_module.array([1, 2, 3.0])
 
     # Expected results
-    mean_ref = da.array([0.0, 0.0, 0.0])
-    weight_ref = da.array(
+    mean_ref = numerical_module.array([0.0, 0.0, 0.0])
+    weight_ref = numerical_module.array(
        [
             [15.8455444, 0.0, 0.0],
             [-10.7946764, 2.87942129, 0.0],
             [18.76762201, -2.19719292, 2.1505817],
         ]
     )
-    sample_wccn_ref = da.array([50.55905765, -0.83273618, 6.45174511])
+    sample_wccn_ref = numerical_module.array([50.55905765, -0.83273618, 6.45174511])
 
     # Runs WCCN (first method)
     t = WCCN()
@@ -118,3 +132,19 @@ def test_wccn_py():
     assert np.allclose(t.input_subtract, mean_ref, eps, eps)
     assert np.allclose(t.weights, weight_ref, eps, eps)
     assert np.allclose(s2, sample_wccn_ref, eps, eps)
+
+
+def test_wccn_numpy():
+    run_wccn(with_dask=False)
+
+
+def test_wccn_dask():
+    run_wccn(with_dask=True)
+
+
+def test_whitening_numpy():
+    run_whitening(with_dask=False)
+
+
+def test_whitening_dask():
+    run_whitening(with_dask=True)
-- 
GitLab
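
Note: the short sketch below is not part of the patch. It is a minimal illustration of the type-dispatch pattern the diff introduces, assuming only what the diff shows: WCCN and Whitening are importable from bob.learn.em.linear (as in the test module), and the same fit/transform calls accept either a numpy array or a dask array, picking scipy.linalg or dask.array.linalg internally based on the input type. The toy data and variable names are made up for illustration.

    import dask.array as da
    import numpy as np

    from bob.learn.em.linear import WCCN, Whitening

    # Made-up toy data: 20 samples with 3 features and two class labels.
    rng = np.random.RandomState(0)
    X_np = rng.randn(20, 3)
    y = [0] * 10 + [1] * 10

    # numpy input: fit() takes the scipy.linalg inv/cholesky branch and runs eagerly.
    wccn = WCCN().fit(X_np, y)
    projected_np = wccn.transform(X_np)

    # dask input: the same estimator takes the dask.array.linalg branch and
    # stays lazy until compute() is called.
    X_da = da.from_array(X_np, chunks=(10, 3))
    whitening = Whitening().fit(X_da)
    projected_da = whitening.transform(X_da).compute()

    print(projected_np.shape, projected_da.shape)  # (20, 3) (20, 3)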