Making linear models work with numpy and dask

665363a7 · Tiago de Freitas Pereira · e94c6aa5 · 665363a7 · 665363a7 · 665363a7
Commit 665363a7 authored 3 years ago by Tiago de Freitas Pereira
--- a/bob/learn/em/linear/wccn.py
+++ b/bob/learn/em/linear/wccn.py
 from sklearn.base import BaseEstimator
 from sklearn.base import TransformerMixin
-# import numpy as np
-from scipy.linalg import inv, cholesky
+import dask
+# Dask doesn't have an implementation for `pinv`, so scipy's `pinv` is used for both numpy and dask inputs
 from scipy.linalg import pinv
-import dask.array as da
-import numpy as np
 class WCCN(TransformerMixin, BaseEstimator):
@@ -37,23 +37,34 @@ class WCCN(TransformerMixin, BaseEstimator):
    def __init__(self, pinv=False):
        self.pinv = pinv
-    def fit(self, X: da.Array, y: list):
+    def fit(self, X, y):
+        # Pick the numerical backend (dask or numpy/scipy) that matches the type of X
+        if isinstance(X, dask.array.Array):
+            import dask.array as numerical_module
+            from dask.array.linalg import inv, cholesky
+        else:
+            import numpy as numerical_module
+            from scipy.linalg import inv, cholesky
        possible_labels = set(y)
-        y_ = np.array(y)
+        y_ = numerical_module.array(y)
        n_classes = len(possible_labels)
        # 1. compute the means for each label
-        mu_l = da.array(
+        mu_l = numerical_module.array(
-            [da.mean(X[np.where(y_ == l)[0]], axis=0) for l in possible_labels]
+            [
+                numerical_module.mean(X[numerical_module.where(y_ == l)[0]], axis=0)
+                for l in possible_labels
+            ]
        )
        # 2. Compute Sw
-        Sw = da.zeros((X.shape[1], X.shape[1]), dtype=float)
+        Sw = numerical_module.zeros((X.shape[1], X.shape[1]), dtype=float)
        for l in possible_labels:
-            indexes = np.where(y_ == l)[0]
+            indexes = numerical_module.where(y_ == l)[0]
            X_l_mu_l = X[indexes] - mu_l[l]
            Sw += X_l_mu_l.T @ X_l_mu_l
@@ -71,7 +82,7 @@ class WCCN(TransformerMixin, BaseEstimator):
        return self
-    def transform(self, X: da.Array):
+    def transform(self, X):
        return ((X - self.input_subtract) / self.input_divide) @ self.weights

--- a/bob/learn/em/linear/whitening.py
+++ b/bob/learn/em/linear/whitening.py
 from sklearn.base import BaseEstimator
 from sklearn.base import TransformerMixin
 import numpy as np
-from dask.array.linalg import inv, cholesky
 from scipy.linalg import pinv
-import dask.array as da
+import dask
 class Whitening(TransformerMixin, BaseEstimator):
@@ -43,10 +42,19 @@ class Whitening(TransformerMixin, BaseEstimator):
    def __init__(self, pinv: bool = False):
        self.pinv = pinv
-    def fit(self, X: da.Array, y=None):
+    def fit(self, X, y=None):
+        # Pick the numerical backend (dask or numpy/scipy) that matches the type of X
+        if isinstance(X, dask.array.Array):
+            import dask.array as numerical_module
+            from dask.array.linalg import inv, cholesky
+        else:
+            import numpy as numerical_module
+            from scipy.linalg import inv, cholesky
        # 1. Computes the mean vector and the covariance matrix of the training set
-        mu = da.mean(X, axis=0)
+        mu = numerical_module.mean(X, axis=0)
-        cov = da.cov(X.T)
+        cov = numerical_module.cov(X.T)
        # 2. Computes the inverse of the covariance matrix
        inv_cov = pinv(cov) if self.pinv else inv(cov)
@@ -60,7 +68,7 @@ class Whitening(TransformerMixin, BaseEstimator):
        return self
-    def transform(self, X: da.Array):
+    def transform(self, X):
        return ((X - self.input_subtract) / self.input_divide) @ self.weights
    def _more_tags(self):

--- a/bob/learn/em/test/test_linear.py
+++ b/bob/learn/em/test/test_linear.py
@@ -18,10 +18,16 @@ from bob.learn.em.linear import (
 )
-def test_whitening_py():
+def run_whitening(with_dask):
+    # Select the array backend (dask or numpy) used to build the test data
+    if with_dask:
+        import dask.array as numerical_module
+    else:
+        import numpy as numerical_module
    # Tests our Whitening extractor.
-    data = da.array(
+    data = numerical_module.array(
        [
            [1.2622, -1.6443, 0.1889],
            [0.4286, -0.8922, 1.3020],
@@ -31,18 +37,20 @@ def test_whitening_py():
            [0.4301, 0.4886, -0.1456],
        ]
    )
-    sample = da.array([1, 2, 3.0])
+    sample = numerical_module.array([1, 2, 3.0])
    # Expected results (from matlab)
-    mean_ref = da.array([0.096324163333333, -0.465965438333333, 0.366839091666667])
+    mean_ref = numerical_module.array(
-    whit_ref = da.array(
+        [0.096324163333333, -0.465965438333333, 0.366839091666667]
+    )
+    whit_ref = numerical_module.array(
        [
            [1.608410253685985, 0, 0],
            [1.079813355720326, 1.411083365535711, 0],
            [0.693459921529905, 0.571417184139332, 1.800117179839927],
        ]
    )
-    sample_whitened_ref = da.array(
+    sample_whitened_ref = numerical_module.array(
        [5.942255453628436, 4.984316201643742, 4.739998188373740]
    )
@@ -70,10 +78,16 @@ def test_whitening_py():
    assert np.allclose(s2, sample_whitened_ref, eps, eps)
-def test_wccn_py():
+def run_wccn(with_dask):
+    # Select the array backend (dask or numpy) used to build the test data
+    if with_dask:
+        import dask.array as numerical_module
+    else:
+        import numpy as numerical_module
    # Tests our WCCN extractor.
-    X = da.array(
+    X = numerical_module.array(
        [
            [1.2622, -1.6443, 0.1889],
            [0.4286, -0.8922, 1.3020],
@@ -85,18 +99,18 @@ def test_wccn_py():
    )
    y = [0, 0, 1, 1, 2, 2]
-    sample = da.array([1, 2, 3.0])
+    sample = numerical_module.array([1, 2, 3.0])
    # Expected results
-    mean_ref = da.array([0.0, 0.0, 0.0])
+    mean_ref = numerical_module.array([0.0, 0.0, 0.0])
-    weight_ref = da.array(
+    weight_ref = numerical_module.array(
        [
            [15.8455444, 0.0, 0.0],
            [-10.7946764, 2.87942129, 0.0],
            [18.76762201, -2.19719292, 2.1505817],
        ]
    )
-    sample_wccn_ref = da.array([50.55905765, -0.83273618, 6.45174511])
+    sample_wccn_ref = numerical_module.array([50.55905765, -0.83273618, 6.45174511])
    # Runs WCCN (first method)
    t = WCCN()
@@ -118,3 +132,19 @@ def test_wccn_py():
    assert np.allclose(t.input_subtract, mean_ref, eps, eps)
    assert np.allclose(t.weights, weight_ref, eps, eps)
    assert np.allclose(s2, sample_wccn_ref, eps, eps)
+def test_wccn_numpy():
+    run_wccn(with_dask=False)
+def test_wccn_dask():
+    run_wccn(with_dask=True)
+def test_whitening_numpy():
+    run_whitening(with_dask=False)
+def test_whitening_dask():
+    run_whitening(with_dask=True)