Skip to content
Snippets Groups Projects
Commit 665363a7 authored by Tiago de Freitas Pereira
Browse files

Making linear models work with numpy and dask

parent e94c6aa5
No related branches found
No related tags found
1 merge request: !44 "Moved the necessary bits of bob.learn.linear to this package"
Pipeline #58302 passed
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin from sklearn.base import TransformerMixin
# import numpy as np
from scipy.linalg import inv, cholesky import dask
# Dask doesn't have an implementation for `pinv`
from scipy.linalg import pinv from scipy.linalg import pinv
import dask.array as da
import numpy as np
class WCCN(TransformerMixin, BaseEstimator): class WCCN(TransformerMixin, BaseEstimator):
...@@ -37,23 +37,34 @@ class WCCN(TransformerMixin, BaseEstimator): ...@@ -37,23 +37,34 @@ class WCCN(TransformerMixin, BaseEstimator):
def __init__(self, pinv=False): def __init__(self, pinv=False):
self.pinv = pinv self.pinv = pinv
def fit(self, X: da.Array, y: list): def fit(self, X, y):
# CHECKING THE TYPES
if isinstance(X, dask.array.Array):
import dask.array as numerical_module
from dask.array.linalg import inv, cholesky
else:
import numpy as numerical_module
from scipy.linalg import inv, cholesky
possible_labels = set(y) possible_labels = set(y)
y_ = np.array(y) y_ = numerical_module.array(y)
n_classes = len(possible_labels) n_classes = len(possible_labels)
# 1. compute the means for each label # 1. compute the means for each label
mu_l = da.array( mu_l = numerical_module.array(
[da.mean(X[np.where(y_ == l)[0]], axis=0) for l in possible_labels] [
numerical_module.mean(X[numerical_module.where(y_ == l)[0]], axis=0)
for l in possible_labels
]
) )
# 2. Compute Sw # 2. Compute Sw
Sw = da.zeros((X.shape[1], X.shape[1]), dtype=float) Sw = numerical_module.zeros((X.shape[1], X.shape[1]), dtype=float)
for l in possible_labels: for l in possible_labels:
indexes = np.where(y_ == l)[0] indexes = numerical_module.where(y_ == l)[0]
X_l_mu_l = X[indexes] - mu_l[l] X_l_mu_l = X[indexes] - mu_l[l]
Sw += X_l_mu_l.T @ X_l_mu_l Sw += X_l_mu_l.T @ X_l_mu_l
...@@ -71,7 +82,7 @@ class WCCN(TransformerMixin, BaseEstimator): ...@@ -71,7 +82,7 @@ class WCCN(TransformerMixin, BaseEstimator):
return self return self
def transform(self, X: da.Array): def transform(self, X):
return ((X - self.input_subtract) / self.input_divide) @ self.weights return ((X - self.input_subtract) / self.input_divide) @ self.weights
......
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin from sklearn.base import TransformerMixin
import numpy as np import numpy as np
from dask.array.linalg import inv, cholesky
from scipy.linalg import pinv from scipy.linalg import pinv
import dask.array as da import dask
class Whitening(TransformerMixin, BaseEstimator): class Whitening(TransformerMixin, BaseEstimator):
...@@ -43,10 +42,19 @@ class Whitening(TransformerMixin, BaseEstimator): ...@@ -43,10 +42,19 @@ class Whitening(TransformerMixin, BaseEstimator):
def __init__(self, pinv: bool = False): def __init__(self, pinv: bool = False):
self.pinv = pinv self.pinv = pinv
def fit(self, X: da.Array, y=None): def fit(self, X, y=None):
# CHECKING THE TYPES
if isinstance(X, dask.array.Array):
import dask.array as numerical_module
from dask.array.linalg import inv, cholesky
else:
import numpy as numerical_module
from scipy.linalg import inv, cholesky
# 1. Computes the mean vector and the covariance matrix of the training set # 1. Computes the mean vector and the covariance matrix of the training set
mu = da.mean(X, axis=0) mu = numerical_module.mean(X, axis=0)
cov = da.cov(X.T) cov = numerical_module.cov(X.T)
# 2. Computes the inverse of the covariance matrix # 2. Computes the inverse of the covariance matrix
inv_cov = pinv(cov) if self.pinv else inv(cov) inv_cov = pinv(cov) if self.pinv else inv(cov)
...@@ -60,7 +68,7 @@ class Whitening(TransformerMixin, BaseEstimator): ...@@ -60,7 +68,7 @@ class Whitening(TransformerMixin, BaseEstimator):
return self return self
def transform(self, X: da.Array): def transform(self, X):
return ((X - self.input_subtract) / self.input_divide) @ self.weights return ((X - self.input_subtract) / self.input_divide) @ self.weights
def _more_tags(self): def _more_tags(self):
......
...@@ -18,10 +18,16 @@ from bob.learn.em.linear import ( ...@@ -18,10 +18,16 @@ from bob.learn.em.linear import (
) )
def test_whitening_py(): def run_whitening(with_dask):
# CHECKING THE TYPES
if with_dask:
import dask.array as numerical_module
else:
import numpy as numerical_module
# Tests our Whitening extractor. # Tests our Whitening extractor.
data = da.array( data = numerical_module.array(
[ [
[1.2622, -1.6443, 0.1889], [1.2622, -1.6443, 0.1889],
[0.4286, -0.8922, 1.3020], [0.4286, -0.8922, 1.3020],
...@@ -31,18 +37,20 @@ def test_whitening_py(): ...@@ -31,18 +37,20 @@ def test_whitening_py():
[0.4301, 0.4886, -0.1456], [0.4301, 0.4886, -0.1456],
] ]
) )
sample = da.array([1, 2, 3.0]) sample = numerical_module.array([1, 2, 3.0])
# Expected results (from matlab) # Expected results (from matlab)
mean_ref = da.array([0.096324163333333, -0.465965438333333, 0.366839091666667]) mean_ref = numerical_module.array(
whit_ref = da.array( [0.096324163333333, -0.465965438333333, 0.366839091666667]
)
whit_ref = numerical_module.array(
[ [
[1.608410253685985, 0, 0], [1.608410253685985, 0, 0],
[1.079813355720326, 1.411083365535711, 0], [1.079813355720326, 1.411083365535711, 0],
[0.693459921529905, 0.571417184139332, 1.800117179839927], [0.693459921529905, 0.571417184139332, 1.800117179839927],
] ]
) )
sample_whitened_ref = da.array( sample_whitened_ref = numerical_module.array(
[5.942255453628436, 4.984316201643742, 4.739998188373740] [5.942255453628436, 4.984316201643742, 4.739998188373740]
) )
...@@ -70,10 +78,16 @@ def test_whitening_py(): ...@@ -70,10 +78,16 @@ def test_whitening_py():
assert np.allclose(s2, sample_whitened_ref, eps, eps) assert np.allclose(s2, sample_whitened_ref, eps, eps)
def test_wccn_py(): def run_wccn(with_dask):
# CHECKING THE TYPES
if with_dask:
import dask.array as numerical_module
else:
import numpy as numerical_module
# Tests our Whitening extractor. # Tests our Whitening extractor.
X = da.array( X = numerical_module.array(
[ [
[1.2622, -1.6443, 0.1889], [1.2622, -1.6443, 0.1889],
[0.4286, -0.8922, 1.3020], [0.4286, -0.8922, 1.3020],
...@@ -85,18 +99,18 @@ def test_wccn_py(): ...@@ -85,18 +99,18 @@ def test_wccn_py():
) )
y = [0, 0, 1, 1, 2, 2] y = [0, 0, 1, 1, 2, 2]
sample = da.array([1, 2, 3.0]) sample = numerical_module.array([1, 2, 3.0])
# Expected results # Expected results
mean_ref = da.array([0.0, 0.0, 0.0]) mean_ref = numerical_module.array([0.0, 0.0, 0.0])
weight_ref = da.array( weight_ref = numerical_module.array(
[ [
[15.8455444, 0.0, 0.0], [15.8455444, 0.0, 0.0],
[-10.7946764, 2.87942129, 0.0], [-10.7946764, 2.87942129, 0.0],
[18.76762201, -2.19719292, 2.1505817], [18.76762201, -2.19719292, 2.1505817],
] ]
) )
sample_wccn_ref = da.array([50.55905765, -0.83273618, 6.45174511]) sample_wccn_ref = numerical_module.array([50.55905765, -0.83273618, 6.45174511])
# Runs WCCN (first method) # Runs WCCN (first method)
t = WCCN() t = WCCN()
...@@ -118,3 +132,19 @@ def test_wccn_py(): ...@@ -118,3 +132,19 @@ def test_wccn_py():
assert np.allclose(t.input_subtract, mean_ref, eps, eps) assert np.allclose(t.input_subtract, mean_ref, eps, eps)
assert np.allclose(t.weights, weight_ref, eps, eps) assert np.allclose(t.weights, weight_ref, eps, eps)
assert np.allclose(s2, sample_wccn_ref, eps, eps) assert np.allclose(s2, sample_wccn_ref, eps, eps)
def test_wccn_numpy():
    """Run the shared WCCN checks with NumPy arrays as input."""
    # with_dask=False makes run_wccn build its fixtures with numpy.
    run_wccn(with_dask=False)
def test_wccn_dask():
    """Run the shared WCCN checks with Dask arrays as input."""
    # with_dask=True makes run_wccn build its fixtures with dask.array.
    run_wccn(with_dask=True)
def test_whitening_numpy():
    """Run the shared Whitening checks with NumPy arrays as input."""
    # with_dask=False makes run_whitening build its fixtures with numpy.
    run_whitening(with_dask=False)
def test_whitening_dask():
    """Run the shared Whitening checks with Dask arrays as input."""
    # with_dask=True makes run_whitening build its fixtures with dask.array.
    run_whitening(with_dask=True)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment