#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Mon Jun 20 16:15:36 2011 +0200
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland

"""Tests for statistical methods
"""

import os, sys
from bob.math import scatter, scatters
import numpy
import nose.tools

def means(data):
  """Return the mean of ``data`` taken along axis 0 (one value per column)."""
  column_means = numpy.mean(data, axis=0)
  return column_means

def py_scatters(data):
  """Pure-numpy reference computation of the class scatter matrices.

  ``data`` is a sequence of 2D arrays, one array per class, with one sample
  per row. The number of columns of the first array fixes the size of the
  returned matrices.

  Returns the tuple ``(Sw, Sb, mu)``: the within-class scatter, the
  between-class scatter and the global mean over all samples.
  """
  # Per-class means and per-class sample counts
  class_means = numpy.array([numpy.mean(samples, axis=0) for samples in data])
  counts = numpy.array([samples.shape[0] for samples in data])

  # Global mean: class means weighted by class size, over the total count
  total = sum(samples.shape[0] for samples in data)
  mu = numpy.sum(class_means.T * counts, axis=1) / total

  # Between-class scatter: size-weighted outer products of the class-mean
  # offsets from the global mean
  offsets = class_means - mu
  Sb = numpy.dot(counts * offsets.T, offsets)

  # Within-class scatter: accumulate the scatter of every class around its
  # own mean
  dim = data[0].shape[1]
  Sw = numpy.zeros((dim, dim), dtype=float)
  for samples, center in zip(data, class_means):
    centered = samples - center
    Sw += numpy.dot(centered.T, centered)

  return (Sw, Sb, mu)

def test_scatter():
  """Checks scatter(data) against numpy's covariance and the column means."""

  data = numpy.random.rand(50,4)

  # This test demonstrates how to use the scatter matrix function of bob.
  S, M = scatter(data)
  S /= (data.shape[0]-1)

  # Do the same with numpy and compare. Note that with numpy we are computing
  # the covariance matrix which is the scatter matrix divided by (N-1).
  K = numpy.cov(data.T)
  M_ = means(data)
  assert (abs(S-K) < 1e-10).all()
  assert (abs(M-M_) < 1e-10).all()

def test_scatter_variation_1():
  """Checks scatter() when the mean buffer is supplied by the caller (m=M).

  The first element of the value returned by scatter() is used as the scatter
  matrix; the mean is read back from the buffer ``M``.
  """

  data = numpy.random.rand(50,4)

  # This test demonstrates how to use the scatter matrix function of bob.
  M = numpy.ndarray((data.shape[1],), dtype=float)
  S = scatter(data, m=M)
  S = S[0]
  S /= (data.shape[0]-1)

  # Do the same with numpy and compare. Note that with numpy we are computing
  # the covariance matrix which is the scatter matrix divided by (N-1).
  K = numpy.cov(data.T)
  M_ = means(data)
  assert (abs(S-K) < 1e-10).all()
  assert (abs(M-M_) < 1e-10).all()

def test_scatter_variation_2():
  """Checks scatter() when the scatter buffer is supplied by the caller (s=S).

  The first element of the value returned by scatter() is used as the mean;
  the scatter matrix is read back from the buffer ``S``.
  """

  data = numpy.random.rand(50,4)

  # This test demonstrates how to use the scatter matrix function of bob.
  S = numpy.ndarray((data.shape[1], data.shape[1]), dtype=float)
  M = scatter(data, s=S)
  M = M[0]
  S /= (data.shape[0]-1)

  # Do the same with numpy and compare. Note that with numpy we are computing
  # the covariance matrix which is the scatter matrix divided by (N-1).
  K = numpy.cov(data.T)
  M_ = means(data)
  assert (abs(S-K) < 1e-10).all()
  assert (abs(M-M_) < 1e-10).all()

def test_scatter_variation_3():
  """Checks scatter() when both buffers are supplied by keyword (m=M, s=S).

  The return value is asserted to be falsy; both results are read back from
  the caller-provided buffers.
  """

  data = numpy.random.rand(50,4)

  # This test demonstrates how to use the scatter matrix function of bob.
  S = numpy.ndarray((data.shape[1], data.shape[1]), dtype=float)
  M = numpy.ndarray((data.shape[1],), dtype=float)
  retval = scatter(data, m=M, s=S)
  assert not retval
  S /= (data.shape[0]-1)

  # Do the same with numpy and compare. Note that with numpy we are computing
  # the covariance matrix which is the scatter matrix divided by (N-1).
  K = numpy.cov(data.T)
  M_ = means(data)
  assert (abs(S-K) < 1e-10).all()
  assert (abs(M-M_) < 1e-10).all()

def test_fast_scatter():
  """Checks scatter() with both output buffers passed positionally.

  The call is ``scatter(data, S, M)``; its return value is ignored and both
  results are read back from the caller-provided buffers.
  """

  data = numpy.random.rand(50,4)

  # This test demonstrates how to use the scatter matrix function of bob.
  S = numpy.ndarray((data.shape[1], data.shape[1]), dtype=float)
  M = numpy.ndarray((data.shape[1],), dtype=float)
  scatter(data, S, M)
  S /= (data.shape[0]-1)

  # Do the same with numpy and compare. Note that with numpy we are computing
  # the covariance matrix which is the scatter matrix divided by (N-1).
  K = numpy.cov(data.T)
  M_ = means(data)
  assert (abs(S-K) < 1e-10).all()
  assert (abs(M-M_) < 1e-10).all()

def test_scatters():
  """Checks scatters(data) against the pure-numpy reference py_scatters()."""

  # Three classes of 50 random 4-dimensional samples each
  data = [
      numpy.random.rand(50,4),
      numpy.random.rand(50,4),
      numpy.random.rand(50,4),
      ]

  Sw_, Sb_, m_ = py_scatters(data)
  Sw, Sb, m = scatters(data)
  assert numpy.allclose(Sw, Sw_)
  assert numpy.allclose(Sb, Sb_)
  assert numpy.allclose(m, m_)

def test_scatters_variation_1():
  """Checks scatters() with all three output buffers supplied by the caller.

  The return value is asserted to be falsy; the results are read back from
  ``Sw``, ``Sb`` and ``m`` and compared with py_scatters().
  """

  # Three classes of 50 random 4-dimensional samples each
  data = [
      numpy.random.rand(50,4),
      numpy.random.rand(50,4),
      numpy.random.rand(50,4),
      ]

  Sw_, Sb_, m_ = py_scatters(data)

  N = data[0].shape[1]
  Sw = numpy.ndarray((N,N), numpy.float64)
  Sb = numpy.ndarray((N,N), numpy.float64)
  m = numpy.ndarray((N,), numpy.float64)
  assert not scatters(data, Sw, Sb, m)
  assert numpy.allclose(Sw, Sw_)
  assert numpy.allclose(Sb, Sb_)
  assert numpy.allclose(m, m_)

def test_scatters_variation_2():
  """Checks scatters() with only the two scatter buffers supplied.

  The call is expected to return a sequence of length 1; the scatter matrices
  are read back from ``Sw`` and ``Sb`` and compared with py_scatters().
  """

  # Three classes of 50 random 4-dimensional samples each
  data = [
      numpy.random.rand(50,4),
      numpy.random.rand(50,4),
      numpy.random.rand(50,4),
      ]

  Sw_, Sb_, m_ = py_scatters(data)

  N = data[0].shape[1]
  Sw = numpy.ndarray((N,N), numpy.float64)
  Sb = numpy.ndarray((N,N), numpy.float64)
  assert len(scatters(data, Sw, Sb)) == 1
  assert numpy.allclose(Sw, Sw_)
  assert numpy.allclose(Sb, Sb_)

def test_fast_scatters():
  """Checks scatters() with all output buffers passed positionally.

  The buffers are allocated with the shapes and dtypes of the reference
  results; the return value of scatters() is ignored.
  """

  # Three classes of 50 random 4-dimensional samples each
  data = [
      numpy.random.rand(50,4),
      numpy.random.rand(50,4),
      numpy.random.rand(50,4),
      ]

  Sw_, Sb_, m_ = py_scatters(data)

  Sw = numpy.empty_like(Sw_)
  Sb = numpy.empty_like(Sb_)
  m = numpy.empty_like(m_)
  scatters(data, Sw, Sb, m)
  assert numpy.allclose(Sw, Sw_)
  assert numpy.allclose(Sb, Sb_)
  assert numpy.allclose(m, m_)