test_trainer.py 8.13 KB
Newer Older
1
2
3
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
André Anjos's avatar
André Anjos committed
4
# Sun  4 Mar 20:06:14 2012
5
6
7
8
9
10
11
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland


"""Tests for libsvm training
"""

André Anjos's avatar
André Anjos committed
12
import os
13
14
15
import numpy
import tempfile
import pkg_resources
André Anjos's avatar
André Anjos committed
16
17
18
import nose.tools

from . import File, Machine, Trainer
19

André Anjos's avatar
André Anjos committed
20
def F(f):
  """Resolves ``f`` to its full path under this package's "data" subdirectory"""

  data_path = os.path.join('data', f)
  return pkg_resources.resource_filename(__name__, data_path)
23
24
25
26
27
28
29

def tempname(suffix, prefix='bobtest_'):
  """Generates a unique temporary file name.

  A temporary file is created to reserve the name, then immediately closed
  and removed, so only the (currently unused) path is returned.
  """

  fd, name = tempfile.mkstemp(suffix, prefix)
  os.close(fd)
  os.unlink(name)
  return name

André Anjos's avatar
André Anjos committed
30
# Pre-trained SVM model files used as regression references by the tests below
TEST_MACHINE_NO_PROBS = F('heart_no_probs.svmmodel')
TEST_MACHINE_ONE_CLASS = F('heart_one_class.svmmodel')

# Heart dataset fixtures: raw training data plus a reference model and output
HEART_DATA = F('heart.svmdata') #13 inputs
HEART_MACHINE = F('heart.svmmodel') #supports probabilities
HEART_EXPECTED = F('heart.out') #expected probabilities
36

37
38
39
40
def _check_abs_diff(a, b, maxval):
  assert numpy.all(abs(a - b) < maxval), "Maximum " \
          "difference exceeded limit (%g): %g" % (maxval, abs(a - b).max())

André Anjos's avatar
André Anjos committed
41
42
43
44
45
46
47
def test_initialization():

  # constructing with no arguments must yield a trainer configured with the
  # documented default settings
  trainer = Trainer()

  assert trainer.machine_type == 'C_SVC'
  assert trainer.kernel_type == 'RBF'
  assert trainer.cache_size == 100.
  assert trainer.stop_epsilon == 1e-3
  assert trainer.shrinking
  assert not trainer.probability

def test_get_and_set():

  trainer = Trainer()

  # every settable property must read back the value just written; the
  # assignments happen in the same order as before
  for attribute, value in [
      ('machine_type', 'NU_SVC'),
      ('kernel_type', 'LINEAR'),
      ('cache_size', 2),
      ('coef0', 2),
      ('cost', 2),
      ('degree', 2),
      ('gamma', 2),
      ('nu', 0.5),
      ('stop_epsilon', 2),
      ('shrinking', False),
      ('probability', True),
      ]:
    setattr(trainer, attribute, value)
    assert getattr(trainer, attribute) == value

@nose.tools.raises(ValueError)
def test_set_machine_raises():

  # assigning an unsupported machine type must be rejected
  Trainer().machine_type = 'wrong'

@nose.tools.raises(ValueError)
def test_set_kernel_raises():

  # assigning an unsupported kernel type must be rejected
  Trainer().kernel_type = 'wrong'

@nose.tools.raises(TypeError)
def test_cannot_delete():

  # trainer properties cannot be removed from the object
  svm_trainer = Trainer()
  del svm_trainer.kernel_type

André Anjos's avatar
André Anjos committed
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def test_training():

  # The input comes from an SVM file purely out of convenience: all that
  # matters is ending up with one 2D double array per class, one sample per
  # row.
  svm_file = File(HEART_DATA)
  labels, data = svm_file.read_all()
  neg = numpy.vstack([sample for label, sample in zip(labels, data) if label < 0])
  pos = numpy.vstack([sample for label, sample in zip(labels, data) if label > 0])

  # Features are pre-scaled to the [-1, +1] range, as libsvm apparently
  # suggests.  Our libsvm bindings do not perform any scaling themselves; if
  # you want to implement that generically, please do it.

  trained = Trainer().train((pos, neg)) #class ordering only affects labels
  reference = Machine(TEST_MACHINE_NO_PROBS)
  assert trained.machine_type == reference.machine_type
  assert trained.kernel_type == reference.kernel_type
  assert numpy.isclose(trained.gamma, reference.gamma)
  assert trained.shape == reference.shape
  _check_abs_diff(trained.input_subtract, reference.input_subtract, 1e-8)
  _check_abs_diff(trained.input_divide, reference.input_divide, 1e-8)

  # the freshly trained machine must classify exactly like the reference
  curr_label = trained.predict_class(data)
  prev_label = reference.predict_class(data)
  assert numpy.array_equal(curr_label, prev_label)

  # ... and produce matching labels and scores
  curr_labels, curr_scores = trained.predict_class_and_scores(data)
  prev_labels, prev_scores = reference.predict_class_and_scores(data)
  assert numpy.array_equal(curr_labels, prev_labels)
  _check_abs_diff(numpy.array(curr_scores), numpy.array(prev_scores), 1e-8)
André Anjos's avatar
André Anjos committed
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156

def test_training_with_probability():

  svm_file = File(HEART_DATA)
  labels, data = svm_file.read_all()
  neg = numpy.vstack([sample for label, sample in zip(labels, data) if label < 0])
  pos = numpy.vstack([sample for label, sample in zip(labels, data) if label > 0])

  # Features are pre-scaled to the [-1, +1] range, as libsvm apparently
  # suggests.  Our libsvm bindings do not perform any scaling themselves; if
  # you want to implement that generically, please do it.

  trained = Trainer(probability=True).train((pos, neg)) #ordering only affects labels
  reference = Machine(HEART_MACHINE)
  assert trained.machine_type == reference.machine_type
  assert trained.kernel_type == reference.kernel_type
  assert numpy.isclose(trained.gamma, reference.gamma)
  assert trained.shape == reference.shape
  _check_abs_diff(trained.input_subtract, reference.input_subtract, 1e-8)
  _check_abs_diff(trained.input_divide, reference.input_divide, 1e-8)

  # check labels
  curr_label = trained.predict_class(data)
  prev_label = reference.predict_class(data)
  assert numpy.array_equal(curr_label, prev_label)

  # check scores
  curr_labels, curr_scores = trained.predict_class_and_scores(data)
  prev_labels, prev_scores = reference.predict_class_and_scores(data)
  assert numpy.array_equal(curr_labels, prev_labels)
  _check_abs_diff(numpy.array(curr_scores), numpy.array(prev_scores), 1e-8)

  # check probabilities -- probA and probB do not get the exact same values
  # as when using libsvm's svm-train.c. The reason may lie in the order in
  # which the samples are arranged, so the exact comparison stays disabled.
  curr_labels, curr_scores = trained.predict_class_and_probabilities(data)
  prev_labels, prev_scores = reference.predict_class_and_probabilities(data)
  curr_scores = numpy.array(curr_scores)
  prev_scores = numpy.array(prev_scores)
  #_check_abs_diff(curr_scores, prev_scores, 1e-8)
Artur Costa Pazo's avatar
Artur Costa Pazo committed
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204

def test_training_one_class():

  # For this example I'm using an OC-SVM file because of convenience. You
  # only need to make sure you can gather the input into 2D singular arrays
  # in which the only array represents data from one class and each line on
  # such array contains a sample.
  f = File(HEART_DATA)
  labels, data = f.read_all()
  pos = numpy.vstack([k for i,k in enumerate(data) if labels[i] > 0])

  # Data is also pre-scaled so features remain in the range between -1 and
  # +1. libsvm, apparently, suggests you do that for all features. Our
  # bindings to libsvm do not include scaling. If you want to implement that
  # generically, please do it.

  trainer = Trainer(machine_type='ONE_CLASS')
  machine = trainer.train([pos]) #ordering only affects labels
  previous = Machine(TEST_MACHINE_ONE_CLASS)
  nose.tools.eq_(machine.machine_type, previous.machine_type)
  nose.tools.eq_(machine.kernel_type, previous.kernel_type)
  assert numpy.isclose(machine.gamma, previous.gamma)
  nose.tools.eq_(machine.shape, previous.shape)
  _check_abs_diff(machine.input_subtract, previous.input_subtract, 1e-8)
  _check_abs_diff(machine.input_divide, previous.input_divide, 1e-8)

  # predicted classes must match the reference machine's
  curr_label = machine.predict_class(data)
  prev_label = previous.predict_class(data)
  assert numpy.array_equal(curr_label, prev_label)

  curr_labels, curr_scores = machine.predict_class_and_scores(data)
  prev_labels, prev_scores = previous.predict_class_and_scores(data)
  assert numpy.array_equal(curr_labels, prev_labels)

  curr_scores = numpy.array(curr_scores)
  prev_scores = numpy.array(prev_scores)
  # fix: the tolerance argument was missing here, so this call always raised
  # TypeError; use the same 1e-8 tolerance as the other regression tests
  _check_abs_diff(curr_scores, prev_scores, 1e-8)
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237

def test_successive_training():

  # Tests successive training works: i.e., training a couple of machines one
  # after the other.

  # fixed seed so both iterations draw reproducible samples
  numpy.random.seed(10)

  for i in range(2):
    # two 2D gaussian clouds of 100 samples each, means offset by one unit
    pos = numpy.random.normal(0., 1, size=(100, 2))
    neg = numpy.random.normal(1., 1, size=(100, 2))
    data = [pos, neg]

    # a fresh trainer per iteration; training twice in a row must not crash
    trainer = Trainer()
    trainer.kernel_type = 'LINEAR'
    trainer.cost = 1
    trainer.train(data)