#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.anjos@idiap.ch>
# Tue 29 Apr 2014 16:16:33 CEST
#
# Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland

"""All kinds of tests on the DataShuffler class
"""

import time
import numpy
import nose.tools

from . import DataShuffler

import xbob.core.random

# Some data structures used for the tests.
#
# Three classes are defined. For class ``k`` (1, 2 or 3):
#   * ``fixture['dataK']`` is a unit vector along axis ``k`` (the prototype),
#   * ``fixture['setK']`` stacks the prototype scaled by 1, 2 and 3 (3x3 array),
#   * ``fixture['targetK']`` is the 1-element target ``[k]``.
fixture = dict()

fixture['data1'] = numpy.array([1, 0, 0], dtype='float64')
fixture['target1'] = numpy.array([1], dtype='float64')
fixture['set1'] = numpy.array([
    fixture['data1'],
    fixture['data1'] * 2,
    fixture['data1'] * 3,
    ])

fixture['data2'] = numpy.array([0, 1, 0], dtype='float64')
fixture['target2'] = numpy.array([2], dtype='float64')
fixture['set2'] = numpy.array([
    fixture['data2'],
    fixture['data2'] * 2,
    fixture['data2'] * 3,
    ])

fixture['data3'] = numpy.array([0, 0, 1], dtype='float64')
fixture['target3'] = numpy.array([3], dtype='float64')
fixture['set3'] = numpy.array([
    fixture['data3'],
    fixture['data3'] * 2,
    fixture['data3'] * 3,
    ])


def _new_shuffler():
    """Returns a fresh DataShuffler loaded with the three fixture classes."""

    return DataShuffler(
        [fixture['set1'], fixture['set2'], fixture['set3']],
        [fixture['target1'], fixture['target2'], fixture['target3']],
        )


def _count_class(data, prototype):
    """Counts the rows of ``data`` that are not orthogonal to ``prototype``.

    The fixture classes live on distinct axes, so a non-zero dot product with
    a class prototype identifies a sample drawn from that class.
    """

    return sum(1 for row in data if numpy.dot(row, prototype) != 0)


def test_initialization():

    # Test if we can correctly initialize the shuffler

    shuffle = _new_shuffler()

    nose.tools.eq_(shuffle.data_width, 3)
    nose.tools.eq_(shuffle.target_width, 1)


def test_initialization_with_arrays():

    # Test if we can initialize the shuffler with simple arrays

    data = [
        numpy.zeros((10, 2), 'float64'),
        numpy.ones((10, 2), 'float64'),
        ]

    target = [
        numpy.array([+1, +1], 'float64'),
        numpy.array([-1, -1], 'float64'),
        ]

    shuffle = DataShuffler(data, target)
    nose.tools.eq_(shuffle.data_width, 2)
    nose.tools.eq_(shuffle.target_width, 2)


def test_drawing():

    # Tests that drawing works in a particular way: the returned arrays have
    # the requested number of rows and the rows are spread over the classes.

    shuffle = _new_shuffler()

    N = 6  # multiple of the number of classes (3)

    [data, target] = shuffle(N)

    nose.tools.eq_(data.shape, (N, shuffle.data_width))
    nose.tools.eq_(target.shape, (N, shuffle.target_width))

    # Finally, we also test if the data is well separated. We have to have
    # N / 3 = 2 samples of each class since N is a multiple of the number of
    # classes.
    nose.tools.eq_(_count_class(data, fixture['data1']), 2)
    nose.tools.eq_(_count_class(data, fixture['data2']), 2)
    nose.tools.eq_(_count_class(data, fixture['data3']), 2)

    N = 28  # not a multiple anymore

    [data, target] = shuffle(N)

    nose.tools.eq_(data.shape, (N, shuffle.data_width))
    nose.tools.eq_(target.shape, (N, shuffle.target_width))

    # 28 = 3 * 9 + 1: every class gets 9 samples and this test expects the
    # single left-over sample to be drawn from the first class.
    nose.tools.eq_(_count_class(data, fixture['data1']), 10)
    nose.tools.eq_(_count_class(data, fixture['data2']), 9)
    nose.tools.eq_(_count_class(data, fixture['data3']), 9)


def test_seeding():

    # Test if we can correctly set the seed and that this act is effective

    # First test that, by making two shufflers, we get different replies
    shuffle1 = _new_shuffler()
    shuffle2 = _new_shuffler()

    N = 100

    # This will use the current time as seed.
    [data1, target1] = shuffle1(N)
    time.sleep(0.1)  # Sleeps 0.1 second to make sure we get different seeds
    [data2, target2] = shuffle2(N)

    assert not (data1 == data2).all()
    # Note targets will always be the same given N because of the internal
    # design of the C++ DataShuffler.

    # Now show that drawing twice does not give the same replies!
    # This indicates that the internal random generator is updated at each
    # draw as one expects.
    [data1_2, target1_2] = shuffle1(N)

    assert not (data1 == data1_2).all()

    # Finally show that, by setting the seed, we can get the same results
    shuffle1 = _new_shuffler()
    shuffle2 = _new_shuffler()

    # Use the same seed for 2 different random number generators
    rng1 = xbob.core.random.mt19937(32)
    rng2 = xbob.core.random.mt19937(32)

    [data1, target1] = shuffle1(rng1, N)
    [data2, target2] = shuffle2(rng2, N)

    assert (data1 == data2).all()


def test_normalization():

    # Tests that the shuffler can get the std. normalization right
    # Compares results to numpy
    shuffle = _new_shuffler()

    # Same 9 samples as the three fixture sets, stacked in class order
    npy = numpy.array([[1, 0, 0], [2, 0, 0], [3, 0, 0],
                       [0, 1, 0], [0, 2, 0], [0, 3, 0],
                       [0, 0, 1], [0, 0, 2], [0, 0, 3]], 'float64')
    precalc_mean = numpy.array(numpy.mean(npy, 0))
    # ddof=1: compare against the unbiased (N-1) estimator
    precalc_stddev = numpy.array(numpy.std(npy, 0, ddof=1))
    [mean, stddev] = shuffle.stdnorm()

    assert (mean == precalc_mean).all()
    assert (stddev == precalc_stddev).all()

    # Now we set the stdnorm flag on and expect normalized data
    assert not shuffle.auto_stdnorm
    shuffle.auto_stdnorm = True
    assert shuffle.auto_stdnorm

    [data, target] = shuffle(10000)

    # Makes sure the data is approximately zero mean and has std.dev. ~ 1
    # Note: Results will not be of a better precision because we only have 9
    # samples in the Shuffler...
    nose.tools.eq_(round(data.mean()), 0)
    nose.tools.eq_(round(numpy.std(data, ddof=1)), 1)


def test_normalization_big():

    # Same as test_normalization(), but with two large 1-D classes drawn from
    # normal distributions, so tighter tolerances can be used.

    rng = xbob.core.random.mt19937()

    draw25 = xbob.core.random.normal(mean=2.0, sigma=5.0, dtype=float)
    set1 = numpy.array(
        [numpy.array([draw25(rng)], dtype='float64') for _ in range(10000)])
    target1 = numpy.array([1], dtype='float64')

    # fixed: the keyword was misspelled "dtyle" here
    draw32 = xbob.core.random.normal(mean=3.0, sigma=2.0, dtype=float)
    set2 = numpy.array(
        [numpy.array([draw32(rng)], dtype='float64') for _ in range(10000)])
    target2 = numpy.array([2], dtype='float64')

    shuffle = DataShuffler([set1, set2], [target1, target2])
    shuffle.auto_stdnorm = True
    prev_mean, prev_stddev = shuffle.stdnorm()

    [data, target] = shuffle(200000)
    assert abs(data.mean()) < 1e-1
    assert abs(numpy.std(data, ddof=1) - 1.0) < 1e-1

    # note that resetting auto_stdnorm will make the whole go back to normal,
    # but the std normalization values remain the same...
    shuffle.auto_stdnorm = False
    back_mean, back_stddev = shuffle.stdnorm()
    # abs() must wrap the summed difference, not the comparison: abs() of a
    # boolean lets any large negative difference pass.
    assert abs((back_mean - prev_mean).sum()) < 1e-10
    assert abs((back_stddev - prev_stddev).sum()) < 1e-10