Commit fb7b686b
Authored 3 years ago by Yannick DAYER

    Added GMM dask tests, renamed tests

Parent: 807ca422
Related merge requests: !42 "GMM implementation in Python", !40 "Transition to a pure python implementation"

Changes: 1 changed file
bob/learn/em/test/test_gmm.py — 130 additions, 56 deletions
@@ -101,7 +101,7 @@ def test_GMMStats():
     # Clean-up
     os.unlink(filename)
 
 
-def test_GMMMachine_1():
+def test_GMMMachine():
     # Test a GMMMachine basic features
     weights = np.array([0.5, 0.5], "float64")
@@ -182,9 +182,41 @@ def test_GMMMachine_1():
     assert (gmm == gmm6) is False
     assert gmm.is_similar_to(gmm6) is False
 
+    # Saving and loading
+    filename = tempfile.mkstemp(suffix=".hdf5")[1]
+    gmm.save(HDF5File(filename, "w"))
+    gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r"))
+    assert gmm == gmm1
+    gmm.save(filename)
+    gmm1 = GMMMachine.from_hdf5(filename)
+    assert gmm == gmm1
+    os.unlink(filename)
+
+    # Weights
+    n_gaussians = 5
+    machine = GMMMachine(n_gaussians)
+
+    default_weights = np.full(shape=(n_gaussians,), fill_value=1.0 / n_gaussians)
+    default_log_weights = np.full(shape=(n_gaussians,), fill_value=np.log(1.0 / n_gaussians))
+
+    # Test weights getting and setting
+    np.testing.assert_almost_equal(machine.weights, default_weights)
+    np.testing.assert_almost_equal(machine.log_weights, default_log_weights)
+
+    modified_weights = default_weights
+    modified_weights[: n_gaussians // 2] = (1 / n_gaussians) / 2
+    modified_weights[n_gaussians // 2 + n_gaussians % 2 :] = (1 / n_gaussians) * 1.5
+
+    # Ensure setter works (log_weights is updated correctly)
+    machine.weights = modified_weights
+    np.testing.assert_almost_equal(machine.weights, modified_weights)
+    np.testing.assert_almost_equal(machine.log_weights, np.log(modified_weights))
 
-def test_GMMMachine_2():
-    # Test a GMMMachine (statistics)
+
+def test_GMMMachine_stats():
+    """Tests a GMMMachine (statistics)"""
 
     arrayset = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
     gmm = GMMMachine(n_gaussians=2)
@@ -206,8 +238,8 @@ def test_GMMMachine_2():
     np.testing.assert_almost_equal(stats.sum_pxx, stats_ref.sum_pxx, decimal=10)
 
 
-def test_GMMMachine_3():
-    # Test a GMMMachine (log-likelihood computation)
+def test_GMMMachine_ll_computation():
+    """Test a GMMMachine (log-likelihood computation)"""
 
     data = load_array(resource_filename("bob.learn.em", "data/data.hdf5"))
     gmm = GMMMachine(n_gaussians=2)
@@ -215,13 +247,12 @@ def test_GMMMachine_3():
     gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5"))
     gmm.variances = load_array(resource_filename("bob.learn.em", "data/variances.hdf5"))
 
-    # Compare the log-likelihood with the one obtained using Chris Matlab
-    # implementation
+    # Compare the log-likelihood with the one obtained using Chris Matlab implementation
     matlab_ll_ref = -2.361583051672024e+02
     np.testing.assert_almost_equal(gmm.log_likelihood(data), matlab_ll_ref, decimal=10)
 
 
-def test_GMMMachine_4():
+def test_GMMMachine_single_ll_vs_multiple():
 
     np.random.seed(3)  # FIXING A SEED
@@ -241,7 +272,7 @@ def test_GMMMachine_4():
     assert np.isclose(ll, gmm.log_likelihood(data).mean())
 
 
-def test_GMMStats_2():
+def test_GMMStats_operations():
     """Test a GMMStats."""
     # Initializing a GMMStats
     data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9]])
@@ -319,18 +350,17 @@ def test_machine_parameters():
     np.testing.assert_equal(machine.variances, np.ones((n_gaussians, n_features)))
 
     # Setters
     new_means = np.repeat([[1], [2], [3]], n_features, axis=1)
     machine.means = new_means
     assert machine.means.shape == (n_gaussians, n_features)
-    np.testing.assert_almost_equal(machine.means, new_means)
+    np.testing.assert_equal(machine.means, new_means)
 
     new_variances = np.repeat([[0.2], [1.1], [1]], n_features, axis=1)
     machine.variances = new_variances
     assert machine.variances.shape == (n_gaussians, n_features)
-    np.testing.assert_almost_equal(machine.variances, new_variances)
+    np.testing.assert_equal(machine.variances, new_variances)
 
 
-def test_kmeans_plusplus_init():
+def test_gmm_kmeans_plusplus_init():
     n_gaussians = 3
     machine = GMMMachine(
         n_gaussians,
@@ -344,7 +374,7 @@ def test_kmeans_plusplus_init():
     np.testing.assert_almost_equal(machine.variances, expected_variances)
 
 
-def test_kmeans_parallel_init():
+def test_gmm_kmeans_parallel_init():
     n_gaussians = 3
     machine = GMMMachine(
         n_gaussians,
@@ -567,9 +597,6 @@ def loadGMM():
     return gmm
 
 
-def equals(x, y, epsilon):
-    return (abs(x - y) < epsilon).all()
-
-
 def test_gmm_ML_1():
     """Trains a GMMMachine with ML_GMMTrainer"""
     ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
@@ -632,44 +659,6 @@ def test_gmm_ML_2():
     np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4)
 
 
-def test_gmm_ML_parallel():
-    """Trains a GMMMachine with ML_GMMTrainer; compares to a reference"""
-    ar = da.array(load_array(resource_filename("bob.learn.em", "data/dataNormalized.hdf5")))
-
-    # Initialize GMMMachine
-    gmm = GMMMachine(n_gaussians=5)
-    gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterKMeans.hdf5")).astype("float64")
-    gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterKMeans.hdf5")).astype("float64")
-    gmm.weights = np.exp(load_array(resource_filename("bob.learn.em", "data/weightsAfterKMeans.hdf5")).astype("float64"))
-
-    threshold = 0.001
-    gmm.variance_thresholds = threshold
-
-    # Initialize ML Trainer
-    gmm.mean_var_update_threshold = 0.001
-    gmm.max_fitting_steps = 25
-    gmm.convergence_threshold = 0.00001
-    gmm.update_means = True
-    gmm.update_variances = True
-    gmm.update_weights = True
-
-    # Run ML
-    gmm.fit(ar)
-
-    # Test results
-    # Load torch3vision reference
-    meansML_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
-    variancesML_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
-    weightsML_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))
-
-    # Compare to current results
-    np.testing.assert_allclose(gmm.means, meansML_ref, atol=3e-3)
-    np.testing.assert_allclose(gmm.variances, variancesML_ref, atol=3e-3)
-    np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4)
-
-
 def test_gmm_MAP_1():
     """Train a GMMMachine with MAP_GMMTrainer"""
     ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
@@ -788,7 +777,7 @@ def test_gmm_MAP_3():
 def test_gmm_test():
     """Tests a GMMMachine by computing scores against a model and comparing to a reference
     """
     ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))
@@ -810,3 +799,88 @@ def test_gmm_test():
     # Compare current results to torch3vision
     assert abs(score - score_mean_ref) / score_mean_ref < 1e-4
 
+
+def test_gmm_ML_dask():
+    """Trains a GMMMachine with dask array data; compares to a reference"""
+    ar = da.array(load_array(resource_filename("bob.learn.em", "data/dataNormalized.hdf5")))
+
+    # Initialize GMMMachine
+    gmm = GMMMachine(n_gaussians=5)
+    gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterKMeans.hdf5")).astype("float64")
+    gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterKMeans.hdf5")).astype("float64")
+    gmm.weights = np.exp(load_array(resource_filename("bob.learn.em", "data/weightsAfterKMeans.hdf5")).astype("float64"))
+
+    threshold = 0.001
+    gmm.variance_thresholds = threshold
+
+    # Initialize ML Trainer
+    gmm.mean_var_update_threshold = 0.001
+    gmm.max_fitting_steps = 25
+    gmm.convergence_threshold = 0.00001
+    gmm.update_means = True
+    gmm.update_variances = True
+    gmm.update_weights = True
+
+    # Run ML
+    gmm.fit(ar)
+
+    # Test results
+    # Load torch3vision reference
+    meansML_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
+    variancesML_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
+    weightsML_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))
+
+    # Compare to current results
+    np.testing.assert_allclose(gmm.means, meansML_ref, atol=3e-3)
+    np.testing.assert_allclose(gmm.variances, variancesML_ref, atol=3e-3)
+    np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4)
+
+
+def test_gmm_MAP_dask():
+    """Test a GMMMachine for MAP with a dask array as data."""
+    ar = da.array(load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5")))
+
+    # Initialize GMMMachine
+    n_gaussians = 5
+    prior_gmm = GMMMachine(n_gaussians)
+    prior_gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
+    prior_gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
+    prior_gmm.weights = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))
+
+    threshold = 0.001
+    prior_gmm.variance_thresholds = threshold
+
+    # Initialize MAP Trainer
+    prior = 0.001
+    accuracy = 0.00001
+    gmm = GMMMachine(
+        n_gaussians,
+        trainer="map",
+        ubm=prior_gmm,
+        convergence_threshold=prior,
+        max_fitting_steps=1,
+        update_means=True,
+        update_variances=False,
+        update_weights=False,
+        mean_var_update_threshold=accuracy,
+        relevance_factor=None,
+    )
+    gmm.variance_thresholds = threshold
+
+    # Train
+    gmm = gmm.fit(ar)
+
+    # Test results
+    # Load torch3vision reference
+    meansMAP_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterMAP.hdf5"))
+    variancesMAP_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterMAP.hdf5"))
+    weightsMAP_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterMAP.hdf5"))
+
+    # Compare to current results
+    # Gaps are quite large. This might be explained by the fact that there is no
+    # adaptation of a given Gaussian in torch3 when the corresponding responsibilities
+    # are below the responsibilities threshold
+    np.testing.assert_allclose(gmm.means, meansMAP_ref, atol=2e-1)
+    np.testing.assert_allclose(gmm.variances, variancesMAP_ref, atol=1e-4)
+    np.testing.assert_allclose(gmm.weights, weightsMAP_ref, atol=1e-4)
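For readers who want to try the new dask path outside the test suite, here is a minimal, hypothetical sketch (not part of the commit). It mirrors the pattern of the added tests — construct a GMMMachine, set the update flags and stopping criteria as attributes, then call fit() on a dask array — but substitutes randomly generated data for the packaged reference files. The import path for GMMMachine and the chunking choice are assumptions, not taken from this diff.

# Minimal sketch; the GMMMachine import path below is assumed.
import dask.array as da
import numpy as np

from bob.learn.em import GMMMachine  # assumed import location

# Random 2-D data wrapped in a chunked dask array (stand-in for the
# data/dataNormalized.hdf5 reference used by the real test).
rng = np.random.RandomState(3)
data = da.from_array(rng.normal(size=(1000, 2)), chunks=(250, 2))

# Configure the machine the way the dask tests do, then fit on the dask array.
gmm = GMMMachine(n_gaussians=2)
gmm.max_fitting_steps = 25
gmm.convergence_threshold = 1e-5
gmm.update_means = True
gmm.update_variances = True
gmm.update_weights = True

gmm.fit(data)
print(gmm.weights, gmm.means)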