Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
bob.learn.em
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
bob
bob.learn.em
Commits
ded8a6e9
Commit
ded8a6e9
authored
3 years ago
by
Yannick DAYER
Browse files
Options
Downloads
Patches
Plain Diff
Adapt GMMMachine to new KMeansMachine.
parent
54fff114
No related branches found
No related tags found
2 merge requests
!42
GMM implementation in Python
,
!40
Transition to a pure python implementation
Pipeline
#56662
passed
3 years ago
Stage: build
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
bob/learn/em/data/gmm_ML.hdf5
+0
-0
0 additions, 0 deletions
bob/learn/em/data/gmm_ML.hdf5
bob/learn/em/mixture/gmm.py
+17
-17
17 additions, 17 deletions
bob/learn/em/mixture/gmm.py
bob/learn/em/test/test_gmm.py
+20
-8
20 additions, 8 deletions
bob/learn/em/test/test_gmm.py
with
37 additions
and
25 deletions
bob/learn/em/data/gmm_ML.hdf5
+
0
−
0
View file @
ded8a6e9
No preview for this file type
This diff is collapsed.
Click to expand it.
bob/learn/em/mixture/gmm.py
+
17
−
17
View file @
ded8a6e9
...
...
@@ -13,7 +13,6 @@ import numpy as np
from
sklearn.base
import
BaseEstimator
from
bob.learn.em.cluster
import
KMeansMachine
from
bob.learn.em.cluster
import
KMeansTrainer
from
h5py
import
File
as
HDF5File
...
...
@@ -274,7 +273,7 @@ class GMMMachine(BaseEstimator):
max_fitting_steps
:
Union
[
int
,
None
]
=
200
,
random_state
:
Union
[
int
,
np
.
random
.
RandomState
]
=
0
,
weights
:
"
Union[np.ndarray[(
'
n_gaussians
'
,), float], None]
"
=
None
,
k_means_trainer
:
Union
[
KMeans
Tra
ine
r
,
None
]
=
None
,
k_means_trainer
:
Union
[
KMeans
Mach
ine
,
None
]
=
None
,
update_means
:
bool
=
True
,
update_variances
:
bool
=
False
,
update_weights
:
bool
=
False
,
...
...
@@ -317,7 +316,7 @@ class GMMMachine(BaseEstimator):
Ratio for MAP adaptation. Used when `trainer ==
"
map
"
` and
`relevance_factor is None`)
relevance_factor:
Factor for the computation of alpha with Reyolds adaptation. (Used when
Factor for the computation of alpha with Rey
n
olds adaptation. (Used when
`trainer ==
"
map
"
`)
variance_thresholds:
The variance flooring thresholds, i.e. the minimum allowed value of variance in each dimension.
...
...
@@ -395,14 +394,14 @@ class GMMMachine(BaseEstimator):
def
variances
(
self
,
variances
:
"
np.ndarray[(
'
n_gaussians
'
,
'
n_features
'
), float]
"
):
self
.
_variances
=
np
.
maximum
(
self
.
variance_thresholds
,
variances
)
# Recompute g_norm for each gaussian [array of shape (n_gaussians,)]
n_log_2pi
=
self
.
variances
.
shape
[
-
1
]
*
np
.
log
(
2
*
np
.
pi
)
n_log_2pi
=
self
.
_
variances
.
shape
[
-
1
]
*
np
.
log
(
2
*
np
.
pi
)
self
.
_g_norms
=
np
.
array
(
n_log_2pi
+
np
.
log
(
self
.
_variances
).
sum
(
axis
=-
1
))
@property
def
variance_thresholds
(
self
):
"""
Threshold below which variances are clamped to prevent precision losses.
"""
if
self
.
_variance_thresholds
is
None
:
r
aise
ValueError
(
"
GMMMachine variance thresholds were never set.
"
)
r
eturn
EPSILON
return
self
.
_variance_thresholds
@variance_thresholds.setter
...
...
@@ -411,7 +410,8 @@ class GMMMachine(BaseEstimator):
threshold
:
"
Union[float, np.ndarray[(
'
n_gaussians
'
,
'
n_features
'
), float]]
"
,
):
self
.
_variance_thresholds
=
threshold
self
.
variances
=
np
.
maximum
(
threshold
,
self
.
variances
)
if
self
.
_variances
is
not
None
:
self
.
variances
=
np
.
maximum
(
threshold
,
self
.
_variances
)
@property
def
g_norms
(
self
):
...
...
@@ -542,12 +542,11 @@ class GMMMachine(BaseEstimator):
if
data
is
None
:
raise
ValueError
(
"
Data is required when training with k-means.
"
)
logger
.
info
(
"
Initializing GMM with k-means.
"
)
kmeans_trainer
=
self
.
k_means_trainer
or
KMeansTrainer
(
kmeans_machine
=
self
.
k_means_trainer
or
KMeansMachine
(
self
.
n_gaussians
,
random_state
=
self
.
random_state
,
)
kmeans_machine
=
KMeansMachine
(
self
.
n_gaussians
).
fit
(
data
,
trainer
=
kmeans_trainer
)
kmeans_machine
=
kmeans_machine
.
fit
(
data
)
(
variances
,
...
...
@@ -706,6 +705,10 @@ class GMMMachine(BaseEstimator):
else
:
logger
.
debug
(
"
GMM means already set. Initialization was not run!
"
)
if
self
.
_variances
is
None
:
logger
.
warning
(
"
Variances were not defined before fit. Using variance=1
"
)
self
.
variances
=
np
.
ones_like
(
self
.
means
)
average_output
=
0
logger
.
info
(
"
Training GMM...
"
)
step
=
0
...
...
@@ -713,11 +716,7 @@ class GMMMachine(BaseEstimator):
step
+=
1
logger
.
info
(
f
"
Iteration
{
step
:
3
d
}
"
+
(
f
"
/
{
self
.
max_fitting_steps
:
3
d
}
"
if
self
.
max_fitting_steps
is
not
None
else
""
)
+
(
f
"
/
{
self
.
max_fitting_steps
:
3
d
}
"
if
self
.
max_fitting_steps
else
""
)
)
average_output_previous
=
average_output
...
...
@@ -750,8 +749,9 @@ class GMMMachine(BaseEstimator):
and
convergence_value
<=
self
.
convergence_threshold
):
logger
.
info
(
"
Reached convergence threshold. Training stopped.
"
)
return
self
logger
.
info
(
"
Reached maximum step. Training stopped without convergence.
"
)
break
else
:
logger
.
info
(
"
Reached maximum step. Training stopped without convergence.
"
)
self
.
compute
()
return
self
...
...
This diff is collapsed.
Click to expand it.
bob/learn/em/test/test_gmm.py
+
20
−
8
View file @
ded8a6e9
...
...
@@ -22,7 +22,7 @@ from bob.io.base import load as load_array
from
bob.learn.em.mixture
import
GMMMachine
from
bob.learn.em.mixture
import
GMMStats
from
bob.learn.em.cluster
import
KMeans
Tra
ine
r
from
bob.learn.em.cluster
import
KMeans
Mach
ine
def
test_GMMStats
():
# Test a GMMStats
...
...
@@ -133,11 +133,10 @@ def test_GMMMachine_1():
# Checks particular varianceThresholds-related methods
varianceThresholds1D
=
np
.
array
([
0.3
,
1
,
0.5
],
"
float64
"
)
gmm
.
variance_thresholds
=
varianceThresholds1D
np
.
testing
.
assert_equal
(
gmm
.
variance_thresholds
[
0
,:],
varianceThresholds1D
)
np
.
testing
.
assert_equal
(
gmm
.
variance_thresholds
[
1
,:],
varianceThresholds1D
)
np
.
testing
.
assert_equal
(
gmm
.
variance_thresholds
,
varianceThresholds1D
)
gmm
.
variance_thresholds
=
0.005
np
.
testing
.
assert_equal
(
gmm
.
variance_thresholds
,
np
.
full
((
2
,
3
),
0.005
)
)
np
.
testing
.
assert_equal
(
gmm
.
variance_thresholds
,
0.005
)
gmm
.
means
=
newMeans
gmm
.
variances
=
newVariances
...
...
@@ -251,6 +250,7 @@ def test_GMMStats_2():
machine
=
GMMMachine
(
n_gaussians
)
machine
.
means
=
np
.
array
([[
0
,
0
,
0
],
[
8
,
8
,
8
]])
machine
.
variances
=
np
.
ones_like
(
machine
.
means
)
# Populate the GMMStats
stats
=
machine
.
acc_statistics
(
data
)
...
...
@@ -314,6 +314,7 @@ def test_machine_parameters():
n_features
=
2
machine
=
GMMMachine
(
n_gaussians
)
machine
.
means
=
np
.
repeat
([[
0
],
[
1
],
[
-
1
]],
n_features
,
1
)
machine
.
variances
=
np
.
ones_like
(
machine
.
means
)
np
.
testing
.
assert_equal
(
machine
.
means
,
np
.
repeat
([[
0
],
[
1
],
[
-
1
]],
n_features
,
1
))
np
.
testing
.
assert_equal
(
machine
.
variances
,
np
.
ones
((
n_gaussians
,
n_features
)))
...
...
@@ -331,7 +332,10 @@ def test_machine_parameters():
def
test_kmeans_plusplus_init
():
n_gaussians
=
3
machine
=
GMMMachine
(
n_gaussians
,
k_means_trainer
=
KMeansTrainer
(
"
k-means++
"
))
machine
=
GMMMachine
(
n_gaussians
,
k_means_trainer
=
KMeansMachine
(
n_clusters
=
n_gaussians
,
init_method
=
"
k-means++
"
),
)
data
=
np
.
array
([[
1.5
,
1
],
[
1
,
1.5
],
[
-
1
,
0.5
],
[
-
1.5
,
0
],
[
2
,
2
],
[
2.5
,
2.5
]])
machine
=
machine
.
fit
(
data
)
expected_means
=
np
.
array
([[
2.25
,
2.25
],
[
-
1.25
,
0.25
],
[
1.25
,
1.25
]])
...
...
@@ -342,7 +346,10 @@ def test_kmeans_plusplus_init():
def
test_kmeans_parallel_init
():
n_gaussians
=
3
machine
=
GMMMachine
(
n_gaussians
,
k_means_trainer
=
KMeansTrainer
(
"
k-means||
"
))
machine
=
GMMMachine
(
n_gaussians
,
k_means_trainer
=
KMeansMachine
(
n_clusters
=
n_gaussians
,
init_method
=
"
k-means||
"
),
)
data
=
np
.
array
([[
1.5
,
1
],
[
1
,
1.5
],
[
-
1
,
0.5
],
[
-
1.5
,
0
],
[
2
,
2
],
[
2.5
,
2.5
]])
machine
=
machine
.
fit
(
data
)
expected_means
=
np
.
array
([[
1.25
,
1.25
],
[
-
1.25
,
0.25
],
[
2.25
,
2.25
]])
...
...
@@ -356,6 +363,7 @@ def test_likelihood():
n_gaussians
=
3
machine
=
GMMMachine
(
n_gaussians
)
machine
.
means
=
np
.
repeat
([[
0
],
[
1
],
[
-
1
]],
3
,
1
)
machine
.
variances
=
np
.
ones_like
(
machine
.
means
)
log_likelihood
=
machine
.
log_likelihood
(
data
)
expected_ll
=
np
.
array
(
[
-
3.6519900964986527
,
-
3.83151883210222
,
-
3.83151883210222
,
-
5.344374066745753
]
...
...
@@ -390,6 +398,7 @@ def test_likelihood_weight():
n_gaussians
=
3
machine
=
GMMMachine
(
n_gaussians
)
machine
.
means
=
np
.
repeat
([[
0
],
[
1
],
[
-
1
]],
3
,
1
)
machine
.
variances
=
np
.
ones_like
(
machine
.
means
)
machine
.
weights
=
[
0.6
,
0.1
,
0.3
]
log_likelihood
=
machine
.
log_likelihood
(
data
)
expected_ll
=
np
.
array
(
...
...
@@ -429,7 +438,7 @@ def test_ml_em():
machine
=
GMMMachine
(
n_gaussians
,
update_means
=
True
,
update_variances
=
True
,
update_weights
=
True
)
machine
.
means
=
np
.
repeat
([[
2
],
[
8
]],
n_features
,
1
)
machine
.
initialize_gaussians
(
None
)
machine
.
variances
=
np
.
ones_like
(
machine
.
means
)
stats
=
machine
.
e_step
(
data
)
machine
.
m_step
(
stats
)
...
...
@@ -447,6 +456,7 @@ def test_map_em():
n_gaussians
=
2
prior_machine
=
GMMMachine
(
n_gaussians
)
prior_machine
.
means
=
np
.
array
([[
2
,
2
,
2
],
[
8
,
8
,
8
]])
prior_machine
.
variances
=
np
.
ones_like
(
prior_machine
.
means
)
prior_machine
.
weights
=
np
.
array
([
0.5
,
0.5
])
machine
=
GMMMachine
(
n_gaussians
,
trainer
=
"
map
"
,
ubm
=
prior_machine
,
update_means
=
True
,
update_variances
=
True
,
update_weights
=
True
)
...
...
@@ -483,6 +493,7 @@ def test_ml_transformer():
machine
=
GMMMachine
(
n_gaussians
,
update_means
=
True
,
update_variances
=
True
,
update_weights
=
True
)
machine
.
means
=
np
.
array
([[
2
,
2
,
2
],
[
8
,
8
,
8
]])
machine
.
variances
=
np
.
ones_like
(
machine
.
means
)
machine
=
machine
.
fit
(
data
)
...
...
@@ -514,6 +525,7 @@ def test_map_transformer():
n_features
=
3
prior_machine
=
GMMMachine
(
n_gaussians
)
prior_machine
.
means
=
np
.
array
([[
2
,
2
,
2
],
[
8
,
8
,
8
]])
prior_machine
.
variances
=
np
.
ones_like
(
prior_machine
.
means
)
prior_machine
.
weights
=
np
.
array
([
0.5
,
0.5
])
machine
=
GMMMachine
(
n_gaussians
,
trainer
=
"
map
"
,
ubm
=
prior_machine
,
update_means
=
True
,
update_variances
=
True
,
update_weights
=
True
)
...
...
@@ -586,7 +598,7 @@ def test_gmm_ML_1():
def
test_gmm_ML_2
():
"""
Trains a GMMMachine with ML_GMMTrainer; compares to a
n old
reference
"""
"""
Trains a GMMMachine with ML_GMMTrainer; compares to a reference
"""
ar
=
load_array
(
resource_filename
(
"
bob.learn.em
"
,
"
data/dataNormalized.hdf5
"
))
# Initialize GMMMachine
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment