Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
bob.bio.gmm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
bob
bob.bio.gmm
Commits
285167a4
Commit
285167a4
authored
3 years ago
by
Yannick DAYER
Browse files
Options
Downloads
Patches
Plain Diff
Update gmm bioalgorithm
parent
248cf61a
No related branches found
No related tags found
1 merge request
!26
Python implementation of GMM
Pipeline
#57967
failed
3 years ago
Stage: build
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
bob/bio/gmm/algorithm/GMM.py
+61
-41
61 additions, 41 deletions
bob/bio/gmm/algorithm/GMM.py
with
61 additions
and
41 deletions
bob/bio/gmm/algorithm/GMM.py
+
61
−
41
View file @
285167a4
...
@@ -14,18 +14,21 @@ import logging
...
@@ -14,18 +14,21 @@ import logging
from
typing
import
Callable
from
typing
import
Callable
import
os
import
dask.array
as
da
import
dask.array
as
da
import
numpy
as
np
import
numpy
as
np
import
copy
import
dask
import
dask
from
h5py
import
File
as
HDF5File
from
h5py
import
File
as
HDF5File
from
sklearn.base
import
BaseEstimator
from
sklearn.base
import
BaseEstimator
from
bob.bio.base.pipelines.vanilla_biometrics.abstract_classes
import
BioAlgorithm
from
bob.bio.base.pipelines.vanilla_biometrics.abstract_classes
import
BioAlgorithm
from
bob.learn.em.cluster
import
KMeansMachine
from
bob.learn.em.mixture
import
GMMMachine
from
bob.learn.em.mixture
import
GMMMachine
from
bob.learn.em.mixture
import
GMMStats
from
bob.learn.em.mixture
import
GMMStats
from
bob.learn.em.mixture
import
linear_scoring
from
bob.learn.em.mixture
import
linear_scoring
from
bob.pipelines.wrappers
import
DaskWrapper
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
...
@@ -118,8 +121,7 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -118,8 +121,7 @@ class GMM(BioAlgorithm, BaseEstimator):
self
.
responsibility_threshold
=
responsibility_threshold
self
.
responsibility_threshold
=
responsibility_threshold
def
scoring_function_wrapped
(
*
args
,
**
kwargs
):
def
scoring_function_wrapped
(
*
args
,
**
kwargs
):
with
dask
.
config
.
set
(
scheduler
=
"
threads
"
):
return
scoring_function
(
*
args
,
**
kwargs
)
return
scoring_function
(
*
args
,
**
kwargs
)
self
.
scoring_function
=
scoring_function_wrapped
self
.
scoring_function
=
scoring_function_wrapped
...
@@ -154,7 +156,7 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -154,7 +156,7 @@ class GMM(BioAlgorithm, BaseEstimator):
self
.
ubm
.
save
(
hdf5
)
self
.
ubm
.
save
(
hdf5
)
def
load_ubm
(
self
,
ubm_file
):
def
load_ubm
(
self
,
ubm_file
):
hdf5file
=
HDF5File
(
ubm_file
)
hdf5file
=
HDF5File
(
ubm_file
,
"
r
"
)
logger
.
debug
(
"
Loading model from file
'
%s
'"
,
ubm_file
)
logger
.
debug
(
"
Loading model from file
'
%s
'"
,
ubm_file
)
# read UBM
# read UBM
self
.
ubm
=
GMMMachine
.
from_hdf5
(
hdf5file
)
self
.
ubm
=
GMMMachine
.
from_hdf5
(
hdf5file
)
...
@@ -163,19 +165,17 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -163,19 +165,17 @@ class GMM(BioAlgorithm, BaseEstimator):
def
project
(
self
,
array
):
def
project
(
self
,
array
):
"""
Computes GMM statistics against a UBM, given a 2D array of feature vectors
"""
"""
Computes GMM statistics against a UBM, given a 2D array of feature vectors
"""
self
.
_check_feature
(
array
)
self
.
_check_feature
(
array
)
logger
.
debu
g
(
"
.... Projecting %d feature vectors
"
,
array
.
shape
[
0
])
logger
.
warnin
g
(
"
.... Projecting %d feature vectors
"
,
array
.
shape
[
0
])
# Accumulates statistics
# Accumulates statistics
with
dask
.
config
.
set
(
scheduler
=
"
threads
"
):
gmm_stats
=
self
.
ubm
.
transform
(
array
)
gmm_stats
=
GMMStats
(
self
.
ubm
.
shape
[
0
],
self
.
ubm
.
shape
[
1
])
gmm_stats
.
compute
()
self
.
ubm
.
acc_statistics
(
array
,
gmm_stats
)
gmm_stats
.
compute
()
# return the resulting statistics
# return the resulting statistics
return
gmm_stats
return
gmm_stats
def
read_feature
(
self
,
feature_file
):
def
read_feature
(
self
,
feature_file
):
"""
Read the type of features that we require, namely GMM_Stats
"""
"""
Read the type of features that we require, namely GMM_Stats
"""
return
GMMStats
.
from_hdf5
(
HDF5File
(
feature_file
))
return
GMMStats
.
from_hdf5
(
HDF5File
(
feature_file
,
"
r
"
))
def
write_feature
(
self
,
feature
,
feature_file
):
def
write_feature
(
self
,
feature
,
feature_file
):
"""
Write the features (GMM_Stats)
"""
"""
Write the features (GMM_Stats)
"""
...
@@ -184,7 +184,7 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -184,7 +184,7 @@ class GMM(BioAlgorithm, BaseEstimator):
def
enroll
(
self
,
data
):
def
enroll
(
self
,
data
):
"""
Enrolls a GMM using MAP adaptation, given a list of 2D np.ndarray
'
s of feature vectors
"""
"""
Enrolls a GMM using MAP adaptation, given a list of 2D np.ndarray
'
s of feature vectors
"""
[
self
.
_check_feature
(
feature
)
for
feature
in
data
]
[
self
.
_check_feature
(
feature
)
for
feature
in
data
]
array
=
np
.
vstack
(
data
)
array
=
da
.
vstack
(
data
)
# Use the array to train a GMM and return it
# Use the array to train a GMM and return it
logger
.
debug
(
"
.... Enrolling with %d feature vectors
"
,
array
.
shape
[
0
])
logger
.
debug
(
"
.... Enrolling with %d feature vectors
"
,
array
.
shape
[
0
])
...
@@ -193,7 +193,7 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -193,7 +193,7 @@ class GMM(BioAlgorithm, BaseEstimator):
gmm
=
GMMMachine
(
gmm
=
GMMMachine
(
n_gaussians
=
self
.
number_of_gaussians
,
n_gaussians
=
self
.
number_of_gaussians
,
trainer
=
"
map
"
,
trainer
=
"
map
"
,
ubm
=
self
.
ubm
,
ubm
=
copy
.
deepcopy
(
self
.
ubm
)
,
convergence_threshold
=
self
.
training_threshold
,
convergence_threshold
=
self
.
training_threshold
,
max_fitting_steps
=
self
.
gmm_enroll_iterations
,
max_fitting_steps
=
self
.
gmm_enroll_iterations
,
random_state
=
self
.
rng
,
random_state
=
self
.
rng
,
...
@@ -202,18 +202,14 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -202,18 +202,14 @@ class GMM(BioAlgorithm, BaseEstimator):
update_weights
=
True
,
# TODO default?
update_weights
=
True
,
# TODO default?
)
)
gmm
.
variance_thresholds
=
self
.
variance_threshold
gmm
.
variance_thresholds
=
self
.
variance_threshold
gmm
=
gmm
.
fit
(
array
)
gmm
.
fit
(
array
)
# info = {k: type(v) for k, v in gmm.__dict__.items()}
# for k, v in gmm.gaussians_.__dict__.items():
# info[k] = type(v)
# raise ValueError(str(info))
return
gmm
return
gmm
def
read_
model
(
self
,
model_file
):
def
read_
biometric_reference
(
self
,
model_file
):
"""
Reads the model, which is a GMM machine
"""
"""
Reads the model, which is a GMM machine
"""
return
GMMMachine
.
from_hdf5
(
HDF5File
(
model_file
),
ubm
=
self
.
ubm
)
return
GMMMachine
.
from_hdf5
(
HDF5File
(
model_file
,
"
r
"
),
ubm
=
self
.
ubm
)
def
write_
model
(
self
,
model
,
model_file
):
def
write_
biometric_reference
(
self
,
model
,
model_file
):
"""
Write the features (GMM_Stats)
"""
"""
Write the features (GMM_Stats)
"""
return
model
.
save
(
model_file
)
return
model
.
save
(
model_file
)
...
@@ -230,11 +226,13 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -230,11 +226,13 @@ class GMM(BioAlgorithm, BaseEstimator):
The probe data to compare to the model.
The probe data to compare to the model.
"""
"""
logger
.
debug
(
f
"
scoring
{
biometric_reference
}
,
{
data
}
"
)
assert
isinstance
(
biometric_reference
,
GMMMachine
)
assert
isinstance
(
biometric_reference
,
GMMMachine
)
stats
=
self
.
project
(
data
)
return
self
.
scoring_function
(
return
self
.
scoring_function
(
models_means
=
[
biometric_reference
],
models_means
=
[
biometric_reference
],
ubm
=
self
.
ubm
,
ubm
=
self
.
ubm
,
test_stats
=
d
at
a
,
test_stats
=
st
at
s
,
frame_length_normalization
=
True
,
frame_length_normalization
=
True
,
)[
0
,
0
]
)[
0
,
0
]
...
@@ -253,6 +251,7 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -253,6 +251,7 @@ class GMM(BioAlgorithm, BaseEstimator):
The probe data to compare to the models.
The probe data to compare to the models.
"""
"""
logger
.
debug
(
f
"
scoring
{
biometric_references
}
,
{
data
}
"
)
assert
isinstance
(
biometric_references
[
0
],
GMMMachine
),
type
(
assert
isinstance
(
biometric_references
[
0
],
GMMMachine
),
type
(
biometric_references
[
0
]
biometric_references
[
0
]
)
)
...
@@ -266,9 +265,11 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -266,9 +265,11 @@ class GMM(BioAlgorithm, BaseEstimator):
def
score_for_multiple_probes
(
self
,
model
,
probes
):
def
score_for_multiple_probes
(
self
,
model
,
probes
):
"""
This function computes the score between the given model and several given probe files.
"""
"""
This function computes the score between the given model and several given probe files.
"""
logger
.
debug
(
f
"
scoring
{
model
}
,
{
probes
}
"
)
assert
isinstance
(
model
,
GMMMachine
)
assert
isinstance
(
model
,
GMMMachine
)
stats
=
[]
for
probe
in
probes
:
for
probe
in
probes
:
assert
isinstance
(
probe
,
GMMStats
)
stats
.
append
(
self
.
project
(
probe
)
)
# logger.warn("Please verify that this function is correct")
# logger.warn("Please verify that this function is correct")
return
(
return
(
self
.
scoring_function
(
self
.
scoring_function
(
...
@@ -283,31 +284,50 @@ class GMM(BioAlgorithm, BaseEstimator):
...
@@ -283,31 +284,50 @@ class GMM(BioAlgorithm, BaseEstimator):
def
fit
(
self
,
X
,
y
=
None
,
**
kwargs
):
def
fit
(
self
,
X
,
y
=
None
,
**
kwargs
):
"""
Trains the UBM.
"""
"""
Trains the UBM.
"""
ubm_filename
=
"
UBM_mobio_001.hdf5
"
# Manually set "projector" file TODO remove
if
not
os
.
path
.
exists
(
ubm_filename
):
# Stack all the samples in a 2D array of features
# Stack all the samples in a 2D array of features
array
=
da
.
vstack
(
X
)
array
=
np
.
vstack
(
X
)
.
persist
()
logger
.
debug
(
"
UBM with %d feature vectors
"
,
array
.
shape
[
0
])
logger
.
debug
(
"
UBM with %d feature vectors
"
,
array
.
shape
[
0
])
logger
.
debug
(
f
"
Creating UBM machine with
{
self
.
number_of_gaussians
}
gaussians
"
)
logger
.
debug
(
f
"
Creating UBM machine with
{
self
.
number_of_gaussians
}
gaussians
"
)
self
.
ubm
=
GMMMachine
(
self
.
ubm
=
GMMMachine
(
n_gaussians
=
self
.
number_of_gaussians
,
n_gaussians
=
self
.
number_of_gaussians
,
trainer
=
"
ml
"
,
trainer
=
"
ml
"
,
max_fitting_steps
=
self
.
ubm_training_iterations
,
max_fitting_steps
=
self
.
ubm_training_iterations
,
convergence_threshold
=
self
.
training_threshold
,
convergence_threshold
=
self
.
training_threshold
,
update_means
=
self
.
update_means
,
update_means
=
self
.
update_means
,
update_variances
=
self
.
update_variances
,
update_variances
=
self
.
update_variances
,
update_weights
=
self
.
update_weights
,
update_weights
=
self
.
update_weights
,
# TODO more params?
k_means_trainer
=
KMeansMachine
(
)
self
.
number_of_gaussians
,
convergence_threshold
=
self
.
training_threshold
,
# TODO Have a separate threshold for kmeans instead of re-using the one for GMM...
max_iter
=
self
.
kmeans_training_iterations
,
# TODO pass this param through GMMMachine instead of the full KMeansMachine?
init_method
=
"
k-means||
"
,
init_max_iter
=
5
,
)
# TODO more params?
)
# Trains the GMM
logger
.
info
(
"
Training UBM GMM
"
)
# Resetting the pseudo random number generator so we can have the same initialization for serial and parallel execution.
# self.rng = bob.core.random.mt19937(self.init_seed)
self
.
ubm
=
self
.
ubm
.
fit
(
array
)
# Trains the GMM
logger
.
warning
(
f
"
Saving trained ubm to
{
ubm_filename
}
"
)
logger
.
info
(
"
Training UBM GMM
"
)
self
.
save_ubm
(
ubm_filename
)
# Resetting the pseudo random number generator so we can have the same initialization for serial and parallel execution.
# self.rng = bob.core.random.mt19937(self.init_seed)
self
.
ubm
=
self
.
ubm
.
fit
(
array
)
if
not
np
.
all
(
self
.
ubm
.
weights
):
logger
.
error
(
"
zero weights after gmm training
"
)
raise
ValueError
(
"
!! zero weights after gmm training...
"
)
else
:
logger
.
warning
(
f
"
Loading trained ubm from
{
ubm_filename
}
"
)
self
.
load_ubm
(
ubm_filename
)
return
self
return
self
def
transform
(
self
,
X
,
**
kwargs
):
def
transform
(
self
,
X
,
**
kwargs
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment