Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.bio.gmm
Commits
2f94648c
Commit
2f94648c
authored
May 11, 2015
by
Manuel Günther
Browse files
Added IVector algorithm
parent
5e240088
Changes
10
Hide whitespace changes
Inline
Side-by-side
bob/bio/gmm/algorithm/GMM.py
View file @
2f94648c
...
...
@@ -132,7 +132,8 @@ class GMM (Algorithm):
"""Save projector to file"""
# Saves the UBM to file
logger
.
debug
(
" .... Saving model to file '%s'"
,
projector_file
)
self
.
ubm
.
save
(
bob
.
io
.
base
.
HDF5File
(
projector_file
,
"w"
))
hdf5
=
projector_file
if
isinstance
(
projector_file
,
bob
.
io
.
base
.
HDF5File
)
else
bob
.
io
.
base
.
HDF5File
(
projector_file
,
'w'
)
self
.
ubm
.
save
(
hdf5
)
def
train_projector
(
self
,
train_features
,
projector_file
):
...
...
bob/bio/gmm/algorithm/IVector.py
0 → 100644
View file @
2f94648c
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
import
bob.core
import
bob.io.base
import
bob.learn.linear
import
bob.learn.em
import
numpy
from
.GMM
import
GMM
from
bob.bio.base.algorithm
import
Algorithm
import
logging
# Module-level logger; all messages of this module are emitted under the
# "bob.bio.gmm" logger name.
logger = logging.getLogger("bob.bio.gmm")
class IVector(GMM):
    """Tool for extracting I-Vectors.

    Projection is a three-stage chain: GMM statistics against the UBM
    (``project_ubm``), an i-vector from the total variability (TV) machine
    (``project_ivec``), and a whitened, length-normalized i-vector
    (``project_whitening``).  A model is the mean of the enrollment
    i-vectors and a score is the cosine similarity of model and probe.

    ``ubm``, ``rng``, ``variance_threshold``, ``train_ubm``, ``project_ubm``,
    ``save_ubm``, ``load_ubm`` and ``_check_feature`` are not defined here;
    they are presumably provided by the :py:class:`GMM` base class.
    """

    def __init__(
        self,
        # IVector training
        subspace_dimension_of_t,  # T subspace dimension
        tv_training_iterations=25,  # Number of EM iterations for the TV training
        update_sigma=True,
        # parameters of the GMM
        **kwargs
    ):
        """Initializes the I-Vector tool.

        Parameters:

        subspace_dimension_of_t
          Dimension of the T subspace; handed to ``IVectorMachine``.

        tv_training_iterations
          Number of iterations handed to ``bob.learn.em.train`` when the
          TV machine is trained.

        update_sigma
          Forwarded to ``bob.learn.em.IVectorTrainer``.

        kwargs
          Forwarded to the ``GMM`` constructor and, again, to the
          ``Algorithm`` constructor.
        """
        # call base class constructor with its set of parameters
        GMM.__init__(self, **kwargs)

        # call tool constructor to overwrite what was set before
        Algorithm.__init__(
            self,
            performs_projection=True,
            use_projected_features_for_enrollment=True,
            requires_enroller_training=False,  # not needed anymore because it's done while training the projector
            split_training_features_by_client=False,

            subspace_dimension_of_t=subspace_dimension_of_t,
            tv_training_iterations=tv_training_iterations,
            update_sigma=update_sigma,

            multiple_model_scoring=None,
            multiple_probe_scoring=None,
            **kwargs
        )

        self.update_sigma = update_sigma
        self.subspace_dimension_of_t = subspace_dimension_of_t
        self.tv_training_iterations = tv_training_iterations
        self.ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=update_sigma)
        self.whitening_trainer = bob.learn.linear.WhiteningTrainer()

        # The machines only exist after train_projector() or load_projector().
        # Initialize them so that _check_projected() can test
        # ``self.whitener is not None`` without raising AttributeError.
        self.tv = None
        self.whitener = None

    def _check_projected(self, feature):
        """Checks that the given projected feature (i-vector) is appropriate.

        Raises ``ValueError`` unless ``feature`` is a 1D ``numpy.ndarray`` of
        dtype ``float64``.  If a whitener is available, the length of the
        feature must also match ``self.whitener.shape[1]``.
        """
        if not isinstance(feature, numpy.ndarray) or len(feature.shape) != 1 or feature.dtype != numpy.float64:
            raise ValueError("The given feature is not appropriate")
        if self.whitener is not None and feature.shape[0] != self.whitener.shape[1]:
            raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.whitener.shape[1], feature.shape[0]))

    def train_ivector(self, training_stats):
        """Creates ``self.tv`` and trains it on the given list of statistics."""
        logger.info(" -> Training IVector enroller")
        self.tv = bob.learn.em.IVectorMachine(self.ubm, self.subspace_dimension_of_t)
        self.tv.variance_threshold = self.variance_threshold

        # train IVector model
        bob.learn.em.train(self.ivector_trainer, self.tv, training_stats, self.tv_training_iterations, rng=self.rng)

    def train_whitening(self, training_features):
        """Creates ``self.whitener`` and trains it on the given i-vectors."""
        # one i-vector per row
        ivectors_matrix = numpy.vstack(training_features)
        # create a square Linear Machine (input size == output size)
        dimension = ivectors_matrix.shape[1]
        self.whitener = bob.learn.linear.Machine(dimension, dimension)
        # run the whitening trainer
        self.whitening_trainer.train(ivectors_matrix, self.whitener)

    def train_projector(self, train_features, projector_file):
        """Trains UBM, TV machine and whitener, and saves them to ``projector_file``."""
        for feature in train_features:
            self._check_feature(feature)

        # train UBM on all training features stacked together
        data = numpy.vstack(train_features)
        self.train_ubm(data)
        # drop our reference to the stacked copy before the next stages
        del data

        # compute the statistics of each training feature
        logger.info(" -> Projecting training data")
        training_stats = [self.project_ubm(feature) for feature in train_features]

        # train IVector
        self.train_ivector(training_stats)

        # project training i-vectors and use them to train the whitener
        whitening_train_data = [self.project_ivec(stats) for stats in training_stats]
        self.train_whitening(whitening_train_data)

        # save
        self.save_projector(projector_file)

    def save_projector(self, projector_file):
        """Saves UBM, TV machine and whitener into the same HDF5 file.

        The three parts are written to the groups ``Projector``, ``Enroller``
        and ``Whitener``, which is the layout ``load_projector`` reads.
        """
        hdf5file = bob.io.base.HDF5File(projector_file, "w")

        hdf5file.create_group('Projector')
        hdf5file.cd('Projector')
        self.save_ubm(hdf5file)

        hdf5file.cd('/')
        hdf5file.create_group('Enroller')
        hdf5file.cd('Enroller')
        self.tv.save(hdf5file)

        hdf5file.cd('/')
        hdf5file.create_group('Whitener')
        hdf5file.cd('Whitener')
        self.whitener.save(hdf5file)

    def load_tv(self, tv_file):
        """Loads the TV machine from ``tv_file`` and attaches ``self.ubm`` to it.

        ``load_projector`` passes an already opened ``HDF5File`` here.
        """
        hdf5file = bob.io.base.HDF5File(tv_file)
        self.tv = bob.learn.em.IVectorMachine(hdf5file)
        # add UBM model from base class
        self.tv.ubm = self.ubm

    def load_whitening(self, whitening_file):
        """Loads the whitening machine from ``whitening_file``.

        ``load_projector`` passes an already opened ``HDF5File`` here.
        """
        hdf5file = bob.io.base.HDF5File(whitening_file)
        self.whitener = bob.learn.linear.Machine(hdf5file)

    def load_projector(self, projector_file):
        """Loads the UBM, the TV machine and the whitener from the same HDF5 file."""
        hdf5file = bob.io.base.HDF5File(projector_file)

        # Load Projector (UBM)
        hdf5file.cd('/Projector')
        self.load_ubm(hdf5file)

        # Load Enroller (TV machine); must come after the UBM, which load_tv attaches
        hdf5file.cd('/Enroller')
        self.load_tv(hdf5file)

        # Load Whitening
        hdf5file.cd('/Whitener')
        self.load_whitening(hdf5file)

    def project_ivec(self, gmm_stats):
        """Projects the given statistics with the TV machine."""
        return self.tv.project(gmm_stats)

    def project_whitening(self, ivector):
        """Whitens the given i-vector and scales the result to unit L2 norm."""
        whitened = self.whitener.forward(ivector)
        return whitened / numpy.linalg.norm(whitened)

    #######################################################
    ############## IVector projection #####################
    def project(self, feature_array):
        """Computes GMM statistics against the UBM, then the whitened i-vector."""
        self._check_feature(feature_array)
        # project UBM
        projected_ubm = self.project_ubm(feature_array)
        # project I-Vector
        ivector = self.project_ivec(projected_ubm)
        # whiten I-Vector
        return self.project_whitening(ivector)

    #######################################################
    ################## Feature I/O ########################
    def write_feature(self, data, feature_file):
        """Saves the feature, which is the (whitened) I-Vector."""
        bob.bio.base.save(data, feature_file)

    def read_feature(self, feature_file):
        """Reads the type of features that we require, namely i-vectors (stored as simple numpy arrays)."""
        return bob.bio.base.load(feature_file)

    #######################################################
    ################## Model Enrollment ###################
    def enroll(self, enroll_features):
        """Performs IVector enrollment: the model is the mean of the given i-vectors."""
        for feature in enroll_features:
            self._check_projected(feature)
        model = numpy.mean(numpy.vstack(enroll_features), axis=0)
        return model

    ######################################################
    ################ Feature comparison ##################
    def read_model(self, model_file):
        """Reads the whitened i-vector that holds the model."""
        return bob.bio.base.load(model_file)

    def read_probe(self, probe_file):
        """Reads the probe file, which is an i-vector."""
        return bob.bio.base.load(probe_file)

    def score(self, model, probe):
        """Computes the cosine similarity between the given model and the given probe."""
        self._check_projected(model)
        self._check_projected(probe)
        return numpy.dot(model / numpy.linalg.norm(model), probe / numpy.linalg.norm(probe))

    def score_for_multiple_probes(self, model, probes):
        """Computes the score between the given model and the mean of several given probes."""
        for probe in probes:
            self._check_projected(probe)
        probe = numpy.mean(numpy.vstack(probes), axis=0)
        return self.score(model, probe)
bob/bio/gmm/algorithm/__init__.py
View file @
2f94648c
from
.GMM
import
GMM
,
GMMRegular
from
.JFA
import
JFA
from
.ISV
import
ISV
from
.IVector
import
IVector
bob/bio/gmm/config/algorithm/isv.py
View file @
2f94648c
#!/usr/bin/env python
import
bob.bio.gmm
import
numpy
algorithm
=
bob
.
bio
.
gmm
.
algorithm
.
ISV
(
# ISV parameters
...
...
bob/bio/gmm/config/algorithm/ivector.py
0 → 100644
View file @
2f94648c
import
bob.bio.gmm
# I-Vector algorithm instance exposed by this configuration module as
# ``algorithm``.
algorithm = bob.bio.gmm.algorithm.IVector(
    # IVector parameters
    subspace_dimension_of_t = 400,
    update_sigma = True,
    tv_training_iterations = 3,  # Number of EM iterations for the TV training
    # GMM parameters
    number_of_gaussians = 512,
)
bob/bio/gmm/test/data/ivector_model.hdf5
0 → 100644
View file @
2f94648c
File added
bob/bio/gmm/test/data/ivector_projected.hdf5
0 → 100644
View file @
2f94648c
File added
bob/bio/gmm/test/data/ivector_projector.hdf5
0 → 100644
View file @
2f94648c
File added
bob/bio/gmm/test/test_algorithms.py
View file @
2f94648c
...
...
@@ -326,80 +326,63 @@ def test_jfa():
# assert abs(jfa1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5, jfa1.score_for_multiple_probes(model, [probe, probe])
"""
def test10_ivector(self):
# NOTE: This test will fail when it is run solely. Please always run all Tool tests in order to assure that they work.
# read input
feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5'))
# assure that the config file is readable
tool = self.config('ivector')
self.assertTrue(isinstance(tool, facereclib.tools.IVector))
# here, we use a reduced complexity for test purposes
tool = facereclib.tools.IVector(
number_of_gaussians = 2,
subspace_dimension_of_t=2, # T subspace dimension
update_sigma = False, # TODO Do another test with True
tv_training_iterations = 1, # Number of EM iterations for the JFA training
variance_threshold = 1e-5,
INIT_SEED = seed_value
)
self.assertTrue(tool.performs_projection)
self.assertTrue(tool.requires_projector_training)
self.assertTrue(tool.use_projected_features_for_enrollment)
self.assertFalse(tool.split_training_features_by_client)
self.assertFalse(tool.requires_enroller_training)
def
test_ivector
():
temp_file
=
bob
.
io
.
base
.
test_utils
.
temporary_filename
()
ivec1
=
bob
.
bio
.
base
.
load_resource
(
"ivector"
,
"algorithm"
)
assert
isinstance
(
ivec1
,
bob
.
bio
.
gmm
.
algorithm
.
IVector
)
assert
isinstance
(
ivec1
,
bob
.
bio
.
gmm
.
algorithm
.
GMM
)
assert
isinstance
(
ivec1
,
bob
.
bio
.
base
.
algorithm
.
Algorithm
)
assert
ivec1
.
performs_projection
assert
ivec1
.
requires_projector_training
assert
ivec1
.
use_projected_features_for_enrollment
assert
not
ivec1
.
split_training_features_by_client
assert
not
ivec1
.
requires_enroller_training
# create smaller IVector object
ivec2
=
bob
.
bio
.
gmm
.
algorithm
.
IVector
(
number_of_gaussians
=
2
,
subspace_dimension_of_t
=
2
,
kmeans_training_iterations
=
1
,
tv_training_iterations
=
1
,
INIT_SEED
=
seed_value
)
train_data
=
utils
.
random_training_set
((
100
,
45
),
count
=
5
,
minimum
=-
5.
,
maximum
=
5.
)
# reference is the same as for GMM projection
reference_file
=
pkg_resources
.
resource_filename
(
'bob.bio.gmm.test'
,
'data/ivector_projector.hdf5'
)
try
:
# train the projector
t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1]
tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t)
if regenerate_refs:
import shutil
shutil.copy2(t, self.reference_dir('ivector_projector.hdf5'))
# load the projector file
tool.load_projector(self.reference_dir('ivector_projector.hdf5'))
# compare ISV projector with reference
hdf5file = bob.io.base.HDF5File(t)
hdf5file.cd('Projector')
projector_reference = bob.learn.em.GMMMachine(hdf5file)
self.assertTrue(tool.m_ubm.is_similar_to(projector_reference))
# compare ISV enroller with reference
hdf5file.cd('/')
hdf5file.cd('Enroller')
enroller_reference = bob.learn.em.IVectorMachine(hdf5file)
enroller_reference.ubm = projector_reference
if not _mac_os:
self.assertTrue(tool.m_tv.is_similar_to(enroller_reference))
os.remove(t)
# project the feature
projected = tool.project(feature)
if regenerate_refs:
tool.save_feature(projected, self.reference_dir('ivector_feature.hdf5'))
# compare the projected feature with the reference
projected_reference = tool.read_feature(self.reference_dir('ivector_feature.hdf5'))
self.assertTrue(numpy.allclose(projected,projected_reference))
# enroll model with the projected feature
# This is not yet supported
# model = tool.enroll([projected[0]])
# if regenerate_refs:
# model.save(bob.io.HDF5File(self.reference_dir('ivector_model.hdf5'), 'w'))
#reference_model = tool.read_model(self.reference_dir('ivector_model.hdf5'))
# compare the IVector model with the reference
#self.assertTrue(model.is_similar_to(reference_model))
# check that the read_probe function reads the correct values
probe = tool.read_probe(self.reference_dir('ivector_feature.hdf5'))
self.assertTrue(numpy.allclose(probe,projected))
# score with projected feature and compare to the weird reference score ...
# This in not implemented yet
# score with a concatenation of the probe
# This is not implemented yet
"""
ivec2
.
train_projector
(
train_data
,
temp_file
)
assert
os
.
path
.
exists
(
temp_file
)
if
regenerate_refs
:
shutil
.
copy
(
temp_file
,
reference_file
)
# check projection matrix
ivec1
.
load_projector
(
reference_file
)
ivec2
.
load_projector
(
temp_file
)
assert
ivec1
.
ubm
.
is_similar_to
(
ivec2
.
ubm
)
assert
ivec1
.
tv
.
is_similar_to
(
ivec2
.
tv
)
assert
ivec1
.
whitener
.
is_similar_to
(
ivec2
.
whitener
)
finally
:
if
os
.
path
.
exists
(
temp_file
):
os
.
remove
(
temp_file
)
# generate and project random feature
feature
=
utils
.
random_array
((
20
,
45
),
-
5.
,
5.
,
seed
=
84
)
projected
=
ivec1
.
project
(
feature
)
_compare
(
projected
,
pkg_resources
.
resource_filename
(
'bob.bio.gmm.test'
,
'data/ivector_projected.hdf5'
),
ivec1
.
write_feature
,
ivec1
.
read_feature
)
# enroll model from random features
random_features
=
utils
.
random_training_set
((
20
,
45
),
count
=
5
,
minimum
=-
5.
,
maximum
=
5.
)
enroll_features
=
[
ivec1
.
project
(
feature
)
for
feature
in
random_features
]
model
=
ivec1
.
enroll
(
enroll_features
)
_compare
(
model
,
pkg_resources
.
resource_filename
(
'bob.bio.gmm.test'
,
'data/ivector_model.hdf5'
),
ivec1
.
write_model
,
ivec1
.
read_model
)
# compare model with probe
probe
=
ivec1
.
read_probe
(
pkg_resources
.
resource_filename
(
'bob.bio.gmm.test'
,
'data/ivector_projected.hdf5'
))
reference_score
=
-
0.00187151
assert
abs
(
ivec1
.
score
(
model
,
probe
)
-
reference_score
)
<
1e-5
,
"The scores differ: %3.8f, %3.8f"
%
(
ivec1
.
score
(
model
,
probe
),
reference_score
)
# TODO: implement that
assert
abs
(
ivec1
.
score_for_multiple_probes
(
model
,
[
probe
,
probe
])
-
reference_score
)
<
1e-5
setup.py
View file @
2f94648c
...
...
@@ -121,6 +121,7 @@ setup(
'gmm-regular = bob.bio.gmm.config.algorithm.gmm_regular:algorithm'
,
'jfa = bob.bio.gmm.config.algorithm.jfa:algorithm'
,
'isv = bob.bio.gmm.config.algorithm.isv:algorithm'
,
'ivector = bob.bio.gmm.config.algorithm.ivector:algorithm'
,
],
},
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment