Skip to content
Snippets Groups Projects
Commit 2f94648c authored by Manuel Günther
Browse files

Added IVector algorithm

parent 5e240088
No related branches found
No related tags found
No related merge requests found
......@@ -132,7 +132,8 @@ class GMM (Algorithm):
"""Save projector to file"""
# Saves the UBM to file
logger.debug(" .... Saving model to file '%s'", projector_file)
self.ubm.save(bob.io.base.HDF5File(projector_file, "w"))
hdf5 = projector_file if isinstance(projector_file, bob.io.base.HDF5File) else bob.io.base.HDF5File(projector_file, 'w')
self.ubm.save(hdf5)
def train_projector(self, train_features, projector_file):
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
import bob.core
import bob.io.base
import bob.learn.linear
import bob.learn.em
import numpy
from .GMM import GMM
from bob.bio.base.algorithm import Algorithm
import logging
logger = logging.getLogger("bob.bio.gmm")
class IVector (GMM):
    """Tool for extracting I-Vectors.

    The projector consists of three parts that are trained together and
    stored in one HDF5 file: the UBM (handled by the :py:class:`GMM` base
    class), the total variability (TV) machine and a whitening machine.
    Projected features are whitened, length-normalized i-vectors; models are
    the mean of such i-vectors and scoring is the cosine similarity.
    """

    def __init__(
        self,
        # IVector training
        subspace_dimension_of_t,      # T subspace dimension
        tv_training_iterations=25,    # Number of EM iterations for the TV training
        update_sigma=True,
        # parameters of the GMM
        **kwargs
    ):
        """Initializes the I-Vector tool.

        Parameters:

        subspace_dimension_of_t
          Dimension of the total variability subspace T.

        tv_training_iterations
          Number of EM iterations used to train the TV machine.

        update_sigma
          Forwarded to ``bob.learn.em.IVectorTrainer(update_sigma=...)``.

        kwargs
          Remaining parameters, handed to the :py:class:`GMM` constructor.
        """
        # call base class constructor with its set of parameters
        GMM.__init__(self, **kwargs)

        # call tool constructor to overwrite what was set before
        Algorithm.__init__(
            self,
            performs_projection=True,
            use_projected_features_for_enrollment=True,
            # not needed anymore because it's done while training the projector
            requires_enroller_training=False,
            split_training_features_by_client=False,

            subspace_dimension_of_t=subspace_dimension_of_t,
            tv_training_iterations=tv_training_iterations,
            update_sigma=update_sigma,

            multiple_model_scoring=None,
            multiple_probe_scoring=None,
            **kwargs
        )

        self.update_sigma = update_sigma
        self.subspace_dimension_of_t = subspace_dimension_of_t
        self.tv_training_iterations = tv_training_iterations
        self.ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=update_sigma)
        self.whitening_trainer = bob.learn.linear.WhiteningTrainer()

        # Machines are only available after train_projector() or
        # load_projector(). They are initialized here so that
        # _check_projected() can test ``self.whitener is not None`` instead
        # of failing with an AttributeError.
        self.tv = None
        self.whitener = None

    def _check_projected(self, feature):
        """Checks that the given (projected) feature is a 1D float64 array.

        When a whitener is available, the feature length is additionally
        compared with ``self.whitener.shape[1]``.

        Raises ``ValueError`` if any of the checks fails.
        """
        if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64:
            raise ValueError("The given feature is not appropriate")
        if self.whitener is not None and feature.shape[0] != self.whitener.shape[1]:
            raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.whitener.shape[1], feature.shape[0]))

    def train_ivector(self, training_stats):
        """Creates and trains the TV machine ``self.tv`` from the given GMM statistics."""
        logger.info(" -> Training IVector enroller")
        self.tv = bob.learn.em.IVectorMachine(self.ubm, self.subspace_dimension_of_t)
        self.tv.variance_threshold = self.variance_threshold

        # train IVector model
        bob.learn.em.train(self.ivector_trainer, self.tv, training_stats, self.tv_training_iterations, rng=self.rng)

    def train_whitening(self, training_features):
        """Creates and trains the whitening machine ``self.whitener`` from a list of i-vectors."""
        ivectors_matrix = numpy.vstack(training_features)
        dimension = ivectors_matrix.shape[1]
        # create a square Linear Machine
        self.whitener = bob.learn.linear.Machine(dimension, dimension)
        # let the whitening trainer fill it
        self.whitening_trainer.train(ivectors_matrix, self.whitener)

    def train_projector(self, train_features, projector_file):
        """Trains UBM, TV machine and whitener at the same time and saves them to ``projector_file``."""
        for feature in train_features:
            self._check_feature(feature)

        # train UBM on the stacked features; release the copy afterwards
        data = numpy.vstack(train_features)
        self.train_ubm(data)
        del data

        # compute GMM statistics of the training data
        logger.info(" -> Projecting training data")
        training_stats = [self.project_ubm(feature) for feature in train_features]

        # train IVector
        self.train_ivector(training_stats)

        # project training i-vectors and train the whitening on them
        whitening_train_data = [self.project_ivec(stats) for stats in training_stats]
        self.train_whitening(whitening_train_data)

        # save
        self.save_projector(projector_file)

    def save_projector(self, projector_file):
        """Saves the UBM, the TV machine and the whitener into the same HDF5 file.

        The three parts are written into the groups ``Projector``,
        ``Enroller`` and ``Whitener``, which is the layout that
        :py:meth:`load_projector` reads.
        """
        hdf5file = bob.io.base.HDF5File(projector_file, "w")

        hdf5file.create_group('Projector')
        hdf5file.cd('Projector')
        self.save_ubm(hdf5file)

        hdf5file.cd('/')
        hdf5file.create_group('Enroller')
        hdf5file.cd('Enroller')
        self.tv.save(hdf5file)

        hdf5file.cd('/')
        hdf5file.create_group('Whitener')
        hdf5file.cd('Whitener')
        self.whitener.save(hdf5file)

    def load_tv(self, tv_file):
        """Loads the TV machine and attaches the UBM of the base class to it.

        ``tv_file`` is handed to the ``bob.io.base.HDF5File`` constructor;
        :py:meth:`load_projector` calls this with an already opened HDF5 file.
        """
        hdf5file = bob.io.base.HDF5File(tv_file)
        self.tv = bob.learn.em.IVectorMachine(hdf5file)
        # add UBM model from base class
        self.tv.ubm = self.ubm

    def load_whitening(self, whitening_file):
        """Loads the whitening machine.

        ``whitening_file`` is handed to the ``bob.io.base.HDF5File``
        constructor; :py:meth:`load_projector` calls this with an already
        opened HDF5 file.
        """
        hdf5file = bob.io.base.HDF5File(whitening_file)
        self.whitener = bob.learn.linear.Machine(hdf5file)

    def load_projector(self, projector_file):
        """Loads the UBM, the TV machine and the whitener from the same HDF5 file."""
        hdf5file = bob.io.base.HDF5File(projector_file)

        # Load Projector (UBM); must come first since load_tv uses self.ubm
        hdf5file.cd('/Projector')
        self.load_ubm(hdf5file)

        # Load Enroller (TV machine)
        hdf5file.cd('/Enroller')
        self.load_tv(hdf5file)

        # Load Whitening
        hdf5file.cd('/Whitener')
        self.load_whitening(hdf5file)

    def project_ivec(self, gmm_stats):
        """Projects the given GMM statistics with the TV machine."""
        return self.tv.project(gmm_stats)

    def project_whitening(self, ivector):
        """Whitens the given i-vector and normalizes the result to unit length."""
        whitened = self.whitener.forward(ivector)
        return whitened / numpy.linalg.norm(whitened)

    #######################################################
    ############## IVector projection #####################
    def project(self, feature_array):
        """Computes GMM statistics against the UBM, then the corresponding whitened, length-normalized i-vector."""
        self._check_feature(feature_array)
        # project UBM
        projected_ubm = self.project_ubm(feature_array)
        # project I-Vector
        ivector = self.project_ivec(projected_ubm)
        # whiten I-Vector
        return self.project_whitening(ivector)

    #######################################################
    ################## Feature IO #########################
    def write_feature(self, data, feature_file):
        """Saves the feature, which is the (whitened) I-Vector."""
        bob.bio.base.save(data, feature_file)

    def read_feature(self, feature_file):
        """Reads the type of features that we require, namely i-vectors (stored as simple numpy arrays)."""
        return bob.bio.base.load(feature_file)

    #######################################################
    ################## Model Enrollment ###################
    def enroll(self, enroll_features):
        """Performs IVector enrollment: the model is the mean of the given projected features."""
        for feature in enroll_features:
            self._check_projected(feature)
        return numpy.mean(numpy.vstack(enroll_features), axis=0)

    ######################################################
    ################ Feature comparison ##################
    def read_model(self, model_file):
        """Reads the whitened i-vector that holds the model."""
        return bob.bio.base.load(model_file)

    def read_probe(self, probe_file):
        """Reads the probe file, which is an i-vector."""
        return bob.bio.base.load(probe_file)

    def score(self, model, probe):
        """Computes the cosine similarity between the given model and the given probe."""
        self._check_projected(model)
        self._check_projected(probe)
        return numpy.dot(model / numpy.linalg.norm(model), probe / numpy.linalg.norm(probe))

    def score_for_multiple_probes(self, model, probes):
        """Computes the score between the given model and the mean of several given probes."""
        for probe in probes:
            self._check_projected(probe)
        probe = numpy.mean(numpy.vstack(probes), axis=0)
        return self.score(model, probe)
from .GMM import GMM, GMMRegular
from .JFA import JFA
from .ISV import ISV
from .IVector import IVector
#!/usr/bin/env python
import bob.bio.gmm
import numpy
algorithm = bob.bio.gmm.algorithm.ISV(
# ISV parameters
......
import bob.bio.gmm
# Configuration of the I-Vector algorithm: a 512-Gaussian UBM with a
# 400-dimensional total variability subspace.
algorithm = bob.bio.gmm.algorithm.IVector(
    # GMM parameters
    number_of_gaussians=512,
    # IVector parameters
    subspace_dimension_of_t=400,
    tv_training_iterations=3,  # Number of EM iterations for the TV training
    update_sigma=True,
)
File added
File added
File added
......@@ -326,80 +326,63 @@ def test_jfa():
# assert abs(jfa1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5, jfa1.score_for_multiple_probes(model, [probe, probe])
"""
def test10_ivector(self):
# NOTE: This test will fail when it is run solely. Please always run all Tool tests in order to assure that they work.
# read input
feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5'))
# assure that the config file is readable
tool = self.config('ivector')
self.assertTrue(isinstance(tool, facereclib.tools.IVector))
# here, we use a reduced complexity for test purposes
tool = facereclib.tools.IVector(
number_of_gaussians = 2,
subspace_dimension_of_t=2, # T subspace dimension
update_sigma = False, # TODO Do another test with True
tv_training_iterations = 1, # Number of EM iterations for the JFA training
variance_threshold = 1e-5,
INIT_SEED = seed_value
)
self.assertTrue(tool.performs_projection)
self.assertTrue(tool.requires_projector_training)
self.assertTrue(tool.use_projected_features_for_enrollment)
self.assertFalse(tool.split_training_features_by_client)
self.assertFalse(tool.requires_enroller_training)
def test_ivector():
temp_file = bob.io.base.test_utils.temporary_filename()
ivec1 = bob.bio.base.load_resource("ivector", "algorithm")
assert isinstance(ivec1, bob.bio.gmm.algorithm.IVector)
assert isinstance(ivec1, bob.bio.gmm.algorithm.GMM)
assert isinstance(ivec1, bob.bio.base.algorithm.Algorithm)
assert ivec1.performs_projection
assert ivec1.requires_projector_training
assert ivec1.use_projected_features_for_enrollment
assert not ivec1.split_training_features_by_client
assert not ivec1.requires_enroller_training
# create smaller IVector object
ivec2 = bob.bio.gmm.algorithm.IVector(
number_of_gaussians = 2,
subspace_dimension_of_t = 2,
kmeans_training_iterations = 1,
tv_training_iterations = 1,
INIT_SEED = seed_value
)
train_data = utils.random_training_set((100,45), count=5, minimum=-5., maximum=5.)
# reference is the same as for GMM projection
reference_file = pkg_resources.resource_filename('bob.bio.gmm.test', 'data/ivector_projector.hdf5')
try:
# train the projector
t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1]
tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t)
if regenerate_refs:
import shutil
shutil.copy2(t, self.reference_dir('ivector_projector.hdf5'))
# load the projector file
tool.load_projector(self.reference_dir('ivector_projector.hdf5'))
# compare ISV projector with reference
hdf5file = bob.io.base.HDF5File(t)
hdf5file.cd('Projector')
projector_reference = bob.learn.em.GMMMachine(hdf5file)
self.assertTrue(tool.m_ubm.is_similar_to(projector_reference))
# compare ISV enroller with reference
hdf5file.cd('/')
hdf5file.cd('Enroller')
enroller_reference = bob.learn.em.IVectorMachine(hdf5file)
enroller_reference.ubm = projector_reference
if not _mac_os:
self.assertTrue(tool.m_tv.is_similar_to(enroller_reference))
os.remove(t)
# project the feature
projected = tool.project(feature)
if regenerate_refs:
tool.save_feature(projected, self.reference_dir('ivector_feature.hdf5'))
# compare the projected feature with the reference
projected_reference = tool.read_feature(self.reference_dir('ivector_feature.hdf5'))
self.assertTrue(numpy.allclose(projected,projected_reference))
# enroll model with the projected feature
# This is not yet supported
# model = tool.enroll([projected[0]])
# if regenerate_refs:
# model.save(bob.io.HDF5File(self.reference_dir('ivector_model.hdf5'), 'w'))
#reference_model = tool.read_model(self.reference_dir('ivector_model.hdf5'))
# compare the IVector model with the reference
#self.assertTrue(model.is_similar_to(reference_model))
# check that the read_probe function reads the correct values
probe = tool.read_probe(self.reference_dir('ivector_feature.hdf5'))
self.assertTrue(numpy.allclose(probe,projected))
# score with projected feature and compare to the weird reference score ...
# This in not implemented yet
# score with a concatenation of the probe
# This is not implemented yet
"""
ivec2.train_projector(train_data, temp_file)
assert os.path.exists(temp_file)
if regenerate_refs: shutil.copy(temp_file, reference_file)
# check projection matrix
ivec1.load_projector(reference_file)
ivec2.load_projector(temp_file)
assert ivec1.ubm.is_similar_to(ivec2.ubm)
assert ivec1.tv.is_similar_to(ivec2.tv)
assert ivec1.whitener.is_similar_to(ivec2.whitener)
finally:
if os.path.exists(temp_file): os.remove(temp_file)
# generate and project random feature
feature = utils.random_array((20,45), -5., 5., seed=84)
projected = ivec1.project(feature)
_compare(projected, pkg_resources.resource_filename('bob.bio.gmm.test', 'data/ivector_projected.hdf5'), ivec1.write_feature, ivec1.read_feature)
# enroll model from random features
random_features = utils.random_training_set((20,45), count=5, minimum=-5., maximum=5.)
enroll_features = [ivec1.project(feature) for feature in random_features]
model = ivec1.enroll(enroll_features)
_compare(model, pkg_resources.resource_filename('bob.bio.gmm.test', 'data/ivector_model.hdf5'), ivec1.write_model, ivec1.read_model)
# compare model with probe
probe = ivec1.read_probe(pkg_resources.resource_filename('bob.bio.gmm.test', 'data/ivector_projected.hdf5'))
reference_score = -0.00187151
assert abs(ivec1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (ivec1.score(model, probe), reference_score)
# TODO: implement that
assert abs(ivec1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5
......@@ -121,6 +121,7 @@ setup(
'gmm-regular = bob.bio.gmm.config.algorithm.gmm_regular:algorithm',
'jfa = bob.bio.gmm.config.algorithm.jfa:algorithm',
'isv = bob.bio.gmm.config.algorithm.isv:algorithm',
'ivector = bob.bio.gmm.config.algorithm.ivector:algorithm',
],
},
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment