Commit 2f94648c authored by Manuel Günther
Browse files

Added IVector algorithm

parent 5e240088
......@@ -132,7 +132,8 @@ class GMM (Algorithm):
"""Save projector to file"""
# Saves the UBM to file
logger.debug(" .... Saving model to file '%s'", projector_file)
self.ubm.save(bob.io.base.HDF5File(projector_file, "w"))
hdf5 = projector_file if isinstance(projector_file, bob.io.base.HDF5File) else bob.io.base.HDF5File(projector_file, 'w')
self.ubm.save(hdf5)
def train_projector(self, train_features, projector_file):
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
import bob.core
import bob.io.base
import bob.learn.linear
import bob.learn.em
import numpy
from .GMM import GMM
from bob.bio.base.algorithm import Algorithm
import logging
logger = logging.getLogger("bob.bio.gmm")
class IVector (GMM):
    """Tool for extracting (whitened, length-normalized) I-Vectors.

    The projector consists of three parts that are trained together in
    :py:meth:`train_projector` and stored in one HDF5 file:

    * the UBM (handled by the ``GMM`` base class),
    * the total variability (TV) machine, ``self.tv``,
    * the whitening machine, ``self.whitener``.

    Models are the mean of the enrollment i-vectors; scoring is the cosine
    similarity between model and probe.
    """

    def __init__(
        self,
        # IVector training
        subspace_dimension_of_t,       # T subspace dimension
        tv_training_iterations = 25,   # Number of EM iterations for the TV training
        update_sigma = True,
        # parameters of the GMM
        **kwargs
    ):
        """Initializes the I-Vector tool.

        Parameters:

        subspace_dimension_of_t
          Dimension of the total variability (T) subspace.

        tv_training_iterations
          Number of EM iterations used to train the TV machine.

        update_sigma
          Forwarded to ``bob.learn.em.IVectorTrainer(update_sigma=...)``.

        kwargs
          Remaining keyword arguments; forwarded to ``GMM.__init__`` and to
          ``Algorithm.__init__``.
        """
        # call base class constructor with its set of parameters
        GMM.__init__(self, **kwargs)

        # call tool constructor to overwrite what was set before
        Algorithm.__init__(
            self,
            performs_projection = True,
            use_projected_features_for_enrollment = True,
            requires_enroller_training = False,  # not needed anymore because it's done while training the projector
            split_training_features_by_client = False,

            subspace_dimension_of_t = subspace_dimension_of_t,
            tv_training_iterations = tv_training_iterations,
            update_sigma = update_sigma,

            multiple_model_scoring = None,
            multiple_probe_scoring = None,
            **kwargs
        )

        self.update_sigma = update_sigma
        self.subspace_dimension_of_t = subspace_dimension_of_t
        self.tv_training_iterations = tv_training_iterations
        self.ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=update_sigma)
        self.whitening_trainer = bob.learn.linear.WhiteningTrainer()

        # Machines only exist after training or loading the projector.
        # They are initialized here so that ``_check_projected`` (which tests
        # ``self.whitener is not None``) does not raise an AttributeError when
        # it is called before a projector is available.
        self.tv = None
        self.whitener = None

    def _check_projected(self, feature):
        """Checks that the given projected feature (i-vector) is appropriate.

        Raises a ``ValueError`` if ``feature`` is not a 1D ``numpy.ndarray`` of
        dtype ``float64``, or -- when a whitener is available -- if its length
        differs from ``self.whitener.shape[1]``.
        """
        if not isinstance(feature, numpy.ndarray) or len(feature.shape) != 1 or feature.dtype != numpy.float64:
            raise ValueError("The given feature is not appropriate")
        if self.whitener is not None and feature.shape[0] != self.whitener.shape[1]:
            raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.whitener.shape[1], feature.shape[0]))

    def train_ivector(self, training_stats):
        """Trains the total variability machine ``self.tv`` from the given GMM statistics."""
        logger.info(" -> Training IVector enroller")
        self.tv = bob.learn.em.IVectorMachine(self.ubm, self.subspace_dimension_of_t)
        # ``variance_threshold`` and ``rng`` are not set in this class;
        # presumably they are provided by the GMM base class.
        self.tv.variance_threshold = self.variance_threshold

        # train IVector model
        bob.learn.em.train(self.ivector_trainer, self.tv, training_stats, self.tv_training_iterations, rng=self.rng)

    def train_whitening(self, training_features):
        """Trains the whitening machine ``self.whitener`` from the given list of i-vectors."""
        ivectors_matrix = numpy.vstack(training_features)
        # create a square Linear Machine (input size == output size)
        self.whitener = bob.learn.linear.Machine(ivectors_matrix.shape[1], ivectors_matrix.shape[1])
        # train the whitening with the stacked i-vectors
        self.whitening_trainer.train(ivectors_matrix, self.whitener)

    def train_projector(self, train_features, projector_file):
        """Trains UBM, TV machine and whitener, and saves all of them to ``projector_file``."""
        for feature in train_features:
            self._check_feature(feature)

        # train UBM
        data = numpy.vstack(train_features)
        self.train_ubm(data)
        del data  # the stacked copy is not needed any more

        # compute GMM statistics of the training data
        logger.info(" -> Projecting training data")
        training_stats = [self.project_ubm(feature) for feature in train_features]

        # train IVector
        self.train_ivector(training_stats)

        # project training i-vectors and train the whitening on them
        whitening_train_data = [self.project_ivec(stats) for stats in training_stats]
        self.train_whitening(whitening_train_data)

        # save
        self.save_projector(projector_file)

    def save_projector(self, projector_file):
        """Saves the UBM, the TV machine and the whitener into the same HDF5 file.

        The three parts are written into the groups ``Projector``, ``Enroller``
        and ``Whitener``, respectively.
        """
        hdf5file = bob.io.base.HDF5File(projector_file, "w")

        hdf5file.create_group('Projector')
        hdf5file.cd('Projector')
        self.save_ubm(hdf5file)

        hdf5file.cd('/')
        hdf5file.create_group('Enroller')
        hdf5file.cd('Enroller')
        self.tv.save(hdf5file)

        hdf5file.cd('/')
        hdf5file.create_group('Whitener')
        hdf5file.cd('Whitener')
        self.whitener.save(hdf5file)

    def load_tv(self, tv_file):
        """Loads the TV machine from ``tv_file`` and attaches the already loaded UBM.

        ``load_projector`` passes an open HDF5 file positioned at the
        ``/Enroller`` group here.
        """
        hdf5file = bob.io.base.HDF5File(tv_file)
        self.tv = bob.learn.em.IVectorMachine(hdf5file)
        # add UBM model from base class
        self.tv.ubm = self.ubm

    def load_whitening(self, whitening_file):
        """Loads the whitening machine from ``whitening_file``.

        ``load_projector`` passes an open HDF5 file positioned at the
        ``/Whitener`` group here.
        """
        hdf5file = bob.io.base.HDF5File(whitening_file)
        self.whitener = bob.learn.linear.Machine(hdf5file)

    def load_projector(self, projector_file):
        """Loads the UBM, the TV machine and the whitener from the same HDF5 file."""
        hdf5file = bob.io.base.HDF5File(projector_file)

        # Load Projector (UBM); must come first, since load_tv uses self.ubm
        hdf5file.cd('/Projector')
        self.load_ubm(hdf5file)

        # Load Enroller (TV machine)
        hdf5file.cd('/Enroller')
        self.load_tv(hdf5file)

        # Load Whitening
        hdf5file.cd('/Whitener')
        self.load_whitening(hdf5file)

    def project_ivec(self, gmm_stats):
        """Projects the given GMM statistics with the TV machine."""
        return self.tv.project(gmm_stats)

    def project_whitening(self, ivector):
        """Whitens the given i-vector and normalizes the result to unit length."""
        whitened = self.whitener.forward(ivector)
        return whitened / numpy.linalg.norm(whitened)

    #######################################################
    ############## IVector projection #####################
    def project(self, feature_array):
        """Computes GMM statistics against the UBM, then the corresponding whitened I-Vector."""
        self._check_feature(feature_array)
        # project UBM
        projected_ubm = self.project_ubm(feature_array)
        # project I-Vector
        ivector = self.project_ivec(projected_ubm)
        # whiten I-Vector
        return self.project_whitening(ivector)

    #######################################################
    ################## Feature I/O ########################
    def write_feature(self, data, feature_file):
        """Saves the feature, which is the (whitened) I-Vector."""
        bob.bio.base.save(data, feature_file)

    def read_feature(self, feature_file):
        """Read the type of features that we require, namely i-vectors (stored as simple numpy arrays)"""
        return bob.bio.base.load(feature_file)

    #######################################################
    ################## Model Enrollment ###################
    def enroll(self, enroll_features):
        """Performs IVector enrollment: the model is the mean of the given i-vectors."""
        for feature in enroll_features:
            self._check_projected(feature)
        model = numpy.mean(numpy.vstack(enroll_features), axis=0)
        return model

    ######################################################
    ################ Feature comparison ##################
    def read_model(self, model_file):
        """Reads the whitened i-vector that holds the model"""
        return bob.bio.base.load(model_file)

    def read_probe(self, probe_file):
        """Reads the probe file, which is an i-vector"""
        return bob.bio.base.load(probe_file)

    def score(self, model, probe):
        """Computes the score (cosine similarity) for the given model and the given probe."""
        self._check_projected(model)
        self._check_projected(probe)
        return numpy.dot(model/numpy.linalg.norm(model), probe/numpy.linalg.norm(probe))

    def score_for_multiple_probes(self, model, probes):
        """Computes the score between the given model and the mean of several given probes."""
        for probe in probes:
            self._check_projected(probe)
        probe = numpy.mean(numpy.vstack(probes), axis=0)
        return self.score(model, probe)
from .GMM import GMM, GMMRegular
from .JFA import JFA
from .ISV import ISV
from .IVector import IVector
#!/usr/bin/env python
import bob.bio.gmm
import numpy
algorithm = bob.bio.gmm.algorithm.ISV(
# ISV parameters
......
import bob.bio.gmm
# Default configuration of the I-Vector algorithm.
# NOTE(review): presumably this is the object registered as the 'ivector'
# algorithm resource in setup.py -- confirm the module path.
algorithm = bob.bio.gmm.algorithm.IVector(
# IVector parameters
subspace_dimension_of_t = 400, # dimension of the total variability (T) subspace
update_sigma = True, # forwarded to the IVectorTrainer
tv_training_iterations = 3, # Number of EM iterations for the TV training
# GMM parameters
# (not named in the IVector constructor; passed on to the GMM base class via **kwargs)
number_of_gaussians = 512,
)
......@@ -326,80 +326,63 @@ def test_jfa():
# assert abs(jfa1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5, jfa1.score_for_multiple_probes(model, [probe, probe])
"""
def test10_ivector(self):
# NOTE: This test will fail when it is run solely. Please always run all Tool tests in order to assure that they work.
# read input
feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5'))
# assure that the config file is readable
tool = self.config('ivector')
self.assertTrue(isinstance(tool, facereclib.tools.IVector))
# here, we use a reduced complexity for test purposes
tool = facereclib.tools.IVector(
number_of_gaussians = 2,
subspace_dimension_of_t=2, # T subspace dimension
update_sigma = False, # TODO Do another test with True
tv_training_iterations = 1, # Number of EM iterations for the JFA training
variance_threshold = 1e-5,
INIT_SEED = seed_value
)
self.assertTrue(tool.performs_projection)
self.assertTrue(tool.requires_projector_training)
self.assertTrue(tool.use_projected_features_for_enrollment)
self.assertFalse(tool.split_training_features_by_client)
self.assertFalse(tool.requires_enroller_training)
def test_ivector():
temp_file = bob.io.base.test_utils.temporary_filename()
ivec1 = bob.bio.base.load_resource("ivector", "algorithm")
assert isinstance(ivec1, bob.bio.gmm.algorithm.IVector)
assert isinstance(ivec1, bob.bio.gmm.algorithm.GMM)
assert isinstance(ivec1, bob.bio.base.algorithm.Algorithm)
assert ivec1.performs_projection
assert ivec1.requires_projector_training
assert ivec1.use_projected_features_for_enrollment
assert not ivec1.split_training_features_by_client
assert not ivec1.requires_enroller_training
# create smaller IVector object
ivec2 = bob.bio.gmm.algorithm.IVector(
number_of_gaussians = 2,
subspace_dimension_of_t = 2,
kmeans_training_iterations = 1,
tv_training_iterations = 1,
INIT_SEED = seed_value
)
train_data = utils.random_training_set((100,45), count=5, minimum=-5., maximum=5.)
# reference is the same as for GMM projection
reference_file = pkg_resources.resource_filename('bob.bio.gmm.test', 'data/ivector_projector.hdf5')
try:
# train the projector
t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1]
tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t)
if regenerate_refs:
import shutil
shutil.copy2(t, self.reference_dir('ivector_projector.hdf5'))
# load the projector file
tool.load_projector(self.reference_dir('ivector_projector.hdf5'))
# compare ISV projector with reference
hdf5file = bob.io.base.HDF5File(t)
hdf5file.cd('Projector')
projector_reference = bob.learn.em.GMMMachine(hdf5file)
self.assertTrue(tool.m_ubm.is_similar_to(projector_reference))
# compare ISV enroller with reference
hdf5file.cd('/')
hdf5file.cd('Enroller')
enroller_reference = bob.learn.em.IVectorMachine(hdf5file)
enroller_reference.ubm = projector_reference
if not _mac_os:
self.assertTrue(tool.m_tv.is_similar_to(enroller_reference))
os.remove(t)
# project the feature
projected = tool.project(feature)
if regenerate_refs:
tool.save_feature(projected, self.reference_dir('ivector_feature.hdf5'))
# compare the projected feature with the reference
projected_reference = tool.read_feature(self.reference_dir('ivector_feature.hdf5'))
self.assertTrue(numpy.allclose(projected,projected_reference))
# enroll model with the projected feature
# This is not yet supported
# model = tool.enroll([projected[0]])
# if regenerate_refs:
# model.save(bob.io.HDF5File(self.reference_dir('ivector_model.hdf5'), 'w'))
#reference_model = tool.read_model(self.reference_dir('ivector_model.hdf5'))
# compare the IVector model with the reference
#self.assertTrue(model.is_similar_to(reference_model))
# check that the read_probe function reads the correct values
probe = tool.read_probe(self.reference_dir('ivector_feature.hdf5'))
self.assertTrue(numpy.allclose(probe,projected))
# score with projected feature and compare to the weird reference score ...
# This in not implemented yet
# score with a concatenation of the probe
# This is not implemented yet
"""
ivec2.train_projector(train_data, temp_file)
assert os.path.exists(temp_file)
if regenerate_refs: shutil.copy(temp_file, reference_file)
# check projection matrix
ivec1.load_projector(reference_file)
ivec2.load_projector(temp_file)
assert ivec1.ubm.is_similar_to(ivec2.ubm)
assert ivec1.tv.is_similar_to(ivec2.tv)
assert ivec1.whitener.is_similar_to(ivec2.whitener)
finally:
if os.path.exists(temp_file): os.remove(temp_file)
# generate and project random feature
feature = utils.random_array((20,45), -5., 5., seed=84)
projected = ivec1.project(feature)
_compare(projected, pkg_resources.resource_filename('bob.bio.gmm.test', 'data/ivector_projected.hdf5'), ivec1.write_feature, ivec1.read_feature)
# enroll model from random features
random_features = utils.random_training_set((20,45), count=5, minimum=-5., maximum=5.)
enroll_features = [ivec1.project(feature) for feature in random_features]
model = ivec1.enroll(enroll_features)
_compare(model, pkg_resources.resource_filename('bob.bio.gmm.test', 'data/ivector_model.hdf5'), ivec1.write_model, ivec1.read_model)
# compare model with probe
probe = ivec1.read_probe(pkg_resources.resource_filename('bob.bio.gmm.test', 'data/ivector_projected.hdf5'))
reference_score = -0.00187151
assert abs(ivec1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (ivec1.score(model, probe), reference_score)
# TODO: implement that
assert abs(ivec1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5
......@@ -121,6 +121,7 @@ setup(
'gmm-regular = bob.bio.gmm.config.algorithm.gmm_regular:algorithm',
'jfa = bob.bio.gmm.config.algorithm.jfa:algorithm',
'isv = bob.bio.gmm.config.algorithm.isv:algorithm',
'ivector = bob.bio.gmm.config.algorithm.ivector:algorithm',
],
},
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment