Commit 78f69063 authored by Elie KHOURY

added post processing steps for I-Vector (sequential and parallel)

parent e48dd818
@@ -24,6 +24,13 @@ class IVector (GMM):
       subspace_dimension_of_t,        # T subspace dimension
       tv_training_iterations = 25,    # Number of EM iterations for the TV training
       update_sigma = True,
+      use_lda = False,
+      use_wccn = False,
+      use_plda = False,
+      lda_dim = 50,
+      plda_dim_F = 50,
+      plda_dim_G = 50,
+      plda_training_iterations = 50,
       # parameters of the GMM
       **kwargs
   ):
@@ -37,11 +44,18 @@ class IVector (GMM):
       performs_projection = True,
       use_projected_features_for_enrollment = True,
       requires_enroller_training = False,  # not needed anymore because it's done while training the projector
-      split_training_features_by_client = False,
+      split_training_features_by_client = True,
       subspace_dimension_of_t = subspace_dimension_of_t,
       tv_training_iterations = tv_training_iterations,
       update_sigma = update_sigma,
+      use_lda = use_lda,
+      use_wccn = use_wccn,
+      use_plda = use_plda,
+      lda_dim = lda_dim,
+      plda_dim_F = plda_dim_F,
+      plda_dim_G = plda_dim_G,
+      plda_training_iterations = plda_training_iterations,
       multiple_model_scoring = None,
       multiple_probe_scoring = None,
@@ -49,19 +63,29 @@ class IVector (GMM):
     )
     self.update_sigma = update_sigma
+    self.use_lda = use_lda
+    self.use_wccn = use_wccn
+    self.use_plda = use_plda
     self.subspace_dimension_of_t = subspace_dimension_of_t
     self.tv_training_iterations = tv_training_iterations
     self.ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=update_sigma)
     self.whitening_trainer = bob.learn.linear.WhiteningTrainer()
+    self.lda_dim = lda_dim
+    self.lda_trainer = bob.learn.linear.FisherLDATrainer(strip_to_rank=False)
+    self.wccn_trainer = bob.learn.linear.WCCNTrainer()
+    self.plda_trainer = bob.learn.em.PLDATrainer()
+    self.plda_dim_F = plda_dim_F
+    self.plda_dim_G = plda_dim_G
+    self.plda_training_iterations = plda_training_iterations

-  def _check_projected(self, feature):
+  def _check_ivector(self, feature):
     """Checks that the features are appropriate"""
     if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64:
       raise ValueError("The given feature is not appropriate")
+    if self.whitener is not None and feature.shape[0] != self.whitener.shape[1]:
+      raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.whitener.shape[1], feature.shape[0]))

   def train_ivector(self, training_stats):
     logger.info(" -> Training IVector enroller")
@@ -72,31 +96,76 @@ class IVector (GMM):
   def train_whitener(self, training_features):
+    logger.info(" -> Training Whitening")
     ivectors_matrix = numpy.vstack(training_features)
     # create a Linear Machine
     self.whitener = bob.learn.linear.Machine(ivectors_matrix.shape[1], ivectors_matrix.shape[1])
     # create the whitening trainer
     self.whitening_trainer.train(ivectors_matrix, self.whitener)

+  def train_lda(self, training_features):
+    logger.info(" -> Training LDA projector")
+    self.lda, __eig_vals = self.lda_trainer.train(training_features)
+    # resize the machine if desired
+    if self.lda_dim:
+      self.lda.resize(self.lda.shape[0], self.lda_dim)
+
+  def train_wccn(self, training_features):
+    logger.info(" -> Training WCCN projector")
+    self.wccn = self.wccn_trainer.train(training_features)
+
+  def train_plda(self, training_features):
+    logger.info(" -> Training PLDA projector")
+    self.plda_trainer.init_f_method = 'BETWEEN_SCATTER'
+    self.plda_trainer.init_g_method = 'WITHIN_SCATTER'
+    self.plda_trainer.init_sigma_method = 'VARIANCE_DATA'
+    variance_flooring = 1e-5
+    training_features = [numpy.vstack(client) for client in training_features]
+    input_dim = training_features[0].shape[1]
+    self.plda_base = bob.learn.em.PLDABase(input_dim, self.plda_dim_F, self.plda_dim_G, variance_flooring)
+    bob.learn.em.train(self.plda_trainer, self.plda_base, training_features, self.plda_training_iterations, rng=self.rng)

   def train_projector(self, train_features, projector_file):
     """Train Projector and Enroller at the same time"""
-    [self._check_feature(feature) for feature in train_features]
+    [self._check_feature(feature) for client in train_features for feature in client]
+    train_features_flatten = [feature for client in train_features for feature in client]

     # train UBM
-    data = numpy.vstack(train_features)
+    data = numpy.vstack(train_features_flatten)
     self.train_ubm(data)
     del data

-    # train IVector
+    # project training data
     logger.info(" -> Projecting training data")
-    training_stats = [self.project_ubm(feature) for feature in train_features]
+    train_gmm_stats = [[self.project_ubm(feature) for feature in client] for client in train_features]
+    train_gmm_stats_flatten = [stats for client in train_gmm_stats for stats in client]

     # train IVector
-    self.train_ivector(training_stats)
+    logger.info(" -> Training IVector")
+    self.train_ivector(train_gmm_stats_flatten)

     # project training i-vectors
-    whitening_train_data = [self.project_ivector(stats) for stats in training_stats]
-    self.train_whitener(whitening_train_data)
+    train_ivectors = [[self.project_ivector(stats) for stats in client] for client in train_gmm_stats]
+    train_ivectors_flatten = [stats for client in train_ivectors for stats in client]
+
+    # train Whitening
+    self.train_whitener(train_ivectors_flatten)
+    # whiten and length-normalize the i-vectors
+    train_ivectors = [[self.project_whitening(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_lda:
+      self.train_lda(train_ivectors)
+      train_ivectors = [[self.project_lda(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_wccn:
+      self.train_wccn(train_ivectors)
+      train_ivectors = [[self.project_wccn(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_plda:
+      self.train_plda(train_ivectors)

     # save
     self.save_projector(projector_file)
@@ -118,7 +187,25 @@ class IVector (GMM):
     hdf5file.create_group('Whitener')
     hdf5file.cd('Whitener')
     self.whitener.save(hdf5file)
+    if self.use_lda:
+      hdf5file.cd('/')
+      hdf5file.create_group('LDA')
+      hdf5file.cd('LDA')
+      self.lda.save(hdf5file)
+    if self.use_wccn:
+      hdf5file.cd('/')
+      hdf5file.create_group('WCCN')
+      hdf5file.cd('WCCN')
+      self.wccn.save(hdf5file)
+    if self.use_plda:
+      hdf5file.cd('/')
+      hdf5file.create_group('PLDA')
+      hdf5file.cd('PLDA')
+      self.plda_base.save(hdf5file)

   def load_tv(self, tv_file):
     hdf5file = bob.io.base.HDF5File(tv_file)
@@ -130,7 +217,19 @@ class IVector (GMM):
     hdf5file = bob.io.base.HDF5File(whitening_file)
     self.whitener = bob.learn.linear.Machine(hdf5file)

+  def load_lda(self, lda_file):
+    hdf5file = bob.io.base.HDF5File(lda_file)
+    self.lda = bob.learn.linear.Machine(hdf5file)
+
+  def load_wccn(self, wccn_file):
+    hdf5file = bob.io.base.HDF5File(wccn_file)
+    self.wccn = bob.learn.linear.Machine(hdf5file)
+
+  def load_plda(self, plda_file):
+    hdf5file = bob.io.base.HDF5File(plda_file)
+    self.plda_base = bob.learn.em.PLDABase(hdf5file)
+    self.plda_machine = bob.learn.em.PLDAMachine(self.plda_base)

   def load_projector(self, projector_file):
     """Load the GMM and the ISV model from the same HDF5 file"""
     hdf5file = bob.io.base.HDF5File(projector_file)
@@ -146,6 +245,21 @@ class IVector (GMM):
     # Load Whitening
     hdf5file.cd('/Whitener')
     self.load_whitener(hdf5file)
+    if self.use_lda:
+      # Load LDA
+      hdf5file.cd('/LDA')
+      self.load_lda(hdf5file)
+    if self.use_wccn:
+      # Load WCCN
+      hdf5file.cd('/WCCN')
+      self.load_wccn(hdf5file)
+    if self.use_plda:
+      # Load PLDA
+      hdf5file.cd('/PLDA')
+      self.load_plda(hdf5file)

   def project_ivector(self, gmm_stats):
@@ -155,6 +269,16 @@ class IVector (GMM):
     whitened = self.whitener.forward(ivector)
     return whitened / numpy.linalg.norm(whitened)

+  def project_lda(self, ivector):
+    out_ivector = numpy.ndarray(self.lda.shape[1], numpy.float64)
+    self.lda(ivector, out_ivector)
+    return out_ivector
+
+  def project_wccn(self, ivector):
+    out_ivector = numpy.ndarray(self.wccn.shape[1], numpy.float64)
+    self.wccn(ivector, out_ivector)
+    return out_ivector

   #######################################################
   ############## IVector projection #####################
   def project(self, feature_array):
@@ -165,10 +289,17 @@ class IVector (GMM):
     # project I-Vector
     ivector = self.project_ivector(projected_ubm)
     # whiten I-Vector
-    return self.project_whitening(ivector)
+    ivector = self.project_whitening(ivector)
+    # LDA projection
+    if self.use_lda:
+      ivector = self.project_lda(ivector)
+    # WCCN projection
+    if self.use_wccn:
+      ivector = self.project_wccn(ivector)
+    return ivector

   #######################################################
-  ################## ISV model enroll ####################
+  ############## Read / Write I-Vectors #################
   def write_feature(self, data, feature_file):
     """Saves the feature, which is the (whitened) I-Vector."""
     bob.bio.base.save(data, feature_file)
@@ -178,21 +309,28 @@ class IVector (GMM):
     return bob.bio.base.load(feature_file)

   #######################################################
   ################## Model Enrollment ###################
   def enroll(self, enroll_features):
     """Performs IVector enrollment"""
-    [self._check_projected(feature) for feature in enroll_features]
-    model = numpy.mean(numpy.vstack(enroll_features), axis=0)
-    return model
+    [self._check_ivector(feature) for feature in enroll_features]
+    average_ivector = numpy.mean(numpy.vstack(enroll_features), axis=0)
+    if self.use_plda:
+      average_ivector = average_ivector.reshape(1, -1)
+      self.plda_trainer.enroll(self.plda_machine, average_ivector)
+      return self.plda_machine
+    else:
+      return average_ivector

   ######################################################
   ################ Feature comparison ##################
   def read_model(self, model_file):
     """Reads the whitened i-vector that holds the model"""
-    return bob.bio.base.load(model_file)
+    if self.use_plda:
+      return bob.learn.em.PLDAMachine(bob.io.base.HDF5File(str(model_file)), self.plda_base)
+    else:
+      return bob.bio.base.load(model_file)

   def read_probe(self, probe_file):
     """read probe file which is an i-vector"""
@@ -200,13 +338,15 @@ class IVector (GMM):
   def score(self, model, probe):
     """Computes the score for the given model and the given probe."""
-    self._check_projected(model)
-    self._check_projected(probe)
-    return numpy.dot(model/numpy.linalg.norm(model), probe/numpy.linalg.norm(probe))
+    self._check_ivector(probe)
+    if self.use_plda:
+      return model.log_likelihood_ratio(probe)
+    else:
+      self._check_ivector(model)
+      return numpy.dot(model/numpy.linalg.norm(model), probe/numpy.linalg.norm(probe))

   def score_for_multiple_probes(self, model, probes):
     """This function computes the score between the given model and several given probe files."""
-    [self._check_projected(probe) for probe in probes]
     probe = numpy.mean(numpy.vstack(probes), axis=0)
     return self.score(model, probe)
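As a reading aid (editorial sketch, not part of the commit), here is a minimal numpy illustration of the two scoring paths the class now supports; the i-vector values are made up:

import numpy

# Toy whitened, length-normalized i-vectors (hypothetical values, 4 dimensions).
model = numpy.array([0.5, 0.5, 0.5, 0.5])
probe = numpy.array([0.48, 0.52, 0.47, 0.53])

# Cosine scoring (use_plda = False): normalize both i-vectors, then dot product.
score = numpy.dot(model / numpy.linalg.norm(model),
                  probe / numpy.linalg.norm(probe))
print(score)  # ~0.999 here; close to 1.0 for well-matching i-vectors

# PLDA scoring (use_plda = True) instead calls, on the enrolled PLDAMachine:
#   score = model.log_likelihood_ratio(probe)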
import bob.bio.gmm

algorithm = bob.bio.gmm.algorithm.IVector(
    # IVector parameters
    subspace_dimension_of_t = 100,
    update_sigma = True,
    tv_training_iterations = 25,  # Number of EM iterations for the TV training
    # GMM parameters
    number_of_gaussians = 256,
    # post-processing parameters (LDA + WCCN + PLDA)
    use_lda = True,
    use_wccn = True,
    use_plda = True,
    lda_dim = 50,
    plda_dim_F = 50,
    plda_dim_G = 50,
    plda_training_iterations = 200,
)
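Assuming the entry point registered in the setup.py hunk at the end of this commit, this configuration should then be loadable as a named resource; a minimal sketch:

import bob.bio.base

# Load the algorithm registered under the new 'ivector-lda-wccn-plda' entry point.
algorithm = bob.bio.base.load_resource('ivector-lda-wccn-plda', 'algorithm')
assert algorithm.use_lda and algorithm.use_wccn and algorithm.use_plda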
@@ -33,7 +33,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
   # Add sub-tasks that can be executed by this script
   parser = parsers['main']
   parser.add_argument('--sub-task',
-      choices = ('preprocess', 'train-extractor', 'extract', 'normalize-features', 'kmeans-init', 'kmeans-e-step', 'kmeans-m-step', 'gmm-init', 'gmm-e-step', 'gmm-m-step', 'gmm-project', 'ivector-e-step', 'ivector-m-step', 'ivector-project', 'train-whitener', 'project', 'enroll', 'compute-scores', 'concatenate'),
+      choices = ('preprocess', 'train-extractor', 'extract', 'normalize-features', 'kmeans-init', 'kmeans-e-step', 'kmeans-m-step', 'gmm-init', 'gmm-e-step', 'gmm-m-step', 'gmm-project', 'ivector-e-step', 'ivector-m-step', 'ivector-training', 'ivector-projection', 'train-whitener', 'whitening-projection', 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection', 'project', 'train-plda', 'save-projector', 'enroll', 'compute-scores', 'concatenate'),
       help = argparse.SUPPRESS)  #'Executes a subtask (FOR INTERNAL USE ONLY!!!)'
   parser.add_argument('--iteration', type = int,
       help = argparse.SUPPRESS)  #'Which type of models to generate (Normal or TModels)'
@@ -46,7 +46,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
   # now that we have set up everything, get the command line arguments
   args = base_tools.initialize(parsers, command_line_parameters,
-      skips = ['preprocessing', 'extractor-training', 'extraction', 'normalization', 'kmeans', 'gmm', 'ivector', 'whitening', 'projection', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']
+      skips = ['preprocessing', 'extractor-training', 'extraction', 'normalization', 'kmeans', 'gmm', 'ivector-training', 'ivector-projection', 'train-whitener', 'whitening-projection', 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection', 'projection', 'train-plda', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']
   )
   args.skip_projector_training = True
@@ -71,7 +71,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
   # now, add the extra steps for ivector
   algorithm = tools.base(args.algorithm)
-  if not args.skip_ivector:
+  if not args.skip_ivector_training:
     # gmm projection
     job_ids['gmm-projection'] = submitter.submit(
         '--sub-task gmm-project',
@@ -99,18 +99,19 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
         **args.grid.training_queue)
     deps.append(job_ids['ivector-m-step'])

-  # whitening
-  if not args.skip_whitening:
-    # ivector projection
+  # ivector projection
+  if not args.skip_ivector_projection:
     job_ids['ivector-projection'] = submitter.submit(
-        '--sub-task ivector-project',
+        '--sub-task ivector-projection',
         name = 'pro-ivector',
         number_of_parallel_jobs = args.grid.number_of_projection_jobs,
         dependencies = deps,
         **args.grid.projection_queue)
     deps.append(job_ids['ivector-projection'])

-  # TV training
+  # train whitener
+  if not args.skip_train_whitener:
     job_ids['whitener-training'] = submitter.submit(
         '--sub-task train-whitener',
         name = 'train-whitener',
@@ -118,6 +119,71 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
         **args.grid.training_queue)
     deps.append(job_ids['whitener-training'])

+  # whitening projection
+  if not args.skip_whitening_projection:
+    job_ids['whitening-projection'] = submitter.submit(
+        '--sub-task whitening-projection',
+        name = 'whitened',
+        number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+        dependencies = deps,
+        **args.grid.projection_queue)
+    deps.append(job_ids['whitening-projection'])
+
+  # train LDA
+  if not args.skip_train_lda:
+    job_ids['lda-training'] = submitter.submit(
+        '--sub-task train-lda',
+        name = 'train-lda',
+        dependencies = deps,
+        **args.grid.training_queue)
+    deps.append(job_ids['lda-training'])
+
+  # LDA projection
+  if not args.skip_lda_projection:
+    job_ids['lda-projection'] = submitter.submit(
+        '--sub-task lda-projection',
+        name = 'lda_projection',
+        number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+        dependencies = deps,
+        **args.grid.projection_queue)
+    deps.append(job_ids['lda-projection'])
+
+  # train WCCN
+  if not args.skip_train_wccn:
+    job_ids['wccn-training'] = submitter.submit(
+        '--sub-task train-wccn',
+        name = 'train-wccn',
+        dependencies = deps,
+        **args.grid.training_queue)
+    deps.append(job_ids['wccn-training'])
+
+  # WCCN projection
+  if not args.skip_wccn_projection:
+    job_ids['wccn-projection'] = submitter.submit(
+        '--sub-task wccn-projection',
+        name = 'wccn_projection',
+        number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+        dependencies = deps,
+        **args.grid.projection_queue)
+    deps.append(job_ids['wccn-projection'])
+
+  # train PLDA
+  if not args.skip_train_plda:
+    job_ids['plda-training'] = submitter.submit(
+        '--sub-task train-plda',
+        name = 'train-plda',
+        dependencies = deps,
+        **args.grid.training_queue)
+    deps.append(job_ids['plda-training'])
+
+  # save projector
+  job_ids['save-projector'] = submitter.submit(
+      '--sub-task save-projector',
+      name = 'save-projector',
+      dependencies = deps,
+      **args.grid.training_queue)
+  deps.append(job_ids['save-projector'])

   return job_ids, deps
@@ -134,7 +200,7 @@ def execute(args):
   # now, check what we can do
   algorithm = tools.base(args.algorithm)

   # the file selector object
   fs = tools.FileSelector.instance()
@@ -161,7 +227,7 @@ def execute(args):
         clean = args.clean_intermediate,
         force = args.force)
-  elif args.sub_task == 'ivector-project':
+  elif args.sub_task == 'ivector-projection':
     tools.ivector_project(
         algorithm,
         indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
@@ -172,13 +238,54 @@ def execute(args):
         algorithm,
         force = args.force)
-  else:
+  elif args.sub_task == 'whitening-projection':
+    tools.whitening_project(
+        algorithm,
+        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+        force = args.force)
+  elif args.sub_task == 'train-lda':
+    if algorithm.use_lda:
+      tools.train_lda(
+          algorithm,
+          force = args.force)
+  elif args.sub_task == 'lda-projection':
+    if algorithm.use_lda:
+      tools.lda_project(
+          algorithm,
+          indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+          force = args.force)
+  elif args.sub_task == 'train-wccn':
+    if algorithm.use_wccn:
+      tools.train_wccn(
+          algorithm,
+          force = args.force)
+  elif args.sub_task == 'wccn-projection':
+    if algorithm.use_wccn:
+      tools.wccn_project(
+          algorithm,
+          indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+          force = args.force)
+  elif args.sub_task == 'train-plda':
+    if algorithm.use_plda:
+      tools.train_plda(
+          algorithm,
+          force = args.force)
+  elif args.sub_task == 'save-projector':
+    tools.save_projector(
+        algorithm,
+        force = args.force)
+  # Not our keyword...
+  else:
     return False
   return True

 def verify(args, command_line_parameters, external_fake_job_id = 0):
   """This is the main entry point for computing verification experiments.
   You just have to specify configurations for any of the steps of the toolchain, which are:
...
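Taken together, the new sub-tasks run as one linear chain of grid jobs; the comment-only sketch below (editorial, not code from this commit) lists the submission order using the names from the --sub-task choices above. Each step can be disabled with its --skip flag, and the execute() branches make the LDA/WCCN/PLDA steps no-ops unless the corresponding use_* flag is set on the algorithm:

# gmm-project                      -> project training features with the trained UBM
# ivector-e-step / ivector-m-step  -> EM training of the Total Variability matrix
# ivector-projection               -> extract raw i-vectors
# train-whitener                   -> train the whitening transform
# whitening-projection             -> whiten + length-normalize the i-vectors
# train-lda / lda-projection       -> only effective if algorithm.use_lda
# train-wccn / wccn-projection     -> only effective if algorithm.use_wccn
# train-plda                       -> only effective if algorithm.use_plda
# save-projector                   -> bundle UBM, TV, whitener, LDA, WCCN and PLDA into the projector file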
@@ -36,11 +36,16 @@ def add_parallel_gmm_options(parsers, sub_module = None):
       help = 'The sub-directory (relative to --temp-directory), where intermediate ivector files should be stored')
   sub_dir_group.add_argument('--projected-ivector-directory', default = 'projected_ivector_temp',
       help = 'The sub-directory (relative to --temp-directory), where intermediate projected ivector training files should be stored')
+  sub_dir_group.add_argument('--whitened-directory', default = 'whitened_temp',
+      help = 'The sub-directory (relative to --temp-directory), where intermediate whitened ivector training files should be stored')
+  sub_dir_group.add_argument('--lda-projected-directory', default = 'lda_projected_temp',
+      help = 'The sub-directory (relative to --temp-directory), where intermediate LDA projected ivector training files should be stored')
+  sub_dir_group.add_argument('--wccn-projected-directory', default = 'wccn_projected_temp',
+      help = 'The sub-directory (relative to --temp-directory), where intermediate WCCN projected ivector training files should be stored')
   flag_group.add_argument('-i', '--tv-start-iteration', type=int, default=0,
       help = 'Specify the first iteration for the IVector training (i.e. to restart from there)')

 # Functions to be added to the FileSelector class, once it is instantiated
 def _kmeans_intermediate_file(self, round):
   return os.path.join(self.directories['kmeans'], 'round_%05d' % round, 'kmeans.hdf5')
@@ -91,5 +96,11 @@ def initialize_parallel_gmm(args, sub_module = None):
   fs.directories['ivector'] = os.path.join(args.temp_directory, sub_dir, args.ivector_directory)
   fs.tv_file = os.path.join(args.temp_directory, sub_dir, "tv.hdf5")
-  fs.directories['projected_ivector'] = os.path.join(args.temp_directory, sub_dir, args.projected_ivector_directory)
   fs.whitener_file = os.path.join(args.temp_directory, sub_dir, "whitener.hdf5")
+  fs.lda_file = os.path.join(args.temp_directory, sub_dir, "lda.hdf5")
+  fs.wccn_file = os.path.join(args.temp_directory, sub_dir, "wccn.hdf5")
+  fs.plda_file = os.path.join(args.temp_directory, sub_dir, "plda.hdf5")
+  fs.directories['projected_ivector'] = os.path.join(args.temp_directory, sub_dir, args.projected_ivector_directory)
+  fs.directories['whitened'] = os.path.join(args.temp_directory, sub_dir, args.whitened_directory)
+  fs.directories['lda_projected'] = os.path.join(args.temp_directory, sub_dir, args.lda_projected_directory)
+  fs.directories['wccn_projected'] = os.path.join(args.temp_directory, sub_dir, args.wccn_projected_directory)
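For orientation, a comment-only summary (editorial, not part of the commit) of the intermediate layout these options define, relative to --temp-directory/&lt;sub_dir&gt; and using the defaults above:

# projected_ivector_temp/  raw i-vectors extracted with the TV matrix
# whitened_temp/           whitened, length-normalized i-vectors
# lda_projected_temp/      i-vectors after LDA projection
# wccn_projected_temp/     i-vectors after WCCN projection
# tv.hdf5, whitener.hdf5, lda.hdf5, wccn.hdf5, plda.hdf5  -> the trained models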
@@ -173,9 +173,144 @@ def train_whitener(algorithm, force=False):
     bob.io.base.create_directories_safe(os.path.dirname(fs.whitener_file))
     bob.bio.base.save(algorithm.whitener, fs.whitener_file)
-  # finally, save the projector into one file
-  algorithm.load_ubm(fs.ubm_file)
-  algorithm.load_tv(fs.tv_file)
-  algorithm.load_whitener(fs.whitener_file)
-  logger.info("Writing projector into file %s", fs.projector_file)
-  algorithm.save_projector(fs.projector_file)

+def whitening_project(algorithm, indices, force=False):
+  """Performs whitening projection of the i-vectors"""
+  fs = FileSelector.instance()
+  algorithm.load_whitener(fs.whitener_file)
+
+  ivector_files = fs.training_list('projected_ivector', 'train_projector')
+  whitened_files = fs.training_list('whitened', 'train_projector')
+
+  logger.info("IVector training: whitening ivectors range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['projected_ivector'], fs.directories['whitened'])
+  # project the features
+  for i in range(indices[0], indices[1]):
+    ivector_file = ivector_files[i]
+    whitened_file = whitened_files[i]
+    if not utils.check_file(whitened_file, force):
+      # load feature
+      ivector = algorithm.read_feature(ivector_file)
+      # project feature
+      whitened = algorithm.project_whitening(ivector)
+      # write it
+      bob.io.base.create_directories_safe(os.path.dirname(whitened_file))
+      bob.bio.base.save(whitened, whitened_file)
+def train_lda(algorithm, force=False):
+  """Trains the LDA projector on the whitened i-vectors of the world group."""
+  fs = FileSelector.instance()
+  if utils.check_file(fs.lda_file, force, 1000):
+    logger.info("- LDA projector '%s' already exists.", fs.lda_file)
+  else:
+    train_files = fs.training_list('whitened', 'train_projector', arrange_by_client = True)
+    train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
+    # perform training
+    algorithm.train_lda(train_features)
+    bob.io.base.create_directories_safe(os.path.dirname(fs.lda_file))
+    bob.bio.base.save(algorithm.lda, fs.lda_file)
+def lda_project(algorithm, indices, force=False):
+  """Performs LDA projection of the whitened i-vectors"""
+  fs = FileSelector.instance()
+  algorithm.load_lda(fs.lda_file)
+
+  whitened_files = fs.training_list('whitened', 'train_projector')
+  lda_projected_files = fs.training_list('lda_projected', 'train_projector')
+
+  logger.info("IVector training: LDA projection range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['whitened'], fs.directories['lda_projected'])
+  # project the features
+  for i in range(indices[0], indices[1]):
+    ivector_file = whitened_files[i]
+    lda_projected_file = lda_projected_files[i]
+    if not utils.check_file(lda_projected_file, force):
+      # load feature
+      ivector = algorithm.read_feature(ivector_file)
+      # project feature
+      lda_projected = algorithm.project_lda(ivector)
+      # write it
+      bob.io.base.create_directories_safe(os.path.dirname(lda_projected_file))
+      bob.bio.base.save(lda_projected, lda_projected_file)
+def train_wccn(algorithm, force=False):
+  """Trains the WCCN projector on the (optionally LDA-projected) i-vectors of the world group."""
+  fs = FileSelector.instance()
+  if utils.check_file(fs.wccn_file, force, 1000):
+    logger.info("- WCCN projector '%s' already exists.", fs.wccn_file)
+  else:
+    if algorithm.use_lda:
+      input_label = 'lda_projected'
+    else:
+      input_label = 'whitened'
+    train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True)
+    train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
+    # perform training
+    algorithm.train_wccn(train_features)
+    bob.io.base.create_directories_safe(os.path.dirname(fs.wccn_file))
+    bob.bio.base.save(algorithm.wccn, fs.wccn_file)
+def wccn_project(algorithm, indices, force=False):
+  """Performs WCCN projection of the i-vectors"""
+  fs = FileSelector.instance()
+  algorithm.load_wccn(fs.wccn_file)
+  if algorithm.use_lda:
+    input_label = 'lda_projected'
+  else:
+    input_label = 'whitened'
+
+  input_files = fs.training_list(input_label, 'train_projector')
+  wccn_projected_files = fs.training_list('wccn_projected', 'train_projector')
+
+  logger.info("IVector training: WCCN projection range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories[input_label], fs.directories['wccn_projected'])
+  # project the features
+  for i in range(indices[0], indices[1]):
+    ivector_file = input_files[i]
+    wccn_projected_file = wccn_projected_files[i]
+    if not utils.check_file(wccn_projected_file, force):
+      # load feature
+      ivector = algorithm.read_feature(ivector_file)
+      # project feature
+      wccn_projected = algorithm.project_wccn(ivector)
+      # write it
+      bob.io.base.create_directories_safe(os.path.dirname(wccn_projected_file))
+      bob.bio.base.save(wccn_projected, wccn_projected_file)
+def train_plda(algorithm, force=False):
+  """Trains the PLDA model on the most processed i-vectors available for the world group."""
+  fs = FileSelector.instance()
+  if utils.check_file(fs.plda_file, force, 1000):
+    logger.info("- PLDA projector '%s' already exists.", fs.plda_file)
+  else:
+    if algorithm.use_wccn:
+      input_label = 'wccn_projected'
+    elif algorithm.use_lda:
+      input_label = 'lda_projected'
+    else:
+      input_label = 'whitened'
+    train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True)
+    train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
+    # perform training
+    algorithm.train_plda(train_features)
+    bob.io.base.create_directories_safe(os.path.dirname(fs.plda_file))
+    bob.bio.base.save(algorithm.plda_base, fs.plda_file)
+def save_projector(algorithm, force=False):
+  fs = FileSelector.instance()
+  if utils.check_file(fs.projector_file, force, 1000):
+    logger.info("- Projector '%s' already exists.", fs.projector_file)
+  else:
+    # save the projector into one file
+    algorithm.load_ubm(fs.ubm_file)
+    algorithm.load_tv(fs.tv_file)
+    algorithm.load_whitener(fs.whitener_file)
+    if algorithm.use_lda:
+      algorithm.load_lda(fs.lda_file)
+    if algorithm.use_wccn:
+      algorithm.load_wccn(fs.wccn_file)
+    if algorithm.use_plda:
+      algorithm.load_plda(fs.plda_file)
+    logger.info("Writing projector into file %s", fs.projector_file)
+    algorithm.save_projector(fs.projector_file)
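The input-selection rule repeated in train_wccn, wccn_project and train_plda above reads as one cascade; a hypothetical helper (editorial sketch, not part of the commit) that captures it:

def _plda_input_label(algorithm):
  """Hypothetical helper: each stage consumes the most processed i-vectors
  available, mirroring the if/elif cascade in train_plda above."""
  if algorithm.use_wccn:
    return 'wccn_projected'
  elif algorithm.use_lda:
    return 'lda_projected'
  else:
    return 'whitened'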
@@ -119,11 +119,12 @@ setup(
     ],
     'bob.bio.algorithm': [
       'gmm = bob.bio.gmm.config.algorithm.gmm:algorithm',
       'gmm-regular = bob.bio.gmm.config.algorithm.gmm_regular:algorithm',
       'jfa = bob.bio.gmm.config.algorithm.jfa:algorithm',
       'isv = bob.bio.gmm.config.algorithm.isv:algorithm',
-      'ivector = bob.bio.gmm.config.algorithm.ivector:algorithm',
+      'ivector-cosine = bob.bio.gmm.config.algorithm.ivector_cosine:algorithm',
+      'ivector-lda-wccn-plda = bob.bio.gmm.config.algorithm.ivector_lda_wccn_plda:algorithm',
     ],
   },
...