diff --git a/bob/bio/gmm/algorithm/IVector.py b/bob/bio/gmm/algorithm/IVector.py
index 5cb62b79349209d5ce8a821b2b10babf8932f817..5d98a53492abe623e6544e59251f779a615cb665 100644
--- a/bob/bio/gmm/algorithm/IVector.py
+++ b/bob/bio/gmm/algorithm/IVector.py
@@ -24,6 +24,13 @@ class IVector (GMM):
       subspace_dimension_of_t,       # T subspace dimension
       tv_training_iterations = 25,   # Number of EM iterations for the JFA training
       update_sigma = True,
+      use_lda = False,
+      use_wccn = False,
+      use_plda = False,
+      lda_dim = 50,
+      plda_dim_F = 50,
+      plda_dim_G = 50,
+      plda_training_iterations = 50,
       # parameters of the GMM
       **kwargs
   ):
@@ -37,11 +44,18 @@ class IVector (GMM):
         performs_projection = True,
         use_projected_features_for_enrollment = True,
         requires_enroller_training = False, # not needed anymore because it's done while training the projector
-        split_training_features_by_client = False,
+        split_training_features_by_client = True,
 
         subspace_dimension_of_t = subspace_dimension_of_t,
         tv_training_iterations = tv_training_iterations,
         update_sigma = update_sigma,
+        use_lda = use_lda,
+        use_wccn = use_wccn,
+        use_plda = use_plda,
+        lda_dim = lda_dim,
+        plda_dim_F = plda_dim_F,
+        plda_dim_G = plda_dim_G,
+        plda_training_iterations = plda_training_iterations,
 
         multiple_model_scoring = None,
         multiple_probe_scoring = None,
@@ -49,19 +63,29 @@ class IVector (GMM):
     )
 
     self.update_sigma = update_sigma
+    self.use_lda = use_lda
+    self.use_wccn = use_wccn
+    self.use_plda = use_plda
     self.subspace_dimension_of_t = subspace_dimension_of_t
     self.tv_training_iterations = tv_training_iterations
+
     self.ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=update_sigma)
     self.whitening_trainer = bob.learn.linear.WhiteningTrainer()
-
-
-  def _check_projected(self, feature):
+
+    self.lda_dim = lda_dim
+    self.lda_trainer = bob.learn.linear.FisherLDATrainer(strip_to_rank=False)
+    self.wccn_trainer = bob.learn.linear.WCCNTrainer()
+    self.plda_trainer = bob.learn.em.PLDATrainer()
+    self.plda_dim_F = plda_dim_F
+    self.plda_dim_G = plda_dim_G
+    self.plda_training_iterations = plda_training_iterations
+
+
+  def _check_ivector(self, feature):
     """Checks that the features are appropriate"""
     if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64:
       raise ValueError("The given feature is not appropriate")
-    if self.whitener is not None and feature.shape[0] != self.whitener.shape[1]:
-      raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.whitener.shape[1], feature.shape[0]))
-
 
   def train_ivector(self, training_stats):
     logger.info("  -> Training IVector enroller")
@@ -72,31 +96,76 @@ class IVector (GMM):
 
   def train_whitener(self, training_features):
+    logger.info("  -> Training Whitening")
     ivectors_matrix = numpy.vstack(training_features)
     # create a Linear Machine
     self.whitener = bob.learn.linear.Machine(ivectors_matrix.shape[1],ivectors_matrix.shape[1])
     # create the whitening trainer
     self.whitening_trainer.train(ivectors_matrix, self.whitener)
 
+  def train_lda(self, training_features):
+    logger.info("  -> Training LDA projector")
+    self.lda, __eig_vals = self.lda_trainer.train(training_features)
+    # resize the machine if desired
+    if self.lda_dim:
+      self.lda.resize(self.lda.shape[0], self.lda_dim)
+
+  def train_wccn(self, training_features):
+    logger.info("  -> Training WCCN projector")
+    self.wccn = self.wccn_trainer.train(training_features)
+
+  def train_plda(self, training_features):
+    logger.info("  -> Training PLDA projector")
+    self.plda_trainer.init_f_method = 'BETWEEN_SCATTER'
+    self.plda_trainer.init_g_method = 'WITHIN_SCATTER'
+    self.plda_trainer.init_sigma_method = 'VARIANCE_DATA'
+    variance_flooring = 1e-5
+    training_features = [numpy.vstack(client) for client in training_features]
+    input_dim = training_features[0].shape[1]
+    self.plda_base = bob.learn.em.PLDABase(input_dim, self.plda_dim_F, self.plda_dim_G, variance_flooring)
+    bob.learn.em.train(self.plda_trainer, self.plda_base, training_features, self.plda_training_iterations, rng=self.rng)
+
 
   def train_projector(self, train_features, projector_file):
     """Train Projector and Enroller at the same time"""
-    [self._check_feature(feature) for feature in train_features]
+
+    [self._check_feature(feature) for client in train_features for feature in client]
+    train_features_flatten = [feature for client in train_features for feature in client]
 
     # train UBM
-    data = numpy.vstack(train_features)
+    data = numpy.vstack(train_features_flatten)
     self.train_ubm(data)
     del data
 
-    # train IVector
+    # project training data
     logger.info("  -> Projecting training data")
-    training_stats = [self.project_ubm(feature) for feature in train_features]
-    self.train_ivector(training_stats)
+    train_gmm_stats = [[self.project_ubm(feature) for feature in client] for client in train_features]
+    train_gmm_stats_flatten = [stats for client in train_gmm_stats for stats in client]
+
+    # train IVector
+    logger.info("  -> Training IVector")
+    self.train_ivector(train_gmm_stats_flatten)
 
     # project training i-vectors
-    whitening_train_data = [self.project_ivector(stats) for stats in training_stats]
-    self.train_whitener(whitening_train_data)
+    train_ivectors = [[self.project_ivector(stats) for stats in client] for client in train_gmm_stats]
+    train_ivectors_flatten = [stats for client in train_ivectors for stats in client]
+
+    # Train Whitening
+    self.train_whitener(train_ivectors_flatten)
+
+    # whitening and length-normalizing i-vectors
+    train_ivectors = [[self.project_whitening(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_lda:
+      self.train_lda(train_ivectors)
+      train_ivectors = [[self.project_lda(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_wccn:
+      self.train_wccn(train_ivectors)
+      train_ivectors = [[self.project_wccn(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_plda:
+      self.train_plda(train_ivectors)
 
     # save
     self.save_projector(projector_file)
@@ -118,7 +187,25 @@ class IVector (GMM):
     hdf5file.create_group('Whitener')
     hdf5file.cd('Whitener')
     self.whitener.save(hdf5file)
-
+
+    if self.use_lda:
+      hdf5file.cd('/')
+      hdf5file.create_group('LDA')
+      hdf5file.cd('LDA')
+      self.lda.save(hdf5file)
+
+    if self.use_wccn:
+      hdf5file.cd('/')
+      hdf5file.create_group('WCCN')
+      hdf5file.cd('WCCN')
+      self.wccn.save(hdf5file)
+
+    if self.use_plda:
+      hdf5file.cd('/')
+      hdf5file.create_group('PLDA')
+      hdf5file.cd('PLDA')
+      self.plda_base.save(hdf5file)
+
 
   def load_tv(self, tv_file):
     hdf5file = bob.io.base.HDF5File(tv_file)
@@ -130,7 +217,19 @@ class IVector (GMM):
     hdf5file = bob.io.base.HDF5File(whitening_file)
     self.whitener = bob.learn.linear.Machine(hdf5file)
-
+
+  def load_lda(self, lda_file):
+    hdf5file = bob.io.base.HDF5File(lda_file)
+    self.lda = bob.learn.linear.Machine(hdf5file)
+
+  def load_wccn(self, wccn_file):
+    hdf5file = bob.io.base.HDF5File(wccn_file)
+    self.wccn = bob.learn.linear.Machine(hdf5file)
+
+  def load_plda(self, plda_file):
+    hdf5file = bob.io.base.HDF5File(plda_file)
+    self.plda_base = bob.learn.em.PLDABase(hdf5file)
+    self.plda_machine = bob.learn.em.PLDAMachine(self.plda_base)
+
   def load_projector(self, projector_file):
     """Load the GMM and the ISV model from the same HDF5 file"""
     hdf5file = bob.io.base.HDF5File(projector_file)
@@ -146,6 +245,21 @@ class IVector (GMM):
     # Load Whitening
     hdf5file.cd('/Whitener')
     self.load_whitener(hdf5file)
+
+    if self.use_lda:
+      # Load LDA
+      hdf5file.cd('/LDA')
+      self.load_lda(hdf5file)
+
+    if self.use_wccn:
+      # Load WCCN
+      hdf5file.cd('/WCCN')
+      self.load_wccn(hdf5file)
+
+    if self.use_plda:
+      # Load PLDA
+      hdf5file.cd('/PLDA')
+      self.load_plda(hdf5file)
 
 
   def project_ivector(self, gmm_stats):
@@ -155,6 +269,16 @@ class IVector (GMM):
     whitened = self.whitener.forward(ivector)
     return whitened / numpy.linalg.norm(whitened)
 
+  def project_lda(self, ivector):
+    out_ivector = numpy.ndarray(self.lda.shape[1], numpy.float64)
+    self.lda(ivector, out_ivector)
+    return out_ivector
+
+  def project_wccn(self, ivector):
+    out_ivector = numpy.ndarray(self.wccn.shape[1], numpy.float64)
+    self.wccn(ivector, out_ivector)
+    return out_ivector
+
   #######################################################
   ############## IVector projection #####################
   def project(self, feature_array):
@@ -165,10 +289,17 @@ class IVector (GMM):
     # project I-Vector
     ivector = self.project_ivector(projected_ubm)
     # whiten I-Vector
-    return self.project_whitening(ivector)
+    ivector = self.project_whitening(ivector)
+    # LDA projection
+    if self.use_lda:
+      ivector = self.project_lda(ivector)
+    # WCCN projection
+    if self.use_wccn:
+      ivector = self.project_wccn(ivector)
+    return ivector
 
   #######################################################
-  ################## ISV model enroll ####################
+  ################## Read / Write I-Vectors ####################
   def write_feature(self, data, feature_file):
     """Saves the feature, which is the (whitened) I-Vector."""
     bob.bio.base.save(data, feature_file)
@@ -178,21 +309,28 @@ class IVector (GMM):
     return bob.bio.base.load(feature_file)
 
-
   #######################################################
   ################## Model Enrollment ###################
   def enroll(self, enroll_features):
     """Performs IVector enrollment"""
-    [self._check_projected(feature) for feature in enroll_features]
-    model = numpy.mean(numpy.vstack(enroll_features), axis=0)
-    return model
+    [self._check_ivector(feature) for feature in enroll_features]
+    average_ivector = numpy.mean(numpy.vstack(enroll_features), axis=0)
+    if self.use_plda:
+      average_ivector = average_ivector.reshape(1,-1)
+      self.plda_trainer.enroll(self.plda_machine, average_ivector)
+      return self.plda_machine
+    else:
+      return average_ivector
 
   ######################################################
   ################ Feature comparison ##################
   def read_model(self, model_file):
     """Reads the whitened i-vector that holds the model"""
-    return bob.bio.base.load(model_file)
+    if self.use_plda:
+      return bob.learn.em.PLDAMachine(bob.io.base.HDF5File(str(model_file)), self.plda_base)
+    else:
+      return bob.bio.base.load(model_file)
 
   def read_probe(self, probe_file):
     """read probe file which is an i-vector"""
@@ -200,13 +338,15 @@ class IVector (GMM):
 
   def score(self, model, probe):
     """Computes the score for the given model and the given probe."""
-    self._check_projected(model)
-    self._check_projected(probe)
-    return numpy.dot(model/numpy.linalg.norm(model), probe/numpy.linalg.norm(probe))
+    self._check_ivector(probe)
+    if self.use_plda:
+      return model.log_likelihood_ratio(probe)
+    else:
+      self._check_ivector(model)
+      return numpy.dot(model/numpy.linalg.norm(model), probe/numpy.linalg.norm(probe))
 
   def score_for_multiple_probes(self, model, probes):
     """This function computes the score between the given model and several given probe files."""
-    [self._check_projected(probe) for probe in probes]
     probe = numpy.mean(numpy.vstack(probes), axis=0)
     return self.score(model, probe)
 
diff --git a/bob/bio/gmm/config/algorithm/ivector.py b/bob/bio/gmm/config/algorithm/ivector_cosine.py
similarity index 100%
rename from bob/bio/gmm/config/algorithm/ivector.py
rename to bob/bio/gmm/config/algorithm/ivector_cosine.py
diff --git a/bob/bio/gmm/config/algorithm/ivector_lda_wccn_plda.py b/bob/bio/gmm/config/algorithm/ivector_lda_wccn_plda.py
new file mode 100644
index 0000000000000000000000000000000000000000..188ea35460fe120b33f5bb34ab49b48fc0d4b2f7
--- /dev/null
+++ b/bob/bio/gmm/config/algorithm/ivector_lda_wccn_plda.py
@@ -0,0 +1,17 @@
+import bob.bio.gmm
+
+algorithm = bob.bio.gmm.algorithm.IVector(
+    # IVector parameters
+    subspace_dimension_of_t = 100,
+    update_sigma = True,
+    tv_training_iterations = 25,  # Number of EM iterations for the TV training
+    # GMM parameters
+    number_of_gaussians = 256,
+    use_lda = True,
+    use_wccn = True,
+    use_plda = True,
+    lda_dim = 50,
+    plda_dim_F = 50,
+    plda_dim_G = 50,
+    plda_training_iterations = 200,
+)
diff --git a/bob/bio/gmm/script/verify_ivector.py b/bob/bio/gmm/script/verify_ivector.py
index fbe2f87fc89d3550f45d96da10819a5a411102f2..486ddd7d81978742c7146e74674cb15d14a558d9 100644
--- a/bob/bio/gmm/script/verify_ivector.py
+++ b/bob/bio/gmm/script/verify_ivector.py
@@ -33,7 +33,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
   # Add sub-tasks that can be executed by this script
   parser = parsers['main']
   parser.add_argument('--sub-task',
-      choices = ('preprocess', 'train-extractor', 'extract', 'normalize-features', 'kmeans-init', 'kmeans-e-step', 'kmeans-m-step', 'gmm-init', 'gmm-e-step', 'gmm-m-step', 'gmm-project', 'ivector-e-step', 'ivector-m-step', 'ivector-project', 'train-whitener', 'project', 'enroll', 'compute-scores', 'concatenate'),
+      choices = ('preprocess', 'train-extractor', 'extract', 'normalize-features', 'kmeans-init', 'kmeans-e-step', 'kmeans-m-step', 'gmm-init', 'gmm-e-step', 'gmm-m-step', 'gmm-project', 'ivector-e-step', 'ivector-m-step', 'ivector-training', 'ivector-projection', 'train-whitener', 'whitening-projection', 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection', 'project', 'train-plda', 'save-projector', 'enroll', 'compute-scores', 'concatenate'),
       help = argparse.SUPPRESS) #'Executes a subtask (FOR INTERNAL USE ONLY!!!)'
   parser.add_argument('--iteration', type = int,
       help = argparse.SUPPRESS) #'Which type of models to generate (Normal or TModels)'
@@ -46,7 +46,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
 
   # now that we have set up everything, get the command line arguments
   args = base_tools.initialize(parsers, command_line_parameters,
-      skips = ['preprocessing', 'extractor-training', 'extraction', 'normalization', 'kmeans', 'gmm', 'ivector', 'whitening', 'projection', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']
+      skips = ['preprocessing', 'extractor-training', 'extraction', 'normalization', 'kmeans', 'gmm', 'ivector-training', 'ivector-projection', 'train-whitener', 'whitening-projection', 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection', 'projection', 'train-plda', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']
   )
 
   args.skip_projector_training = True
@@ -71,7 +71,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
   # now, add the extra steps for ivector
   algorithm = tools.base(args.algorithm)
 
-  if not args.skip_ivector:
+  if not args.skip_ivector_training:
     # gmm projection
     job_ids['gmm-projection'] = submitter.submit(
             '--sub-task gmm-project',
@@ -99,18 +99,19 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
             **args.grid.training_queue)
     deps.append(job_ids['ivector-m-step'])
 
-  # whitening
-  if not args.skip_whitening:
-    # ivector projection
+
+  # ivector projection
+  if not args.skip_ivector_projection:
     job_ids['ivector-projection'] = submitter.submit(
-            '--sub-task ivector-project',
+            '--sub-task ivector-projection',
             name = 'pro-ivector',
             number_of_parallel_jobs = args.grid.number_of_projection_jobs,
             dependencies = deps,
             **args.grid.projection_queue)
     deps.append(job_ids['ivector-projection'])
 
-    # TV training
+  # train whitener
+  if not args.skip_train_whitener:
     job_ids['whitener-training'] = submitter.submit(
             '--sub-task train-whitener',
             name = 'train-whitener',
             dependencies = deps,
@@ -118,6 +119,71 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
             **args.grid.training_queue)
     deps.append(job_ids['whitener-training'])
 
+  # whitening projection
+  if not args.skip_whitening_projection:
+    job_ids['whitening-projection'] = submitter.submit(
+            '--sub-task whitening-projection',
+            name = 'whitened',
+            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+            dependencies = deps,
+            **args.grid.projection_queue)
+    deps.append(job_ids['whitening-projection'])
+
+  # train LDA
+  if not args.skip_train_lda:
+    job_ids['lda-training'] = submitter.submit(
+            '--sub-task train-lda',
+            name = 'train-lda',
+            dependencies = deps,
+            **args.grid.training_queue)
+    deps.append(job_ids['lda-training'])
+
+  # LDA projection
+  if not args.skip_lda_projection:
+    job_ids['lda-projection'] = submitter.submit(
+            '--sub-task lda-projection',
+            name = 'lda_projection',
+            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+            dependencies = deps,
+            **args.grid.projection_queue)
+    deps.append(job_ids['lda-projection'])
+
+  # train WCCN
+  if not args.skip_train_wccn:
+    job_ids['wccn-training'] = submitter.submit(
+            '--sub-task train-wccn',
+            name = 'train-wccn',
+            dependencies = deps,
+            **args.grid.training_queue)
+    deps.append(job_ids['wccn-training'])
+
+  # WCCN projection
+  if not args.skip_wccn_projection:
+    job_ids['wccn-projection'] = submitter.submit(
+            '--sub-task wccn-projection',
+            name = 'wccn_projection',
+            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+            dependencies = deps,
+            **args.grid.projection_queue)
+    deps.append(job_ids['wccn-projection'])
+
+  # train PLDA
+  if not args.skip_train_plda:
+    job_ids['plda-training'] = submitter.submit(
+            '--sub-task train-plda',
+            name = 'train-plda',
+            dependencies = deps,
+            **args.grid.training_queue)
+    deps.append(job_ids['plda-training'])
+
+  # save projector
+  job_ids['save-projector'] = submitter.submit(
+          '--sub-task save-projector',
+          name = 'save-projector',
+          dependencies = deps,
+          **args.grid.training_queue)
+  deps.append(job_ids['save-projector'])
+
   return job_ids, deps
@@ -134,7 +200,7 @@ def execute(args):
 
   # now, check what we can do
   algorithm = tools.base(args.algorithm)
-
+
   # the file selector object
   fs = tools.FileSelector.instance()
@@ -161,7 +227,7 @@ def execute(args):
         clean = args.clean_intermediate,
         force = args.force)
 
-  elif args.sub_task == 'ivector-project':
+  elif args.sub_task == 'ivector-projection':
     tools.ivector_project(
         algorithm,
         indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
@@ -172,13 +238,54 @@ def execute(args):
         algorithm,
         force = args.force)
 
-  else:
+  elif args.sub_task == 'whitening-projection':
+    tools.whitening_project(
+        algorithm,
+        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+        force = args.force)
+
+  elif args.sub_task == 'train-lda':
+    if algorithm.use_lda:
+      tools.train_lda(
+          algorithm,
+          force = args.force)
+
+  elif args.sub_task == 'lda-projection':
+    if algorithm.use_lda:
+      tools.lda_project(
+          algorithm,
+          indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+          force = args.force)
+
+  elif args.sub_task == 'train-wccn':
+    if algorithm.use_wccn:
+      tools.train_wccn(
+          algorithm,
+          force = args.force)
+
+  elif args.sub_task == 'wccn-projection':
+    if algorithm.use_wccn:
+      tools.wccn_project(
+          algorithm,
+          indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+          force = args.force)
+
+  elif args.sub_task == 'train-plda':
+    if algorithm.use_plda:
+      tools.train_plda(
+          algorithm,
+          force = args.force)
+
+  elif args.sub_task == 'save-projector':
+    tools.save_projector(
+        algorithm,
+        force = args.force)
+
+  # Not our keyword...
+  else:
     return False
   return True
 
-
 def verify(args, command_line_parameters, external_fake_job_id = 0):
   """This is the main entry point for computing verification experiments. You just have to specify configurations for any of the steps of the toolchain, which are:
diff --git a/bob/bio/gmm/test/data/scores-nonorm-ivector-dev b/bob/bio/gmm/test/data/scores-nonorm-ivector-cosine-dev
similarity index 100%
rename from bob/bio/gmm/test/data/scores-nonorm-ivector-dev
rename to bob/bio/gmm/test/data/scores-nonorm-ivector-cosine-dev
diff --git a/bob/bio/gmm/test/data/scores-ztnorm-ivector-dev b/bob/bio/gmm/test/data/scores-ztnorm-ivector-cosine-dev
similarity index 100%
rename from bob/bio/gmm/test/data/scores-ztnorm-ivector-dev
rename to bob/bio/gmm/test/data/scores-ztnorm-ivector-cosine-dev
diff --git a/bob/bio/gmm/tools/command_line.py b/bob/bio/gmm/tools/command_line.py
index a75937934b176b920ae63814901213a504d001b2..fa8d9304a6384d52eeb5dd5c156ddc0c80c42fb8 100644
--- a/bob/bio/gmm/tools/command_line.py
+++ b/bob/bio/gmm/tools/command_line.py
@@ -36,11 +36,16 @@ def add_parallel_gmm_options(parsers, sub_module = None):
         help = 'The sub-directory (relative to --temp-directory), where intermediate ivector files should be stored')
     sub_dir_group.add_argument('--projected-ivector-directory', default = 'projected_ivector_temp',
         help = 'The sub-directory (relative to --temp-directory), where intermediate projected ivector training files should be stored')
+    sub_dir_group.add_argument('--whitened-directory', default = 'whitened_temp',
+        help = 'The sub-directory (relative to --temp-directory), where intermediate whitened ivector training files should be stored')
+    sub_dir_group.add_argument('--lda-projected-directory', default = 'lda_projected_temp',
+        help = 'The sub-directory (relative to --temp-directory), where intermediate LDA projected ivector training files should be stored')
+    sub_dir_group.add_argument('--wccn-projected-directory', default = 'wccn_projected_temp',
+        help = 'The sub-directory (relative to --temp-directory), where intermediate WCCN projected ivector training files should be stored')
 
   flag_group.add_argument('-i', '--tv-start-iteration', type=int, default=0,
       help = 'Specify the first iteration for the IVector training (i.e. to restart from there)')
 
-
 # Functions to be added to the FileSelector class, once it is instantiated
 def _kmeans_intermediate_file(self, round):
   return os.path.join(self.directories['kmeans'], 'round_%05d' % round, 'kmeans.hdf5')
@@ -91,5 +96,11 @@ def initialize_parallel_gmm(args, sub_module = None):
     fs.directories['ivector'] = os.path.join(args.temp_directory, sub_dir, args.ivector_directory)
     fs.tv_file = os.path.join(args.temp_directory, sub_dir, "tv.hdf5")
-    fs.directories['projected_ivector'] = os.path.join(args.temp_directory, sub_dir, args.projected_ivector_directory)
     fs.whitener_file = os.path.join(args.temp_directory, sub_dir, "whitener.hdf5")
+    fs.lda_file = os.path.join(args.temp_directory, sub_dir, "lda.hdf5")
+    fs.wccn_file = os.path.join(args.temp_directory, sub_dir, "wccn.hdf5")
+    fs.plda_file = os.path.join(args.temp_directory, sub_dir, "plda.hdf5")
+    fs.directories['projected_ivector'] = os.path.join(args.temp_directory, sub_dir, args.projected_ivector_directory)
+    fs.directories['whitened'] = os.path.join(args.temp_directory, sub_dir, args.whitened_directory)
+    fs.directories['lda_projected'] = os.path.join(args.temp_directory, sub_dir, args.lda_projected_directory)
+    fs.directories['wccn_projected'] = os.path.join(args.temp_directory, sub_dir, args.wccn_projected_directory)
diff --git a/bob/bio/gmm/tools/ivector.py b/bob/bio/gmm/tools/ivector.py
index 56b2d9e1f03a0ff6517e9c496ec1066193bcec22..d132a920091e1767a1aca8658adb5339350e0153 100644
--- a/bob/bio/gmm/tools/ivector.py
+++ b/bob/bio/gmm/tools/ivector.py
@@ -173,9 +173,144 @@ def train_whitener(algorithm, force=False):
     bob.io.base.create_directories_safe(os.path.dirname(fs.whitener_file))
     bob.bio.base.save(algorithm.whitener, fs.whitener_file)
 
-  # finally, save the projector into one file
-  algorithm.load_ubm(fs.ubm_file)
-  algorithm.load_tv(fs.tv_file)
+
+def whitening_project(algorithm, indices, force=False):
+  """Performs whitening projection of the training i-vectors"""
+  fs = FileSelector.instance()
   algorithm.load_whitener(fs.whitener_file)
-  logger.info("Writing projector into file %s", fs.projector_file)
-  algorithm.save_projector(fs.projector_file)
+
+  ivector_files = fs.training_list('projected_ivector', 'train_projector')
+  whitened_files = fs.training_list('whitened', 'train_projector')
+
+  logger.info("IVector training: whitening ivectors range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['projected_ivector'], fs.directories['whitened'])
+  # project the features
+  for i in range(indices[0], indices[1]):
+    ivector_file = ivector_files[i]
+    whitened_file = whitened_files[i]
+    if not utils.check_file(whitened_file, force):
+      # load feature
+      ivector = algorithm.read_feature(ivector_file)
+      # project feature
+      whitened = algorithm.project_whitening(ivector)
+      # write it
+      bob.io.base.create_directories_safe(os.path.dirname(whitened_file))
+      bob.bio.base.save(whitened, whitened_file)
+
+
+def train_lda(algorithm, force=False):
+  """Train the LDA projector with the whitened i-vectors of the world group."""
+  fs = FileSelector.instance()
+  if utils.check_file(fs.lda_file, force, 1000):
+    logger.info("- LDA projector '%s' already exists.", fs.lda_file)
+  else:
+    train_files = fs.training_list('whitened', 'train_projector', arrange_by_client = True)
+    train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
+    # perform training
+    algorithm.train_lda(train_features)
+    bob.io.base.create_directories_safe(os.path.dirname(fs.lda_file))
+    bob.bio.base.save(algorithm.lda, fs.lda_file)
+
+def lda_project(algorithm, indices, force=False):
+  """Performs LDA projection of the training i-vectors"""
+  fs = FileSelector.instance()
+  algorithm.load_lda(fs.lda_file)
+
+  whitened_files = fs.training_list('whitened', 'train_projector')
+  lda_projected_files = fs.training_list('lda_projected', 'train_projector')
+
+  logger.info("IVector training: LDA projection range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['whitened'], fs.directories['lda_projected'])
+  # project the features
+  for i in range(indices[0], indices[1]):
+    ivector_file = whitened_files[i]
+    lda_projected_file = lda_projected_files[i]
+    if not utils.check_file(lda_projected_file, force):
+      # load feature
+      ivector = algorithm.read_feature(ivector_file)
+      # project feature
+      lda_projected = algorithm.project_lda(ivector)
+      # write it
+      bob.io.base.create_directories_safe(os.path.dirname(lda_projected_file))
+      bob.bio.base.save(lda_projected, lda_projected_file)
+
+
+def train_wccn(algorithm, force=False):
+  """Train the WCCN projector with the (LDA-projected or whitened) i-vectors of the world group."""
+  fs = FileSelector.instance()
+  if utils.check_file(fs.wccn_file, force, 1000):
+    logger.info("- WCCN projector '%s' already exists.", fs.wccn_file)
+  else:
+    if algorithm.use_lda:
+      input_label = 'lda_projected'
+    else:
+      input_label = 'whitened'
+    train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True)
+    train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
+    # perform training
+    algorithm.train_wccn(train_features)
+    bob.io.base.create_directories_safe(os.path.dirname(fs.wccn_file))
+    bob.bio.base.save(algorithm.wccn, fs.wccn_file)
+
+def wccn_project(algorithm, indices, force=False):
+  """Performs WCCN projection of the training i-vectors"""
+  fs = FileSelector.instance()
+  algorithm.load_wccn(fs.wccn_file)
+  if algorithm.use_lda:
+    input_label = 'lda_projected'
+  else:
+    input_label = 'whitened'
+
+  input_files = fs.training_list(input_label, 'train_projector')
+  wccn_projected_files = fs.training_list('wccn_projected', 'train_projector')
+
+  logger.info("IVector training: WCCN projection range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories[input_label], fs.directories['wccn_projected'])
+  # project the features
+  for i in range(indices[0], indices[1]):
+    ivector_file = input_files[i]
+    wccn_projected_file = wccn_projected_files[i]
+    if not utils.check_file(wccn_projected_file, force):
+      # load feature
+      ivector = algorithm.read_feature(ivector_file)
+      # project feature
+      wccn_projected = algorithm.project_wccn(ivector)
+      # write it
+      bob.io.base.create_directories_safe(os.path.dirname(wccn_projected_file))
+      bob.bio.base.save(wccn_projected, wccn_projected_file)
+
+
+def train_plda(algorithm, force=False):
+  """Train the PLDA model with the fully projected i-vectors of the world group."""
+  fs = FileSelector.instance()
+  if utils.check_file(fs.plda_file, force, 1000):
+    logger.info("- PLDA projector '%s' already exists.", fs.plda_file)
+  else:
+    if algorithm.use_wccn:
+      input_label = 'wccn_projected'
+    elif algorithm.use_lda:
+      input_label = 'lda_projected'
+    else:
+      input_label = 'whitened'
+    train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True)
+    train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files]
+    # perform training
+    algorithm.train_plda(train_features)
+    bob.io.base.create_directories_safe(os.path.dirname(fs.plda_file))
+    bob.bio.base.save(algorithm.plda_base, fs.plda_file)
+
+
+def save_projector(algorithm, force=False):
+  fs = FileSelector.instance()
+  if utils.check_file(fs.projector_file, force, 1000):
+    logger.info("- Projector '%s' already exists.", fs.projector_file)
+  else:
+    # save the projector into one file
+    algorithm.load_ubm(fs.ubm_file)
+    algorithm.load_tv(fs.tv_file)
+    algorithm.load_whitener(fs.whitener_file)
+    if algorithm.use_lda:
+      algorithm.load_lda(fs.lda_file)
+    if algorithm.use_wccn:
+      algorithm.load_wccn(fs.wccn_file)
+    if algorithm.use_plda:
+      algorithm.load_plda(fs.plda_file)
+    logger.info("Writing projector into file %s", fs.projector_file)
+    algorithm.save_projector(fs.projector_file)
diff --git a/setup.py b/setup.py
index c03d3d5bc0c5c66576fea80a65f31a3dd5ae4bf1..5e93b9e14d5d5b47a3f6b00c76bf016253a951c0 100644
--- a/setup.py
+++ b/setup.py
@@ -119,11 +119,12 @@ setup(
       ],
 
       'bob.bio.algorithm': [
-        'gmm               = bob.bio.gmm.config.algorithm.gmm:algorithm',
-        'gmm-regular       = bob.bio.gmm.config.algorithm.gmm_regular:algorithm',
-        'jfa               = bob.bio.gmm.config.algorithm.jfa:algorithm',
-        'isv               = bob.bio.gmm.config.algorithm.isv:algorithm',
-        'ivector           = bob.bio.gmm.config.algorithm.ivector:algorithm',
+        'gmm                   = bob.bio.gmm.config.algorithm.gmm:algorithm',
+        'gmm-regular           = bob.bio.gmm.config.algorithm.gmm_regular:algorithm',
+        'jfa                   = bob.bio.gmm.config.algorithm.jfa:algorithm',
+        'isv                   = bob.bio.gmm.config.algorithm.isv:algorithm',
+        'ivector-cosine        = bob.bio.gmm.config.algorithm.ivector_cosine:algorithm',
+        'ivector-lda-wccn-plda = bob.bio.gmm.config.algorithm.ivector_lda_wccn_plda:algorithm',
       ],
   },
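For reference, the processing chain this patch wires together is: UBM statistics → TV projection (i-vector) → whitening with length normalization → optional LDA → optional WCCN, with PLDA, when enabled, applied at enrollment and scoring time rather than at projection time. The following minimal sketch (not part of the patch) shows how the extended algorithm could be driven directly through the `bob.bio.base` algorithm API; the constructor keywords mirror the new `ivector_lda_wccn_plda.py` configuration, while the file name `Projector.hdf5` and the shape of `train_features` are illustrative assumptions:

```python
import bob.bio.gmm

# Same settings as the new ivector-lda-wccn-plda resource registered in setup.py.
algorithm = bob.bio.gmm.algorithm.IVector(
    subspace_dimension_of_t = 100,
    tv_training_iterations = 25,
    update_sigma = True,
    number_of_gaussians = 256,
    use_lda = True,
    use_wccn = True,
    use_plda = True,
    lda_dim = 50,
    plda_dim_F = 50,
    plda_dim_G = 50,
    plda_training_iterations = 200,
)

# Because split_training_features_by_client is now True, train_projector
# expects a list of clients, each holding that client's feature arrays
# (hypothetical data):
#   train_features = [[feat_a1, feat_a2, ...], [feat_b1, feat_b2, ...], ...]
#   algorithm.train_projector(train_features, 'Projector.hdf5')

# After training, the full chain is applied per utterance:
#   algorithm.load_projector('Projector.hdf5')
#   ivector = algorithm.project(feature)             # whitened (+ LDA/WCCN) i-vector
#   model = algorithm.enroll([ivector1, ivector2])   # a PLDAMachine when use_plda=True
#   score = algorithm.score(model, probe_ivector)    # log-likelihood ratio with PLDA,
#                                                    # cosine similarity otherwise
```

In the parallel tool chain, the equivalent experiment would be selected by resource name, e.g. `verify_ivector.py --algorithm ivector-lda-wccn-plda` together with the usual database and grid options.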