From 5595dfc064e823d0ff1a4f7686d7d21a981a941c Mon Sep 17 00:00:00 2001 From: Manuel Guenther <manuel.guenther@idiap.ch> Date: Thu, 18 Jun 2015 16:07:06 +0200 Subject: [PATCH] Made parallel scripts usable for video experiments --- bob/bio/gmm/config/algorithm/gmm.py | 3 --- bob/bio/gmm/config/algorithm/isv.py | 2 -- bob/bio/gmm/script/verify_gmm.py | 21 ++++++++++++--------- bob/bio/gmm/script/verify_isv.py | 7 ++++--- bob/bio/gmm/script/verify_ivector.py | 17 ++++++++++------- bob/bio/gmm/tools/gmm.py | 11 ++++++----- bob/bio/gmm/tools/utils.py | 26 ++++++++++++++++++++++++++ buildout.cfg | 2 +- 8 files changed, 59 insertions(+), 30 deletions(-) diff --git a/bob/bio/gmm/config/algorithm/gmm.py b/bob/bio/gmm/config/algorithm/gmm.py index 592b30e..87b43ac 100644 --- a/bob/bio/gmm/config/algorithm/gmm.py +++ b/bob/bio/gmm/config/algorithm/gmm.py @@ -1,7 +1,4 @@ -#!/usr/bin/env python - import bob.bio.gmm -import numpy algorithm = bob.bio.gmm.algorithm.GMM( number_of_gaussians = 512, diff --git a/bob/bio/gmm/config/algorithm/isv.py b/bob/bio/gmm/config/algorithm/isv.py index 3ae069d..390e829 100644 --- a/bob/bio/gmm/config/algorithm/isv.py +++ b/bob/bio/gmm/config/algorithm/isv.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - import bob.bio.gmm algorithm = bob.bio.gmm.algorithm.ISV( diff --git a/bob/bio/gmm/script/verify_gmm.py b/bob/bio/gmm/script/verify_gmm.py index c1683b2..8501ecc 100644 --- a/bob/bio/gmm/script/verify_gmm.py +++ b/bob/bio/gmm/script/verify_gmm.py @@ -55,7 +55,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []): tools.initialize_parallel_gmm(args) # assert that the algorithm is a GMM - if args.algorithm.__class__ not in (algorithm.GMM, algorithm.GMMRegular): + if tools.base(args.algorithm).__class__ not in (algorithm.GMM, algorithm.GMMRegular): raise ValueError("The given algorithm %s is not a (pure) GMM algorithm" % type(args.algorithm)) return args @@ -63,6 +63,8 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []): def add_gmm_jobs(args, job_ids, deps, submitter): """Adds all GMM-related jobs.""" + algorithm = tools.base(args.algorithm) + # KMeans if not args.skip_kmeans: # initialization @@ -75,7 +77,7 @@ def add_gmm_jobs(args, job_ids, deps, submitter): deps.append(job_ids['kmeans-init']) # several iterations of E and M steps - for iteration in range(args.kmeans_start_iteration, args.algorithm.kmeans_training_iterations): + for iteration in range(args.kmeans_start_iteration, algorithm.kmeans_training_iterations): # E-step job_ids['kmeans-e-step'] = submitter.submit( '--sub-task kmeans-e-step --iteration %d' % iteration, @@ -106,7 +108,7 @@ def add_gmm_jobs(args, job_ids, deps, submitter): deps.append(job_ids['gmm-init']) # several iterations of E and M steps - for iteration in range(args.gmm_start_iteration, args.algorithm.gmm_training_iterations): + for iteration in range(args.gmm_start_iteration, algorithm.gmm_training_iterations): # E-step job_ids['gmm-e-step'] = submitter.submit( '--sub-task gmm-e-step --iteration %d' % iteration, @@ -138,6 +140,7 @@ def execute(args): return True # now, check what we can do + algorithm = tools.base(args.algorithm) # the file selector object fs = tools.FileSelector.instance() @@ -145,7 +148,7 @@ def execute(args): # train the feature projector if args.sub_task == 'kmeans-init': tools.kmeans_initialize( - args.algorithm, + algorithm, args.extractor, args.limit_training_data, force = args.force) @@ -153,7 +156,7 @@ def execute(args): # train the feature projector elif args.sub_task == 'kmeans-e-step': tools.kmeans_estep( - args.algorithm, + algorithm, args.extractor, args.iteration, indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), @@ -162,7 +165,7 @@ def execute(args): # train the feature projector elif args.sub_task == 'kmeans-m-step': tools.kmeans_mstep( - args.algorithm, + algorithm, args.iteration, number_of_parallel_jobs = args.grid.number_of_projection_jobs, clean = args.clean_intermediate, @@ -170,7 +173,7 @@ def execute(args): elif args.sub_task == 'gmm-init': tools.gmm_initialize( - args.algorithm, + algorithm, args.extractor, args.limit_training_data, force = args.force) @@ -178,7 +181,7 @@ def execute(args): # train the feature projector elif args.sub_task == 'gmm-e-step': tools.gmm_estep( - args.algorithm, + algorithm, args.extractor, args.iteration, indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), @@ -187,7 +190,7 @@ def execute(args): # train the feature projector elif args.sub_task == 'gmm-m-step': tools.gmm_mstep( - args.algorithm, + algorithm, args.iteration, number_of_parallel_jobs = args.grid.number_of_projection_jobs, clean = args.clean_intermediate, diff --git a/bob/bio/gmm/script/verify_isv.py b/bob/bio/gmm/script/verify_isv.py index cd65f55..1c55f65 100644 --- a/bob/bio/gmm/script/verify_isv.py +++ b/bob/bio/gmm/script/verify_isv.py @@ -55,7 +55,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []): tools.initialize_parallel_gmm(args, sub_module = 'isv') # assert that the algorithm is a GMM - if args.algorithm.__class__ != algorithm.ISV: + if tools.base(args.algorithm).__class__ != algorithm.ISV: raise ValueError("The given algorithm %s is not a (pure) ISV algorithm" % type(args.algorithm)) return args @@ -101,13 +101,14 @@ def execute(args): return True # now, check what we can do + algorithm = tools.base(args.algorithm) # the file selector object fs = tools.FileSelector.instance() if args.sub_task == 'gmm-project': tools.gmm_project( - args.algorithm, + algorithm, args.extractor, indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) @@ -115,7 +116,7 @@ def execute(args): # train the feature projector elif args.sub_task == 'train-isv': tools.train_isv( - args.algorithm, + algorithm, force = args.force) else: diff --git a/bob/bio/gmm/script/verify_ivector.py b/bob/bio/gmm/script/verify_ivector.py index 5081c5a..fbe2f87 100644 --- a/bob/bio/gmm/script/verify_ivector.py +++ b/bob/bio/gmm/script/verify_ivector.py @@ -55,7 +55,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []): tools.initialize_parallel_gmm(args, sub_module = 'ivector') # assert that the algorithm is a GMM - if args.algorithm.__class__ != algorithm.IVector: + if tools.base(args.algorithm).__class__ != algorithm.IVector: raise ValueError("The given algorithm %s is not a (pure) IVector algorithm" % type(args.algorithm)) return args @@ -69,6 +69,8 @@ def add_ivector_jobs(args, job_ids, deps, submitter): job_ids, deps = add_gmm_jobs(args, job_ids, deps, submitter) # now, add the extra steps for ivector + algorithm = tools.base(args.algorithm) + if not args.skip_ivector: # gmm projection job_ids['gmm-projection'] = submitter.submit( @@ -80,7 +82,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): deps.append(job_ids['gmm-projection']) # several iterations of E and M steps - for iteration in range(args.tv_start_iteration, args.algorithm.tv_training_iterations): + for iteration in range(args.tv_start_iteration, algorithm.tv_training_iterations): # E-step job_ids['ivector-e-step'] = submitter.submit( '--sub-task ivector-e-step --iteration %d' % iteration, @@ -131,20 +133,21 @@ def execute(args): return True # now, check what we can do + algorithm = tools.base(args.algorithm) # the file selector object fs = tools.FileSelector.instance() if args.sub_task == 'gmm-project': tools.gmm_project( - args.algorithm, + algorithm, args.extractor, indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) elif args.sub_task == 'ivector-e-step': tools.ivector_estep( - args.algorithm, + algorithm, args.iteration, indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) @@ -152,7 +155,7 @@ def execute(args): # train the feature projector elif args.sub_task == 'ivector-m-step': tools.ivector_mstep( - args.algorithm, + algorithm, args.iteration, number_of_parallel_jobs = args.grid.number_of_projection_jobs, clean = args.clean_intermediate, @@ -160,13 +163,13 @@ def execute(args): elif args.sub_task == 'ivector-project': tools.ivector_project( - args.algorithm, + algorithm, indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) elif args.sub_task == 'train-whitener': tools.train_whitener( - args.algorithm, + algorithm, force = args.force) else: diff --git a/bob/bio/gmm/tools/gmm.py b/bob/bio/gmm/tools/gmm.py index fba7ee8..dd18cce 100644 --- a/bob/bio/gmm/tools/gmm.py +++ b/bob/bio/gmm/tools/gmm.py @@ -9,6 +9,7 @@ logger = logging.getLogger("bob.bio.gmm") from bob.bio.base.tools.FileSelector import FileSelector from bob.bio.base import utils, tools +from .utils import read_feature def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): @@ -23,7 +24,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): # read data logger.info("UBM training: initializing kmeans") training_list = utils.selected_elements(fs.training_list('extracted', 'train_projector'), limit_data) - data = numpy.vstack([extractor.read_feature(feature_file) for feature_file in training_list]) + data = numpy.vstack([read_feature(extractor, feature_file) for feature_file in training_list]) # Perform KMeans initialization kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1]) @@ -55,7 +56,7 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False): logger.info("UBM training: KMeans E-Step round %d from range(%d, %d)", iteration, *indices) # read data - data = numpy.vstack([extractor.read_feature(training_list[index]) for index in range(indices[0], indices[1])]) + data = numpy.vstack([read_feature(extractor, training_list[index]) for index in range(indices[0], indices[1])]) # Performs the E-step trainer = algorithm.kmeans_trainer @@ -169,7 +170,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False): # read features training_list = utils.selected_elements(fs.training_list('extracted', 'train_projector'), limit_data) - data = numpy.vstack([extractor.read_feature(feature_file) for feature_file in training_list]) + data = numpy.vstack([read_feature(extractor, feature_file) for feature_file in training_list]) # get means and variances of kmeans result kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file)) @@ -209,7 +210,7 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False): logger.info("UBM training: GMM E-Step from range(%d, %d)", indices) # read data - data = numpy.vstack([extractor.read_feature(training_list[index]) for index in range(indices[0], indices[1])]) + data = numpy.vstack([read_feature(extractor, training_list[index]) for index in range(indices[0], indices[1])]) trainer = algorithm.ubm_trainer trainer.initialize(gmm_machine, None) @@ -299,7 +300,7 @@ def gmm_project(algorithm, extractor, indices, force=False): if not utils.check_file(projected_file, force): # load feature - feature = extractor.read_feature(feature_file) + feature = read_feature(extractor, feature_file) # project feature projected = algorithm.project_ubm(feature) # write it diff --git a/bob/bio/gmm/tools/utils.py b/bob/bio/gmm/tools/utils.py index fe773e0..146fb50 100644 --- a/bob/bio/gmm/tools/utils.py +++ b/bob/bio/gmm/tools/utils.py @@ -1,4 +1,5 @@ import bob.bio.base +import numpy def add_jobs(args, submitter, local_job_adder): """Adds all (desired) jobs of the tool chain to the grid, or to the local list to be executed.""" @@ -47,3 +48,28 @@ def add_jobs(args, submitter, local_job_adder): setattr(args, "skip_%s" % key, original_skips[key]) return job_ids + + +def is_video_extension(algorithm): + try: + import bob.bio.video + if isinstance(algorithm, bob.bio.video.algorithm.Algorithm): + return True + except ImportError: + pass + return False + +def base(algorithm): + """Returns the base algorithm, if it is a video extension, otherwise returns the algorithm itself""" + return algorithm.algorithm if is_video_extension(algorithm) else algorithm + +def read_feature(extractor, feature_file): + feature = extractor.read_feature(feature_file) + try: + import bob.bio.video + if isinstance(extractor, bob.bio.video.extractor.Extractor): + assert isinstance(feature, bob.bio.video.FrameContainer) + return numpy.vstack([frame for _,frame,_ in feature]) + except ImportError: + pass + return feature diff --git a/buildout.cfg b/buildout.cfg index 3771d80..0710f05 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -14,7 +14,7 @@ develop = src/bob.bio.base . ; options for bob.buildout -debug = true +debug = false verbose = true newest = false -- GitLab