diff --git a/bob/bio/gmm/script/verify_gmm.py b/bob/bio/gmm/script/verify_gmm.py index 5f7a69c269c7d1fd36f8332531b5c23d874d0ee2..98064b53a275e104a1b0a9a31d2e7c561c5c48b5 100644 --- a/bob/bio/gmm/script/verify_gmm.py +++ b/bob/bio/gmm/script/verify_gmm.py @@ -158,6 +158,7 @@ def execute(args): algorithm, args.extractor, args.limit_training_data, + allow_missing_files = args.allow_missing_files, force = args.force) # train the feature projector @@ -166,6 +167,7 @@ def execute(args): algorithm, args.extractor, args.iteration, + allow_missing_files = args.allow_missing_files, indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) @@ -183,6 +185,7 @@ def execute(args): algorithm, args.extractor, args.limit_training_data, + allow_missing_files = args.allow_missing_files, force = args.force) # train the feature projector @@ -191,6 +194,7 @@ def execute(args): algorithm, args.extractor, args.iteration, + allow_missing_files = args.allow_missing_files, indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) diff --git a/bob/bio/gmm/script/verify_isv.py b/bob/bio/gmm/script/verify_isv.py index ce43d6fe1e9b473d0ed30172217a17331868e6b7..38867302082100de0a46a57f37b008db6976d571 100644 --- a/bob/bio/gmm/script/verify_isv.py +++ b/bob/bio/gmm/script/verify_isv.py @@ -83,6 +83,7 @@ def add_isv_jobs(args, job_ids, deps, submitter): name = 'pro-gmm', number_of_parallel_jobs = args.grid.number_of_projection_jobs, dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.projection_queue) deps.append(job_ids['gmm-projection']) @@ -90,6 +91,7 @@ def add_isv_jobs(args, job_ids, deps, submitter): '--sub-task train-isv', name = 'train-isv', dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.training_queue) deps.append(job_ids['isv-training']) @@ -118,12 +120,14 @@ def execute(args): algorithm, args.extractor, indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), + allow_missing_files = args.allow_missing_files, force = args.force) # train the feature projector elif args.sub_task == 'train-isv': tools.train_isv( algorithm, + allow_missing_files = args.allow_missing_files, force = args.force) else: diff --git a/bob/bio/gmm/script/verify_ivector.py b/bob/bio/gmm/script/verify_ivector.py index bafedfc1c40e13bd8b7caf4ec268f6d779f2b413..84363b2cce4555b8965aab7f876fcc9e8ba12099 100644 --- a/bob/bio/gmm/script/verify_ivector.py +++ b/bob/bio/gmm/script/verify_ivector.py @@ -96,6 +96,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): name='i-e-%d' % iteration, number_of_parallel_jobs = args.grid.number_of_projection_jobs, dependencies = [job_ids['ivector-m-step']] if iteration != args.tv_start_iteration else deps, + allow_missing_files = args.allow_missing_files, **args.grid.projection_queue) # M-step @@ -114,6 +115,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): name = 'pro-ivector', number_of_parallel_jobs = args.grid.number_of_projection_jobs, dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.projection_queue) deps.append(job_ids['ivector-projection']) @@ -123,6 +125,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): '--sub-task train-whitener', name = 'train-whitener', dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.training_queue) deps.append(job_ids['whitener-training']) @@ -133,6 +136,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): name = 'whitened', number_of_parallel_jobs = args.grid.number_of_projection_jobs, dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.projection_queue) deps.append(job_ids['whitening-projection']) @@ -142,6 +146,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): '--sub-task train-lda', name = 'train-lda', dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.training_queue) deps.append(job_ids['lda-training']) @@ -152,6 +157,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): name = 'lda_projection', number_of_parallel_jobs = args.grid.number_of_projection_jobs, dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.projection_queue) deps.append(job_ids['lda-projection']) @@ -161,6 +167,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): '--sub-task train-wccn', name = 'train-wccn', dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.training_queue) deps.append(job_ids['wccn-training']) @@ -171,6 +178,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): name = 'wccn_projection', number_of_parallel_jobs = args.grid.number_of_projection_jobs, dependencies = deps, + allow_missing_files = args.allow_missing_files, **args.grid.projection_queue) deps.append(job_ids['wccn-projection']) @@ -179,6 +187,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter): job_ids['plda-training'] = submitter.submit( '--sub-task train-plda', name = 'train-plda', + allow_missing_files = args.allow_missing_files, dependencies = deps, **args.grid.training_queue) deps.append(job_ids['plda-training']) @@ -216,12 +225,14 @@ def execute(args): algorithm, args.extractor, indices = base_tools.indices(fs.training_list('extracted', 'train_projector'), args.grid.number_of_projection_jobs), + allow_missing_files = args.allow_missing_files, force = args.force) elif args.sub_task == 'ivector-e-step': tools.ivector_estep( algorithm, args.iteration, + allow_missing_files = args.allow_missing_files, indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) @@ -237,17 +248,20 @@ def execute(args): elif args.sub_task == 'ivector-projection': tools.ivector_project( algorithm, + allow_missing_files = args.allow_missing_files, indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) elif args.sub_task == 'train-whitener': tools.train_whitener( algorithm, + allow_missing_files = args.allow_missing_files, force = args.force) elif args.sub_task == 'whitening-projection': tools.whitening_project( algorithm, + allow_missing_files = args.allow_missing_files, indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) @@ -255,12 +269,14 @@ def execute(args): if algorithm.use_lda: tools.train_lda( algorithm, + allow_missing_files = args.allow_missing_files, force = args.force) elif args.sub_task == 'lda-projection': if algorithm.use_lda: tools.lda_project( algorithm, + allow_missing_files = args.allow_missing_files, indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) @@ -268,12 +284,14 @@ def execute(args): if algorithm.use_wccn: tools.train_wccn( algorithm, + allow_missing_files = args.allow_missing_files, force = args.force) elif args.sub_task == 'wccn-projection': if algorithm.use_wccn: tools.wccn_project( algorithm, + allow_missing_files = args.allow_missing_files, indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs), force = args.force) @@ -281,6 +299,7 @@ def execute(args): if algorithm.use_plda: tools.train_plda( algorithm, + allow_missing_files = args.allow_missing_files, force = args.force) elif args.sub_task == 'save-projector': diff --git a/bob/bio/gmm/test/test_scripts.py b/bob/bio/gmm/test/test_scripts.py index 603e609ffbb64f812a0eebb648266178a46e66ea..75b20c337cab4cabae7298e4bbec410e24988fc1 100644 --- a/bob/bio/gmm/test/test_scripts.py +++ b/bob/bio/gmm/test/test_scripts.py @@ -22,7 +22,7 @@ from bob.bio.base.script.verify import main data_dir = pkg_resources.resource_filename('bob.bio.gmm', 'test/data') -def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('scores',''), executable = main): +def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('scores',''), executable = main, allow_missing_files=False): try: executable(parameters) @@ -52,8 +52,10 @@ def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('sco assert d[0].shape == d[1].shape # assert that the data order is still correct assert (d[0][:,0:3] == d[1][:, 0:3]).all() - # assert that the values are OK - assert numpy.allclose(d[0][:,3].astype(float), d[1][:,3].astype(float), 1e-5) + + if not allow_missing_files: + # assert that the values are OK + assert numpy.allclose(d[0][:,3].astype(float), d[1][:,3].astype(float), 1e-5) finally: shutil.rmtree(test_dir) @@ -77,6 +79,26 @@ def test_gmm_sequential(): _verify(parameters, test_dir, 'test_gmm_sequential', ref_modifier='-gmm') + +def test_gmm_sequential_missingfiles(): + test_dir = tempfile.mkdtemp(prefix='bobtest_') + # define dummy parameters + parameters = [ + '-d', 'dummy', + '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)', + '-e', 'dummy2d', + '-a', 'bob.bio.gmm.algorithm.GMM(2, 2, 2)', + '--zt-norm', + '-vs', 'test_gmm_sequential', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.gmm', + '--allow-missing-files' + ] + + _verify(parameters, test_dir, 'test_gmm_sequential', ref_modifier='-gmm', allow_missing_files=True) + + @bob.bio.base.test.utils.grid_available def test_gmm_parallel(): from bob.bio.gmm.script.verify_gmm import main @@ -100,6 +122,30 @@ def test_gmm_parallel(): _verify(parameters, test_dir, 'test_gmm_parallel', executable=main, ref_modifier='-gmm') +@bob.bio.base.test.utils.grid_available +def test_gmm_parallel_missingfiles(): + from bob.bio.gmm.script.verify_gmm import main + test_dir = tempfile.mkdtemp(prefix='bobtest_') + test_database = os.path.join(test_dir, "submitted.sql3") + # define dummy parameters + parameters = [ + '-d', 'dummy', + '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)', + '-e', 'dummy2d', + '-a', 'bob.bio.gmm.algorithm.GMM(2, 2, 2)', '--import', 'bob.bio.gmm', 'bob.io.image', + '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '--stop-on-failure', + '--clean-intermediate', + '--zt-norm', + '-vs', 'test_gmm_parallel', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.gmm', + '--allow-missing-files' + ] + + _verify(parameters, test_dir, 'test_gmm_parallel', executable=main, ref_modifier='-gmm', allow_missing_files=True) + + def test_isv_sequential(): test_dir = tempfile.mkdtemp(prefix='bobtest_') # define dummy parameters @@ -118,6 +164,25 @@ def test_isv_sequential(): _verify(parameters, test_dir, 'test_isv_sequential', ref_modifier='-isv') +def test_isv_sequential_missingfiles(): + test_dir = tempfile.mkdtemp(prefix='bobtest_') + # define dummy parameters + parameters = [ + '-d', 'dummy', + '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)', + '-e', 'dummy2d', + '-a', 'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)', + '--zt-norm', + '-vs', 'test_isv_sequential', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.gmm', + '--allow-missing-files' + ] + + _verify(parameters, test_dir, 'test_isv_sequential', ref_modifier='-isv', allow_missing_files=True) + + @bob.bio.base.test.utils.grid_available def test_isv_parallel(): from bob.bio.gmm.script.verify_isv import main @@ -141,6 +206,30 @@ def test_isv_parallel(): _verify(parameters, test_dir, 'test_isv_parallel', executable=main, ref_modifier='-isv') +@bob.bio.base.test.utils.grid_available +def test_isv_parallel_missing_files(): + from bob.bio.gmm.script.verify_isv import main + test_dir = tempfile.mkdtemp(prefix='bobtest_') + test_database = os.path.join(test_dir, "submitted.sql3") + # define dummy parameters + parameters = [ + '-d', 'dummy', + '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)', + '-e', 'dummy2d', + '-a', 'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)', '--import', 'bob.bio.gmm', 'bob.io.image', + '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '--stop-on-failure', + '--clean-intermediate', + '--zt-norm', + '-vs', 'test_isv_parallel', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.gmm', + '--allow-missing-files' + ] + + _verify(parameters, test_dir, 'test_isv_parallel', executable=main, ref_modifier='-isv', allow_missing_files=True) + + def test_ivector_cosine_sequential(): test_dir = tempfile.mkdtemp(prefix='bobtest_') # define dummy parameters @@ -159,6 +248,25 @@ def test_ivector_cosine_sequential(): _verify(parameters, test_dir, 'test_ivector_cosine_sequential', ref_modifier='-ivector-cosine') +def test_ivector_cosine_sequential_missing_files(): + test_dir = tempfile.mkdtemp(prefix='bobtest_') + # define dummy parameters + parameters = [ + '-d', 'dummy', + '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)', + '-e', 'dummy2d', + '-a', 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)', + '--zt-norm', + '-vs', 'test_ivector_cosine_sequential', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.gmm', + '--allow-missing-files' + ] + + _verify(parameters, test_dir, 'test_ivector_cosine_sequential', ref_modifier='-ivector-cosine', allow_missing_files=True) + + @bob.bio.base.test.utils.grid_available def test_ivector_cosine_parallel(): from bob.bio.gmm.script.verify_ivector import main @@ -181,6 +289,32 @@ def test_ivector_cosine_parallel(): _verify(parameters, test_dir, 'test_ivector_cosine_parallel', executable=main, ref_modifier='-ivector-cosine') + +@bob.bio.base.test.utils.grid_available +def test_ivector_cosine_parallel_missing_files(): + from bob.bio.gmm.script.verify_ivector import main + test_dir = tempfile.mkdtemp(prefix='bobtest_') + + test_database = os.path.join(test_dir, "submitted.sql3") + # define dummy parameters + parameters = [ + '-d', 'dummy', + '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)', + '-e', 'dummy2d', + '-a', 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)', '--import', 'bob.bio.gmm', 'bob.io.image', + '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '--stop-on-failure', + '--clean-intermediate', + '--zt-norm', + '-vs', 'test_ivector_cosine_parallel', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.gmm', + '--allow-missing-files' + ] + + _verify(parameters, test_dir, 'test_ivector_cosine_parallel', executable=main, ref_modifier='-ivector-cosine', allow_missing_files=True) + + def test_ivector_lda_wccn_plda_sequential(): test_dir = tempfile.mkdtemp(prefix='bobtest_') # define dummy parameters @@ -221,6 +355,30 @@ def test_ivector_lda_wccn_plda_parallel(): _verify(parameters, test_dir, 'test_ivector_lda_wccn_plda_parallel', executable=main, ref_modifier='-ivector-lda-wccn-plda') +@bob.bio.base.test.utils.grid_available +def test_ivector_lda_wccn_plda_parallel_missing_files(): + from bob.bio.gmm.script.verify_ivector import main + test_dir = tempfile.mkdtemp(prefix='bobtest_') + test_database = os.path.join(test_dir, "submitted.sql3") + # define dummy parameters + parameters = [ + '-d', 'dummy', + '-p', 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)', + '-e', 'dummy2d', + '-a', 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2, use_lda=True, use_wccn=True, use_plda=True, lda_dim=2, plda_dim_F=2, plda_dim_G=2, plda_training_iterations=2)', '--import', 'bob.bio.gmm', 'bob.io.image', + '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '--stop-on-failure', + '--clean-intermediate', + '--zt-norm', + '-vs', 'test_ivector_lda_wccn_plda_parallel', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.gmm', + '--allow-missing-files' + ] + + _verify(parameters, test_dir, 'test_ivector_lda_wccn_plda_parallel', executable=main, ref_modifier='-ivector-lda-wccn-plda', allow_missing_files=True) + + def test_internal_raises(): test_dir = tempfile.mkdtemp(prefix='bobtest_') test_database = os.path.join(test_dir, "submitted.sql3") @@ -253,3 +411,4 @@ def test_internal_raises(): nose.tools.assert_raises(ValueError, script, internal) shutil.rmtree(test_dir) + diff --git a/bob/bio/gmm/tools/__init__.py b/bob/bio/gmm/tools/__init__.py index b42c342b5c0da565f2957efff645dd3e2fb2bde6..cc4d173f08619a8ae3146d1e9d84af4a2ef7c036 100644 --- a/bob/bio/gmm/tools/__init__.py +++ b/bob/bio/gmm/tools/__init__.py @@ -4,5 +4,6 @@ from .gmm import * from .isv import * from .ivector import * + # gets sphinx autodoc done right - don't remove it __all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/gmm/tools/gmm.py b/bob/bio/gmm/tools/gmm.py index 27c3f65391ca90ac5082e4a6f8562ce321975c31..26a75e8c83d2ff3301533bf8d116d9afd848c408 100644 --- a/bob/bio/gmm/tools/gmm.py +++ b/bob/bio/gmm/tools/gmm.py @@ -12,7 +12,7 @@ from bob.bio.base import utils, tools from .utils import read_feature -def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): +def kmeans_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False): """Initializes the K-Means training (non-parallel).""" fs = FileSelector.instance() @@ -27,7 +27,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): # read the features reader = functools.partial(read_feature, extractor) - data = utils.vstack_features(reader, training_list) + data = utils.vstack_features(reader, training_list, allow_missing_files=allow_missing_files) # Perform KMeans initialization kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1]) @@ -38,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False): logger.info("UBM training: saved initial KMeans machine to '%s'", output_file) -def kmeans_estep(algorithm, extractor, iteration, indices, force=False): +def kmeans_estep(algorithm, extractor, iteration, indices, force=False, allow_missing_files = False): """Performs a single E-step of the K-Means algorithm (parallel)""" if indices[0] >= indices[1]: return @@ -62,7 +62,8 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False): reader = functools.partial(read_feature, extractor) data = utils.vstack_features( reader, - (training_list[index] for index in range(indices[0], indices[1]))) + (training_list[index] for index in range(indices[0], indices[1])), + allow_missing_files=allow_missing_files) # Performs the E-step trainer = algorithm.kmeans_trainer @@ -162,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle -def gmm_initialize(algorithm, extractor, limit_data = None, force = False): +def gmm_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False): """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel). This might require a lot of memory.""" fs = FileSelector.instance() @@ -178,7 +179,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False): # read the features reader = functools.partial(read_feature, extractor) - data = utils.vstack_features(reader, training_list) + data = utils.vstack_features(reader, training_list, allow_missing_files=allow_missing_files) # get means and variances of kmeans result kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file)) @@ -199,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False): logger.info("UBM Training: Wrote GMM file '%s'", output_file) -def gmm_estep(algorithm, extractor, iteration, indices, force=False): +def gmm_estep(algorithm, extractor, iteration, indices, force=False, allow_missing_files = False): """Performs a single E-step of the GMM training (parallel).""" if indices[0] >= indices[1]: return @@ -221,7 +222,8 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False): reader = functools.partial(read_feature, extractor) data = utils.vstack_features( reader, - (training_list[index] for index in range(indices[0], indices[1]))) + (training_list[index] for index in range(indices[0], indices[1])) + , allow_missing_files=allow_missing_files) trainer = algorithm.ubm_trainer trainer.initialize(gmm_machine, None) @@ -294,27 +296,28 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean= shutil.rmtree(old_dir) -def gmm_project(algorithm, extractor, indices, force=False): +def gmm_project(algorithm, extractor, indices, force=False, allow_missing_files = False): """Performs GMM projection""" fs = FileSelector.instance() - algorithm.load_ubm(fs.ubm_file) feature_files = fs.training_list('extracted', 'train_projector') projected_files = fs.training_list('projected_gmm', 'train_projector') - + logger.info("ISV training: Project features range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['extracted'], fs.directories['projected_gmm']) # extract the features for i in range(indices[0], indices[1]): feature_file = feature_files[i] projected_file = projected_files[i] + + if not utils.check_file(projected_file, force): + if len(utils.filter_missing_files([feature_file], split_by_client=False, allow_missing_files=allow_missing_files)) > 0: + # load feature + feature = read_feature(extractor, feature_file, allow_missing_files=allow_missing_files) + # project feature + projected = algorithm.project_ubm(feature) + # write it + bob.io.base.create_directories_safe(os.path.dirname(projected_file)) + bob.bio.base.save(projected, projected_file) - if not utils.check_file(projected_file, force): - # load feature - feature = read_feature(extractor, feature_file) - # project feature - projected = algorithm.project_ubm(feature) - # write it - bob.io.base.create_directories_safe(os.path.dirname(projected_file)) - bob.bio.base.save(projected, projected_file) diff --git a/bob/bio/gmm/tools/isv.py b/bob/bio/gmm/tools/isv.py index eba6062f111f275d4070f0f94c9583572d24e512..da28f9590c7b1bd8f86cb63f9e74557c3095dc8b 100644 --- a/bob/bio/gmm/tools/isv.py +++ b/bob/bio/gmm/tools/isv.py @@ -5,9 +5,10 @@ import bob.io.base import os from bob.bio.base.tools.FileSelector import FileSelector -from bob.bio.base import utils, tools +from bob.bio.base import utils, tools -def train_isv(algorithm, force=False): + +def train_isv(algorithm, force=False, allow_missing_files=False): """Finally, the UBM is used to train the ISV projector/enroller.""" fs = FileSelector.instance() @@ -19,6 +20,7 @@ def train_isv(algorithm, force=False): # read training data training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client = True) + training_list = utils.filter_missing_files(training_list, split_by_client=True, allow_missing_files=allow_missing_files) train_gmm_stats = [[algorithm.read_gmm_stats(filename) for filename in client_files] for client_files in training_list] # perform ISV training diff --git a/bob/bio/gmm/tools/ivector.py b/bob/bio/gmm/tools/ivector.py index d132a920091e1767a1aca8658adb5339350e0153..af5f5fb493a4b4587acce47af59738c376c46b5e 100644 --- a/bob/bio/gmm/tools/ivector.py +++ b/bob/bio/gmm/tools/ivector.py @@ -9,8 +9,7 @@ from bob.bio.base.tools.FileSelector import FileSelector from bob.bio.base import utils, tools - -def ivector_estep(algorithm, iteration, indices, force=False): +def ivector_estep(algorithm, iteration, indices, force=False, allow_missing_files = False): """Performs a single E-step of the IVector algorithm (parallel)""" fs = FileSelector.instance() stats_file = fs.ivector_stats_file(iteration, indices[0], indices[1]) @@ -38,7 +37,9 @@ def ivector_estep(algorithm, iteration, indices, force=False): # Load data training_list = fs.training_list('projected_gmm', 'train_projector') - data = [algorithm.read_gmm_stats(training_list[i]) for i in range(indices[0], indices[1])] + training_list = [training_list[i] for i in range(indices[0], indices[1])] + training_list = utils.filter_missing_files(training_list, split_by_client=False, allow_missing_files=allow_missing_files) + data = [algorithm.read_gmm_stats(f) for f in training_list] # Perform the E-step trainer.e_step(tv, data) @@ -134,7 +135,7 @@ def ivector_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cl shutil.rmtree(old_dir) -def ivector_project(algorithm, indices, force=False): +def ivector_project(algorithm, indices, force=False, allow_missing_files=False): """Performs IVector projection""" # read UBM and TV into the IVector class fs = FileSelector.instance() @@ -149,17 +150,19 @@ def ivector_project(algorithm, indices, force=False): for i in range(indices[0], indices[1]): gmm_stats_file = gmm_stats_files[i] ivector_file = ivector_files[i] - if not utils.check_file(ivector_file, force): - # load feature - feature = algorithm.read_gmm_stats(gmm_stats_file) - # project feature - projected = algorithm.project_ivector(feature) - # write it - bob.io.base.create_directories_safe(os.path.dirname(ivector_file)) - bob.bio.base.save(projected, ivector_file) + + if not utils.check_file(ivector_file, force): + if len(utils.filter_missing_files([gmm_stats_file], split_by_client=False, allow_missing_files=allow_missing_files)) > 0: + # load feature + feature = algorithm.read_gmm_stats(gmm_stats_file) + # project feature + projected = algorithm.project_ivector(feature) + # write it + bob.io.base.create_directories_safe(os.path.dirname(ivector_file)) + bob.bio.base.save(projected, ivector_file) -def train_whitener(algorithm, force=False): +def train_whitener(algorithm, force=False, allow_missing_files=False): """Train the feature projector with the extracted features of the world group.""" fs = FileSelector.instance() @@ -167,14 +170,16 @@ def train_whitener(algorithm, force=False): logger.info("- Whitening projector '%s' already exists.", fs.whitener_file) else: train_files = fs.training_list('projected_ivector', 'train_projector') + train_files = utils.filter_missing_files(train_files, split_by_client=False, allow_missing_files=allow_missing_files) train_features = [bob.bio.base.load(f) for f in train_files] + # perform training algorithm.train_whitener(train_features) bob.io.base.create_directories_safe(os.path.dirname(fs.whitener_file)) bob.bio.base.save(algorithm.whitener, fs.whitener_file) -def whitening_project(algorithm, indices, force=False): +def whitening_project(algorithm, indices, force=False, allow_missing_files=False): """Performs IVector projection""" fs = FileSelector.instance() algorithm.load_whitener(fs.whitener_file) @@ -187,30 +192,33 @@ def whitening_project(algorithm, indices, force=False): for i in range(indices[0], indices[1]): ivector_file = ivector_files[i] whitened_file = whitened_files[i] - if not utils.check_file(whitened_file, force): - # load feature - ivector = algorithm.read_feature(ivector_file) - # project feature - whitened = algorithm.project_whitening(ivector) - # write it - bob.io.base.create_directories_safe(os.path.dirname(whitened_file)) - bob.bio.base.save(whitened, whitened_file) + if not utils.check_file(whitened_file, force): + if len(utils.filter_missing_files([ivector_file], split_by_client=False, allow_missing_files=allow_missing_files)) > 0: + # load feature + ivector = algorithm.read_feature(ivector_file) + # project feature + whitened = algorithm.project_whitening(ivector) + # write it + bob.io.base.create_directories_safe(os.path.dirname(whitened_file)) + bob.bio.base.save(whitened, whitened_file) -def train_lda(algorithm, force=False): +def train_lda(algorithm, force=False, allow_missing_files=False): """Train the feature projector with the extracted features of the world group.""" fs = FileSelector.instance() if utils.check_file(fs.lda_file, force, 1000): logger.info("- LDA projector '%s' already exists.", fs.lda_file) else: train_files = fs.training_list('whitened', 'train_projector', arrange_by_client = True) + train_files = utils.filter_missing_files(train_files, split_by_client=True, allow_missing_files=allow_missing_files) train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files] + # perform training algorithm.train_lda(train_features) bob.io.base.create_directories_safe(os.path.dirname(fs.lda_file)) bob.bio.base.save(algorithm.lda, fs.lda_file) -def lda_project(algorithm, indices, force=False): +def lda_project(algorithm, indices, force=False, allow_missing_files=False): """Performs IVector projection""" fs = FileSelector.instance() algorithm.load_lda(fs.lda_file) @@ -224,16 +232,17 @@ def lda_project(algorithm, indices, force=False): ivector_file = whitened_files[i] lda_projected_file = lda_projected_files[i] if not utils.check_file(lda_projected_file, force): - # load feature - ivector = algorithm.read_feature(ivector_file) - # project feature - lda_projected = algorithm.project_lda(ivector) - # write it - bob.io.base.create_directories_safe(os.path.dirname(lda_projected_file)) - bob.bio.base.save(lda_projected, lda_projected_file) + if len(utils.filter_missing_files([ivector_file], split_by_client=False, allow_missing_files=allow_missing_files)) > 0: + # load feature + ivector = algorithm.read_feature(ivector_file) + # project feature + lda_projected = algorithm.project_lda(ivector) + # write it + bob.io.base.create_directories_safe(os.path.dirname(lda_projected_file)) + bob.bio.base.save(lda_projected, lda_projected_file) -def train_wccn(algorithm, force=False): +def train_wccn(algorithm, force=False, allow_missing_files=False): """Train the feature projector with the extracted features of the world group.""" fs = FileSelector.instance() if utils.check_file(fs.wccn_file, force, 1000): @@ -244,13 +253,15 @@ def train_wccn(algorithm, force=False): else: input_label = 'whitened' train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True) + train_files = utils.filter_missing_files(train_files, split_by_client=True, allow_missing_files=allow_missing_files) train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files] + # perform training algorithm.train_wccn(train_features) bob.io.base.create_directories_safe(os.path.dirname(fs.wccn_file)) bob.bio.base.save(algorithm.wccn, fs.wccn_file) -def wccn_project(algorithm, indices, force=False): +def wccn_project(algorithm, indices, force=False, allow_missing_files=False): """Performs IVector projection""" fs = FileSelector.instance() algorithm.load_wccn(fs.wccn_file) @@ -267,17 +278,18 @@ def wccn_project(algorithm, indices, force=False): for i in range(indices[0], indices[1]): ivector_file = input_files[i] wccn_projected_file = wccn_projected_files[i] - if not utils.check_file(wccn_projected_file, force): - # load feature - ivector = algorithm.read_feature(ivector_file) - # project feature - wccn_projected = algorithm.project_wccn(ivector) - # write it - bob.io.base.create_directories_safe(os.path.dirname(wccn_projected_file)) - bob.bio.base.save(wccn_projected, wccn_projected_file) + if not utils.check_file(wccn_projected_file, force): + if len(utils.filter_missing_files([ivector_file], split_by_client=False, allow_missing_files=allow_missing_files)) > 0: + # load feature + ivector = algorithm.read_feature(ivector_file) + # project feature + wccn_projected = algorithm.project_wccn(ivector) + # write it + bob.io.base.create_directories_safe(os.path.dirname(wccn_projected_file)) + bob.bio.base.save(wccn_projected, wccn_projected_file) -def train_plda(algorithm, force=False): +def train_plda(algorithm, force=False, allow_missing_files=False): """Train the feature projector with the extracted features of the world group.""" fs = FileSelector.instance() if utils.check_file(fs.plda_file, force, 1000): @@ -290,7 +302,9 @@ def train_plda(algorithm, force=False): else: input_label = 'whitened' train_files = fs.training_list(input_label, 'train_projector', arrange_by_client = True) + train_files = utils.filter_missing_files(train_files, split_by_client=True, allow_missing_files=allow_missing_files) train_features = [[bob.bio.base.load(filename) for filename in client_files] for client_files in train_files] + # perform training algorithm.train_plda(train_features) bob.io.base.create_directories_safe(os.path.dirname(fs.plda_file)) diff --git a/bob/bio/gmm/tools/utils.py b/bob/bio/gmm/tools/utils.py index 326fd15d06a57faf42c1e75c2dd385fb55979023..044c5d964b69fd5d24b283c1eebc08fc29c214a7 100644 --- a/bob/bio/gmm/tools/utils.py +++ b/bob/bio/gmm/tools/utils.py @@ -1,5 +1,9 @@ import bob.bio.base import numpy +import os + +import logging +logger = logging.getLogger("bob.bio.gmm") def add_jobs(args, submitter, local_job_adder): """Adds all (desired) jobs of the tool chain to the grid, or to the local list to be executed.""" @@ -63,7 +67,8 @@ def base(algorithm): """Returns the base algorithm, if it is a video extension, otherwise returns the algorithm itself""" return algorithm.algorithm if is_video_extension(algorithm) else algorithm -def read_feature(extractor, feature_file): + +def read_feature(extractor, feature_file, allow_missing_files = False): feature = extractor.read_feature(feature_file) try: import bob.bio.video @@ -73,3 +78,4 @@ def read_feature(extractor, feature_file): except ImportError: pass return feature +