Commit 2a28c616 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Removed some unnecessary stuff for the new generation of this package

parent 1e6fc46e
Pipeline #38941 failed with stage
in 3 minutes and 30 seconds
import os
import shutil
import tempfile
import numpy
import nose
import bob.io.image
import bob.bio.base
import bob.bio.gmm
import bob.bio.base.test.utils
from nose.plugins.skip import SkipTest
import pkg_resources
# When True, _verify copies the freshly computed score files over the stored
# reference files before comparing them.
regenerate_reference = False
from bob.bio.base.script.verify import main
# Directory inside the 'bob.bio.gmm' package where _verify looks up the
# reference score files.
data_dir = pkg_resources.resource_filename('bob.bio.gmm', 'test/data')
def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('scores',''), executable = main, allow_missing_files=False):
    """Run a verification script and compare its score files to the references.

    Parameters
    ----------
    parameters : list of str
        Command line arguments handed to ``executable``.
    test_dir : str
        Directory the script writes into; it is removed on exit, whether the
        checks pass or fail.
    sub_dir : str
        Sub-directory of ``test_dir`` that holds the ``Default/<norm>`` score
        directories.
    ref_modifier : str
        Infix of the reference file name ``scores-<norm><ref_modifier>-dev``
        looked up in ``data_dir``.
    score_modifier : tuple of (str, str)
        Prefix and suffix formatted into the score file name ``'%s-dev%s'``.
    executable : callable
        Script entry point that is called with ``parameters``.
    allow_missing_files : bool
        When True, only the shape and the first three columns are compared;
        the numeric score column is not checked.

    Raises
    ------
    AssertionError
        If a score file is missing or differs from its reference.
    """
    try:
        executable(parameters)

        norms = ('nonorm', 'ztnorm')

        # assert that the score files exist
        score_files = [os.path.join(test_dir, sub_dir, 'Default', norm, '%s-dev%s'%score_modifier) for norm in norms]
        assert os.path.exists(score_files[0]), "Score file %s does not exist" % score_files[0]
        assert os.path.exists(score_files[1]), "Score file %s does not exist" % score_files[1]

        # also assert that the scores are still the same -- though they have no real meaning
        reference_files = [os.path.join(data_dir, 'scores-%s%s-dev'%(norm, ref_modifier)) for norm in norms]

        if regenerate_reference:
            for i in (0,1):
                shutil.copy(score_files[i], reference_files[i])

        for i in (0,1):
            d = []
            # read reference and new data
            for score_file in (score_files[i], reference_files[i]):
                f = bob.bio.base.score.load.open_file(score_file)
                # Close the handle even if parsing fails; the original left it
                # open.  NOTE(review): assumes the object returned by
                # open_file provides close() -- confirm against bob.bio.base.
                try:
                    d_ = []
                    for line in f:
                        # the handle may yield bytes instead of text
                        if isinstance(line, bytes): line = line.decode('utf-8')
                        d_.append(line.rstrip().split())
                finally:
                    f.close()
                d.append(numpy.array(d_))

            assert d[0].shape == d[1].shape
            # assert that the data order is still correct
            assert (d[0][:,0:3] == d[1][:, 0:3]).all()

            if not allow_missing_files:
                # assert that the values are OK (third positional argument of allclose is rtol)
                assert numpy.allclose(d[0][:,3].astype(float), d[1][:,3].astype(float), 1e-5)

    finally:
        shutil.rmtree(test_dir)
def test_gmm_sequential():
    # Builds the command line for the GMM algorithm and hands it to _verify,
    # which runs its default executable and compares the resulting scores
    # with the '-gmm' reference files.
    experiment = 'test_gmm_sequential'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    algorithm = 'bob.bio.gmm.algorithm.GMM(2, 2, 2)'

    argv = ['-d', 'dummy', '-p', 'dummy', '-e', 'dummy2d']
    argv += ['-a', algorithm]
    argv += ['--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']

    _verify(argv, workdir, experiment, ref_modifier='-gmm')
def test_gmm_sequential_missingfiles():
    # Same command line as the sequential GMM test, but with a DummyPreprocessor
    # configured with return_none=True and the '--allow-missing-files' flag.
    # _verify is told to skip the numeric score comparison.
    experiment = 'test_gmm_sequential'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    preprocessor = 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
    algorithm = 'bob.bio.gmm.algorithm.GMM(2, 2, 2)'

    argv = ['-d', 'dummy', '-p', preprocessor, '-e', 'dummy2d']
    argv += ['-a', algorithm]
    argv += ['--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']
    argv += ['--allow-missing-files']

    _verify(argv, workdir, experiment, ref_modifier='-gmm', allow_missing_files=True)
@bob.bio.base.test.utils.grid_available
def test_gmm_parallel():
    # Runs the verify_gmm script with a local grid configuration and lets
    # _verify compare the scores with the '-gmm' reference files.
    from bob.bio.gmm.script.verify_gmm import main as verify_gmm

    experiment = 'test_gmm_parallel'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    job_database = os.path.join(workdir, "submitted.sql3")
    algorithm = 'bob.bio.gmm.algorithm.GMM(2, 2, 2)'
    grid = 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'

    argv = ['-d', 'dummy', '-p', 'dummy', '-e', 'dummy2d']
    argv += ['-a', algorithm, '--import', 'bob.bio.gmm', 'bob.io.image']
    argv += ['-g', grid, '-G', job_database, '--run-local-scheduler', '--stop-on-failure']
    argv += ['--clean-intermediate', '--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']

    _verify(argv, workdir, experiment, executable=verify_gmm, ref_modifier='-gmm')
@bob.bio.base.test.utils.grid_available
def test_gmm_parallel_missingfiles():
    # Parallel GMM run with a DummyPreprocessor configured with
    # return_none=True and '--allow-missing-files'; _verify skips the numeric
    # score comparison.
    from bob.bio.gmm.script.verify_gmm import main as verify_gmm

    experiment = 'test_gmm_parallel'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    job_database = os.path.join(workdir, "submitted.sql3")
    preprocessor = 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
    algorithm = 'bob.bio.gmm.algorithm.GMM(2, 2, 2)'
    grid = 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'

    argv = ['-d', 'dummy', '-p', preprocessor, '-e', 'dummy2d']
    argv += ['-a', algorithm, '--import', 'bob.bio.gmm', 'bob.io.image']
    argv += ['-g', grid, '-G', job_database, '--run-local-scheduler', '--stop-on-failure']
    argv += ['--clean-intermediate', '--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']
    argv += ['--allow-missing-files']

    _verify(argv, workdir, experiment, executable=verify_gmm, ref_modifier='-gmm', allow_missing_files=True)
def test_isv_sequential():
    # Builds the command line for the ISV algorithm and hands it to _verify,
    # which runs its default executable and compares the resulting scores
    # with the '-isv' reference files.
    experiment = 'test_isv_sequential'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    algorithm = 'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)'

    argv = ['-d', 'dummy', '-p', 'dummy', '-e', 'dummy2d']
    argv += ['-a', algorithm]
    argv += ['--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']

    _verify(argv, workdir, experiment, ref_modifier='-isv')
def test_isv_sequential_missingfiles():
    # Sequential ISV run with a DummyPreprocessor configured with
    # return_none=True and '--allow-missing-files'; _verify skips the numeric
    # score comparison.
    experiment = 'test_isv_sequential'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    preprocessor = 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
    algorithm = 'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)'

    argv = ['-d', 'dummy', '-p', preprocessor, '-e', 'dummy2d']
    argv += ['-a', algorithm]
    argv += ['--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']
    argv += ['--allow-missing-files']

    _verify(argv, workdir, experiment, ref_modifier='-isv', allow_missing_files=True)
@bob.bio.base.test.utils.grid_available
def test_isv_parallel():
    # Runs the verify_isv script with a local grid configuration and lets
    # _verify compare the scores with the '-isv' reference files.
    from bob.bio.gmm.script.verify_isv import main as verify_isv

    experiment = 'test_isv_parallel'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    job_database = os.path.join(workdir, "submitted.sql3")
    algorithm = 'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)'
    grid = 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'

    argv = ['-d', 'dummy', '-p', 'dummy', '-e', 'dummy2d']
    argv += ['-a', algorithm, '--import', 'bob.bio.gmm', 'bob.io.image']
    argv += ['-g', grid, '-G', job_database, '--run-local-scheduler', '--stop-on-failure']
    argv += ['--clean-intermediate', '--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']

    _verify(argv, workdir, experiment, executable=verify_isv, ref_modifier='-isv')
@bob.bio.base.test.utils.grid_available
def test_isv_parallel_missing_files():
    # Parallel ISV run with a DummyPreprocessor configured with
    # return_none=True and '--allow-missing-files'; _verify skips the numeric
    # score comparison.
    from bob.bio.gmm.script.verify_isv import main as verify_isv

    experiment = 'test_isv_parallel'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    job_database = os.path.join(workdir, "submitted.sql3")
    preprocessor = 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
    algorithm = 'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)'
    grid = 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'

    argv = ['-d', 'dummy', '-p', preprocessor, '-e', 'dummy2d']
    argv += ['-a', algorithm, '--import', 'bob.bio.gmm', 'bob.io.image']
    argv += ['-g', grid, '-G', job_database, '--run-local-scheduler', '--stop-on-failure']
    argv += ['--clean-intermediate', '--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']
    argv += ['--allow-missing-files']

    _verify(argv, workdir, experiment, executable=verify_isv, ref_modifier='-isv', allow_missing_files=True)
def test_ivector_cosine_sequential():
    # Builds the command line for the IVector algorithm and hands it to
    # _verify, which runs its default executable and compares the resulting
    # scores with the '-ivector-cosine' reference files.
    experiment = 'test_ivector_cosine_sequential'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    algorithm = 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)'

    argv = ['-d', 'dummy', '-p', 'dummy', '-e', 'dummy2d']
    argv += ['-a', algorithm]
    argv += ['--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']

    _verify(argv, workdir, experiment, ref_modifier='-ivector-cosine')
def test_ivector_cosine_sequential_missing_files():
    # Sequential IVector run with a DummyPreprocessor configured with
    # return_none=True and '--allow-missing-files'; _verify skips the numeric
    # score comparison.
    experiment = 'test_ivector_cosine_sequential'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    preprocessor = 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
    algorithm = 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)'

    argv = ['-d', 'dummy', '-p', preprocessor, '-e', 'dummy2d']
    argv += ['-a', algorithm]
    argv += ['--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']
    argv += ['--allow-missing-files']

    _verify(argv, workdir, experiment, ref_modifier='-ivector-cosine', allow_missing_files=True)
@bob.bio.base.test.utils.grid_available
def test_ivector_cosine_parallel():
    # Runs the verify_ivector script with a local grid configuration and lets
    # _verify compare the scores with the '-ivector-cosine' reference files.
    from bob.bio.gmm.script.verify_ivector import main as verify_ivector

    experiment = 'test_ivector_cosine_parallel'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    job_database = os.path.join(workdir, "submitted.sql3")
    algorithm = 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)'
    grid = 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'

    argv = ['-d', 'dummy', '-p', 'dummy', '-e', 'dummy2d']
    argv += ['-a', algorithm, '--import', 'bob.bio.gmm', 'bob.io.image']
    argv += ['-g', grid, '-G', job_database, '--run-local-scheduler', '--stop-on-failure']
    argv += ['--clean-intermediate', '--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']

    _verify(argv, workdir, experiment, executable=verify_ivector, ref_modifier='-ivector-cosine')
@bob.bio.base.test.utils.grid_available
def test_ivector_cosine_parallel_missing_files():
    # Parallel IVector run with a DummyPreprocessor configured with
    # return_none=True and '--allow-missing-files'; _verify skips the numeric
    # score comparison.
    from bob.bio.gmm.script.verify_ivector import main as verify_ivector

    experiment = 'test_ivector_cosine_parallel'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    job_database = os.path.join(workdir, "submitted.sql3")
    preprocessor = 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
    algorithm = 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)'
    grid = 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'

    argv = ['-d', 'dummy', '-p', preprocessor, '-e', 'dummy2d']
    argv += ['-a', algorithm, '--import', 'bob.bio.gmm', 'bob.io.image']
    argv += ['-g', grid, '-G', job_database, '--run-local-scheduler', '--stop-on-failure']
    argv += ['--clean-intermediate', '--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']
    argv += ['--allow-missing-files']

    _verify(argv, workdir, experiment, executable=verify_ivector, ref_modifier='-ivector-cosine', allow_missing_files=True)
def test_ivector_lda_wccn_plda_sequential():
    # Builds the command line for the IVector algorithm with use_lda,
    # use_wccn and use_plda enabled and hands it to _verify, which compares
    # the scores with the '-ivector-lda-wccn-plda' reference files.
    # NOTE(review): unlike the other tests in this file, no
    # '--preferred-package' option is passed here -- confirm this is intended.
    experiment = 'test_ivector_lda_wccn_plda_sequential'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    algorithm = 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2, use_lda=True, use_wccn=True, use_plda=True, lda_dim=2, plda_dim_F=2, plda_dim_G=2, plda_training_iterations=2)'

    argv = ['-d', 'dummy', '-p', 'dummy', '-e', 'dummy2d']
    argv += ['-a', algorithm]
    argv += ['--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]

    _verify(argv, workdir, experiment, ref_modifier='-ivector-lda-wccn-plda')
@bob.bio.base.test.utils.grid_available
def test_ivector_lda_wccn_plda_parallel():
    # Runs the verify_ivector script (IVector with use_lda, use_wccn and
    # use_plda enabled) with a local grid configuration and lets _verify
    # compare the scores with the '-ivector-lda-wccn-plda' reference files.
    from bob.bio.gmm.script.verify_ivector import main as verify_ivector

    experiment = 'test_ivector_lda_wccn_plda_parallel'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    job_database = os.path.join(workdir, "submitted.sql3")
    algorithm = 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2, use_lda=True, use_wccn=True, use_plda=True, lda_dim=2, plda_dim_F=2, plda_dim_G=2, plda_training_iterations=2)'
    grid = 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'

    argv = ['-d', 'dummy', '-p', 'dummy', '-e', 'dummy2d']
    argv += ['-a', algorithm, '--import', 'bob.bio.gmm', 'bob.io.image']
    argv += ['-g', grid, '-G', job_database, '--run-local-scheduler', '--stop-on-failure']
    argv += ['--clean-intermediate', '--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']

    _verify(argv, workdir, experiment, executable=verify_ivector, ref_modifier='-ivector-lda-wccn-plda')
@bob.bio.base.test.utils.grid_available
def test_ivector_lda_wccn_plda_parallel_missing_files():
    # Parallel IVector run (use_lda, use_wccn and use_plda enabled) with a
    # DummyPreprocessor configured with return_none=True and
    # '--allow-missing-files'; _verify skips the numeric score comparison.
    from bob.bio.gmm.script.verify_ivector import main as verify_ivector

    experiment = 'test_ivector_lda_wccn_plda_parallel'
    workdir = tempfile.mkdtemp(prefix='bobtest_')
    job_database = os.path.join(workdir, "submitted.sql3")
    preprocessor = 'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
    algorithm = 'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2, use_lda=True, use_wccn=True, use_plda=True, lda_dim=2, plda_dim_F=2, plda_dim_G=2, plda_training_iterations=2)'
    grid = 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'

    argv = ['-d', 'dummy', '-p', preprocessor, '-e', 'dummy2d']
    argv += ['-a', algorithm, '--import', 'bob.bio.gmm', 'bob.io.image']
    argv += ['-g', grid, '-G', job_database, '--run-local-scheduler', '--stop-on-failure']
    argv += ['--clean-intermediate', '--zt-norm']
    argv += ['-vs', experiment]
    argv += ['--temp-directory', workdir, '--result-directory', workdir]
    argv += ['--preferred-package', 'bob.bio.gmm']
    argv += ['--allow-missing-files']

    _verify(argv, workdir, experiment, executable=verify_ivector, ref_modifier='-ivector-lda-wccn-plda', allow_missing_files=True)
def test_internal_raises():
    # Checks that each parallel script (verify_gmm, verify_isv,
    # verify_ivector) raises ValueError when it is called with one of the
    # options '--iteration', '--group', '--model-type' or '--score-type'.
    test_dir = tempfile.mkdtemp(prefix='bobtest_')
    # Everything after the directory creation runs inside try/finally so the
    # temporary directory is removed even when an import or an assertion
    # fails; the original only cleaned up on success.
    try:
        test_database = os.path.join(test_dir, "submitted.sql3")
        # define dummy parameters
        parameters = [
            '-d', 'dummy',
            '-p', 'dummy',
            '-e', 'dummy2d',
            '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '--stop-on-failure',
            '--import', 'bob.bio.gmm', 'bob.io.image',
            '--clean-intermediate',
            '--zt-norm',
            '-vs', 'test_raises',
            '--temp-directory', test_dir,
            '--result-directory', test_dir,
            '--preferred-package', 'bob.bio.gmm'
        ]

        from bob.bio.gmm.script.verify_gmm import main as gmm
        from bob.bio.gmm.script.verify_isv import main as isv
        from bob.bio.gmm.script.verify_ivector import main as ivector

        for script, algorithm in (
            (gmm, "bob.bio.gmm.algorithm.GMM(2, 2, 2)"),
            (isv, "bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)"),
            (ivector, "bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2, use_lda=True, use_wccn=True, use_plda=True, lda_dim=2, plda_dim_F=2, plda_dim_G=2, plda_training_iterations=2)")):
            for option, value in (("--iteration", "0"), ("--group", "dev"), ("--model-type", "N"), ("--score-type", "A")):
                # append the algorithm and exactly one of the options under test
                internal = parameters + ["--algorithm", algorithm, option, value]
                nose.tools.assert_raises(ValueError, script, internal)
    finally:
        shutil.rmtree(test_dir)
......@@ -23,7 +23,6 @@ Users Guide
:maxdepth: 2
implementation
parallel
Reference Manual
================
......
.. _bob.bio.gmm.parallel:
==================================
Executing the Training in Parallel
==================================
Sometimes the training of the GMM-based models requires a lot of time.
However, the training procedures can be parallelized, i.e., by running the E-steps of the EM loop in parallel.
For this purpose, we provide a set of scripts ``verify_gmm.py``, ``verify_isv.py`` and ``verify_ivector.py``.
These scripts integrate perfectly into the ``bob.bio`` packages.
Particularly, they have exactly the same set of options as documented in :ref:`bob.bio.base.experiments`.
In fact, the scripts above only run in parallelized mode, i.e., either the ``--grid`` or ``--parallel`` option is required.
During the submission of the jobs, several hundred jobs might be created (depending on the ``number_of_..._training_iterations`` that you specify in the :py:class:`bob.bio.gmm.algorithm.GMM` constructor).
However, after the training has finished, it is possible to use the normal ``verify.py`` script to run similar experiments, e.g., if you want to change the protocol of your experiment.
.. todo:: improve the documentation of the parallelized scripts.
......@@ -15,58 +15,4 @@ Miscellaneous functions
bob.bio.base.get_config
Tools to run recognition experiments
------------------------------------
Command line generation
~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.gmm.tools.add_parallel_gmm_options
bob.bio.gmm.tools.initialize_parallel_gmm
bob.bio.gmm.tools.add_jobs
Parallel GMM
~~~~~~~~~~~~
.. autosummary::
bob.bio.gmm.tools.kmeans_initialize
bob.bio.gmm.tools.kmeans_estep
bob.bio.gmm.tools.kmeans_mstep
bob.bio.gmm.tools.gmm_initialize
bob.bio.gmm.tools.gmm_estep
bob.bio.gmm.tools.gmm_mstep
bob.bio.gmm.tools.gmm_project
Parallel ISV
~~~~~~~~~~~~
.. autosummary::
bob.bio.gmm.tools.train_isv
Parallel I-Vector
~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.gmm.tools.ivector_estep
bob.bio.gmm.tools.ivector_mstep
bob.bio.gmm.tools.ivector_project
bob.bio.gmm.tools.train_whitener
Integration with bob.bio.video
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.gmm.tools.is_video_extension
bob.bio.gmm.tools.base
bob.bio.gmm.tools.read_feature
Details
-------
.. automodule:: bob.bio.gmm.tools
.. include:: links.rst
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment