Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.bio.gmm
Commits
cbb21ad1
Commit
cbb21ad1
authored
Oct 31, 2017
by
Manuel Günther
Browse files
Merge branch 'issue-21' into 'master'
Propagated --allow-missing-files to the UBM training See merge request
!13
parents
e8d4b0b2
b66ea6f3
Pipeline
#13610
canceled with stages
in 1 minute and 10 seconds
Changes
9
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
bob/bio/gmm/script/verify_gmm.py
View file @
cbb21ad1
...
...
@@ -158,6 +158,7 @@ def execute(args):
algorithm
,
args
.
extractor
,
args
.
limit_training_data
,
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
# train the feature projector
...
...
@@ -166,6 +167,7 @@ def execute(args):
algorithm
,
args
.
extractor
,
args
.
iteration
,
allow_missing_files
=
args
.
allow_missing_files
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'extracted'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
force
=
args
.
force
)
...
...
@@ -183,6 +185,7 @@ def execute(args):
algorithm
,
args
.
extractor
,
args
.
limit_training_data
,
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
# train the feature projector
...
...
@@ -191,6 +194,7 @@ def execute(args):
algorithm
,
args
.
extractor
,
args
.
iteration
,
allow_missing_files
=
args
.
allow_missing_files
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'extracted'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
force
=
args
.
force
)
...
...
bob/bio/gmm/script/verify_isv.py
View file @
cbb21ad1
...
...
@@ -83,6 +83,7 @@ def add_isv_jobs(args, job_ids, deps, submitter):
name
=
'pro-gmm'
,
number_of_parallel_jobs
=
args
.
grid
.
number_of_projection_jobs
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
projection_queue
)
deps
.
append
(
job_ids
[
'gmm-projection'
])
...
...
@@ -90,6 +91,7 @@ def add_isv_jobs(args, job_ids, deps, submitter):
'--sub-task train-isv'
,
name
=
'train-isv'
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
training_queue
)
deps
.
append
(
job_ids
[
'isv-training'
])
...
...
@@ -118,12 +120,14 @@ def execute(args):
algorithm
,
args
.
extractor
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'extracted'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
# train the feature projector
elif
args
.
sub_task
==
'train-isv'
:
tools
.
train_isv
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
else
:
...
...
bob/bio/gmm/script/verify_ivector.py
View file @
cbb21ad1
...
...
@@ -96,6 +96,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name
=
'i-e-%d'
%
iteration
,
number_of_parallel_jobs
=
args
.
grid
.
number_of_projection_jobs
,
dependencies
=
[
job_ids
[
'ivector-m-step'
]]
if
iteration
!=
args
.
tv_start_iteration
else
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
projection_queue
)
# M-step
...
...
@@ -114,6 +115,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name
=
'pro-ivector'
,
number_of_parallel_jobs
=
args
.
grid
.
number_of_projection_jobs
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
projection_queue
)
deps
.
append
(
job_ids
[
'ivector-projection'
])
...
...
@@ -123,6 +125,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
'--sub-task train-whitener'
,
name
=
'train-whitener'
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
training_queue
)
deps
.
append
(
job_ids
[
'whitener-training'
])
...
...
@@ -133,6 +136,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name
=
'whitened'
,
number_of_parallel_jobs
=
args
.
grid
.
number_of_projection_jobs
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
projection_queue
)
deps
.
append
(
job_ids
[
'whitening-projection'
])
...
...
@@ -142,6 +146,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
'--sub-task train-lda'
,
name
=
'train-lda'
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
training_queue
)
deps
.
append
(
job_ids
[
'lda-training'
])
...
...
@@ -152,6 +157,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name
=
'lda_projection'
,
number_of_parallel_jobs
=
args
.
grid
.
number_of_projection_jobs
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
projection_queue
)
deps
.
append
(
job_ids
[
'lda-projection'
])
...
...
@@ -161,6 +167,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
'--sub-task train-wccn'
,
name
=
'train-wccn'
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
training_queue
)
deps
.
append
(
job_ids
[
'wccn-training'
])
...
...
@@ -171,6 +178,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
name
=
'wccn_projection'
,
number_of_parallel_jobs
=
args
.
grid
.
number_of_projection_jobs
,
dependencies
=
deps
,
allow_missing_files
=
args
.
allow_missing_files
,
**
args
.
grid
.
projection_queue
)
deps
.
append
(
job_ids
[
'wccn-projection'
])
...
...
@@ -179,6 +187,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
job_ids
[
'plda-training'
]
=
submitter
.
submit
(
'--sub-task train-plda'
,
name
=
'train-plda'
,
allow_missing_files
=
args
.
allow_missing_files
,
dependencies
=
deps
,
**
args
.
grid
.
training_queue
)
deps
.
append
(
job_ids
[
'plda-training'
])
...
...
@@ -216,12 +225,14 @@ def execute(args):
algorithm
,
args
.
extractor
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'extracted'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
elif
args
.
sub_task
==
'ivector-e-step'
:
tools
.
ivector_estep
(
algorithm
,
args
.
iteration
,
allow_missing_files
=
args
.
allow_missing_files
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'projected_gmm'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
force
=
args
.
force
)
...
...
@@ -237,17 +248,20 @@ def execute(args):
elif
args
.
sub_task
==
'ivector-projection'
:
tools
.
ivector_project
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'projected_gmm'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
force
=
args
.
force
)
elif
args
.
sub_task
==
'train-whitener'
:
tools
.
train_whitener
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
elif
args
.
sub_task
==
'whitening-projection'
:
tools
.
whitening_project
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'projected_gmm'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
force
=
args
.
force
)
...
...
@@ -255,12 +269,14 @@ def execute(args):
if
algorithm
.
use_lda
:
tools
.
train_lda
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
elif
args
.
sub_task
==
'lda-projection'
:
if
algorithm
.
use_lda
:
tools
.
lda_project
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'projected_gmm'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
force
=
args
.
force
)
...
...
@@ -268,12 +284,14 @@ def execute(args):
if
algorithm
.
use_wccn
:
tools
.
train_wccn
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
elif
args
.
sub_task
==
'wccn-projection'
:
if
algorithm
.
use_wccn
:
tools
.
wccn_project
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
indices
=
base_tools
.
indices
(
fs
.
training_list
(
'projected_gmm'
,
'train_projector'
),
args
.
grid
.
number_of_projection_jobs
),
force
=
args
.
force
)
...
...
@@ -281,6 +299,7 @@ def execute(args):
if
algorithm
.
use_plda
:
tools
.
train_plda
(
algorithm
,
allow_missing_files
=
args
.
allow_missing_files
,
force
=
args
.
force
)
elif
args
.
sub_task
==
'save-projector'
:
...
...
bob/bio/gmm/test/test_scripts.py
View file @
cbb21ad1
...
...
@@ -22,7 +22,7 @@ from bob.bio.base.script.verify import main
data_dir
=
pkg_resources
.
resource_filename
(
'bob.bio.gmm'
,
'test/data'
)
def
_verify
(
parameters
,
test_dir
,
sub_dir
,
ref_modifier
=
""
,
score_modifier
=
(
'scores'
,
''
),
executable
=
main
):
def
_verify
(
parameters
,
test_dir
,
sub_dir
,
ref_modifier
=
""
,
score_modifier
=
(
'scores'
,
''
),
executable
=
main
,
allow_missing_files
=
False
):
try
:
executable
(
parameters
)
...
...
@@ -52,8 +52,10 @@ def _verify(parameters, test_dir, sub_dir, ref_modifier="", score_modifier=('sco
assert
d
[
0
].
shape
==
d
[
1
].
shape
# assert that the data order is still correct
assert
(
d
[
0
][:,
0
:
3
]
==
d
[
1
][:,
0
:
3
]).
all
()
# assert that the values are OK
assert
numpy
.
allclose
(
d
[
0
][:,
3
].
astype
(
float
),
d
[
1
][:,
3
].
astype
(
float
),
1e-5
)
if
not
allow_missing_files
:
# assert that the values are OK
assert
numpy
.
allclose
(
d
[
0
][:,
3
].
astype
(
float
),
d
[
1
][:,
3
].
astype
(
float
),
1e-5
)
finally
:
shutil
.
rmtree
(
test_dir
)
...
...
@@ -77,6 +79,26 @@ def test_gmm_sequential():
_verify
(
parameters
,
test_dir
,
'test_gmm_sequential'
,
ref_modifier
=
'-gmm'
)
def
test_gmm_sequential_missingfiles
():
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
# define dummy parameters
parameters
=
[
'-d'
,
'dummy'
,
'-p'
,
'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
,
'-e'
,
'dummy2d'
,
'-a'
,
'bob.bio.gmm.algorithm.GMM(2, 2, 2)'
,
'--zt-norm'
,
'-vs'
,
'test_gmm_sequential'
,
'--temp-directory'
,
test_dir
,
'--result-directory'
,
test_dir
,
'--preferred-package'
,
'bob.bio.gmm'
,
'--allow-missing-files'
]
_verify
(
parameters
,
test_dir
,
'test_gmm_sequential'
,
ref_modifier
=
'-gmm'
,
allow_missing_files
=
True
)
@
bob
.
bio
.
base
.
test
.
utils
.
grid_available
def
test_gmm_parallel
():
from
bob.bio.gmm.script.verify_gmm
import
main
...
...
@@ -100,6 +122,30 @@ def test_gmm_parallel():
_verify
(
parameters
,
test_dir
,
'test_gmm_parallel'
,
executable
=
main
,
ref_modifier
=
'-gmm'
)
@
bob
.
bio
.
base
.
test
.
utils
.
grid_available
def
test_gmm_parallel_missingfiles
():
from
bob.bio.gmm.script.verify_gmm
import
main
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
test_database
=
os
.
path
.
join
(
test_dir
,
"submitted.sql3"
)
# define dummy parameters
parameters
=
[
'-d'
,
'dummy'
,
'-p'
,
'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
,
'-e'
,
'dummy2d'
,
'-a'
,
'bob.bio.gmm.algorithm.GMM(2, 2, 2)'
,
'--import'
,
'bob.bio.gmm'
,
'bob.io.image'
,
'-g'
,
'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'
,
'-G'
,
test_database
,
'--run-local-scheduler'
,
'--stop-on-failure'
,
'--clean-intermediate'
,
'--zt-norm'
,
'-vs'
,
'test_gmm_parallel'
,
'--temp-directory'
,
test_dir
,
'--result-directory'
,
test_dir
,
'--preferred-package'
,
'bob.bio.gmm'
,
'--allow-missing-files'
]
_verify
(
parameters
,
test_dir
,
'test_gmm_parallel'
,
executable
=
main
,
ref_modifier
=
'-gmm'
,
allow_missing_files
=
True
)
def
test_isv_sequential
():
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
# define dummy parameters
...
...
@@ -118,6 +164,25 @@ def test_isv_sequential():
_verify
(
parameters
,
test_dir
,
'test_isv_sequential'
,
ref_modifier
=
'-isv'
)
def
test_isv_sequential_missingfiles
():
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
# define dummy parameters
parameters
=
[
'-d'
,
'dummy'
,
'-p'
,
'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
,
'-e'
,
'dummy2d'
,
'-a'
,
'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)'
,
'--zt-norm'
,
'-vs'
,
'test_isv_sequential'
,
'--temp-directory'
,
test_dir
,
'--result-directory'
,
test_dir
,
'--preferred-package'
,
'bob.bio.gmm'
,
'--allow-missing-files'
]
_verify
(
parameters
,
test_dir
,
'test_isv_sequential'
,
ref_modifier
=
'-isv'
,
allow_missing_files
=
True
)
@
bob
.
bio
.
base
.
test
.
utils
.
grid_available
def
test_isv_parallel
():
from
bob.bio.gmm.script.verify_isv
import
main
...
...
@@ -141,6 +206,30 @@ def test_isv_parallel():
_verify
(
parameters
,
test_dir
,
'test_isv_parallel'
,
executable
=
main
,
ref_modifier
=
'-isv'
)
@
bob
.
bio
.
base
.
test
.
utils
.
grid_available
def
test_isv_parallel_missing_files
():
from
bob.bio.gmm.script.verify_isv
import
main
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
test_database
=
os
.
path
.
join
(
test_dir
,
"submitted.sql3"
)
# define dummy parameters
parameters
=
[
'-d'
,
'dummy'
,
'-p'
,
'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
,
'-e'
,
'dummy2d'
,
'-a'
,
'bob.bio.gmm.algorithm.ISV(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, isv_training_iterations=2)'
,
'--import'
,
'bob.bio.gmm'
,
'bob.io.image'
,
'-g'
,
'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'
,
'-G'
,
test_database
,
'--run-local-scheduler'
,
'--stop-on-failure'
,
'--clean-intermediate'
,
'--zt-norm'
,
'-vs'
,
'test_isv_parallel'
,
'--temp-directory'
,
test_dir
,
'--result-directory'
,
test_dir
,
'--preferred-package'
,
'bob.bio.gmm'
,
'--allow-missing-files'
]
_verify
(
parameters
,
test_dir
,
'test_isv_parallel'
,
executable
=
main
,
ref_modifier
=
'-isv'
,
allow_missing_files
=
True
)
def
test_ivector_cosine_sequential
():
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
# define dummy parameters
...
...
@@ -159,6 +248,25 @@ def test_ivector_cosine_sequential():
_verify
(
parameters
,
test_dir
,
'test_ivector_cosine_sequential'
,
ref_modifier
=
'-ivector-cosine'
)
def
test_ivector_cosine_sequential_missing_files
():
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
# define dummy parameters
parameters
=
[
'-d'
,
'dummy'
,
'-p'
,
'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
,
'-e'
,
'dummy2d'
,
'-a'
,
'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)'
,
'--zt-norm'
,
'-vs'
,
'test_ivector_cosine_sequential'
,
'--temp-directory'
,
test_dir
,
'--result-directory'
,
test_dir
,
'--preferred-package'
,
'bob.bio.gmm'
,
'--allow-missing-files'
]
_verify
(
parameters
,
test_dir
,
'test_ivector_cosine_sequential'
,
ref_modifier
=
'-ivector-cosine'
,
allow_missing_files
=
True
)
@
bob
.
bio
.
base
.
test
.
utils
.
grid_available
def
test_ivector_cosine_parallel
():
from
bob.bio.gmm.script.verify_ivector
import
main
...
...
@@ -181,6 +289,32 @@ def test_ivector_cosine_parallel():
_verify
(
parameters
,
test_dir
,
'test_ivector_cosine_parallel'
,
executable
=
main
,
ref_modifier
=
'-ivector-cosine'
)
@
bob
.
bio
.
base
.
test
.
utils
.
grid_available
def
test_ivector_cosine_parallel_missing_files
():
from
bob.bio.gmm.script.verify_ivector
import
main
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
test_database
=
os
.
path
.
join
(
test_dir
,
"submitted.sql3"
)
# define dummy parameters
parameters
=
[
'-d'
,
'dummy'
,
'-p'
,
'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
,
'-e'
,
'dummy2d'
,
'-a'
,
'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2)'
,
'--import'
,
'bob.bio.gmm'
,
'bob.io.image'
,
'-g'
,
'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'
,
'-G'
,
test_database
,
'--run-local-scheduler'
,
'--stop-on-failure'
,
'--clean-intermediate'
,
'--zt-norm'
,
'-vs'
,
'test_ivector_cosine_parallel'
,
'--temp-directory'
,
test_dir
,
'--result-directory'
,
test_dir
,
'--preferred-package'
,
'bob.bio.gmm'
,
'--allow-missing-files'
]
_verify
(
parameters
,
test_dir
,
'test_ivector_cosine_parallel'
,
executable
=
main
,
ref_modifier
=
'-ivector-cosine'
,
allow_missing_files
=
True
)
def
test_ivector_lda_wccn_plda_sequential
():
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
# define dummy parameters
...
...
@@ -221,6 +355,30 @@ def test_ivector_lda_wccn_plda_parallel():
_verify
(
parameters
,
test_dir
,
'test_ivector_lda_wccn_plda_parallel'
,
executable
=
main
,
ref_modifier
=
'-ivector-lda-wccn-plda'
)
@
bob
.
bio
.
base
.
test
.
utils
.
grid_available
def
test_ivector_lda_wccn_plda_parallel_missing_files
():
from
bob.bio.gmm.script.verify_ivector
import
main
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
test_database
=
os
.
path
.
join
(
test_dir
,
"submitted.sql3"
)
# define dummy parameters
parameters
=
[
'-d'
,
'dummy'
,
'-p'
,
'bob.bio.base.test.dummy.preprocessor.DummyPreprocessor(return_none=True, probability_of_none=0.5)'
,
'-e'
,
'dummy2d'
,
'-a'
,
'bob.bio.gmm.algorithm.IVector(10, number_of_gaussians=2, kmeans_training_iterations=2, gmm_training_iterations=2, tv_training_iterations=2, use_lda=True, use_wccn=True, use_plda=True, lda_dim=2, plda_dim_F=2, plda_dim_G=2, plda_training_iterations=2)'
,
'--import'
,
'bob.bio.gmm'
,
'bob.io.image'
,
'-g'
,
'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)'
,
'-G'
,
test_database
,
'--run-local-scheduler'
,
'--stop-on-failure'
,
'--clean-intermediate'
,
'--zt-norm'
,
'-vs'
,
'test_ivector_lda_wccn_plda_parallel'
,
'--temp-directory'
,
test_dir
,
'--result-directory'
,
test_dir
,
'--preferred-package'
,
'bob.bio.gmm'
,
'--allow-missing-files'
]
_verify
(
parameters
,
test_dir
,
'test_ivector_lda_wccn_plda_parallel'
,
executable
=
main
,
ref_modifier
=
'-ivector-lda-wccn-plda'
,
allow_missing_files
=
True
)
def
test_internal_raises
():
test_dir
=
tempfile
.
mkdtemp
(
prefix
=
'bobtest_'
)
test_database
=
os
.
path
.
join
(
test_dir
,
"submitted.sql3"
)
...
...
@@ -253,3 +411,4 @@ def test_internal_raises():
nose
.
tools
.
assert_raises
(
ValueError
,
script
,
internal
)
shutil
.
rmtree
(
test_dir
)
bob/bio/gmm/tools/__init__.py
View file @
cbb21ad1
...
...
@@ -4,5 +4,6 @@ from .gmm import *
from
.isv
import
*
from
.ivector
import
*
# gets sphinx autodoc done right - don't remove it
__all__
=
[
_
for
_
in
dir
()
if
not
_
.
startswith
(
'_'
)]
bob/bio/gmm/tools/gmm.py
View file @
cbb21ad1
...
...
@@ -12,7 +12,7 @@ from bob.bio.base import utils, tools
from
.utils
import
read_feature
def
kmeans_initialize
(
algorithm
,
extractor
,
limit_data
=
None
,
force
=
False
):
def
kmeans_initialize
(
algorithm
,
extractor
,
limit_data
=
None
,
force
=
False
,
allow_missing_files
=
False
):
"""Initializes the K-Means training (non-parallel)."""
fs
=
FileSelector
.
instance
()
...
...
@@ -27,7 +27,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
# read the features
reader
=
functools
.
partial
(
read_feature
,
extractor
)
data
=
utils
.
vstack_features
(
reader
,
training_list
)
data
=
utils
.
vstack_features
(
reader
,
training_list
,
allow_missing_files
=
allow_missing_files
)
# Perform KMeans initialization
kmeans_machine
=
bob
.
learn
.
em
.
KMeansMachine
(
algorithm
.
gaussians
,
data
.
shape
[
1
])
...
...
@@ -38,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
logger
.
info
(
"UBM training: saved initial KMeans machine to '%s'"
,
output_file
)
def
kmeans_estep
(
algorithm
,
extractor
,
iteration
,
indices
,
force
=
False
):
def
kmeans_estep
(
algorithm
,
extractor
,
iteration
,
indices
,
force
=
False
,
allow_missing_files
=
False
):
"""Performs a single E-step of the K-Means algorithm (parallel)"""
if
indices
[
0
]
>=
indices
[
1
]:
return
...
...
@@ -62,7 +62,8 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
reader
=
functools
.
partial
(
read_feature
,
extractor
)
data
=
utils
.
vstack_features
(
reader
,
(
training_list
[
index
]
for
index
in
range
(
indices
[
0
],
indices
[
1
])))
(
training_list
[
index
]
for
index
in
range
(
indices
[
0
],
indices
[
1
])),
allow_missing_files
=
allow_missing_files
)
# Performs the E-step
trainer
=
algorithm
.
kmeans_trainer
...
...
@@ -162,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle
def
gmm_initialize
(
algorithm
,
extractor
,
limit_data
=
None
,
force
=
False
):
def
gmm_initialize
(
algorithm
,
extractor
,
limit_data
=
None
,
force
=
False
,
allow_missing_files
=
False
):
"""Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
This might require a lot of memory."""
fs
=
FileSelector
.
instance
()
...
...
@@ -178,7 +179,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
# read the features
reader
=
functools
.
partial
(
read_feature
,
extractor
)
data
=
utils
.
vstack_features
(
reader
,
training_list
)
data
=
utils
.
vstack_features
(
reader
,
training_list
,
allow_missing_files
=
allow_missing_files
)
# get means and variances of kmeans result
kmeans_machine
=
bob
.
learn
.
em
.
KMeansMachine
(
bob
.
io
.
base
.
HDF5File
(
fs
.
kmeans_file
))
...
...
@@ -199,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
logger
.
info
(
"UBM Training: Wrote GMM file '%s'"
,
output_file
)
def
gmm_estep
(
algorithm
,
extractor
,
iteration
,
indices
,
force
=
False
):
def
gmm_estep
(
algorithm
,
extractor
,
iteration
,
indices
,
force
=
False
,
allow_missing_files
=
False
):
"""Performs a single E-step of the GMM training (parallel)."""
if
indices
[
0
]
>=
indices
[
1
]:
return
...
...
@@ -221,7 +222,8 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False):
reader
=
functools
.
partial
(
read_feature
,
extractor
)
data
=
utils
.
vstack_features
(
reader
,
(
training_list
[
index
]
for
index
in
range
(
indices
[
0
],
indices
[
1
])))
(
training_list
[
index
]
for
index
in
range
(
indices
[
0
],
indices
[
1
]))
,
allow_missing_files
=
allow_missing_files
)
trainer
=
algorithm
.
ubm_trainer
trainer
.
initialize
(
gmm_machine
,
None
)
...
...
@@ -294,27 +296,28 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=
shutil
.
rmtree
(
old_dir
)
def
gmm_project
(
algorithm
,
extractor
,
indices
,
force
=
False
):
def
gmm_project
(
algorithm
,
extractor
,
indices
,
force
=
False
,
allow_missing_files
=
False
):
"""Performs GMM projection"""
fs
=
FileSelector
.
instance
()
algorithm
.
load_ubm
(
fs
.
ubm_file
)
feature_files
=
fs
.
training_list
(
'extracted'
,
'train_projector'
)
projected_files
=
fs
.
training_list
(
'projected_gmm'
,
'train_projector'
)
logger
.
info
(
"ISV training: Project features range (%d, %d) from '%s' to '%s'"
,
indices
[
0
],
indices
[
1
],
fs
.
directories
[
'extracted'
],
fs
.
directories
[
'projected_gmm'
])
# extract the features
for
i
in
range
(
indices
[
0
],
indices
[
1
]):
feature_file
=
feature_files
[
i
]
projected_file
=
projected_files
[
i
]
if
not
utils
.
check_file
(
projected_file
,
force
):
if
len
(
utils
.
filter_missing_files
([
feature_file
],
split_by_client
=
False
,
allow_missing_files
=
allow_missing_files
))
>
0
:
# load feature
feature
=
read_feature
(
extractor
,
feature_file
,
allow_missing_files
=
allow_missing_files
)
# project feature
projected
=
algorithm
.
project_ubm
(
feature
)
# write it
bob
.
io
.
base
.
create_directories_safe
(
os
.
path
.
dirname
(
projected_file
))
bob
.
bio
.
base
.
save
(
projected
,
projected_file
)
if
not
utils
.
check_file
(
projected_file
,
force
):
# load feature
feature
=
read_feature
(
extractor
,
feature_file
)
# project feature
projected
=
algorithm
.
project_ubm
(
feature
)
# write it
bob
.
io
.
base
.
create_directories_safe
(
os
.
path
.
dirname
(
projected_file
))
bob
.
bio
.
base
.
save
(
projected
,
projected_file
)
bob/bio/gmm/tools/isv.py
View file @
cbb21ad1
...
...
@@ -5,9 +5,10 @@ import bob.io.base
import
os
from
bob.bio.base.tools.FileSelector
import
FileSelector
from
bob.bio.base
import
utils
,
tools
from
bob.bio.base
import
utils
,
tools
def
train_isv
(
algorithm
,
force
=
False
):
def
train_isv
(
algorithm
,
force
=
False
,
allow_missing_files
=
False
):
"""Finally, the UBM is used to train the ISV projector/enroller."""
fs
=
FileSelector
.
instance
()
...
...
@@ -19,6 +20,7 @@ def train_isv(algorithm, force=False):
# read training data
training_list
=
fs
.
training_list
(
'projected_gmm'
,
'train_projector'
,
arrange_by_client
=
True
)
training_list
=
utils
.
filter_missing_files
(
training_list
,
split_by_client
=
True
,
allow_missing_files
=
allow_missing_files
)
train_gmm_stats
=
[[
algorithm
.
read_gmm_stats
(
filename
)
for
filename
in
client_files
]
for
client_files
in
training_list
]
# perform ISV training
...
...
bob/bio/gmm/tools/ivector.py
View file @
cbb21ad1
...
...
@@ -9,8 +9,7 @@ from bob.bio.base.tools.FileSelector import FileSelector
from
bob.bio.base
import
utils
,
tools
def
ivector_estep
(
algorithm
,
iteration
,
indices
,
force
=
False
):
def
ivector_estep
(
algorithm
,
iteration
,
indices
,
force
=
False
,
allow_missing_files
=
False
):
"""Performs a single E-step of the IVector algorithm (parallel)"""
fs
=
FileSelector
.
instance
()
stats_file
=
fs
.
ivector_stats_file
(
iteration
,
indices
[
0
],
indices
[
1
])
...
...
@@ -38,7 +37,9 @@ def ivector_estep(algorithm, iteration, indices, force=False):
# Load data
training_list
=
fs
.
training_list
(
'projected_gmm'
,
'train_projector'
)
data
=
[
algorithm
.
read_gmm_stats
(
training_list
[
i
])
for
i
in
range
(
indices
[
0
],
indices
[
1
])]
training_list
=
[
training_list
[
i
]
for
i
in
range
(
indices
[
0
],
indices
[
1
])]
training_list
=
utils
.
filter_missing_files
(
training_list
,
split_by_client
=
False
,
allow_missing_files
=
allow_missing_files
)
data
=
[
algorithm
.
read_gmm_stats
(
f
)
for
f
in
training_list
]
# Perform the E-step
trainer
.
e_step
(
tv
,
data
)
...
...
@@ -134,7 +135,7 @@ def ivector_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cl
shutil
.
rmtree
(
old_dir
)
def
ivector_project
(
algorithm
,
indices
,
force
=
False
):
def
ivector_project
(
algorithm
,
indices
,
force
=
False
,
allow_missing_files
=
False
):
"""Performs IVector projection"""
# read UBM and TV into the IVector class
fs
=
FileSelector
.
instance
()
...
...
@@ -149,17 +150,19 @@ def ivector_project(algorithm, indices, force=False):
for
i
in
range
(
indices
[
0
],
indices
[
1
]):
gmm_stats_file
=
gmm_stats_files
[
i
]
ivector_file
=
ivector_files
[
i
]
if
not
utils
.
check_file
(
ivector_file
,
force
):
# load feature
feature
=
algorithm
.
read_gmm_stats
(
gmm_stats_file
)
# project feature
projected
=
algorithm
.
project_ivector
(
feature
)