bob / bob.bio.gmm / Commits

Commit 78f69063
authored Jul 02, 2015 by Elie KHOURY

added post processing steps for I-Vector (sequential and parallel)

parent e48dd818

Changes: 9 files
bob/bio/gmm/algorithm/IVector.py
@@ -24,6 +24,13 @@ class IVector (GMM):
       subspace_dimension_of_t,       # T subspace dimension
       tv_training_iterations = 25,   # Number of EM iterations for the JFA training
       update_sigma = True,
+      use_lda = False,
+      use_wccn = False,
+      use_plda = False,
+      lda_dim = 50,
+      plda_dim_F = 50,
+      plda_dim_G = 50,
+      plda_training_iterations = 50,
       # parameters of the GMM
       **kwargs
   ):

@@ -37,11 +44,18 @@ class IVector (GMM):
         performs_projection = True,
         use_projected_features_for_enrollment = True,
         requires_enroller_training = False,  # not needed anymore because it's done while training the projector
-        split_training_features_by_client = False,
+        split_training_features_by_client = True,
         subspace_dimension_of_t = subspace_dimension_of_t,
         tv_training_iterations = tv_training_iterations,
         update_sigma = update_sigma,
+        use_lda = use_lda,
+        use_wccn = use_wccn,
+        use_plda = use_plda,
+        lda_dim = lda_dim,
+        plda_dim_F = plda_dim_F,
+        plda_dim_G = plda_dim_G,
+        plda_training_iterations = plda_training_iterations,
         multiple_model_scoring = None,
         multiple_probe_scoring = None,

@@ -49,19 +63,29 @@ class IVector (GMM):
     )
     self.update_sigma = update_sigma
+    self.use_lda = use_lda
+    self.use_wccn = use_wccn
+    self.use_plda = use_plda
     self.subspace_dimension_of_t = subspace_dimension_of_t
     self.tv_training_iterations = tv_training_iterations
     self.ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=update_sigma)
     self.whitening_trainer = bob.learn.linear.WhiteningTrainer()
+    self.lda_dim = lda_dim
+    self.lda_trainer = bob.learn.linear.FisherLDATrainer(strip_to_rank=False)
+    self.wccn_trainer = bob.learn.linear.WCCNTrainer()
+    self.plda_trainer = bob.learn.em.PLDATrainer()
+    self.plda_dim_F = plda_dim_F
+    self.plda_dim_G = plda_dim_G
+    self.plda_training_iterations = plda_training_iterations

-  def _check_projected(self, feature):
+  def _check_ivector(self, feature):
     """Checks that the features are appropriate"""
     if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64:
       raise ValueError("The given feature is not appropriate")
     if self.whitener is not None and feature.shape[0] != self.whitener.shape[1]:
       raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.whitener.shape[1], feature.shape[0]))

   def train_ivector(self, training_stats):
     logger.info("  -> Training IVector enroller")

@@ -72,31 +96,76 @@ class IVector (GMM):
   def train_whitener(self, training_features):
     logger.info("  -> Training Whitening")
     ivectors_matrix = numpy.vstack(training_features)
     # create a Linear Machine
     self.whitener = bob.learn.linear.Machine(ivectors_matrix.shape[1], ivectors_matrix.shape[1])
     # create the whitening trainer
     self.whitening_trainer.train(ivectors_matrix, self.whitener)

+  def train_lda(self, training_features):
+    logger.info("  -> Training LDA projector")
+    self.lda, __eig_vals = self.lda_trainer.train(training_features)
+    # resize the machine if desired
+    if self.lda_dim:
+      self.lda.resize(self.lda.shape[0], self.lda_dim)
+
+  def train_wccn(self, training_features):
+    logger.info("  -> Training WCCN projector")
+    self.wccn = self.wccn_trainer.train(training_features)
+
+  def train_plda(self, training_features):
+    logger.info("  -> Training PLDA projector")
+    self.plda_trainer.init_f_method = 'BETWEEN_SCATTER'
+    self.plda_trainer.init_g_method = 'WITHIN_SCATTER'
+    self.plda_trainer.init_sigma_method = 'VARIANCE_DATA'
+    variance_flooring = 1e-5
+    training_features = [numpy.vstack(client) for client in training_features]
+    input_dim = training_features[0].shape[1]
+    self.plda_base = bob.learn.em.PLDABase(input_dim, self.plda_dim_F, self.plda_dim_G, variance_flooring)
+    bob.learn.em.train(self.plda_trainer, self.plda_base, training_features, self.plda_training_iterations, rng=self.rng)
+
   def train_projector(self, train_features, projector_file):
     """Train Projector and Enroller at the same time"""
-    [self._check_feature(feature) for feature in train_features]
+    [self._check_feature(feature) for client in train_features for feature in client]
+    train_features_flatten = [feature for client in train_features for feature in client]

     # train UBM
-    data = numpy.vstack(train_features)
+    data = numpy.vstack(train_features_flatten)
     self.train_ubm(data)
     del data

-    # train IVector
     # project training data
     logger.info("  -> Projecting training data")
-    training_stats = [self.project_ubm(feature) for feature in train_features]
+    train_gmm_stats = [[self.project_ubm(feature) for feature in client] for client in train_features]
+    train_gmm_stats_flatten = [stats for client in train_gmm_stats for stats in client]

     # train IVector
-    self.train_ivector(training_stats)
+    logger.info("  -> Projecting training data")
+    self.train_ivector(train_gmm_stats_flatten)

     # project training i-vectors
-    whitening_train_data = [self.project_ivector(stats) for stats in training_stats]
-    self.train_whitener(whitening_train_data)
+    train_ivectors = [[self.project_ivector(stats) for stats in client] for client in train_gmm_stats]
+    train_ivectors_flatten = [stats for client in train_ivectors for stats in client]
+
+    # Train Whitening
+    self.train_whitener(train_ivectors_flatten)
+
+    # whitening and length-normalizing i-vectors
+    train_ivectors = [[self.project_whitening(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_lda:
+      self.train_lda(train_ivectors)
+      train_ivectors = [[self.project_lda(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_wccn:
+      self.train_wccn(train_ivectors)
+      train_ivectors = [[self.project_wccn(ivec) for ivec in client] for client in train_ivectors]
+
+    if self.use_plda:
+      self.train_plda(train_ivectors)

     # save
     self.save_projector(projector_file)
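Note: training now keeps features grouped by client (split_training_features_by_client = True) and flattens that grouping only for the client-independent stages (UBM, TV, whitening), while LDA, WCCN and PLDA keep the per-client structure. A self-contained toy illustration of the nested layout and the flatten idiom used above (the strings stand in for feature arrays):

    # one inner list per client, as train_projector now expects
    train_features = [['a1', 'a2'], ['b1', 'b2', 'b3']]
    # same list comprehension as in train_projector
    flattened = [feature for client in train_features for feature in client]
    print(flattened)  # ['a1', 'a2', 'b1', 'b2', 'b3']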
@@ -118,7 +187,25 @@ class IVector (GMM):
     hdf5file.create_group('Whitener')
     hdf5file.cd('Whitener')
     self.whitener.save(hdf5file)

+    if self.use_lda:
+      hdf5file.cd('/')
+      hdf5file.create_group('LDA')
+      hdf5file.cd('LDA')
+      self.lda.save(hdf5file)
+
+    if self.use_wccn:
+      hdf5file.cd('/')
+      hdf5file.create_group('WCCN')
+      hdf5file.cd('WCCN')
+      self.wccn.save(hdf5file)
+
+    if self.use_plda:
+      hdf5file.cd('/')
+      hdf5file.create_group('PLDA')
+      hdf5file.cd('PLDA')
+      self.plda_base.save(hdf5file)

   def load_tv(self, tv_file):
     hdf5file = bob.io.base.HDF5File(tv_file)

@@ -130,7 +217,19 @@ class IVector (GMM):
     hdf5file = bob.io.base.HDF5File(whitening_file)
     self.whitener = bob.learn.linear.Machine(hdf5file)

+  def load_lda(self, lda_file):
+    hdf5file = bob.io.base.HDF5File(lda_file)
+    self.lda = bob.learn.linear.Machine(hdf5file)
+
+  def load_wccn(self, wccn_file):
+    hdf5file = bob.io.base.HDF5File(wccn_file)
+    self.wccn = bob.learn.linear.Machine(hdf5file)
+
+  def load_plda(self, plda_file):
+    hdf5file = bob.io.base.HDF5File(plda_file)
+    self.plda_base = bob.learn.em.PLDABase(hdf5file)
+    self.plda_machine = bob.learn.em.PLDAMachine(self.plda_base)
+
   def load_projector(self, projector_file):
     """Load the GMM and the ISV model from the same HDF5 file"""
     hdf5file = bob.io.base.HDF5File(projector_file)

@@ -146,6 +245,21 @@ class IVector (GMM):
     # Load Whitening
     hdf5file.cd('/Whitener')
     self.load_whitener(hdf5file)

+    if self.use_lda:
+      # Load LDA
+      hdf5file.cd('/LDA')
+      self.load_lda(hdf5file)
+
+    if self.use_wccn:
+      # Load WCCN
+      hdf5file.cd('/WCCN')
+      self.load_wccn(hdf5file)
+
+    if self.use_plda:
+      # Load PLDA
+      hdf5file.cd('/PLDA')
+      self.load_plda(hdf5file)

   def project_ivector(self, gmm_stats):

@@ -155,6 +269,16 @@ class IVector (GMM):
     whitened = self.whitener.forward(ivector)
     return whitened / numpy.linalg.norm(whitened)

+  def project_lda(self, ivector):
+    out_ivector = numpy.ndarray(self.lda.shape[1], numpy.float64)
+    self.lda(ivector, out_ivector)
+    return out_ivector
+
+  def project_wccn(self, ivector):
+    out_ivector = numpy.ndarray(self.wccn.shape[1], numpy.float64)
+    self.wccn(ivector, out_ivector)
+    return out_ivector
+
   #######################################################
   ############## IVector projection #####################
   def project(self, feature_array):

@@ -165,10 +289,17 @@ class IVector (GMM):
     # project I-Vector
     ivector = self.project_ivector(projected_ubm)
     # whiten I-Vector
-    return self.project_whitening(ivector)
+    ivector = self.project_whitening(ivector)
+    # LDA projection
+    if self.use_lda:
+      ivector = self.project_lda(ivector)
+    # WCCN projection
+    if self.use_wccn:
+      ivector = self.project_wccn(ivector)
+    return ivector

   #######################################################
-  ################## ISV model enroll ###################
+  ############### Read / Write I-Vectors ################
   def write_feature(self, data, feature_file):
     """Saves the feature, which is the (whitened) I-Vector."""
     bob.bio.base.save(data, feature_file)

@@ -178,21 +309,28 @@ class IVector (GMM):
     return bob.bio.base.load(feature_file)

   #######################################################
   ################## Model Enrollment ###################
   def enroll(self, enroll_features):
     """Performs IVector enrollment"""
-    [self._check_projected(feature) for feature in enroll_features]
-    model = numpy.mean(numpy.vstack(enroll_features), axis=0)
-    return model
+    [self._check_ivector(feature) for feature in enroll_features]
+    average_ivector = numpy.mean(numpy.vstack(enroll_features), axis=0)
+    if self.use_plda:
+      average_ivector = average_ivector.reshape(1, -1)
+      self.plda_trainer.enroll(self.plda_machine, average_ivector)
+      return self.plda_machine
+    else:
+      return average_ivector

   ######################################################
   ################ Feature comparison ##################
   def read_model(self, model_file):
     """Reads the whitened i-vector that holds the model"""
-    return bob.bio.base.load(model_file)
+    if self.use_plda:
+      return bob.learn.em.PLDAMachine(bob.io.base.HDF5File(str(model_file)), self.plda_base)
+    else:
+      return bob.bio.base.load(model_file)

   def read_probe(self, probe_file):
     """read probe file which is an i-vector"""
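Note the reshape(1, -1) before PLDA enrollment above: the averaged i-vector is 1D, and it is presumably passed as a one-row matrix because the PLDA enroller works on a 2D set of enrollment vectors. A quick numpy check of that shape change, with toy values:

    import numpy

    # average two toy 4-dimensional "i-vectors" the same way enroll() does
    average_ivector = numpy.mean(numpy.vstack([numpy.ones(4), 3 * numpy.ones(4)]), axis=0)
    print(average_ivector.shape)                 # (4,)  -- a single 1D vector
    print(average_ivector.reshape(1, -1).shape)  # (1, 4) -- one row per enrollment vector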
@@ -200,13 +338,15 @@ class IVector (GMM):
   def score(self, model, probe):
     """Computes the score for the given model and the given probe."""
-    self._check_projected(model)
-    self._check_projected(probe)
-    return numpy.dot(model/numpy.linalg.norm(model), probe/numpy.linalg.norm(probe))
+    self._check_ivector(probe)
+    if self.use_plda:
+      return model.log_likelihood_ratio(probe)
+    else:
+      self._check_ivector(model)
+      return numpy.dot(model/numpy.linalg.norm(model), probe/numpy.linalg.norm(probe))

   def score_for_multiple_probes(self, model, probes):
     """This function computes the score between the given model and several given probe files."""
     [self._check_projected(probe) for probe in probes]
     probe = numpy.mean(numpy.vstack(probes), axis=0)
     return self.score(model, probe)
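For reference, the non-PLDA branch of score above is plain cosine similarity between length-normalized i-vectors. A self-contained numpy sketch of that math (the vector values are made up for illustration):

    import numpy

    def cosine_score(model, probe):
        # same formula as the final return statement of IVector.score
        return numpy.dot(model / numpy.linalg.norm(model),
                         probe / numpy.linalg.norm(probe))

    model = numpy.array([0.3, -1.2, 0.8], dtype=numpy.float64)
    probe = numpy.array([0.2, -1.0, 0.9], dtype=numpy.float64)
    print(cosine_score(model, probe))  # close to 1.0 for similar directions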
bob/bio/gmm/config/algorithm/ivector.py → bob/bio/gmm/config/algorithm/ivector_cosine.py
File moved
bob/bio/gmm/config/algorithm/ivector_lda_wccn_plda.py  0 → 100644

+import bob.bio.gmm
+
+algorithm = bob.bio.gmm.algorithm.IVector(
+  # IVector parameters
+  subspace_dimension_of_t = 100,
+  update_sigma = True,
+  tv_training_iterations = 25,  # Number of EM iterations for the TV training
+  # GMM parameters
+  number_of_gaussians = 256,
+  use_lda = True,
+  use_wccn = True,
+  use_plda = True,
+  lda_dim = 50,
+  plda_dim_F = 50,
+  plda_dim_G = 50,
+  plda_training_iterations = 200,
+)
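The new flags are independent, so intermediate variants between the renamed cosine configuration and this full LDA+WCCN+PLDA chain are possible. A minimal sketch (assuming bob.bio.gmm and its dependencies are installed) of a hypothetical variant that enables only the LDA stage, keeping the new defaults (False) for WCCN and PLDA so that scoring stays cosine-based:

    import bob.bio.gmm

    # i-vectors + whitening + LDA only; WCCN and PLDA remain disabled
    algorithm = bob.bio.gmm.algorithm.IVector(
        subspace_dimension_of_t = 100,
        number_of_gaussians = 256,
        use_lda = True,
        lda_dim = 50,
    )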
bob/bio/gmm/script/verify_ivector.py

@@ -33,7 +33,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
   # Add sub-tasks that can be executed by this script
   parser = parsers['main']
   parser.add_argument('--sub-task',
-      choices = ('preprocess', 'train-extractor', 'extract', 'normalize-features', 'kmeans-init', 'kmeans-e-step', 'kmeans-m-step', 'gmm-init', 'gmm-e-step', 'gmm-m-step', 'gmm-project', 'ivector-e-step', 'ivector-m-step', 'ivector-project', 'train-whitener', 'project', 'enroll', 'compute-scores', 'concatenate'),
+      choices = ('preprocess', 'train-extractor', 'extract', 'normalize-features', 'kmeans-init', 'kmeans-e-step', 'kmeans-m-step', 'gmm-init', 'gmm-e-step', 'gmm-m-step', 'gmm-project', 'ivector-e-step', 'ivector-m-step', 'ivector-training', 'ivector-projection', 'train-whitener', 'whitening-projection', 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection', 'project', 'train-plda', 'save-projector', 'enroll', 'compute-scores', 'concatenate'),
       help = argparse.SUPPRESS) #'Executes a subtask (FOR INTERNAL USE ONLY!!!)'
   parser.add_argument('--iteration', type = int,
       help = argparse.SUPPRESS) #'Which type of models to generate (Normal or TModels)'

@@ -46,7 +46,7 @@ def parse_arguments(command_line_parameters, exclude_resources_from = []):
   # now that we have set up everything, get the command line arguments
   args = base_tools.initialize(parsers, command_line_parameters,
-      skips = ['preprocessing', 'extractor-training', 'extraction', 'normalization', 'kmeans', 'gmm', 'ivector', 'whitening', 'projection', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']
+      skips = ['preprocessing', 'extractor-training', 'extraction', 'normalization', 'kmeans', 'gmm', 'ivector-training', 'ivector-projection', 'train-whitener', 'whitening-projection', 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection', 'projection', 'train-plda', 'enroller-training', 'enrollment', 'score-computation', 'concatenation', 'calibration']
   )
   args.skip_projector_training = True

@@ -71,7 +71,7 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
   # now, add the extra steps for ivector
   algorithm = tools.base(args.algorithm)
-  if not args.skip_ivector:
+  if not args.skip_ivector_training:
     # gmm projection
     job_ids['gmm-projection'] = submitter.submit(
             '--sub-task gmm-project',

@@ -99,18 +99,19 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
             **args.grid.training_queue)
     deps.append(job_ids['ivector-m-step'])

-  # whitening
-  if not args.skip_whitening:
-    # ivector projection
+  # ivector projection
+  if not args.skip_ivector_projection:
     job_ids['ivector-projection'] = submitter.submit(
-            '--sub-task ivector-project',
+            '--sub-task ivector-projection',
             name = 'pro-ivector',
             number_of_parallel_jobs = args.grid.number_of_projection_jobs,
             dependencies = deps,
             **args.grid.projection_queue)
     deps.append(job_ids['ivector-projection'])

-    # TV training
+  # train whitener
+  if not args.skip_train_whitener:
     job_ids['whitener-training'] = submitter.submit(
             '--sub-task train-whitener',
             name = 'train-whitener',

@@ -118,6 +119,71 @@ def add_ivector_jobs(args, job_ids, deps, submitter):
             **args.grid.training_queue)
     deps.append(job_ids['whitener-training'])

+  # whitening projection
+  if not args.skip_whitening_projection:
+    job_ids['whitening-projection'] = submitter.submit(
+            '--sub-task whitening-projection',
+            name = 'whitened',
+            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+            dependencies = deps,
+            **args.grid.projection_queue)
+    deps.append(job_ids['whitening-projection'])
+
+  # train LDA
+  if not args.skip_train_lda:
+    job_ids['lda-training'] = submitter.submit(
+            '--sub-task train-lda',
+            name = 'train-lda',
+            dependencies = deps,
+            **args.grid.training_queue)
+    deps.append(job_ids['lda-training'])
+
+  # LDA projection
+  if not args.skip_lda_projection:
+    job_ids['lda-projection'] = submitter.submit(
+            '--sub-task lda-projection',
+            name = 'lda_projection',
+            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+            dependencies = deps,
+            **args.grid.projection_queue)
+    deps.append(job_ids['lda-projection'])
+
+  # train WCCN
+  if not args.skip_train_wccn:
+    job_ids['wccn-training'] = submitter.submit(
+            '--sub-task train-wccn',
+            name = 'train-wccn',
+            dependencies = deps,
+            **args.grid.training_queue)
+    deps.append(job_ids['wccn-training'])
+
+  # WCCN projection
+  if not args.skip_wccn_projection:
+    job_ids['wccn-projection'] = submitter.submit(
+            '--sub-task wccn-projection',
+            name = 'wccn_projection',
+            number_of_parallel_jobs = args.grid.number_of_projection_jobs,
+            dependencies = deps,
+            **args.grid.projection_queue)
+    deps.append(job_ids['wccn-projection'])
+
+  # train PLDA
+  if not args.skip_train_plda:
+    job_ids['plda-training'] = submitter.submit(
+            '--sub-task train-plda',
+            name = 'train-plda',
+            dependencies = deps,
+            **args.grid.training_queue)
+    deps.append(job_ids['plda-training'])
+
+  # save projector
+  job_ids['save-projector'] = submitter.submit(
+          '--sub-task save-projector',
+          name = 'save-projector',
+          dependencies = deps,
+          **args.grid.training_queue)
+  deps.append(job_ids['save-projector'])

   return job_ids, deps
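Each post-processing job above is submitted with dependencies = deps and its id appended to deps before the next submission, which is what serializes the chain ivector-projection → whitening → LDA → WCCN → PLDA → save-projector on the grid while still parallelizing the projection steps internally. A runnable toy stand-in for that pattern (this submitter class is illustrative, not bob's API):

    class ToySubmitter:
        """Illustrative stand-in for the grid submitter used above."""
        def __init__(self):
            self.next_id = 0
        def submit(self, subtask, dependencies=(), **kwargs):
            self.next_id += 1
            print("job %d: %s (waits for %s)" % (self.next_id, subtask, list(dependencies)))
            return self.next_id

    deps = []
    submitter = ToySubmitter()
    for step in ('ivector-projection', 'train-whitener', 'whitening-projection',
                 'train-lda', 'lda-projection', 'train-wccn', 'wccn-projection',
                 'train-plda', 'save-projector'):
        deps.append(submitter.submit('--sub-task ' + step, dependencies=list(deps)))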
@@ -134,7 +200,7 @@ def execute(args):
   # now, check what we can do
   algorithm = tools.base(args.algorithm)

   # the file selector object
   fs = tools.FileSelector.instance()

@@ -161,7 +227,7 @@ def execute(args):
         clean = args.clean_intermediate, force = args.force)
-  elif args.sub_task == 'ivector-project':
+  elif args.sub_task == 'ivector-projection':
     tools.ivector_project(
       algorithm,
       indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),

@@ -172,13 +238,54 @@ def execute(args):
       algorithm, force = args.force)
-  else:
+  elif args.sub_task == 'whitening-projection':
+    tools.whitening_project(
+      algorithm,
+      indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+      force = args.force)
+  elif args.sub_task == 'train-lda':
+    if algorithm.use_lda:
+      tools.train_lda(algorithm, force = args.force)
+  elif args.sub_task == 'lda-projection':
+    if algorithm.use_lda:
+      tools.lda_project(
+        algorithm,
+        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+        force = args.force)
+  elif args.sub_task == 'train-wccn':
+    if algorithm.use_wccn:
+      tools.train_wccn(algorithm, force = args.force)
+  elif args.sub_task == 'wccn-projection':
+    if algorithm.use_wccn:
+      tools.wccn_project(
+        algorithm,
+        indices = base_tools.indices(fs.training_list('projected_gmm', 'train_projector'), args.grid.number_of_projection_jobs),
+        force = args.force)
+  elif args.sub_task == 'train-plda':
+    if algorithm.use_plda:
+      tools.train_plda(algorithm, force = args.force)
+  elif args.sub_task == 'save-projector':
+    tools.save_projector(algorithm, force = args.force)
   # Not our keyword...
+  else:
     return False
   return True


def verify(args, command_line_parameters, external_fake_job_id = 0):
  """This is the main entry point for computing verification experiments.
  You just have to specify configurations for any of the steps of the toolchain, which are:
bob/bio/gmm/test/data/scores-nonorm-ivector-dev → bob/bio/gmm/test/data/scores-nonorm-ivector-cosine-dev
File moved

bob/bio/gmm/test/data/scores-ztnorm-ivector-dev → bob/bio/gmm/test/data/scores-ztnorm-ivector-cosine-dev
File moved
bob/bio/gmm/tools/command_line.py

@@ -36,11 +36,16 @@ def add_parallel_gmm_options(parsers, sub_module = None):
       help = 'The sub-directory (relative to --temp-directory), where intermediate ivector files should be stored')
     sub_dir_group.add_argument('--projected-ivector-directory', default = 'projected_ivector_temp',
       help = 'The sub-directory (relative to --temp-directory), where intermediate projected ivector training files should be stored')
+    sub_dir_group.add_argument('--whitened-directory', default = 'whitened_temp',
+      help = 'The sub-directory (relative to --temp-directory), where intermediate whitened ivector training files should be stored')
+    sub_dir_group.add_argument('--lda-projected-directory', default = 'lda_projected_temp',
+      help = 'The sub-directory (relative to --temp-directory), where intermediate LDA projected ivector training files should be stored')
+    sub_dir_group.add_argument('--wccn-projected-directory', default = 'wccn_projected_temp',
+      help = 'The sub-directory (relative to --temp-directory), where intermediate WCCN projected ivector training files should be stored')
     flag_group.add_argument('-i', '--tv-start-iteration', type = int, default = 0,
       help = 'Specify the first iteration for the IVector training (i.e. to restart from there)')

 # Functions to be added to the FileSelector class, once it is instantiated
 def _kmeans_intermediate_file(self, round):
   return os.path.join(self.directories['kmeans'], 'round_%05d' % round, 'kmeans.hdf5')

@@ -91,5 +96,11 @@ def initialize_parallel_gmm(args, sub_module = None):
     fs.directories['ivector'] = os.path.join(args.temp_directory, sub_dir, args.ivector_directory)
     fs.tv_file = os.path.join(args.temp_directory, sub_dir, "tv.hdf5")
-    fs.directories['projected_ivector'] = os.path.join(args.temp_directory, sub_dir, args.projected_ivector_directory)
     fs.whitener_file = os.path.join(args.temp_directory, sub_dir, "whitener.hdf5")
+    fs.lda_file = os.path.join(args.temp_directory, sub_dir, "lda.hdf5")
+    fs.wccn_file = os.path.join(args.temp_directory, sub_dir, "wccm.hdf5")
+    fs.plda_file = os.path.join(args.temp_directory, sub_dir, "plda.hdf5")
+    fs.directories['projected_ivector'] = os.path.join(args.temp_directory, sub_dir, args.projected_ivector_directory)
+    fs.directories['whitened'] = os.path.join(args.temp_directory, sub_dir, args.whitened_directory)
+    fs.directories['lda_projected'] = os.path.join(args.temp_directory, sub_dir, args.lda_projected_directory)
+    fs.directories['wccn_projected'] = os.path.join(args.temp_directory, sub_dir, args.wccn_projected_directory)
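For orientation, each intermediate representation of the new chain gets its own sub-directory under --temp-directory. A quick sketch of the resulting default layout (the temp directory and sub_dir values here are illustrative, not defaults from the code):

    import os

    temp_directory, sub_dir = '/tmp/experiment', 'ivector_subdir'  # illustrative values
    for directory in ('projected_ivector_temp', 'whitened_temp',
                      'lda_projected_temp', 'wccn_projected_temp'):
        print(os.path.join(temp_directory, sub_dir, directory))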
bob/bio/gmm/tools/ivector.py

@@ -173,9 +173,144 @@ def train_whitener(algorithm, force=False):
     bob.io.base.create_directories_safe(os.path.dirname(fs.whitener_file))
     bob.bio.base.save(algorithm.whitener, fs.whitener_file)

-  # finally, save the projector into one file
-  algorithm.load_ubm(fs.ubm_file)
-  algorithm.load_tv(fs.tv_file)
-  algorithm.load_whitener(fs.whitener_file)
-  logger.info("Writing projector into file %s", fs.projector_file)
-  algorithm.save_projector(fs.projector_file)
+
+def whitening_project(algorithm, indices, force=False):
+  """Performs IVector projection"""
+  fs = FileSelector.instance()
+  algorithm.load_whitener(fs.whitener_file)
+
+  ivector_files = fs.training_list('projected_ivector', 'train_projector')
+  whitened_files = fs.training_list('whitened', 'train_projector')
+
+  logger.info("IVector training: whitening ivectors range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['projected_ivector'], fs.directories['whitened'])
+  # extract the features
+  for i in range(indices[0], indices[1]):
+    ivector_file = ivector_files[i]
+    whitened_file = whitened_files[i]
+    if not utils.check_file(whitened_file, force):
+      # load feature
+      ivector = algorithm.read_feature(ivector_file)
+      # project feature
+      whitened = algorithm.project_whitening(ivector)
+      # write it
+      bob.io.base.create_directories_safe(os.path.dirname(whitened_file))
+      bob.bio.base.save(whitened, whitened_file)
+
+
+def train_lda(algorithm, force=False):
...