Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
bob.bio.gmm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
bob
bob.bio.gmm
Commits
df13718f
There was a problem fetching the pipeline summary.
Commit
df13718f
authored
7 years ago
by
akomaty@idiap.ch
Browse files
Options
Downloads
Patches
Plain Diff
solved some conflicts
parent
5d053949
Branches
diarization
No related tags found
1 merge request
!7
Diarization
Pipeline
#
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
bob/bio/gmm/test/test_algorithms.py
+3
-1
3 additions, 1 deletion
bob/bio/gmm/test/test_algorithms.py
bob/bio/gmm/tools/gmm.py
+35
-21
35 additions, 21 deletions
bob/bio/gmm/tools/gmm.py
with
38 additions
and
22 deletions
bob/bio/gmm/test/test_algorithms.py
+
3
−
1
View file @
df13718f
...
@@ -169,7 +169,9 @@ def test_gmm_segment():
...
@@ -169,7 +169,9 @@ def test_gmm_segment():
assert
isinstance
(
projected
,
list
)
assert
isinstance
(
projected
,
list
)
projected_ref
=
pkg_resources
.
resource_filename
(
'
bob.bio.gmm.test
'
,
'
data/gmmsegment_projected.hdf5
'
)
projected_ref
=
pkg_resources
.
resource_filename
(
'
bob.bio.gmm.test
'
,
'
data/gmmsegment_projected.hdf5
'
)
import
ipdb
;
ipdb
.
set_trace
()
gmm2
.
write_feature
(
projected
,
temp_file
)
gmm1
.
write_feature
(
projected
,
temp_file
)
gmm1
.
write_feature
(
projected
,
temp_file
)
hdf5fileref
=
bob
.
io
.
base
.
HDF5File
(
projected_ref
,
'
r
'
)
hdf5fileref
=
bob
.
io
.
base
.
HDF5File
(
projected_ref
,
'
r
'
)
...
...
This diff is collapsed.
Click to expand it.
bob/bio/gmm/tools/gmm.py
+
35
−
21
View file @
df13718f
...
@@ -3,6 +3,7 @@ import bob.learn.em
...
@@ -3,6 +3,7 @@ import bob.learn.em
import
shutil
import
shutil
import
numpy
import
numpy
import
os
import
os
import
functools
import
logging
import
logging
logger
=
logging
.
getLogger
(
"
bob.bio.gmm
"
)
logger
=
logging
.
getLogger
(
"
bob.bio.gmm
"
)
...
@@ -12,7 +13,7 @@ from bob.bio.base import utils, tools
...
@@ -12,7 +13,7 @@ from bob.bio.base import utils, tools
from
.utils
import
read_feature
from
.utils
import
read_feature
from
bob.bio.gmm.algorithm
import
GMMSegment
from
bob.bio.gmm.algorithm
import
GMMSegment
def
kmeans_initialize
(
algorithm
,
extractor
,
limit_data
=
None
,
force
=
False
):
def
kmeans_initialize
(
algorithm
,
extractor
,
limit_data
=
None
,
force
=
False
,
allow_missing_files
=
False
):
"""
Initializes the K-Means training (non-parallel).
"""
"""
Initializes the K-Means training (non-parallel).
"""
fs
=
FileSelector
.
instance
()
fs
=
FileSelector
.
instance
()
...
@@ -24,7 +25,9 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
...
@@ -24,7 +25,9 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
# read data
# read data
logger
.
info
(
"
UBM training: initializing kmeans
"
)
logger
.
info
(
"
UBM training: initializing kmeans
"
)
training_list
=
utils
.
selected_elements
(
fs
.
training_list
(
'
extracted
'
,
'
train_projector
'
),
limit_data
)
training_list
=
utils
.
selected_elements
(
fs
.
training_list
(
'
extracted
'
,
'
train_projector
'
),
limit_data
)
data
=
numpy
.
vstack
([
read_feature
(
extractor
,
feature_file
)
for
feature_file
in
training_list
])
# read the features
reader
=
functools
.
partial
(
read_feature
,
extractor
)
data
=
utils
.
vstack_features
(
reader
,
training_list
,
allow_missing_files
=
allow_missing_files
)
# Perform KMeans initialization
# Perform KMeans initialization
kmeans_machine
=
bob
.
learn
.
em
.
KMeansMachine
(
algorithm
.
gaussians
,
data
.
shape
[
1
])
kmeans_machine
=
bob
.
learn
.
em
.
KMeansMachine
(
algorithm
.
gaussians
,
data
.
shape
[
1
])
...
@@ -35,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
...
@@ -35,7 +38,7 @@ def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
logger
.
info
(
"
UBM training: saved initial KMeans machine to
'
%s
'"
,
output_file
)
logger
.
info
(
"
UBM training: saved initial KMeans machine to
'
%s
'"
,
output_file
)
def
kmeans_estep
(
algorithm
,
extractor
,
iteration
,
indices
,
force
=
False
):
def
kmeans_estep
(
algorithm
,
extractor
,
iteration
,
indices
,
force
=
False
,
allow_missing_files
=
False
):
"""
Performs a single E-step of the K-Means algorithm (parallel)
"""
"""
Performs a single E-step of the K-Means algorithm (parallel)
"""
if
indices
[
0
]
>=
indices
[
1
]:
if
indices
[
0
]
>=
indices
[
1
]:
return
return
...
@@ -55,8 +58,12 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
...
@@ -55,8 +58,12 @@ def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
logger
.
info
(
"
UBM training: KMeans E-Step round %d from range(%d, %d)
"
,
iteration
,
*
indices
)
logger
.
info
(
"
UBM training: KMeans E-Step round %d from range(%d, %d)
"
,
iteration
,
*
indices
)
# read data
# read the features
data
=
numpy
.
vstack
([
read_feature
(
extractor
,
training_list
[
index
])
for
index
in
range
(
indices
[
0
],
indices
[
1
])])
reader
=
functools
.
partial
(
read_feature
,
extractor
)
data
=
utils
.
vstack_features
(
reader
,
(
training_list
[
index
]
for
index
in
range
(
indices
[
0
],
indices
[
1
])),
allow_missing_files
=
allow_missing_files
)
# Performs the E-step
# Performs the E-step
trainer
=
algorithm
.
kmeans_trainer
trainer
=
algorithm
.
kmeans_trainer
...
@@ -95,7 +102,7 @@ def _accumulate(filenames):
...
@@ -95,7 +102,7 @@ def _accumulate(filenames):
zeroeth
+=
zeroeth_
zeroeth
+=
zeroeth_
first
+=
first_
first
+=
first_
nsamples
+=
nsamples_
nsamples
+=
nsamples_
dist
+=
dist_
dist
+=
dist_
return
(
zeroeth
,
first
,
nsamples
,
dist
)
return
(
zeroeth
,
first
,
nsamples
,
dist
)
def
kmeans_mstep
(
algorithm
,
iteration
,
number_of_parallel_jobs
,
force
=
False
,
clean
=
False
):
def
kmeans_mstep
(
algorithm
,
iteration
,
number_of_parallel_jobs
,
force
=
False
,
clean
=
False
):
...
@@ -156,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle
...
@@ -156,7 +163,7 @@ def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, cle
def
gmm_initialize
(
algorithm
,
extractor
,
limit_data
=
None
,
force
=
False
):
def
gmm_initialize
(
algorithm
,
extractor
,
limit_data
=
None
,
force
=
False
,
allow_missing_files
=
False
):
"""
Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
"""
Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
This might require a lot of memory.
"""
This might require a lot of memory.
"""
fs
=
FileSelector
.
instance
()
fs
=
FileSelector
.
instance
()
...
@@ -168,9 +175,11 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
...
@@ -168,9 +175,11 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
else
:
else
:
logger
.
info
(
"
UBM Training: Initializing GMM
"
)
logger
.
info
(
"
UBM Training: Initializing GMM
"
)
# read features
training_list
=
utils
.
selected_elements
(
fs
.
training_list
(
'
extracted
'
,
'
train_projector
'
),
limit_data
)
training_list
=
utils
.
selected_elements
(
fs
.
training_list
(
'
extracted
'
,
'
train_projector
'
),
limit_data
)
data
=
numpy
.
vstack
([
read_feature
(
extractor
,
feature_file
)
for
feature_file
in
training_list
])
# read the features
reader
=
functools
.
partial
(
read_feature
,
extractor
)
data
=
utils
.
vstack_features
(
reader
,
training_list
,
allow_missing_files
=
allow_missing_files
)
# get means and variances of kmeans result
# get means and variances of kmeans result
kmeans_machine
=
bob
.
learn
.
em
.
KMeansMachine
(
bob
.
io
.
base
.
HDF5File
(
fs
.
kmeans_file
))
kmeans_machine
=
bob
.
learn
.
em
.
KMeansMachine
(
bob
.
io
.
base
.
HDF5File
(
fs
.
kmeans_file
))
...
@@ -191,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
...
@@ -191,7 +200,7 @@ def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
logger
.
info
(
"
UBM Training: Wrote GMM file
'
%s
'"
,
output_file
)
logger
.
info
(
"
UBM Training: Wrote GMM file
'
%s
'"
,
output_file
)
def
gmm_estep
(
algorithm
,
extractor
,
iteration
,
indices
,
force
=
False
):
def
gmm_estep
(
algorithm
,
extractor
,
iteration
,
indices
,
force
=
False
,
allow_missing_files
=
False
):
"""
Performs a single E-step of the GMM training (parallel).
"""
"""
Performs a single E-step of the GMM training (parallel).
"""
if
indices
[
0
]
>=
indices
[
1
]:
if
indices
[
0
]
>=
indices
[
1
]:
return
return
...
@@ -209,8 +218,13 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False):
...
@@ -209,8 +218,13 @@ def gmm_estep(algorithm, extractor, iteration, indices, force=False):
logger
.
info
(
"
UBM training: GMM E-Step from range(%d, %d)
"
,
*
indices
)
logger
.
info
(
"
UBM training: GMM E-Step from range(%d, %d)
"
,
*
indices
)
# read data
# read the features
data
=
numpy
.
vstack
([
read_feature
(
extractor
,
training_list
[
index
])
for
index
in
range
(
indices
[
0
],
indices
[
1
])])
reader
=
functools
.
partial
(
read_feature
,
extractor
)
data
=
utils
.
vstack_features
(
reader
,
(
training_list
[
index
]
for
index
in
range
(
indices
[
0
],
indices
[
1
]))
,
allow_missing_files
=
allow_missing_files
)
trainer
=
algorithm
.
ubm_trainer
trainer
=
algorithm
.
ubm_trainer
trainer
.
initialize
(
gmm_machine
,
None
)
trainer
.
initialize
(
gmm_machine
,
None
)
...
@@ -282,7 +296,7 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=
...
@@ -282,7 +296,7 @@ def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=
shutil
.
rmtree
(
old_dir
)
shutil
.
rmtree
(
old_dir
)
def
gmm_project
(
algorithm
,
extractor
,
indices
,
force
=
False
):
def
gmm_project
(
algorithm
,
extractor
,
indices
,
force
=
False
,
allow_missing_files
=
False
):
"""
Performs GMM projection
"""
"""
Performs GMM projection
"""
fs
=
FileSelector
.
instance
()
fs
=
FileSelector
.
instance
()
...
@@ -299,11 +313,11 @@ def gmm_project(algorithm, extractor, indices, force=False):
...
@@ -299,11 +313,11 @@ def gmm_project(algorithm, extractor, indices, force=False):
projected_file
=
projected_files
[
i
]
projected_file
=
projected_files
[
i
]
if
not
utils
.
check_file
(
projected_file
,
force
):
if
not
utils
.
check_file
(
projected_file
,
force
):
# load feature
if
len
(
utils
.
filter_missing_files
([
feature_file
],
split_by_client
=
False
,
allow_missing_files
=
allow_missing_files
))
>
0
:
feature
=
read_feature
(
extractor
,
feature
_file
)
# load
feature
# project
feature
feature
=
read_feature
(
extractor
,
feature
_file
)
projected
=
algorithm
.
project
_ubm
(
feature
)
#
project
feature
# write it
projected
=
algorithm
.
project_ubm
(
feature
)
bob
.
io
.
base
.
create_directories_safe
(
os
.
path
.
dirname
(
projected_file
))
# write it
bob
.
io
.
base
.
create_directories_safe
(
os
.
path
.
dirname
(
projected_file
))
algorithm
.
write_feature
(
projected
,
projected_file
)
algorithm
.
write_feature
(
projected
,
projected_file
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment