Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
bob.learn.em
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
bob
bob.learn.em
Commits
dafa7052
Commit
dafa7052
authored
2 years ago
by
Yannick DAYER
Browse files
Options
Downloads
Patches
Plain Diff
[py] Update ISV to accept probe templates.
parent
49c8696f
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!73
Update ISV to accept probe templates. Update QA.
Pipeline
#69695
failed
2 years ago
Stage: qa
Stage: test
Stage: doc
Stage: dist
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.pre-commit-config.yaml
+4
-4
4 additions, 4 deletions
.pre-commit-config.yaml
src/bob/learn/em/factor_analysis.py
+54
-37
54 additions, 37 deletions
src/bob/learn/em/factor_analysis.py
src/bob/learn/em/ivector.py
+14
-9
14 additions, 9 deletions
src/bob/learn/em/ivector.py
with
72 additions
and
50 deletions
.pre-commit-config.yaml
+
4
−
4
View file @
dafa7052
...
@@ -2,20 +2,20 @@
...
@@ -2,20 +2,20 @@
# See https://pre-commit.com/hooks.html for more hooks
# See https://pre-commit.com/hooks.html for more hooks
repos
:
repos
:
-
repo
:
https://github.com/timothycrosley/isort
-
repo
:
https://github.com/timothycrosley/isort
rev
:
5.1
0.1
rev
:
5.1
2.0
hooks
:
hooks
:
-
id
:
isort
-
id
:
isort
args
:
[
--settings-path
,
"
pyproject.toml"
]
args
:
[
--settings-path
,
"
pyproject.toml"
]
-
repo
:
https://github.com/psf/black
-
repo
:
https://github.com/psf/black
rev
:
22.
3
.0
rev
:
22.
12
.0
hooks
:
hooks
:
-
id
:
black
-
id
:
black
-
repo
:
https://github.com/pycqa/flake8
-
repo
:
https://github.com/pycqa/flake8
rev
:
3.9.2
rev
:
6.0.0
hooks
:
hooks
:
-
id
:
flake8
-
id
:
flake8
-
repo
:
https://github.com/pre-commit/pre-commit-hooks
-
repo
:
https://github.com/pre-commit/pre-commit-hooks
rev
:
v4.
2
.0
rev
:
v4.
4
.0
hooks
:
hooks
:
-
id
:
check-ast
-
id
:
check-ast
-
id
:
check-case-conflict
-
id
:
check-case-conflict
...
...
This diff is collapsed.
Click to expand it.
src/bob/learn/em/factor_analysis.py
+
54
−
37
View file @
dafa7052
...
@@ -148,7 +148,6 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -148,7 +148,6 @@ class FactorAnalysisBase(BaseEstimator):
@property
@property
def
feature_dimension
(
self
):
def
feature_dimension
(
self
):
"""
Get the UBM Dimension
"""
"""
Get the UBM Dimension
"""
# TODO: Add this on the GMMMachine class
# TODO: Add this on the GMMMachine class
return
self
.
ubm
.
means
.
shape
[
1
]
return
self
.
ubm
.
means
.
shape
[
1
]
...
@@ -159,16 +158,12 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -159,16 +158,12 @@ class FactorAnalysisBase(BaseEstimator):
@property
@property
def
mean_supervector
(
self
):
def
mean_supervector
(
self
):
"""
"""
Returns the mean supervector
"""
Returns the mean supervector
"""
return
self
.
ubm
.
means
.
flatten
()
return
self
.
ubm
.
means
.
flatten
()
@property
@property
def
variance_supervector
(
self
):
def
variance_supervector
(
self
):
"""
"""
Returns the variance supervector
"""
Returns the variance supervector
"""
return
self
.
ubm
.
variances
.
flatten
()
return
self
.
ubm
.
variances
.
flatten
()
@property
@property
...
@@ -199,10 +194,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -199,10 +194,7 @@ class FactorAnalysisBase(BaseEstimator):
self
.
_V
=
np
.
array
(
value
)
self
.
_V
=
np
.
array
(
value
)
def
estimate_number_of_classes
(
self
,
y
):
def
estimate_number_of_classes
(
self
,
y
):
"""
"""
Estimates the number of classes given the labels
"""
Estimates the number of classes given the labels
"""
return
len
(
unique_labels
(
y
))
return
len
(
unique_labels
(
y
))
def
initialize_using_array
(
self
,
X
):
def
initialize_using_array
(
self
,
X
):
...
@@ -279,6 +271,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -279,6 +271,7 @@ class FactorAnalysisBase(BaseEstimator):
D: (n_gaussians*feature_dimension) represents the client offset vector
D: (n_gaussians*feature_dimension) represents the client offset vector
"""
"""
if
self
.
random_state
is
not
None
:
if
self
.
random_state
is
not
None
:
np
.
random
.
seed
(
self
.
random_state
)
np
.
random
.
seed
(
self
.
random_state
)
...
@@ -319,6 +312,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -319,6 +312,7 @@ class FactorAnalysisBase(BaseEstimator):
(n_classes, n_gaussians) representing the accumulated 0th order statistics
(n_classes, n_gaussians) representing the accumulated 0th order statistics
"""
"""
# 0th order stats
# 0th order stats
n_acc
=
np
.
zeros
((
n_classes
,
self
.
ubm
.
n_gaussians
),
like
=
X
[
0
].
n
)
n_acc
=
np
.
zeros
((
n_classes
,
self
.
ubm
.
n_gaussians
),
like
=
X
[
0
].
n
)
...
@@ -380,12 +374,11 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -380,12 +374,11 @@ class FactorAnalysisBase(BaseEstimator):
i: int
i: int
Class id to return the statistics for
Class id to return the statistics for
"""
"""
indices
=
np
.
where
(
np
.
array
(
y
)
==
i
)[
0
]
indices
=
np
.
where
(
np
.
array
(
y
)
==
i
)[
0
]
return
[
X
[
i
]
for
i
in
indices
]
return
[
X
[
i
]
for
i
in
indices
]
"""
# Estimating U and x #
Estimating U and x
"""
def
_compute_id_plus_u_prod_ih
(
self
,
x_i
,
UProd
):
def
_compute_id_plus_u_prod_ih
(
self
,
x_i
,
UProd
):
"""
"""
...
@@ -430,6 +423,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -430,6 +423,7 @@ class FactorAnalysisBase(BaseEstimator):
E[y_i] for class `i`
E[y_i] for class `i`
"""
"""
f_i
=
x_i
.
sum_px
f_i
=
x_i
.
sum_px
n_i
=
x_i
.
n
n_i
=
x_i
.
n
n_ic
=
np
.
repeat
(
n_i
,
self
.
feature_dimension
)
n_ic
=
np
.
repeat
(
n_i
,
self
.
feature_dimension
)
...
@@ -447,9 +441,8 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -447,9 +441,8 @@ class FactorAnalysisBase(BaseEstimator):
self
.
mean_supervector
+
self
.
_D
*
latent_z_i
self
.
mean_supervector
+
self
.
_D
*
latent_z_i
)
)
"""
# JFA Part (eq 29) #
# JFA Part (eq 29)
"""
V_dot_v
=
V
@
latent_y_i
if
latent_y_i
is
not
None
else
0
V_dot_v
=
V
@
latent_y_i
if
latent_y_i
is
not
None
else
0
fn_x_ih
-=
n_ic
*
V_dot_v
if
latent_y_i
is
not
None
else
0
fn_x_ih
-=
n_ic
*
V_dot_v
if
latent_y_i
is
not
None
else
0
...
@@ -545,6 +538,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -545,6 +538,7 @@ class FactorAnalysisBase(BaseEstimator):
Accumulated statistics for U_A2(n_gaussians* feature_dimension, r_U)
Accumulated statistics for U_A2(n_gaussians* feature_dimension, r_U)
"""
"""
# Inverting A1 over the zero axis
# Inverting A1 over the zero axis
# https://stackoverflow.com/questions/11972102/is-there-a-way-to-efficiently-invert-an-array-of-matrices-with-numpy
# https://stackoverflow.com/questions/11972102/is-there-a-way-to-efficiently-invert-an-array-of-matrices-with-numpy
inv_A1
=
np
.
linalg
.
inv
(
acc_U_A1
)
inv_A1
=
np
.
linalg
.
inv
(
acc_U_A1
)
...
@@ -568,6 +562,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -568,6 +562,7 @@ class FactorAnalysisBase(BaseEstimator):
https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/FABaseTrainer.cpp#L325
https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/FABaseTrainer.cpp#L325
"""
"""
# UProd = (self.ubm.n_gaussians, self.r_U, self.r_U)
# UProd = (self.ubm.n_gaussians, self.r_U, self.r_U)
Uc
=
self
.
_U
.
reshape
(
Uc
=
self
.
_U
.
reshape
(
...
@@ -662,9 +657,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -662,9 +657,7 @@ class FactorAnalysisBase(BaseEstimator):
return
acc_U_A1
,
acc_U_A2
return
acc_U_A1
,
acc_U_A2
"""
# Estimating D and z #
Estimating D and z
"""
def
update_z
(
self
,
X
,
y
,
latent_x
,
latent_y
,
latent_z
,
n_acc
,
f_acc
):
def
update_z
(
self
,
X
,
y
,
latent_x
,
latent_y
,
latent_z
,
n_acc
,
f_acc
):
"""
"""
...
@@ -878,6 +871,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -878,6 +871,7 @@ class FactorAnalysisBase(BaseEstimator):
latent_x = (n_classes, r_U, n_sessions)
latent_x = (n_classes, r_U, n_sessions)
"""
"""
kw
=
dict
(
like
=
like
)
if
isinstance
(
like
,
dask
.
array
.
core
.
Array
)
else
{}
kw
=
dict
(
like
=
like
)
if
isinstance
(
like
,
dask
.
array
.
core
.
Array
)
else
{}
# x (Eq. 36)
# x (Eq. 36)
...
@@ -897,9 +891,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -897,9 +891,7 @@ class FactorAnalysisBase(BaseEstimator):
return
latent_x
,
latent_y
,
latent_z
return
latent_x
,
latent_y
,
latent_z
"""
# Estimating V and y #
Estimating V and y
"""
def
update_y
(
def
update_y
(
self
,
self
,
...
@@ -948,6 +940,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -948,6 +940,7 @@ class FactorAnalysisBase(BaseEstimator):
Accumulated 1st order statistics for each class (math:`F_{i}`)
Accumulated 1st order statistics for each class (math:`F_{i}`)
"""
"""
# V.T / sigma
# V.T / sigma
VTinvSigma
=
self
.
_V
.
T
/
self
.
variance_supervector
VTinvSigma
=
self
.
_V
.
T
/
self
.
variance_supervector
...
@@ -1019,7 +1012,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -1019,7 +1012,7 @@ class FactorAnalysisBase(BaseEstimator):
I
=
np
.
eye
(
self
.
r_V
,
self
.
r_V
)
# noqa: E741
I
=
np
.
eye
(
self
.
r_V
,
self
.
r_V
)
# noqa: E741
# TODO: make the inver
t
ion matrix function as a parameter
# TODO: make the inver
s
ion matrix function as a parameter
return
np
.
linalg
.
inv
(
I
+
(
VProd
*
n_acc_i
[:,
None
,
None
]).
sum
(
axis
=
0
))
return
np
.
linalg
.
inv
(
I
+
(
VProd
*
n_acc_i
[:,
None
,
None
]).
sum
(
axis
=
0
))
def
_compute_vprod
(
self
):
def
_compute_vprod
(
self
):
...
@@ -1176,17 +1169,15 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -1176,17 +1169,15 @@ class FactorAnalysisBase(BaseEstimator):
)
# Fn_yi = sum_{sessions h}(N_{i,h}*(o_{i,h} - m - D*z_{i})
)
# Fn_yi = sum_{sessions h}(N_{i,h}*(o_{i,h} - m - D*z_{i})
# Looping over the sessions of a ;ane;
# Looping over the sessions of a ;ane;
for
session_id
in
range
(
len
(
X_i
)
)
:
for
session_id
,
x_i_s
in
enumerate
(
X_i
):
n_i
=
X
_i
[
session_id
].
n
n_i
=
x
_i
_s
[
session_id
].
n
U_dot_x
=
U
@
latent_x_i
[:,
session_id
]
U_dot_x
=
U
@
latent_x_i
[:,
session_id
]
tmp_CD
=
np
.
repeat
(
n_i
,
self
.
feature_dimension
)
tmp_CD
=
np
.
repeat
(
n_i
,
self
.
feature_dimension
)
fn_y_i
-=
tmp_CD
*
U_dot_x
fn_y_i
-=
tmp_CD
*
U_dot_x
return
fn_y_i
return
fn_y_i
"""
# Scoring #
Scoring
"""
def
estimate_x
(
self
,
X
):
def
estimate_x
(
self
,
X
):
...
@@ -1212,6 +1203,7 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -1212,6 +1203,7 @@ class FactorAnalysisBase(BaseEstimator):
X_i: list of :py:class:`bob.learn.em.GMMStats`
X_i: list of :py:class:`bob.learn.em.GMMStats`
List of statistics for a class
List of statistics for a class
"""
"""
I
=
np
.
eye
(
self
.
r_U
,
self
.
r_U
)
# noqa: E741
I
=
np
.
eye
(
self
.
r_U
,
self
.
r_U
)
# noqa: E741
Uc
=
self
.
_U
.
reshape
(
Uc
=
self
.
_U
.
reshape
(
...
@@ -1319,10 +1311,11 @@ class FactorAnalysisBase(BaseEstimator):
...
@@ -1319,10 +1311,11 @@ class FactorAnalysisBase(BaseEstimator):
z
z
"""
"""
return
self
.
enroll
([
self
.
ubm
.
acc_stats
(
X
)])
return
self
.
enroll
([
self
.
ubm
.
acc_stats
(
X
)])
def
_prepare_dask_input
(
self
,
X
,
y
):
def
_prepare_dask_input
(
self
,
X
,
y
):
"""
P
e
rpare the input for the fit method
"""
"""
Pr
e
pare the input for the fit method
"""
logger
.
info
(
logger
.
info
(
"
Rechunking bag of stats to delayed list of stats per class. If your worker runs
"
"
Rechunking bag of stats to delayed list of stats per class. If your worker runs
"
"
out of memory in this training step, you have to use workers with more memory.
"
"
out of memory in this training step, you have to use workers with more memory.
"
...
@@ -1406,9 +1399,7 @@ class ISVMachine(FactorAnalysisBase):
...
@@ -1406,9 +1399,7 @@ class ISVMachine(FactorAnalysisBase):
)
)
def
e_step
(
self
,
X
,
y
,
n_samples_per_class
,
n_acc
,
f_acc
):
def
e_step
(
self
,
X
,
y
,
n_samples_per_class
,
n_acc
,
f_acc
):
"""
"""
E-step of the EM algorithm.
"""
E-step of the EM algorithm
"""
# self.initialize_XYZ(y)
# self.initialize_XYZ(y)
UProd
=
self
.
_compute_uprod
()
UProd
=
self
.
_compute_uprod
()
_
,
_
,
latent_z
=
self
.
initialize_XYZ
(
_
,
_
,
latent_z
=
self
.
initialize_XYZ
(
...
@@ -1452,6 +1443,7 @@ class ISVMachine(FactorAnalysisBase):
...
@@ -1452,6 +1443,7 @@ class ISVMachine(FactorAnalysisBase):
Accumulated statistics for U_A2(n_gaussians* feature_dimension, r_U)
Accumulated statistics for U_A2(n_gaussians* feature_dimension, r_U)
"""
"""
acc_U_A1
=
[
acc
[
0
]
for
acc
in
acc_U_A1_acc_U_A2_list
]
acc_U_A1
=
[
acc
[
0
]
for
acc
in
acc_U_A1_acc_U_A2_list
]
acc_U_A2
=
[
acc
[
1
]
for
acc
in
acc_U_A1_acc_U_A2_list
]
acc_U_A2
=
[
acc
[
1
]
for
acc
in
acc_U_A1_acc_U_A2_list
]
...
@@ -1476,6 +1468,7 @@ class ISVMachine(FactorAnalysisBase):
...
@@ -1476,6 +1468,7 @@ class ISVMachine(FactorAnalysisBase):
Returns self.
Returns self.
"""
"""
if
isinstance
(
X
,
dask
.
bag
.
Bag
):
if
isinstance
(
X
,
dask
.
bag
.
Bag
):
X
,
y
=
self
.
_prepare_dask_input
(
X
,
y
)
X
,
y
=
self
.
_prepare_dask_input
(
X
,
y
)
...
@@ -1535,6 +1528,7 @@ class ISVMachine(FactorAnalysisBase):
...
@@ -1535,6 +1528,7 @@ class ISVMachine(FactorAnalysisBase):
z
z
"""
"""
iterations
=
self
.
enroll_iterations
iterations
=
self
.
enroll_iterations
# We have only one class for enrollment
# We have only one class for enrollment
y
=
list
(
np
.
zeros
(
len
(
X
),
dtype
=
np
.
int32
))
y
=
list
(
np
.
zeros
(
len
(
X
),
dtype
=
np
.
int32
))
...
@@ -1584,6 +1578,7 @@ class ISVMachine(FactorAnalysisBase):
...
@@ -1584,6 +1578,7 @@ class ISVMachine(FactorAnalysisBase):
z
z
"""
"""
return
self
.
enroll
([
self
.
ubm
.
acc_stats
(
X
)])
return
self
.
enroll
([
self
.
ubm
.
acc_stats
(
X
)])
def
score
(
self
,
latent_z
,
data
):
def
score
(
self
,
latent_z
,
data
):
...
@@ -1596,7 +1591,7 @@ class ISVMachine(FactorAnalysisBase):
...
@@ -1596,7 +1591,7 @@ class ISVMachine(FactorAnalysisBase):
Latent representation of the client (E[z_i])
Latent representation of the client (E[z_i])
data : list of :py:class:`bob.learn.em.GMMStats`
data : list of :py:class:`bob.learn.em.GMMStats`
List of statistics to be scored
List of statistics
of one probe template
to be scored
Returns
Returns
-------
-------
...
@@ -1604,7 +1599,14 @@ class ISVMachine(FactorAnalysisBase):
...
@@ -1604,7 +1599,14 @@ class ISVMachine(FactorAnalysisBase):
The linear scored
The linear scored
"""
"""
x
=
self
.
estimate_x
(
data
)
template_data
=
data
[
0
]
if
len
(
data
)
>
1
:
for
d
in
data
[
1
:]:
template_data
+=
d
x
=
self
.
estimate_x
(
template_data
)
Ux
=
self
.
_U
@
x
Ux
=
self
.
_U
@
x
# TODO: I don't know why this is not the enrolled model
# TODO: I don't know why this is not the enrolled model
...
@@ -1615,10 +1617,10 @@ class ISVMachine(FactorAnalysisBase):
...
@@ -1615,10 +1617,10 @@ class ISVMachine(FactorAnalysisBase):
return
linear_scoring
(
return
linear_scoring
(
z
.
reshape
((
self
.
ubm
.
n_gaussians
,
self
.
feature_dimension
)),
z
.
reshape
((
self
.
ubm
.
n_gaussians
,
self
.
feature_dimension
)),
self
.
ubm
,
self
.
ubm
,
data
,
template_
data
,
Ux
.
reshape
((
self
.
ubm
.
n_gaussians
,
self
.
feature_dimension
)),
Ux
.
reshape
((
self
.
ubm
.
n_gaussians
,
self
.
feature_dimension
)),
frame_length_normalization
=
True
,
frame_length_normalization
=
True
,
)[
0
]
)[
0
]
[
0
]
class
JFAMachine
(
FactorAnalysisBase
):
class
JFAMachine
(
FactorAnalysisBase
):
...
@@ -1759,6 +1761,7 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -1759,6 +1761,7 @@ class JFAMachine(FactorAnalysisBase):
Accumulated statistics for V_A2(n_gaussians* feature_dimension, r_V)
Accumulated statistics for V_A2(n_gaussians* feature_dimension, r_V)
"""
"""
acc_V_A1
=
[
acc
[
0
]
for
acc
in
acc_V_A1_acc_V_A2_list
]
acc_V_A1
=
[
acc
[
0
]
for
acc
in
acc_V_A1_acc_V_A2_list
]
acc_V_A2
=
[
acc
[
1
]
for
acc
in
acc_V_A1_acc_V_A2_list
]
acc_V_A2
=
[
acc
[
1
]
for
acc
in
acc_V_A1_acc_V_A2_list
]
...
@@ -1807,6 +1810,7 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -1807,6 +1810,7 @@ class JFAMachine(FactorAnalysisBase):
E[y]
E[y]
"""
"""
VProd
=
self
.
_compute_vprod
()
VProd
=
self
.
_compute_vprod
()
latent_x
,
latent_y
,
latent_z
=
self
.
initialize_XYZ
(
latent_x
,
latent_y
,
latent_z
=
self
.
initialize_XYZ
(
...
@@ -1856,6 +1860,7 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -1856,6 +1860,7 @@ class JFAMachine(FactorAnalysisBase):
Accumulated statistics for U_A2(n_gaussians* feature_dimension, r_U)
Accumulated statistics for U_A2(n_gaussians* feature_dimension, r_U)
"""
"""
# self.initialize_XYZ(y)
# self.initialize_XYZ(y)
UProd
=
self
.
_compute_uprod
()
UProd
=
self
.
_compute_uprod
()
latent_x
,
_
,
latent_z
=
self
.
initialize_XYZ
(
latent_x
,
_
,
latent_z
=
self
.
initialize_XYZ
(
...
@@ -1897,6 +1902,7 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -1897,6 +1902,7 @@ class JFAMachine(FactorAnalysisBase):
Accumulated statistics for V_A2(n_gaussians* feature_dimension, r_V)
Accumulated statistics for V_A2(n_gaussians* feature_dimension, r_V)
"""
"""
acc_U_A1
=
[
acc
[
0
]
for
acc
in
acc_U_A1_acc_U_A2_list
]
acc_U_A1
=
[
acc
[
0
]
for
acc
in
acc_U_A1_acc_U_A2_list
]
acc_U_A2
=
[
acc
[
1
]
for
acc
in
acc_U_A1_acc_U_A2_list
]
acc_U_A2
=
[
acc
[
1
]
for
acc
in
acc_U_A1_acc_U_A2_list
]
...
@@ -1995,6 +2001,7 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -1995,6 +2001,7 @@ class JFAMachine(FactorAnalysisBase):
Accumulated statistics for D_A2(n_gaussians* feature_dimension, )
Accumulated statistics for D_A2(n_gaussians* feature_dimension, )
"""
"""
_
,
_
,
latent_z
=
self
.
initialize_XYZ
(
_
,
_
,
latent_z
=
self
.
initialize_XYZ
(
n_samples_per_class
=
n_samples_per_class
n_samples_per_class
=
n_samples_per_class
)
)
...
@@ -2030,6 +2037,7 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -2030,6 +2037,7 @@ class JFAMachine(FactorAnalysisBase):
Accumulated statistics for D_A2(n_gaussians* feature_dimension, )
Accumulated statistics for D_A2(n_gaussians* feature_dimension, )
"""
"""
acc_D_A1
=
[
acc
[
0
]
for
acc
in
acc_D_A1_acc_D_A2_list
]
acc_D_A1
=
[
acc
[
0
]
for
acc
in
acc_D_A1_acc_D_A2_list
]
acc_D_A2
=
[
acc
[
1
]
for
acc
in
acc_D_A1_acc_D_A2_list
]
acc_D_A2
=
[
acc
[
1
]
for
acc
in
acc_D_A1_acc_D_A2_list
]
...
@@ -2055,6 +2063,7 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -2055,6 +2063,7 @@ class JFAMachine(FactorAnalysisBase):
z, y latent variables
z, y latent variables
"""
"""
iterations
=
self
.
enroll_iterations
iterations
=
self
.
enroll_iterations
# We have only one class for enrollment
# We have only one class for enrollment
y
=
list
(
np
.
zeros
(
len
(
X
),
dtype
=
np
.
int32
))
y
=
list
(
np
.
zeros
(
len
(
X
),
dtype
=
np
.
int32
))
...
@@ -2118,6 +2127,7 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -2118,6 +2127,7 @@ class JFAMachine(FactorAnalysisBase):
Returns self.
Returns self.
"""
"""
if
isinstance
(
X
,
dask
.
bag
.
Bag
):
if
isinstance
(
X
,
dask
.
bag
.
Bag
):
X
,
y
=
self
.
_prepare_dask_input
(
X
,
y
)
X
,
y
=
self
.
_prepare_dask_input
(
X
,
y
)
...
@@ -2240,9 +2250,16 @@ class JFAMachine(FactorAnalysisBase):
...
@@ -2240,9 +2250,16 @@ class JFAMachine(FactorAnalysisBase):
The linear scored
The linear scored
"""
"""
latent_y
=
model
[
0
]
latent_y
=
model
[
0
]
latent_z
=
model
[
1
]
latent_z
=
model
[
1
]
template_data
=
data
[
0
]
if
len
(
data
>
1
):
for
d
in
data
[
1
:]:
template_data
+=
d
x
=
self
.
estimate_x
(
data
)
x
=
self
.
estimate_x
(
data
)
Ux
=
self
.
_U
@
x
Ux
=
self
.
_U
@
x
...
...
This diff is collapsed.
Click to expand it.
src/bob/learn/em/ivector.py
+
14
−
9
View file @
dafa7052
...
@@ -287,8 +287,13 @@ class IVectorMachine(BaseEstimator):
...
@@ -287,8 +287,13 @@ class IVectorMachine(BaseEstimator):
)
)
self
.
sigma
=
copy
.
deepcopy
(
self
.
ubm
.
variances
)
self
.
sigma
=
copy
.
deepcopy
(
self
.
ubm
.
variances
)
logger
.
info
(
"
Training I-Vector...
"
)
for
step
in
range
(
self
.
max_iterations
):
for
step
in
range
(
self
.
max_iterations
):
logger
.
info
(
f
"
IVector step
{
step
+
1
:{
len
(
str
(
self
.
max_iterations
))
}
d
}
/
{
self
.
max_iterations
}
.
"
)
if
chunky
:
if
chunky
:
# Compute the IVectorStats of each chunk
stats
=
[
stats
=
[
dask
.
delayed
(
e_step
)(
dask
.
delayed
(
e_step
)(
machine
=
self
,
machine
=
self
,
...
@@ -300,27 +305,27 @@ class IVectorMachine(BaseEstimator):
...
@@ -300,27 +305,27 @@ class IVectorMachine(BaseEstimator):
# Workaround to prevent memory issues at compute with too many chunks.
# Workaround to prevent memory issues at compute with too many chunks.
# This adds pairs of stats together instead of sending all the stats to
# This adds pairs of stats together instead of sending all the stats to
# one worker.
# one worker.
while
(
l
:
=
len
(
stats
))
>
1
:
while
(
l
ength
:
=
len
(
stats
))
>
1
:
last
=
stats
[
-
1
]
last
=
stats
[
-
1
]
stats
=
[
stats
=
[
dask
.
delayed
(
operator
.
add
)(
stats
[
i
],
stats
[
l
//
2
+
i
])
dask
.
delayed
(
operator
.
add
)(
for
i
in
range
(
l
//
2
)
stats
[
i
],
stats
[
length
//
2
+
i
]
)
for
i
in
range
(
length
//
2
)
]
]
if
l
%
2
!=
0
:
if
l
ength
%
2
!=
0
:
stats
.
append
(
last
)
stats
.
append
(
last
)
stats_sum
=
stats
[
0
]
stats_sum
=
stats
[
0
]
# Update the machine parameters with the aggregated stats
new_machine
=
dask
.
compute
(
new_machine
=
dask
.
compute
(
dask
.
delayed
(
m_step
)(
self
,
stats_sum
)
dask
.
delayed
(
m_step
)(
self
,
stats_sum
)
)[
0
]
)[
0
]
for
attr
in
[
"
T
"
,
"
sigma
"
]:
for
attr
in
[
"
T
"
,
"
sigma
"
]:
setattr
(
self
,
attr
,
getattr
(
new_machine
,
attr
))
setattr
(
self
,
attr
,
getattr
(
new_machine
,
attr
))
else
:
else
:
# Working directly on numpy array, not dask.Bags
stats
=
e_step
(
machine
=
self
,
data
=
X
)
stats
=
e_step
(
machine
=
self
,
data
=
X
)
_
=
m_step
(
self
,
stats
)
_
=
m_step
(
self
,
stats
)
logger
.
info
(
f
"
IVector step
{
step
+
1
:{
len
(
str
(
self
.
max_iterations
))
}
d
}
/
{
self
.
max_iterations
}
.
"
)
logger
.
info
(
f
"
Reached
{
step
+
1
}
steps.
"
)
logger
.
info
(
f
"
Reached
{
step
+
1
}
steps.
"
)
return
self
return
self
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment