Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.pad.voice
Commits
6c05dfc4
Commit
6c05dfc4
authored
Oct 11, 2016
by
Pavel KORSHUNOV
Browse files
Added support voicePA and moved extractors to bob.bio.spear
parent
adf69607
Pipeline
#4713
passed with stages
in 16 minutes and 49 seconds
Changes
28
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
bob/pad/voice/config/algorithm/gmm_tomi.py
deleted
100644 → 0
View file @
adf69607
#!/usr/bin/env python
import
bob.bio.gmm
algorithm
=
bob
.
bio
.
gmm
.
algorithm
.
GMMRegular
(
number_of_gaussians
=
512
,
kmeans_training_iterations
=
10
,
# Maximum number of iterations for K-Means
gmm_training_iterations
=
10
,
# Maximum number of iterations for ML GMM Training
)
bob/pad/voice/config/algorithm/gmm_tomi_scfc.py
deleted
100644 → 0
View file @
adf69607
#!/usr/bin/env python
import
bob.bio.gmm
algorithm
=
bob
.
bio
.
gmm
.
algorithm
.
GMMRegular
(
number_of_gaussians
=
512
,
kmeans_training_iterations
=
10
,
# Maximum number of iterations for K-Means
gmm_training_iterations
=
10
,
# Maximum number of iterations for ML GMM Training
training_threshold
=
5e-5
,
# Threshold to end the ML training, make it smaller
variance_threshold
=
5e-7
,
# Minimum value that a variance can reach, make it smaller than default
)
bob/pad/voice/config/algorithm/isv_avspoof.py
deleted
100644 → 0
View file @
adf69607
import
bob.bio.gmm
algorithm
=
bob
.
bio
.
gmm
.
algorithm
.
ISV
(
# ISV parameters
subspace_dimension_of_u
=
50
,
# GMM parameters
number_of_gaussians
=
256
,
training_threshold
=
0.0
,
)
bob/pad/voice/config/algorithm/ivec_avspoof.py
deleted
100644 → 0
View file @
adf69607
import
bob.bio.gmm
algorithm
=
bob
.
bio
.
gmm
.
algorithm
.
IVector
(
# IVector parameters
subspace_dimension_of_t
=
100
,
update_sigma
=
True
,
tv_training_iterations
=
25
,
# Number of EM iterations for the TV training
# GMM parameters
number_of_gaussians
=
256
,
training_threshold
=
0.0
,
use_lda
=
True
,
use_wccn
=
True
,
use_plda
=
True
,
lda_dim
=
50
,
plda_dim_F
=
50
,
plda_dim_G
=
50
,
plda_training_iterations
=
200
,
)
bob/pad/voice/config/database/asvspoof.py
View file @
6c05dfc4
...
...
@@ -4,8 +4,7 @@ import bob.pad.voice.database
# directory where the wave files are stored
asvspoof_input_dir
=
"/idiap/resource/database/ASVspoof2015/ASVspoof2015_development"
#Path to ASVspoof in Idiap
# asvspoof_input_dir = "/Users/pavelkor/Documents/pav/idiap/data/asvspoof" #path to a local copy
asvspoof_input_dir
=
"[YOUR_AVSPOOF_WAV_DIRECTORY]"
asvspoof_input_ext
=
".wav"
...
...
bob/pad/voice/config/database/asvspoof_verify.py
deleted
100644 → 0
View file @
adf69607
#!/usr/bin/env python
import
bob.bio.base.database
# directory where the wave files are stored
# asvspoof_input_dir = "/idiap/resource/database/mobio/denoisedAUDIO_16k/" #MOBIO database for UBM training
asvspoof_input_dir
=
"/idiap/resource/database/ASVspoof2015/ASVspoof2015_development"
#Path to ASVspoof in Idiap
# asvspoof_input_dir = "/Users/pavelkor/Documents/pav/idiap/data/asvspoof" #path to a local copy
asvspoof_input_ext
=
".wav"
database_licit
=
bob
.
bio
.
base
.
database
.
ASVspoofBioDatabase
(
protocol
=
'licit'
,
original_directory
=
asvspoof_input_dir
,
original_extension
=
asvspoof_input_ext
,
training_depends_on_protocol
=
True
,
)
database_spoof
=
bob
.
bio
.
base
.
database
.
ASVspoofBioDatabase
(
protocol
=
'spoof'
,
original_directory
=
asvspoof_input_dir
,
original_extension
=
asvspoof_input_ext
,
training_depends_on_protocol
=
True
,
)
bob/pad/voice/config/database/avspoof.py
View file @
6c05dfc4
...
...
@@ -4,8 +4,7 @@ import bob.pad.voice.database
# directory where the wave files are stored
avspoof_input_dir
=
"/idiap/project/lobi/AVSpoof/data/"
# folder at Idiap
# avspoof_input_dir = "/Users/pavelkor/Documents/pav/idiap/data/AVspoof_full/data/" # local copy
avspoof_input_dir
=
"[YOUR_AVSPOOF_WAV_DIRECTORY]"
avspoof_input_ext
=
".wav"
...
...
bob/pad/voice/config/database/avspoof_verify.py
deleted
100644 → 0
View file @
adf69607
#!/usr/bin/env python
import
bob.bio.base.database
# directory where the wave files are stored
avspoof_input_dir
=
"/idiap/project/lobi/AVSpoof/data/"
# avspoof_input_dir = "/Users/pavelkor/Documents/pav/idiap/data/AVspoof_full/data/"
avspoof_input_ext
=
".wav"
database_licit
=
bob
.
bio
.
base
.
database
.
AVspoofBioDatabase
(
protocol
=
'licit'
,
original_directory
=
avspoof_input_dir
,
original_extension
=
avspoof_input_ext
,
training_depends_on_protocol
=
True
,
)
database_spoof
=
bob
.
bio
.
base
.
database
.
AVspoofBioDatabase
(
protocol
=
'spoof'
,
original_directory
=
avspoof_input_dir
,
original_extension
=
avspoof_input_ext
,
training_depends_on_protocol
=
True
,
)
bob/pad/voice/config/database/voicepa.py
0 → 100644
View file @
6c05dfc4
#!/usr/bin/env python
import
bob.pad.voice.database
# directory where the wave files are stored
voicepa_input_dir
=
"[YOUR_VOICEPA_WAV_DIRECTORY]"
voicepa_input_ext
=
".wav"
database
=
bob
.
pad
.
voice
.
database
.
VoicePAPadDatabase
(
protocol
=
'greattest'
,
original_directory
=
voicepa_input_dir
,
original_extension
=
voicepa_input_ext
,
training_depends_on_protocol
=
True
,
)
bob/pad/voice/config/extractor/imfcc20.py
deleted
100644 → 0
View file @
adf69607
import
bob.pad.voice.extractor
extractor
=
bob
.
pad
.
voice
.
extractor
.
CepstralExtended
(
# the parameters emulate the ones in the paper "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci
pre_emphasis_coef
=
0.97
,
# as per the algorithm implemented in the paper
n_ceps
=
20
,
# number of Inversed MFCC coefficients should be 20 - this is the number we cut off from whole Cepstrum
n_filters
=
20
,
# number of filters in the bank is also 20
win_length_ms
=
20.
,
# 20 ms - this is the value in the paper
win_shift_ms
=
10.
,
# This is the overlap - half of the window
f_max
=
8000
,
# this number insures we take half of the frequencies after FFT - so we retain only 257 values for 512 window
mel_scale
=
True
,
# Mel-scaling is what make these IMMFC features
with_delta
=
True
,
# As reported in the paper
with_delta_delta
=
True
,
# As reported in the paper
energy_filter
=
True
,
# The paper uses power of FFT magnitude
dct_norm
=
True
,
# The paper uses normed DCT-II variant
delta_win
=
1
,
# the paper computes deltas on window of size 1
inverse_filter
=
True
,
# this uses inversed Mel-filter
)
bob/pad/voice/config/extractor/lbp_eurecom.py
View file @
6c05dfc4
import
bob.
pad.voice
.extractor
import
bob.
bio.spear
.extractor
"""
This LBP-based feature extraction is implemented based on the paper
...
...
@@ -13,7 +13,7 @@ of the signal. Histogram size is 58 (as lbp_uniform LBP8,1 has 58 different valu
GLCMs are not computed for top and bottom row of the spectrogram. Hence, the resulted size of the feature vector is 2842.
"""
lfcc16_eurecom
=
bob
.
pad
.
voice
.
extractor
.
CepstralExtended
(
lfcc16_eurecom
=
bob
.
bio
.
spear
.
extractor
.
CepstralExtended
(
# the parameters are as specified in the paper by Eurecom
# Federico Alegre, Ravichander Vipperla, Asmaa Amehraye, Nicholas Evans.
# "A new speaker verification spoofing countermeasure based on local binary patterns".
...
...
bob/pad/voice/config/extractor/lfcc20.py
deleted
100644 → 0
View file @
adf69607
import
bob.pad.voice.extractor
extractor
=
bob
.
pad
.
voice
.
extractor
.
CepstralExtended
(
# the parameters are as specified in the paper "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci
pre_emphasis_coef
=
0.97
,
# as per the algorithm implemented in the paper
n_ceps
=
20
,
# number of LFCC coefficients should be 20 - this is the number we cut off from whole Cepstrum
n_filters
=
20
,
# number of filters in the bank is also 20
win_length_ms
=
20.
,
# 20 ms - this is the value in the paper
win_shift_ms
=
10.
,
# This is the overlap - half of the window
f_max
=
8000
,
# this number insures we take half of the frequencies after FFT - so we retain only 257 values for 512 window
mel_scale
=
False
,
with_delta
=
True
,
# As reported in the paper
with_delta_delta
=
True
,
# As reported in the paper
energy_filter
=
True
,
# The paper uses power of FFT magnitude
dct_norm
=
True
,
# The paper uses normed DCT-II variant
delta_win
=
1
,
# the paper computes deltas on window of size 1
)
bob/pad/voice/config/extractor/mfcc20.py
deleted
100644 → 0
View file @
adf69607
import
bob.pad.voice.extractor
extractor
=
bob
.
pad
.
voice
.
extractor
.
CepstralExtended
(
# the parameters emulate the ones in the paper "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci
pre_emphasis_coef
=
0.97
,
# as per the algorithm implemented in the paper
n_ceps
=
20
,
# number of MFCC coefficients should be 20 - this is the number we cut off from whole Cepstrum
n_filters
=
20
,
# number of filters in the bank is also 20
win_length_ms
=
20.
,
# 20 ms - this is the value in the paper
win_shift_ms
=
10.
,
# This is the overlap - half of the window
f_max
=
8000
,
# this number insures we take half of the frequencies after FFT - so we retain only 257 values for 512 window
mel_scale
=
True
,
# Mel-scaling is what make these MMFC features
with_delta
=
True
,
# As reported in the paper
with_delta_delta
=
True
,
# As reported in the paper
energy_filter
=
True
,
# The paper uses power of FFT magnitude
dct_norm
=
True
,
# The paper uses normed DCT-II variant
delta_win
=
1
,
# the paper computes deltas on window of size 1
)
bob/pad/voice/config/extractor/rfcc20.py
deleted
100644 → 0
View file @
adf69607
import
bob.pad.voice.extractor
extractor
=
bob
.
pad
.
voice
.
extractor
.
CepstralExtended
(
# the parameters emulate the ones in the paper "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci
pre_emphasis_coef
=
0.97
,
# as per the algorithm implemented in the paper
n_ceps
=
20
,
# number of RFCC coefficients should be 20 - this is the number we cut off from whole Cepstrum
n_filters
=
20
,
# number of filters in the bank is also 20
# win_length_ms=32., # 32 ms - this insures the window size is 512 and also FFT size is 512
# win_shift_ms=16., # This is the overlap - half of the window
win_length_ms
=
20.
,
# 20 ms - this is the value in the paper
win_shift_ms
=
10.
,
# This is the overlap - half of the window
f_max
=
8000
,
# this number insures we take half of the frequencies after FFT - so we retain only 257 values for 512 window
mel_scale
=
False
,
rect_filter
=
True
,
# these features are computed with a rectangular filter, hence the name RFCC
with_delta
=
True
,
# As reported in the paper
with_delta_delta
=
True
,
# As reported in the paper
energy_filter
=
True
,
# The paper uses power of FFT magnitude
dct_norm
=
True
,
# The paper uses normed DCT-II variant
delta_win
=
1
,
# the paper computes deltas on window of size 1
)
bob/pad/voice/config/extractor/scfc20.py
deleted
100644 → 0
View file @
adf69607
import
bob.pad.voice.extractor
extractor
=
bob
.
pad
.
voice
.
extractor
.
CepstralExtended
(
# the parameters are as specified in the paper "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci
# SCFC features as per paper that compares different features
pre_emphasis_coef
=
0.97
,
# as per the algorithm implemented in the paper
n_ceps
=
20
,
# number of SCFC coefficients should be 20 - this is the number we cut off from whole Cepstrum
n_filters
=
20
,
# number of filters in the bank is also 20
win_length_ms
=
20.
,
# 20 ms - this is the value in the paper
win_shift_ms
=
10.
,
# This is the overlap - half of the window
f_max
=
8000
,
# this number insures we take half of the frequencies after FFT - so we retain only 257 values for 512 window
mel_scale
=
False
,
with_delta
=
True
,
# As reported in the paper
with_delta_delta
=
True
,
# As reported in the paper
rect_filter
=
True
,
# in the paper, these features are computed with a rectangular filter
scfc_features
=
True
,
log_filter
=
False
,
# it is important to set it to False, so no log is applied to SCFC features
# energy_filter =True, # The paper DOES NOT use power of FFT magnitude for SCMC and SCFC features
delta_win
=
1
,
# the paper computes deltas on window of size 1
)
bob/pad/voice/config/extractor/scmc20.py
deleted
100644 → 0
View file @
adf69607
import
bob.pad.voice.extractor
extractor
=
bob
.
pad
.
voice
.
extractor
.
CepstralExtended
(
# the parameters are as specified in the paper "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci
# SCMC features as per paper that compares different features
pre_emphasis_coef
=
0.97
,
# as per the algorithm implemented in the paper
n_ceps
=
20
,
# number of SCMC coefficients should be 20 - this is the number we cut off from whole Cepstrum
n_filters
=
20
,
# number of filters in the bank is also 20
win_length_ms
=
20.
,
# 20 ms - this is the value in the paper
win_shift_ms
=
10.
,
# This is the overlap - half of the window
f_max
=
8000
,
# this number insures we take half of the frequencies after FFT - so we retain only 257 values for 512 window
mel_scale
=
False
,
with_delta
=
True
,
# As reported in the paper
with_delta_delta
=
True
,
# As reported in the paper
rect_filter
=
True
,
# in the paper, these features are computed with a rectangular filter
scmc_features
=
True
,
# energy_filter =True, # The paper DOES NOT use power of FFT magnitude for SCMC and SCFC features
dct_norm
=
True
,
# The paper uses normed DCT-II variant
delta_win
=
1
,
# the paper computes deltas on window of size 1
)
bob/pad/voice/config/extractor/ssfc20.py
deleted
100644 → 0
View file @
adf69607
import
bob.pad.voice.extractor
extractor
=
bob
.
pad
.
voice
.
extractor
.
CepstralExtended
(
# SSFC features as per paper that compares different features
pre_emphasis_coef
=
0.97
,
# as per the algorithm implemented in the paper
n_ceps
=
20
,
# number of SSFC coefficients should be 20 - this is the number we cut off from whole Cepstrum
n_filters
=
20
,
# number of filters in the bank is also 20
win_length_ms
=
20.
,
# 20 ms - this is the value in the paper
win_shift_ms
=
10.
,
# This is the overlap - half of the window
f_max
=
8000
,
# this number insures we take half of the frequencies after FFT - so we retain only 257 values for 512 window
mel_scale
=
False
,
with_delta
=
True
,
# As reported in the paper
with_delta_delta
=
True
,
# As reported in the paper
rect_filter
=
True
,
# in the paper, these features are computed with a rectangular filter
ssfc_features
=
True
,
normalize_spectrum
=
True
,
# normalized spectrum is used to compute SSFC features
energy_filter
=
False
,
# The Spectral Flux does not take the power of FFT itself but of the difference
dct_norm
=
True
,
# The paper uses normed DCT-II variant
delta_win
=
1
,
# the paper computes deltas on window of size 1
)
bob/pad/voice/config/grid/modest.py
deleted
100644 → 0
View file @
adf69607
import
bob.bio.base
# define a queue with modest parameters
grid
=
bob
.
bio
.
base
.
grid
.
Grid
(
number_of_preprocessing_jobs
=
48
,
number_of_extraction_jobs
=
48
,
number_of_projection_jobs
=
48
,
number_of_enrollment_jobs
=
48
,
number_of_scoring_jobs
=
1
,
training_queue
=
'32G'
,
# preprocessing
preprocessing_queue
=
'4G-io-big'
,
# feature extraction
extraction_queue
=
'4G-io-big'
,
# feature projection
projection_queue
=
'4G-io-big'
,
# model enrollment
enrollment_queue
=
'8G'
,
# scoring
scoring_queue
=
'8G-io-big'
)
bob/pad/voice/database/__init__.py
View file @
6c05dfc4
...
...
@@ -3,6 +3,7 @@ from .asvspoof import ASVspoofPadDatabase
from
.avspoof
import
AVspoofPadDatabase
from
.replay
import
ReplayPadDatabase
from
.replaymobile
import
ReplayMobilePadDatabase
from
.voicepa
import
VoicePAPadDatabase
# gets sphinx autodoc done right - don't remove it
__all__
=
[
_
for
_
in
dir
()
if
not
_
.
startswith
(
'_'
)]
bob/pad/voice/database/asvspoof.py
View file @
6c05dfc4
...
...
@@ -4,9 +4,8 @@
# Tue 17 May 15:43:22 CEST 2016
"""
ASVspoof database implementation of bob.db.verification.utils.Database interface.
It is an extension of an SQL-based database interface, which directly talks to ASVspoof database, for
verification experiments (good to use in bob.pad.base framework).
This is a high level interface for presentation attack ASVspoof database.
It is an extension of an interface defined inside bob.pad.base PAD framework.
"""
from
bob.pad.voice.database
import
PadVoiceFile
...
...
Prev
1
2
Next
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment