Commit 6c05dfc4 authored by Pavel KORSHUNOV's avatar Pavel KORSHUNOV
Browse files

Added support voicePA and moved extractors to bob.bio.spear

parent adf69607
Pipeline #4713 passed with stages
in 16 minutes and 49 seconds
#!/usr/bin/env python
import bob.bio.gmm
# Regular GMM algorithm using a mixture of 512 Gaussians.
algorithm = bob.bio.gmm.algorithm.GMMRegular(
    number_of_gaussians=512,
    # Upper limit on the number of K-Means iterations.
    kmeans_training_iterations=10,
    # Upper limit on the number of ML GMM training iterations.
    gmm_training_iterations=10,
)
#!/usr/bin/env python
import bob.bio.gmm
# Regular GMM algorithm using a mixture of 512 Gaussians, with explicitly
# lowered training and variance thresholds.
algorithm = bob.bio.gmm.algorithm.GMMRegular(
    number_of_gaussians=512,
    # Upper limit on the number of K-Means iterations.
    kmeans_training_iterations=10,
    # Upper limit on the number of ML GMM training iterations.
    gmm_training_iterations=10,
    # Threshold at which the ML training stops; intentionally set small.
    training_threshold=5e-5,
    # Floor for the variances; intentionally smaller than the default.
    variance_threshold=5e-7,
)
import bob.bio.gmm
# ISV algorithm configuration.
algorithm = bob.bio.gmm.algorithm.ISV(
    # --- ISV settings ---
    subspace_dimension_of_u=50,
    # --- GMM settings ---
    number_of_gaussians=256,
    training_threshold=0.0,
)
import bob.bio.gmm
# I-Vector algorithm configuration.
algorithm = bob.bio.gmm.algorithm.IVector(
    # --- IVector settings ---
    subspace_dimension_of_t=100,
    update_sigma=True,
    # Number of EM iterations used for the TV training.
    tv_training_iterations=25,
    # --- GMM settings ---
    number_of_gaussians=256,
    training_threshold=0.0,
    # --- LDA / WCCN / PLDA switches and their dimensions ---
    use_lda=True,
    use_wccn=True,
    use_plda=True,
    lda_dim=50,
    plda_dim_F=50,
    plda_dim_G=50,
    plda_training_iterations=200,
)
......@@ -4,8 +4,7 @@ import bob.pad.voice.database
# directory where the wave files are stored
asvspoof_input_dir = "/idiap/resource/database/ASVspoof2015/ASVspoof2015_development" #Path to ASVspoof in Idiap
# asvspoof_input_dir = "/Users/pavelkor/Documents/pav/idiap/data/asvspoof" #path to a local copy
asvspoof_input_dir = "[YOUR_AVSPOOF_WAV_DIRECTORY]"
asvspoof_input_ext = ".wav"
......
#!/usr/bin/env python
import bob.bio.base.database
# Location and file extension of the wave files.
# Alternative (MOBIO database for UBM training):
# asvspoof_input_dir = "/idiap/resource/database/mobio/denoisedAUDIO_16k/"
# Path to ASVspoof at Idiap:
asvspoof_input_dir = "/idiap/resource/database/ASVspoof2015/ASVspoof2015_development"
# Alternative (path to a local copy):
# asvspoof_input_dir = "/Users/pavelkor/Documents/pav/idiap/data/asvspoof"
asvspoof_input_ext = ".wav"

# Database interface bound to the 'licit' protocol.
database_licit = bob.bio.base.database.ASVspoofBioDatabase(
    protocol="licit",
    original_directory=asvspoof_input_dir,
    original_extension=asvspoof_input_ext,
    training_depends_on_protocol=True,
)

# Database interface bound to the 'spoof' protocol; same files as above.
database_spoof = bob.bio.base.database.ASVspoofBioDatabase(
    protocol="spoof",
    original_directory=asvspoof_input_dir,
    original_extension=asvspoof_input_ext,
    training_depends_on_protocol=True,
)
......@@ -4,8 +4,7 @@ import bob.pad.voice.database
# directory where the wave files are stored
avspoof_input_dir = "/idiap/project/lobi/AVSpoof/data/" # folder at Idiap
# avspoof_input_dir = "/Users/pavelkor/Documents/pav/idiap/data/AVspoof_full/data/" # local copy
avspoof_input_dir = "[YOUR_AVSPOOF_WAV_DIRECTORY]"
avspoof_input_ext = ".wav"
......
#!/usr/bin/env python
import bob.bio.base.database
# Location and file extension of the wave files.
avspoof_input_dir = "/idiap/project/lobi/AVSpoof/data/"
# Alternative (local copy):
# avspoof_input_dir = "/Users/pavelkor/Documents/pav/idiap/data/AVspoof_full/data/"
avspoof_input_ext = ".wav"

# Database interface bound to the 'licit' protocol.
database_licit = bob.bio.base.database.AVspoofBioDatabase(
    protocol="licit",
    original_directory=avspoof_input_dir,
    original_extension=avspoof_input_ext,
    training_depends_on_protocol=True,
)

# Database interface bound to the 'spoof' protocol; same files as above.
database_spoof = bob.bio.base.database.AVspoofBioDatabase(
    protocol="spoof",
    original_directory=avspoof_input_dir,
    original_extension=avspoof_input_ext,
    training_depends_on_protocol=True,
)
#!/usr/bin/env python
import bob.pad.voice.database
# Location and file extension of the voicePA wave files. The directory is a
# placeholder that has to be replaced with the actual path.
voicepa_input_dir = "[YOUR_VOICEPA_WAV_DIRECTORY]"
voicepa_input_ext = ".wav"

# PAD database interface for voicePA.
# NOTE(review): the protocol name 'greattest' may be a typo for 'grandtest';
# verify against the protocols the voicePA database actually provides.
database = bob.pad.voice.database.VoicePAPadDatabase(
    protocol="greattest",
    original_directory=voicepa_input_dir,
    original_extension=voicepa_input_ext,
    training_depends_on_protocol=True,
)
import bob.pad.voice.extractor
# IMFCC (inverted MFCC) features. The settings emulate those of the paper
# "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci.
extractor = bob.pad.voice.extractor.CepstralExtended(
    # Pre-emphasis as in the algorithm implemented in the paper.
    pre_emphasis_coef=0.97,
    # Keep 20 inverted-MFCC coefficients cut from the whole cepstrum.
    n_ceps=20,
    # The filter bank also has 20 filters.
    n_filters=20,
    # 20 ms window, the value used in the paper.
    win_length_ms=20.0,
    # Shift (overlap) of half the window.
    win_shift_ms=10.0,
    # Ensures half of the frequencies after the FFT are taken, so only
    # 257 values are retained for a 512 window.
    f_max=8000,
    # Mel scaling is what makes these IMFCC features.
    mel_scale=True,
    # Deltas and delta-deltas, as reported in the paper.
    with_delta=True,
    with_delta_delta=True,
    # The paper uses the power of the FFT magnitude.
    energy_filter=True,
    # The paper uses the normed DCT-II variant.
    dct_norm=True,
    # The paper computes deltas on a window of size 1.
    delta_win=1,
    # Use the inverted Mel filter.
    inverse_filter=True,
)
import bob.pad.voice.extractor
import bob.bio.spear.extractor
"""
This LBP-based feature extraction is implemented based on the paper
......@@ -13,7 +13,7 @@ of the signal. Histogram size is 58 (as lbp_uniform LBP8,1 has 58 different valu
GLCMs are not computed for the top and bottom rows of the spectrogram. Hence, the resulting size of the feature vector is 2842.
"""
lfcc16_eurecom = bob.pad.voice.extractor.CepstralExtended(
lfcc16_eurecom = bob.bio.spear.extractor.CepstralExtended(
# the parameters are as specified in the paper by Eurecom
# Federico Alegre, Ravichander Vipperla, Asmaa Amehraye, Nicholas Evans.
# "A new speaker verification spoofing countermeasure based on local binary patterns".
......
import bob.pad.voice.extractor
# LFCC features. The settings are those specified in the paper
# "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci.
extractor = bob.pad.voice.extractor.CepstralExtended(
    # Pre-emphasis as in the algorithm implemented in the paper.
    pre_emphasis_coef=0.97,
    # Keep 20 LFCC coefficients cut from the whole cepstrum.
    n_ceps=20,
    # The filter bank also has 20 filters.
    n_filters=20,
    # 20 ms window, the value used in the paper.
    win_length_ms=20.0,
    # Shift (overlap) of half the window.
    win_shift_ms=10.0,
    # Ensures half of the frequencies after the FFT are taken, so only
    # 257 values are retained for a 512 window.
    f_max=8000,
    mel_scale=False,
    # Deltas and delta-deltas, as reported in the paper.
    with_delta=True,
    with_delta_delta=True,
    # The paper uses the power of the FFT magnitude.
    energy_filter=True,
    # The paper uses the normed DCT-II variant.
    dct_norm=True,
    # The paper computes deltas on a window of size 1.
    delta_win=1,
)
import bob.pad.voice.extractor
# MFCC features. The settings emulate those of the paper
# "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci.
extractor = bob.pad.voice.extractor.CepstralExtended(
    # Pre-emphasis as in the algorithm implemented in the paper.
    pre_emphasis_coef=0.97,
    # Keep 20 MFCC coefficients cut from the whole cepstrum.
    n_ceps=20,
    # The filter bank also has 20 filters.
    n_filters=20,
    # 20 ms window, the value used in the paper.
    win_length_ms=20.0,
    # Shift (overlap) of half the window.
    win_shift_ms=10.0,
    # Ensures half of the frequencies after the FFT are taken, so only
    # 257 values are retained for a 512 window.
    f_max=8000,
    # Mel scaling is what makes these MFCC features.
    mel_scale=True,
    # Deltas and delta-deltas, as reported in the paper.
    with_delta=True,
    with_delta_delta=True,
    # The paper uses the power of the FFT magnitude.
    energy_filter=True,
    # The paper uses the normed DCT-II variant.
    dct_norm=True,
    # The paper computes deltas on a window of size 1.
    delta_win=1,
)
import bob.pad.voice.extractor
# RFCC features. The settings emulate those of the paper
# "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci.
extractor = bob.pad.voice.extractor.CepstralExtended(
    # Pre-emphasis as in the algorithm implemented in the paper.
    pre_emphasis_coef=0.97,
    # Keep 20 RFCC coefficients cut from the whole cepstrum.
    n_ceps=20,
    # The filter bank also has 20 filters.
    n_filters=20,
    # Alternative windowing (32 ms window, 16 ms shift), which makes both the
    # window size and the FFT size 512:
    # win_length_ms=32.,
    # win_shift_ms=16.,
    # 20 ms window, the value used in the paper.
    win_length_ms=20.0,
    # Shift (overlap) of half the window.
    win_shift_ms=10.0,
    # Ensures half of the frequencies after the FFT are taken, so only
    # 257 values are retained for a 512 window.
    f_max=8000,
    mel_scale=False,
    # A rectangular filter is used for these features, hence the name RFCC.
    rect_filter=True,
    # Deltas and delta-deltas, as reported in the paper.
    with_delta=True,
    with_delta_delta=True,
    # The paper uses the power of the FFT magnitude.
    energy_filter=True,
    # The paper uses the normed DCT-II variant.
    dct_norm=True,
    # The paper computes deltas on a window of size 1.
    delta_win=1,
)
import bob.pad.voice.extractor
# SCFC features, as in the feature-comparison paper. The settings are those
# specified in "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci.
extractor = bob.pad.voice.extractor.CepstralExtended(
    # Pre-emphasis as in the algorithm implemented in the paper.
    pre_emphasis_coef=0.97,
    # Keep 20 SCFC coefficients cut from the whole cepstrum.
    n_ceps=20,
    # The filter bank also has 20 filters.
    n_filters=20,
    # 20 ms window, the value used in the paper.
    win_length_ms=20.0,
    # Shift (overlap) of half the window.
    win_shift_ms=10.0,
    # Ensures half of the frequencies after the FFT are taken, so only
    # 257 values are retained for a 512 window.
    f_max=8000,
    mel_scale=False,
    # Deltas and delta-deltas, as reported in the paper.
    with_delta=True,
    with_delta_delta=True,
    # In the paper these features are computed with a rectangular filter.
    rect_filter=True,
    scfc_features=True,
    # Must stay False so that no log is applied to the SCFC features.
    log_filter=False,
    # energy_filter is deliberately not set: the paper DOES NOT use the power
    # of the FFT magnitude for SCMC and SCFC features.
    # The paper computes deltas on a window of size 1.
    delta_win=1,
)
import bob.pad.voice.extractor
# SCMC features, as in the feature-comparison paper. The settings are those
# specified in "A Comparison of Features for Synthetic Speech Detection" by
# Md Sahidullah, Tomi Kinnunen, Cemal Hanilci.
extractor = bob.pad.voice.extractor.CepstralExtended(
    # Pre-emphasis as in the algorithm implemented in the paper.
    pre_emphasis_coef=0.97,
    # Keep 20 SCMC coefficients cut from the whole cepstrum.
    n_ceps=20,
    # The filter bank also has 20 filters.
    n_filters=20,
    # 20 ms window, the value used in the paper.
    win_length_ms=20.0,
    # Shift (overlap) of half the window.
    win_shift_ms=10.0,
    # Ensures half of the frequencies after the FFT are taken, so only
    # 257 values are retained for a 512 window.
    f_max=8000,
    mel_scale=False,
    # Deltas and delta-deltas, as reported in the paper.
    with_delta=True,
    with_delta_delta=True,
    # In the paper these features are computed with a rectangular filter.
    rect_filter=True,
    scmc_features=True,
    # energy_filter is deliberately not set: the paper DOES NOT use the power
    # of the FFT magnitude for SCMC and SCFC features.
    # The paper uses the normed DCT-II variant.
    dct_norm=True,
    # The paper computes deltas on a window of size 1.
    delta_win=1,
)
import bob.pad.voice.extractor
# SSFC features, as in the paper that compares different features.
extractor = bob.pad.voice.extractor.CepstralExtended(
    # Pre-emphasis as in the algorithm implemented in the paper.
    pre_emphasis_coef=0.97,
    # Keep 20 SSFC coefficients cut from the whole cepstrum.
    n_ceps=20,
    # The filter bank also has 20 filters.
    n_filters=20,
    # 20 ms window, the value used in the paper.
    win_length_ms=20.0,
    # Shift (overlap) of half the window.
    win_shift_ms=10.0,
    # Ensures half of the frequencies after the FFT are taken, so only
    # 257 values are retained for a 512 window.
    f_max=8000,
    mel_scale=False,
    # Deltas and delta-deltas, as reported in the paper.
    with_delta=True,
    with_delta_delta=True,
    # In the paper these features are computed with a rectangular filter.
    rect_filter=True,
    ssfc_features=True,
    # SSFC features are computed from the normalized spectrum.
    normalize_spectrum=True,
    # Spectral flux takes the power of the difference, not of the FFT itself.
    energy_filter=False,
    # The paper uses the normed DCT-II variant.
    dct_norm=True,
    # The paper computes deltas on a window of size 1.
    delta_win=1,
)
import bob.bio.base
# Grid configuration with modest parameters: job counts per stage first,
# then the queue used by each stage.
grid = bob.bio.base.grid.Grid(
    number_of_preprocessing_jobs=48,
    number_of_extraction_jobs=48,
    number_of_projection_jobs=48,
    number_of_enrollment_jobs=48,
    number_of_scoring_jobs=1,
    # Queue for training.
    training_queue="32G",
    # Queue for preprocessing.
    preprocessing_queue="4G-io-big",
    # Queue for feature extraction.
    extraction_queue="4G-io-big",
    # Queue for feature projection.
    projection_queue="4G-io-big",
    # Queue for model enrollment.
    enrollment_queue="8G",
    # Queue for scoring.
    scoring_queue="8G-io-big",
)
......@@ -3,6 +3,7 @@ from .asvspoof import ASVspoofPadDatabase
from .avspoof import AVspoofPadDatabase
from .replay import ReplayPadDatabase
from .replaymobile import ReplayMobilePadDatabase
from .voicepa import VoicePAPadDatabase
# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
......@@ -4,9 +4,8 @@
# Tue 17 May 15:43:22 CEST 2016
"""
ASVspoof database implementation of bob.db.verification.utils.Database interface.
It is an extension of an SQL-based database interface, which directly talks to ASVspoof database, for
verification experiments (good to use in bob.pad.base framework).
This is a high level interface for presentation attack ASVspoof database.
It is an extension of an interface defined inside bob.pad.base PAD framework.
"""
from bob.pad.voice.database import PadVoiceFile
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment