Commit c2b2c55e authored by Yannick DAYER

test pipeline, and delayed audio sample loading.

parent de05cd1d
Pipeline #52701 failed in 25 minutes and 42 seconds
...@@ -35,4 +35,6 @@ if "protocol" not in locals(): ...@@ -35,4 +35,6 @@ if "protocol" not in locals():
database = VoxforgeBioDatabase( database = VoxforgeBioDatabase(
protocol=protocol, protocol=protocol,
dataset_protocol_path="/idiap/temp/ydayer/bob_data/datasets/database-protocols-voxforge-short.tar.gz",
annotations_path="results~/annotations", # TODO remove (and add annotations to server?)
) )
...@@ -12,6 +12,7 @@ from bob.bio.base.pipelines.vanilla_biometrics.legacy import get_temp_directory ...@@ -12,6 +12,7 @@ from bob.bio.base.pipelines.vanilla_biometrics.legacy import get_temp_directory
from bob.bio.base.transformers import AlgorithmTransformer from bob.bio.base.transformers import AlgorithmTransformer
from bob.bio.gmm.algorithm import GMM from bob.bio.gmm.algorithm import GMM
from bob.bio.spear.extractor import Cepstral from bob.bio.spear.extractor import Cepstral
from bob.pipelines import wrap
from bob.pipelines.sample_loaders import AnnotationsLoader from bob.pipelines.sample_loaders import AnnotationsLoader
temp_dir = get_temp_directory("spear_mfcc60_voxforgegmm") temp_dir = get_temp_directory("spear_mfcc60_voxforgegmm")
...@@ -22,18 +23,21 @@ annotations_loader = AnnotationsLoader( ...@@ -22,18 +23,21 @@ annotations_loader = AnnotationsLoader(
annotation_directory="results~/annotations", annotation_directory="results~/annotations",
) )
extractor_transformer = Cepstral() extractor_transformer = Cepstral()
legacy_algorithm = GMM( legacy_algorithm = GMM(
number_of_gaussians=256, number_of_gaussians=256,
training_threshold=0.0, # Maximum number of iterations as stopping criterion training_threshold=0.0, # Maximum number of iterations as stopping criterion
) )
algorithm_transformer = AlgorithmTransformer( algorithm_transformer = wrap(
legacy_algorithm, projector_file=os.path.join(temp_dir, "projector.hdf5") ["sample"],
AlgorithmTransformer(
legacy_algorithm, projector_file=os.path.join(temp_dir, "projector.hdf5")
),
) )
transformer = Pipeline( transformer = Pipeline(
[ [
("load_annotations", annotations_loader),
("extractor", extractor_transformer), ("extractor", extractor_transformer),
("algorithm_transformer", algorithm_transformer), ("algorithm_transformer", algorithm_transformer),
] ]
......
...@@ -20,7 +20,6 @@ from bob.extension.download import download_and_unzip ...@@ -20,7 +20,6 @@ from bob.extension.download import download_and_unzip
from bob.extension.download import get_file from bob.extension.download import get_file
from bob.extension.download import search_file from bob.extension.download import search_file
from bob.extension.scripts.click_helper import verbosity_option from bob.extension.scripts.click_helper import verbosity_option
from bob.io.audio import reader as AudioReader
from bob.pipelines.sample_loaders import AnnotationsLoader from bob.pipelines.sample_loaders import AnnotationsLoader
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -48,6 +47,11 @@ def get_voxforge_protocol_file(): ...@@ -48,6 +47,11 @@ def get_voxforge_protocol_file():
) )
def path_loader(path):
logger.debug(f"Reading CSV row for {path}")
return path
def VoxforgeBioDatabase( def VoxforgeBioDatabase(
protocol="Default", protocol="Default",
dataset_protocol_path=None, dataset_protocol_path=None,
...@@ -118,8 +122,8 @@ def VoxforgeBioDatabase( ...@@ -118,8 +122,8 @@ def VoxforgeBioDatabase(
# Define the data loading transformers # Define the data loading transformers
# Loads an AudioReader object from a wav file # Loads an AudioReader object from a wav file
reader_loader = CSVToSampleLoaderBiometrics( path_to_data_loader = CSVToSampleLoaderBiometrics(
data_loader=AudioReader, data_loader=path_loader,
dataset_original_directory=data_path, dataset_original_directory=data_path,
extension=".wav", extension=".wav",
) )
...@@ -140,7 +144,7 @@ def VoxforgeBioDatabase( ...@@ -140,7 +144,7 @@ def VoxforgeBioDatabase(
# Build the data loading pipeline # Build the data loading pipeline
sample_loader = Pipeline( sample_loader = Pipeline(
[ [
("db:reader_loader", reader_loader), ("db:reader_loader", path_to_data_loader),
("db:reader_to_sample", reader_to_sample), ("db:reader_to_sample", reader_to_sample),
("db:annotations_loader", annotations_loader), ("db:annotations_loader", annotations_loader),
] ]
......
...@@ -28,12 +28,11 @@ from sklearn.base import TransformerMixin ...@@ -28,12 +28,11 @@ from sklearn.base import TransformerMixin
import bob.ap import bob.ap
from bob.bio.base.extractor import Extractor from bob.pipelines import SampleSet
from bob.pipelines import Sample
from .. import utils from .. import utils
logger = logging.getLogger("bob.bio.spear") logger = logging.getLogger(__name__)
class Cepstral(BaseEstimator, TransformerMixin): class Cepstral(BaseEstimator, TransformerMixin):
...@@ -54,30 +53,12 @@ class Cepstral(BaseEstimator, TransformerMixin): ...@@ -54,30 +53,12 @@ class Cepstral(BaseEstimator, TransformerMixin):
with_delta_delta=True, with_delta_delta=True,
n_ceps=19, # 0-->18 n_ceps=19, # 0-->18
pre_emphasis_coef=0.95, pre_emphasis_coef=0.95,
features_mask=numpy.arange(0, 60), features_mask=None,
# Normalization # Normalization
normalize_flag=True, normalize_flag=True,
**kwargs **kwargs,
): ):
# call base class constructor with its set of parameters super().__init__(**kwargs)
Extractor.__init__(
self,
win_length_ms=win_length_ms,
win_shift_ms=win_shift_ms,
n_filters=n_filters,
dct_norm=dct_norm,
f_min=f_min,
f_max=f_max,
delta_win=delta_win,
mel_scale=mel_scale,
with_energy=with_energy,
with_delta=with_delta,
with_delta_delta=with_delta_delta,
n_ceps=n_ceps,
pre_emphasis_coef=pre_emphasis_coef,
features_mask=features_mask,
normalize_flag=normalize_flag,
)
# copy parameters # copy parameters
self.win_length_ms = win_length_ms self.win_length_ms = win_length_ms
self.win_shift_ms = win_shift_ms self.win_shift_ms = win_shift_ms
...@@ -96,10 +77,6 @@ class Cepstral(BaseEstimator, TransformerMixin): ...@@ -96,10 +77,6 @@ class Cepstral(BaseEstimator, TransformerMixin):
self.normalize_flag = normalize_flag self.normalize_flag = normalize_flag
def normalize_features(self, params): def normalize_features(self, params):
#########################
# Initialisation part #
#########################
normalized_vector = [ normalized_vector = [
[0 for i in range(params.shape[1])] for j in range(params.shape[0]) [0 for i in range(params.shape[1])] for j in range(params.shape[0])
] ]
...@@ -113,12 +90,12 @@ class Cepstral(BaseEstimator, TransformerMixin): ...@@ -113,12 +90,12 @@ class Cepstral(BaseEstimator, TransformerMixin):
data = numpy.array(normalized_vector) data = numpy.array(normalized_vector)
return data return data
def __call__(self, sample: Sample): def transform_one(self, sample, **kwargs):
"""Computes and returns normalized cepstral features for the given input data""" """Computes and returns normalized cepstral features for the given input data"""
logger.debug(f"Cepstral transform of {sample}")
rate = getattr(sample, "sample_rate") rate = getattr(sample, "rate")
wavsample = getattr(sample, "data") wavsample = getattr(sample, "data")[0]
vad_labels = getattr(sample, "annotations") vad_labels = numpy.array(getattr(sample, "annotations"))
# Set parameters # Set parameters
wl = self.win_length_ms wl = self.win_length_ms
...@@ -138,7 +115,10 @@ class Cepstral(BaseEstimator, TransformerMixin): ...@@ -138,7 +115,10 @@ class Cepstral(BaseEstimator, TransformerMixin):
ceps.with_delta_delta = self.with_delta_delta ceps.with_delta_delta = self.with_delta_delta
cepstral_features = ceps(wavsample) cepstral_features = ceps(wavsample)
features_mask = self.features_mask if self.features_mask is None:
features_mask = numpy.arange(0, 60)
else:
features_mask = self.features_mask
if vad_labels is not None: # don't apply VAD if vad_labels is not None: # don't apply VAD
filtered_features = numpy.ndarray( filtered_features = numpy.ndarray(
shape=((vad_labels == 1).sum(), len(features_mask)), dtype=numpy.float64 shape=((vad_labels == 1).sum(), len(features_mask)), dtype=numpy.float64
...@@ -167,16 +147,21 @@ class Cepstral(BaseEstimator, TransformerMixin): ...@@ -167,16 +147,21 @@ class Cepstral(BaseEstimator, TransformerMixin):
else: else:
normalized_features = filtered_features normalized_features = filtered_features
if normalized_features.shape[0] == 0: if normalized_features.shape[0] == 0:
logger.warn("No speech found for this utterance") logger.warning("No speech found for this utterance")
# But do not keep it empty!!! This avoids errors in next steps # But do not keep it empty!!! This avoids errors in next steps
normalized_features = numpy.array([numpy.zeros(len(features_mask))]) normalized_features = numpy.array([numpy.zeros(len(features_mask))])
return normalized_features return normalized_features
def transform(self, samples): def transform(self, samples):
output = [] result = []
for sample in samples: for sample in samples:
output.append(self(sample)) if isinstance(sample, SampleSet):
return output result.append(SampleSet(samples=[], parent=sample))
for s in sample:
result[-1].insert(-1, self.transform_one(s))
else:
result.append(self.transform_one(sample))
return result
def fit(self, X, y=None, **fit_params): def fit(self, X, y=None, **fit_params):
return self return self
......
...@@ -2,49 +2,72 @@ ...@@ -2,49 +2,72 @@
# @author: Yannick Dayer <yannick.dayer@idiap.ch> # @author: Yannick Dayer <yannick.dayer@idiap.ch>
# @date: Thu 01 Jul 2021 10:41:55 UTC+02 # @date: Thu 01 Jul 2021 10:41:55 UTC+02
import logging
from functools import lru_cache
from functools import partial
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin from sklearn.base import TransformerMixin
from bob.io.audio import reader as AudioReader from bob.io.audio import reader as AudioReader
from bob.pipelines import Sample from bob.pipelines import DelayedSample
from bob.pipelines.sample import SampleBatch
logger = logging.getLogger(__name__)
audio_reader_keys = [
"rate",
"number_of_samples",
"number_of_channels",
"bits_per_sample",
"duration",
"encoding",
"type",
"compression_factor",
]
@lru_cache()
def load_metadata_from_file(filename: str):
"""Extracts data and a set of metadata from a reader object."""
logger.debug(f"Reading metadata from audio file {filename}")
reader = AudioReader(filename)
return {key: getattr(reader, key) for key in audio_reader_keys}
def load_data_from_file(filename: str):
logger.debug(f"Reading data from audio file {filename}")
reader = AudioReader(filename)
return reader.load()
def get_audio_attribute(sample, key):
if key == "data":
return load_data_from_file(sample.data)
return load_metadata_from_file(sample.data)[key]
class AudioReaderToSample(BaseEstimator, TransformerMixin): class AudioReaderToSample(BaseEstimator, TransformerMixin):
"""Transforms a Sample's data containing an audio reader to an audio signal. """Transforms a Sample's data containing a path to an audio signal.
The Sample's metadata are updated. The Sample's metadata are updated.
""" """
def __init__(self, **kwargs) -> None: def populate_from_reader(self, sample: DelayedSample) -> DelayedSample:
super().__init__(**kwargs) """Assigns the Sample's data and metadata."""
# dict[Sample attribute name, AudioReader field name] delayed_attr = {
self.metadata_keys = { key: partial(get_audio_attribute, sample, key) for key in audio_reader_keys
"sample_rate": "rate",
"number_of_samples": "number_of_samples",
"number_of_channels": "number_of_channels",
"bits_per_sample": "bits_per_sample",
"audio_duration": "duration",
"audio_encoding": "encoding",
"audio_sample_type": "type",
"audio_compression_factor": "compression_factor",
} }
new_sample = DelayedSample(
def extract_from_reader(self, reader: AudioReader) -> dict: load=partial(get_audio_attribute, sample, "data"),
"""Extracts a set of metadata from a reader object""" parent=sample,
results = {"data": reader.load()} delayed_attributes=delayed_attr,
for metadata, reader_key in self.metadata_keys.items(): )
results[metadata] = getattr(reader, reader_key)
return results
def populate_from_reader(self, sample: Sample) -> Sample:
"""Loads from the AudioReader of the Sample and set its fields accordingly."""
extracted = self.extract_from_reader(sample.data)
kwargs = {e: extracted[e] for e in extracted if e != "data"}
new_sample = Sample(data=extracted["data"], parent=sample, **kwargs)
return new_sample return new_sample
def transform(self, samples: SampleBatch): def transform(self, samples: list):
output = [] output = []
for sample in samples: for sample in samples:
output.append(self.populate_from_reader(sample)) output.append(self.populate_from_reader(sample))
......
...@@ -119,10 +119,10 @@ setup( ...@@ -119,10 +119,10 @@ setup(
], ],
"bob.bio.extractor": [ "bob.bio.extractor": [
"cqcc20e = bob.bio.spear.config.extractor.cqcc20:cqcc20", # Extractor (reads Matlab files) for CQCC features "cqcc20e = bob.bio.spear.config.extractor.cqcc20:cqcc20", # Extractor (reads Matlab files) for CQCC features
"mfcc-60 = bob.bio.spear.config.extractor.mfcc_60:extractor", # 60-dim MFCC features "mfcc-60 = bob.bio.spear.config.extractor.mfcc_60:extractor", # 60-dim MFCC features
"lfcc-60 = bob.bio.spear.config.extractor.lfcc_60:extractor", # 60-dim LFCC features "lfcc-60 = bob.bio.spear.config.extractor.lfcc_60:extractor", # 60-dim LFCC features
"htk = bob.bio.spear.config.extractor.htk:extractor", # HTK features "htk = bob.bio.spear.config.extractor.htk:extractor", # HTK features
"spro = bob.bio.spear.config.extractor.spro:extractor", # SPRO features "spro = bob.bio.spear.config.extractor.spro:extractor", # SPRO features
# 20 SSFCs with delta and delta-delta # 20 SSFCs with delta and delta-delta
"ssfc20 = bob.bio.spear.config.extractor.ssfc20:extractor", "ssfc20 = bob.bio.spear.config.extractor.ssfc20:extractor",
# 20 SCFCs with delta and delta-delta # 20 SCFCs with delta and delta-delta
...@@ -134,30 +134,37 @@ setup( ...@@ -134,30 +134,37 @@ setup(
# 20 MFCC with delta and delta-delta # 20 MFCC with delta and delta-delta
"mfcc20 = bob.bio.spear.config.extractor.mfcc20:extractor", "mfcc20 = bob.bio.spear.config.extractor.mfcc20:extractor",
# 20 IMFCC with delta and delta-delta # 20 IMFCC with delta and delta-delta
"imfcc20 = bob.bio.spear.config.extractor.imfcc20:extractor", "imfcc20 = bob.bio.spear.config.extractor.imfcc20:extractor",
# 20 LFCCs with delta and delta-delta # 20 LFCCs with delta and delta-delta
"lfcc20 = bob.bio.spear.config.extractor.lfcc20:extractor", "lfcc20 = bob.bio.spear.config.extractor.lfcc20:extractor",
], ],
"bob.bio.algorithm": [ "bob.bio.algorithm": [
"gmm-voxforge = bob.bio.spear.config.algorithm.gmm_voxforge:algorithm", # GMM config used for voxforge "gmm-voxforge = bob.bio.spear.config.algorithm.gmm_voxforge:algorithm", # GMM config used for voxforge
"ivec-cosine-voxforge = bob.bio.spear.config.algorithm.ivec_cosine_voxforge:algorithm", # IVec Cosine config used for voxforge "ivec-cosine-voxforge = bob.bio.spear.config.algorithm.ivec_cosine_voxforge:algorithm", # IVec Cosine config used for voxforge
"ivec-plda-voxforge = bob.bio.spear.config.algorithm.ivec_plda_voxforge:algorithm", # IVec PLDA used for voxforge "ivec-plda-voxforge = bob.bio.spear.config.algorithm.ivec_plda_voxforge:algorithm", # IVec PLDA used for voxforge
"isv-voxforge = bob.bio.spear.config.algorithm.isv_voxforge:algorithm", # ISV config used for voxforge "isv-voxforge = bob.bio.spear.config.algorithm.isv_voxforge:algorithm", # ISV config used for voxforge
"jfa-voxforge = bob.bio.spear.config.algorithm.jfa_voxforge:algorithm", # JFA config used for voxforge "jfa-voxforge = bob.bio.spear.config.algorithm.jfa_voxforge:algorithm", # JFA config used for voxforge
"gmm-timit = bob.bio.spear.config.algorithm.gmm_timit:algorithm", # GMM config used for TIMIT "gmm-timit = bob.bio.spear.config.algorithm.gmm_timit:algorithm", # GMM config used for TIMIT
"gmm-banca = bob.bio.spear.config.algorithm.gmm_regular_banca:algorithm", # GMM config used for BANCA "gmm-banca = bob.bio.spear.config.algorithm.gmm_regular_banca:algorithm", # GMM config used for BANCA
"ivec-plda-mobio = bob.bio.spear.config.algorithm.ivec_plda_mobio:algorithm", # IVec PLDA used for MOBIO "ivec-plda-mobio = bob.bio.spear.config.algorithm.ivec_plda_mobio:algorithm", # IVec PLDA used for MOBIO
"isv-mobio = bob.bio.spear.config.algorithm.isv_mobio:algorithm", # ISV used for MOBIO "isv-mobio = bob.bio.spear.config.algorithm.isv_mobio:algorithm", # ISV used for MOBIO
"ivec-avspoof = bob.bio.spear.config.algorithm.ivec_avspoof:algorithm", # IVec PLDA used for AVspoof "ivec-avspoof = bob.bio.spear.config.algorithm.ivec_avspoof:algorithm", # IVec PLDA used for AVspoof
# I-Vector config used for AVspoof # I-Vector config used for AVspoof
"isv-avspoof = bob.bio.spear.config.algorithm.isv_avspoof:algorithm", # ISV config used for AVspoof "isv-avspoof = bob.bio.spear.config.algorithm.isv_avspoof:algorithm", # ISV config used for AVspoof
# GMM training algorithm as per the paper "A Comparison of Features for Synthetic Speech Detection" # GMM training algorithm as per the paper "A Comparison of Features for Synthetic Speech Detection"
"gmm-tomi = bob.bio.spear.config.algorithm.gmm_tomi:algorithm", "gmm-tomi = bob.bio.spear.config.algorithm.gmm_tomi:algorithm",
# the same as above but with smaller thresholds # the same as above but with smaller thresholds
"gmm-tomi-scfc = bob.bio.spear.config.algorithm.gmm_tomi_scfc:algorithm", "gmm-tomi-scfc = bob.bio.spear.config.algorithm.gmm_tomi_scfc:algorithm",
],
"bob.bio.pipeline": [
"test_pipeline = bob.bio.spear.config.pipeline.mfcc60_voxforgegmm:pipeline"
],
"bob.bio.config": [
"voxforge = bob.bio.spear.config.database.voxforge:database"
"test_pipeline = bob.bio.spear.config.pipeline.mfcc60_voxforgegmm:pipeline"
], ],
"bob.bio.grid": [ "bob.bio.grid": [
"modest = bob.bio.spear.config.grid.modest:grid", "modest = bob.bio.spear.config.grid.modest:grid",
], ],
"bob.db.cli": [ "bob.db.cli": [
"download-voxforge = bob.bio.spear.database.voxforge:download_voxforge", "download-voxforge = bob.bio.spear.database.voxforge:download_voxforge",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment