Commit 386d8494 authored by Pavel KORSHUNOV's avatar Pavel KORSHUNOV

Added RFCC, IMFCC, SCFC, SCMC, SSFC features, revamped tests

parent 750df966
Pipeline #3900 failed with stages
in 43 minutes and 11 seconds
/**
* @author Andre Anjos <andre.anjos@idiap.ch>
* @author Pavel Korshunov <Pavel.Korshunov@idiap.ch>
* @date Thu 6 Feb 09:00:05 2014
*
* @brief Bindings to the base class bob::ap::Ceps
......@@ -13,7 +14,7 @@
PyDoc_STRVAR(s_ceps_str, BOB_EXT_MODULE_PREFIX ".Ceps");
PyDoc_STRVAR(s_ceps_doc,
"Ceps(sampling_frequency, [win_length_ms=20., [win_shift_ms=10., [n_filters=24, [n_ceps=19, [f_min=0., [f_max=4000., [delta_win=2, [pre_emphasis_coeff=0.95, [mel_scale=True, [dct_norm=True]]]]]]]]]]) -> new Ceps\n\
"Ceps(sampling_frequency, [win_length_ms=20., [win_shift_ms=10., [normalize_mean=True, [n_filters=24, [n_ceps=19, [f_min=0., [f_max=8000., [delta_win=2, [pre_emphasis_coeff=0.95, [mel_scale=True, [rect_filter=False, [inverse_filter=False, [normalize_spectrum=False, [dct_norm=False, [ssfc_features=False, [scfc_features=False, [scmc_features=False]]]]]]]]]]]]]]]]]) -> new Ceps\n\
Ceps(other) -> new Ceps\n\
\n\
Objects of this class, after configuration, can extract the\n\
......@@ -30,6 +31,11 @@ win_length_ms\n\
win_shift_ms\n\
[float] the window shift in miliseconds\n\
\n\
normalize_mean\n\
[bool] Tells whether frame should be normalized \n\
by subtracting mean (True) or dividing by max_range (False)\n\
``True`` is the default value.\n\
\n\
n_filters\n\
[int] the number of filter bands\n\
\n\
......@@ -57,6 +63,34 @@ mel_scale\n\
dct_norm\n\
[bool] A factor by which the cepstral coefficients are\n\
multiplied\n\
ssfc_features\n\
[bool] Set to true if you want to compute\n\
Subband Spectral Flux Coefficients (SSFC), which measures\n\
the frame-by-frame change in the power spectrum\n\
\n\
scfc_features\n\
[bool] Set to true if you want to compute\n\
Spectral Centroid Frequency Coefficients (SCFC), which\n\
capture detailed information about subbands similar to formant frequencies\n\
\n\
scmc_features\n\
[bool] Set to true if you want to compute\n\
Spectral Centroid Magnitude Coefficients (SCMC), which\n\
capture detailed information about subbands similar to SCFC features\n\
\n\
rect_filter\n\
[bool] tells whether cepstral features are extracted\n\
using a rectangular filter (set it to ``True``), i.e., RFCC features,\n\
instead of the default filter (the default value is ``False``)\n\
\n\
inverse_filter\n\
[bool] tells whether to apply the filter in the\n\
inversed order, i.e., from high frequencies to low\n\
(set it to ``True``). ``False`` is the default value.\n\
\n\
normalize_spectrum\n\
[bool] Tells whether to normalize the power spectrum of the signal.\n\
The default value is ``False``.\n\
\n\
other\n\
[Ceps] an object of which is or inherits from ``Ceps``\n\
......@@ -124,6 +158,7 @@ static int PyBobApCeps_InitParameters
"sampling_frequency",
"win_length_ms",
"win_shift_ms",
"normalize_mean",
"n_filters",
"n_ceps",
"f_min",
......@@ -131,34 +166,56 @@ static int PyBobApCeps_InitParameters
"delta_win",
"pre_emphasis_coeff",
"mel_scale",
"rect_filter",
"inverse_filter",
"normalize_spectrum",
"dct_norm",
"ssfc_features",
"scfc_features",
"scmc_features",
0};
static char** kwlist = const_cast<char**>(const_kwlist);
double sampling_frequency = 0.;
double win_length_ms = 20.;
double win_shift_ms = 10.;
PyObject* normalize_mean = Py_True;
Py_ssize_t n_filters = 24;
Py_ssize_t n_ceps = 19;
double f_min = 0.;
double f_max = 4000.;
double f_max = 8000.;
Py_ssize_t delta_win = 2;
double pre_emphasis_coeff = 0.95;
PyObject* mel_scale = Py_True;
PyObject* dct_norm = Py_True;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "d|ddnnddndOO", kwlist,
&sampling_frequency, &win_length_ms, &win_shift_ms, &n_filters,
PyObject* rect_filter = Py_False;
PyObject* inverse_filter = Py_False;
PyObject* normalize_spectrum = Py_False;
PyObject* dct_norm = Py_False;
PyObject* ssfc_features = Py_False;
PyObject* scfc_features = Py_False;
PyObject* scmc_features = Py_False;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "d|ddOnnddndOOOOOOOO", kwlist,
&sampling_frequency, &win_length_ms, &win_shift_ms, &normalize_mean, &n_filters,
&n_ceps, &f_min, &f_max, &delta_win, &pre_emphasis_coeff,
&mel_scale, &dct_norm))
&mel_scale, &rect_filter, &inverse_filter, &normalize_spectrum, &dct_norm,
&ssfc_features, &scfc_features, &scmc_features))
return -1;
bool normalize_mean_ = PyObject_IsTrue(normalize_mean);
bool mel_scale_ = PyObject_IsTrue(mel_scale);
bool rect_filter_ = PyObject_IsTrue(rect_filter);
bool inverse_filter_ = PyObject_IsTrue(inverse_filter);
bool normalize_spectrum_ = PyObject_IsTrue(normalize_spectrum);
bool dct_norm_ = PyObject_IsTrue(dct_norm);
bool ssfc_features_ = PyObject_IsTrue(ssfc_features);
bool scfc_features_ = PyObject_IsTrue(scfc_features);
bool scmc_features_ = PyObject_IsTrue(scmc_features);
try {
self->cxx = new bob::ap::Ceps(sampling_frequency,
win_length_ms, win_shift_ms, n_filters, n_ceps, f_min, f_max,
delta_win, pre_emphasis_coeff, mel_scale_, dct_norm_);
win_length_ms, win_shift_ms, normalize_mean_, n_filters, n_ceps, f_min, f_max,
delta_win, pre_emphasis_coeff, mel_scale_, rect_filter_, inverse_filter_, normalize_spectrum_,
dct_norm_, ssfc_features_, scfc_features_, scmc_features_);
if (!self->cxx) {
PyErr_Format(PyExc_MemoryError, "cannot create new object of type `%s' - no more memory", Py_TYPE(self)->tp_name);
return -1;
......@@ -229,7 +286,7 @@ static PyObject* PyBobApCeps_Repr(PyBobApCepsObject* self) {
Py_ssize_t n_filters = self->cxx->getNFilters();
Py_ssize_t n_ceps = self->cxx->getNCeps();
Py_ssize_t delta_win = self->cxx->getDeltaWin();
auto count = std::snprintf(buffer, MAXSIZE, "%s(sampling_frequency=%f, win_length_ms=%f, win_shift_ms=%f, n_filters=%" PY_FORMAT_SIZE_T "d, n_ceps=%" PY_FORMAT_SIZE_T "d, f_min=%f, f_max=%f, delta_win=%" PY_FORMAT_SIZE_T "d, pre_emphasis_coeff=%f, mel_scale=%s, dct_norm=%s)", Py_TYPE(self)->tp_name, self->cxx->getSamplingFrequency(), self->cxx->getWinLengthMs(), self->cxx->getWinShiftMs(), n_filters, n_ceps, self->cxx->getFMin(), self->cxx->getFMax(), delta_win, self->cxx->getPreEmphasisCoeff(), self->cxx->getMelScale()?"True":"False", self->cxx->getDctNorm()?"True":"False");
auto count = std::snprintf(buffer, MAXSIZE, "%s(sampling_frequency=%f, win_length_ms=%f, win_shift_ms=%f, normalize_mean=%s, n_filters=%" PY_FORMAT_SIZE_T "d, n_ceps=%" PY_FORMAT_SIZE_T "d, f_min=%f, f_max=%f, delta_win=%" PY_FORMAT_SIZE_T "d, pre_emphasis_coeff=%f, mel_scale=%s, rect_filter=%s, inverse_filter=%s, normalize_spectrum=%s, dct_norm=%s, ssfc_features=%s, scfc_features=%s, scmc_features=%s)", Py_TYPE(self)->tp_name, self->cxx->getSamplingFrequency(), self->cxx->getWinLengthMs(), self->cxx->getWinShiftMs(), self->cxx->getNormalizeMean()?"True":"False", n_filters, n_ceps, self->cxx->getFMin(), self->cxx->getFMax(), delta_win, self->cxx->getPreEmphasisCoeff(), self->cxx->getMelScale()?"True":"False", self->cxx->getRectangularFilter()?"True":"False", self->cxx->getInverseFilter()?"True":"False", self->cxx->getNormalizeSpectrum()?"True":"False", self->cxx->getDctNorm()?"True":"False", self->cxx->getSSFCFeatures()?"True":"False", self->cxx->getSCFCFeatures()?"True":"False", self->cxx->getSCMCFeatures()?"True":"False");
return
# if PY_VERSION_HEX >= 0x03000000
PyUnicode_FromStringAndSize
......
......@@ -2,6 +2,7 @@
* @date Wed Jan 11:09:30 2013 +0200
* @author Elie Khoury <Elie.Khoury@idiap.ch>
* @author Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
* @author Pavel Korshunov <Pavel.Korshunov@idiap.ch>
*
* @brief Implement Linear and Mel Frequency Cepstral Coefficients
* functions (MFCC and LFCC)
......@@ -14,11 +15,15 @@
bob::ap::Ceps::Ceps(const double sampling_frequency,
const double win_length_ms, const double win_shift_ms,
const bool normalize_mean,
const size_t n_filters, const size_t n_ceps, const double f_min,
const double f_max, const size_t delta_win, const double pre_emphasis_coeff,
const bool mel_scale, const bool dct_norm):
bob::ap::Spectrogram(sampling_frequency, win_length_ms, win_shift_ms,
n_filters, f_min, f_max, pre_emphasis_coeff, mel_scale),
const bool mel_scale, const bool rect_filter, const bool inverse_filter, const bool normalize_spectrum,
const bool dct_norm, const bool ssfc_features,
const bool scfc_features, const bool scmc_features):
bob::ap::Spectrogram(sampling_frequency, win_length_ms, win_shift_ms, normalize_mean,
n_filters, f_min, f_max, pre_emphasis_coeff, mel_scale, rect_filter, inverse_filter,
normalize_spectrum, ssfc_features, scfc_features, scmc_features),
m_n_ceps(n_ceps), m_delta_win(delta_win), m_dct_norm(dct_norm),
m_with_energy(false), m_with_delta(false), m_with_delta_delta(false)
{
......@@ -95,12 +100,21 @@ void bob::ap::Ceps::setDctNorm(bool dct_norm)
/**
 * @brief (Re)builds the cached DCT kernel m_dct_kernel, an
 * m_n_ceps x m_n_filters matrix implementing the DCT-II variant.
 *
 * When m_dct_norm is set, every entry is scaled by sqrt(2/m_n_filters) and
 * the first row is additionally scaled by sqrt(0.5), as per the Matlab
 * implementation of DCT-II. Otherwise the plain cosine kernel is used.
 */
void bob::ap::Ceps::initCacheDctKernel()
{
  // Dct Kernel initialization, we implement DCT-II variant here
  m_dct_kernel.resize(m_n_ceps,m_n_filters);
  blitz::firstIndex i;
  blitz::secondIndex j;
  // If normalize, use the Matlab-based implementation
  const double dct_coeff = m_dct_norm ? (double)sqrt(2./(double)(m_n_filters)) : 1.;
  // Single assignment of the kernel: row index i starts at 0 (DCT-II), so the
  // first row is the constant (DC) basis vector.
  m_dct_kernel = dct_coeff * blitz::cos(M_PI*(i)*(j+0.5)/(double)(m_n_filters));
  // Finish normalization: multiply first row by sqrt(0.5), as per Matlab implementation of DCT-II
  if (m_dct_norm) {
    blitz::Array<double,1> firstIndex_coeff (m_n_ceps);
    firstIndex_coeff = blitz::where(i == 0, sqrt(0.5), 1.); //first element is sqrt(0.5), the rest are 1.
    m_dct_kernel = firstIndex_coeff(i) * m_dct_kernel(i,j); // elementwise multiplication
  }
}
......@@ -112,6 +126,10 @@ blitz::TinyVector<int,2> bob::ap::Ceps::getShape(const size_t input_size) const
// 1. Number of frames
res(0) = 1+((input_size-m_win_length)/m_win_shift);
//reduce the number of frames by 1 for SSFC features, so the resulted matrix is of correct size
if (m_ssfc_features)
res(0) -= 1;
// 2. Dimension of the feature vector
int dim0=m_n_ceps;
if (m_with_energy) dim0 += 1;
......@@ -134,33 +152,80 @@ blitz::TinyVector<int,2> bob::ap::Ceps::getShape(const blitz::Array<double,1>& i
void bob::ap::Ceps::operator()(const blitz::Array<double,1>& input,
blitz::Array<double,2>& ceps_matrix)
{
// Get expected dimensionality of output array
blitz::TinyVector<int,2> feature_shape = bob::ap::Ceps::getShape(input);
// Check dimensionality of output array
bob::core::array::assertSameShape(ceps_matrix, feature_shape);
int n_frames=feature_shape(0);
int shift_frame=0;
double last_frame_elem=0;
// Create the holder for the previous frame and make sure it's the same as the current frame
// Used by SSFC features computation
blitz::Array<double,1> _prev_frame_d;
_prev_frame_d.resize(m_cache_frame_d.shape());
// Create the temporary holder for SSFC features computation
blitz::Array<double,1> _temp_frame_d;
_temp_frame_d.resize(m_cache_frame_d.shape());
if (m_ssfc_features) {
//we are going to always process the next frame within the loop
shift_frame = 1;
// Init the first frame to the input
extractNormalizeFrame(input, 0, _prev_frame_d);
// Apply pre-emphasis
pre_emphasis(_prev_frame_d, last_frame_elem);
// Apply the Hamming window
hammingWindow(_prev_frame_d);
// Take the power spectrum of the first part of the FFT
powerSpectrumFFT(_prev_frame_d);
}
blitz::Range r1(0,m_n_ceps-1);
for (int i=0; i<n_frames; ++i)
{
// Set padded frame to zero
extractNormalizeFrame(input, i, m_cache_frame_d);
// Init the current frame from the input, we process (i+1)th frame for SSFC features
extractNormalizeFrame(input, i+shift_frame, m_cache_frame_d);
// Update output with energy if required
if (m_with_energy)
ceps_matrix(i,(int)m_n_ceps) = logEnergy(m_cache_frame_d);
// Apply pre-emphasis
pre_emphasis(m_cache_frame_d);
pre_emphasis(m_cache_frame_d, last_frame_elem);
// Apply the Hamming window
hammingWindow(m_cache_frame_d);
// Take the power spectrum of the first part of the FFT
// Note that after this call, we only operate on the first half of m_cache_frame_d array. The second half is ignored.
// powerSpectrumFFT changes first half+1 elements of m_cache_frame_d array
powerSpectrumFFT(m_cache_frame_d);
// Filter with the triangular filter bank (either in linear or Mel domain)
if (m_ssfc_features)
{
// retrieve the previous frame into our temp
_temp_frame_d = _prev_frame_d;
// remember the current frame for the next round, before we change current frame
_prev_frame_d = m_cache_frame_d;
// Computation of SSFC features:
// We take the previous frame and find the difference between values of current and previous frames
m_cache_frame_d -= _temp_frame_d;
// We compute norm2 for the difference as per SSFC features
m_cache_frame_d = blitz::pow2(m_cache_frame_d);
// Then, we can apply the filter and DCT later on
}
// Filter with triangular or rectangular filter bank (either in linear or Mel domain)
filterBank(m_cache_frame_d);
// Apply DCT kernel and update the output
blitz::Array<double,1> ceps_matrix_row(ceps_matrix(i,r1));
applyDct(ceps_matrix_row);
if (m_scfc_features)
// do not apply DCT on SCFC features
ceps_matrix_row = m_cache_filters(r1);
else
applyDct(ceps_matrix_row);
}
//compute the center of the cut-off frequencies
......@@ -226,8 +291,9 @@ void bob::ap::Ceps::addDerivative(const blitz::Array<double,2>& input, blitz::Ar
}
}
// Sum of the integer squared from 1 to delta_win
const double sum = m_delta_win*(m_delta_win+1)*(2*m_delta_win+1)/3;
output /= sum;
// pavel - removed the division for the sake of compatibility with the Matlab code of the RFCC features comparison paper
//const double sum = m_delta_win*(m_delta_win+1)*(2*m_delta_win+1)/3;
//output /= sum;
}
/*
......
......@@ -10,8 +10,8 @@
#include <bob.core/assert.h>
bob::ap::Energy::Energy(const double sampling_frequency, const double win_length_ms,
const double win_shift_ms):
bob::ap::FrameExtractor(sampling_frequency, win_length_ms, win_shift_ms),
const double win_shift_ms, const bool normalize_mean):
bob::ap::FrameExtractor(sampling_frequency, win_length_ms, win_shift_ms, normalize_mean),
m_energy_floor(1.)
{
// Initializes logarithm of flooring values
......
......@@ -2,7 +2,8 @@
* @date Wed Jan 11:09:30 2013 +0200
* @author Elie Khoury <Elie.Khoury@idiap.ch>
* @author Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
*
* @author Pavel Korshunov <Pavel.Korshunov@idiap.ch>
*
* Copyright (C) Idiap Research Institute, Martigny, Switzerland
*/
......@@ -12,23 +13,27 @@
#include <bob.core/check.h>
bob::ap::FrameExtractor::FrameExtractor(const double sampling_frequency,
const double win_length_ms, const double win_shift_ms):
const double win_length_ms, const double win_shift_ms,
const bool normalize_mean):
m_sampling_frequency(sampling_frequency), m_win_length_ms(win_length_ms),
m_win_shift_ms(win_shift_ms)
m_win_shift_ms(win_shift_ms), m_normalize_mean(normalize_mean)
{
// Initialization
initWinLength();
initWinShift();
initMaxRange();
}
bob::ap::FrameExtractor::FrameExtractor(const FrameExtractor& other):
m_sampling_frequency(other.m_sampling_frequency),
m_win_length_ms(other.m_win_length_ms),
m_win_shift_ms(other.m_win_shift_ms)
m_win_shift_ms(other.m_win_shift_ms),
m_normalize_mean(other.m_normalize_mean)
{
// Initialization
initWinLength();
initWinShift();
initMaxRange();
}
bob::ap::FrameExtractor::~FrameExtractor()
......@@ -42,10 +47,12 @@ bob::ap::FrameExtractor& bob::ap::FrameExtractor::operator=(const bob::ap::Frame
m_sampling_frequency = other.m_sampling_frequency;
m_win_length_ms = other.m_win_length_ms;
m_win_shift_ms = other.m_win_shift_ms;
m_normalize_mean = other.m_normalize_mean;
// Initialization
initWinLength();
initWinShift();
initMaxRange();
}
return *this;
}
......@@ -67,6 +74,7 @@ void bob::ap::FrameExtractor::setSamplingFrequency(const double sampling_frequen
m_sampling_frequency = sampling_frequency;
initWinLength();
initWinShift();
initMaxRange();
}
void bob::ap::FrameExtractor::setWinLengthMs(const double win_length_ms)
......@@ -87,6 +95,7 @@ void bob::ap::FrameExtractor::initWinLength()
if (m_win_length == 0)
throw std::runtime_error("The length of the window is 0. You should use a larger sampling rate or window length in miliseconds");
initWinSize();
}
void bob::ap::FrameExtractor::initWinShift()
......@@ -100,6 +109,12 @@ void bob::ap::FrameExtractor::initWinSize()
m_cache_frame_d.resize(m_win_size);
}
void bob::ap::FrameExtractor::initMaxRange()
{
  // Refresh m_max_range, which depends only on m_sampling_frequency and
  // therefore has to be recomputed whenever that value is (re)set.
  // NOTE(review): the exponent is the sampling frequency expressed in kHz
  // (e.g. 16000 Hz -> 2^16); presumably this stands in for the sample bit
  // depth -- confirm against callers before relying on it for other rates.
  const double exponent = m_sampling_frequency/1000;
  const double full_scale = pow(2.0, exponent);
  m_max_range = full_scale/2.0 - 0.5;
}
void bob::ap::FrameExtractor::extractNormalizeFrame(const blitz::Array<double,1>& input,
const size_t i, blitz::Array<double,1>& frame_d) const
{
......@@ -109,8 +124,19 @@ void bob::ap::FrameExtractor::extractNormalizeFrame(const blitz::Array<double,1>
blitz::Range rf(0,(int)m_win_length-1);
blitz::Range ri(i*(int)m_win_shift,i*(int)m_win_shift+(int)m_win_length-1);
frame_d(rf) = input(ri);
// Subtract mean value
frame_d -= blitz::mean(frame_d);
if (m_normalize_mean) { // added by Pavel Korshunov
// We normalize by subtracting mean value
frame_d(rf) -= blitz::mean(frame_d);
}
else {
//Otherwise, we normalize by dividing by the maximum possible range, which is set in initMaxRange()
//This method of normalization is used in the following paper from Interspeech 2015:
//"A Comparison of Features for Synthetic Speech Detection" by Md Sahidullah, Tomi Kinnunen, Cemal Hanilci
if (m_max_range == 0)
throw std::runtime_error("FrameExtractor: the maximum range in frame is 0. Please make sure you provide non-zero sampling frequency.");
frame_d /= m_max_range;
}
}
......
This diff is collapsed.
......@@ -13,7 +13,7 @@
PyDoc_STRVAR(s_energy_str, BOB_EXT_MODULE_PREFIX ".Energy");
PyDoc_STRVAR(s_energy_doc,
"Energy(sampling_frequency, [win_length_ms=20., [win_shift_ms=10.]]) -> new Energy\n\
"Energy(sampling_frequency, [win_length_ms=20., [win_shift_ms=10., [normalize_mean=True]]]) -> new Energy\n\
Energy(other) -> new Energy\n\
\n\
Objects of this class, after configuration, can extract the energy\n\
......@@ -30,6 +30,11 @@ win_length_ms\n\
win_shift_ms\n\
[float] the window shift in miliseconds\n\
\n\
normalize_mean\n\
[bool] Tells whether frame should be normalized \n\
by subtracting mean (True) or dividing by max_range (False)\n\
``True`` is the default value.\n\
\n\
other\n\
[Energy] an object of which is or inherits from ``Energy``\n\
that will be deep-copied into a new instance.\n\
......@@ -92,17 +97,22 @@ static int PyBobApEnergy_InitParameters
"sampling_frequency",
"win_length_ms",
"win_shift_ms",
"normalize_mean",
0};
static char** kwlist = const_cast<char**>(const_kwlist);
double sampling_frequency = 0.;
double win_length_ms = 20.;
double win_shift_ms = 10.;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "d|dd", kwlist,
&sampling_frequency, &win_length_ms, &win_shift_ms)) return -1;
PyObject* normalize_mean = Py_True;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "d|ddO", kwlist,
&sampling_frequency, &win_length_ms, &win_shift_ms, &normalize_mean)) return -1;
bool normalize_mean_ = PyObject_IsTrue(normalize_mean);
try {
self->cxx = new bob::ap::Energy(sampling_frequency, win_length_ms, win_shift_ms);
self->cxx = new bob::ap::Energy(sampling_frequency,
win_length_ms, win_shift_ms, normalize_mean_);
if (!self->cxx) {
PyErr_Format(PyExc_MemoryError, "cannot create new object of type `%s' - no more memory", Py_TYPE(self)->tp_name);
return -1;
......@@ -168,7 +178,7 @@ static int PyBobApEnergy_Init(PyBobApEnergyObject* self,
static PyObject* PyBobApEnergy_Repr(PyBobApEnergyObject* self) {
static const int MAXSIZE = 256;
char buffer[MAXSIZE];
auto count = std::snprintf(buffer, MAXSIZE, "%s(sampling_frequency=%f, win_length_ms=%f, win_shift_ms=%f)", Py_TYPE(self)->tp_name, self->cxx->getSamplingFrequency(), self->cxx->getWinLengthMs(), self->cxx->getWinShiftMs());
auto count = std::snprintf(buffer, MAXSIZE, "%s(sampling_frequency=%f, win_length_ms=%f, win_shift_ms=%f, normalize_mean=%s)", Py_TYPE(self)->tp_name, self->cxx->getSamplingFrequency(), self->cxx->getWinLengthMs(), self->cxx->getWinShiftMs(), self->cxx->getNormalizeMean()?"True":"False");
return
# if PY_VERSION_HEX >= 0x03000000
PyUnicode_FromStringAndSize
......
......@@ -13,7 +13,7 @@
PyDoc_STRVAR(s_frame_extractor_str, BOB_EXT_MODULE_PREFIX ".FrameExtractor");
PyDoc_STRVAR(s_frame_extractor_doc,
"FrameExtractor(sampling_frequency, [win_length_ms=20., [win_shift_ms=10.]]) -> new FrameExtractor\n\
"FrameExtractor(sampling_frequency, [win_length_ms=20., [win_shift_ms=10., [normalize_mean=True]]]) -> new FrameExtractor\n\
FrameExtractor(other) -> new FrameExtractor\n\
\n\
This class is a base type for classes that perform audio\n\
......@@ -35,6 +35,11 @@ win_length_ms\n\
win_shift_ms\n\
[float] the window shift in miliseconds\n\
\n\
normalize_mean\n\
[bool] Tells whether frame should be normalized \n\
by subtracting mean (True) or dividing by max_range (False)\n\
``True`` is the default value.\n\
\n\
other\n\
[FrameExtractor] an object of which is or inherits from a FrameExtractor\n\
that will be deep-copied into a new instance.\n\
......@@ -95,17 +100,22 @@ static int PyBobApFrameExtractor_InitParameters
"sampling_frequency",
"win_length_ms",
"win_shift_ms",
"normalize_mean",
0};
static char** kwlist = const_cast<char**>(const_kwlist);
double sampling_frequency = 0.;
double win_length_ms = 20.;
double win_shift_ms = 10.;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "d|dd", kwlist,
&sampling_frequency, &win_length_ms, &win_shift_ms)) return -1;
PyObject* normalize_mean = Py_True;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "d|ddO", kwlist,
&sampling_frequency, &win_length_ms, &win_shift_ms, &normalize_mean)) return -1;
bool normalize_mean_ = PyObject_IsTrue(normalize_mean);
try {
self->cxx = new bob::ap::FrameExtractor(sampling_frequency, win_length_ms, win_shift_ms);
self->cxx = new bob::ap::FrameExtractor(sampling_frequency,
win_length_ms, win_shift_ms, normalize_mean_);
if (!self->cxx) {
PyErr_Format(PyExc_MemoryError, "cannot create new object of type `%s' - no more memory", Py_TYPE(self)->tp_name);
return -1;
......@@ -170,7 +180,7 @@ static int PyBobApFrameExtractor_Init(PyBobApFrameExtractorObject* self,
static PyObject* PyBobApFrameExtractor_Repr(PyBobApFrameExtractorObject* self) {
static const int MAXSIZE = 256;
char buffer[MAXSIZE];
auto count = std::snprintf(buffer, MAXSIZE, "%s(sampling_frequency=%f, win_length_ms=%f, win_shift_ms=%f)", Py_TYPE(self)->tp_name, self->cxx->getSamplingFrequency(), self->cxx->getWinLengthMs(), self->cxx->getWinShiftMs());
auto count = std::snprintf(buffer, MAXSIZE, "%s(sampling_frequency=%f, win_length_ms=%f, win_shift_ms=%f, normalize_mean=%s)", Py_TYPE(self)->tp_name, self->cxx->getSamplingFrequency(), self->cxx->getWinLengthMs(), self->cxx->getWinShiftMs(), self->cxx->getNormalizeMean()?"True":"False");
return
# if PY_VERSION_HEX >= 0x03000000
PyUnicode_FromStringAndSize
......@@ -318,6 +328,38 @@ static int PyBobApFrameExtractor_SetWinShiftMs
}
PyDoc_STRVAR(s_normalize_mean_str, "normalize_mean");
PyDoc_STRVAR(s_normalize_mean_doc,
"Tells whether frame should be normalized by subtracting mean (True) or dividing by max_range (False)\n\
");
// Getter for the `normalize_mean' attribute: maps the C++ flag onto the
// corresponding Python boolean and returns it.
static PyObject* PyBobApFrameExtractor_GetNormalizeMean
(PyBobApFrameExtractorObject* self, void* /*closure*/) {
  const bool subtract_mean = self->cxx->getNormalizeMean();
  if (!subtract_mean) Py_RETURN_FALSE;
  Py_RETURN_TRUE;
}
// Setter for the `normalize_mean' attribute.
//
// Evaluates the truth value of `o' and forwards it to the C++ object.
// Returns 0 on success, or -1 with a Python exception set when:
//   * the attribute is being deleted (`o' is NULL),
//   * the truth value of `o' cannot be evaluated,
//   * the underlying C++ setter throws.
static int PyBobApFrameExtractor_SetNormalizeMean
(PyBobApFrameExtractorObject* self, PyObject* o, void* /*closure*/) {
  // Attribute deletion reaches a getset setter with a NULL value; passing
  // that on to PyObject_IsTrue() would dereference a null pointer.
  if (!o) {
    PyErr_Format(PyExc_TypeError, "cannot delete `normalize_mean' of %s", Py_TYPE(self)->tp_name);
    return -1;
  }
  // PyObject_IsTrue() yields -1 on failure (exception already set); keep the
  // result as an int so the error value is not folded into `true'.
  const int truth = PyObject_IsTrue(o);
  if (truth < 0) return -1;
  try {
    self->cxx->setNormalizeMean(truth > 0);
  }
  catch (const std::exception& ex) {
    PyErr_SetString(PyExc_RuntimeError, ex.what());
    return -1;
  }
  catch (...) {
    PyErr_Format(PyExc_RuntimeError, "cannot reset `normalize_mean' of %s: unknown exception caught", Py_TYPE(self)->tp_name);
    return -1;
  }
  return 0;
}
PyDoc_STRVAR(s_win_length_str, "win_length");
PyDoc_STRVAR(s_win_length_doc,
"The normalized window length w.r.t. the sample frequency"
......@@ -374,6 +416,13 @@ static PyGetSetDef PyBobApFrameExtractor_getseters[] = {
s_win_shift_doc,
0
},
{
s_normalize_mean_str,
(getter)PyBobApFrameExtractor_GetNormalizeMean,
(setter)PyBobApFrameExtractor_SetNormalizeMean,
s_normalize_mean_doc,
0
},
{0} /* Sentinel */
};
......
/**
* @author Elie Khoury <Elie.Khoury@idiap.ch>
* @author Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
* @author Pavel Korshunov <Pavel.Korshunov@idiap.ch>
* @date Wed Jan 11:10:20 2013 +0200
*
* @brief Implement Linear and Mel Frequency Cepstral Coefficients
......@@ -42,11 +43,13 @@ class Ceps: public Spectrogram
* @brief Constructor. Initializes working arrays
*/
Ceps(const double sampling_frequency, const double win_length_ms=20.,
const double win_shift_ms=10., const size_t n_filters=24,
const double win_shift_ms=10., const bool normalize_mean=true, const size_t n_filters=24,
const size_t n_ceps=19, const double f_min=0.,
const double f_max=4000., const size_t delta_win=2,
const double pre_emphasis_coef=0.95, const bool mel_scale=true,
const bool dct_norm=false);
const bool rect_filter=false, const bool inverse_filter=false,
const bool dct_norm=false, const bool ssfc_features=false,
const bool scfc_features=false, const bool scmc_features=false, bool normalize_spectrum=false);
/**
* @brief Copy constructor.
......
......@@ -31,7 +31,7 @@ class Energy: public FrameExtractor
* @brief Constructor. Initializes working arrays
*/
Energy(const double sampling_frequency, const double win_length_ms=20.,
const double win_shift_ms=10.);
const double win_shift_ms=10., const bool normalize_mean=true);
/**
* @brief Copy constructor
......
......@@ -32,7 +32,8 @@ class FrameExtractor
* @brief Constructor. Initializes working arrays
*/
FrameExtractor(const double sampling_frequency,
const double win_length_ms=20., const double win_shift_ms=10.);
const double win_length_ms=20., const double win_shift_ms=10.,
const bool normalize_mean=true);
/**
* @brief Copy Constructor
......@@ -90,6 +91,11 @@ class FrameExtractor
*/
size_t getWinShift() const
{ return m_win_shift; }
/**
* @brief Tells whether frame should be normalized by subtracting mean (True) or dividing by max_range (False)
* @return the current value of the m_normalize_mean flag
*/
bool getNormalizeMean() const
{ return m_normalize_mean; }
/**
* @brief Sets the sampling frequency/frequency rate
......@@ -103,6 +109,11 @@ class FrameExtractor
* @brief Sets the window shift in miliseconds
*/
virtual void setWinShiftMs(const double win_shift_ms);
/**
* @brief Sets whether frame should be normalized by subtracting mean (true) or dividing by max_range (false)
* @param normalize_mean new value of the flag; stored as-is in m_normalize_mean
*/
virtual void setNormalizeMean(const bool normalize_mean)
{ m_normalize_mean = normalize_mean; }
protected:
/**
......@@ -114,6 +125,7 @@ class FrameExtractor
virtual void initWinSize();
virtual void initWinLength();
virtual void initWinShift();
virtual void initMaxRange();
double m_sampling_frequency; ///< The sampling frequency
double m_win_length_ms; ///< The window length in miliseconds
......@@ -121,6 +133,8 @@ class FrameExtractor
double m_win_shift_ms;
size_t m_win_shift;
size_t m_win_size;
double m_max_range; //half of the maximum possible dynamic range of the original signal (for 16 bits, it is 32768)
bool m_normalize_mean; //normalize the frame by subtracting its mean
mutable blitz::Array<double,1> m_cache_frame_d;
};
......
......@@ -2,6 +2,7 @@
* @date Wed Jan 11:10:20 2013 +0200
* @author Elie Khoury <Elie.Khoury@idiap.ch>
* @author Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
* @author Pavel Korshunov <Pavel.Korshunov@idiap.ch>
*
* @brief Implement spectrogram
*
......@@ -40,9 +41,12 @@ class Spectrogram: public Energy
*/
Spectrogram(const double sampling_frequency,
const double win_length_ms=20., const double win_shift_ms=10.,
const bool normalize_mean=true,
const size_t n_filters=24, const double f_min=0.,
const double f_max=4000., const double pre_emphasis_coeff=0.95,
bool mel_scale=true);
const double f_max=8000., const double pre_emphasis_coeff=0.95,
const bool mel_scale=true, const bool rect_filter=false, const bool inverse_filter=false,
const bool normalize_spectrum=false, const bool ssfc_features=false,
const bool scfc_features=false, const bool scmc_features=false);
/**
* @brief Copy Constructor
......@@ -103,6 +107,37 @@ class Spectrogram: public Energy
*/
bool getMelScale() const
{ return m_mel_scale; }
/**
* @brief Tells whether the filters in the filter bank are rectangular
* (the rect_filter constructor option) instead of the default filter
* @return the current value of the m_rect_filter flag
*/
bool getRectangularFilter() const
{ return m_rect_filter; }
/**
* @brief Tells whether to apply the filter in the inversed order, i.e., from high frequencies to low.
* @return the current value of the m_inverse_filter flag
*/
bool getInverseFilter() const
{return m_inverse_filter;}
/**
* @brief Tells whether to normalize power spectrum.
* @return the current value of the m_normalize_spectrum flag
*/
bool getNormalizeSpectrum() const
{return m_normalize_spectrum;}
/**
* @brief Tells whether SSFC features are being computed
* @return the current value of the m_ssfc_features flag
*/
bool getSSFCFeatures() const
{ return m_ssfc_features; }
/**
* @brief Tells whether SCFC features are being computed
* @return the current value of the m_scfc_features flag
*/
bool getSCFCFeatures() const
{ return m_scfc_features; }
/**
* @brief Tells whether SCMC features are being computed
* @return the current value of the m_scmc_features flag
*/
bool getSCMCFeatures() const
{ return m_scmc_features; }
/**
* @brief Returns the pre-emphasis coefficient.