Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
bob.bio.spear
Manage
Activity
Members
Labels
Plan
Issues
3
Issue boards
Milestones
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
bob
bob.bio.spear
Commits
b6761b1e
Commit
b6761b1e
authored
2 years ago
by
Yannick DAYER
Browse files
Options
Downloads
Patches
Plain Diff
Add utils to augment audio with noise and reverb.
parent
51e49e39
Branches
augmentation
No related tags found
No related merge requests found
Pipeline
#60956
failed with stage
in 3 minutes and 9 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
bob/bio/spear/transformer/augmentation.py
+220
-0
220 additions, 0 deletions
bob/bio/spear/transformer/augmentation.py
with
220 additions
and
0 deletions
bob/bio/spear/transformer/augmentation.py
0 → 100644
+
220
−
0
View file @
b6761b1e
#!/usr/bin/env python
# Yannick Dayer <yannick.dayer@idiap.ch>
# Thu 14 Apr 2022 11:31:46 UTC+02
"""
Transformer definition for audio data augmentation.
Notably for the NIST-SRE database.
"""
import
librosa
import
numpy
as
np
from
scipy.signal
import
convolve
from
sklearn.base
import
BaseEstimator
def reverberate(audio, audio_rate, rir_file):
    """
    Reverberate an audio signal using a Room Impulse Response (RIR).

    The impulse response is loaded from ``rir_file`` and resampled to
    ``audio_rate``. The signal is convolved with it, shifted by the position of the
    impulse response peak, cropped to the input length, and finally rescaled so
    that its average power matches the one of the input.

    Parameters
    ----------
    audio : ndarray, shape (n_samples,)
        The audio signal to reverberate. It is not modified.
    audio_rate : int
        The sampling rate of the input audio signal. The impulse response is
        resampled to this rate.
    rir_file : str
        The file containing the impulse response.

    Returns
    -------
    reverberated : ndarray, shape (n_samples,)
        The reverberated signal.
    """
    # Work in floating point: squaring integer PCM samples could overflow.
    audio = np.asarray(audio)
    if not np.issubdtype(audio.dtype, np.floating):
        audio = audio.astype(np.float64)

    # Nothing to reverberate; also avoids dividing by a zero length below.
    if audio.size == 0:
        return audio.copy()

    # Load the impulse response and resample it to the input audio rate
    rir, _ = librosa.load(rir_file, sr=audio_rate)

    # Compute the reverberation
    reverberated = convolve(audio, rir, mode="full")

    # Copy Kaldi's behavior: output is shifted by the peak offset of the impulse
    # response and truncated to the length of the input signal.
    peak_offset = np.argmax(rir)
    reverberated = reverberated[peak_offset : peak_offset + len(audio)]

    # Scale the output to have the same energy as the input. A silent output
    # cannot be rescaled (the ratio would be a division by zero producing
    # NaN/inf samples), so it is returned as is.
    power_before = np.sum(audio**2) / len(audio)
    power_after = np.sum(reverberated**2) / len(reverberated)
    if power_after > 0:
        reverberated *= np.sqrt(power_before / power_after)

    return reverberated
def add_noise(
    audio: np.ndarray,
    audio_rate: int,
    noise_files: list,
    noise_offsets: list,
    noise_durations: list,
    noise_levels_db: list,
    normalize: bool = True,
) -> np.ndarray:
    """
    Add noise to the input audio signal.

    Each noise file is loaded, resampled to ``audio_rate``, repeated or cropped to
    the requested duration, scaled according to its level and added to the signal
    starting at its offset. The four ``noise_*`` lists are iterated in lockstep.

    Parameters
    ----------
    audio: shape (n_samples,)
        The audio signal to add noise to. It is not modified; the result is
        computed on a floating-point copy.
    audio_rate
        The sampling rate of ``audio``. Noise files are resampled to this rate.
    noise_files
        The list of noise files to use.
    noise_offsets
        The list of offsets (in seconds) to use for each noise file. A noise whose
        offset lies at or after the end of the signal is ignored.
    noise_durations
        The list of durations (in seconds) to use for each noise file. If the file is
        too short, the audio noise will be repeated. The duration is shortened when
        the noise would run past the end of the signal.
    noise_levels_db
        The list of noise levels in dB to use for each noise file.
    normalize
        Whether to normalize the output signal to have the same energy as the input.

    Returns
    -------
    noise_added: shape (n_samples,)
        The noise added signal, truncated to a 1/32768 grid.
    """
    # Work on a floating-point copy: the caller's array must stay untouched, and
    # the in-place additions below need a float buffer (integer input would fail
    # or overflow).
    source = np.asarray(audio)
    audio = np.array(source, dtype=np.result_type(source.dtype, np.float32))

    n_samples = len(audio)
    if n_samples == 0:
        # Nothing to augment; also avoids dividing by a zero length below.
        return audio

    power_before = np.sum(audio**2) / n_samples

    # Add noises to the signal
    for noise_file, noise_offset, noise_duration, noise_level_db in zip(
        noise_files, noise_offsets, noise_durations, noise_levels_db
    ):
        # Convert times to sample counts
        offset_samples = int(noise_offset * audio_rate)
        duration_samples = int(noise_duration * audio_rate)

        # Ignore this noise if offset is greater than the length of the input
        if offset_samples >= n_samples:
            continue

        # Change the noise duration to fit in the input
        duration_samples = min(duration_samples, n_samples - offset_samples)

        # An empty segment adds nothing and would yield a 0/0 noise power.
        if duration_samples <= 0:
            continue

        # Load the noise file and resample to match the input audio rate
        noise, _ = librosa.load(
            noise_file, sr=audio_rate, mono=True, res_type="soxr_hq"
        )

        # An empty file cannot be repeated (wrap-padding needs at least one sample).
        if len(noise) == 0:
            continue

        # Repeat or crop the noise to match the duration of the signal
        if len(noise) < duration_samples:
            noise = np.pad(
                noise, (0, duration_samples - len(noise)), mode="wrap"
            )
        else:
            noise = noise[:duration_samples]

        # Scale the noise using the provided SNR.
        # NOTE(review): the reference power is taken from the current signal, which
        # already contains the previously added noises — confirm this is intended.
        audio_power = np.sum(audio**2) / n_samples
        noise_power = np.sum(noise**2) / len(noise)

        # A silent noise segment has nothing to add; scaling it would divide by
        # zero and inject NaN into the signal.
        if noise_power == 0:
            continue

        scale = np.sqrt(
            10 ** (-noise_level_db / 10) * audio_power / noise_power
        )
        audio[offset_samples : offset_samples + duration_samples] += (
            scale * noise
        )

    # Normalize the audio. A silent result cannot be rescaled (division by zero).
    if normalize:
        power_after = np.sum(audio**2) / n_samples
        if power_after > 0:
            audio *= np.sqrt(power_before / power_after)

    # Original Kaldi commands saves with int16 format (effectively truncating).
    audio = np.trunc(audio * 32768) / 32768

    # Return the signal with added noises
    return audio
class Augmentation(BaseEstimator):
    """
    Transformer for audio data augmentation.

    Requires wrapping with a :py:class:`bob.pipelines.SampleWrapper`.

    Samples must contain an ``augmentation`` metadata.
    """

    def __init__(self, **kwargs):
        """
        Initialize the transformer.

        Parameters
        ----------
        **kwargs
            Keyword arguments, forwarded unchanged to the parent initializer.
        """
        super().__init__(**kwargs)

    def fit(self, X, y=None):
        """Do nothing and return ``self``; this transformer has no state to fit."""
        return self

    def transform(
        self,
        X,
        sample_rate: "list[int]",
        rir_file: "list[str]",
        noise_files: "list[list[str]]",
        noise_offsets: "list[list[float]]",
        noise_durations: "list[list[float]]",
        noise_levels_db: "list[list[float]]",
    ):
        """
        Transform the data.

        All arguments are iterated in lockstep, one entry per sample. A sample
        with a ``rir_file`` is reverberated; otherwise, a sample with a non-empty
        ``noise_files`` list gets noise added; otherwise it is passed through
        unchanged.

        Parameters
        ----------
        X
            The audio signals to transform, one per sample.
        sample_rate
            The sampling rate of each signal.
        rir_file
            The impulse response file of each signal, or ``None`` for no
            reverberation.
        noise_files
            The noise files to add to each signal (empty or ``None`` for no noise).
        noise_offsets
            The offsets (in seconds) of each noise file, for each signal.
        noise_durations
            The durations (in seconds) of each noise file, for each signal.
        noise_levels_db
            The levels (in dB) of each noise file, for each signal.

        Returns
        -------
        list
            The transformed signals, in the same order as ``X``.
        """
        output = []
        for x, sr, rir, n_files, n_offsets, n_durations, n_levels_db in zip(
            X,
            sample_rate,
            rir_file,
            noise_files,
            noise_offsets,
            noise_durations,
            noise_levels_db,
        ):
            # Samples can either have a reverberation or a noise added
            if rir is not None:
                res = reverberate(x, sr, rir)
            # A sample without noise metadata (None) is treated like one with an
            # empty noise list instead of failing on len(None).
            elif n_files is not None and len(n_files) > 0:
                res = add_noise(
                    x, sr, n_files, n_offsets, n_durations, n_levels_db
                )
            else:
                res = x
            output.append(res)
        return output

    def _more_tags(self):
        """Return the estimator tags: no fitting needed, stateless, extra inputs."""
        return {
            "requires_fit": False,
            "stateless": True,
            # Pairs of names; presumably (transform argument, sample attribute) as
            # consumed by the bob.pipelines sample wrapper — TODO confirm.
            "bob_transform_extra_input": [
                ("sample_rate", "rate"),
                ("rir_file", "rir_file"),
                ("noise_files", "noise_files"),
                ("noise_offsets", "noise_offsets"),
                ("noise_durations", "noise_durations"),
                ("noise_levels_db", "noise_levels_db"),
            ],
        }
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment