Commit 27fad473 authored by Sushil Bhattacharjee

code to add babble noise to voxforge data

parent cb54ae59
Pipeline #24356 failed with stage
in 3 seconds
# NOTE: This is a quick hack and should be cleaned up before proper use. Paths and file names are currently hard-coded for VoxForge.
import os
import sys
import wave
import audioop
import numpy as np
import logging
import math
from scipy.io import wavfile
#from scipy import interpolate
#ALL_NOISE_TYPES = ['white','pink','hfchannel','babble','factory1','factory2','buccaneer1','buccaneer2','destroyerengine','destroyerops','f16','leopard','m109','machinegun','volvo']
#nTypes = {'white','babble'};
#nTypes = {'pink','factory1'};
#total_num_noise_types = len(ALL_NOISE_TYPES)
def load_resample_noise(srcfile, outrate=16000):
    """Load a WAV file and return its samples at the requested sampling rate.

    The raw frames are read with the ``wave`` module, passed through
    ``audioop.ratecv`` when the file's rate differs from ``outrate``, and then
    interpreted as 16-bit integers.

    Args:
        srcfile: Path of the WAV file to read.
        outrate: Target sampling rate in Hz.

    Returns:
        A writable 1-D ``numpy.int16`` array of samples, or ``None`` when the
        file does not exist or cannot be opened as a WAV file. No channel
        handling is done here, so multi-channel data stays in whatever frame
        layout ``wave`` delivers.
    """
    if not os.path.exists(srcfile):
        logging.warning('Source file not found: {}'.format(srcfile))
        return None

    # Only the failures wave.open() is expected to produce are swallowed, so
    # that KeyboardInterrupt and genuine programming errors still propagate.
    try:
        s_read = wave.open(srcfile, 'rb')
    except (wave.Error, EOFError, OSError):
        logging.warning('Failed to open file: {}'.format(srcfile))
        return None

    # The context manager closes the file even if reading the frames fails.
    with s_read:
        insamprate = s_read.getframerate()
        sampwidth = s_read.getsampwidth()
        inchannels = s_read.getnchannels()
        indata = s_read.readframes(s_read.getnframes())

    if sampwidth != 2:
        # The bytes are decoded as int16 below; other widths would be misread.
        logging.warning(
            'Expected 16-bit samples but {} has sample width {}'.format(
                srcfile, sampwidth))

    if insamprate != outrate:
        converted, _ = audioop.ratecv(
            indata, sampwidth, inchannels, insamprate, outrate, None)
    else:
        converted = indata

    # np.fromstring is deprecated for binary input; frombuffer yields a
    # read-only view, so copy to keep the returned array writable as before.
    out_data = np.frombuffer(converted, dtype=np.int16).copy()
    print(out_data.shape)
    return out_data
def add_babble_noise(audio_sample_file, noise_data, noise_level=10):
    """Mix the leading part of ``noise_data`` into a speech WAV file.

    The speech file is read with ``scipy.io.wavfile.read``; the first
    ``len(speech)`` noise samples are scaled by
    ``sqrt(10 ** (-noise_level / 10) * P_noise / P_speech)`` and added to the
    speech, where ``P_*`` are mean sample powers.

    Args:
        audio_sample_file: Path of the speech WAV file.
            NOTE(review): the mixing below assumes 1-D (mono) speech.
        noise_data: NumPy array of noise samples, at least as long as the
            speech signal.
        noise_level: Level in dB that enters the gain formula above.

    Returns:
        ``(speech_fs, noisy_speech)``: the sampling rate reported for the
        speech file and the mixed signal. The result has the dtype NumPy
        would give ``speech + noise``; integer results are saturated to the
        dtype's range instead of wrapping around.

    Raises:
        AssertionError: if the noise is shorter than the speech.
    """
    speech_fs, speech_data = wavfile.read(audio_sample_file)
    len_speech = len(speech_data)
    len_noise = len(noise_data)
    # Raised explicitly (rather than via ``assert``) so the check survives -O.
    if len_noise < len_speech:
        raise AssertionError(
            'Noise sample is shorter than speech sample. Quitting')

    out_dtype = np.result_type(speech_data.dtype, noise_data.dtype)
    if len_speech == 0:
        # Nothing to mix, and the power computation would divide by zero.
        return speech_fs, speech_data.astype(out_dtype)

    # Work in float64: squaring int16 samples in their own dtype wraps around
    # and yields a meaningless (possibly negative) power.
    speech = speech_data.astype(np.float64)
    nseg = noise_data[0:len_speech].astype(np.float64)
    Pnseg = float(np.mean(nseg * nseg))  # noise power
    Px = float(np.mean(speech * speech))  # speech power

    if Px == 0:
        # The gain is undefined for digital silence; leave the speech as is.
        logging.warning(
            'Speech power is zero, no noise added: {}'.format(
                audio_sample_file))
        return speech_fs, speech_data.astype(out_dtype)

    # NOTE(review): this gain is applied to the noise yet grows with
    # P_noise / P_speech; a conventional SNR mix would use the inverse ratio.
    # Formula kept unchanged -- confirm the intended meaning of noise_level.
    factor = math.sqrt(10 ** (-noise_level / 10.0) * Pnseg / Px)

    scaled_noise = factor * nseg
    if np.issubdtype(noise_data.dtype, np.integer):
        # Integer noise has always been truncated toward zero after scaling.
        scaled_noise = np.trunc(scaled_noise)

    mixed = speech + scaled_noise
    if np.issubdtype(out_dtype, np.integer):
        # Saturate instead of letting the integer sum wrap around.
        limits = np.iinfo(out_dtype)
        mixed = np.clip(mixed, limits.min, limits.max)
    noisy_speech = mixed.astype(out_dtype)
    return speech_fs, noisy_speech
def load_speech_file_list(file_list='voxforge_denoised_filelist.txt'):
    """Read the list of speech files, one path per line.

    Args:
        file_list: Path of the text file holding the list. Defaults to the
            VoxForge list expected in the current working directory.

    Returns:
        The list of entries with surrounding whitespace stripped. Blank lines
        are skipped so that callers never receive an empty path.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(file_list) as f:
        stripped = [line.strip() for line in f]
    list_of_files = [entry for entry in stripped if entry]
    print('no. of speech files:', len(list_of_files))
    return list_of_files
def main():
    """Create a babble-noise copy of every file in the VoxForge file list.

    For each listed file, the speech is read from ``voxforge_root``, mixed
    with the babble recording and written to the same relative path under
    ``out_root``. Files that already exist in the output tree are skipped, so
    an interrupted run can be resumed. Paths are hard-coded for now.
    """
    voxforge_root = '/idiap/resource/database/VoxForge/audio/denoised/SpeechCorpus/Trunk/Audio/Main/16kHz_16bit'
    out_root = '/idiap/temp/sbhatta/work/voxforge_babble'
    noise_filename = 'babble_SIGNAL019-20kHz.wav'
    noise_db = 10

    # Entries look like './1028-20100710-hne/wav/ar-01.wav'.
    list_of_files = load_speech_file_list()

    noise_data = load_resample_noise(noise_filename, outrate=16000)
    if noise_data is None:
        # Without noise there is nothing to do; stop with a clear message
        # instead of failing later on the first speech file.
        logging.error('Could not load noise file: {}'.format(noise_filename))
        sys.exit(1)
    print('noise loaded')

    for fv in list_of_files:
        out_f = os.path.join(out_root, fv)
        if os.path.isfile(out_f):
            continue

        # Create the whole output directory chain in one call, whatever the
        # nesting depth of the entry; exist_ok tolerates concurrent creation.
        out_dir = os.path.dirname(out_f)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir, exist_ok=True)
            print(out_dir)

        f = os.path.join(voxforge_root, fv)
        speech_fs, noisy_speech = add_babble_noise(
            f, noise_data, noise_level=noise_db)
        wavfile.write(out_f, speech_fs, noisy_speech)
# Run the batch job only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment