Commit 9fc09006 by Hannah MUCKENHIRN

Initial commit

parents
This diff is collapsed. Click to expand it.
This repository is based on the Torch framework and allows you to reproduce the results of the following paper:
---
@INPROCEEDINGS{Muckenhirn_IJCB_2017,
author = {Muckenhirn, Hannah and Magimai.-Doss, Mathew and Marcel, S{\'{e}}bastien},
title = {End-to-End Convolutional Neural Network-based Voice Presentation Attack Detection},
booktitle = {International Joint Conference on Biometrics},
year = {2017},
}
---
If you use this code and/or its results, please cite the paper.
# Torch Installation
To install Torch, follow the instructions given in http://torch.ch/docs/getting-started.html
The experiments require two additional packages: `signal` and `sndfile`.
# Running experiments
## Create data files
## Train the CNN
## Forward development and evaluation sets
## Evaluate score files
# Plotting filter responses
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
-- Command-line tool: plot the frequency responses of the filters learned
-- by one convolutional layer of a trained CNN.
cmd=torch.CmdLine();
cmd:option('-model','','model to analyse');
cmd:option('-comp',"",'2nd model to compare');
cmd:option('-l',0,'Layer to use');
cmd:option('-nfft',512,'FFT points');
cmd:option('-cum',"true",'plot cumulative response, true by default');
cmd:option('-save',"",'save figure to this filename');
par=cmd:parse(arg);
require "nn"
signal = require "signal"
gnuplot = require "gnuplot"
-- load model
-- The saved model file is a pair: [1] = trained network, [2] = training
-- parameters. -- NOTE(review): inferred from the indexing below; confirm
-- against the training script that produced the file.
local modelfile=torch.load(par.model);
net=modelfile[1]
params=modelfile[2]
nlay=par.l
nfft=par.nfft
save=par.save
-- Globals read later by filterFFT(): number of filters, kernel width, and
-- per-position input dimensionality of the selected layer.
nfilt=net:get(nlay).weight:size(1);
kw=net:get(nlay).kW;
dim=net:get(nlay).weight:size(2)/kw;
-- Frequency axis in Hz for the positive half of the spectrum.
-- NOTE(review): the 8000 Hz upper bound assumes a 16 kHz sampling rate.
local freq=torch.range(1,nfft/2+1)
freq:mul(8000)
freq:div(nfft/2+1);
-- Spectrum of one 1-D kernel: zero-pad to nfft samples, take the complex
-- FFT, and return (a) the magnitude of the positive half normalized to
-- sum to 1 and (b) the full complex spectrum.
local function kernelSpectrum(wts, nfft, fftlen)
  local padding = torch.FloatTensor(nfft - wts:size(1)):fill(0)
  local sig = torch.cat(wts, padding, 1)
  local fcomp = signal.fft(sig)
  -- |X| = sqrt(re^2 + im^2); fcomp is laid out as (nfft x 2) re/im pairs
  local fabs = torch.squeeze(torch.sum(torch.cmul(fcomp, fcomp), 2):sqrt())
  local half = fabs:narrow(1, 1, fftlen)
  return torch.div(half, half:sum()), fcomp
end

--- Frequency responses of all filters of layer `nlay` of `net`.
-- Relies on the globals nfilt, dim and kw set up from the same layer.
-- Layer 1 filters are single 1-D kernels; deeper layers have `dim`
-- kernels per filter (one per input channel).
-- Fix over the original: all intermediates are `local` instead of
-- leaking into the global environment, and the duplicated spectrum
-- computation is factored into kernelSpectrum().
-- @return F     normalized magnitude spectra
-- @return Fcomp complex spectra
function filterFFT(net,nlay,nfft)
  local fftlen = nfft/2+1
  local F, Fcomp
  if nlay==1 then
    Fcomp = torch.Tensor(nfilt, nfft, 2)
    F = torch.Tensor(nfilt, fftlen)
    for i=1,nfilt do
      F[i], Fcomp[i] = kernelSpectrum(net:get(nlay).weight[i], nfft, fftlen)
    end
  else
    Fcomp = torch.Tensor(nfilt, dim, nfft, 2)
    F = torch.Tensor(nfilt, dim, fftlen)
    for i=1,nfilt do
      -- reshape the flat weight row into (dim x kw) per-channel kernels
      local kernels = torch.reshape(net:get(nlay).weight[i], dim, kw)
      for j=1,dim do
        F[i][j], Fcomp[i][j] = kernelSpectrum(kernels[j], nfft, fftlen)
      end
    end
  end
  return F,Fcomp
end
F=filterFFT(net,nlay,nfft)
if par.cum=="true" then
-- Cumulative response: sum of the normalized spectra of all filters.
-- NOTE(review): for nlay > 1, F[i] is 2-D (dim x fftlen), so Fcum is 2-D
-- while freq is 1-D -- confirm this combination is ever used with -cum.
Fcum=torch.Tensor(F[1]:size()):fill(0)
for i=1,nfilt do
Fcum=Fcum+F[i]
end
-- Either render to a PNG file or to an interactive gnuplot window.
if save~="" then
gnuplot.pngfigure(save)
gnuplot.plot({freq,Fcum})
gnuplot.title("Cumulative response")
gnuplot.xlabel("Hz")
gnuplot.plotflush()
else
gnuplot.figure(1)
gnuplot.plot({freq,Fcum})
gnuplot.title("Cumulative response")
gnuplot.xlabel("Hz")
end
else
-- plotting a few random filters
local p=torch.randperm(nfilt)
if nlay==1 then
-- 20 random first-layer filters, one gnuplot window each
for i=1,20 do
gnuplot.figure(i)
gnuplot.plot({freq,F[p[i]]})
end
else
-- deeper layer: 4 random (filter, input-channel) pairs
local p2=torch.randperm(dim)
for i=1,4 do
gnuplot.figure(i)
gnuplot.plot(F[p[i]][p2[i]])
end
end
end
-- Forward-pass script: runs a trained CNN over every frame of a
-- development or evaluation set and writes one averaged score per
-- utterance into "scores-<type>", "scores-<type>-real" and
-- "scores-<type>-attack".
require "paths"
require "nn"
require "math"
require "gnuplot"
torch.setdefaulttensortype('torch.FloatTensor');
-- functions
paths.dofile("speech_dataset.lua")
cmd=torch.CmdLine();
cmd:option('-modelID',"",'name of model to use');
cmd:option('-model',"",'model to use');
cmd:option('-type',"eval",'[dev,eval]');
cmd:option('-data',"",'[dev,eval]');
cmd:option('-label',"",'[dev,eval]');
cmd:option('-path',"",'[dev,eval]');
cmd:option('-VAD',"",'');
params2=cmd:parse(arg);
model=torch.load(params2.model);
modelID=params2.modelID;
-- Saved model file is a pair: [1] = trained network, [2] = training params.
net=model[1]
params=model[2]
-- Encode every non-default training parameter in the output directory
-- name so runs with different settings never collide.
dirname="forward_" .. params.arch .. "_"
for k,v in pairs(params) do
if v~=0 and v~="" and k~="save" and k~="trainData" and k~="trainLabel" and k~="devData" and k~="devLabel" and k~="trainVAD" and k~="devVAD" then
dirname=dirname .. k .. "=" .. v .. "_"
end
end
dirname=dirname .. modelID .. "_"
-- NOTE(review): shell command built from user-controlled paths; acceptable
-- for a research script, not safe for untrusted input.
os.execute("mkdir -v " .. params.save .. "/" .. dirname)
--params.nvalid=193404 --AVspoofPA eval:48896 -- AVspoofLA:25636 -- ASVspoof 193404
-- DATASET
local configValid={}
configValid.datafile=params2.data
configValid.labelfile=params2.label
configValid.pathfile=params2.path
configValid.vadfile= params2.VAD
--configValid.nData=params.nvalid
configValid.feat="wav"
configValid.nSamplePerFrame=160 -- 10ms @ 16kHz
configValid.norm=params.norm
configValid.contextframe=params.context
validSet=SpeechDataset(configValid)
-- dataset-level normalization reuses the statistics saved at training time
if params.norm=="dset" then
validSet.normMean=params.normMean
validSet.normStd=params.normStd
end
validSet:normalize()
print( validSet.nData .. " validation sequences")
print("Creating network ...");
nInput=validSet.nInput
nOutput=2;
print(nInput .. " samples for each example");
-- TEST
--os.execute("ls " .. params.save .. "/" .. dirname)
--lfs.mkdir(params.save .. "/" .. dirname)
-- reusable input buffer: one frame plus context, filled by get_data()
seq=torch.Tensor(nInput,1):fill(0);
local savepath=params.save .. "/" .. dirname
-- fail fast with a clear message if a score file cannot be created
file_real=assert(io.open(savepath .. "/scores-"..params2.type.."-real","w"))
file_attack=assert(io.open(savepath .. "/scores-"..params2.type.."-attack","w"))
file_scores=assert(io.open(savepath .. "/scores-"..params2.type,"w"))
c=1
print("number of frames: " .. validSet.nExample)
print("number of utterances: " .. validSet.nData)
for i = 1,validSet.nData do
local nbVoicedFrames=0
local score=0
target=validSet:get_label(c)
path = validSet:get_path(c)
if path == nil then
path = 'label'
end
-- average the network output over the voiced frames of the utterance
for j=1,validSet.nFrame[i] do
vad = validSet:get_vad(c)
if (vad == 1) then
validSet:get_data(c,seq)
net:forward(seq)
-- net.output holds log-probabilities; exp() recovers the probability
score = score + math.exp(net.output[1])
nbVoicedFrames = nbVoicedFrames + 1
end
c=c+1
end
if (nbVoicedFrames~=0) then
score = score/nbVoicedFrames
else
print("The utterance " .. i .. " does not contain speech")
end
if target==1 then
file_real:write(target .. ' ' .. target .. ' ' .. path .. ' ' .. score .. '\n')
file_scores:write(target .. ' ' .. target .. ' ' .. path .. ' ' .. score .. '\n')
else
file_attack:write(target .. " attack " .. path .. ' ' .. score .. '\n')
file_scores:write(target .. " attack " .. path .. ' ' .. score .. '\n')
end
end
file_real:close()
file_attack:close()
-- fix: the combined score file was never closed in the original version,
-- so its last buffered writes could be lost
file_scores:close()
-- use: torch saveLabel.lua listfile_real listfile_attack outputfile
-- Writes one numeric class label per line of the two input lists:
-- 1 for every genuine (real) utterance, 2 for every attack utterance.
label = {}
-- Append `class` once per line of `listfile`.
local function append_labels(listfile, class)
  for _ in io.lines(listfile) do
    label[#label + 1] = class
  end
end
append_labels(arg[1], 1)
append_labels(arg[2], 2)
torch.save(arg[3], label)
-- use: torch savePaths.lua listfile_real listfile_attack outputfile
-- Concatenates the lines (file paths) of the two list files -- real
-- utterances first, then attacks -- and saves the result as one table.
path_table = {}
-- Append every line of `listfile` to path_table.
local function collect(listfile)
  for line in io.lines(listfile) do
    path_table[#path_table + 1] = line
  end
end
collect(arg[1])
collect(arg[2])
torch.save(arg[3], path_table)
-- use: torch saveVAD.lua listfile_real listfile_attack outputfile
-- (the original usage comment said "savePaths" -- copy-paste error)
-- Each line of the input files is a space-separated sequence of numeric
-- VAD (voice activity detection) flags for one utterance; every line is
-- parsed into a 1-D tensor and the tensors are saved as one table.
-- Fix over the original: the loop counter and scratch tensor no longer
-- leak into the global environment, and the duplicated parse loop is
-- factored into a helper.
vad_table={}
-- Parse every line of `listfile` into a tensor appended to vad_table.
local function append_vad(listfile)
  for l in io.lines(listfile) do
    local t = {}
    local i = 1
    for str in string.gmatch(l, "([^ ]+)") do
      t[i] = tonumber(str)
      i = i + 1
    end
    table.insert(vad_table, torch.Tensor(t):squeeze())
  end
end
append_vad(arg[1])
append_vad(arg[2])
torch.save(arg[3], vad_table)
-- use: torch saveWav.lua listfile_real listfile_attack outputfile
-- read all wavefile in listfile, load them, store them in a table and save the table in binary in outputfile
require "sndfile"

--- Load every wav file listed (one path per line) in the two list files.
-- Real files come first, attack files second, matching saveLabel.lua so
-- labels and waveforms stay aligned by index.
-- Fix over the original: intermediates (filename table, file handle,
-- counters) are now `local` instead of leaking into the global
-- environment; the unused sample-rate read and counter were removed.
-- @return table of 1-D ShortTensors, one per audio file
function wav_load(filename_real, filename_attack)
-- collect the file names from both lists, real first
local filenames = {}
for line in io.lines(filename_real) do
table.insert(filenames, line)
end
for line in io.lines(filename_attack) do
table.insert(filenames, line)
end
local data = {}
-- load each audio file as raw 16-bit samples
for j = 1, #filenames do
local f = sndfile.SndFile(filenames[j])
local nSample = f:info().frames
local seq = torch.ShortTensor(nSample, 1)
f:readShort(seq)
f:close()
table.insert(data, seq:squeeze())
end
return data
end
data=wav_load(arg[1], arg[2])
torch.save(arg[3],data)
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
--- Dataset of utterances (raw waveform or MFCC) addressed by frame index.
-- Loads the serialized data/label (and optional path/VAD) tables produced
-- by the save*.lua scripts, pads utterances shorter than one input
-- window, and builds a frame -> (utterance, frame) index map.
local SpeechDataset = torch.class('SpeechDataset')
--- @param config table with fields: datafile, labelfile, pathfile,
-- vadfile, nSamplePerFrame, feat, norm, contextframe, and optionally
-- normMean/normStd (precomputed dataset statistics, e.g. from training).
function SpeechDataset:__init(config)
print("loading " .. config.datafile)
collectgarbage()
self.data=torch.load(config.datafile);
print(type(self.data));
self.countGarbageCollector=0;
self.label=torch.load(config.labelfile);
print("data loaded")
-- optional per-utterance file paths
self.path = nil
if config.pathfile ~= nil then
self.path = torch.load(config.pathfile)
end
-- optional per-utterance VAD flag sequences
-- NOTE(review): pathfile is compared against nil but vadfile against "";
-- confirm callers pass consistent sentinel values for "absent".
self.vad = nil
if config.vadfile ~= "" then
self.vad = torch.load(config.vadfile)
end
self.nData = #self.data
self.samp = config.nSamplePerFrame -- number of raw samples per considered labeled frame (e.g. 10ms -> 160 samples @ 16kHz)
self.feat=config.feat;
if self.feat:match("MFCC") then
self.isMFCC=true
else
self.isMFCC=false;
end
self.norm=config.norm
self.cont=config.contextframe
-- one example = center frame plus `cont` context frames on each side
self.nInput=self.samp*(2*self.cont+1)
-- Pad utterances shorter than one input window:
-- more than half a window long -> mirror the trailing samples;
-- otherwise -> bare resize (padding contents left uninitialized).
self.countTooShort=0
self.isTooShort = torch.Tensor(self.nData):zero()
for i=1,self.nData do
if (self.data[i]:size(1) < self.nInput) then
self.isTooShort[i]=1
if (self.data[i]:size(1) > self.nInput/2+1) then
local origSize=self.data[i]:size(1)
self.data[i]:resize(self.nInput)
for j=1,self.nInput-origSize do
self.data[i][j+origSize]=self.data[i][origSize-j]
end
else
self.data[i]:resize(self.nInput)
self.countTooShort=self.countTooShort+1
end
end
end
-- number of whole frames per utterance
self.nFrame=torch.Tensor(self.nData);
for i=1,self.nData do
self.nFrame[i]=math.floor(self.data[i]:size(1)/self.samp);
end
self.nExample=self.nFrame:sum()
print("There are "..self.countTooShort.." utterances too short over "..self.nData)
-- map: global frame index k -> (utterance index, frame within utterance)
self.map=torch.Tensor(self.nExample,2);
local k=1;
for i=1,self.nData do
for j=1,self.nFrame[i] do
self.map[k][1]=i;
self.map[k][2]=j;
k=k+1;
end
end
-- reuse externally supplied normalization statistics when provided
if config.normMean~=nil then
self.normMean=config.normMean
self.normStd=config.normStd
end
end
--- Normalize self.data in place according to self.norm.
-- MFCC features:
--   "seq"  : per-utterance, per-coefficient mean/std normalization.
--   "dset" : dataset-level per-coefficient statistics, computed here on
--            first use (skipped when normMean/normStd were already set).
-- Raw waveforms:
--   "dset" : only the scalar dataset mean/std are computed here; the
--            normalization itself is applied per window in get_data().
function SpeechDataset:normalize()
if self.isMFCC then
if self.norm=="seq" then
for i=1,self.nData do
-- view the utterance as (nframe x samp) to normalize each coefficient
local nframe=self.data[i]:size(1)/self.samp
self.data[i]:resize(nframe,self.samp)
for j=1,self.samp do
self.data[i]:select(2,j):add(-self.data[i]:select(2,j):mean())
self.data[i]:select(2,j):div(self.data[i]:select(2,j):std())
end
self.data[i]:resize(nframe*self.samp)
end
elseif self.norm=="dset" then
for i=1,self.nData do
local nframe=self.data[i]:size(1)/self.samp
self.data[i]:resize(nframe,self.samp)
end
if self.normMean==nil then
self.normMean=torch.Tensor(self.samp)
self.normStd=torch.Tensor(self.samp)
-- accumulate sum and sum-of-squares per coefficient over the whole
-- dataset; std uses the unbiased (n-1) estimator
for j=1,self.samp do
local mean=0
local norm=0
local var=0
for i=1,#self.data do
local nframe=self.data[i]:size(1)
local s=self.data[i]:select(2,j):sum()
local s2=torch.pow(self.data[i]:select(2,j),2):sum()
mean=mean+s
var=var+s2
norm=norm+nframe
end
self.normStd[j]=math.sqrt((var-(mean*mean)/norm)/(norm-1))
self.normMean[j]=mean/norm
end
end
for i=1,#self.data do
local nframe=self.data[i]:size(1)
for j=1,self.samp do
self.data[i]:select(2,j):add(-self.normMean[j])
self.data[i]:select(2,j):div(self.normStd[j])
end
self.data[i]:resize(nframe*self.samp)
end
end
else
if self.norm=="dset" then
if self.normMean==nil then
local mean=0
local norm=0
local var=0
for i=1,#self.data do
-- periodic GC keeps the temporary float copies from accumulating
if i % 2000 == 0 then
print (i .. " 1st loop, memory: " .. collectgarbage("count"))
collectgarbage()
end
-- statistics are computed on mean-removed utterances, matching the
-- per-utterance mean subtraction applied later in get_data().
-- NOTE(review): if self.data[i] is already a FloatTensor, :float()
-- returns the same storage and add() mutates the stored data --
-- confirm the data is loaded as ShortTensor (as saveWav.lua does).
local normMean=self.data[i]:float():mean()
local seq=self.data[i]:float():add(-normMean)
local s=seq:sum()
local s2=torch.pow(seq,2):sum()
mean=mean+s
var=var+s2
norm=norm+seq:size(1)
end
self.normStd=math.sqrt((var-(mean*mean)/norm)/(norm-1))
self.normMean=mean/norm
end
end
end
end
--- Copy the k-th example (one frame plus self.cont context frames on each
-- side) into the pre-allocated tensor `temp` and normalize it in place.
-- @param k    global frame index (row into self.map)
-- @param temp tensor with self.nInput elements, overwritten by this call
function SpeechDataset:get_data(k,temp)
local i=self.map[k][1]  -- utterance index
local j=self.map[k][2]  -- frame index within the utterance
local nsam_adj=self.cont*self.samp
if j-1-self.cont<=0 then
-- not enough left context: replicate the first frame into the gap
local supfr=self.cont-j+1
temp:narrow(1,supfr*self.samp+1,self.nInput-supfr*self.samp):copy(self.data[i]:narrow(1,1,self.nInput-supfr*self.samp));
-- NOTE(review): this loop variable shadows the parameter k
for k=1,supfr do
temp:narrow(1,(k-1)*self.samp+1,self.samp):copy(self.data[i]:narrow(1,1,self.samp));
end
elseif (j+self.cont)> self.nFrame[i] then
-- not enough right context: replicate the last frame into the gap
local supsam= j+self.cont - self.nFrame[i]
local Lunp=(2*self.cont+1-supsam)*self.samp --number of sample no out of bound
temp:narrow(1,1,Lunp):copy(self.data[i]:narrow(1,self.data[i]:size(1)-Lunp,Lunp));
for k=1,supsam do
temp:narrow(1,(k-1)*self.samp+1+Lunp,self.samp):copy(self.data[i]:narrow(1,self.data[i]:size(1)-self.samp+1,self.samp));
end
else
-- fully inside the utterance: one contiguous copy
temp:copy(self.data[i]:narrow(1,(j-1-self.cont)*self.samp+1,self.nInput))
end
-- periodic garbage collection to bound memory growth over long passes
if self.countGarbageCollector%1000==0 then
collectgarbage()
end
self.countGarbageCollector=self.countGarbageCollector+1
-- always subtract the utterance mean first
-- NOTE(review): the in-place updates below rely on temp:float() returning
-- the caller's tensor itself (i.e. temp is already a FloatTensor, as set
-- by setdefaulttensortype); otherwise the caller's buffer would not be
-- updated -- confirm.
local normMean=self.data[i]:float():mean()
temp=temp:float():add(-normMean);
if self.norm=="win" then
temp=temp:float():add(-temp:float():mean());
temp=temp:float():div(temp:float():std());
elseif self.isMFCC==false and self.norm=="seq" then
-- NOTE(review): normStd is assigned without `local` here (global leak)
normStd=self.data[i]:float():std()
temp=temp:float():div(normStd);
elseif self.isMFCC==false and self.norm=="dset" then
temp=temp:float():add(-self.normMean);
temp=temp:float():div(self.normStd);
end
end
--- Class label of the utterance that global frame `k` belongs to.
-- self.map[k][1] holds the utterance index of frame k.
function SpeechDataset:get_label(k)
local utt = self.map[k][1]
return self.label[utt]
end
--- File path of the utterance that global frame `k` belongs to,
-- or nil when no path file was loaded.
function SpeechDataset:get_path(k)
if self.path == nil then
return nil
end
local utt = self.map[k][1]
return self.path[utt]
end
--- VAD flag for global frame `k` (1 = speech, 0 = non-speech).
-- Returns 1 unconditionally when no VAD file was loaded.
-- Fix over the original: the index variables are now `local` instead of
-- clobbering the globals `i` and `j`.
function SpeechDataset:get_vad(k)
if self.vad == nil then
return 1
end
local i = self.map[k][1] -- utterance index
local j = self.map[k][2] -- frame index within the utterance
-- The VAD sequence may be up to two frames shorter than the frame count
-- derived from the waveform; reuse the last available flag in that case.
-- NOTE(review): assumes a mismatch of at most 2 frames -- confirm with
-- the VAD extraction pipeline.
if (self.vad[i]:size(1))+1==j then
return self.vad[i][j-1]
elseif (self.vad[i]:size(1))+2==j then
return self.vad[i][j-2]
else
return self.vad[i][j]
end
end