Commit e53d809e authored by André Anjos's avatar André Anjos 💬
Browse files

Merge branch 'update_avspoof' into 'master'

AVspoof: added spoof attacks to ASV protocols

Updated protocols of AVspoof database

See merge request !7
parents 2d399621 d04703c3
Pipeline #1159 skipped
This diff is collapsed.
......@@ -40,11 +40,19 @@ class RecognitionTraining:
self.parameters = parameters
# verification.Database implements bob.db.verification.utils interface
self.db = bob.db.avspoof.verification.Database(original_directory=self.root_folder, original_extension='.wav')
# self.db = bob.db.avspoof.verification.Database(original_directory=self.root_folder, original_extension='.wav')
self.db = bob.db.avspoof.Database()
# retrieve all file paths for the given parameters
self.objs = sorted(self.db.objects(protocol=parameters['protocol'], groups=parameters['group']), key=lambda x: x.id)
self.objs = sorted(self.db.objects(protocol=parameters['protocol'], groups=parameters['group'], cls=('enroll', 'probe')), key=lambda x: x.id)
# print("protocol: %s, group: %s, objects: %d" % (parameters['protocol'], parameters['group'], len(self.objs)))
if parameters['protocol'] == "smalltest":
assert (len(self.objs) == 741)
else:
assert (len(self.objs) == 4973)
self.next_index = 0
self.force_start_index = force_start_index
......@@ -77,7 +85,7 @@ class RecognitionTraining:
# read audio data
if self.outputs['speech'].isConnected():
rate, audio = scipy.io.wavfile.read(obj.make_path(self.root_folder, self.db.original_extension))
rate, audio = scipy.io.wavfile.read(obj.make_path(directory=self.root_folder, extension='.wav'))
data = {
'value': numpy.cast['float'](audio)
}
......@@ -100,15 +108,35 @@ class RecognitionTemplates:
self.parameters = parameters
# verification.Database implements bob.db.verification.utils interface
self.db = bob.db.avspoof.verification.Database(original_directory=self.root_folder, original_extension='.wav')
# self.db = bob.db.avspoof.verification.Database(original_directory=self.root_folder, original_extension='.wav')
self.db = bob.db.avspoof.Database()
# template Ids are model Ids (which again are just Ids of clients) in the terminology of the bob.db interfaces
self.template_ids = sorted(self.db.model_ids(groups=parameters['group']))
if parameters['protocol'] == "smalltest":
if parameters['group'] == "train":
self.template_ids = sorted([1, 3])
if parameters['group'] == "devel":
self.template_ids = sorted([15, 20])
if parameters['group'] == "test":
self.template_ids = sorted([18, 33])
else:
self.template_ids = sorted([client.id for client in self.db.clients(groups=parameters['group'])])
# print("protocol: %s, group: %s, purpose: %s, template_ids: %d" % (parameters['protocol'], parameters['group'], parameters['purpose'], len(self.template_ids)))
if parameters['protocol'] == "smalltest":
assert (len(self.template_ids) == 2)
else:
if parameters['group'] == "test":
assert (len(self.template_ids) == 16)
else:
assert (len(self.template_ids) == 14)
self.objs = []
# store the File objects for each template
for template_id in self.template_ids:
self.objs.append(sorted(self.db.objects(protocol=parameters['protocol'], purposes=parameters['purpose'], groups=parameters['group'], model_ids = (template_id,)), key=lambda x: x.id))
self.objs.append(sorted(self.db.objects(protocol=parameters['protocol'], groups=parameters['group'], cls=parameters['purpose'], clients = (template_id,)), key=lambda x: x.id))
self.current_template_index = 0
self.current_obj_index = 0
self.next_index = 0
......@@ -139,6 +167,7 @@ class RecognitionTemplates:
# current file object that we want to read and output
obj = self.objs[self.current_template_index][self.current_obj_index]
if self.current_obj_index == 0:
# we update the 'template_id' and 'client_id' only when the current template/model changes
if self.force_start_index <= self.next_index and (self.force_end_index is None or self.force_end_index >= self.next_index):
......@@ -155,7 +184,7 @@ class RecognitionTemplates:
self.outputs['file_id'].write({'text': obj.id}, self.next_index)
if self.outputs['speech'].isConnected():
rate, audio = scipy.io.wavfile.read(obj.make_path(self.root_folder, self.db.original_extension))
rate, audio = scipy.io.wavfile.read(obj.make_path(directory=self.root_folder, extension='.wav'))
data = {
'value': numpy.cast['float'](audio)
}
......@@ -174,7 +203,7 @@ class RecognitionTemplates:
return True
class ProbesReal:
class Probes:
# Reasoning: Each client may have a number of probes. Each probe may be
# composed of any number of sample. So, each probe receives an unique
# identifier. Those identifiers are linked to the client identifier and
......@@ -188,13 +217,75 @@ class ProbesReal:
self.parameters = parameters
# verification.Database implements bob.db.verification.utils interface
self.db = bob.db.avspoof.verification.Database(original_directory=self.root_folder, original_extension='.wav')
# self.db = bob.db.avspoof.verification.Database(original_directory=self.root_folder, original_extension='.wav')
self.db = bob.db.avspoof.Database()
# template Ids are model Ids (which again are just Ids of clients) in the terminology of the bob.db interfaces
self.template_ids = sorted(self.db.model_ids(groups=parameters['group']))
self.objs = sorted(self.db.objects(protocol=parameters['protocol'],
purposes=parameters['purpose'], groups=parameters['group']), key=lambda x: x.id)
if parameters['purpose'] == "attack":
self.template_ids = None
else:
if parameters['protocol'] == "smalltest":
if parameters['group'] == "train":
self.template_ids = sorted([1, 3])
if parameters['group'] == "devel":
self.template_ids = sorted([15, 20])
if parameters['group'] == "test":
self.template_ids = sorted([18, 33])
else:
self.template_ids = sorted([client.id for client in self.db.clients(groups=parameters['group'])])
self.objs = sorted(self.db.objects(protocol=parameters['protocol'], groups=parameters['group'], cls=parameters['purpose']), key=lambda x: x.id)
# print("protocol: %s, group: %s, purpose: %s, objects: %d" % (parameters['protocol'], parameters['group'], parameters['purpose'], len(self.objs)))
if parameters['purpose'] == "attack":
if parameters['protocol'] == "smalltest":
assert (len(self.objs) == 6910)
if parameters['protocol'] == "grandtest":
if parameters['group'] == "train":
assert (len(self.objs) == 56470)
if parameters['group'] == "devel":
assert (len(self.objs) == 56470)
if parameters['group'] == "test":
assert (len(self.objs) == 63380)
if parameters['protocol'] == "physical_access":
if parameters['group'] == "train":
assert (len(self.objs) == 38580)
if parameters['group'] == "devel":
assert (len(self.objs) == 38580)
if parameters['group'] == "test":
assert (len(self.objs) == 43320)
if parameters['protocol'] == "logical_access":
if parameters['group'] == "train":
assert (len(self.objs) == 17890)
if parameters['group'] == "devel":
assert (len(self.objs) == 17890)
if parameters['group'] == "test":
assert (len(self.objs) == 20060)
else:
if parameters['protocol'] == "smalltest":
assert (len(self.template_ids) == 2)
else:
if parameters['group'] == "test":
assert (len(self.template_ids) == 16)
else:
assert (len(self.template_ids) == 14)
if parameters['protocol'] == "smalltest":
if parameters['group'] == "train":
assert (len(self.objs) == 623)
if parameters['group'] == "devel":
assert (len(self.objs) == 586)
if parameters['group'] == "test":
assert (len(self.objs) == 573)
else:
if parameters['group'] == "train":
assert (len(self.objs) == 4193)
if parameters['group'] == "devel":
assert (len(self.objs) == 4225)
if parameters['group'] == "test":
assert (len(self.objs) == 4708)
self.next_index = 0
self.force_start_index = force_start_index
......@@ -223,21 +314,30 @@ class ProbesReal:
if self.outputs['file_id'].isConnected():
self.outputs['file_id'].write({'text': obj.id}, self.next_index)
if self.outputs['probe_id'].isConnected():
self.outputs['probe_id'].write({'text': obj.id}, self.next_index)
if self.template_ids is None: # we are processing attacks
if self.outputs['attack_id'].isConnected():
self.outputs['attack_id'].write({'text': obj.id}, self.next_index)
else: # we are processing real data
if self.outputs['probe_id'].isConnected():
self.outputs['probe_id'].write({'text': obj.id}, self.next_index)
if self.outputs['client_id'].isConnected():
self.outputs['client_id'].write({'text': obj.client_id}, self.next_index)
if self.outputs['template_ids'].isConnected():
data = {
if self.template_ids is None: # it means we are processing attacks, so template_ids is just one client_id
data = {
'text': numpy.array([obj.client_id], dtype=numpy.string_)
}
else:
data = {
'text': numpy.array(self.template_ids, dtype=numpy.string_)
}
self.outputs['template_ids'].write(data, self.next_index)
}
self.outputs['template_ids'].write(data, self.next_index)
# read audio data
if self.outputs['speech'].isConnected():
rate, audio = scipy.io.wavfile.read(obj.make_path(self.root_folder, self.db.original_extension))
rate, audio = scipy.io.wavfile.read(obj.make_path(directory=self.root_folder, extension='.wav'))
data = {
'value': numpy.cast['float'](audio)
}
......@@ -261,7 +361,46 @@ class SimpleAntispoofing:
# retrieve all file paths for the given parameters
self.objs_real = sorted(self.db.objects(protocol=parameters['protocol'], groups=parameters['group'], cls='real'), key=lambda x: x.id)
# print("protocol: %s, group: %s, real objects: %d" % (parameters['protocol'], parameters['group'], len(self.objs_real)))
self.objs_attack = sorted(self.db.objects(protocol=parameters['protocol'], groups=parameters['group'], cls='attack'), key=lambda x: x.id)
# print("protocol: %s, group: %s, attack objects: %d" % (parameters['protocol'], parameters['group'], len(self.objs_attack)))
if parameters['protocol'] == "smalltest":
assert (len(self.objs_attack) == 6910)
if parameters['group'] == "train":
assert (len(self.objs_real) == 741)
if parameters['group'] == "devel":
assert (len(self.objs_real) == 695)
if parameters['group'] == "test":
assert (len(self.objs_real) == 680)
else:
if parameters['group'] == "train":
assert (len(self.objs_real) == 4973)
if parameters['group'] == "devel":
assert (len(self.objs_real) == 4995)
if parameters['group'] == "test":
assert (len(self.objs_real) == 5576)
if parameters['protocol'] == "grandtest":
if parameters['group'] == "train":
assert (len(self.objs_attack) == 56470)
if parameters['group'] == "devel":
assert (len(self.objs_attack) == 56470)
if parameters['group'] == "test":
assert (len(self.objs_attack) == 63380)
if parameters['protocol'] == "physical_access":
if parameters['group'] == "train":
assert (len(self.objs_attack) == 38580)
if parameters['group'] == "devel":
assert (len(self.objs_attack) == 38580)
if parameters['group'] == "test":
assert (len(self.objs_attack) == 43320)
if parameters['protocol'] == "logical_access":
if parameters['group'] == "train":
assert (len(self.objs_attack) == 17890)
if parameters['group'] == "devel":
assert (len(self.objs_attack) == 17890)
if parameters['group'] == "test":
assert (len(self.objs_attack) == 20060)
# we have two sets of data, so to serve them sequentially, we need to know their sizes and
# when the second set is following after the first set, hence, this approach with offsets
......@@ -330,5 +469,4 @@ class SimpleAntispoofing:
self.next_index += 1
return True
return True
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment