Commit b54986f3 authored by Philip ABBET's avatar Philip ABBET

Add nist_sre12/3 (api change: beat.backend.python v1.4.2)

parent b59ab3fb
{
"description": "The NIST Speaker Recognition Evaluation 2012 (SRE'12)",
"root_folder": "/idiap/temp/ekhoury/NIST_DATA/DENOISED_FOR_BEAT",
"protocols": [
{
"name": "female",
"template": "advanced_speaker_recognition",
"sets": [
{
"name": "train",
"template": "train",
"view": "Train",
"parameters": {
"protocol": "core-c1",
"gender": "female"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "dev_templates",
"template": "templates",
"view": "Templates",
"parameters": {
"protocol": "core-c1",
"gender": "female",
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "dev_probes",
"template": "probes",
"view": "Probes",
"parameters": {
"protocol": "core-c1",
"gender": "female",
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"probe_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_ids": "{{ system_user.username }}/array_1d_text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "test_templates",
"template": "templates",
"view": "Templates",
"parameters": {
"protocol": "core-c2",
"gender": "female",
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "test_probes",
"template": "probes",
"view": "Probes",
"parameters": {
"protocol": "core-c2",
"gender": "female",
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"probe_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_ids": "{{ system_user.username }}/array_1d_text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
}
]
},
{
"name": "male",
"template": "advanced_speaker_recognition",
"sets": [
{
"name": "train",
"template": "train",
"view": "Train",
"parameters": {
"protocol": "core-c1",
"gender": "male"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "dev_templates",
"template": "templates",
"view": "Templates",
"parameters": {
"protocol": "core-c1",
"gender": "male",
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "dev_probes",
"template": "probes",
"view": "Probes",
"parameters": {
"protocol": "core-c1",
"gender": "male",
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"probe_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_ids": "{{ system_user.username }}/array_1d_text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "test_templates",
"template": "templates",
"view": "Templates",
"parameters": {
"protocol": "core-c2",
"gender": "male",
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "test_probes",
"template": "probes",
"view": "Probes",
"parameters": {
"protocol": "core-c2",
"gender": "male",
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"probe_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_ids": "{{ system_user.username }}/array_1d_text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
}
]
}
]
}
###############################################################################
# #
# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# This file is part of the beat.examples module of the BEAT platform. #
# #
# Commercial License Usage #
# Licensees holding valid commercial BEAT licenses may use this file in #
# accordance with the terms contained in a written agreement between you #
# and Idiap. For further information contact tto@idiap.ch #
# #
# Alternatively, this file may be used under the terms of the GNU Affero #
# Public License version 3 as published by the Free Software and appearing #
# in the file LICENSE.AGPL included in the packaging of this file. #
# The BEAT platform is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
# or FITNESS FOR A PARTICULAR PURPOSE. #
# #
# You should have received a copy of the GNU Affero Public License along #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/. #
# #
###############################################################################
import numpy as np
import bob.db.nist_sre12
import bob.io.base
import bob.io.audio
#----------------------------------------------------------
def get_client_end_index(objs, client_id, client_start_index,
                         start_index, end_index, indice_in_tuple=1):
    """Return the last data index of the run of entries sharing ``client_id``.

    ``objs`` is indexed relative to ``start_index`` (data index ``i`` lives at
    ``objs[i - start_index]``). Entries may be objects exposing a
    ``client_id`` attribute, or tuples holding such an object at position
    ``indice_in_tuple``.

    The scan begins right after ``client_start_index`` and stops at the first
    entry whose ``client_id`` differs; if none differs up to ``end_index``,
    ``end_index`` is returned.
    """
    candidate = client_start_index + 1

    while candidate <= end_index:
        entry = objs[candidate - start_index]

        # Tuple entries carry the database object at a fixed position
        if isinstance(entry, tuple):
            entry = entry[indice_in_tuple]

        if entry.client_id != client_id:
            # The run ended on the previous entry
            return candidate - 1

        candidate += 1

    return end_index
#----------------------------------------------------------
def get_value_end_index(objs, value, index_in_tuple, value_start_index,
                        start_index, end_index):
    """Return the last data index of the run of entries holding ``value``.

    Every entry of ``objs`` is indexable; the element at position
    ``index_in_tuple`` is compared against ``value``. ``objs`` is indexed
    relative to ``start_index`` (data index ``i`` lives at
    ``objs[i - start_index]``).

    The scan begins right after ``value_start_index`` and stops at the first
    entry whose element differs; if none differs up to ``end_index``,
    ``end_index`` is returned.
    """
    candidate = value_start_index + 1

    while candidate <= end_index:
        if objs[candidate - start_index][index_in_tuple] != value:
            # The run ended on the previous entry
            return candidate - 1

        candidate += 1

    return end_index
#----------------------------------------------------------
class Train:
    """View providing the files selected for one protocol and gender.

    Outputs:
        - speech: "{{ system_user.username }}/array_1d_floats/1"
        - file_id: "{{ system_user.username }}/text/1"
        - client_id: "{{ system_user.username }}/text/1"

    One "file_id" is associated with a given "speech".
    Several "speech" are associated with a given "client_id".

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                   client_id                 | |                   client_id                 |
    ----------------------------------------------- -----------------------------------------------
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the file list from the database and select the index range.

        Parameters:
            root_folder: folder passed to ``make_path()`` to locate the
                audio files.
            outputs: container of outputs; indexed by name and iterated over
                by ``next()``.
            parameters: mapping providing the 'protocol' and 'gender' keys.
            force_start_index: first data index to provide (defaults to 0).
            force_end_index: last data index to provide, inclusive (defaults
                to the index of the last file).

        Returns:
            True
        """
        # Initialisations
        self.root_folder = root_folder
        self.outputs = outputs

        # Open the database and load the objects to provide via the outputs.
        # Sorting by client first keeps the files of a client contiguous.
        self.db = bob.db.nist_sre12.Database()

        self.objs = sorted(self.db.objects(protocol=parameters['protocol'],
                                           gender=parameters['gender']),
                           key=lambda x: (x.client_id, x.id))

        # Determine the range of indices that must be provided
        self.start_index = force_start_index if force_start_index is not None else 0
        self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1

        # Only keep the requested range; self.objs[0] now corresponds to the
        # data index self.start_index
        self.objs = self.objs[self.start_index : self.end_index + 1]

        self.next_index = self.start_index

        return True

    def done(self, last_data_index):
        """Return True once ``last_data_index`` has reached the end index."""
        return last_data_index >= self.end_index

    def next(self):
        """Write the data for ``self.next_index`` on every connected output.

        Returns:
            True
        """
        obj = self.objs[self.next_index - self.start_index]

        # Output: client_id (only provide data when the client_id change)
        if self.outputs['client_id'].isConnected() and \
           self.outputs['client_id'].last_written_data_index < self.next_index:

            client_end_index = get_client_end_index(self.objs, obj.client_id,
                                                    self.next_index,
                                                    self.start_index,
                                                    self.end_index)

            self.outputs['client_id'].write(
                {
                    'text': str(obj.client_id)
                },
                client_end_index
            )

        # Output: file_id (provide data at each iteration)
        if self.outputs['file_id'].isConnected():
            self.outputs['file_id'].write(
                {
                    'text': str(obj.id)
                },
                self.next_index
            )

        # Output: speech (provide data at each iteration)
        if self.outputs['speech'].isConnected():
            filename = obj.make_path(self.root_folder, '.sph')

            audio = bob.io.base.load(filename)

            # Only the first row of the loaded data is used (presumably the
            # first channel - TODO confirm), scaled by 2**15.
            # np.asarray(..., dtype=float) replaces np.cast['float'], which
            # was removed in NumPy 2.0.
            self.outputs['speech'].write(
                {
                    'value': np.asarray(audio[0] * pow(2, 15), dtype=float)
                },
                self.next_index
            )

        # Determine the next data index that must be provided: the first
        # index not yet covered by every connected output
        self.next_index = 1 + min([ x.last_written_data_index for x in self.outputs
                                    if x.isConnected() ]
                                 )

        return True
#----------------------------------------------------------
class Templates:
    """View providing the enrollment files, grouped by template and client.

    Outputs:
        - speech: "{{ system_user.username }}/array_1d_floats/1"
        - file_id: "{{ system_user.username }}/text/1"
        - template_id: "{{ system_user.username }}/text/1"
        - client_id: "{{ system_user.username }}/text/1"

    One "file_id" is associated with a given "speech".
    Several "speech" are associated with a given "template_id".
    Several "template_id" are associated with a given "client_id".

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                 template_id                 | |                 template_id                 |
    ----------------------------------------------- -----------------------------------------------
    -----------------------------------------------------------------------------------------------
    |                                          client_id                                          |
    -----------------------------------------------------------------------------------------------

    Note: for this particular database, there is only one "template_id"
    per "client_id".
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the enrollment files from the database and select the index range.

        Parameters:
            root_folder: folder passed to ``make_path()`` to locate the
                audio files.
            outputs: container of outputs; indexed by name and iterated over
                by ``next()``.
            parameters: mapping providing the 'protocol', 'gender' and
                'group' keys.
            force_start_index: first data index to provide (defaults to 0).
            force_end_index: last data index to provide, inclusive (defaults
                to the index of the last entry).

        Returns:
            True
        """
        # Initialisations
        self.root_folder = root_folder
        self.outputs = outputs
        self.parameters = parameters

        # Open the database and load the objects to provide via the outputs
        self.db = bob.db.nist_sre12.Database()

        template_ids = self.db.model_ids(protocol=parameters['protocol'],
                                         groups=self.parameters['group'])

        # Build one (template_id, file) entry per enrollment file
        self.objs = []

        for template_id in template_ids:
            objs = self.db.objects(protocol=parameters['protocol'],
                                   gender=parameters['gender'],
                                   groups=self.parameters['group'],
                                   purposes='enroll',
                                   model_ids=[template_id])

            self.objs.extend([ (template_id, obj) for obj in objs ])

        # Sorting by client, then template, keeps the entries of a client
        # (and of a template) contiguous
        self.objs = sorted(self.objs, key=lambda x: (x[1].client_id, x[0], x[1].id))

        # Determine the range of indices that must be provided
        self.start_index = force_start_index if force_start_index is not None else 0
        self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1

        # Only keep the requested range; self.objs[0] now corresponds to the
        # data index self.start_index
        self.objs = self.objs[self.start_index : self.end_index + 1]

        self.next_index = self.start_index

        return True

    def done(self, last_data_index):
        """Return True once ``last_data_index`` has reached the end index."""
        return last_data_index >= self.end_index

    def next(self):
        """Write the data for ``self.next_index`` on every connected output.

        Returns:
            True
        """
        (template_id, obj) = self.objs[self.next_index - self.start_index]

        # Output: template_id (only provide data when the template_id change)
        if self.outputs['template_id'].isConnected() and \
           self.outputs['template_id'].last_written_data_index < self.next_index:

            template_end_index = get_value_end_index(self.objs, template_id, 0,
                                                     self.next_index,
                                                     self.start_index,
                                                     self.end_index)

            self.outputs['template_id'].write(
                {
                    'text': template_id
                },
                template_end_index
            )

        # Output: client_id (only provide data when the client_id change)
        # NOTE(review): ids are written here without str(), unlike in the
        # Train view - presumably they already are strings; confirm.
        if self.outputs['client_id'].isConnected() and \
           self.outputs['client_id'].last_written_data_index < self.next_index:

            client_end_index = get_client_end_index(self.objs, obj.client_id,
                                                    self.next_index,
                                                    self.start_index,
                                                    self.end_index)

            self.outputs['client_id'].write(
                {
                    'text': obj.client_id
                },
                client_end_index
            )

        # Output: file_id (provide data at each iteration)
        if self.outputs['file_id'].isConnected():
            self.outputs['file_id'].write(
                {
                    'text': obj.id
                },
                self.next_index
            )

        # Output: speech (provide data at each iteration)
        if self.outputs['speech'].isConnected():
            filename = obj.make_path(self.root_folder, '.sph')

            audio = bob.io.base.load(filename)

            # Only the first row of the loaded data is used (presumably the
            # first channel - TODO confirm), scaled by 2**15.
            # np.asarray(..., dtype=float) replaces np.cast['float'], which
            # was removed in NumPy 2.0.
            self.outputs['speech'].write(
                {
                    'value': np.asarray(audio[0] * pow(2, 15), dtype=float)
                },
                self.next_index
            )

        # Determine the next data index that must be provided: the first
        # index not yet covered by every connected output
        self.next_index = 1 + min([ x.last_written_data_index for x in self.outputs
                                    if x.isConnected() ]
                                 )

        return True
#----------------------------------------------------------
class Probes:
    """View providing the probe files, grouped by client and by template list.

    Outputs:
        - speech: "{{ system_user.username }}/array_1d_floats/1"
        - file_id: "{{ system_user.username }}/text/1"
        - probe_id: "{{ system_user.username }}/text/1"
        - client_id: "{{ system_user.username }}/text/1"
        - template_ids: "{{ system_user.username }}/array_1d_text/1"

    One "file_id" is associated with a given "speech".
    One "probe_id" is associated with a given "speech".
    Several "speech" are associated with a given "client_id".
    Several "client_id" are associated with a given "template_ids".

    Each probe must be matched against a number of templates defined by a list of
    client identifiers.

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                   client_id                 | |                   client_id                 |
    ----------------------------------------------- -----------------------------------------------
    -----------------------------------------------------------------------------------------------
    |                                         template_ids                                        |
    -----------------------------------------------------------------------------------------------
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the probe files from the database and select the index range.

        Parameters:
            root_folder: folder passed to ``make_path()`` to locate the
                audio files.
            outputs: container of outputs; indexed by name and iterated over
                by ``next()``.
            parameters: mapping providing the 'protocol', 'gender' and
                'group' keys.
            force_start_index: first data index to provide (defaults to 0).
            force_end_index: last data index to provide, inclusive (defaults
                to the index of the last entry).

        Returns:
            True
        """
        # Initialisations
        self.root_folder = root_folder
        self.outputs = outputs
        self.parameters = parameters

        # Open the database and load the objects to provide via the outputs
        self.db = bob.db.nist_sre12.Database()

        template_ids = self.db.model_ids(protocol=parameters['protocol'],
                                         groups=self.parameters['group'])

        # For each template, the ids of the probe files associated with it.
        # Only membership is tested afterwards, so an (unsorted) set is used
        # to get constant-time lookups.
        template_probes = {}
        for template_id in template_ids:
            objs = self.db.objects(protocol=parameters['protocol'],
                                   groups=self.parameters['group'],
                                   gender=parameters['gender'],
                                   purposes='probe',
                                   model_ids=[template_id])

            template_probes[template_id] = set(p.id for p in objs)

        objs = sorted(self.db.objects(protocol=parameters['protocol'],
                                      gender=parameters['gender'],
                                      groups=self.parameters['group'],
                                      purposes='probe'),
                      key=lambda x: (x.client_id, x.id))

        # Build one (list of template ids, file) entry per probe file
        self.objs = []
        for obj in objs:
            templates = [ template_id for template_id in template_ids
                          if obj.id in template_probes[template_id] ]
            self.objs.append( (templates, obj) )

        # Sorting by template list first keeps the probes sharing the same
        # list of templates contiguous
        self.objs = sorted(self.objs, key=lambda x: (len(x[0]), x[0], x[1].client_id, x[1].id))

        # Determine the range of indices that must be provided
        self.start_index = force_start_index if force_start_index is not None else 0
        self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1

        # Only keep the requested range; self.objs[0] now corresponds to the
        # data index self.start_index
        self.objs = self.objs[self.start_index : self.end_index + 1]

        self.next_index = self.start_index

        return True

    def done(self, last_data_index):
        """Return True once ``last_data_index`` has reached the end index."""
        return last_data_index >= self.end_index

    def next(self):
        """Write the data for ``self.next_index`` on every connected output.

        Returns:
            True
        """
        (template_ids, obj) = self.objs[self.next_index - self.start_index]

        # Output: template_ids (only provide data when the template_ids change)
        if self.outputs['template_ids'].isConnected() and \
           self.outputs['template_ids'].last_written_data_index < self.next_index:

            template_ids_end_index = get_value_end_index(self.objs, template_ids, 0,
                                                         self.next_index,
                                                         self.start_index,
                                                         self.end_index)

            self.outputs['template_ids'].write(
                {
                    'value': template_ids
                },
                template_ids_end_index
            )

        # Output: client_id (only provide data when the client_id change)
        # NOTE(review): ids are written here without str(), unlike in the
        # Train view - presumably they already are strings; confirm.
        if self.outputs['client_id'].isConnected() and \
           self.outputs['client_id'].last_written_data_index < self.next_index:

            client_end_index = get_client_end_index(self.objs, obj.client_id,
                                                    self.next_index,
                                                    self.start_index,
                                                    self.end_index)

            self.outputs['client_id'].write(
                {
                    'text': obj.client_id
                },
                client_end_index
            )

        # Output: probe_id (provide data at each iteration)
        if self.outputs['probe_id'].isConnected():
            self.outputs['probe_id'].write(
                {
                    'text': obj.id
                },
                self.next_index
            )

        # Output: file_id (provide data at each iteration)
        if self.outputs['file_id'].isConnected():
            self.outputs['file_id'].write(
                {
                    'text': obj.id
                },
                self.next_index
            )

        # Output: speech (provide data at each iteration)
        if self.outputs['speech'].isConnected():
            filename = obj.make_path(self.root_folder, '.sph')

            audio = bob.io.base.load(filename)

            # Only the first row of the loaded data is used (presumably the
            # first channel - TODO confirm), scaled by 2**15.
            # np.asarray(..., dtype=float) replaces np.cast['float'], which
            # was removed in NumPy 2.0.
            self.outputs['speech'].write(
                {
                    'value': np.asarray(audio[0] * pow(2, 15), dtype=float)
                },
                self.next_index
            )

        # Determine the next data index that must be provided: the first
        # index not yet covered by every connected output
        self.next_index = 1 + min([ x.last_written_data_index for x in self.outputs
                                    if x.isConnected() ]
                                 )

        return True
#----------------------------------------------------------
def setup_tests():
    """Replace ``bob.io.base.load`` by a stub returning fake audio data.

    This lets the views run on fake data, without reading any audio file.
    The replacement is global: every later call to ``bob.io.base.load`` in
    this process gets the stub.
    """
    # Install a mock load function for the audio files
    def mock_load(filename):
        # Zero-filled, deterministic 2-D float data: the views index the first
        # row and write it to an output declared as a 1-D array of floats.
        # Using a float dtype also keeps the `* pow(2, 15)` scaling applied by
        # the views from overflowing a small integer type.
        return np.zeros((1, 512), dtype=np.float64)

    bob.io.base.load = mock_load
#----------------------------------------------------------
# Test the behavior of the views (on fake data)
if __name__ == '__main__':
setup_tests()
from beat.backend.python.database import DatabaseTester
DatabaseTester('Train', Train,
[
'client_id',
'file_id',
'speech',
],
parameters=dict(
protocol='core-c1',
gender='female',
),
irregular_outputs=[
'client_id',
]
)
DatabaseTester('Templates', Templates,
[
'client_id',
'template_id',
'file_id',
'speech',
],
parameters=dict(
protocol='core-c1',
gender='female',
group='eval',
),
irregular_outputs=[
'client_id',
'template_id',
]
)
DatabaseTester('Probes', Probes,