Commit eb16d51a authored by Philip ABBET

Add voxforge/3 (api change: beat.backend.python v1.4.2)

parent edd6c41c
{
"description": "The VoxForge Database",
"root_folder": "/idiap/resource/database/VoxForge/dbase/SpeechCorpus/Trunk/Audio/Main/16kHz_16bit",
"protocols": [
{
"name": "default",
"template": "advanced_speaker_recognition",
"sets": [
{
"name": "train",
"template": "train",
"view": "Train",
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "dev_templates",
"template": "templates",
"view": "Templates",
"parameters": {
"group": "dev"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "dev_probes",
"template": "probes",
"view": "Probes",
"parameters": {
"group": "dev"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"probe_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_ids": "{{ system_user.username }}/array_1d_text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "test_templates",
"template": "templates",
"view": "Templates",
"parameters": {
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_id": "{{ system_user.username }}/text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
},
{
"name": "test_probes",
"template": "probes",
"view": "Probes",
"parameters": {
"group": "eval"
},
"outputs": {
"file_id": "{{ system_user.username }}/text/1",
"probe_id": "{{ system_user.username }}/text/1",
"client_id": "{{ system_user.username }}/text/1",
"template_ids": "{{ system_user.username }}/array_1d_text/1",
"speech": "{{ system_user.username }}/array_1d_floats/1"
}
}
]
}
]
}
###############################################################################
# #
# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# This file is part of the beat.examples module of the BEAT platform. #
# #
# Commercial License Usage #
# Licensees holding valid commercial BEAT licenses may use this file in #
# accordance with the terms contained in a written agreement between you #
# and Idiap. For further information contact tto@idiap.ch #
# #
# Alternatively, this file may be used under the terms of the GNU Affero #
# Public License version 3 as published by the Free Software and appearing #
# in the file LICENSE.AGPL included in the packaging of this file. #
# The BEAT platform is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
# or FITNESS FOR A PARTICULAR PURPOSE. #
# #
# You should have received a copy of the GNU Affero Public License along #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/. #
# #
###############################################################################
import numpy as np
import bob.db.voxforge
import bob.io.base
import bob.io.audio
#----------------------------------------------------------
def get_client_end_index(objs, client_id, client_start_index,
                         start_index, end_index, indice_in_tuple=1):
    """Return the last global index of the run of entries sharing ``client_id``.

    ``objs`` is addressed with global indices shifted by ``start_index``
    (global index ``i`` is read from ``objs[i - start_index]``). An entry is
    either an object exposing ``client_id`` or a tuple carrying such an object
    at position ``indice_in_tuple``.

    The scan begins right after ``client_start_index`` and stops at the first
    entry whose ``client_id`` differs from the requested one; the index just
    before that entry is returned. When every entry up to ``end_index``
    matches (or there is nothing left to scan), ``end_index`` is returned.
    """
    for candidate in range(client_start_index + 1, end_index + 1):
        entry = objs[candidate - start_index]

        # Tuples wrap the database object together with extra information
        if isinstance(entry, tuple):
            entry = entry[indice_in_tuple]

        if entry.client_id != client_id:
            return candidate - 1

    return end_index
#----------------------------------------------------------
def get_value_end_index(objs, value, index_in_tuple, value_start_index,
                        start_index, end_index):
    """Return the last global index of the run of tuples holding ``value``.

    ``objs`` is a sequence of tuples addressed with global indices shifted by
    ``start_index``. Starting right after ``value_start_index``, the element
    at position ``index_in_tuple`` of each tuple is compared with ``value``;
    the index just before the first mismatch is returned, or ``end_index``
    when no mismatch is found up to and including ``end_index``.
    """
    following = range(value_start_index + 1, end_index + 1)

    # The generator is lazy: entries after the first mismatch are never read
    return next(
        (position - 1 for position in following
         if objs[position - start_index][index_in_tuple] != value),
        end_index
    )
#----------------------------------------------------------
class Train:
    """View providing the files of the 'world' group of the database.

    Outputs:
      - speech: "{{ system_user.username }}/array_1d_floats/1"
      - file_id: "{{ system_user.username }}/text/1"
      - client_id: "{{ system_user.username }}/text/1"

    One "file_id" is associated with a given "speech".
    Several "speech" are associated with a given "client_id".

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                  client_id                  | |                  client_id                  |
    ----------------------------------------------- -----------------------------------------------
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the objects to serve and select the range of indices to provide.

        The objects of the 'world' group are sorted by (client_id, id). When
        ``force_start_index`` / ``force_end_index`` are given, only that
        inclusive range of the sorted list is kept; otherwise the whole list
        is served. ``parameters`` is accepted but not used by this view.

        Always returns True.
        """
        # Initialisations
        self.root_folder = root_folder
        self.outputs = outputs

        # Open the database and load the objects to provide via the outputs
        self.db = bob.db.voxforge.Database()
        self.objs = sorted(self.db.objects(groups='world'),
                           key=lambda x: (x.client_id, x.id))

        # Determine the range of indices that must be provided
        self.start_index = force_start_index if force_start_index is not None else 0
        self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1

        # Only the requested range is kept, hence the "- self.start_index"
        # offset whenever self.objs is indexed with a global data index
        self.objs = self.objs[self.start_index : self.end_index + 1]
        self.next_index = self.start_index

        return True

    def done(self, last_data_index):
        """Tell whether ``last_data_index`` reached the last index to provide."""
        return last_data_index >= self.end_index

    def next(self):
        """Write the data of the current index on every connected output.

        "client_id" is only written when its previous block ended before the
        current index, and then covers the whole run of consecutive objects of
        that client. "file_id" and "speech" are written for the current index
        only. ``self.next_index`` is then moved right after the smallest
        ``last_written_data_index`` of the connected outputs.

        Always returns True.
        """
        obj = self.objs[self.next_index - self.start_index]

        # Output: client_id (only provide data when the client_id change)
        if self.outputs['client_id'].isConnected() and \
           self.outputs['client_id'].last_written_data_index < self.next_index:

            client_end_index = get_client_end_index(self.objs, obj.client_id,
                                                    self.next_index,
                                                    self.start_index,
                                                    self.end_index)

            self.outputs['client_id'].write(
                {
                    'text': str(obj.client_id)
                },
                client_end_index
            )

        # Output: file_id (provide data at each iteration)
        if self.outputs['file_id'].isConnected():
            self.outputs['file_id'].write(
                {
                    'text': str(obj.id)
                },
                self.next_index
            )

        # Output: speech (provide data at each iteration)
        if self.outputs['speech'].isConnected():
            filename = obj.make_path(self.root_folder, '.wav')
            audio = bob.io.base.load(filename)

            self.outputs['speech'].write(
                {
                    'value': self._scale_audio(audio)
                },
                self.next_index
            )

        # Determine the next data index that must be provided
        self.next_index = 1 + min(x.last_written_data_index
                                  for x in self.outputs if x.isConnected())

        return True

    @staticmethod
    def _scale_audio(audio):
        """Return the first row of ``audio`` as floats multiplied by 2**15.

        The conversion to float happens *before* the multiplication so that
        integer-typed arrays cannot overflow, and ``np.asarray`` is used
        because ``np.cast`` no longer exists in NumPy 2.0.
        """
        # NOTE(review): presumably the loaded samples are normalised to
        # [-1, 1] and the factor maps them to the 16-bit range - confirm
        return np.asarray(audio[0], dtype=float) * 2 ** 15
#----------------------------------------------------------
class Templates:
    """View providing the enrollment files of each template of a group.

    Outputs:
      - speech: "{{ system_user.username }}/array_1d_floats/1"
      - file_id: "{{ system_user.username }}/text/1"
      - template_id: "{{ system_user.username }}/text/1"
      - client_id: "{{ system_user.username }}/text/1"

    One "file_id" is associated with a given "speech".
    Several "speech" are associated with a given "template_id".
    Several "template_id" are associated with a given "client_id".

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                 template_id                 | |                 template_id                 |
    ----------------------------------------------- -----------------------------------------------
    -----------------------------------------------------------------------------------------------
    |                                          client_id                                          |
    -----------------------------------------------------------------------------------------------

    Note: for this particular database, there is only one "template_id"
    per "client_id".
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the objects to serve and select the range of indices to provide.

        ``parameters['group']`` selects the group whose templates are listed.
        For each template, its 'enroll' objects are collected as
        ``(template_id, object)`` tuples, then the whole list is sorted by
        (client_id, template_id, object id). When ``force_start_index`` /
        ``force_end_index`` are given, only that inclusive range is kept.

        Always returns True.
        """
        # Initialisations
        self.root_folder = root_folder
        self.outputs = outputs
        self.parameters = parameters

        # Open the database and load the objects to provide via the outputs
        self.db = bob.db.voxforge.Database()

        group = self.parameters['group']
        template_ids = self.db.model_ids(groups=group)

        self.objs = []
        for template_id in template_ids:
            objs = self.db.objects(groups=group,
                                   purposes='enroll',
                                   model_ids=[template_id])

            self.objs.extend((template_id, obj) for obj in objs)

        self.objs = sorted(self.objs, key=lambda x: (x[1].client_id, x[0], x[1].id))

        # Determine the range of indices that must be provided
        self.start_index = force_start_index if force_start_index is not None else 0
        self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1

        # Only the requested range is kept, hence the "- self.start_index"
        # offset whenever self.objs is indexed with a global data index
        self.objs = self.objs[self.start_index : self.end_index + 1]
        self.next_index = self.start_index

        return True

    def done(self, last_data_index):
        """Tell whether ``last_data_index`` reached the last index to provide."""
        return last_data_index >= self.end_index

    def next(self):
        """Write the data of the current index on every connected output.

        "template_id" and "client_id" are only written when their previous
        block ended before the current index, and then cover the whole run of
        consecutive entries sharing the same value. "file_id" and "speech" are
        written for the current index only. ``self.next_index`` is then moved
        right after the smallest ``last_written_data_index`` of the connected
        outputs.

        Always returns True.
        """
        (template_id, obj) = self.objs[self.next_index - self.start_index]

        # Output: template_id (only provide data when the template_id change)
        if self.outputs['template_id'].isConnected() and \
           self.outputs['template_id'].last_written_data_index < self.next_index:

            template_end_index = get_value_end_index(self.objs, template_id, 0,
                                                     self.next_index,
                                                     self.start_index,
                                                     self.end_index)

            self.outputs['template_id'].write(
                {
                    'text': template_id
                },
                template_end_index
            )

        # Output: client_id (only provide data when the client_id change)
        if self.outputs['client_id'].isConnected() and \
           self.outputs['client_id'].last_written_data_index < self.next_index:

            client_end_index = get_client_end_index(self.objs, obj.client_id,
                                                    self.next_index,
                                                    self.start_index,
                                                    self.end_index)

            self.outputs['client_id'].write(
                {
                    'text': obj.client_id
                },
                client_end_index
            )

        # Output: file_id (provide data at each iteration)
        if self.outputs['file_id'].isConnected():
            self.outputs['file_id'].write(
                {
                    'text': obj.id
                },
                self.next_index
            )

        # Output: speech (provide data at each iteration)
        if self.outputs['speech'].isConnected():
            filename = obj.make_path(self.root_folder, '.wav')
            audio = bob.io.base.load(filename)

            self.outputs['speech'].write(
                {
                    'value': self._scale_audio(audio)
                },
                self.next_index
            )

        # Determine the next data index that must be provided
        self.next_index = 1 + min(x.last_written_data_index
                                  for x in self.outputs if x.isConnected())

        return True

    @staticmethod
    def _scale_audio(audio):
        """Return the first row of ``audio`` as floats multiplied by 2**15.

        The conversion to float happens *before* the multiplication so that
        integer-typed arrays cannot overflow, and ``np.asarray`` is used
        because ``np.cast`` no longer exists in NumPy 2.0.
        """
        # NOTE(review): presumably the loaded samples are normalised to
        # [-1, 1] and the factor maps them to the 16-bit range - confirm
        return np.asarray(audio[0], dtype=float) * 2 ** 15
#----------------------------------------------------------
class Probes:
    """View providing the probe files of a group and the templates to match.

    Outputs:
      - speech: "{{ system_user.username }}/array_1d_floats/1"
      - file_id: "{{ system_user.username }}/text/1"
      - probe_id: "{{ system_user.username }}/text/1"
      - client_id: "{{ system_user.username }}/text/1"
      - template_ids: "{{ system_user.username }}/array_1d_text/1"

    One "file_id" is associated with a given "speech".
    One "probe_id" is associated with a given "speech".
    Several "speech" are associated with a given "client_id".
    Several "client_id" are associated with a given "template_ids".

    Each probe must be matched against a number of templates defined by a list of
    client identifiers.

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   probe_id  | |   probe_id  | |   probe_id  | |   probe_id  | |   probe_id  | |   probe_id  |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                  client_id                  | |                  client_id                  |
    ----------------------------------------------- -----------------------------------------------
    -----------------------------------------------------------------------------------------------
    |                                         template_ids                                        |
    -----------------------------------------------------------------------------------------------
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the objects to serve and select the range of indices to provide.

        ``parameters['group']`` selects the group. Every 'probe' object of the
        group is paired with the list of templates it is a probe of, as
        ``(template_ids, object)`` tuples. The list is sorted by (number of
        templates, templates, client_id, object id) so that probes sharing the
        same template list are contiguous. When ``force_start_index`` /
        ``force_end_index`` are given, only that inclusive range is kept.

        Always returns True.
        """
        # Initialisations
        self.root_folder = root_folder
        self.outputs = outputs
        self.parameters = parameters

        # Open the database and load the objects to provide via the outputs
        self.db = bob.db.voxforge.Database()

        group = self.parameters['group']
        template_ids = self.db.model_ids(groups=group)

        # Identifiers of the probes of each template; sets keep the membership
        # tests below cheap (the order of the probes is irrelevant here)
        template_probes = {}
        for template_id in template_ids:
            probes = self.db.objects(groups=group,
                                     purposes='probe',
                                     model_ids=[template_id])

            template_probes[template_id] = set(p.id for p in probes)

        objs = sorted(self.db.objects(groups=group,
                                      purposes='probe'),
                      key=lambda x: (x.client_id, x.id))

        self.objs = []
        for obj in objs:
            # Templates are listed in the order given by the database
            templates = [template_id for template_id in template_ids
                         if obj.id in template_probes[template_id]]
            self.objs.append((templates, obj))

        self.objs = sorted(self.objs, key=lambda x: (len(x[0]), x[0], x[1].client_id, x[1].id))

        # Determine the range of indices that must be provided
        self.start_index = force_start_index if force_start_index is not None else 0
        self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1

        # Only the requested range is kept, hence the "- self.start_index"
        # offset whenever self.objs is indexed with a global data index
        self.objs = self.objs[self.start_index : self.end_index + 1]
        self.next_index = self.start_index

        return True

    def done(self, last_data_index):
        """Tell whether ``last_data_index`` reached the last index to provide."""
        return last_data_index >= self.end_index

    def next(self):
        """Write the data of the current index on every connected output.

        "template_ids" and "client_id" are only written when their previous
        block ended before the current index, and then cover the whole run of
        consecutive entries sharing the same value. "probe_id", "file_id"
        (both carrying the object id) and "speech" are written for the current
        index only. ``self.next_index`` is then moved right after the smallest
        ``last_written_data_index`` of the connected outputs.

        Always returns True.
        """
        (template_ids, obj) = self.objs[self.next_index - self.start_index]

        # Output: template_ids (only provide data when the template_ids change)
        if self.outputs['template_ids'].isConnected() and \
           self.outputs['template_ids'].last_written_data_index < self.next_index:

            template_ids_end_index = get_value_end_index(self.objs, template_ids, 0,
                                                         self.next_index,
                                                         self.start_index,
                                                         self.end_index)

            self.outputs['template_ids'].write(
                {
                    'value': template_ids
                },
                template_ids_end_index
            )

        # Output: client_id (only provide data when the client_id change)
        if self.outputs['client_id'].isConnected() and \
           self.outputs['client_id'].last_written_data_index < self.next_index:

            client_end_index = get_client_end_index(self.objs, obj.client_id,
                                                    self.next_index,
                                                    self.start_index,
                                                    self.end_index)

            self.outputs['client_id'].write(
                {
                    'text': obj.client_id
                },
                client_end_index
            )

        # Output: probe_id (provide data at each iteration)
        if self.outputs['probe_id'].isConnected():
            self.outputs['probe_id'].write(
                {
                    'text': obj.id
                },
                self.next_index
            )

        # Output: file_id (provide data at each iteration)
        if self.outputs['file_id'].isConnected():
            self.outputs['file_id'].write(
                {
                    'text': obj.id
                },
                self.next_index
            )

        # Output: speech (provide data at each iteration)
        if self.outputs['speech'].isConnected():
            filename = obj.make_path(self.root_folder, '.wav')
            audio = bob.io.base.load(filename)

            self.outputs['speech'].write(
                {
                    'value': self._scale_audio(audio)
                },
                self.next_index
            )

        # Determine the next data index that must be provided
        self.next_index = 1 + min(x.last_written_data_index
                                  for x in self.outputs if x.isConnected())

        return True

    @staticmethod
    def _scale_audio(audio):
        """Return the first row of ``audio`` as floats multiplied by 2**15.

        The conversion to float happens *before* the multiplication so that
        integer-typed arrays cannot overflow, and ``np.asarray`` is used
        because ``np.cast`` no longer exists in NumPy 2.0.
        """
        # NOTE(review): presumably the loaded samples are normalised to
        # [-1, 1] and the factor maps them to the 16-bit range - confirm
        return np.asarray(audio[0], dtype=float) * 2 ** 15
#----------------------------------------------------------
def setup_tests():
    """Replace ``bob.io.base.load`` with a stub returning fake audio data.

    After this call, every ``bob.io.base.load(filename)`` in this process
    returns a zero-filled (1, 512) ``uint8`` array instead of reading the
    file, so the views can be exercised without the real audio files.
    """
    # Install a mock load function for the audio files
    def mock_load(filename):
        # np.zeros rather than np.ndarray: the latter leaves the memory
        # uninitialised, which made the fake data differ from run to run
        return np.zeros((1, 512), dtype=np.uint8)

    bob.io.base.load = mock_load
#----------------------------------------------------------
# Test the behavior of the views (on fake data)
if __name__ == '__main__':
    setup_tests()

    from beat.backend.python.database import DatabaseTester

    # One entry per view to check:
    # (name, view class, outputs, parameters, irregular outputs)
    view_checks = (
        (
            'Train', Train,
            ['client_id', 'file_id', 'speech'],
            {},
            ['client_id'],
        ),
        (
            'Templates', Templates,
            ['client_id', 'template_id', 'file_id', 'speech'],
            {'group': 'dev'},
            ['client_id', 'template_id'],
        ),
        (
            'Probes', Probes,
            ['template_ids', 'client_id', 'probe_id', 'file_id', 'speech'],
            {'group': 'dev'},
            ['template_ids', 'client_id'],
        ),
    )

    # Run the tester on each view, in the order listed above
    for view_name, view_class, output_names, view_parameters, irregular in view_checks:
        DatabaseTester(view_name, view_class, output_names,
                       parameters=view_parameters,
                       irregular_outputs=irregular)
.. Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ ..
.. Contact: beat.support@idiap.ch ..
.. ..
.. This file is part of the beat.examples module of the BEAT platform. ..
.. ..
.. Commercial License Usage ..
.. Licensees holding valid commercial BEAT licenses may use this file in ..
.. accordance with the terms contained in a written agreement between you ..
.. and Idiap. For further information contact tto@idiap.ch ..
.. ..
.. Alternatively, this file may be used under the terms of the GNU Affero ..
.. Public License version 3 as published by the Free Software and appearing ..
.. in the file LICENSE.AGPL included in the packaging of this file. ..
.. The BEAT platform is distributed in the hope that it will be useful, but ..
.. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ..
.. or FITNESS FOR A PARTICULAR PURPOSE. ..
.. ..
.. You should have received a copy of the GNU Affero Public License along ..
.. with the BEAT platform. If not, see http://www.gnu.org/licenses/. ..
-----------------------
The VoxForge Database
-----------------------
Changelog
=========
* **Version 3**, 02/Nov/2017:
- Port to beat.backend.python v1.4.2
* **Version 2**, 26/Jan/2016:
- Port to Bob v2
* **Version 1**, 21/Oct/2014:
- Initial release
Description
===========
`VoxForge <http://www.voxforge.org/>`_ offers a collection of transcribed
speech for use with Free and Open Source Speech Recognition Engines.
`bob.db.voxforge <https://pypi.python.org/pypi/bob.db.voxforge>`_ defines a
speaker recognition protocol that uses a small subset of the English audio
files (only 6561 files) belonging to 30 randomly selected speakers. This subset
is split into three equivalent parts: Training (10 speakers), Development (10
speakers) and Test (10 speakers) sets.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment