diff --git a/advanced/databases/voxforge/5.json b/advanced/databases/voxforge/5.json new file mode 100644 index 0000000000000000000000000000000000000000..1365bdf883757485d4b60beb521a0b422b44e103 --- /dev/null +++ b/advanced/databases/voxforge/5.json @@ -0,0 +1,41 @@ +{ + "description": "The VoxForge Database", + "root_folder": "/idiap/resource/database/VoxForge/dbase/SpeechCorpus/Trunk/Audio/Main/16kHz_16bit", + "protocols": [ + { + "name": "default", + "template": "advanced_speaker_recognition/1", + "views": { + "train": { + "view": "Train", + "parameters": {} + }, + "dev_templates": { + "view": "Templates", + "parameters": { + "group": "dev" + } + }, + "dev_probes": { + "view": "Probes", + "parameters": { + "group": "dev" + } + }, + "test_templates": { + "view": "Templates", + "parameters": { + "group": "eval" + } + }, + "test_probes": { + "view": "Probes", + "parameters": { + "group": "eval" + } + } + } + } + ], + "schema_version": 2 +} \ No newline at end of file diff --git a/advanced/databases/voxforge/5.py b/advanced/databases/voxforge/5.py new file mode 100644 index 0000000000000000000000000000000000000000..ac3e69855f51457bcc6e937d732f5a08ec81f6a1 --- /dev/null +++ b/advanced/databases/voxforge/5.py @@ -0,0 +1,323 @@ +############################################################################### +# # +# Copyright (c) 2018 Idiap Research Institute, http://www.idiap.ch/ # +# Contact: beat.support@idiap.ch # +# # +# This file is part of the beat.examples module of the BEAT platform. # +# # +# Commercial License Usage # +# Licensees holding valid commercial BEAT licenses may use this file in # +# accordance with the terms contained in a written agreement between you # +# and Idiap. For further information contact tto@idiap.ch # +# # +# Alternatively, this file may be used under the terms of the GNU Affero # +# Public License version 3 as published by the Free Software and appearing # +# in the file LICENSE.AGPL included in the packaging of this file. 
#                                                                             #
# The BEAT platform is distributed in the hope that it will be useful, but    #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY  #
# or FITNESS FOR A PARTICULAR PURPOSE.                                        #
#                                                                             #
# You should have received a copy of the GNU Affero Public License along      #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/.           #
#                                                                             #
###############################################################################

import numpy as np
from collections import namedtuple

from beat.backend.python.database import View

import bob.db.voxforge
import bob.io.base
# NOTE(review): not referenced by name below; presumably imported for its side
# effect of registering audio codecs with bob.io.base -- confirm before removing.
import bob.io.audio


#----------------------------------------------------------


def _load_speech(path):
    """Load the audio file at ``path`` and return its scaled samples.

    The file is read with ``bob.io.base.load`` (looked up at call time, so the
    mock installed by ``setup_tests()`` is honoured).  Only the first row of
    the loaded array is used (presumably the first audio channel -- confirm).

    The samples are converted to float64 *before* being multiplied by 2**15,
    so integer input cannot overflow or be rejected during the scaling.  For
    floating-point input the result is the same as scaling first.
    (NOTE(review): the 2**15 factor presumably maps normalised samples onto
    the 16-bit PCM range -- confirm.)

    ``np.cast`` is deliberately not used: it was removed in NumPy 2.0.

    Parameters:
        path: location of the audio file, as stored in an index entry.

    Returns:
        A float64 numpy array holding the scaled samples.
    """
    audio = bob.io.base.load(path)
    return np.asarray(audio[0], dtype=np.float64) * 2 ** 15


#----------------------------------------------------------


class Train(View):
    """Outputs:
        - speech: "{{ system_user.username }}/array_1d_floats/1"
        - file_id: "{{ system_user.username }}/text/1"
        - client_id: "{{ system_user.username }}/text/1"

    One "file_id" is associated with a given "speech".
    Several "speech" are associated with a given "client_id".

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                   client_id                 | |                   client_id                 |
    ----------------------------------------------- -----------------------------------------------
    """

    def index(self, root_folder, parameters):
        """Build the list of entries served by this view.

        Parameters:
            root_folder: folder passed to ``make_path`` to build each '.wav'
                path.
            parameters: view parameters; not used by this view.

        Returns:
            A list of ``Entry(client_id, file_id, speech)`` tuples for the
            'world' group, ordered by (client_id, file id).  ``speech`` holds
            the path of the audio file, not its content.
        """
        Entry = namedtuple('Entry', ['client_id', 'file_id', 'speech'])

        # Open the database and load the objects to provide via the outputs
        db = bob.db.voxforge.Database()

        objs = sorted(db.objects(groups='world'),
                      key=lambda x: (x.client_id, x.id))

        return [Entry(x.client_id, x.id, x.make_path(root_folder, '.wav'))
                for x in objs]


    def get(self, output, index):
        """Return the data of ``output`` for the entry at position ``index``.

        ``self.objs`` is expected to hold the list produced by ``index()``.
        Text outputs are returned as ``{'text': ...}``, the audio samples as
        ``{'value': ...}``.  An unknown output name yields ``None``.
        """
        obj = self.objs[index]

        if output == 'client_id':
            return {
                'text': str(obj.client_id)
            }

        elif output == 'file_id':
            return {
                'text': str(obj.file_id)
            }

        elif output == 'speech':
            return {
                'value': _load_speech(obj.speech)
            }


#----------------------------------------------------------


class Templates(View):
    """Outputs:
        - speech: "{{ system_user.username }}/array_1d_floats/1"
        - file_id: "{{ system_user.username }}/text/1"
        - template_id: "{{ system_user.username }}/text/1"
        - client_id: "{{ system_user.username }}/text/1"

    One "file_id" is associated with a given "speech".
    Several "speech" are associated with a given "template_id".
    Several "template_id" are associated with a given "client_id".

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                  template_id                | |                  template_id                |
    ----------------------------------------------- -----------------------------------------------
    -----------------------------------------------------------------------------------------------
    |                                           client_id                                         |
    -----------------------------------------------------------------------------------------------

    Note: for this particular database, there is only one "template_id"
    per "client_id".
    """

    def index(self, root_folder, parameters):
        """Build the list of entries served by this view.

        Parameters:
            root_folder: folder passed to ``make_path`` to build each '.wav'
                path.
            parameters: view parameters; ``parameters['group']`` selects the
                database group to query.

        Returns:
            A list of ``Entry(client_id, template_id, file_id, speech)``
            tuples, one per 'enroll' file of each template of the group,
            ordered by (client_id, template_id, file_id).
        """
        Entry = namedtuple('Entry', ['client_id', 'template_id', 'file_id', 'speech'])

        # Open the database and load the objects to provide via the outputs
        db = bob.db.voxforge.Database()

        group = parameters['group']

        entries = []

        for template_id in db.model_ids(groups=group):
            objs = db.objects(groups=group,
                              purposes='enroll',
                              model_ids=[template_id])

            entries.extend(
                Entry(x.client_id, template_id, x.id,
                      x.make_path(root_folder, '.wav'))
                for x in objs
            )

        return sorted(entries,
                      key=lambda x: (x.client_id, x.template_id, x.file_id))


    def get(self, output, index):
        """Return the data of ``output`` for the entry at position ``index``.

        ``self.objs`` is expected to hold the list produced by ``index()``.
        Text outputs are returned as ``{'text': ...}``, the audio samples as
        ``{'value': ...}``.  An unknown output name yields ``None``.
        """
        obj = self.objs[index]

        if output == 'client_id':
            return {
                'text': str(obj.client_id)
            }

        elif output == 'template_id':
            return {
                'text': str(obj.template_id)
            }

        elif output == 'file_id':
            return {
                'text': str(obj.file_id)
            }

        elif output == 'speech':
            return {
                'value': _load_speech(obj.speech)
            }


#----------------------------------------------------------


class Probes(View):
    """Outputs:
        - speech: "{{ system_user.username }}/array_1d_floats/1"
        - file_id: "{{ system_user.username }}/text/1"
        - probe_id: "{{ system_user.username }}/text/1"
        - client_id: "{{ system_user.username }}/text/1"
        - template_ids: "{{ system_user.username }}/array_1d_text/1"

    One "file_id" is associated with a given "speech".
    One "probe_id" is associated with a given "speech".
    Several "speech" are associated with a given "client_id".
    Several "client_id" are associated with a given "template_ids".

    Each probe must be matched against a number of templates defined by a list of
    client identifiers.

    --------------- --------------- --------------- --------------- --------------- ---------------
    |    speech   | |    speech   | |    speech   | |    speech   | |    speech   | |    speech   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                   client_id                 | |                   client_id                 |
    ----------------------------------------------- -----------------------------------------------
    -----------------------------------------------------------------------------------------------
    |                                         template_ids                                        |
    -----------------------------------------------------------------------------------------------
    """

    def index(self, root_folder, parameters):
        """Build the list of entries served by this view.

        Parameters:
            root_folder: folder passed to ``make_path`` to build each '.wav'
                path.
            parameters: view parameters; ``parameters['group']`` selects the
                database group to query.

        Returns:
            A list of ``Entry(template_ids, client_id, probe_id, file_id,
            speech)`` tuples, one per 'probe' file of the group.
            ``template_ids`` lists, in the order returned by
            ``db.model_ids``, the templates the probe is associated with;
            ``probe_id`` and ``file_id`` both carry the file id.  Entries are
            ordered by (number of templates, template_ids, client_id,
            file_id).
        """
        Entry = namedtuple('Entry', ['template_ids', 'client_id', 'probe_id', 'file_id', 'speech'])

        # Open the database and load the objects to provide via the outputs
        db = bob.db.voxforge.Database()

        group = parameters['group']
        template_ids = db.model_ids(groups=group)

        # Probe file ids of each template, kept as sets: they are only used
        # for the membership tests below, which would otherwise scan a list
        # for every (probe, template) pair.
        template_probes = {}
        for template_id in template_ids:
            template_probes[template_id] = set(
                p.id for p in db.objects(groups=group,
                                         purposes='probe',
                                         model_ids=[template_id])
            )

        objs = sorted(db.objects(groups=group,
                                 purposes='probe'),
                      key=lambda x: (x.client_id, x.id))

        entries = []
        for obj in objs:
            templates = [template_id for template_id in template_ids
                         if obj.id in template_probes[template_id]]

            # The file id is used both as probe_id and as file_id
            entries.append(Entry(templates, obj.client_id, obj.id, obj.id,
                                 obj.make_path(root_folder, '.wav')))

        return sorted(entries,
                      key=lambda x: (len(x.template_ids), x.template_ids,
                                     x.client_id, x.file_id))


    def get(self, output, index):
        """Return the data of ``output`` for the entry at position ``index``.

        ``self.objs`` is expected to hold the list produced by ``index()``.
        Text outputs are returned as ``{'text': ...}`` ('template_ids' as a
        list of strings), the audio samples as ``{'value': ...}``.  An unknown
        output name yields ``None``.
        """
        obj = self.objs[index]

        if output == 'template_ids':
            return {
                'text': [str(x) for x in obj.template_ids]
            }

        elif output == 'client_id':
            return {
                'text': str(obj.client_id)
            }

        elif output == 'probe_id':
            return {
                'text': str(obj.probe_id)
            }

        elif output == 'file_id':
            return {
                'text': str(obj.file_id)
            }

        elif output == 'speech':
            return {
                'value': _load_speech(obj.speech)
            }


#----------------------------------------------------------


def setup_tests():
    """Replace ``bob.io.base.load`` by a mock so no audio file is read."""

    # Install a mock load function for the audio files.  It returns zeros
    # (not uninitialised memory) so that the self-test is deterministic.
    def mock_load(path):
        return np.zeros((1, 512), dtype=np.uint8)

    bob.io.base.load = mock_load


#----------------------------------------------------------


# Test the behavior of the views (on fake data)
if __name__ == '__main__':

    setup_tests()

    view = Train()
    view.objs = view.index(
        root_folder='',
        parameters=dict(
        )
    )
    view.get('client_id', 0)
    view.get('file_id', 0)
    view.get('speech', 0)


    view = Templates()
    view.objs = view.index(
        root_folder='',
        parameters=dict(
            group='dev',
        )
    )
    view.get('client_id', 0)
    view.get('template_id', 0)
    view.get('file_id', 0)
    view.get('speech', 0)


    view = Probes()
    view.objs = view.index(
        root_folder='',
        parameters=dict(
            group='dev',
        )
    )
    view.get('template_ids', 0)
    view.get('client_id', 0)
    view.get('probe_id', 0)
    view.get('file_id', 0)
    view.get('speech', 0)
0000000000000000000000000000000000000000..1e5de09a78dffa345abfcb982f04385a7a6a239a --- /dev/null +++ b/advanced/databases/voxforge/5.rst @@ -0,0 +1 @@ +The VoxForge Database \ No newline at end of file