From a81d163847baa4166c012337102aa8b84c421b52 Mon Sep 17 00:00:00 2001 From: Philip ABBET <philip.abbet@idiap.ch> Date: Fri, 3 Nov 2017 10:49:39 +0100 Subject: [PATCH] Fix kboc16: There was already two versions of it on the platform, different from ours --- advanced/databases/kboc16/1.json | 109 +++---- advanced/databases/kboc16/1.py | 26 +- advanced/databases/kboc16/1.rst | 23 +- advanced/databases/kboc16/2.py | 476 ++++++++----------------------- advanced/databases/kboc16/2.rst | 44 ++- advanced/databases/kboc16/3.json | 76 +++++ advanced/databases/kboc16/3.py | 430 ++++++++++++++++++++++++++++ advanced/databases/kboc16/3.rst | 80 ++++++ 8 files changed, 768 insertions(+), 496 deletions(-) create mode 100644 advanced/databases/kboc16/3.json create mode 100644 advanced/databases/kboc16/3.py create mode 100644 advanced/databases/kboc16/3.rst diff --git a/advanced/databases/kboc16/1.json b/advanced/databases/kboc16/1.json index cf37377..3aee554 100644 --- a/advanced/databases/kboc16/1.json +++ b/advanced/databases/kboc16/1.json @@ -1,76 +1,41 @@ { - "description": "The KBOC16 database", - "root_folder": "/idiap/group/biometric/databases/kboc16", - "protocols": [ - { - "name": "A", - "template": "simple_keystroke_recognition_kboc16", - "sets": [ + "root_folder": "/idiap/group/biometric/databases/kboc16", + "description": "The KBOC16 database", + "protocols": [ { - "name": "templates", - "template": "templates", - "view": "Templates", - "parameters": { - "protocol": "A" - }, - "outputs": { - "file_id": "{{ system_user.username }}/uint64/1", - "client_id": "{{ system_user.username }}/text/1", - "template_id": "{{ system_user.username }}/text/1", - "keystroke": "{{ user.username }}/kboc16_keystroke/1" - } - }, - { - "name": "probes", - "template": "probes", - "view": "Probes", - "parameters": { - "protocol": "A" - }, - "outputs": { - "file_id": "{{ system_user.username }}/uint64/1", - "probe_id": "{{ system_user.username }}/uint64/1", - "client_id": "{{ 
system_user.username }}/text/1", - "template_ids": "{{ system_user.username }}/array_1d_text/1", - "keystroke": "{{ user.username }}/kboc16_keystroke/1" - } - } - ] - }, - { - "name": "D", - "template": "simple_keystroke_recognition_kboc16", - "sets": [ - { - "name": "templates", - "template": "templates", - "view": "Templates", - "parameters": { - "protocol": "D" - }, - "outputs": { - "file_id": "{{ system_user.username }}/uint64/1", - "client_id": "{{ system_user.username }}/text/1", - "template_id": "{{ system_user.username }}/text/1", - "keystroke": "{{ user.username }}/kboc16_keystroke/1" - } - }, - { - "name": "probes", - "template": "probes", - "view": "Probes", - "parameters": { - "protocol": "D" - }, - "outputs": { - "file_id": "{{ system_user.username }}/uint64/1", - "probe_id": "{{ system_user.username }}/uint64/1", - "client_id": "{{ system_user.username }}/text/1", - "template_ids": "{{ system_user.username }}/array_1d_text/1", - "keystroke": "{{ user.username }}/kboc16_keystroke/1" - } + "name": "D", + "template": "simple_keystroke_recognition_kboc16", + "sets": [ + { + "outputs": { + "keystroke": "{{ system_user.username }}/kboc16_keystroke/1", + "file_id": "{{ system_user.username }}/uint64/1", + "client_id": "{{ system_user.username }}/text/1", + "template_id": "{{ system_user.username }}/text/1" + }, + "view": "Templates", + "name": "templates", + "parameters": { + "protocol": "D" + }, + "template": "templates" + }, + { + "outputs": { + "probe_id": "{{ system_user.username }}/uint64/1", + "keystroke": "{{ system_user.username }}/kboc16_keystroke/1", + "file_id": "{{ system_user.username }}/uint64/1", + "client_id": "{{ system_user.username }}/text/1", + "template_ids": "{{ system_user.username }}/array_1d_text/1" + }, + "view": "Probes", + "name": "probes", + "parameters": { + "protocol": "D" + }, + "template": "probes" + } + ] } - ] - } - ] + ] } diff --git a/advanced/databases/kboc16/1.py b/advanced/databases/kboc16/1.py index 03ad415..60437f2 
100644 --- a/advanced/databases/kboc16/1.py +++ b/advanced/databases/kboc16/1.py @@ -1,33 +1,9 @@ -############################################################################### -# # -# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/ # -# Contact: beat.support@idiap.ch # -# # -# This file is part of the beat.examples module of the BEAT platform. # -# # -# Commercial License Usage # -# Licensees holding valid commercial BEAT licenses may use this file in # -# accordance with the terms contained in a written agreement between you # -# and Idiap. For further information contact tto@idiap.ch # -# # -# Alternatively, this file may be used under the terms of the GNU Affero # -# Public License version 3 as published by the Free Software and appearing # -# in the file LICENSE.AGPL included in the packaging of this file. # -# The BEAT platform is distributed in the hope that it will be useful, but # -# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # -# or FITNESS FOR A PARTICULAR PURPOSE. # -# # -# You should have received a copy of the GNU Affero Public License along # -# with the BEAT platform. If not, see http://www.gnu.org/licenses/. # -# # -############################################################################### - import string import numpy import bob.db.kboc16 -keystroke_feature = ['', 'holdtime', 'key_events'] +keystroke_feature = ['', 'timestamps', 'key_events'] #keystroke_type = ['', 'given_name', 'family_name', 'email', 'nationality', 'id_number'] def keystroke_reader(filename): diff --git a/advanced/databases/kboc16/1.rst b/advanced/databases/kboc16/1.rst index e058314..201e96a 100644 --- a/advanced/databases/kboc16/1.rst +++ b/advanced/databases/kboc16/1.rst @@ -1,24 +1,3 @@ -.. Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/ .. -.. Contact: beat.support@idiap.ch .. -.. .. -.. This file is part of the beat.examples module of the BEAT platform. .. -.. .. -.. Commercial License Usage .. -.. 
Licensees holding valid commercial BEAT licenses may use this file in .. -.. accordance with the terms contained in a written agreement between you .. -.. and Idiap. For further information contact tto@idiap.ch .. -.. .. -.. Alternatively, this file may be used under the terms of the GNU Affero .. -.. Public License version 3 as published by the Free Software and appearing .. -.. in the file LICENSE.AGPL included in the packaging of this file. .. -.. The BEAT platform is distributed in the hope that it will be useful, but .. -.. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY .. -.. or FITNESS FOR A PARTICULAR PURPOSE. .. -.. .. -.. You should have received a copy of the GNU Affero Public License along .. -.. with the BEAT platform. If not, see http://www.gnu.org/licenses/. .. - - The KBOC16 Database ------------------- @@ -61,4 +40,4 @@ available in the publications section of the `ATVS group webpage .. [BTAS16] To appear. Please remember to reference [BTAS15]_ and [BTAS16]_ on any work made public, -whatever the form, based directly or indirectly on any part of the KBOC16 DB. +whatever the form, based directly or indirectly on any part of the KBOC16 DB. \ No newline at end of file diff --git a/advanced/databases/kboc16/2.py b/advanced/databases/kboc16/2.py index 34c0dd0..03ad415 100644 --- a/advanced/databases/kboc16/2.py +++ b/advanced/databases/kboc16/2.py @@ -1,6 +1,6 @@ ############################################################################### # # -# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ # +# Copyright (c) 2016 Idiap Research Institute, http://www.idiap.ch/ # # Contact: beat.support@idiap.ch # # # # This file is part of the beat.examples module of the BEAT platform. 
# @@ -23,408 +23,180 @@ ############################################################################### import string -import numpy as np +import numpy import bob.db.kboc16 -#---------------------------------------------------------- - +keystroke_feature = ['', 'holdtime', 'key_events'] +#keystroke_type = ['', 'given_name', 'family_name', 'email', 'nationality', 'id_number'] def keystroke_reader(filename): - times = [] - keys = [] - - for line in open(filename, 'r').readlines(): - parts = string.split(line) - times.append(numpy.int32(parts[1])) - keys.append(parts[0]) - - return dict( - holdtime = times, - key_events = keys, - ) - - -#---------------------------------------------------------- - - -def get_client_end_index(objs, client_id, client_start_index, - start_index, end_index): - client_end_index = client_start_index - - while client_end_index + 1 <= end_index: - obj = objs[client_end_index + 1 - start_index] - - if isinstance(obj, tuple): - obj = obj[1] - - if obj.client_id != client_id: - return client_end_index - - client_end_index += 1 - - return end_index - - -#---------------------------------------------------------- - - -def get_value_end_index(objs, value, index_in_tuple, value_start_index, - start_index, end_index): - value_end_index = value_start_index - - while value_end_index + 1 <= end_index: - id = objs[value_end_index + 1 - start_index][index_in_tuple] + data = {} + valuesTime = [] + valuesKey = [] + for line in open(filename, 'r').readlines(): + parts = string.split(line) + valuesTime.append(numpy.int32(parts[1])) + valuesKey.append(parts[0]) - if id != value: - return value_end_index + data[keystroke_feature[1]] = valuesTime + data[keystroke_feature[2]] = valuesKey - value_end_index += 1 - - return end_index - - -#---------------------------------------------------------- + return data class Templates: - """Outputs: - - keystroke: "{{ user.username }}/kboc16_keystroke/1 - - file_id: "{{ system_user.username }}/uint64/1" - - template_id: "{{ 
system_user.username }}/text/1" - - client_id: "{{ system_user.username }}/text/1" - - One "file_id" is associated with a given "keystroke". - Several "keystroke" are associated with a given "template_id". - Several "template_id" are associated with a given "client_id". - - --------------- --------------- --------------- --------------- --------------- --------------- - | keystroke | | keystroke | | keystroke | | keystroke | | keystroke | | keystroke | - --------------- --------------- --------------- --------------- --------------- --------------- - --------------- --------------- --------------- --------------- --------------- --------------- - | file_id | | file_id | | file_id | | file_id | | file_id | | file_id | - --------------- --------------- --------------- --------------- --------------- --------------- - ----------------------------------------------- ----------------------------------------------- - | template_id | | template_id | - ----------------------------------------------- ----------------------------------------------- - ----------------------------------------------------------------------------------------------- - | client_id | - ----------------------------------------------------------------------------------------------- - - Note: for this particular database, there is only one "template_id" - per "client_id". 
- """ - - def setup(self, root_folder, outputs, parameters, force_start_index=None, - force_end_index=None): - - # Initialisations - self.root_folder = root_folder - self.outputs = outputs - self.parameters = parameters - - # Open the database and load the objects to provide via the outputs - self.db = bob.db.kboc16.Database() - - template_ids = self.db.model_ids(groups='eval', - protocol=parameters['protocol']) - - self.objs = [] - - for template_id in template_ids: - objs = self.db.objects(groups='eval', - protocol=self.parameters['protocol'], - purposes='enrol', - model_ids=[template_id]) - - self.objs.extend([ (template_id, obj) for obj in objs ]) - - self.objs = sorted(self.objs, key=lambda x: (x[1].client_id, x[0], x[1].id)) - - # Determine the range of indices that must be provided - self.start_index = force_start_index if force_start_index is not None else 0 - self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1 - - self.objs = self.objs[self.start_index : self.end_index + 1] - - self.next_index = self.start_index + # Reasoning: Each client may have a number of models in certain databases. + # So, each model receives an unique identifier. Those identifiers are linked + # to the client identifier and contain a number of keystrokes to generated + # the model from. 
+ + def setup(self, root_folder, outputs, parameters, force_start_index=None, force_end_index=None): + self.root_folder = root_folder + self.outputs = outputs + self.parameters = parameters + + self.db = bob.db.kboc16.Database() + + self.template_ids = sorted([str(v) for v in self.db.model_ids(groups='eval', + protocol=parameters['protocol'])]) + self.objs = None + + self.current_template_index = 0 + self.current_obj_index = 0 + self.next_index = 0 + + self.force_start_index = force_start_index + self.force_end_index = force_end_index + + # Retrieve only 'useful' data + ### End index + #if self.force_end_index is not None: + # self.objs = self.objs[:self.force_end_index+1] + ### Start index + if self.force_start_index is None: + self.force_start_index = 0 + while self.next_index < self.force_start_index: + self.next() return True - def done(self, last_data_index): - return last_data_index >= self.end_index + def done(self): + return (self.current_template_index >= len(self.template_ids)) or (self.force_end_index is not None and self.force_end_index < self.next_index) def next(self): - (template_id, obj) = self.objs[self.next_index - self.start_index] - - - # Output: template_id (only provide data when the template_id change) - if self.outputs['template_id'].isConnected() and \ - self.outputs['template_id'].last_written_data_index < self.next_index: - - template_end_index = get_value_end_index(self.objs, template_id, 0, - self.next_index, - self.start_index, - self.end_index) - - self.outputs['template_id'].write( - { - 'text': str(template_id) - }, - template_end_index - ) + if self.objs is None: + # probe for the specific objects concerning a given client + self.objs = sorted(self.db.objects(model_ids=[self.template_ids[self.current_template_index]], + groups='eval', + protocol=self.parameters['protocol'], + purposes='enrol'), key=lambda x: x.id) + if self.force_start_index <= self.next_index and (self.force_end_index is None or self.force_end_index >= 
self.next_index): + # For this database, 'self.template_ids[self.current_template_index]' corresponds to 'obj.client_id' + if self.outputs['template_id'].isConnected(): + self.outputs['template_id'].write({'text': str(self.template_ids[self.current_template_index])}, self.next_index+len(self.objs)-1) - # Output: client_id (only provide data when the client_id change) - if self.outputs['client_id'].isConnected() and \ - self.outputs['client_id'].last_written_data_index < self.next_index: + if self.outputs['client_id'].isConnected(): + self.outputs['client_id'].write({'text': str(self.objs[0].client_id)}, self.next_index+len(self.objs)-1) - client_end_index = get_client_end_index(self.objs, obj.client_id, - self.next_index, - self.start_index, - self.end_index) + obj = self.objs[self.current_obj_index] - self.outputs['client_id'].write( - { - 'text': str(obj.client_id) - }, - client_end_index - ) + if self.outputs['file_id'].isConnected() or self.outputs['keystroke'].isConnected(): + if self.force_start_index <= self.next_index and (self.force_end_index is None or self.force_end_index >= self.next_index): + if self.outputs['file_id'].isConnected(): + self.outputs['file_id'].write({'value': numpy.uint64(obj.id)}, self.next_index) + if self.outputs['keystroke'].isConnected(): + data = keystroke_reader(obj.make_path(self.root_folder, '.txt')) + self.outputs['keystroke'].write(data, self.next_index) - # Output: file_id (provide data at each iteration) - if self.outputs['file_id'].isConnected(): - self.outputs['file_id'].write( - { - 'value': np.uint64(obj.id) - }, - self.next_index - ) - + self.next_index += 1 + self.current_obj_index += 1 - # Output: image (provide data at each iteration) - if self.outputs['keystroke'].isConnected(): - self.outputs['keystroke'].write( - keystroke_reader(obj.make_path(self.root_folder, '.txt')), - self.next_index - ) + else: + self.next_index += len(self.objs) + self.current_obj_index = len(self.objs) - - # Determine the next data index 
that must be provided - self.next_index = 1 + min([ x.last_written_data_index for x in self.outputs - if x.isConnected() ] - ) + if self.current_obj_index == len(self.objs): + self.objs = None + self.current_obj_index = 0 + self.current_template_index += 1 return True -#---------------------------------------------------------- - - class Probes: - """Outputs: - - keystroke: "{{ user.username }}/kboc16_keystroke/1 - - file_id: "{{ system_user.username }}/uint64/1" - - client_id: "{{ system_user.username }}/text/1" - - probe_id: "{{ system_user.username }}/uint64/1", - - template_ids: "{{ system_user.username }}/array_1d_text/1", - - One "file_id" is associated with a given "keystroke". - One "probe_id" is associated with a given "keystroke". - Several "keystroke" are associated with a given "client_id". - Several "client_id" are associated with a given "template_ids". - - --------------- --------------- --------------- --------------- --------------- --------------- - | keystroke | | keystroke | | keystroke | | keystroke | | keystroke | | keystroke | - --------------- --------------- --------------- --------------- --------------- --------------- - --------------- --------------- --------------- --------------- --------------- --------------- - | file_id | | file_id | | file_id | | file_id | | file_id | | file_id | - --------------- --------------- --------------- --------------- --------------- --------------- - --------------- --------------- --------------- --------------- --------------- --------------- - | probe_id | | probe_id | | probe_id | | probe_id | | probe_id | | probe_id | - --------------- --------------- --------------- --------------- --------------- --------------- - ----------------------------------------------- ----------------------------------------------- - | client_id | | client_id | - ----------------------------------------------- ----------------------------------------------- - 
----------------------------------------------------------------------------------------------- - | template_ids | - ----------------------------------------------------------------------------------------------- - """ - - def setup(self, root_folder, outputs, parameters, force_start_index=None, - force_end_index=None): - - # Initialisations + # Reasoning: Each client may have a number of probes. Each probe may be + # composed of any number of keystrokes. So, each probe receives an unique + # identifier. Those identifiers are linked to the client identifier and + # contain a number of keystrokes to generated the match/comparison score. + # Each probe must be matched against a number of models defined by a list + # of client identifiers. + + def setup(self, root_folder, outputs, parameters, force_start_index=None, force_end_index=None): self.root_folder = root_folder - self.outputs = outputs - self.parameters = parameters - - # Open the database and load the objects to provide via the outputs - self.db = bob.db.kboc16.Database() - - template_ids = sorted(self.db.model_ids(protocol=parameters['protocol'], - groups='eval'), - key=lambda x: int(x)) - - template_probes = {} - for template_id in template_ids: - objs = sorted(self.db.objects(protocol=parameters['protocol'], - groups='eval', - purposes='probe', - model_ids=[template_id]), - key=lambda x: (x.client_id, x.id)) + self.outputs = outputs + self.parameters = parameters - template_probes[template_id] = [ p.id for p in objs ] + self.db = bob.db.kboc16.Database() - objs = sorted(self.db.objects(protocol=parameters['protocol'], - groups='eval', - purposes='probe'), - key=lambda x: (x.client_id, x.id)) + self.template_ids = sorted([str(v) for v in self.db.model_ids(groups='eval', + protocol=parameters['protocol'])]) + self.objs = sorted(self.db.objects(groups='eval', protocol=self.parameters['protocol'], purposes='probe'), key=lambda x: x.id) - self.objs = [] - for obj in objs: - templates = [ template_id for 
template_id in template_ids - if obj.id in template_probes[template_id] ] - self.objs.append( (templates, obj) ) + self.template_probes = {} + for t_id in self.template_ids: + self.template_probes[t_id] = [p.id for p in sorted(self.db.objects(model_ids=[t_id], groups='eval', protocol=self.parameters['protocol'], purposes='probe'), key=lambda x: x.id)] - self.objs = sorted(self.objs, key=lambda x: (len(x[0]), x[0], x[1].client_id, x[1].id)) + self.next_index = 0 - # Determine the range of indices that must be provided - self.start_index = force_start_index if force_start_index is not None else 0 - self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1 + self.force_start_index = force_start_index + self.force_end_index = force_end_index - self.objs = self.objs[self.start_index : self.end_index + 1] - - self.next_index = self.start_index + # Retrieve only 'useful' data + ### End index + if self.force_end_index is not None: + self.objs = self.objs[:self.force_end_index+1] + ### Start index + if self.force_start_index is not None: + self.objs = self.objs[self.force_start_index:] + self.next_index = self.force_start_index + else: + self.force_start_index = 0 return True - def done(self, last_data_index): - return last_data_index >= self.end_index + def done(self): + return (self.next_index-self.force_start_index >= len(self.objs)) def next(self): - (template_ids, obj) = self.objs[self.next_index - self.start_index] - - - # Output: template_ids (only provide data when the template_ids change) - if self.outputs['template_ids'].isConnected() and \ - self.outputs['template_ids'].last_written_data_index < self.next_index: - - template_ids_end_index = get_value_end_index(self.objs, template_ids, 0, - self.next_index, - self.start_index, - self.end_index) - - self.outputs['template_ids'].write( - { - 'text': [ str(x) for x in template_ids ] - }, - template_ids_end_index - ) - - - # Output: client_id (only provide data when the client_id change) 
- if self.outputs['client_id'].isConnected() and \ - self.outputs['client_id'].last_written_data_index < self.next_index: - - client_end_index = get_client_end_index(self.objs, obj.client_id, - self.next_index, - self.start_index, - self.end_index) - self.outputs['client_id'].write( - { - 'text': str(obj.client_id) - }, - client_end_index - ) + obj = self.objs[self.next_index-self.force_start_index] + if self.outputs['file_id'].isConnected(): + self.outputs['file_id'].write({'value': numpy.uint64(obj.id)}, self.next_index) - # Output: probe_id (provide data at each iteration) if self.outputs['probe_id'].isConnected(): - self.outputs['probe_id'].write( - { - 'value': np.uint64(obj.id) - }, - self.next_index - ) - - # Output: file_id (provide data at each iteration) - if self.outputs['file_id'].isConnected(): - self.outputs['file_id'].write( - { - 'value': np.uint64(obj.id) - }, - self.next_index - ) + self.outputs['probe_id'].write({'value': numpy.uint64(obj.id)}, self.next_index) + if self.outputs['client_id'].isConnected(): + self.outputs['client_id'].write({'text': str(obj.client_id)}, self.next_index) - # Output: image (provide data at each iteration) - if self.outputs['keystroke'].isConnected(): - self.outputs['keystroke'].write( - keystroke_reader(obj.make_path(self.root_folder, '.txt')), - self.next_index - ) + if self.outputs['template_ids'].isConnected(): + data = { + 'text': [t_id for t_id in self.template_ids if obj.id in self.template_probes[t_id]] + } + self.outputs['template_ids'].write(data, self.next_index) + if self.outputs['keystroke'].isConnected(): + data = keystroke_reader(obj.make_path(self.root_folder, '.txt')) + self.outputs['keystroke'].write(data, self.next_index) - # Determine the next data index that must be provided - self.next_index = 1 + min([ x.last_written_data_index for x in self.outputs - if x.isConnected() ] - ) + self.next_index += 1 return True - - -#---------------------------------------------------------- - - -def 
setup_tests(): - # Install a mock load function for the keystrokes - def mock_keystroke_reader(filename): - return {} - - global keystroke_reader - keystroke_reader = mock_keystroke_reader - - -#---------------------------------------------------------- - - -# Test the behavior of the views (on fake data) -if __name__ == '__main__': - - setup_tests() - - from beat.backend.python.database import DatabaseTester - - DatabaseTester('Templates', Templates, - [ - 'client_id', - 'template_id', - 'file_id', - 'keystroke', - ], - dict( - protocol = 'A', - ) - ) - - DatabaseTester('Probes', Probes, - [ - 'template_ids', - 'client_id', - 'probe_id', - 'file_id', - 'keystroke', - ], - dict( - protocol = 'A', - ), - irregular_outputs = [ - 'client_id', - ] - ) diff --git a/advanced/databases/kboc16/2.rst b/advanced/databases/kboc16/2.rst index dfc6911..ccb5f0c 100644 --- a/advanced/databases/kboc16/2.rst +++ b/advanced/databases/kboc16/2.rst @@ -1,35 +1,15 @@ -.. Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ .. -.. Contact: beat.support@idiap.ch .. -.. .. -.. This file is part of the beat.examples module of the BEAT platform. .. -.. .. -.. Commercial License Usage .. -.. Licensees holding valid commercial BEAT licenses may use this file in .. -.. accordance with the terms contained in a written agreement between you .. -.. and Idiap. For further information contact tto@idiap.ch .. -.. .. -.. Alternatively, this file may be used under the terms of the GNU Affero .. -.. Public License version 3 as published by the Free Software and appearing .. -.. in the file LICENSE.AGPL included in the packaging of this file. .. -.. The BEAT platform is distributed in the hope that it will be useful, but .. -.. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY .. -.. or FITNESS FOR A PARTICULAR PURPOSE. .. -.. .. -.. You should have received a copy of the GNU Affero Public License along .. -.. with the BEAT platform. 
If not, see http://www.gnu.org/licenses/. .. - - The KBOC16 Database ------------------- + Changelog ========= -* **Version 2**, 30/Oct/2017: +* **Version 2**, 4/Apr/2016: - - Port to beat.backend.python v1.4.2 + - Added protocol ``A`` -* **Version 1**: +* **Version 1**, 09/Feb/2016: - Initial release @@ -68,9 +48,23 @@ competition results we refer the reader to (the following articles are publicly available in the publications section of the `ATVS group webpage <http://atvs.ii.uam.es/listpublications.do>`_) +Implemented protocols are: + +* **Protocol A**: Complete dataset (300 users). This is the dataset used during + the final kboc16 evaluation (same dataset in the offline evaluation). +* **Protocol D**: Subset with first 100 users of the complete dataset. This + dataset was used to better generalize the models of the participants and + avoid overfitting. + + + +References +========== + + .. [BTAS15] A. Morales, M. Falanga, J. Fierrez, C. Sansone and J. Ortega-Garcia, ''Keystroke Dynamics Recognition based on Personal Data: A Comparative Experimental Evaluation Implementing Reproducible Research'', in Proc. of the IEEE Seventh International Conference on Biometrics: Theory, Applications and Systems, Arlington, Virginia, USA, September 2015. .. [BTAS16] To appear. Please remember to reference [BTAS15]_ and [BTAS16]_ on any work made public, -whatever the form, based directly or indirectly on any part of the KBOC16 DB. +whatever the form, based directly or indirectly on any part of the KBOC16 DB. 
\ No newline at end of file diff --git a/advanced/databases/kboc16/3.json b/advanced/databases/kboc16/3.json new file mode 100644 index 0000000..cf37377 --- /dev/null +++ b/advanced/databases/kboc16/3.json @@ -0,0 +1,76 @@ +{ + "description": "The KBOC16 database", + "root_folder": "/idiap/group/biometric/databases/kboc16", + "protocols": [ + { + "name": "A", + "template": "simple_keystroke_recognition_kboc16", + "sets": [ + { + "name": "templates", + "template": "templates", + "view": "Templates", + "parameters": { + "protocol": "A" + }, + "outputs": { + "file_id": "{{ system_user.username }}/uint64/1", + "client_id": "{{ system_user.username }}/text/1", + "template_id": "{{ system_user.username }}/text/1", + "keystroke": "{{ user.username }}/kboc16_keystroke/1" + } + }, + { + "name": "probes", + "template": "probes", + "view": "Probes", + "parameters": { + "protocol": "A" + }, + "outputs": { + "file_id": "{{ system_user.username }}/uint64/1", + "probe_id": "{{ system_user.username }}/uint64/1", + "client_id": "{{ system_user.username }}/text/1", + "template_ids": "{{ system_user.username }}/array_1d_text/1", + "keystroke": "{{ user.username }}/kboc16_keystroke/1" + } + } + ] + }, + { + "name": "D", + "template": "simple_keystroke_recognition_kboc16", + "sets": [ + { + "name": "templates", + "template": "templates", + "view": "Templates", + "parameters": { + "protocol": "D" + }, + "outputs": { + "file_id": "{{ system_user.username }}/uint64/1", + "client_id": "{{ system_user.username }}/text/1", + "template_id": "{{ system_user.username }}/text/1", + "keystroke": "{{ user.username }}/kboc16_keystroke/1" + } + }, + { + "name": "probes", + "template": "probes", + "view": "Probes", + "parameters": { + "protocol": "D" + }, + "outputs": { + "file_id": "{{ system_user.username }}/uint64/1", + "probe_id": "{{ system_user.username }}/uint64/1", + "client_id": "{{ system_user.username }}/text/1", + "template_ids": "{{ system_user.username }}/array_1d_text/1", + 
"keystroke": "{{ user.username }}/kboc16_keystroke/1" + } + } + ] + } + ] +} diff --git a/advanced/databases/kboc16/3.py b/advanced/databases/kboc16/3.py new file mode 100644 index 0000000..34c0dd0 --- /dev/null +++ b/advanced/databases/kboc16/3.py @@ -0,0 +1,430 @@ +############################################################################### +# # +# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ # +# Contact: beat.support@idiap.ch # +# # +# This file is part of the beat.examples module of the BEAT platform. # +# # +# Commercial License Usage # +# Licensees holding valid commercial BEAT licenses may use this file in # +# accordance with the terms contained in a written agreement between you # +# and Idiap. For further information contact tto@idiap.ch # +# # +# Alternatively, this file may be used under the terms of the GNU Affero # +# Public License version 3 as published by the Free Software and appearing # +# in the file LICENSE.AGPL included in the packaging of this file. # +# The BEAT platform is distributed in the hope that it will be useful, but # +# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY # +# or FITNESS FOR A PARTICULAR PURPOSE. # +# # +# You should have received a copy of the GNU Affero Public License along # +# with the BEAT platform. If not, see http://www.gnu.org/licenses/. 
#                                                                             #
###############################################################################

import string  # no longer used by keystroke_reader(); kept as a file-level import
import numpy as np


#----------------------------------------------------------


def _open_database():
    """Create the ``bob.db.kboc16`` database handle used by the views.

    The package is imported here rather than at module level so that the
    helper functions of this module can be loaded without it.
    """
    import bob.db.kboc16

    return bob.db.kboc16.Database()


#----------------------------------------------------------


def keystroke_reader(filename):
    """Read a keystroke file and return its content as a dictionary.

    Every non-blank line is split on whitespace; the first field is taken as
    the key event and the second one as the associated time.

    Parameters:
        filename: path of the text file to read

    Returns:
        A dictionary with two entries of equal length:
          - ``holdtime``: list of ``numpy.int32`` values (second field)
          - ``key_events``: list of strings (first field)

    Raises:
        IndexError: if a non-blank line has fewer than two fields
        ValueError: if the second field of a line is not an integer
    """
    times = []
    keys = []

    # The context manager guarantees that the file is closed, even on error
    with open(filename, 'r') as keystroke_file:
        for line in keystroke_file:
            parts = line.split()

            # Blank lines (e.g. at the end of the file) carry no data
            if not parts:
                continue

            times.append(np.int32(parts[1]))
            keys.append(parts[0])

    return dict(
        holdtime = times,
        key_events = keys,
    )


#----------------------------------------------------------


def get_client_end_index(objs, client_id, client_start_index,
                         start_index, end_index):
    """Find the last data index of a run of entries sharing a client id.

    Parameters:
        objs: list holding the entries for the data indices ``start_index``
            to ``end_index``; an entry is either an object with a
            ``client_id`` attribute, or a tuple whose second element is such
            an object
        client_id: client id of the run
        client_start_index: data index at which the run starts
        start_index: data index of the first entry of ``objs``
        end_index: data index of the last entry to consider

    Returns:
        The data index of the last consecutive entry whose client id is
        ``client_id``, or ``end_index`` if the run extends to the end.
    """
    client_end_index = client_start_index

    while client_end_index + 1 <= end_index:
        # Data indices are absolute, 'objs' starts at 'start_index'
        obj = objs[client_end_index + 1 - start_index]

        if isinstance(obj, tuple):
            obj = obj[1]

        if obj.client_id != client_id:
            return client_end_index

        client_end_index += 1

    return end_index


#----------------------------------------------------------


def get_value_end_index(objs, value, index_in_tuple, value_start_index,
                        start_index, end_index):
    """Find the last data index of a run of tuples sharing a value.

    Parameters:
        objs: list of tuples for the data indices ``start_index`` to
            ``end_index``
        value: value shared by the entries of the run
        index_in_tuple: position, in each tuple, of the element to compare
            with ``value``
        value_start_index: data index at which the run starts
        start_index: data index of the first entry of ``objs``
        end_index: data index of the last entry to consider

    Returns:
        The data index of the last consecutive entry whose element at
        ``index_in_tuple`` equals ``value``, or ``end_index`` if the run
        extends to the end.
    """
    value_end_index = value_start_index

    while value_end_index + 1 <= end_index:
        # Data indices are absolute, 'objs' starts at 'start_index'
        candidate = objs[value_end_index + 1 - start_index][index_in_tuple]

        if candidate != value:
            return value_end_index

        value_end_index += 1

    return end_index


#----------------------------------------------------------


class Templates:
    """Outputs:
        - keystroke: "{{ user.username }}/kboc16_keystroke/1"
        - file_id: "{{ system_user.username }}/uint64/1"
        - template_id: "{{ system_user.username }}/text/1"
        - client_id: "{{ system_user.username }}/text/1"

    One "file_id" is associated with a given "keystroke".
    Several "keystroke" are associated with a given "template_id".
    Several "template_id" are associated with a given "client_id".

    --------------- --------------- --------------- --------------- --------------- ---------------
    |  keystroke  | |  keystroke  | |  keystroke  | |  keystroke  | |  keystroke  | |  keystroke  |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                 template_id                 | |                 template_id                 |
    ----------------------------------------------- -----------------------------------------------
    -----------------------------------------------------------------------------------------------
    |                                          client_id                                          |
    -----------------------------------------------------------------------------------------------

    Note: for this particular database, there is only one "template_id"
    per "client_id".
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the enrolment objects and select the indices to provide.

        Parameters:
            root_folder: folder given to ``make_path()`` to locate the
                keystroke files
            outputs: the outputs of the view, accessed by name and iterated
                over in ``next()``
            parameters: dictionary with a ``protocol`` entry
            force_start_index: first data index to provide (default: 0)
            force_end_index: last data index to provide, inclusive
                (default: the index of the last object)

        Returns:
            True
        """

        # Initialisations
        self.root_folder = root_folder
        self.outputs = outputs
        self.parameters = parameters

        # Open the database and load the objects to provide via the outputs
        self.db = _open_database()

        template_ids = self.db.model_ids(groups='eval',
                                         protocol=parameters['protocol'])

        self.objs = []

        for template_id in template_ids:
            objs = self.db.objects(groups='eval',
                                   protocol=parameters['protocol'],
                                   purposes='enrol',
                                   model_ids=[template_id])

            self.objs.extend([ (template_id, obj) for obj in objs ])

        # Entries of the same client, then of the same template, must be
        # contiguous: next() writes one block of data per run of equal values
        self.objs = sorted(self.objs, key=lambda x: (x[1].client_id, x[0], x[1].id))

        # Determine the range of indices that must be provided
        self.start_index = force_start_index if force_start_index is not None else 0
        self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1

        self.objs = self.objs[self.start_index : self.end_index + 1]

        self.next_index = self.start_index

        return True


    def done(self, last_data_index):
        """Tell whether ``last_data_index`` reaches the last index to provide."""
        return last_data_index >= self.end_index


    def next(self):
        """Write the data of the current index on the connected outputs.

        Returns:
            True
        """
        (template_id, obj) = self.objs[self.next_index - self.start_index]


        # Output: template_id (only provide data when the template_id changes)
        if self.outputs['template_id'].isConnected() and \
           self.outputs['template_id'].last_written_data_index < self.next_index:

            template_end_index = get_value_end_index(self.objs, template_id, 0,
                                                     self.next_index,
                                                     self.start_index,
                                                     self.end_index)

            self.outputs['template_id'].write(
                {
                    'text': str(template_id)
                },
                template_end_index
            )


        # Output: client_id (only provide data when the client_id changes)
        if self.outputs['client_id'].isConnected() and \
           self.outputs['client_id'].last_written_data_index < self.next_index:

            client_end_index = get_client_end_index(self.objs, obj.client_id,
                                                    self.next_index,
                                                    self.start_index,
                                                    self.end_index)

            self.outputs['client_id'].write(
                {
                    'text': str(obj.client_id)
                },
                client_end_index
            )


        # Output: file_id (provide data at each iteration)
        if self.outputs['file_id'].isConnected():
            self.outputs['file_id'].write(
                {
                    'value': np.uint64(obj.id)
                },
                self.next_index
            )


        # Output: keystroke (provide data at each iteration)
        if self.outputs['keystroke'].isConnected():
            self.outputs['keystroke'].write(
                keystroke_reader(obj.make_path(self.root_folder, '.txt')),
                self.next_index
            )


        # Determine the next data index that must be provided
        self.next_index = 1 + min([ x.last_written_data_index for x in self.outputs
                                                              if x.isConnected() ]
                                 )

        return True


#----------------------------------------------------------


class Probes:
    """Outputs:
        - keystroke: "{{ user.username }}/kboc16_keystroke/1"
        - file_id: "{{ system_user.username }}/uint64/1"
        - client_id: "{{ system_user.username }}/text/1"
        - probe_id: "{{ system_user.username }}/uint64/1"
        - template_ids: "{{ system_user.username }}/array_1d_text/1"

    One "file_id" is associated with a given "keystroke".
    One "probe_id" is associated with a given "keystroke".
    Several "keystroke" are associated with a given "client_id".
    Several "client_id" are associated with a given "template_ids".

    --------------- --------------- --------------- --------------- --------------- ---------------
    |  keystroke  | |  keystroke  | |  keystroke  | |  keystroke  | |  keystroke  | |  keystroke  |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   | |   file_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    --------------- --------------- --------------- --------------- --------------- ---------------
    |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   | |  probe_id   |
    --------------- --------------- --------------- --------------- --------------- ---------------
    ----------------------------------------------- -----------------------------------------------
    |                  client_id                  | |                  client_id                  |
    ----------------------------------------------- -----------------------------------------------
    -----------------------------------------------------------------------------------------------
    |                                        template_ids                                         |
    -----------------------------------------------------------------------------------------------
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the probe objects and select the indices to provide.

        Each probe is associated with the list of templates it must be
        compared to.

        Parameters:
            root_folder: folder given to ``make_path()`` to locate the
                keystroke files
            outputs: the outputs of the view, accessed by name and iterated
                over in ``next()``
            parameters: dictionary with a ``protocol`` entry
            force_start_index: first data index to provide (default: 0)
            force_end_index: last data index to provide, inclusive
                (default: the index of the last object)

        Returns:
            True
        """

        # Initialisations
        self.root_folder = root_folder
        self.outputs = outputs
        self.parameters = parameters

        # Open the database and load the objects to provide via the outputs
        self.db = _open_database()

        template_ids = sorted(self.db.model_ids(protocol=parameters['protocol'],
                                                groups='eval'),
                              key=lambda x: int(x))

        template_probes = {}
        for template_id in template_ids:
            probes = self.db.objects(protocol=parameters['protocol'],
                                     groups='eval',
                                     purposes='probe',
                                     model_ids=[template_id])

            # Only used for membership tests below: a set of ids is enough
            template_probes[template_id] = set(p.id for p in probes)

        objs = sorted(self.db.objects(protocol=parameters['protocol'],
                                      groups='eval',
                                      purposes='probe'),
                      key=lambda x: (x.client_id, x.id))

        self.objs = []
        for obj in objs:
            templates = [ template_id for template_id in template_ids
                                      if obj.id in template_probes[template_id] ]
            self.objs.append( (templates, obj) )

        # Entries with the same list of templates, then of the same client,
        # must be contiguous: next() writes one block of data per run of
        # equal values
        self.objs = sorted(self.objs, key=lambda x: (len(x[0]), x[0], x[1].client_id, x[1].id))

        # Determine the range of indices that must be provided
        self.start_index = force_start_index if force_start_index is not None else 0
        self.end_index = force_end_index if force_end_index is not None else len(self.objs) - 1

        self.objs = self.objs[self.start_index : self.end_index + 1]

        self.next_index = self.start_index

        return True


    def done(self, last_data_index):
        """Tell whether ``last_data_index`` reaches the last index to provide."""
        return last_data_index >= self.end_index


    def next(self):
        """Write the data of the current index on the connected outputs.

        Returns:
            True
        """
        (template_ids, obj) = self.objs[self.next_index - self.start_index]


        # Output: template_ids (only provide data when the template_ids change)
        if self.outputs['template_ids'].isConnected() and \
           self.outputs['template_ids'].last_written_data_index < self.next_index:

            template_ids_end_index = get_value_end_index(self.objs, template_ids, 0,
                                                         self.next_index,
                                                         self.start_index,
                                                         self.end_index)

            self.outputs['template_ids'].write(
                {
                    'text': [ str(x) for x in template_ids ]
                },
                template_ids_end_index
            )


        # Output: client_id (only provide data when the client_id changes)
        if self.outputs['client_id'].isConnected() and \
           self.outputs['client_id'].last_written_data_index < self.next_index:

            client_end_index = get_client_end_index(self.objs, obj.client_id,
                                                    self.next_index,
                                                    self.start_index,
                                                    self.end_index)

            self.outputs['client_id'].write(
                {
                    'text': str(obj.client_id)
                },
                client_end_index
            )


        # Output: probe_id (provide data at each iteration)
        if self.outputs['probe_id'].isConnected():
            self.outputs['probe_id'].write(
                {
                    'value': np.uint64(obj.id)
                },
                self.next_index
            )

        # Output: file_id (provide data at each iteration)
        if self.outputs['file_id'].isConnected():
            self.outputs['file_id'].write(
                {
                    'value': np.uint64(obj.id)
                },
                self.next_index
            )


        # Output: keystroke (provide data at each iteration)
        if self.outputs['keystroke'].isConnected():
            self.outputs['keystroke'].write(
                keystroke_reader(obj.make_path(self.root_folder, '.txt')),
                self.next_index
            )


        # Determine the next data index that must be provided
        self.next_index = 1 + min([ x.last_written_data_index for x in self.outputs
                                                              if x.isConnected() ]
                                 )

        return True


#----------------------------------------------------------


def setup_tests():
    """Replace ``keystroke_reader`` by a stub that doesn't access any file."""

    # Install a mock load function for the keystrokes
    def mock_keystroke_reader(filename):
        return {}

    global keystroke_reader
    keystroke_reader = mock_keystroke_reader


#----------------------------------------------------------


# Test the behavior of the views (on fake data)
if __name__ == '__main__':

    setup_tests()

    from beat.backend.python.database import DatabaseTester

    DatabaseTester('Templates', Templates,
        [
            'client_id',
            'template_id',
            'file_id',
            'keystroke',
        ],
        dict(
            protocol = 'A',
        )
    )

    DatabaseTester('Probes', Probes,
        [
            'template_ids',
            'client_id',
            'probe_id',
            'file_id',
            'keystroke',
        ],
        dict(
            protocol = 'A',
        ),
        irregular_outputs = [
            'client_id',
        ]
    )
accordance with the terms contained in a written agreement between you .. +.. and Idiap. For further information contact tto@idiap.ch .. +.. .. +.. Alternatively, this file may be used under the terms of the GNU Affero .. +.. Public License version 3 as published by the Free Software and appearing .. +.. in the file LICENSE.AGPL included in the packaging of this file. .. +.. The BEAT platform is distributed in the hope that it will be useful, but .. +.. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY .. +.. or FITNESS FOR A PARTICULAR PURPOSE. .. +.. .. +.. You should have received a copy of the GNU Affero Public License along .. +.. with the BEAT platform. If not, see http://www.gnu.org/licenses/. .. + + +The KBOC16 Database +------------------- + +Changelog +========= + +* **Version 3**, 30/Oct/2017: + + - Port to beat.backend.python v1.4.2 + +* **Version 2**, 4/Apr/2016: + + - Added protocol ``A`` + +* **Version 1**: + + - Initial release + + +Description +=========== + +The KBOC16 database is part of the `KBOC competition +<https://sites.google.com/site/btas16kboc/database>`_, an official competition +of the IEEE Eighth International Conference on Biometrics: Theory, +Applications, and Systems (BTAS 2016) organized by `ATVS Biometric Research +Group <http://atvs.ii.uam.es>`_. + + +The database consists of keystroke sequences from 300 subjects acquired in four +different sessions (5 samples per session including genuine and impostors) +distributed over a four-month time span. Thus, three different levels of +temporal variability are taken into account: (1) within the same session (the +samples are not acquired consecutively), (2) within weeks (between two +consecutive sessions), and (3) within months (between non-consecutive +sessions). + +Each session comprises 5 case-insensitive repetitions of the subject’s name and +surname typed in a natural and continuous manner. Mistakes +(and backspace) are not permitted. 
The time (in milliseconds) elapsed between +consecutive key events (press and release) is provided as the keystroke dynamics +sequence. + +There are two types of sequences: 1) Genuine sequences: typed by the owner of +the password and 2) Impostor sequences: typed by other users (different from the +owner) who try to spoof the system. The number of genuine and impostor samples +per user varies (this information remains sequestered). + +For further information on the reproducible keystroke experiments and +competition results, we refer the reader to the following articles, which are +publicly available in the publications section of the `ATVS group webpage +<http://atvs.ii.uam.es/listpublications.do>`_: + +.. [BTAS15] A. Morales, M. Falanga, J. Fierrez, C. Sansone and J. Ortega-Garcia, ''Keystroke Dynamics Recognition based on Personal Data: A Comparative Experimental Evaluation Implementing Reproducible Research'', in Proc. of the IEEE Seventh International Conference on Biometrics: Theory, Applications and Systems, Arlington, Virginia, USA, September 2015. + +.. [BTAS16] To appear. + +Please remember to reference [BTAS15]_ and [BTAS16]_ on any work made public, +whatever the form, based directly or indirectly on any part of the KBOC16 DB. -- GitLab