Commit 6d6dae2c authored by Philip ABBET's avatar Philip ABBET

Add atvskeystroke/3 (api change: beat.backend.python v1.4.1)

parent 81fcf187
{
"description": "The ATVS Keystroke database",
"root_folder": "/idiap/group/biometric/databases/atvs_keystroke",
"protocols": [
{
"name": "A",
"template": "simple_keystroke_recognition",
"sets": [
{
"name": "templates",
"template": "templates",
"view": "Templates",
"parameters": {
"protocol": "A"
},
"outputs": {
"file_id": "{{ system_user.username }}/uint64/1",
"client_id": "{{ system_user.username }}/text/1",
"template_id": "{{ system_user.username }}/text/1",
"keystroke": "{{ user.username }}/atvs_keystroke/1"
}
},
{
"name": "probes",
"template": "probes",
"view": "Probes",
"parameters": {
"protocol": "A"
},
"outputs": {
"file_id": "{{ system_user.username }}/uint64/1",
"probe_id": "{{ system_user.username }}/uint64/1",
"client_id": "{{ system_user.username }}/text/1",
"template_ids": "{{ system_user.username }}/array_1d_text/1",
"keystroke": "{{ user.username }}/atvs_keystroke/1"
}
}
]
}
]
}
\ No newline at end of file
###############################################################################
# #
# Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ #
# Contact: beat.support@idiap.ch #
# #
# This file is part of the beat.examples module of the BEAT platform. #
# #
# Commercial License Usage #
# Licensees holding valid commercial BEAT licenses may use this file in #
# accordance with the terms contained in a written agreement between you #
# and Idiap. For further information contact tto@idiap.ch #
# #
# Alternatively, this file may be used under the terms of the GNU Affero #
# Public License version 3 as published by the Free Software and appearing #
# in the file LICENSE.AGPL included in the packaging of this file. #
# The BEAT platform is distributed in the hope that it will be useful, but #
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY #
# or FITNESS FOR A PARTICULAR PURPOSE. #
# #
# You should have received a copy of the GNU Affero Public License along #
# with the BEAT platform. If not, see http://www.gnu.org/licenses/. #
# #
###############################################################################
import numpy as np
import bob.db.atvskeystroke
# Feature names, in the order their sections appear in a keystroke file.
# Index 0 is a placeholder so that the 1-based section counter used by
# keystroke_reader() can index this list directly.
keystroke_feature = ['', 'holdtime', 'rplatency', 'pplatency', 'rrlatency', 'prlatency']

# Field names, indexed by the position of a line inside its section.
# Index 0 is a placeholder: position 0 of every section is the header line.
keystroke_type = ['', 'given_name', 'family_name', 'email', 'nationality', 'id_number']


#----------------------------------------------------------


def keystroke_reader(filename):
    """Parse a keystroke text file into a two-level dictionary.

    Blank lines are skipped. The remaining lines are consumed in groups of
    six: the first line of each group is a header (its text is ignored, the
    groups are identified by their order only) and the next five lines hold
    whitespace-separated integers, one line per entry of ``keystroke_type``.

    Parameters:
      filename: path of the text file to read.

    Returns:
      A dictionary ``{feature_name: {field_name: numpy array}}`` where the
      feature names come from ``keystroke_feature``, the field names from
      ``keystroke_type`` and the arrays are built from ``np.int32`` values.

    Raises:
      IndexError: if the file contains more groups than there are entries
        in ``keystroke_feature``.
      ValueError: if a value cannot be converted to an integer.
    """
    data = {}
    counter = 0  # number of non-blank lines consumed so far
    feat = 0     # 1-based index of the section currently being read

    # The context manager guarantees the file is closed, even on error
    with open(filename, 'r') as stream:
        for line in stream:
            content = line.strip()
            if not content:
                continue

            position = counter % 6
            if position == 0:
                # Header line: start a new section
                feat += 1
                data[keystroke_feature[feat]] = {}
            else:
                # split() without argument tolerates repeated spaces and tabs
                values = [np.int32(v) for v in content.split()]
                data[keystroke_feature[feat]][keystroke_type[position]] = np.array(values)

            counter += 1

    return data
#----------------------------------------------------------
def get_client_end_index(objs, client_id, client_start_index,
                         start_index, end_index):
    """Return the last data index of the run of entries sharing `client_id`.

    Parameters:
      objs: sequence of entries, where ``objs[0]`` corresponds to the data
        index `start_index`. An entry is either an object exposing a
        ``client_id`` attribute, or a tuple whose second element is such
        an object.
      client_id: client identifier of the run being measured.
      client_start_index: data index at which the run starts.
      start_index: data index of the first entry of `objs`.
      end_index: last data index that may be returned.

    Returns:
      The data index just before the first entry (after
      `client_start_index`) that belongs to another client, or `end_index`
      when no such entry is found.
    """
    for candidate in range(client_start_index + 1, end_index + 1):
        entry = objs[candidate - start_index]

        # Entries may be stored as (something, object) pairs
        if isinstance(entry, tuple):
            entry = entry[1]

        if entry.client_id != client_id:
            return candidate - 1

    return end_index
#----------------------------------------------------------
def get_template_end_index(objs, template_id, template_start_index,
                           start_index, end_index):
    """Return the last data index of the run of entries sharing `template_id`.

    Parameters:
      objs: sequence of entries whose first element is a template
        identifier; ``objs[0]`` corresponds to the data index `start_index`.
      template_id: template identifier of the run being measured.
      template_start_index: data index at which the run starts.
      start_index: data index of the first entry of `objs`.
      end_index: last data index that may be returned.

    Returns:
      The data index just before the first entry (after
      `template_start_index`) that carries another template identifier, or
      `end_index` when no such entry is found.
    """
    for candidate in range(template_start_index + 1, end_index + 1):
        if objs[candidate - start_index][0] != template_id:
            return candidate - 1

    return end_index
#----------------------------------------------------------
class Templates:
    """View providing the enrolment samples of the 'eval' group.

    Outputs:
      - keystroke: "{{ user.username }}/atvs_keystroke/1"
      - file_id: "{{ system_user.username }}/uint64/1"
      - client_id: "{{ system_user.username }}/text/1"
      - template_id: "{{ system_user.username }}/text/1"

    One "file_id" is associated with a given "keystroke".
    Several "keystroke" are associated with a given "client_id".
    Several "client_id" are associated with a given "template_id".

      -----------  -----------  -----------  -----------
      | keystroke|  | keystroke|  | keystroke|  | keystroke|
      -----------  -----------  -----------  -----------
      -----------  -----------  -----------  -----------
      | file_id  |  | file_id  |  | file_id  |  | file_id  |
      -----------  -----------  -----------  -----------
      ------------------------  ------------------------
      | client_id             |  | client_id             |
      ------------------------  ------------------------
      --------------------------------------------------
      | template_id                                      |
      --------------------------------------------------

    Note: for this particular database, there is only one "client_id"
    per "template_id".
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the (template_id, file) pairs to serve and select the range.

        Parameters:
          root_folder: folder used to build the path of each sample file.
          outputs: collection of outputs, accessed by name and iterated.
          parameters: dictionary holding at least the 'protocol' entry.
          force_start_index: first data index to serve (default: 0).
          force_end_index: last data index to serve (default: last object).

        Returns:
          Always True.
        """
        self.root_folder = root_folder
        self.outputs = outputs
        self.parameters = parameters

        # Query the database, templates sorted by their numerical value
        self.db = bob.db.atvskeystroke.Database()

        model_ids = self.db.model_ids(groups='eval',
                                      protocol=parameters['protocol'])

        self.objs = []
        for template_id in sorted(model_ids, key=int):
            files = self.db.objects(groups='eval',
                                    protocol=self.parameters['protocol'],
                                    purposes='enrol',
                                    model_ids=[template_id])

            for sample in sorted(files, key=lambda f: (f.client_id, f.id)):
                self.objs.append((template_id, sample))

        # Restrict the objects to the requested range of data indices
        if force_start_index is None:
            self.start_index = 0
        else:
            self.start_index = force_start_index

        if force_end_index is None:
            self.end_index = len(self.objs) - 1
        else:
            self.end_index = force_end_index

        self.objs = self.objs[self.start_index : self.end_index + 1]

        self.next_index = self.start_index

        return True

    def done(self, last_data_index):
        """Tell whether `last_data_index` reaches the end of the range."""
        return not (last_data_index < self.end_index)

    def next(self):
        """Write the data of the current index on the connected outputs.

        Returns:
          Always True.
        """
        current = self.next_index
        template_id, sample = self.objs[current - self.start_index]

        # template_id: written once per run of identical template ids
        template_output = self.outputs['template_id']
        if template_output.isConnected() and \
           template_output.last_written_data_index < current:
            last = get_template_end_index(self.objs, template_id, current,
                                          self.start_index, self.end_index)
            template_output.write({'text': str(template_id)}, last)

        # client_id: written once per run of identical client ids
        client_output = self.outputs['client_id']
        if client_output.isConnected() and \
           client_output.last_written_data_index < current:
            last = get_client_end_index(self.objs, sample.client_id, current,
                                        self.start_index, self.end_index)
            client_output.write({'text': str(sample.client_id)}, last)

        # file_id: written at each iteration
        file_output = self.outputs['file_id']
        if file_output.isConnected():
            file_output.write({'value': np.uint64(sample.id)}, current)

        # keystroke: written at each iteration
        keystroke_output = self.outputs['keystroke']
        if keystroke_output.isConnected():
            path = sample.make_path(self.root_folder, '.txt')
            keystroke_output.write(keystroke_reader(path), current)

        # The next index to serve follows the connected output that is the
        # furthest behind
        self.next_index = 1 + min(output.last_written_data_index
                                  for output in self.outputs
                                  if output.isConnected())

        return True
#----------------------------------------------------------
class Probes:
    """View providing the probe samples of the 'eval' group.

    Outputs:
      - keystroke: "{{ user.username }}/atvs_keystroke/1"
      - file_id: "{{ system_user.username }}/uint64/1"
      - client_id: "{{ system_user.username }}/text/1"
      - probe_id: "{{ system_user.username }}/uint64/1"
      - template_ids: "{{ system_user.username }}/array_1d_text/1"

    One "file_id" is associated with a given "keystroke".
    One "probe_id" is associated with a given "keystroke".
    Several "keystroke" are associated with a given "client_id".
    Several "client_id" are associated with a given "template_ids".

      -----------  -----------  -----------  -----------
      | keystroke|  | keystroke|  | keystroke|  | keystroke|
      -----------  -----------  -----------  -----------
      -----------  -----------  -----------  -----------
      | file_id  |  | file_id  |  | file_id  |  | file_id  |
      -----------  -----------  -----------  -----------
      -----------  -----------  -----------  -----------
      | probe_id |  | probe_id |  | probe_id |  | probe_id |
      -----------  -----------  -----------  -----------
      ------------------------  ------------------------
      | client_id             |  | client_id             |
      ------------------------  ------------------------
      --------------------------------------------------
      | template_ids                                     |
      --------------------------------------------------
    """

    def setup(self, root_folder, outputs, parameters, force_start_index=None,
              force_end_index=None):
        """Load the (template_id, file) pairs to serve and select the range.

        Parameters:
          root_folder: folder used to build the path of each sample file.
          outputs: collection of outputs, accessed by name and iterated.
          parameters: dictionary holding at least the 'protocol' entry.
          force_start_index: first data index to serve (default: 0).
          force_end_index: last data index to serve (default: last object).

        Returns:
          Always True.
        """
        self.root_folder = root_folder
        self.outputs = outputs
        self.parameters = parameters

        # Query the database, templates sorted by their numerical value
        self.db = bob.db.atvskeystroke.Database()

        model_ids = self.db.model_ids(groups='eval',
                                      protocol=parameters['protocol'])

        self.objs = []
        for template_id in sorted(model_ids, key=int):
            files = self.db.objects(groups='eval',
                                    protocol=self.parameters['protocol'],
                                    purposes='probe',
                                    model_ids=[template_id])

            for sample in sorted(files, key=lambda f: (f.client_id, f.id)):
                self.objs.append((template_id, sample))

        # Restrict the objects to the requested range of data indices
        if force_start_index is None:
            self.start_index = 0
        else:
            self.start_index = force_start_index

        if force_end_index is None:
            self.end_index = len(self.objs) - 1
        else:
            self.end_index = force_end_index

        self.objs = self.objs[self.start_index : self.end_index + 1]

        self.next_index = self.start_index

        return True

    def done(self, last_data_index):
        """Tell whether `last_data_index` reaches the end of the range."""
        return not (last_data_index < self.end_index)

    def next(self):
        """Write the data of the current index on the connected outputs.

        Returns:
          Always True.
        """
        current = self.next_index
        template_id, sample = self.objs[current - self.start_index]

        # template_ids: written once per run of identical template ids
        templates_output = self.outputs['template_ids']
        if templates_output.isConnected() and \
           templates_output.last_written_data_index < current:
            last = get_template_end_index(self.objs, template_id, current,
                                          self.start_index, self.end_index)
            templates_output.write({'text': [str(template_id)]}, last)

        # client_id: written once per run of identical client ids
        client_output = self.outputs['client_id']
        if client_output.isConnected() and \
           client_output.last_written_data_index < current:
            last = get_client_end_index(self.objs, sample.client_id, current,
                                        self.start_index, self.end_index)
            client_output.write({'text': str(sample.client_id)}, last)

        # probe_id: written at each iteration
        probe_output = self.outputs['probe_id']
        if probe_output.isConnected():
            probe_output.write({'value': np.uint64(sample.id)}, current)

        # file_id: written at each iteration
        file_output = self.outputs['file_id']
        if file_output.isConnected():
            file_output.write({'value': np.uint64(sample.id)}, current)

        # keystroke: written at each iteration
        keystroke_output = self.outputs['keystroke']
        if keystroke_output.isConnected():
            path = sample.make_path(self.root_folder, '.txt')
            keystroke_output.write(keystroke_reader(path), current)

        # The next index to serve follows the connected output that is the
        # furthest behind
        self.next_index = 1 + min(output.last_written_data_index
                                  for output in self.outputs
                                  if output.isConnected())

        return True
#----------------------------------------------------------
# Test the behavior of the views (on fake data)
if __name__ == '__main__':

    import itertools

    from beat.backend.python.outputs import OutputList

    # Install a mock load method for the keystrokes, so that the views can
    # be exercised without reading the sample files
    def mock_keystroke_reader(filename):
        return {}

    keystroke_reader = mock_keystroke_reader


    # Mock output class
    class MockOutput:
        """Output recording each write as (start_index, end_index, data)."""

        def __init__(self, name, connected):
            self.name = name
            self.connected = connected
            self.last_written_data_index = -1
            self.written_data = []

        def write(self, data, end_data_index):
            # A block starts right after the previously written one
            self.written_data.append(( self.last_written_data_index + 1, end_data_index, data ))
            self.last_written_data_index = end_data_index

        def isConnected(self):
            return self.connected


    # Tester utility class
    class Tester:
        """Run a view with every non-empty combination of connected outputs.

        Parameters:
          name: label used in the printed messages.
          view_class: class of the view to test.
          outputs_declaration: names of all the outputs of the view.
          parameters: parameters passed to the view's setup().
          irregular_outputs: names of the outputs whose blocks do not all
            have the same length (default: none).
        """

        def __init__(self, name, view_class, outputs_declaration, parameters,
                     irregular_outputs=None):
            self.name = name
            self.view_class = view_class
            self.outputs_declaration = {}
            self.parameters = parameters

            # None instead of a shared mutable default list
            self.irregular_outputs = [] if irregular_outputs is None else irregular_outputs

            self.determine_increments(outputs_declaration)

            for L in range(0, len(self.outputs_declaration) + 1):
                for subset in itertools.combinations(self.outputs_declaration.keys(), L):
                    self.run(subset)

        def determine_increments(self, outputs_declaration):
            """Record, for each output, the length of its first block."""
            outputs = OutputList()
            for name in outputs_declaration:
                outputs.add(MockOutput(name, True))

            view = self.view_class()
            view.setup('', outputs, self.parameters)

            view.next()

            # Single-argument print(...) behaves the same on Python 2 and 3
            print("View '%s', increments found:" % self.name)
            for output in outputs:
                self.outputs_declaration[output.name] = output.last_written_data_index + 1
                print('  - %s: %d' % (output.name, output.last_written_data_index + 1))

        def run(self, connected_outputs):
            """Iterate over the whole view with only `connected_outputs` connected."""
            if len(connected_outputs) == 0:
                return

            print("Testing '%s', with %d output(s): %s" % (self.name, len(connected_outputs),
                                                           ', '.join(connected_outputs)))

            # Map each output name to its increment, split by connection state
            connected_outputs = dict([ x for x in self.outputs_declaration.items()
                                       if x[0] in connected_outputs ])

            not_connected_outputs = dict([ x for x in self.outputs_declaration.items()
                                           if x[0] not in connected_outputs ])

            outputs = OutputList()
            for name in self.outputs_declaration.keys():
                outputs.add(MockOutput(name, name in connected_outputs))

            view = self.view_class()
            view.setup('', outputs, self.parameters)

            next_expected_indices = {}
            for name in connected_outputs.keys():
                next_expected_indices[name] = 0

            next_index = 0

            def _done():
                for output in outputs:
                    if output.isConnected() and not view.done(output.last_written_data_index):
                        return False
                return True

            while not _done():
                view.next()

                for name in connected_outputs.keys():
                    if name not in self.irregular_outputs:
                        assert outputs[name].written_data[-1][0] == next_expected_indices[name]
                        assert outputs[name].written_data[-1][1] == next_expected_indices[name] + connected_outputs[name] - 1
                    else:
                        assert outputs[name].written_data[-1][0] == next_expected_indices[name]
                        assert outputs[name].written_data[-1][1] >= next_expected_indices[name]

                for name in not_connected_outputs.keys():
                    assert len(outputs[name].written_data) == 0

                next_index = 1 + min([ x.written_data[-1][1] for x in outputs if x.isConnected() ])

                for name in connected_outputs.keys():
                    if name not in self.irregular_outputs:
                        if next_index == next_expected_indices[name] + connected_outputs[name]:
                            next_expected_indices[name] += connected_outputs[name]
                    else:
                        if next_index > outputs[name].written_data[-1][1]:
                            next_expected_indices[name] = outputs[name].written_data[-1][1] + 1

            for name in connected_outputs.keys():
                if name not in self.irregular_outputs:
                    # Floor division keeps the Python 2 integer semantics
                    assert len(outputs[name].written_data) == next_index // connected_outputs[name]
                else:
                    print("  Irregular output '%s': %s" % (name, str([ (x[0], x[1]) for x in outputs[name].written_data ])))


    # The actual tests
    Tester('Templates', Templates,
           [
               'template_id',
               'client_id',
               'file_id',
               'keystroke',
           ],
           dict(
               protocol = 'A',
           )
    )

    Tester('Probes', Probes,
           [
               'template_ids',
               'client_id',
               'probe_id',
               'file_id',
               'keystroke',
           ],
           dict(
               protocol = 'A',
           ),
           irregular_outputs = [
               'client_id',
           ]
    )
.. Copyright (c) 2017 Idiap Research Institute, http://www.idiap.ch/ ..
.. Contact: beat.support@idiap.ch ..
.. ..
.. This file is part of the beat.examples module of the BEAT platform. ..
.. ..
.. Commercial License Usage ..
.. Licensees holding valid commercial BEAT licenses may use this file in ..
.. accordance with the terms contained in a written agreement between you ..
.. and Idiap. For further information contact tto@idiap.ch ..
.. ..
.. Alternatively, this file may be used under the terms of the GNU Affero ..
.. Public License version 3 as published by the Free Software and appearing ..
.. in the file LICENSE.AGPL included in the packaging of this file. ..
.. The BEAT platform is distributed in the hope that it will be useful, but ..
.. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ..
.. or FITNESS FOR A PARTICULAR PURPOSE. ..
.. ..
.. You should have received a copy of the GNU Affero Public License along ..
.. with the BEAT platform. If not, see http://www.gnu.org/licenses/. ..
The ATVS-Keystroke Database
---------------------------
Changelog
=========
* **Version 3**, 27/Oct/2017:
- Port to beat.backend.python v1.4.1
* **Version 2**, 26/Jan/2016:
- Port to Bob v2
* **Version 1**, 09/Jul/2015:
- Initial release
Description
===========
The `ATVS-Keystroke database <http://atvs.ii.uam.es/keystroke_db.html>`_
is a dataset captured for performance evaluation of Keystroke Dynamics
recognition systems (see [BTAS15]_ for all the details). The database
comprises 63 users with 12 genuine accesses and 12 impostor accesses for
each user, for a total number of samples equal to 7560 (63 users x 24 accesses
x 5 data). There are people from two different nationalities, with 60% males
and 40% females. The acquisition was made in two sessions according to a
semi-supervised protocol:
1. **First session**: the users were asked to introduce their personal data
in the platform. This process was repeated six times.
2. **Second session**: after at least 24 hours, the users were asked to
introduce once again their personal data in the platform. The process was
repeated six times. In addition, in this second session, each user acted as an
impostor trying to spoof the system with the personal data of another user.
The personal data of three other users were shown to each impostor, who
entered them four times each, for a total of twelve impostor accesses
per user.
The information provided by the users includes sensitive data and therefore,
it has been post-processed to remove all the personal information (the
characters pressed) and to maintain the privacy of the users enrolled in the
database. The keystroke dynamic patterns were recorded using a key-logger
(programmed in Java). The key-logger detects two different types of events:
press and release. The timestamps for each of the detected events were
recorded in milliseconds.
For further information on the database, we refer the reader to [BTAS15]_
(the article is publicly available in the publications section of the
`ATVS group webpage <http://atvs.ii.uam.es/listpublications.do>`_).
.. [BTAS15] A. Morales, M. Falanga, J. Fierrez, C. Sansone and J. Ortega-Garcia, ''Keystroke Dynamics Recognition based on Personal Data: A Comparative Experimental Evaluation Implementing Reproducible Research'', in Proc. of the IEEE Seventh International Conference on Biometrics: Theory, Applications and Systems, Arlington, Virginia, USA, September 2015.
Please remember to reference [BTAS15]_ on any work made public, whatever the
form, based directly or indirectly on any part of the ATVS-Keystroke DB.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment