Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.pad.base
Commits
04a1faf2
Commit
04a1faf2
authored
Jun 28, 2018
by
Pavel KORSHUNOV
Browse files
Merge branch 'padfilelist-vuln' of gitlab.idiap.ch:bob/bob.pad.base into padfilelist-vuln
parents
c24fafdf
7b8030c4
Pipeline
#21437
passed with stage
in 19 minutes and 34 seconds
Changes
2
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
bob/pad/base/database/PadBioFileDB.py
View file @
04a1faf2
...
...
@@ -6,7 +6,7 @@ used by both verification and PAD experiments.
from
bob.pad.base.database
import
PadFile
from
bob.pad.base.database
import
FileListPadDatabase
from
bob.bio.base.database
import
BioDatabase
from
bob.bio.base.database
import
FileList
BioDatabase
from
bob.bio.base.database.file
import
BioFile
import
bob.io.base
...
...
@@ -48,66 +48,182 @@ class HighPadFile(PadFile):
class
HighPadDatabase
(
FileListPadDatabase
):
def
__init__
(
self
,
filelists_directory
=
None
,
original_directory
=
"[DB_DATA_DIRECTORY]"
,
original_extension
=
".wav"
,
file_class
=
None
,
db_name
=
''
,
**
kwargs
):
# call base class constructor
from
pkg_resources
import
resource_filename
folder
=
resource_filename
(
__name__
,
'../lists/'
+
db_name
)
super
(
HighPadDatabase
,
self
).
__init__
(
folder
,
db_name
,
pad_file_class
=
HighPadFile
,
if
not
filelists_directory
:
# if not provided, we assume the lists are located in '../lists'
from
pkg_resources
import
resource_filename
filelists_directory
=
resource_filename
(
__name__
,
'../lists/'
+
db_name
)
if
not
file_class
:
file_class
=
HighPadFile
super
(
HighPadDatabase
,
self
).
__init__
(
filelists_directory
,
db_name
,
pad_file_class
=
file_class
,
original_directory
=
original_directory
,
original_extension
=
original_extension
,
**
kwargs
)
class
HighBioFile
(
BioFile
):
def
__init__
(
self
,
f
):
"""
Initializes this File object with an File equivalent from the underlying SQl-based interface for
database. Replace this class for the specific database.
"""
super
(
HighBioFile
,
self
).
__init__
(
client_id
=
f
.
client_id
,
path
=
f
.
path
,
file_id
=
f
.
id
)
self
.
__f
=
f
def
load
(
self
,
directory
=
None
,
extension
=
'.wav'
):
path
=
self
.
make_path
(
directory
,
extension
)
if
extension
==
'.wav'
:
rate
,
audio
=
scipy
.
io
.
wavfile
.
read
(
path
)
# We consider there is only 1 channel in the audio file => data[0]
return
rate
,
numpy
.
cast
[
'float'
](
audio
)
elif
extension
==
'.avi'
:
return
bob
.
io
.
base
.
load
(
path
)
class
HighBioDatabase
(
BioDatabase
):
class
HighBioDatabase
(
FileListBioDatabase
):
"""
Implements verification API for querying High database.
"""
def
__init__
(
self
,
filelists_directory
=
None
,
original_directory
=
"[DB_DATA_DIRECTORY]"
,
original_extension
=
".wav"
,
db_name
=
''
,
file_class
=
None
,
**
kwargs
):
if
not
filelists_directory
:
# if not provided, we assume the lists are located in '../lists'
from
pkg_resources
import
resource_filename
filelists_directory
=
resource_filename
(
__name__
,
'../lists/'
+
db_name
)
if
not
file_class
:
file_class
=
HighPadFile
# call base class constructors to open a session to the database
super
(
HighBioDatabase
,
self
).
__init__
(
name
=
db_name
,
super
(
HighBioDatabase
,
self
).
__init__
(
filelists_directory
,
db_name
,
bio_file_class
=
file_class
,
original_directory
=
original_directory
,
original_extension
=
original_extension
,
**
kwargs
)
self
.
__db
=
HighPadDatabase
(
db_name
=
db_name
,
original_directory
=
original_directory
,
original_extension
=
original_extension
,
**
kwargs
)
self
.
_pad_db
=
HighPadDatabase
(
filelists_directory
=
filelists_directory
,
db_name
=
db_name
,
file_class
=
file_class
,
original_directory
=
original_directory
,
original_extension
=
original_extension
,
**
kwargs
)
self
.
low_level_group_names
=
(
'train'
,
'dev'
,
'eval'
)
self
.
high_level_group_names
=
(
'world'
,
'dev'
,
'eval'
)
def
_convert_protocol
(
self
,
protocol
=
None
):
"""
This conversion of the protocol with appended '-licit' or '-spoof' is a hack for verification experiments.
To adapt spoofing databases to the verification experiments, we need to be able to split a given protocol
into two parts: when data for licit (only real/genuine data is used) and data for spoof
(attacks are used instead of real data) is used in the experiment.
Hence, we use this trick with appending '-licit' or '-spoof' to the
protocol name, so we can distinguish these two scenarios.
By default, if nothing is appended, we assume licit protocol.
The distinction between licit and spoof is expressed via purposes parameters, but
the difference is in the terminology only.
"""
if
protocol
==
'.'
:
protocol
=
None
# if protocol was empty, we return None
if
not
protocol
:
return
None
,
None
# lets check if we have an appendix to the protocol name
modifier
=
None
if
protocol
:
modifier
=
protocol
.
split
(
'-'
)[
-
1
]
# if protocol was empty or there was no correct appendix, we just assume the 'licit' option
if
not
(
modifier
==
'licit'
or
modifier
==
'spoof'
):
modifier
=
'licit'
else
:
# put back everything except the appendix into the protocol
protocol
=
'-'
.
join
(
protocol
.
split
(
'-'
)[:
-
1
])
return
protocol
,
modifier
def
_convert_purposes
(
self
,
purposes
,
modifier
):
"""
We assume there is no enrollment data, since
PAD File database has real and attack lists only,
so we cannot assume any availability of enrollment data
If your PAD File lists also have for_model.lst
and/or for_probe.lst files, you need to change this method
Args:
purposes: The original purposes supplied by Bio verification framework
modifier: Indicates whether it is licit or spoof scenario
Returns: corrected purposes according to either licit or spoof scenarios
"""
if
isinstance
(
purposes
,
str
):
purposes
=
[
purposes
]
elif
purposes
is
not
None
:
purposes
=
list
(
purposes
)
# licit scenario considers genuine data only
# we return all real data
purposes
=
[
'real'
]
# spoof scenario uses spoofed data for probe
# but, during scoring, this scenario also needs a real-probe data
# for cases when model_id is equal to client_id
# Hence, we request both real and attack data
if
modifier
==
'spoof'
:
# we return real and attack data
purposes
.
append
(
'attack'
)
return
purposes
def
_filter_by_model_ids
(
self
,
objects
,
model_ids
):
"""
From all File objects, keep only those, whose client_id is in model_ids
Args:
objects: File objects derived from BioFile
model_ids: The list of the requested model Ids
Returns: The list of File objects
"""
if
not
model_ids
:
return
[]
filtered_objects
=
[]
for
f
in
objects
:
if
f
.
client_id
in
model_ids
:
if
hasattr
(
f
,
'attack_type'
)
and
f
.
attack_type
is
not
None
:
f
.
client_id
=
'attack/{}'
.
format
(
f
.
client_id
)
filtered_objects
.
append
(
f
)
return
filtered_objects
def
client_id_from_model_id
(
self
,
model_id
,
group
=
'dev'
):
"""
This wrapper around PAD database does not have a knowledge of
model ids used in verification experiments, so we just assume that
the client_id is the same as model_id, which is actually true
for most of the verification databases as well.
"""
return
model_id
def
model_ids_with_protocol
(
self
,
groups
=
None
,
protocol
=
None
,
**
kwargs
):
groups
=
self
.
convert_names_to_lowlevel
(
groups
,
self
.
low_level_group_names
,
self
.
high_level_group_names
)
"""
This wrapper around PAD database does not have a knowledge of
model ids used in verification experiments, so we just assume that
the model_ids are the same as client ids, which is actually true
for most of the verification databases as well.
return
[
client
.
id
for
client
in
self
.
__db
.
clients
(
groups
=
groups
,
**
kwargs
)]
"""
# we need to correctly convert groups first
groups
=
self
.
convert_names_to_lowlevel
(
groups
,
self
.
low_level_group_names
,
self
.
high_level_group_names
)
# we also need to convert protocol name (it can have either '-licit' or '-spoof' appendix)
# to the expected protocol name without appendix
return
self
.
_pad_db
.
client_ids
(
protocol
=
self
.
_convert_protocol
(
protocol
)[
0
],
groups
=
groups
,
**
kwargs
)
def
arrange_by_client
(
self
,
files
):
client_files
=
{}
for
f
in
files
:
if
str
(
f
.
client_id
)
not
in
client_files
:
client_files
[
str
(
f
.
client_id
)]
=
[]
client_files
[
str
(
f
.
client_id
)].
append
(
f
)
files_by_clients
=
[]
for
client
in
sorted
(
client_files
.
keys
()):
files_by_clients
.
append
(
client_files
[
client
])
return
files_by_clients
def
objects
(
self
,
protocol
=
None
,
purposes
=
None
,
model_ids
=
None
,
groups
=
None
,
**
kwargs
):
"""
...
...
@@ -144,63 +260,24 @@ class HighBioDatabase(BioDatabase):
# convert group names from the conventional names in verification experiments to the internal database names
if
groups
is
None
:
# all groups are assumed
groups
=
self
.
high_level_group_names
matched_groups
=
self
.
convert_names_to_lowlevel
(
groups
,
self
.
low_level_group_names
,
self
.
high_level_group_names
)
# this conversion of the protocol with appended '-licit' or '-spoof' is a hack for verification experiments.
# To adapt spoofing databases to the verification experiments, we need to be able to split a given protocol
# into two parts: when data for licit (only real/genuine data is used) and data for spoof
# (attacks are used instead of real data) is used in the experiment.
# Hence, we use this trick with appending '-licit' or '-spoof' to the
# protocol name, so we can distinguish these two scenarios.
# By default, if nothing is appended, we assume licit protocol.
# The distinction between licit and spoof is expressed via purposes parameters, but
# the difference is in the terminology only.
# lets check if we have an appendix to the protocol name
appendix
=
None
if
protocol
:
appendix
=
protocol
.
split
(
'-'
)[
-
1
]
groups
=
self
.
convert_names_to_lowlevel
(
groups
,
self
.
low_level_group_names
,
self
.
high_level_group_names
)
# if protocol was empty or there was no correct appendix, we just assume the 'licit' option
if
not
(
appendix
==
'licit'
or
appendix
==
'spoof'
):
appendix
=
'licit'
else
:
# put back everything except the appendix into the protocol
protocol
=
'-'
.
join
(
protocol
.
split
(
'-'
)[:
-
1
])
protocol
,
modifier
=
self
.
_convert_protocol
(
protocol
)
purposes
=
self
.
_convert_purposes
(
purposes
,
modifier
)
# if protocol was empty, we set it to the None
if
not
protocol
:
protocol
=
None
# Query the underline PAD database
objects
=
self
.
_pad_db
.
objects
(
protocol
=
protocol
,
groups
=
groups
,
purposes
=
purposes
,
**
kwargs
)
correct_purposes
=
purposes
# licit protocol is for real access data only
if
appendix
==
'licit'
:
# by default we assume all real data, since this database has no enroll data
if
purposes
is
None
:
correct_purposes
=
(
'real'
,)
# spoof protocol uses real data for enrollment and spoofed data for probe
# so, probe set is the same as attack set
if
appendix
==
'spoof'
:
# we return attack data only, since this database does not have explicit enroll data
if
purposes
is
None
:
correct_purposes
=
(
'attack'
,)
# otherwise replace 'probe' with 'attack'
elif
isinstance
(
purposes
,
(
tuple
,
list
)):
correct_purposes
=
[]
for
purpose
in
purposes
:
if
purpose
==
'probe'
:
correct_purposes
+=
[
'attack'
]
else
:
correct_purposes
+=
[
purpose
]
elif
purposes
==
'probe'
:
correct_purposes
=
(
'attack'
,)
# now, query the underline PAD database
objects
=
self
.
__db
.
objects
(
protocol
=
protocol
,
groups
=
matched_groups
,
purposes
=
correct_purposes
,
**
kwargs
)
# note that PAD database does not know anything about model_ids, so these are ignored
# Hence, for the spoofing protocol, we need to filter out the files and
# keep only those that belong to model_ids
# We also modify the client_id to reflect that it is an attack
if
modifier
==
'spoof'
and
model_ids
is
not
None
:
objects
=
self
.
_filter_by_model_ids
(
objects
,
model_ids
)
# make sure to return BioFile representation of a file, not the database one
return
[
HighBioFile
(
f
)
for
f
in
objects
]
return
[
HighPadFile
(
client_id
=
f
.
client_id
,
path
=
f
.
path
,
file_id
=
f
.
path
,
attack_type
=
f
.
attack_type
)
for
f
in
objects
]
def
annotations
(
self
,
file
):
pass
bob/pad/base/script/spoof.py
View file @
04a1faf2
...
...
@@ -15,7 +15,7 @@ import logging
logger
=
logging
.
getLogger
(
"bob.pad.base"
)
from
..
import
tools
from
bob.pad.base
import
tools
import
bob.bio.base.tools
as
biotools
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment