diff --git a/bob/pad/base/database/__init__.py b/bob/pad/base/database/__init__.py index b310f8abf59daa121625e2f4c5e599a63f894b53..d799ff81bad11a4df213f34badd4a096a1e152a1 100644 --- a/bob/pad/base/database/__init__.py +++ b/bob/pad/base/database/__init__.py @@ -1,8 +1,29 @@ -from .database import PadDatabase from .file import PadFile +from .database import PadDatabase +from .filelist.query import FileListPadDatabase +from .filelist.models import Client +from . import filelist -# to fix sphinx warnings of not able to find classes, when path is shortened -PadDatabase.__module__ = "bob.pad.base.database" -PadFile.__module__ = "bob.pad.base.database" # gets sphinx autodoc done right - don't remove it +def __appropriate__(*args): + """Says object was actually declared here, and not in the import module. + Fixing sphinx warnings of not being able to find classes, when path is shortened. + Parameters: + + *args: An iterable of objects to modify + + Resolves `Sphinx referencing issues + <https://github.com/sphinx-doc/sphinx/issues/3048>` + """ + + for obj in args: + obj.__module__ = __name__ + + +__appropriate__( + PadFile, + PadDatabase, + FileListPadDatabase, + Client, +) __all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/pad/base/database/database.py b/bob/pad/base/database/database.py index ae319880bb1791b3f74030782538430402a61407..d05d3e40822a296538a99b37001ffc580e436d1b 100644 --- a/bob/pad/base/database/database.py +++ b/bob/pad/base/database/database.py @@ -21,11 +21,8 @@ class PadDatabase(BioDatabase): name : str A unique name for the database. - all_files_options : dict - Dictionary of options passed to the second-level database query when retrieving all data. - - check_original_files_for_existence : bool - Enables to test for the original data files when querying the database. + protocol : str or ``None`` + The name of the protocol that defines the default experimental setup for this database. 
original_directory : str The directory where the original data of the database are stored. @@ -33,9 +30,6 @@ class PadDatabase(BioDatabase): original_extension : str The file name extension of the original data. - protocol : str or ``None`` - The name of the protocol that defines the default experimental setup for this database. - kwargs : ``key=value`` pairs The arguments of the :py:class:`bob.bio.base.database.BioDatabase` base class constructor. @@ -44,14 +38,18 @@ class PadDatabase(BioDatabase): def __init__( self, name, - all_files_options={}, # additional options for the database query that can be used to extract all files - check_original_files_for_existence=False, + protocol='Default', original_directory=None, original_extension=None, - protocol='Default', **kwargs # The rest of the default parameters of the base class ): - super(PadDatabase, self).__init__(name=name, all_files_options=all_files_options, check_original_files_for_existence=check_original_files_for_existence, original_directory=original_directory, original_extension=original_extension, protocol=protocol, **kwargs) + super(PadDatabase, self).__init__( + name=name, + protocol=protocol, + original_directory=original_directory, + original_extension=original_extension, + **kwargs) + def original_file_names(self, files): """original_file_names(files) -> paths @@ -124,7 +122,7 @@ class PadDatabase(BioDatabase): Usually it is either 'real' or 'attack'. 
model_ids : [various type] - This parameter is not suported in PAD databases yet + This parameter is not supported in PAD databases yet """ raise NotImplementedError("This function must be implemented in your derived class.") diff --git a/bob/pad/base/database/file.py b/bob/pad/base/database/file.py index e8745a827d9f96830f34ba67520a031f610b904a..2c1a2fd44159cfbe46fe778017654fd1eed7e5fb 100644 --- a/bob/pad/base/database/file.py +++ b/bob/pad/base/database/file.py @@ -29,5 +29,5 @@ class PadFile(BioFile): assert isinstance(attack_type, str) # just copy the information + # The attack type of the sample, None if it is a genuine sample. self.attack_type = attack_type - """The attack type of the sample, None if it is a genuine sample.""" diff --git a/bob/pad/base/database/filelist/__init__.py b/bob/pad/base/database/filelist/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7432ac3a6e538042c4f0e62d1fed9f26571ecb33 --- /dev/null +++ b/bob/pad/base/database/filelist/__init__.py @@ -0,0 +1,29 @@ +from .models import ListReader, Client, FileListFile +from .query import FileListPadDatabase +from .driver import Interface + + +# gets sphinx autodoc done right - don't remove it +def __appropriate__(*args): + """Says object was actually declared here, and not in the import module. + Fixing sphinx warnings of not being able to find classes, when path is shortened. 
+ Parameters: + + *args: An iterable of objects to modify + + Resolves `Sphinx referencing issues + <https://github.com/sphinx-doc/sphinx/issues/3048>` + """ + + for obj in args: + obj.__module__ = __name__ + + +__appropriate__( + ListReader, + Client, + FileListFile, + FileListPadDatabase, + Interface, +) +__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/pad/base/database/filelist/driver.py b/bob/pad/base/database/filelist/driver.py new file mode 100644 index 0000000000000000000000000000000000000000..48ea840a0990dbeb6e14af3e70f27957730e0157 --- /dev/null +++ b/bob/pad/base/database/filelist/driver.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Laurent El Shafey <laurent.el-shafey@idiap.ch> +# +# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +"""Commands the PAD Filelists database can respond to. 
+""" + +import os +import sys +from bob.db.base.driver import Interface as BaseInterface + + +def dumplist(args): + """Dumps lists of files based on your criteria""" + + from .query import FileListPadDatabase + db = FileListPadDatabase(args.list_directory, 'pad_filelist') + + r = db.objects( + purposes=args.purpose, + groups=args.group, + protocol=args.protocol + ) + + output = sys.stdout + if args.selftest: + from bob.db.base.utils import null + output = null() + + for f in r: + output.write('%s\n' % f.make_path(directory=args.directory, extension=args.extension)) + + return 0 + + +def checkfiles(args): + """Checks existence of files based on your criteria""" + + from .query import FileListPadDatabase + db = FileListPadDatabase(args.list_directory, 'pad_filelist') + + r = db.objects(protocol=args.protocol) + + # go through all files, check if they are available on the filesystem + good = [] + bad = [] + for f in r: + if os.path.exists(f.make_path(args.directory, args.extension)): + good.append(f) + else: + bad.append(f) + + # report + output = sys.stdout + if args.selftest: + from bob.db.base.utils import null + output = null() + + if bad: + for f in bad: + output.write('Cannot find file "%s"\n' % f.make_path(args.directory, args.extension)) + output.write('%d files (out of %d) were not found at "%s"\n' % (len(bad), len(r), args.directory)) + + return 0 + + +class Interface(BaseInterface): + def name(self): + return 'pad_filelist' + + def version(self): + import pkg_resources # part of setuptools + return pkg_resources.require('bob.pad.base')[0].version + + def files(self): + return () + + def type(self): + return 'text' + + def add_commands(self, parser): + from . 
import __doc__ as docs + + subparsers = self.setup_parser(parser, + "Presentation Attack Detection File Lists database", docs) + + import argparse + + # the "dumplist" action + parser = subparsers.add_parser('dumplist', help=dumplist.__doc__) + parser.add_argument('-l', '--list-directory', required=True, + help="The directory which contains the file lists.") + parser.add_argument('-d', '--directory', default='', + help="if given, this path will be prepended to every entry returned.") + parser.add_argument('-e', '--extension', default='', + help="if given, this extension will be appended to every entry returned.") + parser.add_argument('-u', '--purpose', + help="if given, this value will limit the output files to those designed " + "for the given purposes.", + choices=('real', 'attack', '')) + parser.add_argument('-g', '--group', + help="if given, this value will limit the output files to those belonging to a " + "particular protocolar group.", + choices=('dev', 'eval', 'train', '')) + parser.add_argument('-p', '--protocol', default=None, + help="If set, the protocol is appended to the directory that contains the file lists.") + parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS) + parser.set_defaults(func=dumplist) # action + + # the "checkfiles" action + parser = subparsers.add_parser('checkfiles', help=checkfiles.__doc__) + parser.add_argument('-l', '--list-directory', required=True, + help="The directory which contains the file lists.") + parser.add_argument('-d', '--directory', dest="directory", default='', + help="if given, this path will be prepended to every entry returned.") + parser.add_argument('-e', '--extension', dest="extension", default='', + help="if given, this extension will be appended to every entry returned.") + parser.add_argument('-p', '--protocol', default=None, + help="If set, the protocol is appended to the directory that contains the file lists.") + parser.add_argument('--self-test', 
dest="selftest", action='store_true', help=argparse.SUPPRESS) + + parser.set_defaults(func=checkfiles) # action diff --git a/bob/pad/base/database/filelist/models.py b/bob/pad/base/database/filelist/models.py new file mode 100644 index 0000000000000000000000000000000000000000..f9eb0fc5d3d7167eb4a6ce8d351ef328d79d0c4a --- /dev/null +++ b/bob/pad/base/database/filelist/models.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Pavel Korshunov <pavel.korshunov@idiap.ch> +# @date: Thu Nov 17 16:09:22 CET 2016 +# +# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +This file defines simple Client and File interfaces that are comparable with other bob.db databases. +""" + +import os +import fileinput +import re +from bob.pad.base.database import PadFile + + +class Client(object): + """ + The clients of this database contain ONLY client ids. Nothing special. + """ + + def __init__(self, client_id): + self.id = client_id + """The ID of the client, which is stored as a :py:class:`str` object.""" + + +class FileListFile(PadFile): + """ + Initialize the File object with the minimum required data. + + **Parameters** + + path : str + The path of this file, relative to the basic directory. + Please do not specify any file extensions. 
+ This path will be used as an underlying file_id, as it is assumed to be unique + + client_id : various type + The id of the client, this file belongs to. + The type of it is dependent on your implementation. + If you use an SQL database, this should be an SQL type like Integer or String. + """ + + def __init__(self, file_name, client_id, attack_type=None): + super(FileListFile, self).__init__(client_id=client_id, path=file_name, attack_type=attack_type, file_id=file_name) + + +############################################################################# +### internal access functions for the file lists; do not export! +############################################################################# + +class ListReader(object): + def __init__(self, store_lists): + self.m_read_lists = {} + self.m_store_lists = store_lists + + def _read_multi_column_list(self, list_file): + rows = [] + if not os.path.isfile(list_file): + raise RuntimeError('File %s does not exist.' % (list_file,)) + try: + for line in fileinput.input(list_file): + parsed_line = re.findall('[\w/(-.)]+', line) + if len(parsed_line): + # perform some sanity checks + if len(parsed_line) not in (2, 3): + raise IOError("The read line '%s' from file '%s' could not be parsed successfully!" % + (line.rstrip(), list_file)) + if len(rows) and len(rows[0]) != len(parsed_line): + raise IOError("The parsed line '%s' from file '%s' has a different number of elements " + "than the first parsed line '%s'!" % (parsed_line, list_file, rows[0])) + # append the read line + rows.append(parsed_line) + fileinput.close() + except IOError as e: + raise RuntimeError("Error reading the file '%s' : '%s'." 
% (list_file, e)) + + # return the parsed lines as a list of rows + return rows + + def _read_column_list(self, list_file, column_count): + # read the list + rows = self._read_multi_column_list(list_file) + # build one file object from the columns of each row + file_list = [] + for row in rows: + if column_count == 2: + assert len(row) == 2 + # we expect: filename client_id + file_list.append(FileListFile(file_name=row[0], client_id=row[1])) + elif column_count == 3: + assert len(row) == 3 + # we expect: filename client_id attack_type + file_list.append(FileListFile(file_name=row[0], client_id=row[1], attack_type=row[2])) + else: + raise ValueError("The given column count %d cannot be interpreted. This is a BUG, please " + "report to the author." % column_count) + + return file_list + + def read_list(self, list_file, group, type=None): + """Reads the list of Files from the given list file (if not done yet) and returns it.""" + if group not in self.m_read_lists: + self.m_read_lists[group] = {} + if type not in self.m_read_lists[group]: + if type == 'for_real': + files_list = self._read_column_list(list_file, 2) + elif type == 'for_attack': + files_list = self._read_column_list(list_file, 3) + else: + raise ValueError("The given type must be one of %s, but not '%s'" % (('for_real', 'for_attack'), type)) + if self.m_store_lists: + self.m_read_lists[group][type] = files_list + return files_list + return self.m_read_lists[group][type] diff --git a/bob/pad/base/database/filelist/query.py b/bob/pad/base/database/filelist/query.py new file mode 100644 index 0000000000000000000000000000000000000000..f1191af6face2ba5d9be290295e09d644ce5c744 --- /dev/null +++ b/bob/pad/base/database/filelist/query.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Laurent El Shafey <Laurent.El-Shafey@idiap.ch> +# @author: Pavel Korshunov <pavel.korshunov@idiap.ch> +# @date: Thu Nov 17 16:09:22 CET 2016 +# +# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, 
Switzerland +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +"""This module provides the Database interface allowing the user to query the +PAD database based on file lists provided in the corresponding directory. +""" + +import os + +from .models import Client, ListReader +from .. import PadFile +from .. import PadDatabase +from bob.bio.base.database import FileListBioDatabase + +class FileListPadDatabase(FileListBioDatabase, PadDatabase): + """This class provides a user-friendly interface to databases that are given as file lists. + + Keyword parameters: + + filelists_directory : str + The directory that contains the filelists defining the protocol(s). If you use the protocol + attribute when querying the database, it will be appended to the base directory, such that + several protocols are supported by the same class instance of `bob.pad.base`. + + name : str + The name of the database + + protocol : str + The protocol of the database. This should be a folder inside ``filelists_directory``. + + pad_file_class : class + The class that should be used for returning the files. + This can be `PadFile`, `PadVoiceFile`, or anything similar. 
+ + original_directory : str or ``None`` + The directory, where the original data can be found + + original_extension : str or [str] or ``None`` + The filename extension of the original data, or multiple extensions + + annotation_directory : str or ``None`` + The directory, where additional annotation files can be found + + annotation_extension : str or ``None`` + The filename extension of the annotation files + + annotation_type : str + The type of the annotation file to read, see `bob.db.base.read_annotation_file` for accepted formats. + + train_subdir : str or ``None`` + Specify a custom subdirectory for the filelists of the training set (default is 'train') + + dev_subdir : str or ``None`` + Specify a custom subdirectory for the filelists of the development set (default is 'dev') + + eval_subdir : str or ``None`` + Specify a custom subdirectory for the filelists of the evaluation set (default is 'eval') + + keep_read_lists_in_memory : bool + If set to true, the lists are read only once and stored in memory + """ + + def __init__( + self, + filelists_directory, + name, + protocol=None, + pad_file_class=PadFile, + + original_directory=None, + original_extension=None, + + # PAD annotations should be supported in the future + annotation_directory=None, + annotation_extension="", + annotation_type=None, + + train_subdir=None, + dev_subdir=None, + eval_subdir=None, + + real_filename=None, # File containing the real files + attack_filename=None, # File containing the attack files + + # if set to True (the RECOMMENDED default) lists are read only once and stored in memory. + keep_read_lists_in_memory=True, + **kwargs + ): + """We call PadDatabase.__init__() instead of super() because we do not want + bob.bio.base.database.FileListBioDatabase.__init__() to be called by super(). 
+ bob.bio.base.database.FileListBioDatabase depends on bob.bio.base.database.ZTBioDatabase, which would + throw an exception, since we do not implement here methods for ZT-based metric.""" + + PadDatabase.__init__(self, + name=name, + protocol=protocol, + original_directory=original_directory, + original_extension=original_extension, + annotation_directory=annotation_directory, + annotation_extension=annotation_extension, + annotation_type=annotation_type, + filelists_directory=filelists_directory, + # extra args for pretty printing + train_sub_directory=train_subdir, + dev_sub_directory=dev_subdir, + eval_sub_directory=eval_subdir, + real_filename=real_filename, + attack_filename=attack_filename, + **kwargs) + + self.pad_file_class = pad_file_class + self.list_readers = {} + + self.m_base_dir = os.path.abspath(filelists_directory) + if not os.path.isdir(self.m_base_dir): + raise RuntimeError('Invalid directory specified %s.' % self.m_base_dir) + + # sub-directories for train, dev, and eval sets: + self.m_dev_subdir = dev_subdir if dev_subdir is not None else 'dev' + self.m_eval_subdir = eval_subdir if eval_subdir is not None else 'eval' + self.m_train_subdir = train_subdir if train_subdir is not None else 'train' + + # real list: format: filename client_id + self.m_real_filename = real_filename if real_filename is not None else 'for_real.lst' + # attack list: format: filename client_id attack_type + self.m_attack_filename = attack_filename if attack_filename is not None else 'for_attack.lst' + + self.keep_read_lists_in_memory = keep_read_lists_in_memory + + def _list_reader(self, protocol): + if protocol not in self.list_readers: + if protocol is not None: + protocol_dir = os.path.join(self.get_base_directory(), protocol) + if not os.path.isdir(protocol_dir): + raise ValueError( + "The directory %s for the given protocol '%s' does not exist" % (protocol_dir, protocol)) + self.list_readers[protocol] = ListReader(self.keep_read_lists_in_memory) + + return 
self.list_readers[protocol] + + def _make_pad(self, files): + return [self.pad_file_class(client_id=f.client_id, path=f.path, attack_type=f.attack_type, file_id=f.id) + for f in files] + + def groups(self, protocol=None): + """This function returns the list of groups for this database. + + protocol : str or ``None`` + The protocol for which the groups should be retrieved. + + Returns: a list of groups + """ + groups = [] + if protocol is not None: + if os.path.isdir(os.path.join(self.get_base_directory(), protocol, self.m_dev_subdir)): + groups.append('dev') + if os.path.isdir(os.path.join(self.get_base_directory(), protocol, self.m_eval_subdir)): + groups.append('eval') + if os.path.isdir(os.path.join(self.get_base_directory(), protocol, self.m_train_subdir)): + groups.append('train') + else: + if os.path.isdir(os.path.join(self.get_base_directory(), self.m_dev_subdir)): + groups.append('dev') + if os.path.isdir(os.path.join(self.get_base_directory(), self.m_eval_subdir)): + groups.append('eval') + if os.path.isdir(os.path.join(self.get_base_directory(), self.m_train_subdir)): + groups.append('train') + return groups + + def _get_list_file(self, group, type=None, protocol=None): + if protocol: + base_directory = os.path.join(self.get_base_directory(), protocol) + else: + base_directory = self.get_base_directory() + + group_dir = self.m_dev_subdir if group == 'dev' else self.m_eval_subdir if group == 'eval' else self.m_train_subdir + list_name = {'for_real': self.m_real_filename, + 'for_attack': self.m_attack_filename, + }[type] + return os.path.join(base_directory, group_dir, list_name) + + + def client_ids(self, protocol=None, groups=None): + """Returns a list of client ids for the specific query by the user. + + Keyword Parameters: + + protocol : str or ``None`` + The protocol to consider + + groups : str or [str] or ``None`` + The groups to which the clients belong ("dev", "eval", "train"). 
+ + Returns: A list containing all the client ids which have the given properties. + """ + + groups = self.check_parameters_for_validity(groups, "group", + self.groups(protocol), + default_parameters=self.groups(protocol)) + + return self.__client_id_list__(groups, 'for_real', protocol) + + def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): + """Returns a set of :py:class:`File` objects for the specific query by the user. + + Keyword Parameters: + + groups : str or [str] or ``None`` + One of the groups ("dev", "eval", "train") or a tuple with several of them. + If 'None' is given (this is the default), it is considered the same as a + tuple with all possible values. + + protocol : str or ``None`` + The protocol to consider + + purposes : str or [str] or ``None`` + The purposes required to be retrieved ("real", "attack") or a tuple + with several of them. If 'None' is given (this is the default), it is + considered the same as a tuple with all possible values. + + model_ids : [various type] + This parameter is not supported in PAD databases yet + + Returns: A list of :py:class:`File` objects considering all the filtering criteria. 
+ """ + + purposes = self.check_parameters_for_validity(purposes, "purpose", ('real', 'attack')) + groups = self.check_parameters_for_validity(groups, "group", + self.groups(protocol), + default_parameters=self.groups(protocol)) + + # first, collect all the lists that we want to process + lists = [] + for group in ('train', 'dev', 'eval'): + if group in groups: + if 'real' in purposes: + lists.append( + self._list_reader(protocol).read_list(self._get_list_file(group, 'for_real', protocol=protocol), + group, 'for_real')) + if 'attack' in purposes: + lists.append( + self._list_reader(protocol).read_list(self._get_list_file(group, 'for_attack', + protocol=protocol), + group, 'for_attack')) + + # now, go through the lists and add add corresponding files + retval = [] + + # non-probe files; just filter by model id + for flist in lists: + for fileobj in flist: + retval.append(fileobj) + + return self._make_pad(retval) + + def annotations(self, file): + return super(FileListPadDatabase, self).annotations(file) diff --git a/bob/pad/base/test/data/attack1.wav b/bob/pad/base/test/data/attack1.wav new file mode 100644 index 0000000000000000000000000000000000000000..b168c86c35d9df40c294a5baf01a33ab4330c42c Binary files /dev/null and b/bob/pad/base/test/data/attack1.wav differ diff --git a/bob/pad/base/test/data/attack2.wav b/bob/pad/base/test/data/attack2.wav new file mode 100644 index 0000000000000000000000000000000000000000..3ec9cde617e1df01502cee4ea43582131ae434b8 Binary files /dev/null and b/bob/pad/base/test/data/attack2.wav differ diff --git a/bob/pad/base/test/data/attack3.wav b/bob/pad/base/test/data/attack3.wav new file mode 100644 index 0000000000000000000000000000000000000000..ff550aed452311ab069444078ee3b74e9760f94b Binary files /dev/null and b/bob/pad/base/test/data/attack3.wav differ diff --git a/bob/pad/base/test/data/example_filelist/dev/for_attack.lst b/bob/pad/base/test/data/example_filelist/dev/for_attack.lst new file mode 100644 index 
0000000000000000000000000000000000000000..a8208801f33054a4ccfcc66efdfedd867ce80cf0 --- /dev/null +++ b/bob/pad/base/test/data/example_filelist/dev/for_attack.lst @@ -0,0 +1 @@ +data/attack2 F2 Attack_1 diff --git a/bob/pad/base/test/data/example_filelist/dev/for_real.lst b/bob/pad/base/test/data/example_filelist/dev/for_real.lst new file mode 100644 index 0000000000000000000000000000000000000000..078fc99736d17c482f824d9f8df6aca478958c4d --- /dev/null +++ b/bob/pad/base/test/data/example_filelist/dev/for_real.lst @@ -0,0 +1,2 @@ +data/real2 F2 +data/real3 M2 diff --git a/bob/pad/base/test/data/example_filelist/eval/for_attack.lst b/bob/pad/base/test/data/example_filelist/eval/for_attack.lst new file mode 100644 index 0000000000000000000000000000000000000000..cd29fdc19588d815ef3493bc0087ba1657b46391 --- /dev/null +++ b/bob/pad/base/test/data/example_filelist/eval/for_attack.lst @@ -0,0 +1 @@ +data/attack3 unknown Attack_2 diff --git a/bob/pad/base/test/data/example_filelist/eval/for_real.lst b/bob/pad/base/test/data/example_filelist/eval/for_real.lst new file mode 100644 index 0000000000000000000000000000000000000000..6a97efbd529829ccd7272f89400bbf72be93c719 --- /dev/null +++ b/bob/pad/base/test/data/example_filelist/eval/for_real.lst @@ -0,0 +1 @@ +data/real4 F4 diff --git a/bob/pad/base/test/data/example_filelist/train/for_attack.lst b/bob/pad/base/test/data/example_filelist/train/for_attack.lst new file mode 100644 index 0000000000000000000000000000000000000000..724327abad38530492919419e9e746db331eb917 --- /dev/null +++ b/bob/pad/base/test/data/example_filelist/train/for_attack.lst @@ -0,0 +1 @@ +data/attack1 M1 Attack_1 diff --git a/bob/pad/base/test/data/example_filelist/train/for_real.lst b/bob/pad/base/test/data/example_filelist/train/for_real.lst new file mode 100644 index 0000000000000000000000000000000000000000..fe7c5da8e54e1bec5faea143ef0022898c1fe3b0 --- /dev/null +++ b/bob/pad/base/test/data/example_filelist/train/for_real.lst @@ -0,0 +1,2 @@ +data/real1 
M1 +data/real2 F1 diff --git a/bob/pad/base/test/data/real1.wav b/bob/pad/base/test/data/real1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e61fa13c19f6af658512e6c0adfb8cf6396498c8 Binary files /dev/null and b/bob/pad/base/test/data/real1.wav differ diff --git a/bob/pad/base/test/data/real2.wav b/bob/pad/base/test/data/real2.wav new file mode 100644 index 0000000000000000000000000000000000000000..527826039d312b0c3d7dc4e152ff1011fd5ebdb2 Binary files /dev/null and b/bob/pad/base/test/data/real2.wav differ diff --git a/bob/pad/base/test/data/real3.wav b/bob/pad/base/test/data/real3.wav new file mode 100644 index 0000000000000000000000000000000000000000..b2cbbcba81f553e3362da0fcb8f168f68fe14721 Binary files /dev/null and b/bob/pad/base/test/data/real3.wav differ diff --git a/bob/pad/base/test/data/real4.wav b/bob/pad/base/test/data/real4.wav new file mode 100644 index 0000000000000000000000000000000000000000..527826039d312b0c3d7dc4e152ff1011fd5ebdb2 Binary files /dev/null and b/bob/pad/base/test/data/real4.wav differ diff --git a/bob/pad/base/test/dummy/filelist.py b/bob/pad/base/test/dummy/filelist.py new file mode 100644 index 0000000000000000000000000000000000000000..ebb31b612dd2b9c215a34d3aacd7cf0c839c0652 --- /dev/null +++ b/bob/pad/base/test/dummy/filelist.py @@ -0,0 +1,20 @@ + +from bob.pad.base.database import FileListPadDatabase +import pkg_resources + +database = FileListPadDatabase( + name='test_filelist', + protocol=None, + filelists_directory=pkg_resources.resource_filename('bob.pad.base.test', 'data/example_filelist'), + original_directory=pkg_resources.resource_filename('bob.pad.base.test', 'data'), + original_extension=".wav", + train_subdir='.', + dev_subdir='.', + eval_subdir='.', + real_filename='for_real.lst', + attack_filename='for_attack.lst', + keep_read_lists_in_memory=True, + check_original_files_for_existence=True, + training_depends_on_protocol=False, + models_depend_on_protocol=False +) diff --git 
a/bob/pad/base/test/test_filelist.py b/bob/pad/base/test/test_filelist.py new file mode 100644 index 0000000000000000000000000000000000000000..bee7eb03406cac9db2cede7f9cc756895609e311 --- /dev/null +++ b/bob/pad/base/test/test_filelist.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Pavel Korshunov <pavel.korshunov@idiap.ch> +# @date: Thu Nov 17 16:09:22 CET 2016 +# +# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +Tests for the PAD Filelist database. 
+""" + +import os +import bob.io.base.test_utils +from bob.pad.base.database import FileListPadDatabase + + +example_dir = os.path.realpath(bob.io.base.test_utils.datafile('.', __name__, 'data/example_filelist')) + + +def test_query(): + + db = FileListPadDatabase(example_dir, 'test_padfilelist') + assert len(db.groups()) == 3 # 3 groups (dev, eval, train) + + print(db.client_ids()) + # 5 client ids for real data of train, dev and eval sets (ignore all ids that are in attacks only) + assert len(db.client_ids()) == 5 + assert len(db.client_ids(groups='train')) == 2 # 2 client ids for train + assert len(db.client_ids(groups='dev')) == 2 # 2 client ids for dev + assert len(db.client_ids(groups='eval')) == 1 # 1 client id for eval + + assert len(db.objects(groups='train')) == 3 # 3 samples in the train set + + assert len(db.objects(groups='dev', purposes='real')) == 2 # 2 samples of real data in the dev set + assert len(db.objects(groups='dev', purposes='attack')) == 1 # 1 attack in the dev set + + +def test_query_protocol(): + db = FileListPadDatabase(os.path.dirname(example_dir), 'test_padfilelist') + p = 'example_filelist' + + assert len(db.groups(protocol=p)) == 3 # 3 groups (dev, eval, train) + + assert len(db.client_ids(protocol=p)) == 5 # 5 client ids for train, dev and eval + assert len(db.client_ids(groups='train', protocol=p)) == 2 # 2 client ids for train + assert len(db.client_ids(groups='dev', protocol=p)) == 2 # 2 client ids for dev + assert len(db.client_ids(groups='eval', protocol=p)) == 1 # 1 client id for eval + + assert len(db.objects(groups='train', protocol=p)) == 3 # 3 samples in the train set + + assert len(db.objects(groups='dev', purposes='real', protocol=p)) == 2 # 2 samples of real data in the dev set + assert len(db.objects(groups='dev', purposes='attack', protocol=p)) == 1 # 1 attack in the dev set + + +def test_driver_api(): + from bob.db.base.script.dbmanage import main + assert main(('pad_filelist dumplist --list-directory=%s 
--self-test' % example_dir).split()) == 0 + assert main(('pad_filelist dumplist --list-directory=%s --purpose=real --group=dev --self-test' % + example_dir).split()) == 0 + assert main(('pad_filelist checkfiles --list-directory=%s --self-test' % example_dir).split()) == 0 diff --git a/setup.py b/setup.py index 8c6626c00e5d85c9a2124cf4a843954fd59ce565..a102ab98124fd5947a4de8434e0461c8ae0a855e 100644 --- a/setup.py +++ b/setup.py @@ -109,6 +109,7 @@ setup( # bob database declaration 'bob.db': [ 'dummy = bob.pad.base.test.dummy.database:Interface', # driver for bobdb_manage + 'pad_filelist = bob.pad.base.database.filelist.driver:Interface', ], 'bob.pad.database': [