Skip to content
Snippets Groups Projects
Commit 0ab2e9a5 authored by Amir Mohammadi's avatar Amir Mohammadi
Browse files

Add the filelist interface

parent 655b88cf
No related branches found
No related tags found
1 merge request!54Add the filelist interface
Pipeline #
Showing
with 1261 additions and 7 deletions
......@@ -11,3 +11,4 @@ src
develop-eggs
sphinx
dist
build
......@@ -2,6 +2,9 @@ from .file import BioFile
from .file import BioFileSet
from .database import BioDatabase
from .database import ZTBioDatabase
from .filelist.query import FileListBioDatabase
from .filelist.models import Client
# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
......@@ -15,12 +18,16 @@ def __appropriate__(*args):
<https://github.com/sphinx-doc/sphinx/issues/3048>`
"""
for obj in args: obj.__module__ = __name__
for obj in args:
obj.__module__ = __name__
__appropriate__(
BioFile,
BioFileSet,
BioDatabase,
ZTBioDatabase,
)
FileListBioDatabase,
Client,
)
__all__ = [_ for _ in dir() if not _.startswith('_')]
......@@ -8,8 +8,6 @@ import six
from numpy.testing.decorators import setastest
import bob.db.base
import bob.bio.base.database
class BioDatabase(six.with_metaclass(abc.ABCMeta, bob.db.base.Database)):
"""This class represents the basic API for database access.
......@@ -54,8 +52,6 @@ class BioDatabase(six.with_metaclass(abc.ABCMeta, bob.db.base.Database)):
protocol : str or ``None``
The name of the protocol that defines the default experimental setup for this database.
.. todo:: Check if the ``None`` protocol is supported.
training_depends_on_protocol : bool
Specifies, if the training set used for training the extractor and the projector depend on the protocol.
This flag is used to avoid re-computation of data when running on the different protocols of the same database.
......@@ -637,7 +633,7 @@ class ZTBioDatabase(BioDatabase):
All keyword parameters will be passed unaltered to the :py:class:`bob.bio.base.database.BioDatabase` constructor.
"""
# call base class constructor
BioDatabase.__init__(self, name, **kwargs)
super(ZTBioDatabase, self).__init__(name, **kwargs)
self.z_probe_options = z_probe_options
......
from .models import ListReader, Client
from .query import FileListBioDatabase
# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
    """Marks the given objects as declared in this module.

    Re-assigning ``__module__`` fixes Sphinx warnings about classes that
    cannot be found when their import path is shortened; see
    `Sphinx issue 3048 <https://github.com/sphinx-doc/sphinx/issues/3048>`_.

    Parameters:

      *args: An iterable of objects to modify
    """
    for item in args:
        item.__module__ = __name__
# declare the re-exported classes as members of this package
__appropriate__(
    ListReader,
    Client,
    FileListBioDatabase,
)

# export every public name defined or imported above
__all__ = [name for name in dir() if not name.startswith('_')]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <laurent.el-shafey@idiap.ch>
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Commands the Verification Filelists database can respond to.
"""
import os
import sys
from bob.db.base.driver import Interface as BaseInterface
def dumplist(args):
    """Dumps lists of files based on your criteria"""
    from .query import FileListBioDatabase
    database = FileListBioDatabase(args.list_directory, 'bio_filelist',
                                   use_dense_probe_file_list=False)

    # query the database with the filters given on the command line
    files = database.objects(purposes=args.purpose,
                             groups=args.group,
                             classes=args.sclass,
                             protocol=args.protocol)

    # under --self-test, discard the output instead of printing it
    if args.selftest:
        from bob.db.base.utils import null
        output = null()
    else:
        output = sys.stdout

    for f in files:
        output.write('%s\n' % f.make_path(directory=args.directory,
                                          extension=args.extension))
    return 0
def checkfiles(args):
    """Checks existence of files based on your criteria"""
    from .query import FileListBioDatabase
    database = FileListBioDatabase(args.list_directory, 'bio_filelist',
                                   use_dense_probe_file_list=False)
    files = database.objects(protocol=args.protocol)

    # collect every entry whose file is missing from the filesystem
    missing = [f for f in files
               if not os.path.exists(f.make_path(args.directory, args.extension))]

    # under --self-test, discard the output instead of printing it
    output = sys.stdout
    if args.selftest:
        from bob.db.base.utils import null
        output = null()

    # report only when something was not found
    if missing:
        for f in missing:
            output.write('Cannot find file "%s"\n' % f.make_path(args.directory, args.extension))
        output.write('%d files (out of %d) were not found at "%s"\n' %
                     (len(missing), len(files), args.directory))
    return 0
class Interface(BaseInterface):
    """Bob database driver for file-list based verification databases.

    Registers the ``dumplist`` and ``checkfiles`` sub-commands defined in
    this module with the database management command line tool.
    """

    def name(self):
        # the name under which this driver is registered
        return 'bio_filelist'

    def version(self):
        """Returns the version of the containing ``bob.bio.base`` package."""
        import pkg_resources  # part of setuptools
        return pkg_resources.require('bob.bio.base')[0].version

    def files(self):
        # this driver distributes no data files of its own
        return ()

    def type(self):
        # the file lists are plain-text files
        return 'text'

    def add_commands(self, parser):
        """Adds the ``dumplist`` and ``checkfiles`` sub-commands to the given parser."""
        from . import __doc__ as docs
        subparsers = self.setup_parser(parser,
                                       "Face Verification File Lists database", docs)
        import argparse

        # the "dumplist" action
        parser = subparsers.add_parser('dumplist', help=dumplist.__doc__)
        parser.add_argument('-l', '--list-directory', required=True,
                            help="The directory which contains the file lists.")
        parser.add_argument('-d', '--directory', default='',
                            help="if given, this path will be prepended to every entry returned.")
        parser.add_argument('-e', '--extension', default='',
                            help="if given, this extension will be appended to every entry returned.")
        parser.add_argument('-u', '--purpose',
                            help="if given, this value will limit the output files to those designed for the given purposes.",
                            choices=('enroll', 'probe', ''))
        parser.add_argument('-g', '--group',
                            help="if given, this value will limit the output files to those belonging to a particular protocolar group.",
                            choices=('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2', ''))
        parser.add_argument('-c', '--class', dest="sclass",
                            help="if given, this value will limit the output files to those belonging to the given classes.",
                            choices=('client', 'impostor', ''))
        parser.add_argument('-p', '--protocol', default=None,
                            help="If set, the protocol is appended to the directory that contains the file lists.")
        # --self-test is hidden from the help output; used by the test suite
        parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
        parser.set_defaults(func=dumplist)  # action

        # the "checkfiles" action
        parser = subparsers.add_parser('checkfiles', help=checkfiles.__doc__)
        parser.add_argument('-l', '--list-directory', required=True,
                            help="The directory which contains the file lists.")
        parser.add_argument('-d', '--directory', dest="directory", default='',
                            help="if given, this path will be prepended to every entry returned.")
        parser.add_argument('-e', '--extension', dest="extension", default='',
                            help="if given, this extension will be appended to every entry returned.")
        parser.add_argument('-p', '--protocol', default=None,
                            help="If set, the protocol is appended to the directory that contains the file lists.")
        # --self-test is hidden from the help output; used by the test suite
        parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
        parser.set_defaults(func=checkfiles)  # action
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Wed Oct 24 10:47:43 CEST 2012
#
# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
This file defines simple Client and File interfaces that are comparable with other bob.db databases.
"""
import os
import fileinput
import re
class Client(object):
    """A database client that carries nothing but its identifier.

    The clients of this database contain ONLY client ids; no further
    metadata is stored.
    """

    def __init__(self, client_id):
        # the ID of the client, stored as a str object
        self.id = client_id
class FileListFile(object):
    """A single entry of a verification file list.

    Holds the minimum data required for a file: its path (which doubles as
    its unique file id), the id of the client it belongs to, and optional
    model and claimed-client ids that default to the client id when omitted.

    **Parameters**

    file_name : str
        The path of this file, relative to the basic directory, without
        file extension; it is also used as the file id.

    client_id : various type
        The id of the client this file belongs to.

    model_id : various type or ``None``
        The id of the model associated with this file; when not specified,
        ``client_id`` is used instead.

    claimed_id : various type or ``None``
        The client id claimed for this file; when not specified,
        ``client_id`` is used instead.
    """

    def __init__(self, file_name, client_id, model_id=None, claimed_id=None):
        super(FileListFile, self).__init__()
        self.client_id = client_id
        # the relative path doubles as the unique id of the file
        self.path = file_name
        self.id = file_name
        # Note: for probe files the model id names the model this probe is
        # scored against, so several entries may share a file id while
        # carrying different model ids.  Therefore, please DO NOT USE the
        # model_id outside of this class (or the according database queries).
        self._model_id = model_id if model_id is not None else client_id
        # the claimed identity falls back to the true client id
        self.claimed_id = claimed_id if claimed_id is not None else client_id
#############################################################################
# internal access functions for the file lists; do not export!
#############################################################################
class ListReader(object):
    """Reads — and optionally caches — the file lists of a file-list database.

    Lists are plain-text files with 2 to 4 whitespace-separated columns per
    line; they are parsed into :py:class:`FileListFile` objects.  When
    ``store_lists`` is ``True``, parsed lists and model dictionaries are kept
    in memory and re-used on subsequent calls.
    """

    def __init__(self, store_lists):
        # cache of already-read file lists, indexed by group (and list type)
        self.m_read_lists = {}
        # cache of model_id -> client_id dictionaries, indexed by group and type
        self.m_model_dicts = {}
        # whether parsed lists should be kept in memory
        self.m_store_lists = store_lists

    def _read_multi_column_list(self, list_file):
        """Reads the given list file into rows of parsed tokens.

        Every non-empty line must yield 2, 3 or 4 tokens, and all lines must
        have the same number of tokens as the first one.

        Raises ``RuntimeError`` when the file is missing, unreadable or
        malformed.
        """
        rows = []
        if not os.path.isfile(list_file):
            raise RuntimeError('File %s does not exist.' % (list_file,))
        try:
            # a context manager guarantees the file is closed even when a
            # malformed line aborts the parse (the previous fileinput-based
            # code leaked the handle in that case)
            with open(list_file) as source:
                for line in source:
                    # raw string: '\w' is an invalid escape in a plain literal
                    parsed_line = re.findall(r'[\w/(-.)]+', line)
                    if len(parsed_line):
                        # perform some sanity checks
                        if len(parsed_line) not in (2, 3, 4):
                            raise IOError("The read line '%s' from file '%s' could not be parsed successfully!" % (
                                line.rstrip(), list_file))
                        if len(rows) and len(rows[0]) != len(parsed_line):
                            raise IOError(
                                "The parsed line '%s' from file '%s' has a different number of elements than the first parsed line '%s'!" % (
                                    parsed_line, list_file, rows[0]))
                        # append the read line
                        rows.append(parsed_line)
        except IOError as e:
            # wrap both read errors and format errors, as before
            raise RuntimeError("Error reading the file '%s' : '%s'." % (list_file, e))
        # return the read list as a vector of columns
        return rows

    def _read_column_list(self, list_file, column_count):
        """Reads a list file and converts each row into a :py:class:`FileListFile`.

        ``column_count`` selects the expected layout: 2 (filename, client_id),
        3 (filename, model_id[, client_id]) or 4 (filename, model_id,
        claimed_id[, client_id]).
        """
        # read the list
        rows = self._read_multi_column_list(list_file)
        # convert each row into a file object
        file_list = []
        for row in rows:
            if column_count == 2:
                assert len(row) == 2
                # we expect: filename client_id
                file_list.append(FileListFile(file_name=row[0], client_id=row[1]))
            elif column_count == 3:
                assert len(row) in (2, 3)
                # we expect: filename, model_id, client_id
                # (when the client_id column is absent, the model_id is used)
                file_list.append(FileListFile(file_name=row[0], client_id=row[2] if len(row) > 2 else row[1], model_id=row[1]))
            elif column_count == 4:
                assert len(row) in (3, 4)
                # we expect: filename, model_id, claimed_id, client_id
                # (when the client_id column is absent, the model_id is used)
                file_list.append(FileListFile(file_name=row[0], client_id=row[3] if len(row) > 3 else row[1], model_id=row[1],
                                              claimed_id=row[2]))
            else:
                raise ValueError(
                    "The given column count %d cannot be interpreted. This is a BUG, please report to the author." % column_count)
        return file_list

    def _create_model_dictionary(self, files):
        """Builds a model_id -> client_id dictionary from the given files.

        Raises ``ValueError`` when one model id maps to two different clients.
        """
        retval = {}
        for f in files:
            if f._model_id not in retval:
                retval[f._model_id] = f.client_id
            else:
                if retval[f._model_id] != f.client_id:
                    raise ValueError(
                        "The read model id '%s' is associated to two different client ids '%s' and '%s'!" % (
                            f._model_id, f.client_id, retval[f._model_id]))
        return retval

    def read_list(self, list_file, group, type=None):
        """Reads the list of Files from the given list file (if not done yet) and returns it.

        Note: the ``type`` parameter name shadows the builtin; it is kept for
        backward compatibility with callers passing ``type=`` by keyword.
        """
        if group in ('world', 'optional_world_1', 'optional_world_2'):
            if group not in self.m_read_lists:
                # read the world list into memory
                file_list = self._read_column_list(list_file, 2)
                if self.m_store_lists:
                    self.m_read_lists[group] = file_list
                return file_list
            # just return the previously read list
            return self.m_read_lists[group]
        else:
            if group not in self.m_read_lists:
                self.m_read_lists[group] = {}
            if type not in self.m_read_lists[group]:
                # the list type determines the expected column layout
                if type in ('for_models', 'for_tnorm'):
                    file_list = self._read_column_list(list_file, 3)
                elif type == 'for_scores':
                    file_list = self._read_column_list(list_file, 4)
                elif type in ('for_probes', 'for_znorm'):
                    file_list = self._read_column_list(list_file, 2)
                else:
                    raise ValueError("The given type must be one of %s, but not '%s'" % (
                        ('for_models', 'for_scores', 'for_probes', 'for_tnorm', 'for_znorm'), type))
                if self.m_store_lists:
                    self.m_read_lists[group][type] = file_list
                return file_list
            return self.m_read_lists[group][type]

    def read_models(self, list_file, group, type=None):
        """Generates a dictionary from model_ids to client_ids for the given list file, if not done yet, and returns it"""
        assert group in ('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')
        assert type in ('for_models', 'for_tnorm')

        if group not in self.m_model_dicts:
            self.m_model_dicts[group] = {}
        if type not in self.m_model_dicts[group]:
            model_dict = self._create_model_dictionary(self.read_list(list_file, group, type))
            if self.m_store_lists:
                self.m_model_dicts[group][type] = model_dict
            return model_dict
        return self.m_model_dicts[group][type]
This diff is collapsed.
key1 10 20
key2 30 40
data/model3_session1_sample1 3 3
data/model3_session1_sample2 3 3
data/model3_session1_sample3 3 3
data/model3_session2_sample1 3 3
data/model4_session1_sample1 4 4
data/model4_session1_sample2 4 4
data/model4_session1_sample3 4 4
data/model4_session2_sample1 4 4
data/model3_session3_sample1 3
data/model3_session3_sample2 3
data/model3_session3_sample3 3
data/model3_session4_sample1 3
data/model4_session3_sample1 4
data/model4_session3_sample2 4
data/model4_session3_sample1 4
data/model4_session3_sample2 4
data/model4_session3_sample3 4
data/model4_session4_sample1 4
data/model3_session3_sample1 3 3 3
data/model3_session3_sample2 3 3 3
data/model3_session3_sample3 3 3 3
data/model3_session4_sample1 3 3 3
data/model4_session3_sample1 3 3 4
data/model4_session3_sample2 3 3 4
data/model4_session3_sample1 4 4 4
data/model4_session3_sample2 4 4 4
data/model4_session3_sample3 4 4 4
data/model4_session4_sample1 4 4 4
data/model3_session3_sample1 4 4 3
data/model3_session3_sample2 4 4 3
data/model7_session1_sample1 7 7
data/model7_session1_sample2 7 7
data/model7_session1_sample3 7 7
data/model7_session2_sample1 7 7
data/model8_session1_sample1 8 8
data/model8_session1_sample2 8 8
data/model8_session1_sample3 8 8
data/model8_session2_sample1 8 8
data/model9_session1_sample1 9
data/model9_session1_sample2 9
data/model9_session1_sample3 9
data/model9_session2_sample1 9
data/model10_session1_sample1 10
data/model10_session1_sample2 10
data/model10_session1_sample3 10
data/model10_session2_sample1 10
data/model5_session1_sample1 5 5
data/model5_session1_sample2 5 5
data/model5_session1_sample3 5 5
data/model5_session2_sample1 5 5
data/model6_session1_sample1 6 6
data/model6_session1_sample2 6 6
data/model6_session1_sample3 6 6
data/model6_session2_sample1 6 6
data/model5_session3_sample1 5
data/model5_session3_sample2 5
data/model5_session3_sample3 5
data/model5_session4_sample1 5
data/model6_session3_sample1 6
data/model6_session3_sample2 6
data/model6_session3_sample3 6
data/model6_session4_sample1 6
data/model5_session3_sample1 5 5 5
data/model5_session3_sample2 5 5 5
data/model5_session3_sample3 5 5 5
data/model5_session4_sample1 5 5 5
data/model6_session3_sample1 6 6 6
data/model6_session3_sample2 6 6 6
data/model6_session3_sample3 6 6 6
data/model6_session4_sample1 6 6 6
data/model7_session1_sample1 7 7
data/model7_session1_sample2 7 7
data/model7_session1_sample3 7 7
data/model7_session2_sample1 7 7
data/model8_session1_sample1 8 8
data/model8_session1_sample2 8 8
data/model8_session1_sample3 8 8
data/model8_session2_sample1 8 8
data/model9_session1_sample1 9
data/model9_session1_sample2 9
data/model9_session1_sample3 9
data/model9_session2_sample1 9
data/model10_session1_sample1 10
data/model10_session1_sample2 10
data/model10_session1_sample3 10
data/model10_session2_sample1 10
data/model11_session1_sample1 1
data/model11_session1_sample2 1
data/model11_session1_sample3 1
data/model11_session2_sample1 1
data/model12_session1_sample1 2
data/model12_session1_sample2 2
data/model12_session1_sample3 2
data/model12_session2_sample1 2
data/model13_session1_sample1 1
data/model13_session1_sample2 1
data/model13_session1_sample3 1
data/model13_session2_sample1 1
data/model14_session1_sample1 2
data/model14_session1_sample2 2
data/model14_session1_sample3 2
data/model14_session2_sample1 2
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment