Commit 24ac70d6 authored by André Anjos's avatar André Anjos 💬
Browse files

Make up a guide for this basic database interface (c.f. bob.db.base#14)

parent 4dc26c11
Pipeline #8927 failed with stages
in 3 minutes and 20 seconds
*~
*.swp
*.pyc
*.so
*.dylib
bin
eggs
parts
......@@ -17,3 +15,4 @@ dist
build
*.egg
src/
bob/db/atnt/data/
include README.rst bootstrap-buildout.py buildout.cfg develop.cfg version.txt requirements.txt
include README.rst buildout.cfg develop.cfg version.txt requirements.txt
recursive-include doc *.py *.rst
......@@ -7,8 +7,10 @@
import os
import sys
import pkg_resources
from bob.db.base.driver import Interface as BaseInterface
def dumplist(args):
"""Dumps lists of files based on your criteria."""
......@@ -35,17 +37,11 @@ def dumplist(args):
def checkfiles(args):
"""Checks the existence of the files based on your criteria."""
from .__init__ import Database
from . import Database
db = Database()
r = db.objects()
# go through all files, check if they are available
good = {}
bad = {}
for f in r:
if os.path.exists(f.make_path(directory=args.directory, extension=args.extension)): good[f.id] = f.make_path(directory=args.directory, extension=args.extension)
else: bad[f.id] = f.make_path(directory=args.directory, extension=args.extension)
bad = [f for f in db.objects() if not os.path.exists(f.make_path(directory=args.directory, extension=args.extension))]
# report
output = sys.stdout
......@@ -55,9 +51,9 @@ def checkfiles(args):
if bad:
for f in bad:
output.write('Cannot find file "%s"\n' % (f,))
output.write('Cannot find file "%s"\n' % f.make_path(directory=args.directory, extension=args.extension))
output.write('%d files (out of %d) were not found at "%s"\n' % \
(len(bad), len(r), args.directory))
(len(bad), len(db.objects()), args.directory))
return 0
......@@ -100,6 +96,53 @@ def path(args):
return 0
def download(arguments):
  """Downloads and uncompresses the AT&T database"""

  # NOTE: the docstring above is deliberately kept to a single line, since it
  # is reused verbatim as the help text of the ``download`` sub-command
  # (``help=download.__doc__``). The full contract is documented here instead.
  #
  # Parameters:
  #
  #   arguments (argparse.Namespace): A set of arguments passed by the
  #     command-line parser. ``output_dir`` (where to extract the archive) and
  #     ``quiet`` (suppress progress messages) are read from it. An optional
  #     ``source_url`` attribute overrides the default download location.
  #
  # Returns:
  #
  #   int: A POSIX compliant return value of ``0`` if the download is
  #     successful. Failures are signalled by an exception, not by the
  #     return value.
  #
  # Raises:
  #
  #   urllib2.HTTPError (``urllib.error.HTTPError`` on Python 3): if the
  #     target resource does not exist on the webserver

  import contextlib
  import shutil
  import tempfile
  import zipfile

  if sys.version_info[0] <= 2:
    import urllib2 as urllib
  else:
    import urllib.request as urllib

  default_url = 'http://www.cl.cam.ac.uk/research/dtg/attarchive/pub/data/att_faces.zip'
  source_url = getattr(arguments, 'source_url', None) or default_url

  if not arguments.quiet:
    print ("Extracting url `%s' into `%s'" %(source_url, arguments.output_dir))

  # An anonymous temporary file is enough: the archive is read back through
  # the very same handle, so it never has to be re-opened by name (which is
  # not possible on every platform while the file is still open).
  with tempfile.TemporaryFile(suffix=".zip") as f:

    # stream the payload to disk instead of holding it entirely in memory;
    # ``closing`` is used because Python 2 responses are no context managers
    with contextlib.closing(urllib.urlopen(source_url)) as u:
      shutil.copyfileobj(u, f)

    # rewind so that the zip reader sees the data that was just written
    f.seek(0)

    with zipfile.ZipFile(f, mode='r') as z:
      members = z.infolist()
      for k, m in enumerate(members):
        if not arguments.quiet:
          print("x [%d/%d] %s" % (k+1, len(members), m.filename,))
        z.extract(m, arguments.output_dir)

  return 0
class Interface(BaseInterface):
......@@ -107,7 +150,6 @@ class Interface(BaseInterface):
return 'atnt'
def version(self):
  """Returns the version string reported by ``pkg_resources`` for the
  ``bob.db.<name>`` package, where ``<name>`` is what :py:meth:`name` returns
  """
  import pkg_resources # part of setuptools
  requirement = 'bob.db.%s' % self.name()
  # the first entry of the resolved list is the one whose version is reported
  resolved = pkg_resources.require(requirement)
  return resolved[0].version
def files(self):
......@@ -140,9 +182,10 @@ class Interface(BaseInterface):
dump_parser.set_defaults(func=dumplist) #action
# add the checkfiles command
from .models import DEFAULT_DATADIR
check_parser = subparsers.add_parser('checkfiles', help="Check if the files exist, based on your criteria")
check_parser.add_argument('-d', '--directory', required=True, help="the path to the AT&T images.")
check_parser.add_argument('-e', '--extension', default=".pgm", help="the extension of the AT&T images default: '.pgm'.")
check_parser.add_argument('-d', '--directory', default=DEFAULT_DATADIR, help="the path to the AT&T images [default: %(default)s]")
check_parser.add_argument('-e', '--extension', default=".pgm", help="the extension of the AT&T images [default: %(default)s]")
check_parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
check_parser.set_defaults(func=checkfiles) #action
......@@ -160,3 +203,8 @@ class Interface(BaseInterface):
parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS)
parser.set_defaults(func=path) #action
# adds the "download" command
parser = subparsers.add_parser('download', help=download.__doc__)
parser.add_argument('-o', "--output-dir", help='the directory where to extract the AT&T database at [default: %(default)s]', default=DEFAULT_DATADIR)
parser.add_argument('-q', "--quiet", action='store_true', help='if set, do it quietly', default=False)
parser.set_defaults(func=download)
......@@ -13,9 +13,16 @@ import bob
import bob.db.base
import bob.io.image # to be able to load images when File.load is called!
# Location where the data files are typically decompressed at
import pkg_resources
DEFAULT_DATADIR = pkg_resources.resource_filename(__name__, 'data')
class Client(object):
"""The clients of this database contain ONLY client ids. Nothing special."""
"""The clients of this database contain ONLY client ids. Nothing special.
"""
m_valid_client_ids = set(range(1, 41))
def __init__(self, client_id):
......@@ -25,10 +32,26 @@ class Client(object):
class File (bob.db.base.File):
"""Files of this database are composed from the client id and a file id."""
"""Files of this database are composed from the client id and a file id.
Parameters:
client_id (int): The unique client identity
client_file_id (int): The unique file identity for this given client
install_path (str): The installation path for the database
default_ext (str): The default extension for the database (normally,
should be ``.pgm``)
"""
m_valid_file_ids = set(range(1, 11))
def __init__(self, client_id, client_file_id):
def __init__(self, client_id, client_file_id, install_path, default_ext):
assert client_file_id in self.m_valid_file_ids
# compute the file id on the fly
file_id = (client_id - 1) * len(self.m_valid_file_ids) + client_file_id
......@@ -37,20 +60,83 @@ class File (bob.db.base.File):
# call base class constructor
bob.db.base.File.__init__(self, file_id=file_id, path=path)
self.client_id = client_id
self.install_path = install_path
self.default_ext = default_ext
@staticmethod
def _from_file_id(file_id, install_path, default_ext):
  """Returns the File object for a given file_id

  This inverts the file id computation done by the constructor, which packs
  the client id and the per-client file id into a single 1-based number.

  Parameters:

    file_id (int): The unique file identity
    install_path (str): The installation path for the database
    default_ext (str): The default extension for the database

  Returns:

    File: The file object corresponding to ``file_id``
  """
  files_per_client = len(File.m_valid_file_ids)
  zero_based = file_id - 1
  client_id = int(zero_based / files_per_client + 1)
  client_file_id = zero_based % files_per_client + 1
  return File(client_id, client_file_id, install_path, default_ext)
@staticmethod
def _from_path(path, install_path, default_ext):
  """Returns the File object for a given path

  Only the last two components of ``path`` are inspected: the parent directory
  must be named ``s<client_id>`` and the file name (extension removed) must be
  the per-client file id.

  Parameters:

    path (str): The path of a file of this database
    install_path (str): The installation path for the database
    default_ext (str): The default extension for the database

  Returns:

    File: The file object corresponding to ``path``
  """
  parent, leaf = os.path.split(path)
  stem = os.path.splitext(leaf)[0]
  client_dir = os.path.split(parent)[1]
  # client directories are prefixed with the letter "s"
  assert client_dir[0] == 's'
  return File(int(client_dir[1:]), int(stem), install_path, default_ext)
def make_path(self, directory=None, extension=None):
  """Builds the complete path of this file

  Parameters:

    directory (:py:class:`str`, Optional): A directory name that is prefixed
      to the returned result. If not set (or empty), the installation path
      stored on this object is used instead.

    extension (:py:class:`str`, Optional): An extension that is suffixed to
      the returned filename, normally including the leading ``.`` character
      as in ``.jpg`` or ``.hdf5``. If not set (or empty), the default
      extension stored on this object is used instead.


  Returns:

    str: The newly generated file path.

  """
  prefix = directory if directory else self.install_path
  suffix = extension if extension else self.default_ext

  # the stored fallbacks may themselves be unset: treat those as empty strings
  if not prefix:
    prefix = ''
  if not suffix:
    suffix = ''

  return str(os.path.join(prefix, self.path + suffix))
def load(self, directory=None, extension=None):
  """Loads the data of this file from disk

  The file name is built with :py:meth:`make_path` from ``directory`` and
  ``extension`` and then handed to :py:func:`bob.io.base.load`, whose return
  value is passed through unchanged.

  Parameters:

    directory (:py:class:`str`, Optional): A directory name that is prefixed
      to the file path; forwarded as-is to :py:meth:`make_path`.

    extension (:py:class:`str`, Optional): An extension that is suffixed to
      the file path; forwarded as-is to :py:meth:`make_path`.


  Returns:

    object: The return value of :py:func:`bob.io.base.load` given the input
    file type.

  """
  filename = self.make_path(directory, extension)
  return bob.io.base.load(filename)
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
from .models import Client, File
from .models import Client, File, DEFAULT_DATADIR
import bob.db.base
class Database(bob.db.base.Database):
"""Wrapper class for the AT&T (aka ORL) database of faces (http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html).
This class defines a simple protocol for training, enrollment and probe by splitting the few images of the database in a reasonable manner.
Due to the small size of the database, there is only a 'dev' group, and I did not define an 'eval' group."""
def __init__(self, original_directory=None, original_extension='.pgm'):
def __init__(self, original_directory=DEFAULT_DATADIR, original_extension='.pgm'):
"""**Constructor Documentation**
Generates a database.
......@@ -20,7 +18,8 @@ class Database(bob.db.base.Database):
Keyword parameters
original_directory : str, optional
The directory, where you extracted the original images to.
The directory, where you extracted the original images to. If not
provided, then use a directory internal to the package (``data``)
original_extension : str
The filename extension of the original images. Rarely changed.
......@@ -128,7 +127,7 @@ class Database(bob.db.base.Database):
protocol
ignored.
"""
return File.from_file_id(file_id).client_id
return File._from_file_id(file_id).client_id
def get_client_id_from_model_id(self, model_id, groups=None, protocol=None):
"""Returns the client id from the given model id.
......@@ -196,17 +195,20 @@ class Database(bob.db.base.Database):
if 'enroll' in purposes:
for client_id in ids:
for file_id in self.m_enroll_files:
retval.append(File(client_id, file_id))
retval.append(File(client_id, file_id,
self.original_directory, self.original_extension))
if 'probe' in purposes:
file_ids = File.m_valid_file_ids - self.m_enroll_files
# for probe, we use all clients of the given groups
for client_id in self.client_ids(groups):
for file_id in file_ids:
retval.append(File(client_id, file_id))
retval.append(File(client_id, file_id,
self.original_directory, self.original_extension))
return retval
def paths(self, file_ids, prefix=None, suffix=None, preserve_order=True):
  """Returns the full file paths for particular file ids, a given directory
  and an extension

  Parameters:

    file_ids (list): The ids of the files to build paths for

    prefix (:py:class:`str`, Optional): The directory passed to
      ``File.make_path()`` for every file

    suffix (:py:class:`str`, Optional): The extension passed to
      ``File.make_path()`` for every file

    preserve_order (:py:class:`bool`, Optional): Not used by this
      implementation: the result always follows the order of ``file_ids``


  Returns:

    list: One path (:py:class:`str`) per entry in ``file_ids``

  """
  # File objects need to know where the database is installed and which
  # extension it uses, exactly as when they are created by ``objects()``
  files = [
    File._from_file_id(file_id, self.original_directory,
                       self.original_extension)
    for file_id in file_ids
  ]
  return [f.make_path(prefix, suffix) for f in files]
def reverse(self, paths, preserve_order=True):
  """Reverses the lookup: from certain paths, return a list of
  File objects

  Parameters:

    paths (list): The paths to convert back into File objects

    preserve_order (:py:class:`bool`, Optional): Not used by this
      implementation: the result always follows the order of ``paths``


  Returns:

    list: One File object per entry in ``paths`` (may be empty)

  """
  # File objects need to know where the database is installed and which
  # extension it uses, exactly as when they are created by ``objects()``
  return [
    File._from_path(p, self.original_directory, self.original_extension)
    for p in paths
  ]
##############################################################################
#
# Copyright (c) 2006 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Bootstrap a buildout-based project
Simply run this script in a directory containing a buildout.cfg.
The script accepts buildout command-line options, so you can
use the -c option to specify an alternate configuration file.
"""
import os
import shutil
import sys
import tempfile
from optparse import OptionParser
__version__ = '2015-07-01'
# See zc.buildout's changelog if this version is up to date.

# Scratch directory used further down as ``to_dir`` for ez_setup and as the
# last argument of the easy_install command; it is removed again by the final
# statement of this script.
tmpeggs = tempfile.mkdtemp(prefix='bootstrap-')

# Usage message handed to OptionParser.
usage = '''\
[DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options]
Bootstraps a buildout-based project.
Simply run this script in a directory containing a buildout.cfg, using the
Python that you want bin/buildout to use.
Note that by using --find-links to point to local resources, you can keep
this script from going over the network.
'''

# Command-line interface of the bootstrap script itself.
parser = OptionParser(usage=usage)
parser.add_option("--version",
                  action="store_true", default=False,
                  help=("Return bootstrap.py version."))
parser.add_option("-t", "--accept-buildout-test-releases",
                  dest='accept_buildout_test_releases',
                  action="store_true", default=False,
                  help=("Normally, if you do not specify a --version, the "
                        "bootstrap script and buildout gets the newest "
                        "*final* versions of zc.buildout and its recipes and "
                        "extensions for you. If you use this flag, "
                        "bootstrap and buildout will get the newest releases "
                        "even if they are alphas or betas."))
parser.add_option("-c", "--config-file",
                  help=("Specify the path to the buildout configuration "
                        "file to be used."))
parser.add_option("-f", "--find-links",
                  help=("Specify a URL to search for buildout releases"))
parser.add_option("--allow-site-packages",
                  action="store_true", default=False,
                  help=("Let bootstrap.py use existing site packages"))
parser.add_option("--buildout-version",
                  help="Use a specific zc.buildout version")
parser.add_option("--setuptools-version",
                  help="Use a specific setuptools version")
parser.add_option("--setuptools-to-dir",
                  help=("Allow for re-use of existing directory of "
                        "setuptools versions"))

# ``args`` keeps the leftover positional arguments; they are forwarded to
# buildout's main function at the end of the script.
options, args = parser.parse_args()

# --version only reports the version of this script and exits successfully.
if options.version:
    print("bootstrap.py version %s" % __version__)
    sys.exit(0)
######################################################################
# load/install setuptools

# Python 3 location first, Python 2 location as fallback.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

# Namespace that receives everything ez_setup.py defines when executed.
ez = {}
# NOTE(review): both branches execute ez_setup.py without any integrity
# check - a copy found in the current working directory takes precedence,
# otherwise the script is fetched over the network and run as-is.
if os.path.exists('ez_setup.py'):
    exec(open('ez_setup.py').read(), ez)
else:
    exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez)

if not options.allow_site_packages:
    # ez_setup imports site, which adds site packages
    # this will remove them from the path to ensure that incompatible versions
    # of setuptools are not in the path
    import site
    # inside a virtualenv, there is no 'getsitepackages'.
    # We can't remove these reliably
    if hasattr(site, 'getsitepackages'):
        for sitepackage_path in site.getsitepackages():
            # Strip all site-packages directories from sys.path that
            # are not sys.prefix; this is because on Windows
            # sys.prefix is a site-package directory.
            if sitepackage_path != sys.prefix:
                # in-place update; note this is a substring match, so every
                # sys.path entry containing the site-packages path is dropped
                sys.path[:] = [x for x in sys.path
                               if sitepackage_path not in x]

# Keyword arguments for ez_setup's ``use_setuptools``; the two command-line
# options below override the defaults when given.
setup_args = dict(to_dir=tmpeggs, download_delay=0)
if options.setuptools_version is not None:
    setup_args['version'] = options.setuptools_version
if options.setuptools_to_dir is not None:
    setup_args['to_dir'] = options.setuptools_to_dir
ez['use_setuptools'](**setup_args)
# These imports must come after the ``use_setuptools`` call above.
import setuptools
import pkg_resources

# This does not (always?) update the default working set. We will
# do it.
for path in sys.path:
    if path not in pkg_resources.working_set.entries:
        pkg_resources.working_set.add_entry(path)
######################################################################
# Install buildout

ws = pkg_resources.working_set
# Directory of the setuptools distribution found in the working set.
setuptools_path = ws.find(
    pkg_resources.Requirement.parse('setuptools')).location

# Fix sys.path here as easy_install.pth added before PYTHONPATH
# The command runs easy_install's main() in a child interpreter after putting
# setuptools_path at the front of that interpreter's sys.path.
# (presumably tmpeggs is the value of the trailing ``d`` short option - TODO
# confirm against the easy_install option list)
cmd = [sys.executable, '-c',
       'import sys; sys.path[0:0] = [%r]; ' % setuptools_path +
       'from setuptools.command.easy_install import main; main()',
       '-mZqNxd', tmpeggs]

# Precedence: the 'bootstrap-testing-find-links' environment variable, then
# the --find-links option, then (only when test releases are accepted) the
# buildout download site; otherwise None.
# NOTE(review): the fallback URL below is plain HTTP.
find_links = os.environ.get(
    'bootstrap-testing-find-links',
    options.find_links or
    ('http://downloads.buildout.org/'
     if options.accept_buildout_test_releases else None)
    )
if find_links:
    cmd.extend(['-f', find_links])

requirement = 'zc.buildout'
version = options.buildout_version
if version is None and not options.accept_buildout_test_releases:
    # Figure out the most recent final version of zc.buildout.
    import setuptools.package_index
    # Markers that are allowed to start with '*' in an old-style parsed
    # version without making it a pre-release (see _final_version below).
    _final_parts = '*final-', '*final'

    def _final_version(parsed_version):
        """Return True if ``parsed_version`` is not a pre-release.

        Version objects exposing ``is_prerelease`` are asked directly. When
        that attribute is missing, the object is iterated as a sequence of
        string parts and any part starting with ``*`` that is not listed in
        ``_final_parts`` marks the version as non-final.
        """
        try:
            return not parsed_version.is_prerelease
        except AttributeError:
            # Older setuptools
            for part in parsed_version:
                if (part[:1] == '*') and (part not in _final_parts):
                    return False
            return True

    index = setuptools.package_index.PackageIndex(
        search_path=[setuptools_path])
    if find_links:
        index.add_find_links((find_links,))
    req = pkg_resources.Requirement.parse(requirement)
    if index.obtain(req) is not None:
        # ``best`` collects every distribution sharing the highest final
        # version seen so far (``bestv``).
        best = []
        bestv = None
        for dist in index[req.project_name]:
            distv = dist.parsed_version
            if _final_version(distv):
                if bestv is None or distv > bestv:
                    best = [dist]
                    bestv = distv
                elif distv == bestv:
                    best.append(dist)
        if best:
            # among equal versions, take the one that sorts last
            best.sort()
            version = best[-1].version

# Pin the requirement when a version was requested or could be determined.
if version:
    requirement = '=='.join((requirement, version))
cmd.append(requirement)

import subprocess
if subprocess.call(cmd) != 0:
    # repr(cmd)[1:-1] drops the surrounding list brackets from the message
    raise Exception(
        "Failed to execute command:\n%s" % repr(cmd)[1:-1])
######################################################################
# Import and run buildout

# Make the eggs installed into tmpeggs visible to the working set, then
# activate the (possibly version-pinned) zc.buildout requirement before
# importing it.
ws.add_entry(tmpeggs)
ws.require(requirement)
import zc.buildout.buildout

# When every leftover argument contains '=' (or there are none at all),
# append 'bootstrap' - presumably the buildout command to run by default.
if not [a for a in args if '=' not in a]:
    args.append('bootstrap')

# if -c was provided, we push it back into args for buildout's main function
if options.config_file is not None:
    args[0:0] = ['-c', options.config_file]

zc.buildout.buildout.main(args)
# Clean up the scratch directory created at the top of the script.
shutil.rmtree(tmpeggs)
bob.bio.base
\ No newline at end of file
.. vim: set fileencoding=utf-8 :
.. @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
.. @date: Thu Dec 6 12:28:25 CET 2012
.. testsetup:: interface
import tempfile
from bob.db.atnt.driver import download
from bob.db.atnt.models import DEFAULT_DATADIR
class Arguments(object): pass
args = Arguments()
setattr(args, 'output_dir', DEFAULT_DATADIR)
setattr(args, 'quiet', True)
assert download(args) == 0
==============
User's Guide
==============
This package contains the access API and descriptions for the `AT&T`_ database of faces, which is formerly known as the ORL database.
The actual raw data for the database should be downloaded from the original URL.
This package contains the access API and descriptions for the `AT&T`_ database
of faces, formerly known as the ORL database. It only provides the Bob_
accessor methods needed to use the dataset directly from Python; the actual raw
data for the database must be downloaded from the original URL. A convenient
command is provided for this purpose:
.. code-block:: sh
$ bob_dbmanage.py atnt download
This command will try to download the database and install it into a directory
that is internal to the package. In case you don't have write access to that
directory, use the ``--output-dir`` flag to specify an alternate directory:
This package only contains the Bob_ accessor methods to use the DB directly from python, using a single self-designed evaluation protocol.
.. code-block:: sh
$ bob_dbmanage.py atnt download --output-dir raw
The command above will download the raw data files of the AT&T database into
the directory ``raw`` inside your current working directory.
The Database Interface
----------------------
The :py:class:`bob.db.atnt.Database` provides an interface to access the data.
For more information, please refer to its documentation: :py:class:`bob.db.atnt.Database`
The :py:class:`bob.db.atnt.Database` provides an interface to access samples
from this dataset. The database object is initialized by passing it the
location where the raw samples have been downloaded. Assuming downloading to