Skip to content
Snippets Groups Projects
query.py 12.3 KiB
Newer Older
André Anjos's avatar
André Anjos committed
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Andre Anjos <andre.dos.anjos@gmail.com>
# Tue 17 May 13:58:09 2011
André Anjos's avatar
André Anjos committed

"""This module provides the Dataset interface allowing the user to query the
replay attack database in the most obvious ways.
"""

import os
import logging
from bob.db import utils
from .models import *
from .driver import Interface
André Anjos's avatar
André Anjos committed

SQLITE_FILE = INFO.files()[0]
André Anjos's avatar
André Anjos committed

class Database(object):
  """The dataset class opens and maintains a connection opened to the Database.

  It provides many different ways to probe for the characteristics of the data
  and for the data itself inside the database.
  """

  def __init__(self):
    # opens a session to the database - keep it open until the end
    self.connect()
André Anjos's avatar
André Anjos committed
  def connect(self):
    """Tries connecting or re-connecting to the database"""
    if not os.path.exists(SQLITE_FILE):
      self.session = None

    else:
      self.session = utils.session_try_readonly(INFO.type(), SQLITE_FILE)
André Anjos's avatar
André Anjos committed

  def is_valid(self):
    """Returns if a valid session has been opened for reading the database"""

    return self.session is not None

  def assert_validity(self):
    """Raise a RuntimeError if the database backend is not available"""
André Anjos's avatar
André Anjos committed

    if not self.is_valid():
      raise RuntimeError, "Database '%s' cannot be found at expected location '%s'. Create it and then try re-connecting using Database.connect()" % (INFO.name(), SQLITE_FILE)
André Anjos's avatar
André Anjos committed

  def objects(self, support=Attack.attack_support_choices,
      protocol='grandtest', groups=Client.set_choices, cls=('attack', 'real'),
      light=File.light_choices, clients=None):
    """Returns a list of unique :py:class:`.File` objects for the specific
    query by the user.
André Anjos's avatar
André Anjos committed

    Keyword parameters:
André Anjos's avatar
André Anjos committed

    support
      One of the valid support types as returned by attack_supports() or all,
      as a tuple.  If you set this parameter to an empty string or the value
      None, we use reset it to the default, which is to get all.

    protocol
      The protocol for the attack. One of the ones returned by protocols(). If
      you set this parameter to an empty string or the value None, we use reset
      it to the default, "grandtest".

    groups
      One of the protocolar subgroups of data as returned by groups() or a
      tuple with several of them.  If you set this parameter to an empty string
      or the value None, we use reset it to the default which is to get all.

    cls
      Either "attack", "real", "enroll" or any combination of those (in a
      tuple). Defines the class of data to be retrieved.  If you set this
      parameter to an empty string or the value None, we use reset it to the
      default, ("real", "attack").

    light
      One of the lighting conditions as returned by lights() or a combination
      of the two (in a tuple), which is also the default.

    clients
      If set, should be a single integer or a list of integers that define the
      client identifiers from which files should be retrieved. If ommited, set
      to None or an empty list, then data from all clients is retrieved.

    Returns: A list of :py:class:`.File` objects.
André Anjos's avatar
André Anjos committed
    """

    self.assert_validity()
André Anjos's avatar
André Anjos committed

    def check_validity(l, obj, valid, default):
      """Checks validity of user input data against a set of valid values"""
      if not l: return default
      elif not isinstance(l, (tuple, list)):
        return check_validity((l,), obj, valid, default)
André Anjos's avatar
André Anjos committed
      for k in l:
        if k not in valid:
          raise RuntimeError, 'Invalid %s "%s". Valid values are %s, or lists/tuples of those' % (obj, k, valid)
      return l

    # check if groups set are valid
    VALID_GROUPS = self.groups()
    groups = check_validity(groups, "group", VALID_GROUPS, None)
André Anjos's avatar
André Anjos committed

    # check if supports set are valid
    VALID_SUPPORTS = self.attack_supports()
    support = check_validity(support, "support", VALID_SUPPORTS, None)
André Anjos's avatar
André Anjos committed

    # by default, do NOT grab enrollment data from the database
    VALID_CLASSES = ('real', 'attack', 'enroll')
    cls = check_validity(cls, "class", VALID_CLASSES, ('real', 'attack'))

    # check protocol validity
    if not protocol: protocol = 'grandtest' #default
    VALID_PROTOCOLS = [k.name for k in self.protos()]
André Anjos's avatar
André Anjos committed
    if protocol not in VALID_PROTOCOLS:
      raise RuntimeError, 'Invalid protocol "%s". Valid values are %s' % \
          (protocol, VALID_PROTOCOLS)

    # checks client identity validity
    VALID_CLIENTS = [k.id for k in self.clients()]
    clients = check_validity(clients, "client", VALID_CLIENTS, None)

André Anjos's avatar
André Anjos committed
    # resolve protocol object
    protocol = self.protocol(protocol)

    # checks if the light is valid
    VALID_LIGHTS = self.lights()
    light = check_validity(light, "light", VALID_LIGHTS, None)
André Anjos's avatar
André Anjos committed

    # now query the database
André Anjos's avatar
André Anjos committed

    # real-accesses are simpler to query
    if 'enroll' in cls:
      q = self.session.query(File).join(RealAccess).join(Client)
      if groups: q = q.filter(Client.set.in_(groups))
      if clients: q = q.filter(Client.id.in_(clients))
      if light: q = q.filter(File.light.in_(light))
      q = q.filter(RealAccess.purpose=='enroll')
      q = q.order_by(Client.id)
      retval += list(q)

André Anjos's avatar
André Anjos committed
    # real-accesses are simpler to query
    if 'real' in cls:
      q = self.session.query(File).join(RealAccess).join(Client)
      if groups: q = q.filter(Client.set.in_(groups))
      if clients: q = q.filter(Client.id.in_(clients))
      if light: q = q.filter(File.light.in_(light))
      q = q.filter(RealAccess.protocols.contains(protocol))
      q = q.order_by(Client.id)
      retval += list(q)
André Anjos's avatar
André Anjos committed

    # attacks will have to be filtered a little bit more
    if 'attack' in cls:
      q = self.session.query(File).join(Attack).join(Client)
      if groups: q = q.filter(Client.set.in_(groups))
      if clients: q = q.filter(Client.id.in_(clients))
      if support: q = q.filter(Attack.attack_support.in_(support))
      if light: q = q.filter(File.light.in_(light))
      q = q.filter(Attack.protocols.contains(protocol))
      q = q.order_by(Client.id)
      retval += list(q)
André Anjos's avatar
André Anjos committed

    return retval

  def files(self, directory=None, extension=None, **object_query):
    """Returns a set of filenames for the specific query by the user.
    .. deprecated:: 1.1.0
      This function is *deprecated*, use :py:meth:`.Database.objects` instead.
    Keyword Parameters:
    directory
      A directory name that will be prepended to the final filepath returned
    extension
      A filename extension that will be appended to the final filepath returned
    object_query
      All remaining arguments are passed to :py:meth:`.Database.objects`
      untouched. Please check the documentation for such method for more
      details.
    Returns: A dictionary containing the resolved filenames considering all
    the filtering criteria. The keys of the dictionary are unique identities
    for each file in the replay attack database. Conserve these numbers if you
    wish to save processing results later on.
    import warnings
    warnings.warn("The method Database.files() is deprecated, use Database.objects() for more powerful object retrieval", DeprecationWarning)
    return dict([(k.id, k.make_path(directory, extension)) for k in self.objects(**object_query)])
  def clients(self):
    """Returns an iterable with all known clients"""
    self.assert_validity()
    return list(self.session.query(Client))
  def has_client_id(self, id):
    """Returns True if we have a client with a certain integer identifier"""

    self.assert_validity()
    return self.session.query(Client).filter(Client.id==id).count() != 0

André Anjos's avatar
André Anjos committed
  def protocols(self):
André Anjos's avatar
André Anjos committed
    """Returns all protocol objects.
André Anjos's avatar
André Anjos committed

    self.assert_validity()
    return list(self.session.query(Protocol))

André Anjos's avatar
André Anjos committed
  def has_protocol(self, name):
    """Tells if a certain protocol is available"""

    self.assert_validity()
    return self.session.query(Protocol).filter(Protocol.name==name).count() != 0
André Anjos's avatar
André Anjos committed

  def protocol(self, name):
    """Returns the protocol object in the database given a certain name. Raises
    an error if that does not exist."""

    self.assert_validity()
    return self.session.query(Protocol).filter(Protocol.name==name).one()
André Anjos's avatar
André Anjos committed

  def groups(self):
    """Returns the names of all registered groups"""
André Anjos's avatar
André Anjos committed
    return Client.set_choices

  def lights(self):
    """Returns light variations available in the database"""
André Anjos's avatar
André Anjos committed
    return File.light_choices

  def attack_supports(self):
    """Returns attack supports available in the database"""
André Anjos's avatar
André Anjos committed
    return Attack.attack_support_choices

  def attack_devices(self):
    """Returns attack devices available in the database"""
André Anjos's avatar
André Anjos committed
    return Attack.attack_device_choices

  def attack_sampling_devices(self):
    """Returns sampling devices available in the database"""
André Anjos's avatar
André Anjos committed
    return Attack.sample_device_choices

  def attack_sample_types(self):
    """Returns attack sample types available in the database"""
    return Attack.sample_type_choices
André Anjos's avatar
André Anjos committed
  def paths(self, ids, prefix='', suffix=''):
    """Returns a full file paths considering particular file ids, a given
    directory and an extension
André Anjos's avatar
André Anjos committed
    Keyword Parameters:

    id
      The ids of the object in the database table "file". This object should be
      a python iterable (such as a tuple or list).

    prefix
      The bit of path to be prepended to the filename stem

    suffix
      The extension determines the suffix that will be appended to the filename
      stem.

    Returns a list (that may be empty) of the fully constructed paths given the
    file ids.
    """

    self.assert_validity()
André Anjos's avatar
André Anjos committed

    fobj = self.session.query(File).filter(File.id.in_(ids))
André Anjos's avatar
André Anjos committed
    retval = []
    for p in ids:
      retval.extend([k.make_path(prefix, suffix) for k in fobj if k.id == p])
André Anjos's avatar
André Anjos committed
    return retval

  def reverse(self, paths):
    """Reverses the lookup: from certain stems, returning file ids
André Anjos's avatar
André Anjos committed
    Keyword Parameters:

    paths
      The filename stems I'll query for. This object should be a python
      iterable (such as a tuple or list)

    Returns a list (that may be empty).
    """

    self.assert_validity()
André Anjos's avatar
André Anjos committed

    fobj = self.session.query(File).filter(File.path.in_(paths))
André Anjos's avatar
André Anjos committed
    for p in paths:
      retval.extend([k.id for k in fobj if k.path == p])
    return retval

  def save_one(self, id, obj, directory, extension):
    """Saves a single object supporting the bob save() protocol.

    .. deprecated:: 1.1.0

      This function is *deprecated*, use :py:meth:`.File.save()` instead.

André Anjos's avatar
André Anjos committed
    This method will call save() on the the given object using the correct
    database filename stem for the given id.
André Anjos's avatar
André Anjos committed
    Keyword Parameters:

    id
      The id of the object in the database table "file".

    obj
      The object that needs to be saved, respecting the bob save() protocol.

    directory
      This is the base directory to which you want to save the data. The
      directory is tested for existence and created if it is not there with
      os.makedirs()

    extension
      The extension determines the way each of the arrays will be saved.
    """

    import warnings
    warnings.warn("The method Database.save_one() is deprecated, use the File object directly as returned by Database.objects() for more powerful object manipulation.", DeprecationWarning)
André Anjos's avatar
André Anjos committed

    self.assert_validity()
André Anjos's avatar
André Anjos committed

    fobj = self.session.query(File).filter_by(id=id).one()
André Anjos's avatar
André Anjos committed
    fullpath = os.path.join(directory, str(fobj.path) + extension)
    fulldir = os.path.dirname(fullpath)
    utils.makedirs_safe(fulldir)
    save(obj, fullpath)

  def save(self, data, directory, extension):
    """This method takes a dictionary of blitz arrays or bob.database.Array's
    and saves the data respecting the original arrangement as returned by
    files().

    .. deprecated:: 1.1.0

      This function is *deprecated*, use :py:meth:`.File.save()` instead.

André Anjos's avatar
André Anjos committed
    Keyword Parameters:

    data
      A dictionary with two keys 'real' and 'attack', each containing a
      dictionary mapping file ids from the original database to an object that
      supports the bob "save()" protocol.

    directory
      This is the base directory to which you want to save the data. The
      directory is tested for existence and created if it is not there with
      os.makedirs()

    extension
      The extension determines the way each of the arrays will be saved.
    """

    import warnings
    warnings.warn("The method Database.save() is deprecated, use the File object directly as returned by Database.objects() for more powerful object manipulation.", DeprecationWarning)

André Anjos's avatar
André Anjos committed
    for key, value in data:
      self.save_one(key, value, directory, extension)