database.py 4.83 KB
Newer Older
1
2
3
4
5
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Pavel Korshunov <pavel.korshunov@idiap.ch>
# Tue 17 May 15:43:22 CEST 2016

6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import sys

import bob.pad.base.database
import bob.io.base

from bob.pad.voice.database import PadVoiceFile

from bob.io.base.test_utils import datafile
from bob.db.base.driver import Interface as BaseInterface

import pkg_resources

# Location of the 'test/data' resource of the 'bob.pad.voice' package, as
# resolved by pkg_resources; used below as the default original_directory.
data_dir = pkg_resources.resource_filename('bob.pad.voice', 'test/data')

# Name returned by Interface.name() for this dummy database.
dummy_name = "speech_spoof_test"


def F(f):
    """Looks up the test file ``f`` for this module through ``datafile``"""
    located = datafile(f, __name__)
    return located


class TestFile(PadVoiceFile):
    """File entry of the dummy database; every file is assigned to client 1"""

    def __init__(self, path, file_id):
        # A path containing "attack" is tagged as an attack sample; any other
        # path is handed over with no attack type at all.
        kind = 'attack' if "attack" in path else None
        super(TestFile, self).__init__(
            client_id=1,
            path=path,
            file_id=file_id,
            attack_type=kind,
        )



def dumplist(args):
    """Writes the path of every file of the dummy database, one per line"""

    # all_files() is expected to hand back (at least) two lists; only the
    # first two are used, written one after the other.
    parts = DummyDatabaseSpeechSpoof().all_files()

    if args.selftest:
        # In self-test mode the listing is produced but sent to a null sink.
        from bob.db.base.utils import null
        sink = null()
    else:
        sink = sys.stdout

    for entry in parts[0] + parts[1]:
        line = '%s\n' % (entry.make_path(args.directory, args.extension),)
        sink.write(line)

    return 0


class Interface(BaseInterface):
    """Command-line driver interface of the dummy speech spoofing database"""

    def name(self):
        """Returns the module-level ``dummy_name``"""
        return dummy_name

    def version(self):
        """Returns the hard-coded version string of this dummy database"""
        return '0.0.1'

    def files(self):
        """Returns the raw file patterns, each resolved relative to this module"""
        from pkg_resources import resource_filename
        raw_files = ('*.wav',)
        return [resource_filename(__name__, k) for k in raw_files]

    def type(self):
        """Returns the database type identifier, ``'rawfiles'``"""
        return 'rawfiles'

    def add_commands(self, parser):
        """Registers the sub-commands of this database on ``parser``.

        Only one sub-command is added, ``dumplist``, which is dispatched to the
        module-level ``dumplist`` function through the ``func`` default.
        """
        from argparse import SUPPRESS

        subparsers = self.setup_parser(parser,
                                       "Dummy Speech Database", "Dummy speech database with attacks for testing")

        # An empty help string would list the sub-command without any
        # description in the ``--help`` output, so a real one is given here.
        dumpparser = subparsers.add_parser('dumplist',
                                           help="Dumps lists of files based on your criteria")
        dumpparser.add_argument('-d', '--directory', dest="directory", default='',
                                help="if given, this path will be prepended to every entry returned (defaults to '%(default)s')")
        dumpparser.add_argument('-e', '--extension', dest="extension", default='.wav',
                                help="if given, this extension will be appended to every entry returned (defaults to '%(default)s')")
        # Hidden flag (help is suppressed) read by dumplist() as ``args.selftest``.
        dumpparser.add_argument('--self-test', dest="selftest", default=False,
                                action='store_true', help=SUPPRESS)

        dumpparser.set_defaults(func=dumplist)  # action


class DummyDatabaseSpeechSpoof(bob.pad.base.database.PadDatabase):
    """ Implements API of antispoofing interface for this Test database"""

    def __init__(self, protocol='Default', original_directory=data_dir, original_extension=".wav", **kwargs):
        """Creates the dummy database.

        Keyword Parameters:

        protocol
            Name of the protocol, passed on to the base class.

        original_directory
            Directory of the original data; defaults to the module-level ``data_dir``.

        original_extension
            Extension of the original data files.

        kwargs
            Any further keyword arguments are forwarded unchanged to the
            ``PadDatabase`` constructor.
        """
        # call the base class constructor; the database name is fixed to 'testspeech'
        bob.pad.base.database.PadDatabase.__init__(self, name='testspeech', protocol=protocol,
                                                 original_directory=original_directory,
                                                 original_extension=original_extension, **kwargs)

    def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
        """Returns a list of Files for the specific query by the user.

        Keyword Parameters:

        groups
            One of the groups ('dev', 'eval', 'train') or a tuple with several of them.
            If 'None' is given (this is the default), it is considered the same as a
            tuple with all possible values.

        protocol
            The protocol for which the clients should be retrieved.
            It is ignored by this implementation.

        purposes
            The purposes can be either 'real' or 'attack', or a tuple with both.
            If 'None' is given (this is the default), it is considered the same as a
            tuple with all possible values.

        model_ids
            This parameter is not supported in this implementation.


        Returns: A list of Files with the specified properties, real files
        first, each purpose ordered as 'train', 'dev', 'eval'.
        """

        # Honour the documented meaning of None; without this the membership
        # tests below would raise a TypeError for the default arguments.
        if groups is None:
            groups = ('train', 'dev', 'eval')
        if purposes is None:
            purposes = ('real', 'attack')

        # (purpose, group, path, file id) of every file of the database, listed
        # in the order in which matching files are returned.
        known_files = (
            ('real', 'train', "genuine_laptop_sentence01", 1),
            ('real', 'dev', "genuine_laptop2_sentence01", 3),
            ('real', 'eval', "genuine_phone_sentence01", 5),
            ('attack', 'train', "attack_laptop_sentence01", 2),
            ('attack', 'dev', "attack_phone_sentence01", 4),
            ('attack', 'eval', "attack_ss_sentence01", 6),
        )

        # ``in`` is kept as the test so that a plain string such as 'real' is
        # accepted as well as a tuple of names.
        return [
            TestFile(path, file_id)
            for purpose, group, path, file_id in known_files
            if purpose in purposes and group in groups
        ]


# Module-level instance of the dummy database. The first three arguments repeat
# the constructor defaults; training_depends_on_protocol is not a named
# parameter of DummyDatabaseSpeechSpoof and is therefore forwarded to the
# PadDatabase constructor through **kwargs.
database = DummyDatabaseSpeechSpoof(
    protocol='Default',
    original_directory=data_dir,
    original_extension=".wav",
    training_depends_on_protocol=True,
)