LSTMEval.py 7.51 KB
Newer Older
1
2
3
4
5
6
7
8
9
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Pavel Korshunov <pavel.korshunov@idiap.ch>
# @date: Wed 19 Oct 23:43:22 2016

from bob.pad.base.algorithm import Algorithm
import numpy


10
# import tensorflow as tf
Pavel KORSHUNOV's avatar
Pavel KORSHUNOV committed
11
import os
12
13

import logging
14

15
16
17
# Module-level logger shared by every method in this file; records are
# emitted under the "bob.pad.voice" logger name.
logger = logging.getLogger("bob.pad.voice")


18
class LSTMEval(Algorithm):
19
20
21
    """This class is used to test all the possible functions of the tool chain, but it does basically nothing."""

    def __init__(self,
                 input_shape=(200, 81),  # (temporal_length, feature_size)
                 lstm_network_size=60,  # the output size of LSTM cell
                 normalization_file=None,  # file with normalization parameters from train set
                 **kwargs):
        """Configure the evaluator and load optional normalization parameters.

        **Parameters:**

        input_shape : sequence of int
          ``(temporal_length, feature_size)`` of one window fed to the network.
          Any sequence is accepted; it is stored as a list.

        lstm_network_size : int
          The output size of the LSTM cell.

        normalization_file : str or None
          Path to a NumPy ``.npz`` archive providing the ``data_mean`` and
          ``data_std`` entries. When it is not given, or does not exist, the
          identity normalization (mean 0, std 1) is used.

        kwargs
          Forwarded unchanged to the ``Algorithm`` base-class constructor.
        """
        # call base class constructor registering that this tool performs
        # projection and needs no projector training.
        Algorithm.__init__(
            self,
            performs_projection=True,
            requires_projector_training=False,
            **kwargs
        )

        # Keep a private list copy: the default is never shared between
        # instances and tuples work where the shape is later concatenated
        # with other lists.
        self.input_shape = list(input_shape)
        self.num_time_steps = self.input_shape[0]
        self.lstm_network_size = lstm_network_size

        # Identity normalization unless parameters can be loaded from disk.
        self.data_mean = 0
        self.data_std = 1
        if normalization_file:
            if os.path.exists(normalization_file):
                # The context manager closes the archive's file handle; the
                # arrays are read into memory when they are indexed.
                with numpy.load(normalization_file) as npzfile:
                    self.data_mean = npzfile['data_mean']
                    self.data_std = npzfile['data_std']
            else:
                logger.warning("Normalization file %s does not exist, features will not be normalized",
                               normalization_file)

        # Created lazily: the reader on first projection, the rest when the
        # projector is loaded.
        self.data_reader = None
        self.session = None
        self.dnn_model = None
        self.data_placeholder = None
52

53
54
#    def __del__(self):
#        self.session.close()
55

56
57
58
59
    def simple_lstm_network(self, train_data_shuffler, batch_size=10, lstm_cell_size=64,
                            num_time_steps=28, num_classes=10, seed=10, reuse=False):
        """Assemble the LSTM graph on top of the given input.

        ``train_data_shuffler`` is used directly as the network input when it
        already is a ``tf.Tensor``; otherwise it is called as
        ``train_data_shuffler("data", from_queue=False)`` to obtain the input.

        The remaining arguments are handed to the ``lstm`` layer helper of
        ``bob.learn.tensorflow``; ``seed`` seeds the Xavier weight initializer
        and ``num_classes`` is passed as ``output_activation_size``.

        Returns whatever the ``lstm`` helper returns.
        """
        import tensorflow as tf
        from bob.learn.tensorflow.layers import lstm

        if not isinstance(train_data_shuffler, tf.Tensor):
            network_input = train_data_shuffler("data", from_queue=False)
        else:
            network_input = train_data_shuffler

        xavier = tf.contrib.layers.xavier_initializer(seed=seed)

        # Options of the LSTM layer, gathered in one place
        lstm_options = dict(
            num_time_steps=num_time_steps,
            batch_size=batch_size,
            output_activation_size=num_classes,
            scope='lstm',
            weights_initializer=xavier,
            activation=tf.nn.sigmoid,
            reuse=reuse,
        )
        return lstm(network_input, lstm_cell_size, **lstm_options)

Pavel KORSHUNOV's avatar
Pavel KORSHUNOV committed
80
81
82
83
84
    def normalize_data(self, features):
        """Standardize ``features`` along axis 0 (zero mean, unit deviation).

        **Parameters:**

        features : array_like
          The values to normalize; mean and standard deviation are computed
          over axis 0.

        **Returns:**

        normalized : numpy.ndarray
          ``(features - mean) / std``. Entries whose standard deviation is
          zero (constant along axis 0) come out as 0 instead of NaN.
        """
        features = numpy.asarray(features)
        mean = numpy.mean(features, axis=0)
        std = numpy.std(features, axis=0)
        # A constant column has zero deviation and dividing by it would give
        # 0/0 = NaN; use a unit divisor there so the column becomes all zeros.
        std = numpy.where(std == 0, 1, std)
        return numpy.divide(features - mean, std)

85
86
    def _check_feature(self, feature):
        """Checks that the features are appropriate."""
87
        if not isinstance(feature, numpy.ndarray) or feature.ndim != 2 or feature.dtype != numpy.float32:
88
89
            raise ValueError("The given feature is not appropriate", feature)
        return True
90

91
92
93
94
    def restore_trained_model(self, projector_file):
        """Rebuild the LSTM graph and restore its variables from ``projector_file``.

        A TensorFlow session is created on first use and kept in
        ``self.session``. The network is built for a batch size of 1 and two
        output classes on top of a ``float32`` placeholder of shape
        ``(None,) + input_shape``.

        Returns the pair ``(softmax_op, placeholder)``: the softmax applied to
        the network output, and the placeholder the input must be fed through.
        """
        import tensorflow as tf

        # The session is created once and reused afterwards
        if self.session is None:
            self.session = tf.Session()

        placeholder_shape = (None, ) + tuple(self.input_shape)
        input_placeholder = tf.placeholder(tf.float32, shape=placeholder_shape)
        network_output = self.simple_lstm_network(
            input_placeholder,
            batch_size=1,
            lstm_cell_size=self.lstm_network_size,
            num_time_steps=self.num_time_steps,
            num_classes=2,
            reuse=False,
        )

        # Initialize the variables, then overwrite them with the stored values
        session = self.session
        session.run(tf.global_variables_initializer())
        tf.train.Saver().restore(session, projector_file)

        softmax_op = tf.nn.softmax(network_output, name="softmax")
        return softmax_op, input_placeholder
105

106
107
    def load_projector(self, projector_file):
        """Load the pre-trained model stored in ``projector_file``.

        Logs the file name, then keeps the pair returned by
        :py:meth:`restore_trained_model` in ``self.dnn_model`` and
        ``self.data_placeholder``.
        """
        message = "Loading pretrained model from {0}".format(projector_file)
        logger.info(message)

        restored = self.restore_trained_model(projector_file)
        self.dnn_model, self.data_placeholder = restored
110

111
    def project_feature(self, feature):
        """Project ``feature`` onto the loaded DNN model, one window at a time.

        The feature is split into windows of ``self.num_time_steps`` rows with
        a sliding step of 5 by the ``DiskAudio`` reader (created on first
        use), normalized with ``self.data_mean`` / ``self.data_std`` and each
        window is run through the model separately.

        **Parameters:**

        feature : numpy.ndarray
          The feature to project; ``feature.shape[0]`` is reported in the log.

        **Returns:**

        projections : numpy.ndarray or None
          A ``float32`` array of shape ``(number_of_windows, 2)`` holding the
          model output for every window, or ``None`` when the reader produced
          no windows.

        **Raises:**

        ValueError
          When windows were produced but no TensorFlow session exists (the
          projector has not been loaded).
        """
        logger.info(" .... Projecting %d features vector", feature.shape[0])

        if self.data_reader is None:
            # Imported only when a reader really has to be built.
            from bob.learn.tensorflow.datashuffler import DiskAudio
            # list(...) so that a tuple input_shape can be concatenated too
            self.data_reader = DiskAudio([0], [0], [1] + list(self.input_shape))

        frames, _ = self.data_reader.split_features_in_windows(features=feature, label=1,
                                                               win_size=self.num_time_steps,
                                                               sliding_step=5)
        if frames is None:
            return None

        # Checked once up front instead of on every loop iteration.
        if self.session is None:
            raise ValueError("Tensorflow session was not initialized, so cannot project on DNN model!")

        frames = numpy.asarray(frames)

        # Compare against None explicitly: the truth value of a multi-element
        # array (as loaded from the normalization file) is ambiguous and
        # raises ValueError.
        if self.data_std is not None:
            frames = numpy.divide(frames - self.data_mean, self.data_std)

        projections = numpy.zeros((len(frames), 2), dtype=numpy.float32)
        for i, frame in enumerate(frames):
            # the model was built for batch size 1: add the leading batch axis
            frame = frame[numpy.newaxis, ...]
            logger.info(" .... projecting frame of shape %s onto DNN model", frame.shape)
            forward_output = self.session.run(self.dnn_model, feed_dict={self.data_placeholder: frame})
            projections[i] = forward_output[0]

        logger.info("Projected scores %s", projections)
        return projections
148

149
150
    def project(self, feature):
        """project(feature) -> projected
151

152
153
        This function will project the given feature.
        It is assured that the :py:meth:`load_projector` was called once before the ``project`` function is executed.
154

155
        **Parameters:**
156

157
158
        feature : object
          The feature to be projected.
159

160
        **Returns:**
161

162
163
164
        projected : object
          The projected features.
          Must be writable with the :py:meth:`write_feature` function and readable with the :py:meth:`read_feature` function.
165

166
167
168
169
170
171
172
        """
        if len(feature) > 0:
            feature = numpy.cast['float32'](feature)
            self._check_feature(feature)
            return self.project_feature(feature)
        else:
            return numpy.zeros(1, dtype=numpy.float64)
173

174
175
    def score_for_multiple_projections(self, toscore):
        """score_for_multiple_projections(toscore) -> score

        Converts ``toscore`` to a ``float32`` array, takes its column with
        index 1 and averages it.

        **Returns:**

        score : [float]
          A one-element list with the mean of that column of ``toscore``.
        """
        projections = numpy.asarray(toscore, dtype=numpy.float32)
        mean_score = numpy.mean(projections[:, 1])
        logger.debug("Mean score %f", mean_score)
        return [mean_score]
186

187
188
189
190
191
    def score(self, toscore):
        """Return the first element of ``toscore`` wrapped in a one-element list.

        **Parameters:**

        toscore : sequence
          The projection to score; only ``toscore[0]`` is used.

        **Returns:**

        score : list
          ``[toscore[0]]``
        """
        # %s instead of %f: toscore is an indexable sequence, and formatting a
        # multi-element array with %f fails inside the logging machinery.
        logger.debug("score() score %s", toscore)
        # NOTE(review): index 0 is returned here while
        # score_for_multiple_projections averages column 1 - confirm which
        # entry is meant to be the "real" score.
        return [toscore[0]]
192
193


194
# Module-level instance built with the constructor defaults.
# NOTE(review): presumably the object that the bob.pad configuration system
# looks up by this name - confirm against the package's resource entries.
algorithm = LSTMEval()