test_cnn.py 10.9 KB
Newer Older
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
1
2
3
4
5
6
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Thu 13 Oct 2016 13:35 CEST

import numpy
7
from bob.learn.tensorflow.datashuffler import Memory, SiameseMemory, TripletMemory, ImageAugmentation, ScaleFactor, Linear
8
from bob.learn.tensorflow.network import Chopra
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
9
from bob.learn.tensorflow.loss import BaseLoss, ContrastiveLoss, TripletLoss
10
from bob.learn.tensorflow.trainers import Trainer, SiameseTrainer, TripletTrainer, constant
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
11
from .test_cnn_scratch import validate_network
12
from bob.learn.tensorflow.network import Embedding, LightCNN9
13

Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
14
from bob.learn.tensorflow.utils import load_mnist
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
15
16
17
18
19
20
21
22
23
24
import tensorflow as tf
import bob.io.base
import shutil
from scipy.spatial.distance import cosine
import bob.measure

"""
Some unit tests for the datashuffler
"""

Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
25
# Hyper-parameters shared by all the trainer tests in this module.
batch_size = 32               # training mini-batch size
validation_batch_size = 400   # batch size used when extracting validation features
iterations = 300              # training iterations for the MNIST-based tests
seed = 10                     # fixed RNG seed so the tests are reproducible
numpy.random.seed(seed)
31
def dummy_experiment(data_s, embedding):
    """
    Run a toy enroll/probe verification experiment and return its EER.

    A shallow copy of ``data_s`` is rebuilt as a plain ``Memory`` shuffler so
    ``get_batch`` can be used directly for both the enrollment and the probe
    batches.  One model per class is built as the mean embedding of that
    class' enrollment samples, probes are scored with the cosine *distance*,
    and the half total error rate at the EER threshold is returned.
    """
    shuffler = object.__new__(Memory)
    shuffler.__dict__ = data_s.__dict__.copy()

    # Enrollment features
    enroll_data, enroll_labels = shuffler.get_batch()
    enroll_features = embedding(enroll_data)
    del enroll_data

    # Probe features
    probe_data, probe_labels = shuffler.get_batch()
    probe_features = embedding(probe_data)
    del probe_data

    labels = shuffler.possible_labels

    # One model per class: mean embedding over that class' enrollment samples
    models = [
        numpy.mean(enroll_features[numpy.where(enroll_labels == label)[0], :], axis=0)
        for label in labels
    ]

    # Score every probe against every model; collect then convert once
    genuine = []
    impostor = []
    for model, label in zip(models, labels):
        same = probe_features[probe_labels == label, :]
        genuine.extend(cosine(model, same[j]) for j in range(same.shape[0]))

        diff = probe_features[probe_labels != label, :]
        impostor.extend(cosine(model, diff[j]) for j in range(diff.shape[0]))

    positive_scores = numpy.array(genuine)
    negative_scores = numpy.array(impostor)

    # cosine() is a distance, so negate to get "higher is more similar"
    # scores as expected by bob.measure
    threshold = bob.measure.eer_threshold((-1) * negative_scores, (-1) * positive_scores)
    far, frr = bob.measure.farfrr((-1) * negative_scores, (-1) * positive_scores, threshold)

    return (far + frr) / 2.


def test_cnn_trainer():
    """Train a Chopra CNN on MNIST with a softmax loss and check its accuracy."""

    # Load MNIST and shape it as NHWC images
    train_data, train_labels, validation_data, validation_labels = load_mnist()
    train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))
    validation_data = numpy.reshape(validation_data, (validation_data.shape[0], 28, 28, 1))

    # Data shuffler with augmentation and [0, 1] scaling
    data_augmentation = ImageAugmentation()
    train_data_shuffler = Memory(
        train_data, train_labels,
        input_shape=[None, 28, 28, 1],
        batch_size=batch_size,
        data_augmentation=data_augmentation,
        normalizer=ScaleFactor())

    directory = "./temp/cnn"

    # Softmax cross-entropy loss, averaged over the batch
    loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)

    # Architecture: Chopra network with a 10-way output
    architecture = Chopra(seed=seed, fc1_output=10)
    input_pl = train_data_shuffler("data", from_queue=True)
    graph = architecture(input_pl)
    embedding = Embedding(train_data_shuffler("data", from_queue=False), graph)

    # Single-graph trainer
    trainer = Trainer(
        train_data_shuffler,
        iterations=iterations,
        analizer=None,
        temp_dir=directory)
    trainer.create_network_from_scratch(
        graph=graph,
        loss=loss,
        learning_rate=constant(0.01, name="regular_lr"),
        optimizer=tf.train.GradientDescentOptimizer(0.01))
    trainer.train()
    #trainer.train(validation_data_shuffler)

    # The embedding must classify clearly better than chance (>50%)
    accuracy = validate_network(embedding, validation_data, validation_labels)
    assert accuracy > 50.

    # Cleanup: temp dir and tf variables
    shutil.rmtree(directory)
    del trainer
    del graph
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
125

126

127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def test_lightcnn_trainer():
    """Smoke-test LightCNN9 training on synthetic two-class Gaussian images.

    Only 5 iterations are run on random data, so no accuracy threshold is
    meaningful; the test checks the whole train/validate pipeline executes
    and produces a result.
    """
    # Fake data: class 0 ~ N(0, 0.2), class 1 ~ N(2, 0.2), 128x128 grayscale
    train_data = numpy.random.normal(0, 0.2, size=(100, 128, 128, 1))
    train_data = numpy.vstack((train_data, numpy.random.normal(2, 0.2, size=(100, 128, 128, 1))))
    train_labels = numpy.hstack((numpy.zeros(100), numpy.ones(100))).astype("uint64")

    validation_data = numpy.random.normal(0, 0.2, size=(100, 128, 128, 1))
    validation_data = numpy.vstack((validation_data, numpy.random.normal(2, 0.2, size=(100, 128, 128, 1))))
    validation_labels = numpy.hstack((numpy.zeros(100), numpy.ones(100))).astype("uint64")

    # Creating datashufflers (Linear normalizer: data is already centered)
    data_augmentation = ImageAugmentation()
    train_data_shuffler = Memory(train_data, train_labels,
                                 input_shape=[None, 128, 128, 1],
                                 batch_size=batch_size,
                                 data_augmentation=data_augmentation,
                                 normalizer=Linear())

    directory = "./temp/cnn"

    # Loss for the softmax
    loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)

    # Preparing the architecture
    architecture = LightCNN9(seed=seed,
                             n_classes=2)
    input_pl = train_data_shuffler("data", from_queue=True)
    graph = architecture(input_pl)
    embedding = Embedding(train_data_shuffler("data", from_queue=False), graph)

    # One graph trainer; 5 iterations only — this is a smoke test
    trainer = Trainer(train_data_shuffler,
                      iterations=5,
                      analizer=None,
                      temp_dir=directory
                      )
    trainer.create_network_from_scratch(graph=graph,
                                        loss=loss,
                                        learning_rate=constant(0.001, name="regular_lr"),
                                        optimizer=tf.train.GradientDescentOptimizer(0.001),
                                        )
    trainer.train()
    #trainer.train(validation_data_shuffler)

    # Using embedding to compute the accuracy.  The previous `assert True`
    # verified nothing; at minimum the pipeline must yield a result.
    accuracy = validate_network(embedding, validation_data, validation_labels,
                                input_shape=[None, 128, 128, 1], normalizer=Linear())
    assert accuracy is not None

    shutil.rmtree(directory)
    del trainer
    del graph
179

180
181


Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
182
183
184
185
186
187
188
189

def test_siamesecnn_trainer():
    """Train a Siamese Chopra CNN on MNIST pairs and check its verification EER."""
    train_data, train_labels, validation_data, validation_labels = load_mnist()
    train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))
    validation_data = numpy.reshape(validation_data, (validation_data.shape[0], 28, 28, 1))

    # Pair-producing shufflers for training and validation
    train_data_shuffler = SiameseMemory(
        train_data, train_labels,
        input_shape=[None, 28, 28, 1],
        batch_size=batch_size,
        normalizer=ScaleFactor())
    validation_data_shuffler = SiameseMemory(
        validation_data, validation_labels,
        input_shape=[None, 28, 28, 1],
        batch_size=validation_batch_size,
        normalizer=ScaleFactor())

    directory = "./temp/siamesecnn"

    # Shared architecture for both branches of the siamese network
    architecture = Chopra(seed=seed, fc1_output=10)

    # Contrastive loss for the pair scoring
    loss = ContrastiveLoss(contrastive_margin=4.)

    # Build the two branches; the right one reuses the left one's variables
    input_pl = train_data_shuffler("data")
    graph = {
        'left': architecture(input_pl['left']),
        'right': architecture(input_pl['right'], reuse=True),
    }

    trainer = SiameseTrainer(
        train_data_shuffler,
        iterations=iterations,
        analizer=None,
        temp_dir=directory)
    trainer.create_network_from_scratch(
        graph=graph,
        loss=loss,
        learning_rate=constant(0.01, name="regular_lr"),
        optimizer=tf.train.GradientDescentOptimizer(0.01))
    trainer.train()

    # Verification: embed with the left branch and measure the EER
    embedding = Embedding(train_data_shuffler("data", from_queue=False)['left'], graph['left'])
    eer = dummy_experiment(validation_data_shuffler, embedding)
    assert eer < 0.15

    shutil.rmtree(directory)

    del architecture
    del trainer  # Just to clean tf.variables
Tiago de Freitas Pereira's avatar
Tiago de Freitas Pereira committed
227
228
229
230
231
232
233
234
235


def test_tripletcnn_trainer():
    """Train a triplet Chopra CNN on MNIST and check its verification EER."""
    train_data, train_labels, validation_data, validation_labels = load_mnist()
    train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))
    validation_data = numpy.reshape(validation_data, (validation_data.shape[0], 28, 28, 1))

    # Triplet-producing shufflers for training and validation
    train_data_shuffler = TripletMemory(
        train_data, train_labels,
        input_shape=[None, 28, 28, 1],
        batch_size=batch_size,
        normalizer=ScaleFactor())
    validation_data_shuffler = TripletMemory(
        validation_data, validation_labels,
        input_shape=[None, 28, 28, 1],
        batch_size=validation_batch_size,
        normalizer=ScaleFactor())

    directory = "./temp/tripletcnn"

    # One architecture shared across the three triplet branches
    architecture = Chopra(seed=seed, fc1_output=10)

    # Triplet loss with margin 4
    loss = TripletLoss(margin=4.)

    # Build anchor/positive/negative branches; variables are shared via reuse
    input_pl = train_data_shuffler("data")
    graph = {
        'anchor': architecture(input_pl['anchor']),
        'positive': architecture(input_pl['positive'], reuse=True),
        'negative': architecture(input_pl['negative'], reuse=True),
    }

    # One graph trainer
    trainer = TripletTrainer(
        train_data_shuffler,
        iterations=iterations,
        analizer=None,
        temp_dir=directory)
    trainer.create_network_from_scratch(
        graph=graph,
        loss=loss,
        learning_rate=constant(0.01, name="regular_lr"),
        optimizer=tf.train.GradientDescentOptimizer(0.01))
    trainer.train(train_data_shuffler)

    # Verification: embed with the anchor branch and measure the EER
    embedding = Embedding(train_data_shuffler("data", from_queue=False)['anchor'], graph['anchor'])
    eer = dummy_experiment(validation_data_shuffler, embedding)
    assert eer < 0.15

    shutil.rmtree(directory)

    del architecture
    del trainer  # Just to clean tf.variables
277