Independent learning rate

parent 615b9059
@@ -14,9 +14,10 @@ class BaseLoss(object):
     One exam
     """

-    def __init__(self, loss, operation):
+    def __init__(self, loss, operation, name="loss"):
         self.loss = loss
         self.operation = operation
+        self.name = name

     def __call__(self, graph, label):
-        return self.operation(self.loss(graph, label))
+        return self.operation(self.loss(graph, label), name=self.name)
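A minimal usage sketch of the new `name` argument (illustrative, not from this commit; it works with any reduction op that accepts a `name` keyword, such as `tf.reduce_mean`):

    # Hypothetical example: wrap a softmax loss and name the resulting mean op
    loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits,
                    tf.reduce_mean, name="softmax_loss")
    value = loss(graph, label)  # -> tf.reduce_mean(..., name="softmax_loss")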
@@ -92,7 +92,7 @@ class FaceNetSimple(SequenceNetwork):
         self.add(Conv2D(name="conv1", kernel_size=conv1_kernel_size,
                         filters=conv1_output,
-                        activation=tf.nn.relu,
+                        activation=tf.nn.tanh,
                         stride=conv1_stride,
                         weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
                         bias_initialization=Constant(use_gpu=self.use_gpu)
@@ -102,7 +102,7 @@ class FaceNetSimple(SequenceNetwork):
         ##########
         self.add(Conv2D(name="conv2", kernel_size=conv2_kernel_size,
                         filters=conv2_output,
-                        activation=tf.nn.relu,
+                        activation=tf.nn.tanh,
                         weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
                         bias_initialization=Constant(use_gpu=self.use_gpu)
                         ))
@@ -111,7 +111,7 @@ class FaceNetSimple(SequenceNetwork):
         ##########
         self.add(Conv2D(name="conv3", kernel_size=conv3_kernel_size,
                         filters=conv3_output,
-                        activation=tf.nn.relu,
+                        activation=tf.nn.tanh,
                         weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
                         bias_initialization=Constant(use_gpu=self.use_gpu)
                         ))
@@ -120,7 +120,7 @@ class FaceNetSimple(SequenceNetwork):
         ##########
         self.add(Conv2D(name="conv4", kernel_size=conv4_kernel_size,
                         filters=conv4_output,
-                        activation=tf.nn.relu,
+                        activation=tf.nn.tanh,
                         weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
                         bias_initialization=Constant(use_gpu=self.use_gpu)
                         ))
@@ -128,7 +128,7 @@ class FaceNetSimple(SequenceNetwork):
         ##########
         self.add(Conv2D(name="conv5", kernel_size=conv5_kernel_size,
                         filters=conv5_output,
-                        activation=tf.nn.relu,
+                        activation=tf.nn.tanh,
                         weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
                         bias_initialization=Constant(use_gpu=self.use_gpu)
                         ))
@@ -136,20 +136,20 @@ class FaceNetSimple(SequenceNetwork):
         ##########
         self.add(Conv2D(name="conv6", kernel_size=conv6_kernel_size,
                         filters=conv6_output,
-                        activation=tf.nn.relu,
+                        activation=tf.nn.tanh,
                         weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
                         bias_initialization=Constant(use_gpu=self.use_gpu)
                         ))
         self.add(MaxPooling(name="pooling6", shape=pool6_shape, strides=pool6_stride))

         self.add(FullyConnected(name="fc1", output_dim=fc1_output,
-                                activation=tf.nn.relu,
+                                activation=tf.nn.tanh,
                                 weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
                                 bias_initialization=Constant(use_gpu=self.use_gpu)
                                 ))

         self.add(FullyConnected(name="fc2", output_dim=fc2_output,
-                                activation=tf.nn.relu,
+                                activation=tf.nn.tanh,
                                 weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
                                 bias_initialization=Constant(use_gpu=self.use_gpu)
                                 ))
@@ -24,7 +24,7 @@ from .. import util
 SEED = 10
 from bob.learn.tensorflow.datashuffler import TripletDisk, TripletWithSelectionDisk, TripletWithFastSelectionDisk
 from bob.learn.tensorflow.network import Lenet, MLP, LenetDropout, VGG, Chopra, Dummy, FaceNet, FaceNetSimple
-from bob.learn.tensorflow.trainers import SiameseTrainer, TripletTrainer
+from bob.learn.tensorflow.trainers import SiameseTrainer, TripletTrainer, constant
 from bob.learn.tensorflow.loss import ContrastiveLoss, TripletLoss
 import numpy
@@ -56,18 +56,10 @@ def main():
                         extension=".hdf5")
                         for o in train_objects]

-    #train_data_shuffler = TripletWithSelectionDisk(train_file_names, train_labels,
-    #                                               input_shape=[125, 125, 3],
-    #                                               batch_size=BATCH_SIZE)
-
-    #train_data_shuffler = TripletWithFastSelectionDisk(train_file_names, train_labels,
-    #                                                   input_shape=[112, 112, 3],
-    #                                                   batch_size=BATCH_SIZE)
-
-    train_data_shuffler = TripletDisk(train_file_names, train_labels,
-                                      input_shape=[112, 112, 3],
-                                      batch_size=BATCH_SIZE)
+    train_data_shuffler = TripletWithFastSelectionDisk(train_file_names, train_labels,
+                                                       input_shape=[224, 224, 3],
+                                                       batch_size=BATCH_SIZE,
+                                                       total_identities=16)

     # Preparing train set
@@ -81,28 +73,21 @@ def main():
                               for o in validation_objects]

     validation_data_shuffler = TripletDisk(validation_file_names, validation_labels,
-                                           input_shape=[112, 112, 3],
+                                           input_shape=[224, 224, 3],
                                            batch_size=VALIDATION_BATCH_SIZE)

     # Preparing the architecture
     # LENET PAPER CHOPRA
     #architecture = Chopra(seed=SEED)
-    architecture = FaceNet(seed=SEED, use_gpu=USE_GPU)
-
-    #loss = ContrastiveLoss(contrastive_margin=50.)
-    #optimizer = tf.train.GradientDescentOptimizer(0.00001)
-    #trainer = SiameseTrainer(architecture=architecture,
-    #                         loss=loss,
-    #                         iterations=ITERATIONS,
-    #                         snapshot=VALIDATION_TEST,
-    #                         optimizer=optimizer)
+    architecture = FaceNetSimple(seed=SEED, use_gpu=USE_GPU)
+    optimizer = tf.train.GradientDescentOptimizer(0.05)

     loss = TripletLoss(margin=0.2)
     trainer = TripletTrainer(architecture=architecture, loss=loss,
                              iterations=ITERATIONS,
-                             base_learning_rate=0.05,
+                             learning_rate=constant(),
+                             optimizer=optimizer,
                              prefetch=False,
-                             temp_dir="/idiap/temp/tpereira/CNN_MODELS/triplet-cnn-RANDOM-selection-gpu")
+                             temp_dir="/idiap/temp/tpereira/CNN_MODELS_GRAD_DESC/triplet-cnn-selection-gpu")

     #import ipdb; ipdb.set_trace()
     #trainer.train(train_data_shuffler, validation_data_shuffler)
     trainer.train(train_data_shuffler)
@@ -7,7 +7,8 @@ import numpy
 from bob.learn.tensorflow.datashuffler import Memory, SiameseMemory, TripletMemory, Disk, SiameseDisk, TripletDisk, ImageAugmentation
 from bob.learn.tensorflow.network import Chopra, Lenet
 from bob.learn.tensorflow.loss import BaseLoss, ContrastiveLoss, TripletLoss
-from bob.learn.tensorflow.trainers import Trainer, SiameseTrainer, TripletTrainer
+from bob.learn.tensorflow.trainers import Trainer, SiameseTrainer, TripletTrainer, constant
 # from ..analyzers import ExperimentAnalizer, SoftmaxAnalizer
 from bob.learn.tensorflow.util import load_mnist
 import tensorflow as tf
@@ -119,7 +120,6 @@ def test_cnn_trainer():
     # At least 80% of accuracy
     assert accuracy > 80.
     shutil.rmtree(directory)
-    session.close()


 def test_siamesecnn_trainer():
@@ -150,6 +150,7 @@ def test_siamesecnn_trainer():
                             iterations=iterations,
                             prefetch=False,
                             analizer=None,
+                            learning_rate=constant(0.05, name="siamese_lr"),
                             temp_dir=directory)
     trainer.train(train_data_shuffler)
@@ -165,7 +166,6 @@ def test_siamesecnn_trainer():
     # EER should be below 25%
     assert eer < 0.25
     shutil.rmtree(directory)
-    session.close()


 def test_tripletcnn_trainer():
@@ -196,6 +196,7 @@ def test_tripletcnn_trainer():
                             iterations=iterations,
                             prefetch=False,
                             analizer=None,
+                            learning_rate=constant(0.05, name="triplet_lr"),
                             temp_dir=directory)
     trainer.train(train_data_shuffler)
@@ -211,4 +212,3 @@
     # EER should be below 25%
     assert eer < 0.25
     shutil.rmtree(directory)
-    session.close()
@@ -4,17 +4,16 @@
 # @date: Thu 13 Oct 2016 13:35 CEST

 import numpy
-from bob.learn.tensorflow.datashuffler import Memory, SiameseMemory, TripletMemory, Disk, SiameseDisk, TripletDisk
-from bob.learn.tensorflow.network import Chopra, MLP
-from bob.learn.tensorflow.loss import BaseLoss, ContrastiveLoss, TripletLoss
-from bob.learn.tensorflow.trainers import Trainer, SiameseTrainer, TripletTrainer
+from bob.learn.tensorflow.datashuffler import Memory
+from bob.learn.tensorflow.network import MLP
+from bob.learn.tensorflow.loss import BaseLoss
+from bob.learn.tensorflow.trainers import Trainer, constant
 # from ..analyzers import ExperimentAnalizer, SoftmaxAnalizer
 from bob.learn.tensorflow.util import load_mnist
 import tensorflow as tf
 import bob.io.base
 import os
 import shutil
-from scipy.spatial.distance import cosine
-import bob.measure

 """
@@ -52,6 +51,7 @@ def test_dnn_trainer():
                       iterations=iterations,
                       analizer=None,
                       prefetch=False,
+                      learning_rate=constant(0.05, name="dnn_lr"),
                       temp_dir=directory)
     trainer.train(train_data_shuffler)
@@ -14,6 +14,7 @@ import bob.io.base
 from .Trainer import Trainer
 import os
 import sys
+from .learning_rate import constant


 class SiameseTrainer(Trainer):
@@ -48,9 +49,10 @@ class SiameseTrainer(Trainer):
                  temp_dir="cnn",

                  # Learning rate
-                 base_learning_rate=0.001,
-                 weight_decay=0.9,
-                 decay_steps=1000,
+                 #base_learning_rate=0.001,
+                 #weight_decay=0.9,
+                 #decay_steps=1000,
+                 learning_rate=constant(),

                  ###### training options ##########
                  convergence_threshold=0.01,
@@ -74,9 +76,10 @@ class SiameseTrainer(Trainer):
             temp_dir=temp_dir,

             # Learning rate
-            base_learning_rate=base_learning_rate,
-            weight_decay=weight_decay,
-            decay_steps=decay_steps,
+            #base_learning_rate=base_learning_rate,
+            #weight_decay=weight_decay,
+            #decay_steps=decay_steps,
+            learning_rate=learning_rate,

             ###### training options ##########
             convergence_threshold=convergence_threshold,
@@ -13,6 +13,7 @@ from ..analyzers import SoftmaxAnalizer
 from tensorflow.core.framework import summary_pb2
 import time
 from bob.learn.tensorflow.datashuffler.OnlineSampling import OnLineSampling
+from .learning_rate import constant

 #os.environ["CUDA_VISIBLE_DEVICES"] = "1,3,0,2"
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -51,9 +52,7 @@ class Trainer(object):
                  temp_dir="cnn",

                  # Learning rate
-                 base_learning_rate=0.1,
-                 weight_decay=0.9,
-                 decay_steps=1000,
+                 learning_rate=constant(),

                  ###### training options ##########
                  convergence_threshold=0.01,
@@ -79,9 +78,10 @@ class Trainer(object):
         self.loss = loss
         self.temp_dir = temp_dir

-        self.base_learning_rate = base_learning_rate
-        self.weight_decay = weight_decay
-        self.decay_steps = decay_steps
+        #self.base_learning_rate = base_learning_rate
+        self.learning_rate = learning_rate
+        #self.weight_decay = weight_decay
+        #self.decay_steps = decay_steps

         self.iterations = iterations
         self.snapshot = snapshot
@@ -92,7 +92,6 @@ class Trainer(object):
         # Training variables used in the fit
         self.optimizer = None
         self.training_graph = None
-        self.learning_rate = None
         self.training_graph = None
         self.train_data_shuffler = None
         self.summaries_train = None
@@ -271,13 +270,14 @@ class Trainer(object):
         # TODO: find an elegant way to provide this as a parameter of the trainer
         self.global_step = tf.Variable(0, trainable=False)

-        self.learning_rate = tf.train.exponential_decay(
-            learning_rate=self.base_learning_rate,  # Learning rate
-            global_step=self.global_step,
-            decay_steps=self.decay_steps,
-            decay_rate=self.weight_decay,  # Decay step
-            staircase=False
-        )
+        #self.learning_rate = tf.Variable(self.base_learning_rate)
+        #self.learning_rate = tf.train.exponential_decay(
+        #    learning_rate=self.base_learning_rate,  # Learning rate
+        #    global_step=self.global_step,
+        #    decay_steps=self.decay_steps,
+        #    decay_rate=self.weight_decay,  # Decay step
+        #    staircase=False
+        #)

         self.training_graph = self.compute_graph(train_data_shuffler, prefetch=self.prefetch, name="train")

         # Preparing the optimizer
@@ -291,6 +291,7 @@ class Trainer(object):
         config = tf.ConfigProto(log_device_placement=True)
         config.gpu_options.allow_growth = True
         with tf.Session(config=config) as session:
+            tf.initialize_all_variables().run()
@@ -349,5 +350,3 @@
                 # now they should definitely stop
                 self.thread_pool.request_stop()
                 self.thread_pool.join(threads)
-
-            session.close()  # For some reason the session is not closed after the context manager finishes
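The hunks above remove the internally built decay schedule, but the diff does not show how the injected `self.learning_rate` reaches the optimizer. The sketch below is a guess at the usual TensorFlow pattern for consuming such a tensor; `loss` and `global_step` are placeholders, not code from this commit:

    # Hypothetical illustration: a learning-rate tensor, such as the one
    # returned by constant(), can be handed directly to a TF optimizer.
    lr = constant(0.05)                                # a tf.Variable holding 0.05
    optimizer = tf.train.GradientDescentOptimizer(lr)  # accepts a tensor or a float
    train_op = optimizer.minimize(loss, global_step=global_step)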
@@ -15,6 +15,7 @@ import bob.io.base
 from .Trainer import Trainer
 import os
 import sys
+from .learning_rate import constant


 class TripletTrainer(Trainer):
@@ -48,9 +49,10 @@ class TripletTrainer(Trainer):
                  temp_dir="cnn",

                  # Learning rate
-                 base_learning_rate=0.001,
-                 weight_decay=0.9,
-                 decay_steps=1000,
+                 #base_learning_rate=0.001,
+                 #weight_decay=0.9,
+                 #decay_steps=1000,
+                 learning_rate=constant(),

                  ###### training options ##########
                  convergence_threshold=0.01,
@@ -74,9 +76,10 @@ class TripletTrainer(Trainer):
             temp_dir=temp_dir,

             # Learning rate
-            base_learning_rate=base_learning_rate,
-            weight_decay=weight_decay,
-            decay_steps=decay_steps,
+            #base_learning_rate=base_learning_rate,
+            #weight_decay=weight_decay,
+            #decay_steps=decay_steps,
+            learning_rate=learning_rate,

             ###### training options ##########
             convergence_threshold=convergence_threshold,
@@ -5,7 +5,7 @@ __path__ = extend_path(__path__, __name__)
 from .Trainer import Trainer
 from .SiameseTrainer import SiameseTrainer
 from .TripletTrainer import TripletTrainer
+from .learning_rate import exponential_decay, constant

 import numpy
[new file: learning_rate.py — name inferred from the `__init__.py` import above]
+import tensorflow as tf
+
+
+def exponential_decay(base_learning_rate=0.05,
+                      decay_steps=1000,
+                      weight_decay=0.9,
+                      staircase=False):
+    global_step = tf.Variable(0, trainable=False)
+    # tf.train.exponential_decay takes `learning_rate`, not `base_learning_rate`,
+    # as its first keyword; the wrong name would raise a TypeError.
+    return tf.train.exponential_decay(learning_rate=base_learning_rate,
+                                      global_step=global_step,
+                                      decay_steps=decay_steps,
+                                      decay_rate=weight_decay,
+                                      staircase=staircase)
+
+
+def constant(base_learning_rate=0.05, name="constant_learning_rate"):
+    return tf.Variable(base_learning_rate, name=name)
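A usage sketch mirroring the updated tests above; the `architecture`, `loss`, `iterations`, and `directory` values are placeholders:

    from bob.learn.tensorflow.trainers import Trainer, constant, exponential_decay

    trainer = Trainer(architecture=architecture, loss=loss,
                      iterations=iterations,
                      learning_rate=constant(0.05, name="dnn_lr"),
                      temp_dir=directory)
    # or, for a decaying schedule:
    # learning_rate=exponential_decay(base_learning_rate=0.05,
    #                                 decay_steps=1000, weight_decay=0.9)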