diff --git a/bob/learn/tensorflow/__init__.py b/bob/learn/tensorflow/__init__.py index 34746c635541bd580ceca2934a08fb18e32b1b01..52d02b3c7946ba0d38305a180923e0b21d79d971 100644 --- a/bob/learn/tensorflow/__init__.py +++ b/bob/learn/tensorflow/__init__.py @@ -3,4 +3,8 @@ def get_config(): Returns a string containing the configuration information. """ import bob.extension - return bob.extension.get_config(__name__) \ No newline at end of file + return bob.extension.get_config(__name__) + + +# gets sphinx autodoc done right - don't remove it +__all__ = [_ for _ in dir() if not _.startswith('_')] \ No newline at end of file diff --git a/bob/learn/tensorflow/analyzers/ExperimentAnalizer.py b/bob/learn/tensorflow/analyzers/ExperimentAnalizer.py index 56ca885025b41c8c0689700775dfaebeba92e922..3c3a3d63ceb5f0c0437b470bdbd05513ba8baf3e 100644 --- a/bob/learn/tensorflow/analyzers/ExperimentAnalizer.py +++ b/bob/learn/tensorflow/analyzers/ExperimentAnalizer.py @@ -11,7 +11,7 @@ import bob.measure from tensorflow.core.framework import summary_pb2 from scipy.spatial.distance import cosine -from bob.learn.tensorflow.datashuffler import Memory, Disk +from ..datashuffler import Memory, Disk class ExperimentAnalizer: diff --git a/bob/learn/tensorflow/datashuffler/Base.py b/bob/learn/tensorflow/datashuffler/Base.py index 47d18f005609d2d7f043e31f73db91554ddc7f2e..dab2e9ba672610cac2acfbb4fa756c8b55c5306e 100644 --- a/bob/learn/tensorflow/datashuffler/Base.py +++ b/bob/learn/tensorflow/datashuffler/Base.py @@ -7,7 +7,7 @@ import numpy import tensorflow as tf import bob.ip.base import numpy -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear class Base(object): diff --git a/bob/learn/tensorflow/datashuffler/Disk.py b/bob/learn/tensorflow/datashuffler/Disk.py index d72c80c77cfc3ee768b40f0be06efebaded76f0c..8dd6059f2b4a980ad12b2e8bc77d979de4c8e835 100644 --- a/bob/learn/tensorflow/datashuffler/Disk.py +++ 
b/bob/learn/tensorflow/datashuffler/Disk.py @@ -11,7 +11,7 @@ import bob.core from .Base import Base logger = bob.core.log.setup("bob.learn.tensorflow") -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear class Disk(Base): diff --git a/bob/learn/tensorflow/datashuffler/DiskAudio.py b/bob/learn/tensorflow/datashuffler/DiskAudio.py index 76e936ab767b4766528318300a6bf04303b0c810..2f5ee83f1fe7c7e419cdd25a7d76f5183bc8af4e 100644 --- a/bob/learn/tensorflow/datashuffler/DiskAudio.py +++ b/bob/learn/tensorflow/datashuffler/DiskAudio.py @@ -4,13 +4,15 @@ # @date: Wed 19 Oct 23:43:22 2016 import numpy -import bob.core from .Base import Base +#import time + from scipy.io.wavfile import read as readWAV -logger = bob.core.log.setup("bob.learn.tensorflow") -logger.propagate = False +# logger = bob.core.log.setup("bob.learn.tensorflow") +import logging +logger = logging.getLogger("bob.learn.tensorflow") class DiskAudio(Base): @@ -22,7 +24,7 @@ class DiskAudio(Base): context_size=20, win_length_ms=10, rate=16000, - out_file="" + out_file="temp.txt" ): """ This datashuffler deals with speech databases that are stored in the disk. 
@@ -32,7 +34,7 @@ class DiskAudio(Base): self.out_file = out_file self.context_size = context_size self.win_length_ms = win_length_ms - self.m_win_length = self.win_length_ms * rate / 1000 # number of values in a given window + self.m_win_length = int(self.win_length_ms * rate / 1000) # number of values in a given window self.m_frame_length = self.m_win_length * (2 * self.context_size + 1) input_shape = [self.m_frame_length, 1] @@ -55,10 +57,18 @@ class DiskAudio(Base): # Seting the seed numpy.random.seed(seed) +# self.max_queue_size = 20000 + # a flexible queue that stores audio frames extracted from files +# self.frames_storage = queue.Queue(self.max_queue_size) self.frames_storage = [] # a similar queue for the corresponding labels self.labels_storage = [] + + self.indices = None + self.cur_index = 0 + self.data_finished = False + # if self.out_file != "": # bob.io.base.create_directories_safe(os.path.dirname(self.out_file)) # f = open(self.out_file, "w") @@ -66,7 +76,6 @@ class DiskAudio(Base): # f.write("%d %s\n" % (self.labels[i], str(self.data[i]))) # f.close() - def load_from_file(self, file_name): rate, audio = readWAV(file_name) # We consider there is only 1 channel in the audio file => data[0] @@ -74,33 +83,62 @@ class DiskAudio(Base): return rate, data - def get_batch(self, noise=False): + def randomized_indices(self, max_size): # Shuffling samples - indexes = numpy.array(range(self.data.shape[0])) - numpy.random.shuffle(indexes) + indices = numpy.array(range(max_size)) + numpy.random.shuffle(indices) + return indices + + def get_batch(self, noise=False): + # start = time.time() + if self.data_finished: + return None, None + + if self.indices is None or self.cur_index == 0: + self.indices = self.randomized_indices(self.data.shape[0]) + f = None if self.out_file != "": f = open(self.out_file, "a") - i = 0 + + i = self.cur_index # if not enough in the storage, we pre-load frames from the audio files - while len(self.frames_storage) < self.batch_size: + 
while len(self.labels_storage) < self.batch_size and i < self.indices.shape[0]: if f is not None: - f.write("%s\n" % self.data[indexes[i]]) - frames, labels = self.extract_frames_from_file(self.data[indexes[i]], self.labels[indexes[i]]) + f.write("%s\n" % self.data[self.indices[i]]) + frames, labels = self.extract_frames_from_file(self.data[self.indices[i]], self.labels[self.indices[i]]) self.frames_storage.extend(frames) self.labels_storage.extend(labels) i += 1 + self.cur_index = i + if f is not None: + f.close() + f= None + + # if we ran through the whole data already (we ignore the last incomplete batch) + # self.indices is a list of file names + # so, we want exit current datashuffling thread if cur_index reached the end of the file list + if self.cur_index >= self.indices.shape[0] and len(self.labels_storage) < self.batch_size: + # reset everything + self.frames_storage = [] + self.labels_storage = [] + self.cur_index = 0 + self.data_finished = True + return None, None + # our temp frame queue should have enough data selected_data = numpy.asarray(self.frames_storage[:self.batch_size]) selected_labels = numpy.asarray(self.labels_storage[:self.batch_size]) # remove them from the list del self.frames_storage[:self.batch_size] del self.labels_storage[:self.batch_size] + selected_data = numpy.reshape(selected_data, (self.batch_size, -1, 1)) - if f is not None: - f.close() - return [selected_data.astype("float32"), selected_labels.astype("int64")] + + # end = time.time() + # logger.info("Get Batch time = {0}".format(float(end - start))) + return [selected_data, selected_labels.astype("int64")] def extract_frames_from_file(self, filename, label): rate, wav_signal = self.load_from_file(filename) @@ -118,8 +156,11 @@ class DiskAudio(Base): # make sure the array is divided into equal chunks windows = numpy.split(wav_signal[:self.m_win_length * m_num_win], m_num_win) - final_frames = [] - final_labels = [label] * m_num_win + # final_frames = [] + # final_labels = 
[label] * m_num_win + + final_frames = numpy.empty([m_num_win, self.m_frame_length], dtype=numpy.float32) + final_labels = label * numpy.ones(m_num_win, dtype=numpy.int64) # loop through the windows for i, window in zip(range(0, len(windows)), windows): # window with surrounding context will form the frame we seek @@ -128,11 +169,13 @@ class DiskAudio(Base): # copy the first frame necessary number of times if i < self.context_size: left_context = numpy.tile(windows[0], self.context_size - i) - final_frames.append(numpy.append(left_context, windows[:i + self.context_size + 1])) + # final_frames.append(numpy.append(left_context, windows[:i + self.context_size + 1])) + final_frames[i, :] = numpy.append(left_context, windows[:i + self.context_size + 1]) elif (i + self.context_size) > (m_num_win - 1): right_context = numpy.tile(windows[-1], i + self.context_size - m_num_win + 1) - final_frames.append(numpy.append(windows[i - self.context_size:], right_context)) + # final_frames.append(numpy.append(windows[i - self.context_size:], right_context)) + final_frames[i, :] = numpy.append(windows[i - self.context_size:], right_context) else: - final_frames.append(numpy.ravel(windows[i - self.context_size:i + self.context_size + 1])) - + # final_frames.append(numpy.ravel(windows[i - self.context_size:i + self.context_size + 1])) + final_frames[i, :] = numpy.ravel(windows[i - self.context_size:i + self.context_size + 1]) return final_frames, final_labels diff --git a/bob/learn/tensorflow/datashuffler/Memory.py b/bob/learn/tensorflow/datashuffler/Memory.py index b184efc1643e34d5a97a96bcc32e9ed935fd9580..0f0bcf1d86e6f7c411f94c2ee963bafd1ce462dd 100644 --- a/bob/learn/tensorflow/datashuffler/Memory.py +++ b/bob/learn/tensorflow/datashuffler/Memory.py @@ -5,7 +5,7 @@ import numpy from .Base import Base -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear import tensorflow as tf diff --git 
a/bob/learn/tensorflow/datashuffler/OnlineSampling.py b/bob/learn/tensorflow/datashuffler/OnlineSampling.py index d9aab7e1e36ee85f3500f55e3766c3f5ffbd73a3..f7f27601dfe4855091554eabcdc1b37e52f84e79 100644 --- a/bob/learn/tensorflow/datashuffler/OnlineSampling.py +++ b/bob/learn/tensorflow/datashuffler/OnlineSampling.py @@ -5,7 +5,7 @@ import tensorflow as tf from .Base import Base -from bob.learn.tensorflow.network import SequenceNetwork +from ..network import SequenceNetwork class OnlineSampling(object): diff --git a/bob/learn/tensorflow/datashuffler/SiameseDisk.py b/bob/learn/tensorflow/datashuffler/SiameseDisk.py index 21645b2b4c01a81d5c2bcdd73a307eea23e63614..754052589463802bab9b78568a9dd76a65f0d3d2 100644 --- a/bob/learn/tensorflow/datashuffler/SiameseDisk.py +++ b/bob/learn/tensorflow/datashuffler/SiameseDisk.py @@ -10,7 +10,7 @@ logger = bob.core.log.setup("bob.learn.tensorflow") from .Disk import Disk from .Siamese import Siamese -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear class SiameseDisk(Siamese, Disk): diff --git a/bob/learn/tensorflow/datashuffler/SiameseMemory.py b/bob/learn/tensorflow/datashuffler/SiameseMemory.py index 24e5749687a614615e9a11632a8d5fc51b6f5880..d45316f556eca63fcef6b931d959f4aed4ec37eb 100644 --- a/bob/learn/tensorflow/datashuffler/SiameseMemory.py +++ b/bob/learn/tensorflow/datashuffler/SiameseMemory.py @@ -8,7 +8,7 @@ import numpy from .Memory import Memory from .Siamese import Siamese import tensorflow as tf -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear class SiameseMemory(Siamese, Memory): diff --git a/bob/learn/tensorflow/datashuffler/TripletDisk.py b/bob/learn/tensorflow/datashuffler/TripletDisk.py index 102c1f5cd3605f3a8c596389b6d200ceab6b89df..67c0ece93f4c0595c82f756715d3579641ad3852 100644 --- a/bob/learn/tensorflow/datashuffler/TripletDisk.py +++ b/bob/learn/tensorflow/datashuffler/TripletDisk.py @@ -14,7 +14,7 @@ import 
tensorflow as tf from .Disk import Disk from .Triplet import Triplet -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear class TripletDisk(Triplet, Disk): diff --git a/bob/learn/tensorflow/datashuffler/TripletMemory.py b/bob/learn/tensorflow/datashuffler/TripletMemory.py index e37522884ae8e9e5539e5cacce72f3ee5b2b0725..cc1bc61d13583d5b58ff1f84458e083c9ff5d94d 100644 --- a/bob/learn/tensorflow/datashuffler/TripletMemory.py +++ b/bob/learn/tensorflow/datashuffler/TripletMemory.py @@ -8,7 +8,7 @@ import tensorflow as tf from .Memory import Memory from .Triplet import Triplet -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear class TripletMemory(Triplet, Memory): diff --git a/bob/learn/tensorflow/datashuffler/TripletWithFastSelectionDisk.py b/bob/learn/tensorflow/datashuffler/TripletWithFastSelectionDisk.py index 8a9cde8f5846a292db0c72273d15be3a4831b8c8..11673c9011738782c7fa3559916996aca304dfdd 100644 --- a/bob/learn/tensorflow/datashuffler/TripletWithFastSelectionDisk.py +++ b/bob/learn/tensorflow/datashuffler/TripletWithFastSelectionDisk.py @@ -13,7 +13,7 @@ from scipy.spatial.distance import euclidean, cdist import logging logger = logging.getLogger("bob.learn") -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear class TripletWithFastSelectionDisk(Triplet, Disk, OnlineSampling): diff --git a/bob/learn/tensorflow/datashuffler/TripletWithSelectionDisk.py b/bob/learn/tensorflow/datashuffler/TripletWithSelectionDisk.py index b5cc73001eba038a6775177c87990a1a8fc00446..cab01c987c1917964137f94a8ea698127a20acdf 100644 --- a/bob/learn/tensorflow/datashuffler/TripletWithSelectionDisk.py +++ b/bob/learn/tensorflow/datashuffler/TripletWithSelectionDisk.py @@ -14,7 +14,7 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear import logging logger = logging.getLogger("bob.learn.tensorflow") -from 
bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear class TripletWithSelectionDisk(Triplet, Disk, OnlineSampling): diff --git a/bob/learn/tensorflow/datashuffler/TripletWithSelectionMemory.py b/bob/learn/tensorflow/datashuffler/TripletWithSelectionMemory.py index d04a483a6ecca46592c27d40c84710313f59740a..c3f8434fea9745b7c848120ecae791458a1bc6d3 100644 --- a/bob/learn/tensorflow/datashuffler/TripletWithSelectionMemory.py +++ b/bob/learn/tensorflow/datashuffler/TripletWithSelectionMemory.py @@ -9,7 +9,7 @@ import tensorflow as tf from .OnlineSampling import OnlineSampling from .Memory import Memory from .Triplet import Triplet -from bob.learn.tensorflow.datashuffler.Normalizer import Linear +from .Normalizer import Linear from scipy.spatial.distance import euclidean, cdist import logging diff --git a/bob/learn/tensorflow/datashuffler/__init__.py b/bob/learn/tensorflow/datashuffler/__init__.py index de255be1a22658b569af600d1ad6ec2f7f50a143..a9e753ce7114db5c09aefb16f5215439c92118e8 100644 --- a/bob/learn/tensorflow/datashuffler/__init__.py +++ b/bob/learn/tensorflow/datashuffler/__init__.py @@ -1,4 +1,5 @@ # see https://docs.python.org/3/library/pkgutil.html +from .Normalizer import ScaleFactor, MeanOffset, Linear from .Base import Base from .OnlineSampling import OnlineSampling from .Siamese import Siamese @@ -18,8 +19,6 @@ from .TripletWithSelectionDisk import TripletWithSelectionDisk from .DataAugmentation import DataAugmentation from .ImageAugmentation import ImageAugmentation -from .Normalizer import ScaleFactor, MeanOffset, Linear - from .DiskAudio import DiskAudio # gets sphinx autodoc done right - don't remove it diff --git a/bob/learn/tensorflow/layers/Conv1D.py b/bob/learn/tensorflow/layers/Conv1D.py index fb9250c4959973b007f4ad89b12cd48104b5c372..af16b47783110f960f1292b3564e8efc8b7ce39a 100644 --- a/bob/learn/tensorflow/layers/Conv1D.py +++ b/bob/learn/tensorflow/layers/Conv1D.py @@ -70,13 +70,19 @@ class Conv1D(Layer): 
self.init_value = init_value def create_variables(self, input_layer): - self.input_layer = input_layer # TODO: Do an assert here - if len(input_layer.get_shape().as_list()) != 3: - raise ValueError("The input as a convolutional layer must have 3 dimensions, " - "but {0} were provided".format(len(input_layer.get_shape().as_list()))) - n_channels = input_layer.get_shape().as_list()[2] + input_shape = input_layer.get_shape().as_list() + if len(input_shape) != 3: + if len(input_shape) == 4: + self.input_layer = tf.reshape(input_layer, [-1, input_shape[2], input_shape[3]]) + else: + raise ValueError("The input as a convolutional layer must have 3 dimensions, " + "but {0} were provided".format(len(input_layer.get_shape().as_list()))) + else: + self.input_layer = input_layer + print("Conv1 layer shape: ", self.input_layer.get_shape().as_list()) + n_channels = self.input_layer.get_shape().as_list()[2] if self.W is None: if self.init_value is None: diff --git a/bob/learn/tensorflow/layers/FullyConnected.py b/bob/learn/tensorflow/layers/FullyConnected.py index e7806145f6e29475f003bb843d4d3a3553e84cb1..6ba1f59f82183af3baa1294808d89714e4692ba1 100644 --- a/bob/learn/tensorflow/layers/FullyConnected.py +++ b/bob/learn/tensorflow/layers/FullyConnected.py @@ -101,6 +101,7 @@ class FullyConnected(Layer): else: fc = self.input_layer + print("FC layer shape: ", fc.get_shape().as_list()) if self.batch_norm: fc = self.batch_normalize(fc, training_phase) diff --git a/bob/learn/tensorflow/layers/LogSoftMax.py b/bob/learn/tensorflow/layers/LogSoftMax.py new file mode 100644 index 0000000000000000000000000000000000000000..128ee02dad7362239ee30dcd6f919dc5db8aa972 --- /dev/null +++ b/bob/learn/tensorflow/layers/LogSoftMax.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> +# @date: Wed 11 May 2016 17:38 CEST + +import tensorflow as tf +from .Layer import Layer + + +class LogSoftMax(Layer): + """ + Wraps 
the tensorflow Log_softmax + + **Parameters** + + name: str + The name of the layer + + stride: + Shape of the stride + + batch_norm: bool + Do batch norm? + + activation: bool + Tensor Flow activation + + """ + + def __init__(self, name, + batch_norm=False, + activation=None, + use_gpu=False): + super(LogSoftMax, self).__init__(name, use_gpu=use_gpu, activation=activation, batch_norm=batch_norm) + + def create_variables(self, input_layer): + self.input_layer = input_layer + return + + def get_graph(self, training_phase=True): + with tf.name_scope(str(self.name)): + output = tf.nn.log_softmax(self.input_layer) + + if self.batch_norm: + output = self.batch_normalize(output, training_phase) + + if self.activation is not None: + output = self.activation(output) + + return output diff --git a/bob/learn/tensorflow/layers/MaxPooling.py b/bob/learn/tensorflow/layers/MaxPooling.py index 321a5cf2ab27f7710845c60eb42d722caea530b7..e9922189388296770f971c0b4070161def109308 100644 --- a/bob/learn/tensorflow/layers/MaxPooling.py +++ b/bob/learn/tensorflow/layers/MaxPooling.py @@ -39,7 +39,12 @@ class MaxPooling(Layer): self.strides = strides def create_variables(self, input_layer): - self.input_layer = input_layer + if len(input_layer.get_shape()) == 3: + shape = input_layer.get_shape().as_list() + self.input_layer = tf.reshape(input_layer, [-1, 1, shape[1], shape[2]]) + else: + self.input_layer = input_layer + print("MaxPooling layer shape: ", self.input_layer.get_shape().as_list()) return def get_graph(self, training_phase=True): diff --git a/bob/learn/tensorflow/layers/__init__.py b/bob/learn/tensorflow/layers/__init__.py index 96804ad1435b5f7c5ef5d4bd6fedfbce2677085b..40b28599d657f85921cd1d2c37999f83eea96e52 100644 --- a/bob/learn/tensorflow/layers/__init__.py +++ b/bob/learn/tensorflow/layers/__init__.py @@ -7,6 +7,7 @@ from .MaxPooling import MaxPooling from .AveragePooling import AveragePooling from .Dropout import Dropout from .InputLayer import InputLayer +from 
.LogSoftMax import LogSoftMax # gets sphinx autodoc done right - don't remove it @@ -32,6 +33,7 @@ __appropriate__( AveragePooling, Dropout, InputLayer, + LogSoftMax, ) __all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/learn/tensorflow/loss/NegLogLoss.py b/bob/learn/tensorflow/loss/NegLogLoss.py index b4da1601b5977143d62cadd6fd853bf3a41fc100..d67982cf5065d8b6f69773f1dcd7b571361532f3 100644 --- a/bob/learn/tensorflow/loss/NegLogLoss.py +++ b/bob/learn/tensorflow/loss/NegLogLoss.py @@ -35,9 +35,10 @@ class NegLogLoss(BaseLoss): def __call__(self, graph, label): # get the log-probabilities with log softmax - log_probabilities = tf.nn.log_softmax(graph) + # removed it since we have a LogSoftMax layer now + # log_probabilities = tf.nn.log_softmax(graph) # negative of the log-probability that correspond to the correct label - correct_probabilities = self.gather_nd(log_probabilities, label) + correct_probabilities = self.gather_nd(graph, label) neg_log_prob = tf.negative(correct_probabilities) # use negative log likelihood as the loss - return self.operation(neg_log_prob) + return self.operation(neg_log_prob) \ No newline at end of file diff --git a/bob/learn/tensorflow/network/DeeperAudio.py b/bob/learn/tensorflow/network/DeeperAudio.py new file mode 100644 index 0000000000000000000000000000000000000000..01b874038e76436da21972cc2a870d3ed807ed46 --- /dev/null +++ b/bob/learn/tensorflow/network/DeeperAudio.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Pavel Korshunov <pavel.korshunov@idiap.ch> +# @date: Thu 06 April 2017 09:39:36 CEST + +""" +A deeper 5-layers architecture for audio data +""" + +import tensorflow as tf +from .SequenceNetwork import SequenceNetwork +from ..layers import Conv1D, FullyConnected, LogSoftMax, MaxPooling +from bob.learn.tensorflow.initialization import Uniform + + +# construct HardTanh activation function +def hard_tanh(x, name=None): + one = tf.constant(1, dtype=tf.float32) + 
neg_one = tf.constant(-1, dtype=tf.float32) + return tf.minimum(tf.maximum(x, neg_one), one) + + +class DeeperAudio(SequenceNetwork): + + def __init__(self, + conv1_kernel_size=160, + conv1_output=32, + conv1_stride=20, + + pooling_shape=[1, 1, 2, 1], + pooling_stride=[1, 1, 2, 1], + + conv2_kernel_size=32, + conv2_output=64, + conv2_stride=2, + + conv3_kernel_size=1, + conv3_output=64, + conv3_stride=1, + + fc1_output=60, + + n_classes=2, + default_feature_layer="fc2", + + seed=10, + use_gpu=False + ): + + super(DeeperAudio, self).__init__(default_feature_layer=default_feature_layer, + use_gpu=use_gpu) + + self.add(Conv1D(name="conv1", kernel_size=conv1_kernel_size, + filters=conv1_output, + stride=conv1_stride, + activation=hard_tanh, + weights_initialization=Uniform(seed=seed, use_gpu=use_gpu), + bias_initialization=Uniform(seed=seed, use_gpu=use_gpu), + use_gpu=use_gpu + )) + + self.add(MaxPooling(name="pooling1", shape=pooling_shape)) + + self.add(Conv1D(name="conv2", kernel_size=conv2_kernel_size, + filters=conv2_output, + stride=conv2_stride, + activation=hard_tanh, + weights_initialization=Uniform(seed=seed, use_gpu=use_gpu), + bias_initialization=Uniform(seed=seed, use_gpu=use_gpu), + use_gpu=use_gpu + )) + + self.add(MaxPooling(name="pooling2", shape=pooling_shape)) + + + self.add(Conv1D(name="conv3", kernel_size=conv3_kernel_size, + filters=conv3_output, + stride=conv3_stride, + activation=hard_tanh, + weights_initialization=Uniform(seed=seed, use_gpu=use_gpu), + bias_initialization=Uniform(seed=seed, use_gpu=use_gpu), + use_gpu=use_gpu + )) + + self.add(MaxPooling(name="pooling3", shape=pooling_shape)) + + self.add(FullyConnected(name="fc1", output_dim=fc1_output, + activation=hard_tanh, + weights_initialization=Uniform(seed=seed, use_gpu=use_gpu), + bias_initialization=Uniform(seed=seed, use_gpu=use_gpu), + use_gpu=use_gpu + )) + + self.add(FullyConnected(name="fc2", output_dim=n_classes, + activation=None, + weights_initialization=Uniform(seed=seed, 
use_gpu=use_gpu), + bias_initialization=Uniform(seed=seed, use_gpu=use_gpu), + use_gpu=use_gpu + )) + + self.add(LogSoftMax(name="logsoftmax", activation=None, use_gpu=use_gpu)) diff --git a/bob/learn/tensorflow/network/SequenceNetwork.py b/bob/learn/tensorflow/network/SequenceNetwork.py index 97f94a11a9a5a83273008421c406dd269858af7c..75606de7636eeafa648b4f5af2aaca5a84f00da3 100644 --- a/bob/learn/tensorflow/network/SequenceNetwork.py +++ b/bob/learn/tensorflow/network/SequenceNetwork.py @@ -10,8 +10,10 @@ import six import numpy import pickle +import bob.io.base + from collections import OrderedDict -from bob.learn.tensorflow.layers import Layer, MaxPooling, Dropout, Conv2D, FullyConnected +from bob.learn.tensorflow.layers import Layer, MaxPooling, Dropout, Conv2D, FullyConnected, LogSoftMax from bob.learn.tensorflow.utils.session import Session @@ -123,11 +125,11 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): # Feeding the placeholder if self.inference_placeholder is None: - self.compute_inference_placeholder(data.shape[1:]) + self.compute_inference_placeholder([None] + list(data.shape[1:])) feed_dict = {self.inference_placeholder: data} if self.inference_graph is None: - self.compute_inference_graph(self.inference_placeholder, feature_layer) + self.compute_inference_graph(feature_layer) embedding = session.run([self.inference_graph], feed_dict=feed_dict)[0] @@ -143,7 +145,9 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): variables = {} for k in self.sequence_net: # TODO: IT IS NOT SMART TESTING ALONG THIS PAGE - if not isinstance(self.sequence_net[k], MaxPooling) and not isinstance(self.sequence_net[k], Dropout): + if not isinstance(self.sequence_net[k], MaxPooling) and \ + not isinstance(self.sequence_net[k], Dropout) and \ + not isinstance(self.sequence_net[k], LogSoftMax): variables[self.sequence_net[k].W.name] = self.sequence_net[k].W variables[self.sequence_net[k].b.name] = self.sequence_net[k].b @@ -172,7 +176,9 @@ 
class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): for k in self.sequence_net.keys(): current_layer = self.sequence_net[k] - if not isinstance(self.sequence_net[k], MaxPooling) and not isinstance(self.sequence_net[k], Dropout): + if not isinstance(self.sequence_net[k], MaxPooling) and \ + not isinstance(self.sequence_net[k], Dropout) and \ + not isinstance(self.sequence_net[k], LogSoftMax): self.variable_summaries(current_layer.W, current_layer.name + '/weights') self.variable_summaries(current_layer.b, current_layer.name + '/bias') @@ -235,7 +241,7 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): # Saving the architecture if self.pickle_architecture is not None: hdf5.set('architecture', self.pickle_architecture) - hdf5.set('deployment_shape', self.deployment_shape) + # hdf5.set('deployment_shape', numpy.array(self.deployment_shape)) # Directory that stores the tensorflow variables hdf5.create_group('/tensor_flow') @@ -265,7 +271,8 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): hdf5.cd('/tensor_flow') for k in self.sequence_net: # TODO: IT IS NOT SMART TESTING ALONG THIS PAGE - if not isinstance(self.sequence_net[k], MaxPooling): + if not isinstance(self.sequence_net[k], MaxPooling) and \ + not isinstance(self.sequence_net[k], LogSoftMax): self.sequence_net[k].W.assign(hdf5.read(self.sequence_net[k].W.name)).eval(session=session) session.run(self.sequence_net[k].W) self.sequence_net[k].b.assign(hdf5.read(self.sequence_net[k].b.name)).eval(session=session) @@ -277,7 +284,7 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): hdf5.cd("..") - def load_hdf5(self, hdf5, shape=None, batch=1, use_gpu=False): + def load_hdf5(self, hdf5path, shape=None, batch=1, use_gpu=False): """ Load the network from scratch. This will build the graphs @@ -290,6 +297,7 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): batch: The size of the batch use_gpu: Load all the variables in the GPU? 
""" + hdf5 = bob.io.base.HDF5File(hdf5path) session = Session.instance().session @@ -299,19 +307,21 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): # Loading architecture self.sequence_net = pickle.loads(hdf5.read('architecture')) - self.deployment_shape = hdf5.read('deployment_shape') self.turn_gpu_onoff(use_gpu) if shape is None: + self.deployment_shape = hdf5.read('deployment_shape') shape = self.deployment_shape shape[0] = batch # Loading variables place_holder = tf.placeholder(tf.float32, shape=shape, name="load") self.compute_graph(place_holder) - tf.global_variables_initializer().run(session=session) - self.load_variables_only(hdf5, session) + tf.initialize_all_variables().run(session=session) + self.load_variables_only(hdf5) + + # self.pickle_net(shape) def save(self, saver, path): @@ -334,4 +344,4 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)): self.inference_graph = tf.get_collection("inference_graph")[0] self.inference_placeholder = tf.get_collection("inference_placeholder")[0] - return saver + return saver \ No newline at end of file diff --git a/bob/learn/tensorflow/network/SimpleAudio.py b/bob/learn/tensorflow/network/SimpleAudio.py index 7c3460443d9a0f6ebe75958c310c0d3a6ff760ec..082a1197d4538b7f1dd5cdc52c8dbf1951cbd690 100644 --- a/bob/learn/tensorflow/network/SimpleAudio.py +++ b/bob/learn/tensorflow/network/SimpleAudio.py @@ -9,7 +9,7 @@ Class that creates the lenet architecture import tensorflow as tf from .SequenceNetwork import SequenceNetwork -from ..layers import Conv1D, FullyConnected +from ..layers import Conv1D, FullyConnected, LogSoftMax from bob.learn.tensorflow.initialization import Uniform @@ -60,4 +60,6 @@ class SimpleAudio(SequenceNetwork): weights_initialization=Uniform(seed=seed, use_gpu=use_gpu), bias_initialization=Uniform(seed=seed, use_gpu=use_gpu), use_gpu=use_gpu - )) \ No newline at end of file + )) + + self.add(LogSoftMax(name="logsoftmax", activation=None, use_gpu=use_gpu)) diff --git 
a/bob/learn/tensorflow/network/__init__.py b/bob/learn/tensorflow/network/__init__.py index e9d04042e596ed4550ead36d3cc08ffc604475c5..0caa8efa45d8575da96f84bc15d4d9f1642c6c29 100644 --- a/bob/learn/tensorflow/network/__init__.py +++ b/bob/learn/tensorflow/network/__init__.py @@ -9,6 +9,7 @@ from .FaceNetSimple import FaceNetSimple from .VGG16 import VGG16 from .VGG16_mod import VGG16_mod from .SimpleAudio import SimpleAudio +from .DeeperAudio import DeeperAudio # gets sphinx autodoc done right - don't remove it def __appropriate__(*args): @@ -36,6 +37,6 @@ __appropriate__( VGG16, VGG16_mod, SimpleAudio, + DeeperAudio, ) __all__ = [_ for _ in dir() if not _.startswith('_')] - diff --git a/bob/learn/tensorflow/trainers/Trainer.py b/bob/learn/tensorflow/trainers/Trainer.py index 8b980819c94148d57fa4a372e098a9b462db6efe..7d47bb97f8935df8e629f7860db1b40dd9abf580 100644 --- a/bob/learn/tensorflow/trainers/Trainer.py +++ b/bob/learn/tensorflow/trainers/Trainer.py @@ -12,7 +12,7 @@ import bob.core from ..analyzers import SoftmaxAnalizer from tensorflow.core.framework import summary_pb2 import time -from bob.learn.tensorflow.datashuffler import OnlineSampling +from ..datashuffler import OnlineSampling from bob.learn.tensorflow.utils.session import Session from .learning_rate import constant diff --git a/bob/learn/tensorflow/trainers/TrainerSeq.py b/bob/learn/tensorflow/trainers/TrainerSeq.py new file mode 100644 index 0000000000000000000000000000000000000000..2299c96fb0e310d608f681fac6448368568f9934 --- /dev/null +++ b/bob/learn/tensorflow/trainers/TrainerSeq.py @@ -0,0 +1,679 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Pavel Korshunov <pavel.korshunov@idiap.ch> +# @date: Thu 09 Mar 2017 15:25:22 CEST + +import tensorflow as tf +from ..network import SequenceNetwork +import threading +import os +import bob.io.base +import bob.core +from ..analyzers import SoftmaxAnalizer +from tensorflow.core.framework import summary_pb2 +import time +from 
# NOTE(review): this module was reconstructed from a whitespace-mangled patch.
# Statement order is preserved from the source; nesting that was ambiguous in
# the mangled view is marked with NOTE(review) comments below.
from ..datashuffler import OnlineSampling
from bob.learn.tensorflow.utils.session import Session
from .learning_rate import constant

import numpy

logger = bob.core.log.setup("bob.learn.tensorflow")


class TrainerSeq(object):
    """
    One-graph trainer that trains in epochs (goes through all the data in each
    epoch) by computing a forward-backward pass for each mini-batch.

    Use this trainer when your CNN consists of one graph, as opposed to Siamese
    or Triplet networks.

    **Parameters**

    architecture: :py:class:`bob.learn.tensorflow.network.SequenceNetwork`
        The architecture that you want to run.

    optimizer:
        One of the tensorflow optimizers
        https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html

    use_gpu: bool
        Use GPUs in the training.

    loss: :py:class:`bob.learn.tensorflow.loss.BaseLoss`
        Loss function.

    temp_dir: str
        The output directory.

    learning_rate: `bob.learn.tensorflow.trainers.learning_rate`
        Initial learning rate.

    convergence_threshold: float
        Stored on the instance; not consulted by this trainer's loop.

    iterations: int
        Maximum number of iterations.

    snapshot: int
        Will take a snapshot of the network at every `n` iterations.

    validation_snapshot: int
        Periodicity (in batches) of the validation reporting.

    prefetch: bool
        Use extra threads to deal with the I/O.

    epochs: int
        How many epochs to run.

    analizer:
        Neural network analizer :py:mod:`bob.learn.tensorflow.analyzers`

    model_from_file: str
        Path to a pretrained model, either a ``.hdf5`` or a ``.ckp`` file.
        Empty string means "train from scratch".

    verbosity_level: int
        Verbosity level forwarded to the bob logger.
    """

    def __init__(self,
                 architecture,
                 # NOTE(review): these callable defaults are evaluated once at
                 # import time, so the same optimizer/learning-rate/analizer
                 # objects are shared by every trainer created with defaults.
                 optimizer=tf.train.AdamOptimizer(),
                 use_gpu=False,
                 loss=None,
                 temp_dir="cnn",

                 # Learning rate
                 learning_rate=constant(),

                 ###### training options ##########
                 convergence_threshold=0.01,
                 iterations=5000,
                 snapshot=500,
                 validation_snapshot=100,
                 prefetch=False,
                 epochs=10,

                 ## Analizer
                 analizer=SoftmaxAnalizer(),

                 ### Pretrained model
                 model_from_file="",

                 verbosity_level=2):

        if not isinstance(architecture, SequenceNetwork):
            raise ValueError("`architecture` should be instance of `SequenceNetwork`")

        self.architecture = architecture
        self.optimizer_class = optimizer
        self.use_gpu = use_gpu
        self.loss = loss
        self.temp_dir = temp_dir

        self.learning_rate = learning_rate

        self.iterations = iterations
        self.snapshot = snapshot
        self.validation_snapshot = validation_snapshot
        self.convergence_threshold = convergence_threshold
        self.prefetch = prefetch

        self.epochs = epochs  # how many epochs to run

        # Training variables used in the fit
        self.optimizer = None
        self.training_graph = None
        self.train_data_shuffler = None
        self.summaries_train = None
        self.train_summary_writter = None
        self.train_thread_pool = None
        self.train_threads = None

        # Validation data
        self.validation_graph = None
        self.validation_data_shuffler = None
        self.validation_summary_writter = None
        self.valid_thread_pool = None
        self.valid_threads = None

        # Analizer
        self.analizer = analizer

        self.enqueue_op_train = None
        self.enqueue_op_valid = None
        self.global_epoch = None
        # Serialize data-shuffler access from the prefetching threads.
        self.threads_lock_train = threading.RLock()
        self.threads_lock_valid = threading.RLock()

        self.model_from_file = model_from_file
        self.session = None

        bob.core.log.set_verbosity_level(logger, verbosity_level)

    def __del__(self):
        # Drop the default graph so a subsequent trainer starts clean.
        tf.reset_default_graph()

    def compute_graph(self, data_shuffler, prefetch=False, name="", training=True):
        """
        Computes the graph for the trainer.

        ** Parameters **

        data_shuffler: Data shuffler providing placeholders and batches
        prefetch: Use a FIFO queue fed by background threads instead of feed_dict
        name: Name of the graph
        training: Is it a training graph?

        Returns the loss tensor when ``training`` is True, and
        ``[network_output, loss, label_batch]`` otherwise.
        """

        # Defining place holders
        if prefetch:
            [placeholder_data, placeholder_labels] = data_shuffler.get_placeholders_forprefetch(name=name)

            # Defining a placeholder queue for prefetching
            queue = tf.FIFOQueue(capacity=100000,
                                 dtypes=[tf.float32, tf.int64],
                                 shapes=[placeholder_data.get_shape().as_list()[1:], []])

            # Fetching the place holders from the queue
            if training:
                self.enqueue_op_train = queue.enqueue_many([placeholder_data, placeholder_labels])
            else:
                self.enqueue_op_valid = queue.enqueue_many([placeholder_data, placeholder_labels])
            feature_batch, label_batch = queue.dequeue_many(data_shuffler.batch_size)

            # Creating the architecture for train and validation
            if not isinstance(self.architecture, SequenceNetwork):
                raise ValueError("The variable `architecture` must be an instance of "
                                 "`bob.learn.tensorflow.network.SequenceNetwork`")
        else:
            # NOTE(review): in the mangled source this `else` pairs with
            # `if prefetch:` (non-prefetch mode reads the plain placeholders).
            [feature_batch, label_batch] = data_shuffler.get_placeholders(name=name)

        # Creating graphs and defining the loss
        network_graph = self.architecture.compute_graph(feature_batch, training=training)
        graph = self.loss(network_graph, label_batch)
        if not training:
            return [network_graph, graph, label_batch]

        return graph

    def get_feed_dict(self, data_shuffler):
        """
        Given a data shuffler, prepare the feed dictionary to be injected in
        the graph.  Returns ``None`` when the shuffler ran out of data.

        ** Parameters **
        data_shuffler: Data shuffler providing the batch and placeholders
        """
        [data, labels] = data_shuffler.get_batch()
        # when we run out of data
        if data is None:
            return None

        [data_placeholder, label_placeholder] = data_shuffler.get_placeholders()

        return {data_placeholder: data,
                label_placeholder: labels}

    def fit(self):
        """
        Run one training iteration (`forward` and `backward`).

        Returns ``(loss, summary)`` for the mini-batch, or ``(None, None)``
        when the train shuffler ran out of data (non-prefetch mode only).
        """
        if self.prefetch:
            # Data is pulled from the queue filled by the enqueue threads.
            _, batch_loss, _, summary = self.session.run(
                [self.optimizer, self.training_graph, self.learning_rate, self.summaries_train])
        else:
            feed_dict = self.get_feed_dict(self.train_data_shuffler)
            # if we run out of data
            if feed_dict is None:
                return None, None
            _, batch_loss, _, summary = self.session.run(
                [self.optimizer, self.training_graph, self.learning_rate, self.summaries_train],
                feed_dict=feed_dict)
        return batch_loss, summary

    def compute_validation(self, data_shuffler):
        """
        Computes the loss and the per-sample prediction error on one
        validation batch.

        ** Parameters **
        data_shuffler: The data shuffler to be used

        Returns ``(prediction_err, loss)``, or ``(None, None)`` when the
        shuffler ran out of data (non-prefetch mode only).
        """
        if self.prefetch:
            prediction, batch_loss, labels = self.session.run(self.validation_graph)
        else:
            [data, labels] = data_shuffler.get_batch()
            # when we run out of data
            if data is None:
                return None, None

            [data_placeholder, label_placeholder] = data_shuffler.get_placeholders()

            feed_dict = {data_placeholder: data,
                         label_placeholder: labels}

            prediction, batch_loss, labels = self.session.run(self.validation_graph, feed_dict=feed_dict)

        prediction = numpy.argmax(prediction, 1)
        prediction_err = numpy.asarray([prediction != labels], dtype=numpy.int16)

        return prediction_err, batch_loss

    def create_general_summary(self):
        """
        Creates a simple tensorboard summary with the value of the loss and
        the learning rate, and returns the merged summary op.
        """
        tf.summary.scalar('loss', self.training_graph)
        tf.summary.scalar('lr', self.learning_rate)
        return tf.summary.merge_all()

    def start_thread(self, training=True):
        """
        Start a pool of daemon threads for pre-fetching.

        ** Parameters **
        training: feed the training queue when True, the validation queue otherwise

        Returns the list of started threads.
        """
        target = self.load_and_enqueue if training else self.load_and_enqueue_valid
        threads = []
        for _ in range(10):  # fixed-size pool of 10 enqueue workers
            worker = threading.Thread(target=target, args=())
            worker.daemon = True  # thread will close when parent quits
            worker.start()
            threads.append(worker)
        return threads

    def load_and_enqueue(self):
        """
        Inject training data in the placeholder queue until the shuffler is
        exhausted or the coordinator requests a stop.
        """
        while not self.train_thread_pool.should_stop():
            with self.threads_lock_train:
                [train_data, train_labels] = self.train_data_shuffler.get_batch()

                # if we run out of data, stop the whole pool
                if train_data is None or self.train_data_shuffler.data_finished:
                    self.train_thread_pool.request_stop()
                    return

                [train_placeholder_data, train_placeholder_labels] = self.train_data_shuffler.get_placeholders()

                feed_dict = {train_placeholder_data: train_data,
                             train_placeholder_labels: train_labels}

                self.session.run(self.enqueue_op_train, feed_dict=feed_dict)

    def load_and_enqueue_valid(self):
        """
        Inject validation data in the placeholder queue until the shuffler is
        exhausted or the coordinator requests a stop.  No-op when no
        validation shuffler is configured.
        """
        if self.validation_data_shuffler is None:
            return

        while not self.valid_thread_pool.should_stop():
            with self.threads_lock_valid:
                [valid_data, valid_labels] = self.validation_data_shuffler.get_batch()

                # if we run out of data, stop the whole pool
                if valid_data is None or self.validation_data_shuffler.data_finished:
                    self.valid_thread_pool.request_stop()
                    return

                [valid_placeholder_data, valid_placeholder_labels] = self.validation_data_shuffler.get_placeholders()

                feed_dict = {valid_placeholder_data: valid_data,
                             valid_placeholder_labels: valid_labels}

                self.session.run(self.enqueue_op_valid, feed_dict=feed_dict)

    def bootstrap_graphs(self, train_data_shuffler, validation_data_shuffler):
        """
        Create all the necessary graphs for training, validation and inference,
        and register them in tf collections for later reload.
        """

        # Creating train graph
        self.training_graph = self.compute_graph(train_data_shuffler, prefetch=self.prefetch, name="train")
        tf.add_to_collection("training_graph", self.training_graph)

        # Creating inference graph
        self.architecture.compute_inference_placeholder(train_data_shuffler.deployment_shape)
        self.architecture.compute_inference_graph()
        tf.add_to_collection("inference_placeholder", self.architecture.inference_placeholder)
        tf.add_to_collection("inference_graph", self.architecture.inference_graph)

        # Creating validation graph
        if validation_data_shuffler is not None:
            self.validation_graph = self.compute_graph(validation_data_shuffler, prefetch=self.prefetch,
                                                       name="validation", training=False)
            tf.add_to_collection("validation_graph", self.validation_graph)

        self.bootstrap_placeholders(train_data_shuffler, validation_data_shuffler)

    def bootstrap_placeholders(self, train_data_shuffler, validation_data_shuffler):
        """
        Persist the placeholders in tf collections.

        ** Parameters **
        train_data_shuffler: Data shuffler for training
        validation_data_shuffler: Data shuffler for validation
        """

        # Persisting the placeholders
        if self.prefetch:
            batch, label = train_data_shuffler.get_placeholders_forprefetch("train")
        else:
            batch, label = train_data_shuffler.get_placeholders()

        tf.add_to_collection("train_placeholder_data", batch)
        tf.add_to_collection("train_placeholder_label", label)

        # Validation placeholders
        if validation_data_shuffler is not None:
            if self.prefetch:
                batch, label = validation_data_shuffler.get_placeholders_forprefetch("validation")
            else:
                batch, label = validation_data_shuffler.get_placeholders()
            tf.add_to_collection("validation_placeholder_data", batch)
            tf.add_to_collection("validation_placeholder_label", label)

    def _prepare_optimizer_and_summaries(self):
        """
        Create the global-epoch counter, the optimizer op and the train
        summaries, initialize all variables and return a tf Saver.

        Shared by the from-scratch and the from-hdf5 bootstrap paths (the
        original duplicated this code, and registered the train summaries
        in the collection twice).
        """
        # TODO: find an elegant way to provide this as a parameter of the trainer
        self.global_epoch = tf.Variable(0, trainable=False, name="global_epoch")
        tf.add_to_collection("global_epoch", self.global_epoch)

        # Preparing the optimizer
        self.optimizer_class._learning_rate = self.learning_rate
        self.optimizer = self.optimizer_class.minimize(self.training_graph, global_step=self.global_epoch)
        tf.add_to_collection("optimizer", self.optimizer)
        tf.add_to_collection("learning_rate", self.learning_rate)

        # Train summary
        self.summaries_train = self.create_general_summary()
        tf.add_to_collection("summaries_train", self.summaries_train)

        tf.initialize_all_variables().run(session=self.session)

        # Original tensorflow saver object
        return tf.train.Saver(var_list=tf.all_variables(), max_to_keep=30)

    def bootstrap_graphs_fromhdf5file(self, train_data_shuffler, validation_data_shuffler):
        """
        Build the graphs from scratch, then load the weights from the HDF5
        file given in ``self.model_from_file``.  Returns the saver.
        """
        self.bootstrap_graphs(train_data_shuffler, validation_data_shuffler)

        saver = self._prepare_optimizer_and_summaries()

        # NOTE(review): hard-coded input shape for the HDF5 load — verify it
        # matches the data shuffler in use.
        self.architecture.load_hdf5(self.model_from_file, shape=[1, 6560, 1])
        return saver

    def bootstrap_graphs_fromfile(self, train_data_shuffler, validation_data_shuffler):
        """
        Bootstrap all the necessary graph elements from a checkpoint file.

        ** Parameters **
        train_data_shuffler: Data shuffler for training
        validation_data_shuffler: Data shuffler for validation

        Returns the saver produced by the architecture load.
        """
        saver = self.architecture.load(self.model_from_file, clear_devices=False)

        # Loading training graph
        self.training_graph = tf.get_collection("training_graph")[0]

        # Loading other elements
        self.optimizer = tf.get_collection("optimizer")[0]
        self.learning_rate = tf.get_collection("learning_rate")[0]
        self.summaries_train = tf.get_collection("summaries_train")[0]
        self.global_epoch = tf.get_collection("global_epoch")[0]

        if validation_data_shuffler is not None:
            self.validation_graph = tf.get_collection("validation_graph")[0]

        self.bootstrap_placeholders_fromfile(train_data_shuffler, validation_data_shuffler)

        return saver

    def bootstrap_placeholders_fromfile(self, train_data_shuffler, validation_data_shuffler):
        """
        Load placeholders from the tf collections back into the shufflers.

        ** Parameters **
        train_data_shuffler: Data shuffler for training
        validation_data_shuffler: Data shuffler for validation
        """

        train_data_shuffler.set_placeholders(tf.get_collection("train_placeholder_data")[0],
                                             tf.get_collection("train_placeholder_label")[0])

        if validation_data_shuffler is not None:
            # BUGFIX: the original assigned the validation placeholders to
            # train_data_shuffler, clobbering the training placeholders.
            validation_data_shuffler.set_placeholders(tf.get_collection("validation_placeholder_data")[0],
                                                      tf.get_collection("validation_placeholder_label")[0])

    def launch_train_threads(self):
        """Start the coordinator and the training enqueue thread pool."""
        self.train_thread_pool = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=self.train_thread_pool, sess=self.session)
        self.train_threads = self.start_thread()

    def launch_valid_threads(self):
        """Start the coordinator and the validation enqueue thread pool."""
        self.valid_thread_pool = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=self.valid_thread_pool, sess=self.session)
        self.valid_threads = self.start_thread(training=False)

    def train(self, train_data_shuffler, validation_data_shuffler=None):
        """
        Train the network.

        ** Parameters **

        train_data_shuffler: Data shuffler for training
        validation_data_shuffler: Data shuffler for validation (optional)
        """

        # Creating directory
        bob.io.base.create_directories_safe(self.temp_dir)
        self.train_data_shuffler = train_data_shuffler
        self.validation_data_shuffler = validation_data_shuffler

        logger.info("Initializing !!")

        # Pickle the architecture to save
        self.architecture.pickle_net(train_data_shuffler.deployment_shape)

        Session.create()
        self.session = Session.instance(new=True).session

        # Loading a pretrained model, or bootstrapping everything from scratch
        if self.model_from_file != "":
            logger.info("Loading pretrained model from {0}".format(self.model_from_file))
            if self.model_from_file.lower().endswith('.hdf5'):
                saver = self.bootstrap_graphs_fromhdf5file(train_data_shuffler, validation_data_shuffler)
            elif self.model_from_file.lower().endswith('.ckp'):
                saver = self.bootstrap_graphs_fromfile(train_data_shuffler, validation_data_shuffler)
            else:
                raise ValueError("Unknown format of the model %s. Only HDF5 or pickled formats are supported"
                                 % self.model_from_file)

            # Resume from the persisted epoch counter
            epoch = self.global_epoch.eval(session=self.session)
        else:
            epoch = 0
            # Bootstrapping all the graphs
            self.bootstrap_graphs(train_data_shuffler, validation_data_shuffler)
            saver = self._prepare_optimizer_and_summaries()

        # NOTE(review): in the mangled source the following statements appear
        # after the from-scratch branch; they are reconstructed at method
        # level so both bootstrap paths get them.
        if isinstance(train_data_shuffler, OnlineSampling):
            train_data_shuffler.set_feature_extractor(self.architecture, session=self.session)

        # Snapshot of the initial model
        self.architecture.save(saver, os.path.join(self.temp_dir, 'model_initial.ckp'))
        with self.session.as_default():
            path = os.path.join(self.temp_dir, 'model_initial.hdf5')
            self.architecture.save_hdf5(bob.io.base.HDF5File(path, 'w'))

        # TENSOR BOARD SUMMARY
        self.train_summary_writter = tf.summary.FileWriter(os.path.join(self.temp_dir, 'train'), self.session.graph)

        start = time.time()
        total_train_data = 0
        total_valid_data = 0
        for epoch in range(epoch, self.epochs):

            batch_num = 0
            total_train_loss = 0
            logger.info("\nTRAINING EPOCH {0}".format(epoch))
            self.train_data_shuffler.data_finished = False

            # Start a thread to enqueue data asynchronously, and hide I/O latency.
            if self.prefetch:
                self.launch_train_threads()

            while True:
                cur_loss, summary = self.fit()
                # we are done when we went through the whole data
                if cur_loss is None or self.train_data_shuffler.data_finished:
                    break

                batch_num += 1
                total_train_loss += cur_loss

                # Reporting loss for each snapshot
                if (batch_num / 2.0) % self.snapshot == 0:
                    logger.info("Loss training set, epoch={0}, batch_num={1} = {2}".format(
                        epoch, batch_num, total_train_loss / batch_num))
                    self.train_summary_writter.add_summary(summary, epoch * total_train_data + batch_num)
                    end = time.time()
                    logger.info("Training Batch = {0}, time = {1}".format(batch_num, float(end - start)))
                    summary = summary_pb2.Summary.Value(tag="elapsed_time", simple_value=float(end - start))
                    self.train_summary_writter.add_summary(
                        summary_pb2.Summary(value=[summary]), epoch * total_train_data + batch_num)
                    path = os.path.join(self.temp_dir, 'model_epoch{0}_batch{1}.ckp'.format(epoch, batch_num))
                    self.architecture.save(saver, path)
                    with self.session.as_default():
                        path = os.path.join(self.temp_dir, 'model_epoch{0}_batch{1}.hdf5'.format(epoch, batch_num))
                        self.architecture.save_hdf5(bob.io.base.HDF5File(path, 'w'))
                    start = time.time()

            total_train_data = batch_num
            logger.info("Number of training batches={0}".format(total_train_data))
            logger.info("Taking snapshot for epoch %d", epoch)
            if total_train_data:
                logger.info("Loss total TRAINING for epoch={0} = {1}".format(
                    epoch, total_train_loss / total_train_data))

            # End-of-epoch snapshot
            path = os.path.join(self.temp_dir, 'model_epoch{0}.ckp'.format(epoch))
            self.architecture.save(saver, path)
            with self.session.as_default():
                path = os.path.join(self.temp_dir, 'model_epoch{0}.hdf5'.format(epoch))
                self.architecture.save_hdf5(bob.io.base.HDF5File(path, 'w'))

            # Running validation for the current epoch
            if self.validation_data_shuffler is not None:
                batch_num = 0
                total_valid_loss = 0
                total_prediction_err = 0
                start = time.time()
                logger.info("\nVALIDATION EPOCH {0}".format(epoch))
                self.validation_data_shuffler.data_finished = False

                # Start a thread to enqueue data asynchronously, and hide I/O latency.
                if self.prefetch:
                    self.launch_valid_threads()

                while True:
                    prediction_err, cur_loss = self.compute_validation(self.validation_data_shuffler)
                    # we are done when we went through the whole data
                    if cur_loss is None or self.validation_data_shuffler.data_finished:
                        break

                    batch_num += 1
                    total_valid_loss += cur_loss
                    total_prediction_err += numpy.mean(numpy.array(prediction_err))

                    if self.validation_summary_writter is None:
                        self.validation_summary_writter = tf.summary.FileWriter(
                            os.path.join(self.temp_dir, 'validation'), self.session.graph)

                    if (batch_num / 2.0) % self.validation_snapshot == 0:
                        summaries = [summary_pb2.Summary.Value(tag="loss",
                                                               simple_value=float(total_valid_loss / batch_num))]
                        self.validation_summary_writter.add_summary(
                            summary_pb2.Summary(value=summaries), epoch * total_valid_data + batch_num)
                        logger.info("Loss validation batch={0} = {1}".format(
                            batch_num, total_valid_loss / batch_num))
                        end = time.time()
                        logger.info("Validation Batch = {0}, time = {1}".format(batch_num, float(end - start)))

                        summaries = [summary_pb2.Summary.Value(tag="Error",
                                                               simple_value=float(total_prediction_err / batch_num))]
                        self.validation_summary_writter.add_summary(
                            summary_pb2.Summary(value=summaries), epoch * total_valid_data + batch_num)
                        logger.info("Error validation batch={0} = {1}".format(
                            batch_num, total_prediction_err / batch_num))
                        start = time.time()

                total_valid_data = batch_num
                logger.info("Total number of validation batches={0}".format(total_valid_data))
                if total_valid_data:
                    logger.info("Loss total VALIDATION for epoch={0} = {1}".format(
                        epoch, total_valid_loss / total_valid_data))
                    logger.info("Error total VALIDATION for epoch={0} = {1}".format(
                        epoch, total_prediction_err / total_valid_data))

        logger.info("Training finally finished")

        self.train_summary_writter.close()
        # The validation writer is created lazily: it stays None when no
        # validation batch ever ran, so guard before closing.
        if self.validation_data_shuffler is not None and self.validation_summary_writter is not None:
            self.validation_summary_writter.close()

        # Saving the final network
        path = os.path.join(self.temp_dir, 'model.ckp')
        self.architecture.save(saver, path)
        with self.session.as_default():
            path = os.path.join(self.temp_dir, 'model.hdf5')
            self.architecture.save_hdf5(bob.io.base.HDF5File(path, 'w'))

        if self.prefetch:
            # now they should definitely stop
            self.train_thread_pool.request_stop()
            # BUGFIX: valid_thread_pool is None when no validation shuffler
            # was given; the original would raise AttributeError here.
            if self.valid_thread_pool is not None:
                self.valid_thread_pool.request_stop()
@@ -19,11 +20,12 @@ def exponential_decay(base_learning_rate=0.05, """ global_step = tf.Variable(0, trainable=False) - return tf.train.exponential_decay(base_learning_rate=base_learning_rate, + return tf.train.exponential_decay(learning_rate=base_learning_rate, global_step=global_step, decay_steps=decay_steps, decay_rate=weight_decay, - staircase=staircase + staircase=staircase, + name=name ) diff --git a/bob/learn/tensorflow/utils/session.py b/bob/learn/tensorflow/utils/session.py index 7709cfa2e7a55926f4ceaf77610a825cd7d40ae7..5a3364193b646aad90f8ebc407d76a35ea2078a9 100644 --- a/bob/learn/tensorflow/utils/session.py +++ b/bob/learn/tensorflow/utils/session.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # vim: set fileencoding=utf-8 : # @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> -# @date: Wed 11 May 2016 09:39:36 CEST +# @date: Wed 11 May 2016 09:39:36 CEST import tensorflow as tf from .singleton import Singleton -from tensorflow.python import debug as tf_debug +# from tensorflow.python import debug as tf_debug @Singleton class Session(object): diff --git a/bob/learn/tensorflow/utils/singleton.py b/bob/learn/tensorflow/utils/singleton.py index 46594bd5edb7004af5957b9c8444c5500d89bea2..2c9d9384726d73aa36fcc7c53a2f24bd806a02d4 100644 --- a/bob/learn/tensorflow/utils/singleton.py +++ b/bob/learn/tensorflow/utils/singleton.py @@ -11,7 +11,6 @@ class Singleton(object): To get the singleton instance, use the :py:meth:`instance` method. Trying to use `__call__` will result in a `TypeError` being raised. Limitations: - * The decorated class cannot be inherited from. * The documentation of the decorated class is replaced with the documentation of this class. 
""" @@ -24,7 +23,6 @@ class Singleton(object): self.__module__ = decorated.__module__ self.__mro__ = decorated.__mro__ self.__bases__ = [] - self._instance = None def create(self, *args, **kwargs): @@ -48,4 +46,4 @@ class Singleton(object): raise TypeError('Singletons must be accessed through the `instance()` method.') def __instancecheck__(self, inst): - return isinstance(inst, self._decorated) + return isinstance(inst, self._decorated) \ No newline at end of file diff --git a/doc/index.rst b/doc/index.rst index f8c5a036858c84dd74616a897145b65cac35965f..6fd19c640ab3fe01d70f187ac8720d67e7024a39 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -3,9 +3,9 @@ .. _bob.learn.tensorflow: -======================== +========================= Tensorflow Bob Bindings -======================== +========================= `bob.learn.tensorflow` is a high-level library, written in Python that runs on top of TensorFlow. The goal here is to be able to do fast experimentation with neural networks. diff --git a/doc/py_api.rst b/doc/py_api.rst index 8c5d922a38587d9da75f16d182b5125f6c7bb39e..53bf15ba86dd707cb10799c773ebcea608e0c0e3 100644 --- a/doc/py_api.rst +++ b/doc/py_api.rst @@ -22,6 +22,8 @@ Architectures bob.learn.tensorflow.network.MLP bob.learn.tensorflow.network.VGG16 bob.learn.tensorflow.network.VGG16_mod + bob.learn.tensorflow.network.SimpleAudio + bob.learn.tensorflow.network.DeeperAudio Trainers @@ -32,6 +34,7 @@ Trainers bob.learn.tensorflow.trainers.Trainer bob.learn.tensorflow.trainers.SiameseTrainer bob.learn.tensorflow.trainers.TripletTrainer + bob.learn.tensorflow.trainers.TrainerSeq Learning rate ------------- @@ -48,11 +51,13 @@ Layers .. 
autosummary:: bob.learn.tensorflow.layers.Layer + bob.learn.tensorflow.layers.Conv1D bob.learn.tensorflow.layers.Conv2D bob.learn.tensorflow.layers.Dropout bob.learn.tensorflow.layers.FullyConnected bob.learn.tensorflow.layers.MaxPooling bob.learn.tensorflow.layers.AveragePooling + bob.learn.tensorflow.layers.LogSoftMax Data Shufflers @@ -72,6 +77,7 @@ Data Shufflers bob.learn.tensorflow.datashuffler.TripletWithFastSelectionDisk bob.learn.tensorflow.datashuffler.TripletWithSelectionDisk bob.learn.tensorflow.datashuffler.OnlineSampling + bob.learn.tensorflow.datashuffler.DiskAudio diff --git a/doc/references.rst b/doc/references.rst index 4d5442553e4effed1fcbbde58697dc580c75589e..2f82bc9321662e7d01db9f769bb4dc5c6548dc2f 100644 --- a/doc/references.rst +++ b/doc/references.rst @@ -1,9 +1,9 @@ .. vim: set fileencoding=utf-8 : .. date: Thu Sep 20 11:58:57 CEST 2012 -=========== +============ References -=========== +============ .. [facenet_2015] Schroff, Florian, Dmitry Kalenichenko, and James Philbin. "Facenet: A unified embedding for face recognition and clustering." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015. \ No newline at end of file diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 4c795a89db0a08f92eb0cd0cc01f14309a224d6d..80335b9dffffc8e23724f31b5af4fb2d33ba14e3 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -2,9 +2,9 @@ .. date: Thu Sep 20 11:58:57 CEST 2012 -=========== +============ User guide -=========== +============ Quick start