Skip to content
Snippets Groups Projects
Commit f965e6b6 authored by Pavel KORSHUNOV's avatar Pavel KORSHUNOV
Browse files

Merge branch 'epoch' into 'master'

Merge branch with audio-stuff into master

See merge request !6
parents a3fa9ee1 bb3677e5
No related branches found
No related tags found
1 merge request!6Merge branch with audio-stuff into master
Pipeline #
Showing
with 153 additions and 43 deletions
...@@ -3,4 +3,8 @@ def get_config(): ...@@ -3,4 +3,8 @@ def get_config():
Returns a string containing the configuration information. Returns a string containing the configuration information.
""" """
import bob.extension import bob.extension
return bob.extension.get_config(__name__) return bob.extension.get_config(__name__)
\ No newline at end of file
# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
\ No newline at end of file
...@@ -11,7 +11,7 @@ import bob.measure ...@@ -11,7 +11,7 @@ import bob.measure
from tensorflow.core.framework import summary_pb2 from tensorflow.core.framework import summary_pb2
from scipy.spatial.distance import cosine from scipy.spatial.distance import cosine
from bob.learn.tensorflow.datashuffler import Memory, Disk from ..datashuffler import Memory, Disk
class ExperimentAnalizer: class ExperimentAnalizer:
......
...@@ -7,7 +7,7 @@ import numpy ...@@ -7,7 +7,7 @@ import numpy
import tensorflow as tf import tensorflow as tf
import bob.ip.base import bob.ip.base
import numpy import numpy
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
class Base(object): class Base(object):
......
...@@ -11,7 +11,7 @@ import bob.core ...@@ -11,7 +11,7 @@ import bob.core
from .Base import Base from .Base import Base
logger = bob.core.log.setup("bob.learn.tensorflow") logger = bob.core.log.setup("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
class Disk(Base): class Disk(Base):
......
...@@ -4,13 +4,15 @@ ...@@ -4,13 +4,15 @@
# @date: Wed 19 Oct 23:43:22 2016 # @date: Wed 19 Oct 23:43:22 2016
import numpy import numpy
import bob.core
from .Base import Base from .Base import Base
#import time
from scipy.io.wavfile import read as readWAV from scipy.io.wavfile import read as readWAV
logger = bob.core.log.setup("bob.learn.tensorflow") # logger = bob.core.log.setup("bob.learn.tensorflow")
logger.propagate = False import logging
logger = logging.getLogger("bob.learn.tensorflow")
class DiskAudio(Base): class DiskAudio(Base):
...@@ -22,7 +24,7 @@ class DiskAudio(Base): ...@@ -22,7 +24,7 @@ class DiskAudio(Base):
context_size=20, context_size=20,
win_length_ms=10, win_length_ms=10,
rate=16000, rate=16000,
out_file="" out_file="temp.txt"
): ):
""" """
This datashuffler deals with speech databases that are stored in the disk. This datashuffler deals with speech databases that are stored in the disk.
...@@ -32,7 +34,7 @@ class DiskAudio(Base): ...@@ -32,7 +34,7 @@ class DiskAudio(Base):
self.out_file = out_file self.out_file = out_file
self.context_size = context_size self.context_size = context_size
self.win_length_ms = win_length_ms self.win_length_ms = win_length_ms
self.m_win_length = self.win_length_ms * rate / 1000 # number of values in a given window self.m_win_length = int(self.win_length_ms * rate / 1000) # number of values in a given window
self.m_frame_length = self.m_win_length * (2 * self.context_size + 1) self.m_frame_length = self.m_win_length * (2 * self.context_size + 1)
input_shape = [self.m_frame_length, 1] input_shape = [self.m_frame_length, 1]
...@@ -55,10 +57,18 @@ class DiskAudio(Base): ...@@ -55,10 +57,18 @@ class DiskAudio(Base):
# Seting the seed # Seting the seed
numpy.random.seed(seed) numpy.random.seed(seed)
# self.max_queue_size = 20000
# a flexible queue that stores audio frames extracted from files # a flexible queue that stores audio frames extracted from files
# self.frames_storage = queue.Queue(self.max_queue_size)
self.frames_storage = [] self.frames_storage = []
# a similar queue for the corresponding labels # a similar queue for the corresponding labels
self.labels_storage = [] self.labels_storage = []
self.indices = None
self.cur_index = 0
self.data_finished = False
# if self.out_file != "": # if self.out_file != "":
# bob.io.base.create_directories_safe(os.path.dirname(self.out_file)) # bob.io.base.create_directories_safe(os.path.dirname(self.out_file))
# f = open(self.out_file, "w") # f = open(self.out_file, "w")
...@@ -66,7 +76,6 @@ class DiskAudio(Base): ...@@ -66,7 +76,6 @@ class DiskAudio(Base):
# f.write("%d %s\n" % (self.labels[i], str(self.data[i]))) # f.write("%d %s\n" % (self.labels[i], str(self.data[i])))
# f.close() # f.close()
def load_from_file(self, file_name): def load_from_file(self, file_name):
rate, audio = readWAV(file_name) rate, audio = readWAV(file_name)
# We consider there is only 1 channel in the audio file => data[0] # We consider there is only 1 channel in the audio file => data[0]
...@@ -74,33 +83,62 @@ class DiskAudio(Base): ...@@ -74,33 +83,62 @@ class DiskAudio(Base):
return rate, data return rate, data
def get_batch(self, noise=False): def randomized_indices(self, max_size):
# Shuffling samples # Shuffling samples
indexes = numpy.array(range(self.data.shape[0])) indices = numpy.array(range(max_size))
numpy.random.shuffle(indexes) numpy.random.shuffle(indices)
return indices
def get_batch(self, noise=False):
# start = time.time()
if self.data_finished:
return None, None
if self.indices is None or self.cur_index == 0:
self.indices = self.randomized_indices(self.data.shape[0])
f = None f = None
if self.out_file != "": if self.out_file != "":
f = open(self.out_file, "a") f = open(self.out_file, "a")
i = 0
i = self.cur_index
# if not enough in the storage, we pre-load frames from the audio files # if not enough in the storage, we pre-load frames from the audio files
while len(self.frames_storage) < self.batch_size: while len(self.labels_storage) < self.batch_size and i < self.indices.shape[0]:
if f is not None: if f is not None:
f.write("%s\n" % self.data[indexes[i]]) f.write("%s\n" % self.data[self.indices[i]])
frames, labels = self.extract_frames_from_file(self.data[indexes[i]], self.labels[indexes[i]]) frames, labels = self.extract_frames_from_file(self.data[self.indices[i]], self.labels[self.indices[i]])
self.frames_storage.extend(frames) self.frames_storage.extend(frames)
self.labels_storage.extend(labels) self.labels_storage.extend(labels)
i += 1 i += 1
self.cur_index = i
if f is not None:
f.close()
f= None
# if we ran through the whole data already (we ignore the last incomplete batch)
# self.indices is a list of file names
# so, we want exit current datashuffling thread if cur_index reached the end of the file list
if self.cur_index >= self.indices.shape[0] and len(self.labels_storage) < self.batch_size:
# reset everything
self.frames_storage = []
self.labels_storage = []
self.cur_index = 0
self.data_finished = True
return None, None
# our temp frame queue should have enough data # our temp frame queue should have enough data
selected_data = numpy.asarray(self.frames_storage[:self.batch_size]) selected_data = numpy.asarray(self.frames_storage[:self.batch_size])
selected_labels = numpy.asarray(self.labels_storage[:self.batch_size]) selected_labels = numpy.asarray(self.labels_storage[:self.batch_size])
# remove them from the list # remove them from the list
del self.frames_storage[:self.batch_size] del self.frames_storage[:self.batch_size]
del self.labels_storage[:self.batch_size] del self.labels_storage[:self.batch_size]
selected_data = numpy.reshape(selected_data, (self.batch_size, -1, 1)) selected_data = numpy.reshape(selected_data, (self.batch_size, -1, 1))
if f is not None:
f.close() # end = time.time()
return [selected_data.astype("float32"), selected_labels.astype("int64")] # logger.info("Get Batch time = {0}".format(float(end - start)))
return [selected_data, selected_labels.astype("int64")]
def extract_frames_from_file(self, filename, label): def extract_frames_from_file(self, filename, label):
rate, wav_signal = self.load_from_file(filename) rate, wav_signal = self.load_from_file(filename)
...@@ -118,8 +156,11 @@ class DiskAudio(Base): ...@@ -118,8 +156,11 @@ class DiskAudio(Base):
# make sure the array is divided into equal chunks # make sure the array is divided into equal chunks
windows = numpy.split(wav_signal[:self.m_win_length * m_num_win], m_num_win) windows = numpy.split(wav_signal[:self.m_win_length * m_num_win], m_num_win)
final_frames = [] # final_frames = []
final_labels = [label] * m_num_win # final_labels = [label] * m_num_win
final_frames = numpy.empty([m_num_win, self.m_frame_length], dtype=numpy.float32)
final_labels = label * numpy.ones(m_num_win, dtype=numpy.int64)
# loop through the windows # loop through the windows
for i, window in zip(range(0, len(windows)), windows): for i, window in zip(range(0, len(windows)), windows):
# window with surrounding context will form the frame we seek # window with surrounding context will form the frame we seek
...@@ -128,11 +169,13 @@ class DiskAudio(Base): ...@@ -128,11 +169,13 @@ class DiskAudio(Base):
# copy the first frame necessary number of times # copy the first frame necessary number of times
if i < self.context_size: if i < self.context_size:
left_context = numpy.tile(windows[0], self.context_size - i) left_context = numpy.tile(windows[0], self.context_size - i)
final_frames.append(numpy.append(left_context, windows[:i + self.context_size + 1])) # final_frames.append(numpy.append(left_context, windows[:i + self.context_size + 1]))
final_frames[i, :] = numpy.append(left_context, windows[:i + self.context_size + 1])
elif (i + self.context_size) > (m_num_win - 1): elif (i + self.context_size) > (m_num_win - 1):
right_context = numpy.tile(windows[-1], i + self.context_size - m_num_win + 1) right_context = numpy.tile(windows[-1], i + self.context_size - m_num_win + 1)
final_frames.append(numpy.append(windows[i - self.context_size:], right_context)) # final_frames.append(numpy.append(windows[i - self.context_size:], right_context))
final_frames[i, :] = numpy.append(windows[i - self.context_size:], right_context)
else: else:
final_frames.append(numpy.ravel(windows[i - self.context_size:i + self.context_size + 1])) # final_frames.append(numpy.ravel(windows[i - self.context_size:i + self.context_size + 1]))
final_frames[i, :] = numpy.ravel(windows[i - self.context_size:i + self.context_size + 1])
return final_frames, final_labels return final_frames, final_labels
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
import numpy import numpy
from .Base import Base from .Base import Base
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
import tensorflow as tf import tensorflow as tf
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
import tensorflow as tf import tensorflow as tf
from .Base import Base from .Base import Base
from bob.learn.tensorflow.network import SequenceNetwork from ..network import SequenceNetwork
class OnlineSampling(object): class OnlineSampling(object):
......
...@@ -10,7 +10,7 @@ logger = bob.core.log.setup("bob.learn.tensorflow") ...@@ -10,7 +10,7 @@ logger = bob.core.log.setup("bob.learn.tensorflow")
from .Disk import Disk from .Disk import Disk
from .Siamese import Siamese from .Siamese import Siamese
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
class SiameseDisk(Siamese, Disk): class SiameseDisk(Siamese, Disk):
......
...@@ -8,7 +8,7 @@ import numpy ...@@ -8,7 +8,7 @@ import numpy
from .Memory import Memory from .Memory import Memory
from .Siamese import Siamese from .Siamese import Siamese
import tensorflow as tf import tensorflow as tf
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
class SiameseMemory(Siamese, Memory): class SiameseMemory(Siamese, Memory):
......
...@@ -14,7 +14,7 @@ import tensorflow as tf ...@@ -14,7 +14,7 @@ import tensorflow as tf
from .Disk import Disk from .Disk import Disk
from .Triplet import Triplet from .Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
class TripletDisk(Triplet, Disk): class TripletDisk(Triplet, Disk):
......
...@@ -8,7 +8,7 @@ import tensorflow as tf ...@@ -8,7 +8,7 @@ import tensorflow as tf
from .Memory import Memory from .Memory import Memory
from .Triplet import Triplet from .Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
class TripletMemory(Triplet, Memory): class TripletMemory(Triplet, Memory):
......
...@@ -13,7 +13,7 @@ from scipy.spatial.distance import euclidean, cdist ...@@ -13,7 +13,7 @@ from scipy.spatial.distance import euclidean, cdist
import logging import logging
logger = logging.getLogger("bob.learn") logger = logging.getLogger("bob.learn")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
class TripletWithFastSelectionDisk(Triplet, Disk, OnlineSampling): class TripletWithFastSelectionDisk(Triplet, Disk, OnlineSampling):
......
...@@ -14,7 +14,7 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear ...@@ -14,7 +14,7 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear
import logging import logging
logger = logging.getLogger("bob.learn.tensorflow") logger = logging.getLogger("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
class TripletWithSelectionDisk(Triplet, Disk, OnlineSampling): class TripletWithSelectionDisk(Triplet, Disk, OnlineSampling):
......
...@@ -9,7 +9,7 @@ import tensorflow as tf ...@@ -9,7 +9,7 @@ import tensorflow as tf
from .OnlineSampling import OnlineSampling from .OnlineSampling import OnlineSampling
from .Memory import Memory from .Memory import Memory
from .Triplet import Triplet from .Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from .Normalizer import Linear
from scipy.spatial.distance import euclidean, cdist from scipy.spatial.distance import euclidean, cdist
import logging import logging
......
# see https://docs.python.org/3/library/pkgutil.html # see https://docs.python.org/3/library/pkgutil.html
from .Normalizer import ScaleFactor, MeanOffset, Linear
from .Base import Base from .Base import Base
from .OnlineSampling import OnlineSampling from .OnlineSampling import OnlineSampling
from .Siamese import Siamese from .Siamese import Siamese
...@@ -18,8 +19,6 @@ from .TripletWithSelectionDisk import TripletWithSelectionDisk ...@@ -18,8 +19,6 @@ from .TripletWithSelectionDisk import TripletWithSelectionDisk
from .DataAugmentation import DataAugmentation from .DataAugmentation import DataAugmentation
from .ImageAugmentation import ImageAugmentation from .ImageAugmentation import ImageAugmentation
from .Normalizer import ScaleFactor, MeanOffset, Linear
from .DiskAudio import DiskAudio from .DiskAudio import DiskAudio
# gets sphinx autodoc done right - don't remove it # gets sphinx autodoc done right - don't remove it
......
...@@ -70,13 +70,19 @@ class Conv1D(Layer): ...@@ -70,13 +70,19 @@ class Conv1D(Layer):
self.init_value = init_value self.init_value = init_value
def create_variables(self, input_layer): def create_variables(self, input_layer):
self.input_layer = input_layer
# TODO: Do an assert here # TODO: Do an assert here
if len(input_layer.get_shape().as_list()) != 3: input_shape = input_layer.get_shape().as_list()
raise ValueError("The input as a convolutional layer must have 3 dimensions, " if len(input_shape) != 3:
"but {0} were provided".format(len(input_layer.get_shape().as_list()))) if len(input_shape) == 4:
n_channels = input_layer.get_shape().as_list()[2] self.input_layer = tf.reshape(input_layer, [-1, input_shape[2], input_shape[3]])
else:
raise ValueError("The input as a convolutional layer must have 3 dimensions, "
"but {0} were provided".format(len(input_layer.get_shape().as_list())))
else:
self.input_layer = input_layer
print("Conv1 layer shape: ", self.input_layer.get_shape().as_list())
n_channels = self.input_layer.get_shape().as_list()[2]
if self.W is None: if self.W is None:
if self.init_value is None: if self.init_value is None:
......
...@@ -101,6 +101,7 @@ class FullyConnected(Layer): ...@@ -101,6 +101,7 @@ class FullyConnected(Layer):
else: else:
fc = self.input_layer fc = self.input_layer
print("FC layer shape: ", fc.get_shape().as_list())
if self.batch_norm: if self.batch_norm:
fc = self.batch_normalize(fc, training_phase) fc = self.batch_normalize(fc, training_phase)
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 17:38 CEST
import tensorflow as tf
from .Layer import Layer
class LogSoftMax(Layer):
    """
    Layer that applies ``tf.nn.log_softmax`` to its input tensor.

    **Parameters**

    name: str
      The name of the layer; also used as the tensorflow name scope

    batch_norm: bool
      Do batch norm on the log-softmax output?

    activation:
      Tensor Flow activation; called on the output when it is not None

    use_gpu: bool
      Forwarded unchanged to the base ``Layer`` constructor

    """

    def __init__(self, name,
                 batch_norm=False,
                 activation=None,
                 use_gpu=False):
        # All options are handed to the base class; this layer keeps no
        # state of its own besides the input tensor set in create_variables.
        super(LogSoftMax, self).__init__(name,
                                         use_gpu=use_gpu,
                                         activation=activation,
                                         batch_norm=batch_norm)

    def create_variables(self, input_layer):
        """
        Remember the tensor this layer is plugged on.

        No variables are created here; the input is only stored so that
        ``get_graph`` can read it later.
        """
        self.input_layer = input_layer

    def get_graph(self, training_phase=True):
        """
        Build and return the output tensor of the layer.

        The stored input goes through ``tf.nn.log_softmax``, then through
        ``self.batch_normalize`` when ``self.batch_norm`` is truthy, and
        finally through ``self.activation`` when one is set.
        NOTE(review): ``batch_norm``, ``activation`` and ``batch_normalize``
        are presumably provided by the base ``Layer`` - confirm there.
        """
        with tf.name_scope(str(self.name)):
            log_probs = tf.nn.log_softmax(self.input_layer)

            if self.batch_norm:
                log_probs = self.batch_normalize(log_probs, training_phase)

            if self.activation is not None:
                log_probs = self.activation(log_probs)

            return log_probs
...@@ -39,7 +39,12 @@ class MaxPooling(Layer): ...@@ -39,7 +39,12 @@ class MaxPooling(Layer):
self.strides = strides self.strides = strides
def create_variables(self, input_layer): def create_variables(self, input_layer):
self.input_layer = input_layer if len(input_layer.get_shape()) == 3:
shape = input_layer.get_shape().as_list()
self.input_layer = tf.reshape(input_layer, [-1, 1, shape[1], shape[2]])
else:
self.input_layer = input_layer
print("MaxPooling layer shape: ", self.input_layer.get_shape().as_list())
return return
def get_graph(self, training_phase=True): def get_graph(self, training_phase=True):
......
...@@ -7,6 +7,7 @@ from .MaxPooling import MaxPooling ...@@ -7,6 +7,7 @@ from .MaxPooling import MaxPooling
from .AveragePooling import AveragePooling from .AveragePooling import AveragePooling
from .Dropout import Dropout from .Dropout import Dropout
from .InputLayer import InputLayer from .InputLayer import InputLayer
from .LogSoftMax import LogSoftMax
# gets sphinx autodoc done right - don't remove it # gets sphinx autodoc done right - don't remove it
...@@ -32,6 +33,7 @@ __appropriate__( ...@@ -32,6 +33,7 @@ __appropriate__(
AveragePooling, AveragePooling,
Dropout, Dropout,
InputLayer, InputLayer,
LogSoftMax,
) )
__all__ = [_ for _ in dir() if not _.startswith('_')] __all__ = [_ for _ in dir() if not _.startswith('_')]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment