Commit 76b877b3 authored by Tiago Pereira
Browse files

Harmonizing audio datashufflers

parent 57d0adb8
......@@ -15,7 +15,8 @@ logger.propagate = False
class DiskAudio(Base):
def __init__(self, data, labels,
input_dtype="float64",
input_shape,
input_dtype="float32",
batch_size=1,
seed=10,
data_augmentation=None,
......@@ -24,6 +25,7 @@ class DiskAudio(Base):
rate=16000,
out_file=""
):
"""
This datashuffler deals with speech databases that are stored in the disk.
The data is loaded and preprocessed on the fly.
......@@ -35,8 +37,6 @@ class DiskAudio(Base):
self.m_win_length = self.win_length_ms * rate / 1000 # number of values in a given window
self.m_frame_length = self.m_win_length * (2 * self.context_size + 1)
input_shape = [self.m_frame_length, 1]
if isinstance(data, list):
data = numpy.array(data)
......@@ -66,7 +66,6 @@ class DiskAudio(Base):
# f.write("%d %s\n" % (self.labels[i], str(self.data[i])))
# f.close()
def load_from_file(self, file_name):
rate, audio = readWAV(file_name)
# We consider there is only 1 channel in the audio file => data[0]
......
......@@ -139,99 +139,55 @@ def test_tripletdisk_shuffler():
def test_triplet_fast_selection_disk_shuffler():
# Builds a TripletWithFastSelectionDisk shuffler from the dummy files and
# checks that get_batch() returns three arrays compatible with batch_shape.
#
# NOTE(review): this span looks like a flattened diff hunk (the +/- markers
# and the indentation are lost), so several statements appear twice.
# Presumably the first variant of each pair (leading dimension 1,
# input_shape=batch_shape[1:], batch_size=batch_shape[0], exact shape
# equality) is the pre-commit text and the second variant (leading dimension
# None, input_shape=batch_shape, batch_size=1, rank-only comparison) is the
# post-commit text -- confirm against the repository before reusing it.
train_data, train_labels = get_dummy_files()
batch_shape = [1, 125, 125, 3]
batch_shape = [None, 125, 125, 3]
data_shuffler = TripletWithFastSelectionDisk(train_data, train_labels,
input_shape=batch_shape[1:],
input_shape=batch_shape,
total_identities=1,
batch_size=batch_shape[0])
batch_size=1)
batch = data_shuffler.get_batch()
# One array per triplet member is expected.
assert len(batch) == 3
# Exact shape comparison; only meaningful while batch_shape[0] is a concrete
# integer.
assert batch[0].shape == tuple(batch_shape)
assert batch[1].shape == tuple(batch_shape)
assert batch[2].shape == tuple(batch_shape)
# Rank-only comparison; this still holds when the leading entry of
# batch_shape is None.
assert len(batch[0].shape) == len(tuple(batch_shape))
assert len(batch[1].shape) == len(tuple(batch_shape))
assert len(batch[2].shape) == len(tuple(batch_shape))
def test_triplet_selection_disk_shuffler():
# Builds a TripletWithSelectionDisk shuffler from the dummy files, checks the
# three arrays returned by get_batch(), then checks the shapes of the
# placeholders exposed by the shuffler.
#
# NOTE(review): this span looks like a flattened diff hunk (the +/- markers
# and the indentation are lost), so several statements appear twice.
# Presumably the first variant of each pair is the pre-commit text and the
# second variant is the post-commit text -- confirm against the repository
# before reusing it.
train_data, train_labels = get_dummy_files()
batch_shape = [1, 125, 125, 3]
batch_shape = [None, 125, 125, 3]
data_shuffler = TripletWithSelectionDisk(train_data, train_labels,
input_shape=batch_shape[1:],
input_shape=batch_shape,
total_identities=1,
batch_size=batch_shape[0])
batch_size=1)
batch = data_shuffler.get_batch()
# One array per triplet member is expected.
assert len(batch) == 3
# Exact shape comparison; only meaningful while batch_shape[0] is a concrete
# integer.
assert batch[0].shape == tuple(batch_shape)
assert batch[1].shape == tuple(batch_shape)
assert batch[2].shape == tuple(batch_shape)
# Rank-only comparison; this still holds when the leading entry of
# batch_shape is None.
assert len(batch[0].shape) == len(tuple(batch_shape))
assert len(batch[1].shape) == len(tuple(batch_shape))
assert len(batch[2].shape) == len(tuple(batch_shape))
# Variant 1: placeholders fetched with get_placeholders() and read by
# position.
placeholders = data_shuffler.get_placeholders(name="train")
assert placeholders[0].get_shape().as_list() == batch_shape
assert placeholders[1].get_shape().as_list() == batch_shape
assert placeholders[2].get_shape().as_list() == batch_shape
# Variant 2: the shuffler is called directly and the result is read by the
# keys 'anchor', 'positive' and 'negative'.
placeholders = data_shuffler("data", from_queue=False)
assert placeholders['anchor'].get_shape().as_list() == batch_shape
assert placeholders['positive'].get_shape().as_list() == batch_shape
assert placeholders['negative'].get_shape().as_list() == batch_shape
def test_diskaudio_shuffler():
# Builds a DiskAudio shuffler from the dummy audio files and checks the
# (data, labels) pair returned by get_batch() plus the placeholder shapes.
#
# NOTE(review): this span looks like a flattened diff hunk (the +/- markers
# and the indentation are lost). Presumably the first batch_shape/DiskAudio
# pair is the pre-commit text and the batch_size/batch_shape/DiskAudio triple
# after it is the post-commit text, in which batch_shape is passed as the
# third positional argument and batch_size is given separately -- confirm
# against the repository before reusing it.
train_data, train_labels = get_dummy_audiofiles()
batch_shape = [582, 6560, 1]
data_shuffler = DiskAudio(train_data, train_labels, batch_size=batch_shape[0])
batch_size = 582
batch_shape = [None, 6560, 1]
data_shuffler = DiskAudio(train_data, train_labels, batch_shape, batch_size=batch_size)
batch = data_shuffler.get_batch()
# The batch is expected to hold two items, read below as batch[0] and batch[1].
assert len(batch) == 2
# These comparisons read batch_shape[0] as a concrete batch size, so they
# presumably belong to the variant where batch_shape[0] == 582 -- TODO confirm.
assert batch[0].shape == tuple(batch_shape)
assert batch[1].shape[0] == batch_shape[0]
placeholders = data_shuffler.get_placeholders(name="train")
assert placeholders[0].get_shape().as_list() == batch_shape
assert placeholders[1].get_shape().as_list()[0] == batch_shape[0]
"""
Some unit tests that create networks on the fly
"""
# Module-level settings for the network tests.
# NOTE(review): validation_batch_size is read by validate_network; the other
# names are presumably consumed by code outside this excerpt -- confirm.
batch_size = 16
validation_batch_size = 400
iterations = 50
seed = 10
directory = "./temp/cnn_scratch"
def scratch_network():
    """
    Build a two-layer SequenceNetwork: a "conv1" Conv2D layer followed by an
    "fc1" FullyConnected layer, with "fc1" set as the default feature layer.

    Returns the assembled SequenceNetwork.
    """
    network = SequenceNetwork(default_feature_layer="fc1")

    # Convolutional layer: kernel size 3, 10 filters, tanh activation.
    conv_layer = Conv2D(name="conv1", kernel_size=3, filters=10,
                        activation=tf.nn.tanh, batch_norm=False)
    network.add(conv_layer)

    # Fully connected layer: 10 outputs, no activation.
    fc_layer = FullyConnected(name="fc1", output_dim=10,
                              activation=None, batch_norm=False)
    network.add(fc_layer)

    return network
def validate_network(validation_data, validation_labels, network):
# Draws one batch of validation_batch_size samples from a Memory shuffler,
# runs network.predict() on it and returns the percentage of predictions
# equal to the labels.
validation_data_shuffler = Memory(validation_data, validation_labels,
input_shape=[28, 28, 1],
batch_size=validation_batch_size)
[data, labels] = validation_data_shuffler.get_batch()
predictions = network.predict(data)
# Percentage of matching predictions over the number of predictions.
accuracy = 100. * numpy.sum(predictions == labels) / predictions.shape[0]
# NOTE(review): `batch` is not defined anywhere in this function. These two
# asserts presumably belong to the other side of the flattened diff (a
# shuffler test that checks batch[...].shape[0] against batch_size) and were
# interleaved here when the page was extracted -- confirm before reuse.
assert batch[0].shape[0] == batch_size
assert batch[1].shape[0] == batch_size
return accuracy
# The shuffler is called directly and the result is used as a single
# placeholder (no indexing) whose static shape must equal batch_shape.
# NOTE(review): these two statements presumably form the tail of a shuffler
# test on the post-commit side of the flattened diff -- confirm which one.
placeholders = data_shuffler("data", from_queue=False)
assert placeholders.get_shape().as_list() == batch_shape
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment