Skip to content
Snippets Groups Projects
Commit 76b877b3 authored by Tiago Pereira's avatar Tiago Pereira
Browse files

Harmonizing audio datashufflers

parent 57d0adb8
No related branches found
No related tags found
No related merge requests found
......@@ -15,7 +15,8 @@ logger.propagate = False
class DiskAudio(Base):
def __init__(self, data, labels,
input_dtype="float64",
input_shape,
input_dtype="float32",
batch_size=1,
seed=10,
data_augmentation=None,
......@@ -24,6 +25,7 @@ class DiskAudio(Base):
rate=16000,
out_file=""
):
"""
This datashuffler deals with speech databases that are stored in the disk.
The data is loaded and preprocessed on the fly.
......@@ -35,8 +37,6 @@ class DiskAudio(Base):
self.m_win_length = self.win_length_ms * rate / 1000 # number of values in a given window
self.m_frame_length = self.m_win_length * (2 * self.context_size + 1)
input_shape = [self.m_frame_length, 1]
if isinstance(data, list):
data = numpy.array(data)
......@@ -66,7 +66,6 @@ class DiskAudio(Base):
# f.write("%d %s\n" % (self.labels[i], str(self.data[i])))
# f.close()
def load_from_file(self, file_name):
rate, audio = readWAV(file_name)
# We consider there is only 1 channel in the audio file => data[0]
......
......@@ -139,99 +139,55 @@ def test_tripletdisk_shuffler():
def test_triplet_fast_selection_disk_shuffler():
train_data, train_labels = get_dummy_files()
batch_shape = [1, 125, 125, 3]
batch_shape = [None, 125, 125, 3]
data_shuffler = TripletWithFastSelectionDisk(train_data, train_labels,
input_shape=batch_shape[1:],
input_shape=batch_shape,
total_identities=1,
batch_size=batch_shape[0])
batch_size=1)
batch = data_shuffler.get_batch()
assert len(batch) == 3
assert batch[0].shape == tuple(batch_shape)
assert batch[1].shape == tuple(batch_shape)
assert batch[2].shape == tuple(batch_shape)
assert len(batch[0].shape) == len(tuple(batch_shape))
assert len(batch[1].shape) == len(tuple(batch_shape))
assert len(batch[2].shape) == len(tuple(batch_shape))
def test_triplet_selection_disk_shuffler():
train_data, train_labels = get_dummy_files()
batch_shape = [1, 125, 125, 3]
batch_shape = [None, 125, 125, 3]
data_shuffler = TripletWithSelectionDisk(train_data, train_labels,
input_shape=batch_shape[1:],
input_shape=batch_shape,
total_identities=1,
batch_size=batch_shape[0])
batch_size=1)
batch = data_shuffler.get_batch()
assert len(batch) == 3
assert batch[0].shape == tuple(batch_shape)
assert batch[1].shape == tuple(batch_shape)
assert batch[2].shape == tuple(batch_shape)
assert len(batch[0].shape) == len(tuple(batch_shape))
assert len(batch[1].shape) == len(tuple(batch_shape))
assert len(batch[2].shape) == len(tuple(batch_shape))
placeholders = data_shuffler.get_placeholders(name="train")
assert placeholders[0].get_shape().as_list() == batch_shape
assert placeholders[1].get_shape().as_list() == batch_shape
assert placeholders[2].get_shape().as_list() == batch_shape
placeholders = data_shuffler("data", from_queue=False)
assert placeholders['anchor'].get_shape().as_list() == batch_shape
assert placeholders['positive'].get_shape().as_list() == batch_shape
assert placeholders['negative'].get_shape().as_list() == batch_shape
def test_diskaudio_shuffler():
train_data, train_labels = get_dummy_audiofiles()
batch_shape = [582, 6560, 1]
data_shuffler = DiskAudio(train_data, train_labels, batch_size=batch_shape[0])
batch_size = 582
batch_shape = [None, 6560, 1]
data_shuffler = DiskAudio(train_data, train_labels, batch_shape, batch_size=batch_size)
batch = data_shuffler.get_batch()
assert len(batch) == 2
assert batch[0].shape == tuple(batch_shape)
assert batch[1].shape[0] == batch_shape[0]
placeholders = data_shuffler.get_placeholders(name="train")
assert placeholders[0].get_shape().as_list() == batch_shape
assert placeholders[1].get_shape().as_list()[0] == batch_shape[0]
"""
Some unit tests that create networks on the fly
"""
batch_size = 16
validation_batch_size = 400
iterations = 50
seed = 10
directory = "./temp/cnn_scratch"
def scratch_network():
# Creating a random network
scratch = SequenceNetwork(default_feature_layer="fc1")
scratch.add(Conv2D(name="conv1", kernel_size=3,
filters=10,
activation=tf.nn.tanh,
batch_norm=False))
scratch.add(FullyConnected(name="fc1", output_dim=10,
activation=None,
batch_norm=False
))
return scratch
def validate_network(validation_data, validation_labels, network):
# Testing
validation_data_shuffler = Memory(validation_data, validation_labels,
input_shape=[28, 28, 1],
batch_size=validation_batch_size)
[data, labels] = validation_data_shuffler.get_batch()
predictions = network.predict(data)
accuracy = 100. * numpy.sum(predictions == labels) / predictions.shape[0]
assert batch[0].shape[0] == batch_size
assert batch[1].shape[0] == batch_size
return accuracy
placeholders = data_shuffler("data", from_queue=False)
assert placeholders.get_shape().as_list() == batch_shape
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment