Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.learn.tensorflow
Commits
86f2c9cf
Commit
86f2c9cf
authored
Sep 08, 2016
by
Tiago de Freitas Pereira
Browse files
Reorganized shufflers
parent
5884afff
Changes
6
Hide whitespace changes
Inline
Side-by-side
bob/learn/tensorflow/analyzers/Analizer.py
View file @
86f2c9cf
...
...
@@ -49,6 +49,8 @@ class Analizer:
enroll_features
=
self
.
machine
(
enroll_data
,
session
=
self
.
session
)
del
enroll_data
#import ipdb; ipdb.set_trace();
# Extracting features for probing
probe_data
,
probe_labels
=
self
.
data_shuffler
.
get_batch
(
train_dataset
=
False
)
probe_features
=
self
.
machine
(
probe_data
,
session
=
self
.
session
)
...
...
@@ -56,22 +58,23 @@ class Analizer:
# Creating models
models
=
[]
for
i
in
range
(
self
.
data_shuffler
.
total
_labels
):
indexes_model
=
numpy
.
where
(
enroll_labels
==
i
)[
0
]
for
i
in
range
(
len
(
self
.
data_shuffler
.
possible
_labels
)
)
:
indexes_model
=
numpy
.
where
(
enroll_labels
==
self
.
data_shuffler
.
possible_labels
[
i
]
)[
0
]
models
.
append
(
numpy
.
mean
(
enroll_features
[
indexes_model
,
:],
axis
=
0
))
# Probing
positive_scores
=
numpy
.
zeros
(
shape
=
0
)
negative_scores
=
numpy
.
zeros
(
shape
=
0
)
for
i
in
range
(
self
.
data_shuffler
.
total_labels
):
for
i
in
range
(
len
(
self
.
data_shuffler
.
possible_labels
)):
#for i in self.data_shuffler.possible_labels:
# Positive scoring
indexes
=
probe_labels
==
i
indexes
=
probe_labels
==
self
.
data_shuffler
.
possible_labels
[
i
]
positive_data
=
probe_features
[
indexes
,
:]
p
=
[
cosine
(
models
[
i
],
positive_data
[
j
])
for
j
in
range
(
positive_data
.
shape
[
0
])]
positive_scores
=
numpy
.
hstack
((
positive_scores
,
p
))
# negative scoring
indexes
=
probe_labels
!=
i
indexes
=
probe_labels
!=
self
.
data_shuffler
.
possible_labels
[
i
]
negative_data
=
probe_features
[
indexes
,
:]
n
=
[
cosine
(
models
[
i
],
negative_data
[
j
])
for
j
in
range
(
negative_data
.
shape
[
0
])]
negative_scores
=
numpy
.
hstack
((
negative_scores
,
n
))
...
...
bob/learn/tensorflow/data/BaseDataShuffler.py
View file @
86f2c9cf
...
...
@@ -8,7 +8,13 @@ import tensorflow as tf
class
BaseDataShuffler
(
object
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
perc_train
=
0.9
,
scale
=
True
,
train_batch_size
=
1
,
validation_batch_size
=
300
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
input_dtype
=
"float64"
,
perc_train
=
0.9
,
scale
=
True
,
train_batch_size
=
1
,
validation_batch_size
=
300
):
"""
This class provides the base functionalities to shuffle the data
...
...
@@ -23,6 +29,7 @@ class BaseDataShuffler(object):
self
.
scale
=
scale
self
.
scale_value
=
0.00390625
self
.
input_dtype
=
input_dtype
# TODO: Check if the batch size is higher than the input data
self
.
train_batch_size
=
train_batch_size
...
...
@@ -34,9 +41,9 @@ class BaseDataShuffler(object):
# TODO: Check if the labels go from 0 to N-1
self
.
labels
=
labels
self
.
total
_labels
=
max
(
labels
)
+
1
self
.
possible
_labels
=
list
(
set
(
self
.
labels
)
)
#
Spliting in
train and validation
#
Computing the data samples for
train and validation
self
.
n_samples
=
len
(
self
.
labels
)
self
.
n_train_samples
=
int
(
round
(
self
.
n_samples
*
perc_train
))
self
.
n_validation_samples
=
self
.
n_samples
-
self
.
n_train_samples
...
...
@@ -45,6 +52,15 @@ class BaseDataShuffler(object):
self
.
indexes
=
numpy
.
array
(
range
(
self
.
n_samples
))
numpy
.
random
.
shuffle
(
self
.
indexes
)
# Splitting the data between train and validation
self
.
train_data
=
self
.
data
[
self
.
indexes
[
0
:
self
.
n_train_samples
],
...]
self
.
train_labels
=
self
.
labels
[
self
.
indexes
[
0
:
self
.
n_train_samples
]]
self
.
validation_data
=
self
.
data
[
self
.
indexes
[
self
.
n_train_samples
:
self
.
n_train_samples
+
self
.
n_validation_samples
],
...]
self
.
validation_labels
=
self
.
labels
[
self
.
indexes
[
self
.
n_train_samples
:
self
.
n_train_samples
+
self
.
n_validation_samples
]]
def
get_placeholders_forprefetch
(
self
,
name
=
""
,
train_dataset
=
True
):
"""
Returns a place holder with the size of your batch
...
...
@@ -66,3 +82,35 @@ class BaseDataShuffler(object):
labels
=
tf
.
placeholder
(
tf
.
int64
,
shape
=
shape
[
0
])
return
data
,
labels
def get_genuine_or_not(self, input_data, input_labels, genuine=True):
    """
    Pick a random pair of samples, either from one client or from two clients.

    **Parameters**
      input_data: Array-like indexed along its first axis; one entry per sample.
      input_labels: Labels of the samples in `input_data` (same ordering).
      genuine: If True both samples come from the same client (label);
               otherwise the samples come from two different clients.

    **Return**
      Tuple `(data, data_p)` with the two selected entries of `input_data`.

    **Raises**
      ValueError: If `input_labels` does not hold enough samples to build the
                  requested pair (a client with two samples for a genuine
                  pair, two clients with one sample each otherwise).
    """
    input_labels = numpy.asarray(input_labels)

    # Only clients that really occur in `input_labels` often enough can be
    # drawn. `self.possible_labels` may contain clients that are absent
    # (or present only once) in the subset given here; drawing them used to
    # raise IndexError when reading indexes[0] / indexes[1].
    required = 2 if genuine else 1
    present, counts = numpy.unique(input_labels, return_counts=True)
    candidates = present[counts >= required]
    candidates = candidates[numpy.isin(candidates, list(self.possible_labels))]

    if genuine:
        if len(candidates) == 0:
            raise ValueError("No client with at least two samples to build a genuine pair")

        # Getting a client
        client = candidates[numpy.random.randint(len(candidates))]

        # Picking two distinct samples of that client
        indexes = numpy.where(input_labels == client)[0]
        first, second = numpy.random.choice(indexes, 2, replace=False)

        return input_data[first, ...], input_data[second, ...]

    if len(candidates) < 2:
        raise ValueError("At least two clients with samples are needed to build an impostor pair")

    # Picking a pair of different clients. The labels are kept in their own
    # variables instead of being written back into the integer position
    # array, which only worked for integer labels.
    positions = numpy.random.choice(len(candidates), 2, replace=False)
    client = candidates[positions[0]]
    client_p = candidates[positions[1]]

    # Picking one sample of each client
    indexes = numpy.where(input_labels == client)[0]
    indexes_p = numpy.where(input_labels == client_p)[0]
    data = input_data[numpy.random.choice(indexes), ...]
    data_p = input_data[numpy.random.choice(indexes_p), ...]

    return data, data_p
bob/learn/tensorflow/data/MemoryDataShuffler.py
View file @
86f2c9cf
...
...
@@ -16,7 +16,14 @@ def scale_mean_norm(data, scale=0.00390625):
class
MemoryDataShuffler
(
BaseDataShuffler
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
perc_train
=
0.9
,
scale
=
True
,
train_batch_size
=
1
,
validation_batch_size
=
300
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
input_dtype
=
"float64"
,
perc_train
=
0.9
,
scale
=
True
,
train_batch_size
=
1
,
validation_batch_size
=
300
):
"""
Shuffler that deals with memory datasets
...
...
@@ -33,23 +40,19 @@ class MemoryDataShuffler(BaseDataShuffler):
data
=
data
,
labels
=
labels
,
input_shape
=
input_shape
,
input_dtype
=
input_dtype
,
perc_train
=
perc_train
,
scale
=
scale
,
train_batch_size
=
train_batch_size
,
validation_batch_size
=
validation_batch_size
)
# Spliting between train and test
self
.
train_data
=
self
.
data
[
self
.
indexes
[
0
:
self
.
n_train_samples
],
...]
self
.
train_labels
=
self
.
labels
[
self
.
indexes
[
0
:
self
.
n_train_samples
]]
self
.
train_data
=
self
.
train_data
.
astype
(
input_dtype
)
self
.
validation_data
=
self
.
validation_data
.
astype
(
input_dtype
)
self
.
validation_data
=
self
.
data
[
self
.
indexes
[
self
.
n_train_samples
:
self
.
n_train_samples
+
self
.
n_validation_samples
],
...]
self
.
validation_labels
=
self
.
labels
[
self
.
indexes
[
self
.
n_train_samples
:
self
.
n_train_samples
+
self
.
n_validation_samples
]]
if
self
.
scale
:
self
.
train_data
,
self
.
mean
=
scale_mean_norm
(
self
.
train_data
)
self
.
validation_data
=
(
self
.
validation_data
-
self
.
mean
)
*
self
.
scale_value
self
.
train_data
*=
self
.
scale_value
self
.
validation_data
*
=
self
.
scale_value
def
get_batch
(
self
,
train_dataset
=
True
):
...
...
@@ -70,3 +73,37 @@ class MemoryDataShuffler(BaseDataShuffler):
selected_labels
=
label
[
indexes
[
0
:
n_samples
]]
return
selected_data
.
astype
(
"float32"
),
selected_labels
def get_pair(self, train_dataset=True, zero_one_labels=True):
    """
    Build a batch of sample pairs, alternating genuine and impostor pairs.

    **Parameters**
      train_dataset: If True the pairs are drawn from `self.train_data` /
                     `self.train_labels` and sized by `self.train_shape`;
                     otherwise the validation counterparts are used.
      zero_one_labels: If True a genuine pair is labelled 0 and an impostor
                       pair 1; otherwise genuine is -1 and impostor is +1.

    **Return**
      Tuple `(data, data_p, labels_siamese)`: two float32 arrays with the
      selected shape and a float32 vector with one label per pair.
    """
    if train_dataset:
        source = self.train_data
        source_labels = self.train_labels
        batch_shape = self.train_shape
    else:
        source = self.validation_data
        source_labels = self.validation_labels
        batch_shape = self.validation_shape

    left = numpy.zeros(shape=batch_shape, dtype='float32')
    right = numpy.zeros(shape=batch_shape, dtype='float32')
    pair_labels = numpy.zeros(shape=batch_shape[0], dtype='float32')

    # Label values used for a genuine and for an impostor pair
    if zero_one_labels:
        genuine_value, impostor_value = False, True
    else:
        genuine_value, impostor_value = -1, +1

    for position in range(batch_shape[0]):
        # The first pair is genuine, then the kind alternates
        is_genuine = position % 2 == 0
        left[position, ...], right[position, ...] = self.get_genuine_or_not(
            source, source_labels, genuine=is_genuine)
        pair_labels[position] = genuine_value if is_genuine else impostor_value

    return left, right, pair_labels
bob/learn/tensorflow/data/TextDataShuffler.py
View file @
86f2c9cf
...
...
@@ -18,7 +18,13 @@ from .BaseDataShuffler import BaseDataShuffler
class
TextDataShuffler
(
BaseDataShuffler
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
perc_train
=
0.9
,
scale
=
0.00390625
,
train_batch_size
=
1
,
validation_batch_size
=
300
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
input_dtype
=
"float64"
,
perc_train
=
0.9
,
scale
=
True
,
train_batch_size
=
1
,
validation_batch_size
=
300
):
"""
Shuffler that deals with file lists
...
...
@@ -31,30 +37,32 @@ class TextDataShuffler(BaseDataShuffler):
validation_batch_size:
"""
if
isinstance
(
data
,
list
):
data
=
numpy
.
array
(
data
)
if
isinstance
(
labels
,
list
):
labels
=
numpy
.
array
(
labels
)
super
(
TextDataShuffler
,
self
).
__init__
(
data
=
data
,
labels
=
labels
,
input_shape
=
input_shape
,
input_dtype
=
input_dtype
,
perc_train
=
perc_train
,
scale
=
scale
,
train_batch_size
=
train_batch_size
,
validation_batch_size
=
validation_batch_size
)
if
isinstance
(
self
.
data
,
list
):
self
.
data
=
numpy
.
array
(
self
.
data
)
if
isinstance
(
self
.
labels
,
list
):
self
.
labels
=
numpy
.
array
(
self
.
labels
)
# Spliting between train and test
self
.
train_data
=
self
.
data
[
self
.
indexes
[
0
:
self
.
n_train_samples
]]
self
.
train_labels
=
self
.
labels
[
self
.
indexes
[
0
:
self
.
n_train_samples
]]
def
load_from_file
(
self
,
file_name
,
shape
):
d
=
bob
.
io
.
base
.
load
(
file_name
)
if
len
(
d
.
shape
)
==
2
:
data
=
numpy
.
zeros
(
shape
=
tuple
(
shape
[
1
:]))
data
[:,
:,
0
]
=
d
else
:
data
=
d
self
.
validation_data
=
self
.
data
[
self
.
indexes
[
self
.
n_train_samples
:
self
.
n_train_samples
+
self
.
n_validation_samples
]]
self
.
validation_labels
=
self
.
labels
[
self
.
indexes
[
self
.
n_train_samples
:
self
.
n_train_samples
+
self
.
n_validation_samples
]]
return
data
def
get_batch
(
self
,
train_dataset
=
True
):
...
...
@@ -77,20 +85,54 @@ class TextDataShuffler(BaseDataShuffler):
for
i
in
range
(
batch_size
):
file_name
=
files_names
[
indexes
[
i
]]
d
=
bob
.
io
.
base
.
load
(
file_name
)
if
len
(
d
.
shape
)
==
2
:
data
=
numpy
.
zeros
(
shape
=
tuple
(
shape
[
1
:]))
data
[:,
:,
0
]
=
d
else
:
data
=
d
data
=
self
.
load_from_file
(
file_name
,
shape
)
selected_data
[
i
,
...]
=
data
if
self
.
scale
is
not
None
:
selected_data
[
i
,
...]
*=
self
.
scale
if
self
.
scale
:
selected_data
[
i
,
...]
*=
self
.
scale_value
selected_labels
=
label
[
indexes
[
0
:
batch_size
]]
return
selected_data
.
astype
(
"float32"
),
selected_labels
def get_pair(self, train_dataset=True, zero_one_labels=True):
    """
    Build a batch of sample pairs loaded from files, alternating genuine and
    impostor pairs.

    **Parameters**
      train_dataset: If True the file names are drawn from `self.train_data` /
                     `self.train_labels` and the batch is sized by
                     `self.train_shape`; otherwise the validation counterparts
                     are used.
      zero_one_labels: If True a genuine pair is labelled 0 and an impostor
                       pair 1; otherwise genuine is -1 and impostor is +1.

    **Return**
      Tuple `(data, data_p, labels_siamese)`: two float32 arrays with the
      selected shape and a float32 vector with one label per pair.
    """
    if train_dataset:
        file_list = self.train_data
        file_labels = self.train_labels
        batch_shape = self.train_shape
    else:
        file_list = self.validation_data
        file_labels = self.validation_labels
        batch_shape = self.validation_shape

    left = numpy.zeros(shape=batch_shape, dtype='float32')
    right = numpy.zeros(shape=batch_shape, dtype='float32')
    pair_labels = numpy.zeros(shape=batch_shape[0], dtype='float32')

    # Label values used for a genuine and for an impostor pair
    if zero_one_labels:
        genuine_value, impostor_value = False, True
    else:
        genuine_value, impostor_value = -1, +1

    for position in range(batch_shape[0]):
        # The first pair is genuine, then the kind alternates
        is_genuine = position % 2 == 0
        path, path_p = self.get_genuine_or_not(file_list, file_labels, genuine=is_genuine)

        # Both members of the pair are read through load_from_file
        left[position, ...] = self.load_from_file(str(path), batch_shape)
        right[position, ...] = self.load_from_file(str(path_p), batch_shape)

        pair_labels[position] = genuine_value if is_genuine else impostor_value

    # NOTE(review): scaling is applied once to the whole batch after every
    # pair has been loaded; confirm against the repository copy
    if self.scale:
        left *= self.scale_value
        right *= self.scale_value

    return left, right, pair_labels
bob/learn/tensorflow/script/train_mnist.py
View file @
86f2c9cf
...
...
@@ -39,26 +39,28 @@ def main():
perc_train
=
0.9
# Loading data
data
,
labels
=
util
.
load_mnist
(
data_dir
=
"./src/bob.db.mnist/bob/db/mnist/"
)
data
=
numpy
.
reshape
(
data
,
(
data
.
shape
[
0
],
28
,
28
,
1
))
data_shuffler
=
MemoryDataShuffler
(
data
,
labels
,
input_shape
=
[
28
,
28
,
1
],
train_batch_size
=
BATCH_SIZE
,
validation_batch_size
=
BATCH_SIZE
*
100
)
#db = bob.db.mobio.Database()
#objects = db.objects(protocol="male")
#labels = [o.client_id for o in objects]
#file_names = [o.make_path(
# directory="/remote/lustre/2/temp/tpereira/FACEREC_EXPERIMENTS/mobio_male/lda/preprocessed",
# extension=".hdf5")
# for o in objects]
#data_shuffler = TextDataShuffler(file_names, labels,
# input_shape=[80, 64, 1],
# train_batch_size=BATCH_SIZE,
# validation_batch_size=BATCH_SIZE*100)
#data, labels = util.load_mnist(data_dir="./src/bob.db.mnist/bob/db/mnist/")
#data = numpy.reshape(data, (data.shape[0], 28, 28, 1))
#data_shuffler = MemoryDataShuffler(data, labels,
# input_shape=[28, 28, 1],
# train_batch_size=BATCH_SIZE,
# validation_batch_size=BATCH_SIZE*100)
db
=
bob
.
db
.
mobio
.
Database
()
objects
=
db
.
objects
(
protocol
=
"male"
)
labels
=
[
o
.
client_id
for
o
in
objects
]
file_names
=
[
o
.
make_path
(
directory
=
"/remote/lustre/2/temp/tpereira/FACEREC_EXPERIMENTS/mobio_male/lda/preprocessed"
,
extension
=
".hdf5"
)
for
o
in
objects
]
data_shuffler
=
TextDataShuffler
(
file_names
,
labels
,
input_shape
=
[
80
,
64
,
1
],
train_batch_size
=
BATCH_SIZE
,
validation_batch_size
=
BATCH_SIZE
*
100
)
# Preparing the architecture
lenet
=
Lenet
()
...
...
bob/learn/tensorflow/script/train_mnist_siamese.py
View file @
86f2c9cf
...
...
@@ -21,7 +21,7 @@ from docopt import docopt
import
tensorflow
as
tf
from
..
import
util
SEED
=
10
from
bob.learn.tensorflow.data
import
Memory
Pair
DataShuffler
,
TextDataShuffler
from
bob.learn.tensorflow.data
import
MemoryDataShuffler
,
TextDataShuffler
from
bob.learn.tensorflow.network
import
Lenet
from
bob.learn.tensorflow.trainers
import
SiameseTrainer
from
bob.learn.tensorflow.loss
import
ContrastiveLoss
...
...
@@ -40,11 +40,11 @@ def main():
# Loading data
data
,
labels
=
util
.
load_mnist
(
data_dir
=
"./src/bob.db.mnist/bob/db/mnist/"
)
data
=
numpy
.
reshape
(
data
,
(
data
.
shape
[
0
],
28
,
28
,
1
))
data_shuffler
=
Memory
Pair
DataShuffler
(
data
,
labels
,
input_shape
=
[
28
,
28
,
1
],
train_batch_size
=
BATCH_SIZE
,
validatio
n_batch_size
=
BATCH_SIZE
*
1000
)
data_shuffler
=
MemoryDataShuffler
(
data
,
labels
,
input_shape
=
[
28
,
28
,
1
],
scale
=
True
,
trai
n_batch_size
=
BATCH_SIZE
,
validation_batch_size
=
BATCH_SIZE
*
1000
)
#db = bob.db.mobio.Database()
#objects = db.objects(protocol="male")
...
...
@@ -54,11 +54,11 @@ def main():
# directory="/remote/lustre/2/temp/tpereira/FACEREC_EXPERIMENTS/mobio_male/lda/preprocessed",
# extension=".hdf5")
# for o in objects]
#data_shuffler = TextDataShuffler(file_names, labels,
# input_shape=[80, 64, 1],
# train_batch_size=BATCH_SIZE,
# validation_batch_size=BATCH_SIZE*100)
# validation_batch_size=BATCH_SIZE*500)
# Preparing the architecture
lenet
=
Lenet
(
default_feature_layer
=
"fc2"
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment