Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
bob.learn.tensorflow
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
11
Issues
11
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
bob
bob.learn.tensorflow
Commits
5884afff
Commit
5884afff
authored
Sep 07, 2016
by
Tiago de Freitas Pereira
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Added prefetching and text loading
parent
65a91493
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
675 additions
and
30 deletions
+675
-30
bob/learn/tensorflow/data/BaseDataShuffler.py
bob/learn/tensorflow/data/BaseDataShuffler.py
+68
-0
bob/learn/tensorflow/data/MemoryDataShuffler.py
bob/learn/tensorflow/data/MemoryDataShuffler.py
+72
-0
bob/learn/tensorflow/data/MemoryPairDataShuffler.py
bob/learn/tensorflow/data/MemoryPairDataShuffler.py
+157
-0
bob/learn/tensorflow/data/TextDataShuffler.py
bob/learn/tensorflow/data/TextDataShuffler.py
+96
-0
bob/learn/tensorflow/data/TextPairDataShuffler.py
bob/learn/tensorflow/data/TextPairDataShuffler.py
+157
-0
bob/learn/tensorflow/data/__init__.py
bob/learn/tensorflow/data/__init__.py
+4
-2
bob/learn/tensorflow/data/copy of PairDataShuffler.py
bob/learn/tensorflow/data/copy of PairDataShuffler.py
+0
-0
bob/learn/tensorflow/script/train_mnist.py
bob/learn/tensorflow/script/train_mnist.py
+20
-2
bob/learn/tensorflow/script/train_mnist_siamese.py
bob/learn/tensorflow/script/train_mnist_siamese.py
+21
-4
bob/learn/tensorflow/trainers/SiameseTrainer.py
bob/learn/tensorflow/trainers/SiameseTrainer.py
+2
-1
bob/learn/tensorflow/trainers/Trainer.py
bob/learn/tensorflow/trainers/Trainer.py
+78
-21
No files found.
bob/learn/tensorflow/data/BaseDataShuffler.py
0 → 100644
View file @
5884afff
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
import
tensorflow
as
tf
class BaseDataShuffler(object):
    """Common machinery shared by the concrete data shufflers.

    On construction the sample indexes are shuffled and the counts for a
    train/validation split are computed from ``perc_train``.  Subclasses
    use ``self.indexes`` together with ``n_train_samples`` and
    ``n_validation_samples`` to carve out the two partitions.
    """

    def __init__(self, data, labels, input_shape,
                 perc_train=0.9,
                 scale=True,
                 train_batch_size=1,
                 validation_batch_size=300):
        """
        Store the dataset, compute the batch shapes and shuffle the indexes.

        **Parameters**

        data: the raw samples (array of images or of file names)
        labels: one integer label per sample; assumed to run from 0 to N-1
        input_shape: shape of a single sample, without the batch dimension
        perc_train: fraction of the samples assigned to the train partition
        scale: whether/how samples should be scaled (used by subclasses)
        train_batch_size: number of samples per training batch
        validation_batch_size: number of samples per validation batch
        """
        self.scale = scale
        self.scale_value = 0.00390625  # 1/256, the usual 8-bit image normalizer

        # TODO: Check if the batch size is higher than the input data
        self.train_batch_size = train_batch_size
        self.validation_batch_size = validation_batch_size

        self.data = data
        # Full batch shapes: [batch_size] + input_shape
        self.train_shape = tuple([train_batch_size] + input_shape)
        self.validation_shape = tuple([validation_batch_size] + input_shape)

        # TODO: Check if the labels go from 0 to N-1
        self.labels = labels
        self.total_labels = max(labels) + 1

        # Split sizes for train and validation
        self.n_samples = len(self.labels)
        self.n_train_samples = int(round(self.n_samples * perc_train))
        self.n_validation_samples = self.n_samples - self.n_train_samples

        # Shuffle all the sample indexes once, up front
        self.indexes = numpy.arange(self.n_samples)
        numpy.random.shuffle(self.indexes)

    def get_placeholders_forprefetch(self, name="", train_dataset=True):
        """
        Return ``(data, labels)`` placeholders with an open batch dimension,
        suitable for feeding a prefetch queue.
        """
        shape = self.train_shape if train_dataset else self.validation_shape
        data = tf.placeholder(tf.float32,
                              shape=tuple([None] + list(shape[1:])),
                              name=name)
        labels = tf.placeholder(tf.int64, shape=[None, ])
        return data, labels

    def get_placeholders(self, name="", train_dataset=True):
        """
        Return ``(data, labels)`` placeholders sized exactly to one batch.
        """
        shape = self.train_shape if train_dataset else self.validation_shape
        data = tf.placeholder(tf.float32, shape=shape, name=name)
        labels = tf.placeholder(tf.int64, shape=shape[0])
        return data, labels
bob/learn/tensorflow/data/DataShuffler.py
→
bob/learn/tensorflow/data/
Memory
DataShuffler.py
View file @
5884afff
...
...
@@ -6,6 +6,8 @@
import
numpy
import
tensorflow
as
tf
from
.BaseDataShuffler
import
BaseDataShuffler
def
scale_mean_norm
(
data
,
scale
=
0.00390625
):
mean
=
numpy
.
mean
(
data
)
data
=
(
data
-
mean
)
*
scale
...
...
@@ -13,66 +15,39 @@ def scale_mean_norm(data, scale=0.00390625):
return
data
,
mean
class
DataShuffler
(
object
):
def
__init__
(
self
,
data
,
labels
,
perc_train
=
0.9
,
scale
=
True
,
train_batch_size
=
1
,
validation_batch_size
=
300
):
"""
The class provide some functionalities for shuffling data
**Parameters**
data:
"""
self
.
perc_train
=
perc_train
self
.
scale
=
scale
self
.
scale_value
=
0.00390625
self
.
train_batch_size
=
train_batch_size
self
.
validation_batch_size
=
validation_batch_size
self
.
data
=
data
self
.
labels
=
labels
# From O to N-1
self
.
total_labels
=
max
(
labels
)
+
1
self
.
n_samples
=
self
.
data
.
shape
[
0
]
self
.
width
=
self
.
data
.
shape
[
1
]
self
.
height
=
self
.
data
.
shape
[
2
]
self
.
channels
=
self
.
data
.
shape
[
3
]
self
.
start_shuffler
()
def
get_placeholders
(
self
,
name
=
""
,
train_dataset
=
True
):
"""
"""
batch
=
self
.
train_batch_size
if
train_dataset
else
self
.
validation_batch_size
data
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
batch
,
self
.
width
,
self
.
height
,
self
.
channels
),
name
=
name
)
labels
=
tf
.
placeholder
(
tf
.
int64
,
shape
=
batch
)
return
data
,
labels
def
start_shuffler
(
self
):
class
MemoryDataShuffler
(
BaseDataShuffler
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
perc_train
=
0.9
,
scale
=
True
,
train_batch_size
=
1
,
validation_batch_size
=
300
):
"""
S
ome base functions for neural network
s
S
huffler that deal with memory dataset
s
**Parameters**
data:
labels:
perc_train:
scale:
train_batch_size:
validation_batch_size:
"""
indexes
=
numpy
.
array
(
range
(
self
.
n_samples
))
numpy
.
random
.
shuffle
(
indexes
)
# Spliting train and validation
train_samples
=
int
(
round
(
self
.
n_samples
*
self
.
perc_train
))
validation_samples
=
self
.
n_samples
-
train_samples
self
.
train_data
=
self
.
data
[
indexes
[
0
:
train_samples
],
:,
:,
:]
self
.
train_labels
=
self
.
labels
[
indexes
[
0
:
train_samples
]]
self
.
validation_data
=
self
.
data
[
indexes
[
train_samples
:
train_samples
+
validation_samples
],
:,
:,
:]
self
.
validation_labels
=
self
.
labels
[
indexes
[
train_samples
:
train_samples
+
validation_samples
]]
super
(
MemoryDataShuffler
,
self
).
__init__
(
data
=
data
,
labels
=
labels
,
input_shape
=
input_shape
,
perc_train
=
perc_train
,
scale
=
scale
,
train_batch_size
=
train_batch_size
,
validation_batch_size
=
validation_batch_size
)
# Spliting between train and test
self
.
train_data
=
self
.
data
[
self
.
indexes
[
0
:
self
.
n_train_samples
],
...]
self
.
train_labels
=
self
.
labels
[
self
.
indexes
[
0
:
self
.
n_train_samples
]]
self
.
validation_data
=
self
.
data
[
self
.
indexes
[
self
.
n_train_samples
:
self
.
n_train_samples
+
self
.
n_validation_samples
],
...]
self
.
validation_labels
=
self
.
labels
[
self
.
indexes
[
self
.
n_train_samples
:
self
.
n_train_samples
+
self
.
n_validation_samples
]]
if
self
.
scale
:
# data = scale_minmax_norm(data,lower_bound = -1, upper_bound = 1)
self
.
train_data
,
self
.
mean
=
scale_mean_norm
(
self
.
train_data
)
self
.
validation_data
=
(
self
.
validation_data
-
self
.
mean
)
*
self
.
scale_value
...
...
@@ -80,13 +55,10 @@ class DataShuffler(object):
if
train_dataset
:
n_samples
=
self
.
train_batch_size
else
:
n_samples
=
self
.
validation_batch_size
if
train_dataset
:
data
=
self
.
train_data
label
=
self
.
train_labels
else
:
n_samples
=
self
.
validation_batch_size
data
=
self
.
validation_data
label
=
self
.
validation_labels
...
...
bob/learn/tensorflow/data/MemoryPairDataShuffler.py
0 → 100644
View file @
5884afff
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
from
.MemoryDataShuffler
import
MemoryDataShuffler
class MemoryPairDataShuffler(MemoryDataShuffler):
    """In-memory shuffler that serves pairs and triplets of samples.

    Doubles the training batch size of the parent so that a "batch" holds
    enough samples for pair construction.
    """

    def __init__(self, data, labels, input_shape,
                 perc_train=0.9,
                 scale=True,
                 train_batch_size=1,
                 validation_batch_size=300):
        """
        Forward everything to :class:`MemoryDataShuffler`, doubling the
        training batch size (pairs need two samples per entry).

        **Parameters**

        data: 4-D array of samples (sample, width, height, channel)
        labels: one integer label per sample
        input_shape: shape of a single sample, without the batch dimension
        perc_train: fraction of the samples assigned to the train partition
        scale: whether the samples should be mean/scale normalized
        train_batch_size: number of pairs per training batch
        validation_batch_size: number of samples per validation batch
        """
        # NOTE: the original had a block of no-op self-assignments
        # (data = data, labels = labels, ...) here; removed as dead code.
        super(MemoryPairDataShuffler, self).__init__(
            data, labels,
            input_shape=input_shape,
            perc_train=perc_train,
            scale=scale,
            train_batch_size=train_batch_size * 2,
            validation_batch_size=validation_batch_size
        )

    def get_pair(self, train_dataset=True, zero_one_labels=True):
        """
        Get a batch of random genuine/impostor pairs, alternating between
        the two classes.

        **Parameters**

        train_dataset: pick pairs from the train (True) or validation set
        zero_one_labels: if True labels are 0 (genuine) / 1 (impostor);
            otherwise -1 (genuine) / +1 (impostor)

        **Return**

        ``(data, data_p, labels_siamese)``
        """
        def get_genuine_or_not(input_data, input_labels, genuine=True):
            if genuine:
                # TODO: THIS KEY SELECTION NEEDS TO BE MORE EFFICIENT
                # Getting a client
                index = numpy.random.randint(self.total_labels)

                # Getting the indexes of the data from a particular client
                indexes = numpy.where(input_labels == index)[0]
                numpy.random.shuffle(indexes)

                # Picking a pair
                data = input_data[indexes[0], ...]
                data_p = input_data[indexes[1], ...]
            else:
                # Picking a pair from different clients
                index = numpy.random.choice(self.total_labels, 2, replace=False)

                # Getting the indexes of the two clients
                indexes = numpy.where(input_labels == index[0])[0]
                indexes_p = numpy.where(input_labels == index[1])[0]
                numpy.random.shuffle(indexes)
                numpy.random.shuffle(indexes_p)

                # Picking a pair
                data = input_data[indexes[0], ...]
                data_p = input_data[indexes_p[0], ...]

            return data, data_p

        if train_dataset:
            target_data = self.train_data
            target_labels = self.train_labels
            shape = self.train_shape
        else:
            target_data = self.validation_data
            target_labels = self.validation_labels
            shape = self.validation_shape

        data = numpy.zeros(shape=shape, dtype='float32')
        data_p = numpy.zeros(shape=shape, dtype='float32')
        labels_siamese = numpy.zeros(shape=shape[0], dtype='float32')

        genuine = True
        for i in range(shape[0]):
            data[i, ...], data_p[i, ...] = get_genuine_or_not(
                target_data, target_labels, genuine=genuine)
            if zero_one_labels:
                labels_siamese[i] = not genuine
            else:
                labels_siamese[i] = -1 if genuine else +1
            genuine = not genuine

        return data, data_p, labels_siamese

    def get_triplet(self, n_labels, n_triplets=1, is_target_set_train=True):
        """
        Get a batch of (anchor, positive, negative) triplets.

        **Parameters**

        n_labels: number of distinct labels to draw the pair of clients from
        n_triplets: how many triplets to return
        is_target_set_train: pick from the train (True) or validation set

        **Return**

        ``(data_a, data_p, data_n, labels_a, labels_p, labels_n)``
        """
        def get_one_triplet(input_data, input_labels):
            # Getting a pair of clients
            index = numpy.random.choice(n_labels, 2, replace=False)
            label_positive = index[0]
            label_negative = index[1]

            # Getting the indexes of the data from a particular client
            indexes = numpy.where(input_labels == index[0])[0]
            numpy.random.shuffle(indexes)

            # Picking a positive pair
            data_anchor = input_data[indexes[0], :, :, :]
            data_positive = input_data[indexes[1], :, :, :]

            # Picking a negative sample
            indexes = numpy.where(input_labels == index[1])[0]
            numpy.random.shuffle(indexes)
            data_negative = input_data[indexes[0], :, :, :]

            # anchor and positive share the same label
            return data_anchor, data_positive, data_negative, \
                label_positive, label_positive, label_negative

        if is_target_set_train:
            target_data = self.train_data
            target_labels = self.train_labels
        else:
            target_data = self.validation_data
            target_labels = self.validation_labels

        c = target_data.shape[3]
        w = target_data.shape[1]
        h = target_data.shape[2]

        data_a = numpy.zeros(shape=(n_triplets, w, h, c), dtype='float32')
        data_p = numpy.zeros(shape=(n_triplets, w, h, c), dtype='float32')
        data_n = numpy.zeros(shape=(n_triplets, w, h, c), dtype='float32')
        labels_a = numpy.zeros(shape=n_triplets, dtype='float32')
        labels_p = numpy.zeros(shape=n_triplets, dtype='float32')
        labels_n = numpy.zeros(shape=n_triplets, dtype='float32')

        for i in range(n_triplets):
            data_a[i, :, :, :], data_p[i, :, :, :], data_n[i, :, :, :], \
                labels_a[i], labels_p[i], labels_n[i] = \
                get_one_triplet(target_data, target_labels)

        return data_a, data_p, data_n, labels_a, labels_p, labels_n
bob/learn/tensorflow/data/TextDataShuffler.py
0 → 100644
View file @
5884afff
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
import
bob.io.base
import
bob.io.image
import
tensorflow
as
tf
from
.BaseDataShuffler
import
BaseDataShuffler
#def scale_mean_norm(data, scale=0.00390625):
# mean = numpy.mean(data)
# data = (data - mean) * scale
# return data, mean
class TextDataShuffler(BaseDataShuffler):
    """Shuffler whose ``data`` is a list of file names loaded lazily.

    Samples are read from disk with ``bob.io.base.load`` only when a batch
    is requested.
    """

    def __init__(self, data, labels, input_shape,
                 perc_train=0.9,
                 scale=0.00390625,
                 train_batch_size=1,
                 validation_batch_size=300):
        """
        Shuffler that deals with a file list.

        **Parameters**

        data: file names of the samples
        labels: one integer label per sample
        input_shape: shape of a single sample, without the batch dimension
        perc_train: fraction of the samples assigned to the train partition
        scale: multiplicative scaling factor applied per sample (or None)
        train_batch_size: number of samples per training batch
        validation_batch_size: number of samples per validation batch
        """
        super(TextDataShuffler, self).__init__(
            data=data,
            labels=labels,
            input_shape=input_shape,
            perc_train=perc_train,
            scale=scale,
            train_batch_size=train_batch_size,
            validation_batch_size=validation_batch_size
        )

        # Plain lists become arrays so fancy indexing below works
        if isinstance(self.data, list):
            self.data = numpy.array(self.data)
        if isinstance(self.labels, list):
            self.labels = numpy.array(self.labels)

        # Split between train and validation using the pre-shuffled indexes
        train_idx = self.indexes[0:self.n_train_samples]
        valid_idx = self.indexes[
            self.n_train_samples:self.n_train_samples + self.n_validation_samples]
        self.train_data = self.data[train_idx]
        self.train_labels = self.labels[train_idx]
        self.validation_data = self.data[valid_idx]
        self.validation_labels = self.labels[valid_idx]

    def get_batch(self, train_dataset=True):
        """
        Load one batch of images from disk and return it with its labels.

        **Return**

        ``(selected_data, selected_labels)`` where the data is float32.
        """
        if train_dataset:
            batch_size = self.train_batch_size
            shape = self.train_shape
            files_names = self.train_data
            label = self.train_labels
        else:
            batch_size = self.validation_batch_size
            shape = self.validation_shape
            files_names = self.validation_data
            label = self.validation_labels

        # Shuffle the samples of this partition for every batch
        indexes = numpy.array(range(files_names.shape[0]))
        numpy.random.shuffle(indexes)

        selected_data = numpy.zeros(shape=shape)
        for i in range(batch_size):
            file_name = files_names[indexes[i]]
            d = bob.io.base.load(file_name)
            if len(d.shape) == 2:
                # Gray-scale image: put it in the single channel
                data = numpy.zeros(shape=tuple(shape[1:]))
                data[:, :, 0] = d
            else:
                data = d
            selected_data[i, ...] = data
            if self.scale is not None:
                selected_data[i, ...] *= self.scale

        selected_labels = label[indexes[0:batch_size]]
        return selected_data.astype("float32"), selected_labels
bob/learn/tensorflow/data/TextPairDataShuffler.py
0 → 100644
View file @
5884afff
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
from
.TextDataShuffler
import
TextDataShuffler
class TextPairDataShuffler(TextDataShuffler):
    """File-list shuffler that serves pairs and triplets of samples.

    Doubles the training batch size of the parent so that a "batch" holds
    enough samples for pair construction.
    """

    def __init__(self, data, labels, input_shape,
                 perc_train=0.9,
                 scale=True,
                 train_batch_size=1,
                 validation_batch_size=300):
        """
        Forward everything to :class:`TextDataShuffler`, doubling the
        training batch size (pairs need two samples per entry).

        **Parameters**

        data: file names of the samples
        labels: one integer label per sample
        input_shape: shape of a single sample, without the batch dimension
        perc_train: fraction of the samples assigned to the train partition
        scale: whether/how samples should be scaled
        train_batch_size: number of pairs per training batch
        validation_batch_size: number of samples per validation batch
        """
        # NOTE: the original had a block of no-op self-assignments
        # (data = data, labels = labels, ...) here; removed as dead code.
        super(TextPairDataShuffler, self).__init__(
            data, labels,
            input_shape=input_shape,
            perc_train=perc_train,
            scale=scale,
            train_batch_size=train_batch_size * 2,
            validation_batch_size=validation_batch_size
        )

    def get_pair(self, train_dataset=True, zero_one_labels=True):
        """
        Get a batch of random genuine/impostor pairs of file names,
        alternating between the two classes.

        **Parameters**

        train_dataset: pick pairs from the train (True) or validation set
        zero_one_labels: if True labels are 0 (genuine) / 1 (impostor);
            otherwise -1 (genuine) / +1 (impostor)

        **Return**

        ``(data, data_p, labels_siamese)``
        """
        def get_genuine_or_not(input_data, input_labels, genuine=True):
            if genuine:
                # TODO: THIS KEY SELECTION NEEDS TO BE MORE EFFICIENT
                # Getting a client
                index = numpy.random.randint(self.total_labels)

                # Getting the indexes of the data from a particular client
                indexes = numpy.where(input_labels == index)[0]
                numpy.random.shuffle(indexes)

                # Picking a pair
                data = input_data[indexes[0]]
                data_p = input_data[indexes[1]]
            else:
                # Picking a pair from different clients
                index = numpy.random.choice(self.total_labels, 2, replace=False)

                # Getting the indexes of the two clients
                indexes = numpy.where(input_labels == index[0])[0]
                indexes_p = numpy.where(input_labels == index[1])[0]
                numpy.random.shuffle(indexes)
                numpy.random.shuffle(indexes_p)

                # Picking a pair
                data = input_data[indexes[0]]
                data_p = input_data[indexes_p[0]]

            return data, data_p

        if train_dataset:
            target_data = self.train_data
            target_labels = self.train_labels
            shape = self.train_shape
        else:
            target_data = self.validation_data
            target_labels = self.validation_labels
            shape = self.validation_shape

        data = numpy.zeros(shape=shape, dtype='float32')
        data_p = numpy.zeros(shape=shape, dtype='float32')
        labels_siamese = numpy.zeros(shape=shape[0], dtype='float32')

        genuine = True
        for i in range(shape[0]):
            data[i, ...], data_p[i, ...] = get_genuine_or_not(
                target_data, target_labels, genuine=genuine)
            if zero_one_labels:
                labels_siamese[i] = not genuine
            else:
                labels_siamese[i] = -1 if genuine else +1
            genuine = not genuine

        return data, data_p, labels_siamese

    def get_triplet(self, n_labels, n_triplets=1, is_target_set_train=True):
        """
        Get a batch of (anchor, positive, negative) triplets.

        NOTE(review): this method indexes ``target_data`` as a 4-D image
        array (``[i, :, :, :]``) although this shuffler stores file names;
        it looks copied from the memory-based shuffler — verify before use.

        **Parameters**

        n_labels: number of distinct labels to draw the pair of clients from
        n_triplets: how many triplets to return
        is_target_set_train: pick from the train (True) or validation set

        **Return**

        ``(data_a, data_p, data_n, labels_a, labels_p, labels_n)``
        """
        def get_one_triplet(input_data, input_labels):
            # Getting a pair of clients
            index = numpy.random.choice(n_labels, 2, replace=False)
            label_positive = index[0]
            label_negative = index[1]

            # Getting the indexes of the data from a particular client
            indexes = numpy.where(input_labels == index[0])[0]
            numpy.random.shuffle(indexes)

            # Picking a positive pair
            data_anchor = input_data[indexes[0], :, :, :]
            data_positive = input_data[indexes[1], :, :, :]

            # Picking a negative sample
            indexes = numpy.where(input_labels == index[1])[0]
            numpy.random.shuffle(indexes)
            data_negative = input_data[indexes[0], :, :, :]

            # anchor and positive share the same label
            return data_anchor, data_positive, data_negative, \
                label_positive, label_positive, label_negative

        if is_target_set_train:
            target_data = self.train_data
            target_labels = self.train_labels
        else:
            target_data = self.validation_data
            target_labels = self.validation_labels

        c = target_data.shape[3]
        w = target_data.shape[1]
        h = target_data.shape[2]

        data_a = numpy.zeros(shape=(n_triplets, w, h, c), dtype='float32')
        data_p = numpy.zeros(shape=(n_triplets, w, h, c), dtype='float32')
        data_n = numpy.zeros(shape=(n_triplets, w, h, c), dtype='float32')
        labels_a = numpy.zeros(shape=n_triplets, dtype='float32')
        labels_p = numpy.zeros(shape=n_triplets, dtype='float32')
        labels_n = numpy.zeros(shape=n_triplets, dtype='float32')

        for i in range(n_triplets):
            data_a[i, :, :, :], data_p[i, :, :, :], data_n[i, :, :, :], \
                labels_a[i], labels_p[i], labels_n[i] = \
                get_one_triplet(target_data, target_labels)

        return data_a, data_p, data_n, labels_a, labels_p, labels_n
bob/learn/tensorflow/data/__init__.py
View file @
5884afff
...
...
@@ -2,8 +2,10 @@
from
pkgutil
import
extend_path
__path__
=
extend_path
(
__path__
,
__name__
)
from
.DataShuffler
import
DataShuffler
from
.PairDataShuffler
import
PairDataShuffler
from
.BaseDataShuffler
import
BaseDataShuffler
from
.MemoryDataShuffler
import
MemoryDataShuffler
from
.MemoryPairDataShuffler
import
MemoryPairDataShuffler
from
.TextDataShuffler
import
TextDataShuffler
# gets sphinx autodoc done right - don't remove it
__all__
=
[
_
for
_
in
dir
()
if
not
_
.
startswith
(
'_'
)]
bob/learn/tensorflow/data/PairDataShuffler.py
→
bob/learn/tensorflow/data/
copy of
PairDataShuffler.py
View file @
5884afff
File moved
bob/learn/tensorflow/script/train_mnist.py
View file @
5884afff
...
...
@@ -21,10 +21,11 @@ from docopt import docopt
import
tensorflow
as
tf
from
..
import
util
SEED
=
10
from
bob.learn.tensorflow.data
import
DataShuffler
from
bob.learn.tensorflow.data
import
MemoryDataShuffler
,
Text
DataShuffler
from
bob.learn.tensorflow.network
import
Lenet
from
bob.learn.tensorflow.trainers
import
Trainer
from
bob.learn.tensorflow.loss
import
BaseLoss
import
bob.db.mobio
import
numpy
...
...
@@ -40,7 +41,24 @@ def main():
# Loading data
data
,
labels
=
util
.
load_mnist
(
data_dir
=
"./src/bob.db.mnist/bob/db/mnist/"
)
data
=
numpy
.
reshape
(
data
,
(
data
.
shape
[
0
],
28
,
28
,
1
))
data_shuffler
=
DataShuffler
(
data
,
labels
,
train_batch_size
=
BATCH_SIZE
,
validation_batch_size
=
BATCH_SIZE
*
100
)
data_shuffler
=
MemoryDataShuffler
(
data
,
labels
,
input_shape
=
[
28
,
28
,
1
],
train_batch_size
=
BATCH_SIZE
,
validation_batch_size
=
BATCH_SIZE
*
100
)
#db = bob.db.mobio.Database()
#objects = db.objects(protocol="male")
#labels = [o.client_id for o in objects]
#file_names = [o.make_path(
# directory="/remote/lustre/2/temp/tpereira/FACEREC_EXPERIMENTS/mobio_male/lda/preprocessed",
# extension=".hdf5")
# for o in objects]
#data_shuffler = TextDataShuffler(file_names, labels,
# input_shape=[80, 64, 1],
# train_batch_size=BATCH_SIZE,
# validation_batch_size=BATCH_SIZE*100)
# Preparing the architecture
lenet
=
Lenet
()
...
...
bob/learn/tensorflow/script/train_mnist_siamese.py
View file @
5884afff
...
...
@@ -21,11 +21,11 @@ from docopt import docopt
import
tensorflow
as
tf
from
..
import
util
SEED
=
10
from
bob.learn.tensorflow.data
import
Pair
DataShuffler
from
bob.learn.tensorflow.data
import
MemoryPairDataShuffler
,
Text
DataShuffler
from
bob.learn.tensorflow.network
import
Lenet
from
bob.learn.tensorflow.trainers
import
SiameseTrainer
from
bob.learn.tensorflow.loss
import
ContrastiveLoss
import
bob.db.mobio
import
numpy
def
main
():
...
...
@@ -40,8 +40,25 @@ def main():
# Loading data
data
,
labels
=
util
.
load_mnist
(
data_dir
=
"./src/bob.db.mnist/bob/db/mnist/"
)
data
=
numpy
.
reshape
(
data
,
(
data
.
shape
[
0
],
28
,
28
,
1
))
data_shuffler
=
MemoryPairDataShuffler
(
data
,
labels
,
input_shape
=
[
28
,
28
,
1
],
train_batch_size
=
BATCH_SIZE
,
validation_batch_size
=
BATCH_SIZE
*
1000
)
#db = bob.db.mobio.Database()
#objects = db.objects(protocol="male")
#labels = [o.client_id for o in objects]
#file_names = [o.make_path(
# directory="/remote/lustre/2/temp/tpereira/FACEREC_EXPERIMENTS/mobio_male/lda/preprocessed",
# extension=".hdf5")
# for o in objects]
#data_shuffler = TextDataShuffler(file_names, labels,
# input_shape=[80, 64, 1],