Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
bob.learn.tensorflow
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
11
Issues
11
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
bob
bob.learn.tensorflow
Commits
b132e782
Commit
b132e782
authored
Oct 13, 2016
by
Tiago de Freitas Pereira
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Redesign the data shuffler
parent
ee0db09e
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
167 additions
and
326 deletions
+167
-326
bob/learn/tensorflow/datashuffler/Base.py
bob/learn/tensorflow/datashuffler/Base.py
+8
-2
bob/learn/tensorflow/datashuffler/Disk.py
bob/learn/tensorflow/datashuffler/Disk.py
+10
-5
bob/learn/tensorflow/datashuffler/Memory.py
bob/learn/tensorflow/datashuffler/Memory.py
+7
-3
bob/learn/tensorflow/datashuffler/Siamese.py
bob/learn/tensorflow/datashuffler/Siamese.py
+20
-1
bob/learn/tensorflow/datashuffler/SiameseDisk.py
bob/learn/tensorflow/datashuffler/SiameseDisk.py
+11
-129
bob/learn/tensorflow/datashuffler/SiameseMemory.py
bob/learn/tensorflow/datashuffler/SiameseMemory.py
+6
-2
bob/learn/tensorflow/datashuffler/Triplet.py
bob/learn/tensorflow/datashuffler/Triplet.py
+37
-1
bob/learn/tensorflow/datashuffler/TripletDisk.py
bob/learn/tensorflow/datashuffler/TripletDisk.py
+9
-125
bob/learn/tensorflow/datashuffler/TripletMemory.py
bob/learn/tensorflow/datashuffler/TripletMemory.py
+6
-2
bob/learn/tensorflow/datashuffler/__init__.py
bob/learn/tensorflow/datashuffler/__init__.py
+5
-0
bob/learn/tensorflow/script/train_mnist.py
bob/learn/tensorflow/script/train_mnist.py
+40
-49
bob/learn/tensorflow/trainers/Trainer.py
bob/learn/tensorflow/trainers/Trainer.py
+8
-7
No files found.
bob/learn/tensorflow/datashuffler/Base.py
View file @
b132e782
...
...
@@ -6,6 +6,7 @@
import
numpy
import
tensorflow
as
tf
import
bob.ip.base
import
numpy
class
Base
(
object
):
...
...
@@ -13,9 +14,10 @@ class Base(object):
input_shape
,
input_dtype
=
"float64"
,
scale
=
True
,
batch_size
=
1
):
batch_size
=
1
,
seed
=
10
):
"""
The class provide base function
oalies to shuffle the data
The class provide base function
alities to shuffle the data before to train a neural network
**Parameters**
data:
...
...
@@ -24,12 +26,16 @@ class Base(object):
scale:
train_batch_size:
validation_batch_size:
seed: Seed for the random number generator
"""
self
.
seed
=
seed
numpy
.
random
.
seed
(
seed
)
self
.
scale
=
scale
self
.
scale_value
=
0.00390625
self
.
input_dtype
=
input_dtype
# TODO: Check if the bacth size is higher than the input data
self
.
batch_size
=
batch_size
...
...
bob/learn/tensorflow/datashuffler/Disk.py
View file @
b132e782
...
...
@@ -18,9 +18,11 @@ class Disk(Base):
input_shape
,
input_dtype
=
"float64"
,
scale
=
True
,
batch_size
=
1
):
batch_size
=
1
,
seed
=
10
):
"""
Shuffler that deal with file list
This datashuffler deal with databases that are stored in the disk.
The data is loaded on the fly,.
**Parameters**
data:
...
...
@@ -43,13 +45,16 @@ class Disk(Base):
input_shape
=
input_shape
,
input_dtype
=
input_dtype
,
scale
=
scale
,
batch_size
=
batch_size
batch_size
=
batch_size
,
seed
=
seed
)
# Seting the seed
numpy
.
random
.
seed
(
seed
)
# TODO: very bad solution to deal with bob.shape images an tf shape images
self
.
bob_shape
=
tuple
([
input_shape
[
2
]]
+
list
(
input_shape
[
0
:
2
]))
def
load_from_file
(
self
,
file_name
,
shape
):
def
load_from_file
(
self
,
file_name
):
d
=
bob
.
io
.
base
.
load
(
file_name
)
if
d
.
shape
[
0
]
!=
3
and
self
.
input_shape
[
2
]
!=
3
:
# GRAY SCALE IMAGE
data
=
numpy
.
zeros
(
shape
=
(
d
.
shape
[
0
],
d
.
shape
[
1
],
1
))
...
...
@@ -61,7 +66,7 @@ class Disk(Base):
# Checking NaN
if
numpy
.
sum
(
numpy
.
isnan
(
data
))
>
0
:
logger
.
warning
(
"#########
Imag
e {0} has noise #########"
.
format
(
file_name
))
logger
.
warning
(
"#########
Sampl
e {0} has noise #########"
.
format
(
file_name
))
return
data
...
...
bob/learn/tensorflow/datashuffler/Memory.py
View file @
b132e782
...
...
@@ -14,9 +14,10 @@ class Memory(Base):
input_shape
,
input_dtype
=
"float64"
,
scale
=
True
,
batch_size
=
1
):
batch_size
=
1
,
seed
=
10
):
"""
Shuffler that deal with memory datasets
This datashuffler deal with databases that are stored in a :py:class`numpy.array`
**Parameters**
data:
...
...
@@ -33,8 +34,11 @@ class Memory(Base):
input_shape
=
input_shape
,
input_dtype
=
input_dtype
,
scale
=
scale
,
batch_size
=
batch_size
batch_size
=
batch_size
,
seed
=
seed
)
# Seting the seed
numpy
.
random
.
seed
(
seed
)
self
.
data
=
self
.
data
.
astype
(
input_dtype
)
if
self
.
scale
:
...
...
bob/learn/tensorflow/datashuffler/Siamese.py
View file @
b132e782
...
...
@@ -10,7 +10,11 @@ import tensorflow as tf
class
Siamese
(
Base
):
"""
Siamese Shuffler base class
This datashuffler deal with databases that are provides data to Siamese networks.
Basically the py:meth:`get_batch` method provides you 3 elements in the returned list.
The first two are the batch data, and the last is the label. Either `0` for samples from the same class or `1`
for samples from different class.
"""
def
__init__
(
self
,
**
kwargs
):
...
...
@@ -32,6 +36,21 @@ class Siamese(Base):
return
[
self
.
data_placeholder
,
self
.
data2_placeholder
,
self
.
label_placeholder
]
def
get_placeholders_forprefetch
(
self
,
name
=
""
):
"""
Returns a place holder with the size of your batch
"""
if
self
.
data_placeholder
is
None
:
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
if
self
.
data2_placeholder
is
None
:
self
.
data2_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
if
self
.
label_placeholder
is
None
:
self
.
label_placeholder
=
tf
.
placeholder
(
tf
.
int64
,
shape
=
[
None
,
])
return
[
self
.
data_placeholder
,
self
.
data2_placeholder
,
self
.
label_placeholder
]
def
get_genuine_or_not
(
self
,
input_data
,
input_labels
,
genuine
=
True
):
if
genuine
:
...
...
bob/learn/tensorflow/datashuffler/SiameseDisk.py
View file @
b132e782
...
...
@@ -4,29 +4,20 @@
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
import
bob.io.base
import
bob.io.image
import
bob.ip.base
import
bob.core
logger
=
bob
.
core
.
log
.
setup
(
"bob.learn.tensorflow"
)
import
tensorflow
as
tf
from
.Disk
import
Disk
from
.Siamese
import
Siamese
from
.BaseDataShuffler
import
BaseDataShuffler
#def scale_mean_norm(data, scale=0.00390625):
# mean = numpy.mean(data)
# data = (data - mean) * scale
# return data, mean
class
TextDataShuffler
(
BaseDataShuffler
):
class
SiameseDisk
(
Siamese
,
Disk
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
input_dtype
=
"float64"
,
scale
=
True
,
batch_size
=
1
):
batch_size
=
1
,
seed
=
10
):
"""
Shuffler that deal with file list
...
...
@@ -45,101 +36,22 @@ class TextDataShuffler(BaseDataShuffler):
if
isinstance
(
labels
,
list
):
labels
=
numpy
.
array
(
labels
)
super
(
TextDataShuffler
,
self
).
__init__
(
super
(
SiameseDisk
,
self
).
__init__
(
data
=
data
,
labels
=
labels
,
input_shape
=
input_shape
,
input_dtype
=
input_dtype
,
scale
=
scale
,
batch_size
=
batch_size
batch_size
=
batch_size
,
seed
=
seed
)
# Seting the seed
numpy
.
random
.
seed
(
seed
)
# TODO: very bad solution to deal with bob.shape images an tf shape images
self
.
bob_shape
=
tuple
([
input_shape
[
2
]]
+
list
(
input_shape
[
0
:
2
]))
def
load_from_file
(
self
,
file_name
,
shape
):
d
=
bob
.
io
.
base
.
load
(
file_name
)
if
d
.
shape
[
0
]
!=
3
and
self
.
input_shape
[
2
]
!=
3
:
# GRAY SCALE IMAGE
data
=
numpy
.
zeros
(
shape
=
(
d
.
shape
[
0
],
d
.
shape
[
1
],
1
))
data
[:,
:,
0
]
=
d
data
=
self
.
rescale
(
data
)
else
:
d
=
self
.
rescale
(
d
)
data
=
self
.
bob2skimage
(
d
)
# Checking NaN
if
numpy
.
sum
(
numpy
.
isnan
(
data
))
>
0
:
logger
.
warning
(
"######### Image {0} has noise #########"
.
format
(
file_name
))
return
data
def
bob2skimage
(
self
,
bob_image
):
"""
Convert bob color image to the skcit image
"""
skimage
=
numpy
.
zeros
(
shape
=
(
bob_image
.
shape
[
1
],
bob_image
.
shape
[
2
],
3
))
skimage
[:,
:,
0
]
=
bob_image
[
0
,
:,
:]
#Copying red
skimage
[:,
:,
1
]
=
bob_image
[
1
,
:,
:]
#Copying green
skimage
[:,
:,
2
]
=
bob_image
[
2
,
:,
:]
#Copying blue
return
skimage
def
get_batch
(
self
):
# Shuffling samples
indexes
=
numpy
.
array
(
range
(
self
.
data
.
shape
[
0
]))
numpy
.
random
.
shuffle
(
indexes
)
selected_data
=
numpy
.
zeros
(
shape
=
self
.
shape
)
for
i
in
range
(
self
.
batch_size
):
file_name
=
self
.
data
[
indexes
[
i
]]
data
=
self
.
load_from_file
(
file_name
,
self
.
shape
)
selected_data
[
i
,
...]
=
data
if
self
.
scale
:
selected_data
[
i
,
...]
*=
self
.
scale_value
selected_labels
=
self
.
labels
[
indexes
[
0
:
self
.
batch_size
]]
return
selected_data
.
astype
(
"float32"
),
selected_labels
def
rescale
(
self
,
data
):
"""
Reescale a single sample with input_shape
"""
#if self.input_shape != data.shape:
if
self
.
bob_shape
!=
data
.
shape
:
# TODO: Implement a better way to do this reescaling
# If it is gray scale
if
self
.
input_shape
[
2
]
==
1
:
copy
=
data
[:,
:,
0
].
copy
()
dst
=
numpy
.
zeros
(
shape
=
self
.
input_shape
[
0
:
2
])
bob
.
ip
.
base
.
scale
(
copy
,
dst
)
dst
=
numpy
.
reshape
(
dst
,
self
.
input_shape
)
else
:
#dst = numpy.resize(data, self.bob_shape) # Scaling with numpy, because bob is c,w,d instead of w,h,c
dst
=
numpy
.
zeros
(
shape
=
self
.
bob_shape
)
# TODO: LAME SOLUTION
if
data
.
shape
[
0
]
!=
3
:
# GRAY SCALE IMAGES IN A RGB DATABASE
step_data
=
numpy
.
zeros
(
shape
=
(
3
,
data
.
shape
[
0
],
data
.
shape
[
1
]))
step_data
[
0
,
...]
=
data
[:,
:]
step_data
[
1
,
...]
=
data
[:,
:]
step_data
[
2
,
...]
=
data
[:,
:]
data
=
step_data
bob
.
ip
.
base
.
scale
(
data
,
dst
)
return
dst
else
:
return
data
def
get_pair
(
self
,
zero_one_labels
=
True
):
"""
Get a random pair of samples
...
...
@@ -159,10 +71,7 @@ class TextDataShuffler(BaseDataShuffler):
data
[
i
,
...]
=
self
.
load_from_file
(
str
(
file_name
),
self
.
shape
)
data_p
[
i
,
...]
=
self
.
load_from_file
(
str
(
file_name_p
),
self
.
shape
)
if
zero_one_labels
:
labels_siamese
[
i
]
=
not
genuine
else
:
labels_siamese
[
i
]
=
-
1
if
genuine
else
+
1
labels_siamese
[
i
]
=
not
genuine
genuine
=
not
genuine
if
self
.
scale
:
...
...
@@ -170,30 +79,3 @@ class TextDataShuffler(BaseDataShuffler):
data_p
*=
self
.
scale_value
return
data
,
data_p
,
labels_siamese
def
get_random_triplet
(
self
):
"""
Get a random pair of samples
**Parameters**
is_target_set_train: Defining the target set to get the batch
**Return**
"""
data_a
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_p
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_n
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
for
i
in
range
(
self
.
shape
[
0
]):
file_name_a
,
file_name_p
,
file_name_n
=
self
.
get_one_triplet
(
self
.
data
,
self
.
labels
)
data_a
[
i
,
...]
=
self
.
load_from_file
(
str
(
file_name_a
),
self
.
shape
)
data_p
[
i
,
...]
=
self
.
load_from_file
(
str
(
file_name_p
),
self
.
shape
)
data_n
[
i
,
...]
=
self
.
load_from_file
(
str
(
file_name_n
),
self
.
shape
)
if
self
.
scale
:
data_a
*=
self
.
scale_value
data_p
*=
self
.
scale_value
data_n
*=
self
.
scale_value
return
data_a
,
data_p
,
data_n
bob/learn/tensorflow/datashuffler/SiameseMemory.py
View file @
b132e782
...
...
@@ -16,7 +16,8 @@ class SiameseMemory(Siamese, Memory):
input_shape
,
input_dtype
=
"float64"
,
scale
=
True
,
batch_size
=
1
):
batch_size
=
1
,
seed
=
10
):
"""
Shuffler that deal with memory datasets
...
...
@@ -35,8 +36,11 @@ class SiameseMemory(Siamese, Memory):
input_shape
=
input_shape
,
input_dtype
=
input_dtype
,
scale
=
scale
,
batch_size
=
batch_size
batch_size
=
batch_size
,
seed
=
seed
)
# Seting the seed
numpy
.
random
.
seed
(
seed
)
self
.
data
=
self
.
data
.
astype
(
input_dtype
)
if
self
.
scale
:
...
...
bob/learn/tensorflow/datashuffler/Triplet.py
View file @
b132e782
...
...
@@ -5,10 +5,16 @@
import
numpy
from
.Base
import
Base
import
tensorflow
as
tf
class
Triplet
(
Base
):
"""
Triplet Shuffler base class
This datashuffler deal with databases that are provides data to Triplet networks.
Basically the py:meth:`get_batch` method provides you 3 elements in the returned list.
The first element is the batch for the anchor, the second one is the batch for the positive class, w.r.t the
anchor, and the last one is the batch for the negative class , w.r.t the anchor.
"""
def
__init__
(
self
,
**
kwargs
):
...
...
@@ -16,6 +22,36 @@ class Triplet(Base):
self
.
data2_placeholder
=
None
self
.
data3_placeholder
=
None
def
get_placeholders
(
self
,
name
=
""
):
"""
Returns a place holder with the size of your batch
"""
if
self
.
data_placeholder
is
None
:
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
self
.
shape
,
name
=
name
+
"_anchor"
)
if
self
.
data2_placeholder
is
None
:
self
.
data2_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
self
.
shape
,
name
=
name
+
"_positive"
)
if
self
.
data3_placeholder
is
None
:
self
.
data3_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
self
.
shape
,
name
=
name
+
"_negative"
)
return
[
self
.
data_placeholder
,
self
.
data2_placeholder
,
self
.
data3_placeholder
]
def
get_placeholders_triplet_forprefetch
(
self
,
name
=
""
):
"""
Returns a place holder with the size of your batch
"""
if
self
.
data_placeholder
is
None
:
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
if
self
.
data2_placeholder
is
None
:
self
.
data2_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
if
self
.
data3_placeholder
is
None
:
self
.
data3_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
return
[
self
.
data_placeholder
,
self
.
data2_placeholder
,
self
.
data3_placeholder
]
def
get_one_triplet
(
self
,
input_data
,
input_labels
):
# Getting a pair of clients
index
=
numpy
.
random
.
choice
(
len
(
self
.
possible_labels
),
2
,
replace
=
False
)
...
...
bob/learn/tensorflow/datashuffler/TripletDisk.py
View file @
b132e782
...
...
@@ -12,21 +12,17 @@ logger = bob.core.log.setup("bob.learn.tensorflow")
import
tensorflow
as
tf
from
.BaseDataShuffler
import
BaseDataShuffler
from
.Disk
import
Disk
from
.Triplet
import
Triplet
#def scale_mean_norm(data, scale=0.00390625):
# mean = numpy.mean(data)
# data = (data - mean) * scale
# return data, mean
class
TextDataShuffler
(
BaseDataShuffler
):
class
TripletDisk
(
Triplet
,
Disk
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
input_dtype
=
"float64"
,
scale
=
True
,
batch_size
=
1
):
batch_size
=
1
,
seed
=
10
):
"""
Shuffler that deal with file list
...
...
@@ -45,7 +41,7 @@ class TextDataShuffler(BaseDataShuffler):
if
isinstance
(
labels
,
list
):
labels
=
numpy
.
array
(
labels
)
super
(
T
extDataShuffler
,
self
).
__init__
(
super
(
T
ripletDisk
,
self
).
__init__
(
data
=
data
,
labels
=
labels
,
input_shape
=
input_shape
,
...
...
@@ -53,125 +49,13 @@ class TextDataShuffler(BaseDataShuffler):
scale
=
scale
,
batch_size
=
batch_size
)
# Seting the seed
numpy
.
random
.
seed
(
seed
)
# TODO: very bad solution to deal with bob.shape images an tf shape images
self
.
bob_shape
=
tuple
([
input_shape
[
2
]]
+
list
(
input_shape
[
0
:
2
]))
def
load_from_file
(
self
,
file_name
,
shape
):
d
=
bob
.
io
.
base
.
load
(
file_name
)
if
d
.
shape
[
0
]
!=
3
and
self
.
input_shape
[
2
]
!=
3
:
# GRAY SCALE IMAGE
data
=
numpy
.
zeros
(
shape
=
(
d
.
shape
[
0
],
d
.
shape
[
1
],
1
))
data
[:,
:,
0
]
=
d
data
=
self
.
rescale
(
data
)
else
:
d
=
self
.
rescale
(
d
)
data
=
self
.
bob2skimage
(
d
)
# Checking NaN
if
numpy
.
sum
(
numpy
.
isnan
(
data
))
>
0
:
logger
.
warning
(
"######### Image {0} has noise #########"
.
format
(
file_name
))
return
data
def
bob2skimage
(
self
,
bob_image
):
"""
Convert bob color image to the skcit image
"""
skimage
=
numpy
.
zeros
(
shape
=
(
bob_image
.
shape
[
1
],
bob_image
.
shape
[
2
],
3
))
skimage
[:,
:,
0
]
=
bob_image
[
0
,
:,
:]
#Copying red
skimage
[:,
:,
1
]
=
bob_image
[
1
,
:,
:]
#Copying green
skimage
[:,
:,
2
]
=
bob_image
[
2
,
:,
:]
#Copying blue
return
skimage
def
get_batch
(
self
):
# Shuffling samples
indexes
=
numpy
.
array
(
range
(
self
.
data
.
shape
[
0
]))
numpy
.
random
.
shuffle
(
indexes
)
selected_data
=
numpy
.
zeros
(
shape
=
self
.
shape
)
for
i
in
range
(
self
.
batch_size
):
file_name
=
self
.
data
[
indexes
[
i
]]
data
=
self
.
load_from_file
(
file_name
,
self
.
shape
)
selected_data
[
i
,
...]
=
data
if
self
.
scale
:
selected_data
[
i
,
...]
*=
self
.
scale_value
selected_labels
=
self
.
labels
[
indexes
[
0
:
self
.
batch_size
]]
return
selected_data
.
astype
(
"float32"
),
selected_labels
def
rescale
(
self
,
data
):
"""
Reescale a single sample with input_shape
"""
#if self.input_shape != data.shape:
if
self
.
bob_shape
!=
data
.
shape
:
# TODO: Implement a better way to do this reescaling
# If it is gray scale
if
self
.
input_shape
[
2
]
==
1
:
copy
=
data
[:,
:,
0
].
copy
()
dst
=
numpy
.
zeros
(
shape
=
self
.
input_shape
[
0
:
2
])
bob
.
ip
.
base
.
scale
(
copy
,
dst
)
dst
=
numpy
.
reshape
(
dst
,
self
.
input_shape
)
else
:
#dst = numpy.resize(data, self.bob_shape) # Scaling with numpy, because bob is c,w,d instead of w,h,c
dst
=
numpy
.
zeros
(
shape
=
self
.
bob_shape
)
# TODO: LAME SOLUTION
if
data
.
shape
[
0
]
!=
3
:
# GRAY SCALE IMAGES IN A RGB DATABASE
step_data
=
numpy
.
zeros
(
shape
=
(
3
,
data
.
shape
[
0
],
data
.
shape
[
1
]))
step_data
[
0
,
...]
=
data
[:,
:]
step_data
[
1
,
...]
=
data
[:,
:]
step_data
[
2
,
...]
=
data
[:,
:]
data
=
step_data
bob
.
ip
.
base
.
scale
(
data
,
dst
)
return
dst
else
:
return
data
def
get_pair
(
self
,
zero_one_labels
=
True
):
"""
Get a random pair of samples
**Parameters**
is_target_set_train: Defining the target set to get the batch
**Return**
"""
data
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_p
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
labels_siamese
=
numpy
.
zeros
(
shape
=
self
.
shape
[
0
],
dtype
=
'float32'
)
genuine
=
True
for
i
in
range
(
self
.
shape
[
0
]):
file_name
,
file_name_p
=
self
.
get_genuine_or_not
(
self
.
data
,
self
.
labels
,
genuine
=
genuine
)
data
[
i
,
...]
=
self
.
load_from_file
(
str
(
file_name
),
self
.
shape
)
data_p
[
i
,
...]
=
self
.
load_from_file
(
str
(
file_name_p
),
self
.
shape
)
if
zero_one_labels
:
labels_siamese
[
i
]
=
not
genuine
else
:
labels_siamese
[
i
]
=
-
1
if
genuine
else
+
1
genuine
=
not
genuine
if
self
.
scale
:
data
*=
self
.
scale_value
data_p
*=
self
.
scale_value
return
data
,
data_p
,
labels_siamese
def
get_random_triplet
(
self
):
"""
Get a random pair of samples
...
...
@@ -196,4 +80,4 @@ class TextDataShuffler(BaseDataShuffler):
data_p
*=
self
.
scale_value
data_n
*=
self
.
scale_value
return
data_a
,
data_p
,
data_n
return
[
data_a
,
data_p
,
data_n
]
bob/learn/tensorflow/datashuffler/TripletMemory.py
View file @
b132e782
...
...
@@ -16,7 +16,8 @@ class TripletMemory(Triplet, Memory):
input_shape
,
input_dtype
=
"float64"
,
scale
=
True
,
batch_size
=
1
):
batch_size
=
1
,
seed
=
10
):
"""
Shuffler that deal with memory datasets
...
...
@@ -35,8 +36,11 @@ class TripletMemory(Triplet, Memory):
input_shape
=
input_shape
,
input_dtype
=
input_dtype
,
scale
=
scale
,
batch_size
=
batch_size
batch_size
=
batch_size
,
seed
=
seed
)
# Seting the seed
numpy
.
random
.
seed
(
seed
)
self
.
data
=
self
.
data
.
astype
(
input_dtype
)
if
self
.
scale
:
...
...
bob/learn/tensorflow/datashuffler/__init__.py
View file @
b132e782
...
...
@@ -5,8 +5,13 @@ __path__ = extend_path(__path__, __name__)
from
.Base
import
Base
from
.Siamese
import
Siamese
from
.Memory
import
Memory
from
.Disk
import
Disk
from
.SiameseMemory
import
SiameseMemory
from
.TripletMemory
import
TripletMemory
from
.SiameseDisk
import
SiameseDisk
from
.TripletDisk
import
TripletDisk
# gets sphinx autodoc done right - don't remove it
__all__
=
[
_
for
_
in
dir
()
if
not
_
.
startswith
(
'_'
)]
bob/learn/tensorflow/script/train_mnist.py
View file @
b132e782
...
...
@@ -22,10 +22,11 @@ from docopt import docopt
import
tensorflow
as
tf
from
..
import
util
SEED
=
10
from
bob.learn.tensorflow.data
import
MemoryDataShuffler
,
TextDataShuffler
from
bob.learn.tensorflow.data
shuffler
import
Memory
,
SiameseMemory
,
TripletMemory
from
bob.learn.tensorflow.network
import
Lenet
,
MLP
,
Dummy
,
Chopra
from
bob.learn.tensorflow.trainers
import
Trainer
from
bob.learn.tensorflow.loss
import
BaseLoss
import
bob.io.base
from
..analyzers
import
ExperimentAnalizer
,
SoftmaxAnalizer
import
numpy
...
...
@@ -42,64 +43,37 @@ def main():
mnist
=
True
# Loading data
if
mnist
:
train_data
,
train_labels
,
validation_data
,
validation_labels
=
\
util
.
load_mnist
(
data_dir
=
"./src/bob.db.mnist/bob/db/mnist/"
)
train_data
,
train_labels
,
validation_data
,
validation_labels
=
\
util
.
load_mnist
(
data_dir
=
"./src/bob.db.mnist/bob/db/mnist/"
)
train_data
=
numpy
.
reshape
(
train_data
,
(
train_data
.
shape
[
0
],
28
,
28
,
1
))
validation_data
=
numpy
.
reshape
(
validation_data
,
(
validation_data
.
shape
[
0
],
28
,
28
,
1
))
train_data
=
numpy
.
reshape
(
train_data
,
(
train_data
.
shape
[
0
],
28
,
28
,
1
))
validation_data
=
numpy
.
reshape
(
validation_data
,
(
validation_data
.
shape
[
0
],
28
,
28
,
1
))
# Creating datashufflers
train_data_shuffler
=
Memory
(
train_data
,
train_labels
,
input_shape
=
[
28
,
28
,
1
],
batch_size
=
BATCH_SIZE
)
validation_data_shuffler
=
Memory
(
validation_data
,
validation_labels
,
input_shape
=
[
28
,
28
,
1
],
batch_size
=
VALIDATION_BATCH_SIZE
)
train_data_shuffler
=
MemoryDataShuffler
(
train_data
,
train_labels
,
input_shape
=
[
28
,
28
,
1
],
batch_size
=
BATCH_SIZE
)
validation_data_shuffler
=
MemoryDataShuffler
(
validation_data
,
validation_labels
,
input_shape
=
[
28
,
28
,
1
],
batch_size
=
VALIDATION_BATCH_SIZE
)
else
:
import
bob.db.mobio
db
=
bob
.
db
.
mobio
.
Database
()
# Preparing train set
train_objects
=
db
.
objects
(
protocol
=
"male"
,
groups
=
"world"
)
train_labels
=
[
o
.
client_id
for
o
in
train_objects
]
train_file_names
=
[
o
.
make_path
(
directory
=
"/idiap/user/tpereira/face/baselines/eigenface/preprocessed"
,
extension
=
".hdf5"
)
for
o
in
train_objects
]
train_data_shuffler
=
TextDataShuffler
(
train_file_names
,
train_labels
,
scale
=
False
,
input_shape
=
[
80
,
64
,
1
],
batch_size
=
BATCH_SIZE
)
# Preparing train set
validation_objects
=
db
.
objects
(
protocol
=
"male"
,
groups
=
"dev"
)
validation_labels
=
[
o
.
client_id
for
o
in
validation_objects
]
validation_file_names
=
[
o
.
make_path
(
directory
=
"/idiap/user/tpereira/face/baselines/eigenface/preprocessed"
,
extension
=
".hdf5"
)
for
o
in
validation_objects
]
validation_data_shuffler
=
TextDataShuffler
(
validation_file_names
,
validation_labels
,
input_shape
=
[
80
,
64
,
1
],
scale
=
False
,
batch_size
=
VALIDATION_BATCH_SIZE
)
# Preparing the architecture
cnn
=
True
if
cnn
:
architecture
=
Chopra
(
seed
=
SEED
)
architecture
=
Chopra
(
seed
=
SEED
,
fc1_output
=
10
)
#architecture = Lenet(seed=SEED)
#architecture = Dummy(seed=SEED)