Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
bob.learn.tensorflow
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
11
Issues
11
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
bob
bob.learn.tensorflow
Commits
ee0db09e
Commit
ee0db09e
authored
Oct 12, 2016
by
Tiago de Freitas Pereira
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Redesigning the datashuffler
parent
be59f56f
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
651 additions
and
56 deletions
+651
-56
bob/learn/tensorflow/datashuffler/Base.py
bob/learn/tensorflow/datashuffler/Base.py
+119
-0
bob/learn/tensorflow/datashuffler/Disk.py
bob/learn/tensorflow/datashuffler/Disk.py
+86
-0
bob/learn/tensorflow/datashuffler/Memory.py
bob/learn/tensorflow/datashuffler/Memory.py
+52
-0
bob/learn/tensorflow/datashuffler/Siamese.py
bob/learn/tensorflow/datashuffler/Siamese.py
+66
-0
bob/learn/tensorflow/datashuffler/SiameseDisk.py
bob/learn/tensorflow/datashuffler/SiameseDisk.py
+7
-0
bob/learn/tensorflow/datashuffler/SiameseMemory.py
bob/learn/tensorflow/datashuffler/SiameseMemory.py
+7
-43
bob/learn/tensorflow/datashuffler/Triplet.py
bob/learn/tensorflow/datashuffler/Triplet.py
+38
-0
bob/learn/tensorflow/datashuffler/TripletDisk.py
bob/learn/tensorflow/datashuffler/TripletDisk.py
+199
-0
bob/learn/tensorflow/datashuffler/TripletMemory.py
bob/learn/tensorflow/datashuffler/TripletMemory.py
+62
-0
bob/learn/tensorflow/datashuffler/__init__.py
bob/learn/tensorflow/datashuffler/__init__.py
+5
-3
bob/learn/tensorflow/script/train_mobio.py
bob/learn/tensorflow/script/train_mobio.py
+10
-10
No files found.
bob/learn/tensorflow/data
/BaseDataShuffler
.py
→
bob/learn/tensorflow/data
shuffler/Base
.py
View file @
ee0db09e
...
...
@@ -5,9 +5,10 @@
import
numpy
import
tensorflow
as
tf
import
bob.ip.base
class
Base
DataShuffler
(
object
):
class
Base
(
object
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
input_dtype
=
"float64"
,
...
...
@@ -36,7 +37,6 @@ class BaseDataShuffler(object):
self
.
shape
=
tuple
([
batch_size
]
+
input_shape
)
self
.
input_shape
=
tuple
(
input_shape
)
self
.
labels
=
labels
self
.
possible_labels
=
list
(
set
(
self
.
labels
))
...
...
@@ -47,51 +47,9 @@ class BaseDataShuffler(object):
self
.
indexes
=
numpy
.
array
(
range
(
self
.
n_samples
))
numpy
.
random
.
shuffle
(
self
.
indexes
)
# TODO: Reorganize the data shufflers for siamese and triplets
self
.
data_placeholder
=
None
self
.
data2_placeholder
=
None
self
.
data3_placeholder
=
None
self
.
label_placeholder
=
None
def
get_placeholders_forprefetch
(
self
,
name
=
""
):
"""
Returns a place holder with the size of your batch
"""
if
self
.
data_placeholder
is
None
:
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
self
.
label_placeholder
=
tf
.
placeholder
(
tf
.
int64
,
shape
=
[
None
,
])
return
self
.
data_placeholder
,
self
.
label_placeholder
def
get_placeholders_pair_forprefetch
(
self
,
name
=
""
):
"""
Returns a place holder with the size of your batch
"""
if
self
.
data_placeholder
is
None
:
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
if
self
.
data2_placeholder
is
None
:
self
.
data2_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
if
self
.
label_placeholder
is
None
:
self
.
label_placeholder
=
tf
.
placeholder
(
tf
.
int64
,
shape
=
[
None
,
])
return
self
.
data_placeholder
,
self
.
data2_placeholder
,
self
.
label_placeholder
def
get_placeholders_triplet_forprefetch
(
self
,
name
=
""
):
"""
Returns a place holder with the size of your batch
"""
if
self
.
data_placeholder
is
None
:
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
if
self
.
data2_placeholder
is
None
:
self
.
data2_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
if
self
.
data3_placeholder
is
None
:
self
.
data3_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
return
self
.
data_placeholder
,
self
.
data2_placeholder
,
self
.
data3_placeholder
def
get_placeholders
(
self
,
name
=
""
):
"""
Returns a place holder with the size of your batch
...
...
@@ -103,91 +61,59 @@ class BaseDataShuffler(object):
if
self
.
label_placeholder
is
None
:
self
.
label_placeholder
=
tf
.
placeholder
(
tf
.
int64
,
shape
=
self
.
shape
[
0
])
return
self
.
data_placeholder
,
self
.
label_placeholder
return
[
self
.
data_placeholder
,
self
.
label_placeholder
]
def
get_placeholders_
pair
(
self
,
name
=
""
):
def
get_placeholders_
forprefetch
(
self
,
name
=
""
):
"""
Returns a place holder with the size of your batch
"""
if
self
.
data_placeholder
is
None
:
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
self
.
shape
,
name
=
name
+
"_right"
)
if
self
.
data2_placeholder
is
None
:
self
.
data2_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
self
.
shape
,
name
=
name
+
"_left"
)
if
self
.
label_placeholder
is
None
:
self
.
label_placeholder
=
tf
.
placeholder
(
tf
.
int64
,
shape
=
self
.
shape
[
0
],
name
=
name
+
"_label"
)
return
self
.
data_placeholder
,
self
.
data2_placeholder
,
self
.
label_placeholder
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
tuple
([
None
]
+
list
(
self
.
shape
[
1
:])),
name
=
name
)
self
.
label_placeholder
=
tf
.
placeholder
(
tf
.
int64
,
shape
=
[
None
,
])
return
[
self
.
data_placeholder
,
self
.
label_placeholder
]
def
get_placeholders_triplet
(
self
,
name
=
""
):
def
bob2skimage
(
self
,
bob_image
):
"""
Returns a place holder with the size of your batch
Convert a bob color image to the scikit-image layout
"""
if
self
.
data_placeholder
is
None
:
self
.
data_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
self
.
shape
,
name
=
name
+
"_anchor"
)
if
self
.
data2_placeholder
is
None
:
self
.
data2_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
self
.
shape
,
name
=
name
+
"_positive"
)
if
self
.
data3_placeholder
is
None
:
self
.
data3_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
self
.
shape
,
name
=
name
+
"_negative"
)
skimage
=
numpy
.
zeros
(
shape
=
(
bob_image
.
shape
[
1
],
bob_image
.
shape
[
2
],
3
))
return
self
.
data_placeholder
,
self
.
data2_placeholder
,
self
.
data3_placeholder
skimage
[:,
:,
0
]
=
bob_image
[
0
,
:,
:]
# Copying red
skimage
[:,
:,
1
]
=
bob_image
[
1
,
:,
:]
# Copying green
skimage
[:,
:,
2
]
=
bob_image
[
2
,
:,
:]
# Copying blue
def
get_genuine_or_not
(
self
,
input_data
,
input_labels
,
genuine
=
True
):
return
skimage
if
genuine
:
# Getting a client
index
=
numpy
.
random
.
randint
(
len
(
self
.
possible_labels
))
index
=
int
(
self
.
possible_labels
[
index
])
# Getting the indexes of the data from a particular client
indexes
=
numpy
.
where
(
input_labels
==
index
)[
0
]
numpy
.
random
.
shuffle
(
indexes
)
# Picking a pair
data
=
input_data
[
indexes
[
0
],
...]
data_p
=
input_data
[
indexes
[
1
],
...]
def
rescale
(
self
,
data
):
"""
Rescale a single sample with input_shape
"""
# if self.input_shape != data.shape:
if
self
.
bob_shape
!=
data
.
shape
:
# TODO: Implement a better way to do this reescaling
# If it is gray scale
if
self
.
input_shape
[
2
]
==
1
:
copy
=
data
[:,
:,
0
].
copy
()
dst
=
numpy
.
zeros
(
shape
=
self
.
input_shape
[
0
:
2
])
bob
.
ip
.
base
.
scale
(
copy
,
dst
)
dst
=
numpy
.
reshape
(
dst
,
self
.
input_shape
)
else
:
# dst = numpy.resize(data, self.bob_shape) # Scaling with numpy, because bob is c,w,d instead of w,h,c
dst
=
numpy
.
zeros
(
shape
=
self
.
bob_shape
)
# TODO: LAME SOLUTION
if
data
.
shape
[
0
]
!=
3
:
# GRAY SCALE IMAGES IN A RGB DATABASE
step_data
=
numpy
.
zeros
(
shape
=
(
3
,
data
.
shape
[
0
],
data
.
shape
[
1
]))
step_data
[
0
,
...]
=
data
[:,
:]
step_data
[
1
,
...]
=
data
[:,
:]
step_data
[
2
,
...]
=
data
[:,
:]
data
=
step_data
bob
.
ip
.
base
.
scale
(
data
,
dst
)
return
dst
else
:
# Picking a pair of labels from different clients
index
=
numpy
.
random
.
choice
(
len
(
self
.
possible_labels
),
2
,
replace
=
False
)
index
[
0
]
=
self
.
possible_labels
[
int
(
index
[
0
])]
index
[
1
]
=
self
.
possible_labels
[
int
(
index
[
1
])]
# Getting the indexes of the two clients
indexes
=
numpy
.
where
(
input_labels
==
index
[
0
])[
0
]
indexes_p
=
numpy
.
where
(
input_labels
==
index
[
1
])[
0
]
numpy
.
random
.
shuffle
(
indexes
)
numpy
.
random
.
shuffle
(
indexes_p
)
# Picking a pair
data
=
input_data
[
indexes
[
0
],
...]
data_p
=
input_data
[
indexes_p
[
0
],
...]
return
data
,
data_p
def
get_one_triplet
(
self
,
input_data
,
input_labels
):
# Getting a pair of clients
index
=
numpy
.
random
.
choice
(
len
(
self
.
possible_labels
),
2
,
replace
=
False
)
index
[
0
]
=
self
.
possible_labels
[
index
[
0
]]
index
[
1
]
=
self
.
possible_labels
[
index
[
1
]]
# Getting the indexes of the data from a particular client
indexes
=
numpy
.
where
(
input_labels
==
index
[
0
])[
0
]
numpy
.
random
.
shuffle
(
indexes
)
# Picking a positive pair
data_anchor
=
input_data
[
indexes
[
0
],
...]
data_positive
=
input_data
[
indexes
[
1
],
...]
# Picking a negative sample
indexes
=
numpy
.
where
(
input_labels
==
index
[
1
])[
0
]
numpy
.
random
.
shuffle
(
indexes
)
data_negative
=
input_data
[
indexes
[
0
],
...]
return
data_anchor
,
data_positive
,
data_negative
return
data
bob/learn/tensorflow/datashuffler/Disk.py
0 → 100644
View file @
ee0db09e
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
import
bob.io.base
import
bob.io.image
import
bob.ip.base
import
bob.core
from
.Base
import
Base
# Module-level logger shared by every shuffler in this file.
logger = bob.core.log.setup("bob.learn.tensorflow")
class Disk(Base):
    """
    Shuffler whose samples are files that are loaded from disk on demand.

    NOTE(review): this class reads `self.data`, `self.labels`, `self.shape`,
    `self.input_shape`, `self.batch_size`, `self.scale`, `self.scale_value`
    and calls `self.rescale` / `self.bob2skimage`; none of them are defined
    here, so they are presumably provided by `Base` -- confirm.
    """

    def __init__(self, data, labels,
                 input_shape,
                 input_dtype="float64",
                 scale=True,
                 batch_size=1):
        """
        Shuffler that deals with a list of files

        **Parameters**
          data: File names, one per sample. A python list is converted to a
            numpy array.
          labels: One label per sample. A python list is converted to a
            numpy array.
          input_shape: Shape of one sample. Its third entry is compared with
            1 and 3 elsewhere in this class, so it is presumably
            (height, width, channels) -- TODO confirm.
          input_dtype: Forwarded unchanged to `Base`.
          scale: Forwarded unchanged to `Base`.
          batch_size: Forwarded unchanged to `Base`.
        """
        file_names = numpy.array(data) if isinstance(data, list) else data
        targets = numpy.array(labels) if isinstance(labels, list) else labels

        super(Disk, self).__init__(
            data=file_names,
            labels=targets,
            input_shape=input_shape,
            input_dtype=input_dtype,
            scale=scale,
            batch_size=batch_size
        )

        # TODO: very bad solution to deal with bob.shape images and tf shape images
        # Same three sizes as input_shape, with the third entry moved first.
        self.bob_shape = (input_shape[2],) + tuple(input_shape[0:2])

    def load_from_file(self, file_name, shape):
        """
        Load one file and return it rescaled.

        **Parameters**
          file_name: Path handed to `bob.io.base.load`.
          shape: Not used by this method; kept for interface compatibility.

        **Return**
          The array produced by `self.rescale` (gray scale branch) or by
          `self.bob2skimage` (every other case).
        """
        loaded = bob.io.base.load(file_name)

        treat_as_color = loaded.shape[0] == 3 or self.input_shape[2] == 3
        if treat_as_color:
            loaded = self.rescale(loaded)
            sample = self.bob2skimage(loaded)
        else:
            # GRAY SCALE IMAGE: append a trailing axis of size one
            sample = numpy.zeros(shape=(loaded.shape[0], loaded.shape[1], 1))
            sample[:, :, 0] = loaded
            sample = self.rescale(sample)

        # Checking NaN
        if numpy.isnan(sample).any():
            logger.warning("######### Image {0} has noise #########".format(file_name))

        return sample

    def get_batch(self):
        """
        Load `self.batch_size` randomly chosen files.

        The sample order is reshuffled on every call.

        **Return**
          [data as float32 with shape `self.shape`, labels as int64]
        """
        # Shuffling samples
        order = numpy.array(range(self.data.shape[0]))
        numpy.random.shuffle(order)

        batch = numpy.zeros(shape=self.shape)
        for slot in range(self.batch_size):
            batch[slot, ...] = self.load_from_file(self.data[order[slot]], self.shape)
            if self.scale:
                batch[slot, ...] *= self.scale_value

        batch_labels = self.labels[order[0:self.batch_size]]

        return [batch.astype("float32"), batch_labels.astype("int64")]
bob/learn/tensorflow/datashuffler/Memory.py
0 → 100644
View file @
ee0db09e
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
from
.Base
import
Base
import
tensorflow
as
tf
class Memory(Base):
    """
    Shuffler for data sets that are already held in memory as an array.

    NOTE(review): `self.data`, `self.labels`, `self.scale`,
    `self.scale_value` and `self.batch_size` are read here but not defined
    here; presumably `Base.__init__` sets them -- confirm.
    """

    def __init__(self, data, labels,
                 input_shape,
                 input_dtype="float64",
                 scale=True,
                 batch_size=1):
        """
        Shuffler that deals with memory datasets

        **Parameters**
          data: Samples; indexed with four axes in `get_batch`.
          labels: One label per sample.
          input_shape: Forwarded unchanged to `Base`.
          input_dtype: dtype the stored data is cast to.
          scale: Forwarded to `Base`; when `self.scale` is true the stored
            data is multiplied in place by `self.scale_value`.
          batch_size: Forwarded unchanged to `Base`.
        """
        super(Memory, self).__init__(
            data=data,
            labels=labels,
            input_shape=input_shape,
            input_dtype=input_dtype,
            scale=scale,
            batch_size=batch_size
        )

        # Cast once up front so that the in-place scaling below is applied
        # to an array of the requested dtype.
        self.data = self.data.astype(input_dtype)
        if self.scale:
            self.data *= self.scale_value

    def get_batch(self):
        """
        Draw `self.batch_size` random samples.

        The sample order is reshuffled on every call.

        **Return**
          [data as float32, labels as int64]
        """
        # Shuffling samples
        order = numpy.array(range(self.data.shape[0]))
        numpy.random.shuffle(order)
        picked = order[0:self.batch_size]

        batch = self.data[picked, :, :, :]
        batch_labels = self.labels[picked]

        return [batch.astype("float32"), batch_labels.astype("int64")]
bob/learn/tensorflow/datashuffler/Siamese.py
0 → 100644
View file @
ee0db09e
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
from
.Base
import
Base
import
tensorflow
as
tf
class Siamese(Base):
    """
    Siamese Shuffler base class

    Adds a second data placeholder and pair sampling on top of `Base`.

    NOTE(review): `self.data_placeholder`, `self.label_placeholder`,
    `self.shape` and `self.possible_labels` are read here but not defined
    here; presumably `Base.__init__` sets them -- confirm.
    """

    def __init__(self, **kwargs):
        """Forward every keyword argument to `Base` and reset the second placeholder."""
        super(Siamese, self).__init__(**kwargs)
        self.data2_placeholder = None

    def get_placeholders(self, name=""):
        """
        Returns the placeholders with the size of your batch

        Each placeholder is created on the first call only and reused
        afterwards.

        **Parameters**
          name: Prefix of the placeholder names ("_right", "_left" and
            "_label" are appended).

        **Return**
          [data placeholder, second data placeholder, label placeholder]
        """
        if self.data_placeholder is None:
            self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name + "_right")

        if self.data2_placeholder is None:
            self.data2_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name + "_left")

        if self.label_placeholder is None:
            self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0], name=name + "_label")

        return [self.data_placeholder, self.data2_placeholder, self.label_placeholder]

    def get_genuine_or_not(self, input_data, input_labels, genuine=True):
        """
        Pick one pair of samples.

        **Parameters**
          input_data: Samples, indexed along the first axis.
          input_labels: numpy array with the label of each sample.
          genuine: If True both samples share a label, otherwise they come
            from two different labels.

        **Return**
          (sample, paired sample)

        **Raises**
          ValueError: if a genuine pair is requested and no label has at
            least two samples, or (raised by numpy) if an impostor pair is
            requested with fewer than two possible labels.
        """
        if genuine:
            # Getting a client
            position = numpy.random.randint(len(self.possible_labels))
            client = self.possible_labels[position]

            # Getting the indexes of the data from a particular client
            indexes = numpy.where(input_labels == client)[0]

            # A client with a single sample cannot form a genuine pair;
            # redraw among the clients that can instead of failing on
            # indexes[1] below.
            if len(indexes) < 2:
                eligible = [label for label in self.possible_labels
                            if numpy.count_nonzero(input_labels == label) >= 2]
                if not eligible:
                    raise ValueError("No label has at least two samples; "
                                     "cannot build a genuine pair")
                client = eligible[numpy.random.randint(len(eligible))]
                indexes = numpy.where(input_labels == client)[0]

            numpy.random.shuffle(indexes)

            # Picking a pair
            data = input_data[indexes[0], ...]
            data_p = input_data[indexes[1], ...]

        else:
            # Picking a pair of labels from different clients.
            # The labels are kept in their own variables: writing them back
            # into the integer position array would cast them to integers.
            first, second = numpy.random.choice(len(self.possible_labels), 2, replace=False)
            client = self.possible_labels[int(first)]
            client_p = self.possible_labels[int(second)]

            # Getting the indexes of the two clients
            indexes = numpy.where(input_labels == client)[0]
            indexes_p = numpy.where(input_labels == client_p)[0]
            numpy.random.shuffle(indexes)
            numpy.random.shuffle(indexes_p)

            # Picking a pair
            data = input_data[indexes[0], ...]
            data_p = input_data[indexes_p[0], ...]

        return data, data_p
bob/learn/tensorflow/data
/TextDataShuffler
.py
→
bob/learn/tensorflow/data
shuffler/SiameseDisk
.py
View file @
ee0db09e
...
...
@@ -7,6 +7,9 @@ import numpy
import
bob.io.base
import
bob.io.image
import
bob.ip.base
import
bob.core
logger
=
bob
.
core
.
log
.
setup
(
"bob.learn.tensorflow"
)
import
tensorflow
as
tf
from
.BaseDataShuffler
import
BaseDataShuffler
...
...
@@ -64,6 +67,10 @@ class TextDataShuffler(BaseDataShuffler):
d
=
self
.
rescale
(
d
)
data
=
self
.
bob2skimage
(
d
)
# Checking NaN
if
numpy
.
sum
(
numpy
.
isnan
(
data
))
>
0
:
logger
.
warning
(
"######### Image {0} has noise #########"
.
format
(
file_name
))
return
data
def
bob2skimage
(
self
,
bob_image
):
...
...
bob/learn/tensorflow/data
/MemoryDataShuffler
.py
→
bob/learn/tensorflow/data
shuffler/SiameseMemory
.py
View file @
ee0db09e
...
...
@@ -4,18 +4,13 @@
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
import
tensorflow
as
tf
from
.BaseDataShuffler
import
BaseDataShuffler
def
scale_mean_norm
(
data
,
scale
=
0.00390625
):
mean
=
numpy
.
mean
(
data
)
data
=
(
data
-
mean
)
*
scale
return
data
,
mean
from
.Memory
import
Memory
from
.Siamese
import
Siamese
import
tensorflow
as
tf
class
MemoryDataShuffler
(
BaseDataShuffler
):
class
SiameseMemory
(
Siamese
,
Memory
):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
...
...
@@ -34,7 +29,7 @@ class MemoryDataShuffler(BaseDataShuffler):
validation_batch_size:
"""
super
(
MemoryDataShuffler
,
self
).
__init__
(
super
(
SiameseMemory
,
self
).
__init__
(
data
=
data
,
labels
=
labels
,
input_shape
=
input_shape
,
...
...
@@ -47,18 +42,7 @@ class MemoryDataShuffler(BaseDataShuffler):
if
self
.
scale
:
self
.
data
*=
self
.
scale_value
def
get_batch
(
self
):
# Shuffling samples
indexes
=
numpy
.
array
(
range
(
self
.
data
.
shape
[
0
]))
numpy
.
random
.
shuffle
(
indexes
)
selected_data
=
self
.
data
[
indexes
[
0
:
self
.
batch_size
],
:,
:,
:]
selected_labels
=
self
.
labels
[
indexes
[
0
:
self
.
batch_size
]]
return
selected_data
,
selected_labels
.
astype
(
"int64"
)
def
get_pair
(
self
,
zero_one_labels
=
True
):
def
get_batch
(
self
,
zero_one_labels
=
True
):
"""
Get a random pair of samples
...
...
@@ -80,24 +64,4 @@ class MemoryDataShuffler(BaseDataShuffler):
labels_siamese
[
i
]
=
-
1
if
genuine
else
+
1
genuine
=
not
genuine
return
data
,
data_p
,
labels_siamese
def
get_random_triplet
(
self
):
"""
Get a random triplet
**Parameters**
is_target_set_train: Defining the target set to get the batch
**Return**
"""
data_a
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_p
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_n
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
for
i
in
range
(
self
.
shape
[
0
]):
data_a
[
i
,
...],
data_p
[
i
,
...],
data_n
[
i
,
...]
=
self
.
get_one_triplet
(
self
.
data
,
self
.
labels
)
return
data_a
,
data_p
,
data_n
return
[
data
,
data_p
,
labels_siamese
]
bob/learn/tensorflow/datashuffler/Triplet.py
0 → 100644
View file @
ee0db09e
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
from
.Base
import
Base
class Triplet(Base):
    """
    Triplet Shuffler base class

    Adds two extra data placeholders and triplet sampling on top of `Base`.

    NOTE(review): `self.possible_labels` is read here but not defined here;
    presumably `Base.__init__` sets it -- confirm.
    """

    def __init__(self, **kwargs):
        """Forward every keyword argument to `Base` and reset the extra placeholders."""
        super(Triplet, self).__init__(**kwargs)
        self.data2_placeholder = None
        self.data3_placeholder = None

    def get_one_triplet(self, input_data, input_labels):
        """
        Pick one (anchor, positive, negative) triplet.

        Anchor and positive share a label; the negative comes from a
        different label.

        **Parameters**
          input_data: Samples, indexed along the first axis.
          input_labels: numpy array with the label of each sample.

        **Return**
          (anchor sample, positive sample, negative sample)

        **Raises**
          ValueError: if no label has at least two samples, or (raised by
            numpy) if there are fewer than two possible labels.
        """
        # Getting a pair of clients.
        # The labels are kept in their own variables: writing them back into
        # the integer position array would cast them to integers.
        first, second = numpy.random.choice(len(self.possible_labels), 2, replace=False)
        anchor_label = self.possible_labels[int(first)]
        negative_label = self.possible_labels[int(second)]

        # Getting the indexes of the data from a particular client
        indexes = numpy.where(input_labels == anchor_label)[0]

        # The anchor client needs two samples (anchor + positive); redraw
        # among the clients that have them instead of failing on indexes[1].
        if len(indexes) < 2:
            eligible = [label for label in self.possible_labels
                        if numpy.count_nonzero(input_labels == label) >= 2]
            if not eligible:
                raise ValueError("No label has at least two samples; "
                                 "cannot build a triplet")
            anchor_label = eligible[numpy.random.randint(len(eligible))]
            if negative_label == anchor_label:
                # Keep the negative sample on a different client.
                others = [label for label in self.possible_labels
                          if label != anchor_label]
                negative_label = others[numpy.random.randint(len(others))]
            indexes = numpy.where(input_labels == anchor_label)[0]

        numpy.random.shuffle(indexes)

        # Picking a positive pair
        data_anchor = input_data[indexes[0], ...]
        data_positive = input_data[indexes[1], ...]

        # Picking a negative sample
        indexes = numpy.where(input_labels == negative_label)[0]
        numpy.random.shuffle(indexes)
        data_negative = input_data[indexes[0], ...]

        return data_anchor, data_positive, data_negative
bob/learn/tensorflow/datashuffler/TripletDisk.py
0 → 100644
View file @
ee0db09e
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import
numpy
import
bob.io.base
import
bob.io.image
import
bob.ip.base
import
bob.core
# Module-level logger shared by every shuffler in this file.
logger = bob.core.log.setup("bob.learn.tensorflow")
import
tensorflow
as
tf
from
.BaseDataShuffler
import
BaseDataShuffler
#def scale_mean_norm(data, scale=0.00390625):
# mean = numpy.mean(data)
# data = (data - mean) * scale
# return data, mean
class
TextDataShuffler
(
BaseDataShuffler
):
def __init__(self, data, labels,
             input_shape,
             input_dtype="float64",
             scale=True,
             batch_size=1):
    """
    Shuffler that deals with a list of files

    **Parameters**
      data: File names, one per sample. A python list is converted to a
        numpy array.
      labels: One label per sample. A python list is converted to a numpy
        array.
      input_shape: Shape of one sample; presumably
        (height, width, channels) -- TODO confirm.
      input_dtype: Forwarded unchanged to the parent class.
      scale: Forwarded unchanged to the parent class.
      batch_size: Forwarded unchanged to the parent class.
    """
    file_names = numpy.array(data) if isinstance(data, list) else data
    targets = numpy.array(labels) if isinstance(labels, list) else labels

    super(TextDataShuffler, self).__init__(
        data=file_names,
        labels=targets,
        input_shape=input_shape,
        input_dtype=input_dtype,
        scale=scale,
        batch_size=batch_size
    )

    # TODO: very bad solution to deal with bob.shape images and tf shape images
    # Same three sizes as input_shape, with the third entry moved first.
    self.bob_shape = (input_shape[2],) + tuple(input_shape[0:2])
def load_from_file(self, file_name, shape):
    """
    Load one file and return it rescaled.

    **Parameters**
      file_name: Path handed to `bob.io.base.load`.
      shape: Not used by this method; kept for interface compatibility.

    **Return**
      The array produced by `self.rescale` (gray scale branch) or by
      `self.bob2skimage` (every other case).
    """
    loaded = bob.io.base.load(file_name)

    treat_as_color = loaded.shape[0] == 3 or self.input_shape[2] == 3
    if treat_as_color:
        loaded = self.rescale(loaded)
        sample = self.bob2skimage(loaded)
    else:
        # GRAY SCALE IMAGE: append a trailing axis of size one
        sample = numpy.zeros(shape=(loaded.shape[0], loaded.shape[1], 1))
        sample[:, :, 0] = loaded
        sample = self.rescale(sample)

    # Checking NaN
    if numpy.isnan(sample).any():
        logger.warning("######### Image {0} has noise #########".format(file_name))

    return sample
def bob2skimage(self, bob_image):
    """
    Convert a bob color image to the scikit-image layout

    The first three planes along axis 0 of `bob_image` become the three
    entries of the last axis of the result.

    **Parameters**
      bob_image: Array indexed as [plane, row, column] with at least three
        planes.

    **Return**
      New float array of shape (bob_image.shape[1], bob_image.shape[2], 3).
    """
    rows, columns = bob_image.shape[1], bob_image.shape[2]
    skimage = numpy.zeros(shape=(rows, columns, 3))

    # Plane 0 is red, 1 is green, 2 is blue
    for plane in range(3):
        skimage[:, :, plane] = bob_image[plane, :, :]

    return skimage
def get_batch(self):
    """
    Load `self.batch_size` randomly chosen files.

    The sample order is reshuffled on every call.

    **Return**
      (data as float32 with shape `self.shape`, labels as stored)
    """
    # Shuffling samples
    order = numpy.array(range(self.data.shape[0]))
    numpy.random.shuffle(order)

    batch = numpy.zeros(shape=self.shape)
    for slot in range(self.batch_size):
        batch[slot, ...] = self.load_from_file(self.data[order[slot]], self.shape)
        if self.scale:
            batch[slot, ...] *= self.scale_value

    batch_labels = self.labels[order[0:self.batch_size]]

    return batch.astype("float32"), batch_labels
def
rescale
(
self
,
data
):
"""
Reescale a single sample with input_shape
"""
#if self.input_shape != data.shape:
if
self
.
bob_shape
!=
data
.
shape
:
# TODO: Implement a better way to do this reescaling
# If it is gray scale
if
self
.
input_shape
[
2
]
==
1
:
copy
=
data
[:,
:,
0
].
copy
()
dst
=
numpy
.
zeros
(
shape
=
self
.
input_shape
[
0
:
2
])
bob
.
ip
.
base
.
scale
(
copy
,
dst
)
dst
=
numpy
.
reshape
(
dst
,
self
.
input_shape
)
else
:
#dst = numpy.resize(data, self.bob_shape) # Scaling with numpy, because bob is c,w,d instead of w,h,c
dst
=
numpy
.
zeros
(
shape
=
self
.
bob_shape
)
# TODO: LAME SOLUTION
if
data
.
shape
[
0
]
!=
3
:
# GRAY SCALE IMAGES IN A RGB DATABASE
step_data
=
numpy
.
zeros
(
shape
=
(
3
,
data
.
shape
[
0
],
data
.
shape
[
1
]))
step_data
[
0
,
...]
=
data
[:,
:]
step_data
[
1
,
...]
=
data
[:,
:]
step_data
[
2
,
...]
=
data
[:,
:]