Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.learn.tensorflow
Commits
f63f7274
Commit
f63f7274
authored
Mar 08, 2017
by
Tiago de Freitas Pereira
Browse files
Fisher loss
parent
36c835c0
Pipeline
#7688
failed with stages
in 4 minutes and 21 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
bob/learn/tensorflow/datashuffler/TripletWithSelectionMemory.py
View file @
f63f7274
...
...
@@ -9,8 +9,11 @@ import tensorflow as tf
from
.OnlineSampling
import
OnlineSampling
from
.Memory
import
Memory
from
.Triplet
import
Triplet
from
scipy.spatial.distance
import
euclidean
from
bob.learn.tensorflow.datashuffler.Normalizer
import
Linear
from
scipy.spatial.distance
import
euclidean
,
cdist
import
logging
logger
=
logging
.
getLogger
(
"bob.learn"
)
class
TripletWithSelectionMemory
(
Triplet
,
Memory
,
OnlineSampling
):
...
...
@@ -61,7 +64,6 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
def
__init__
(
self
,
data
,
labels
,
input_shape
,
input_dtype
=
"float64"
,
scale
=
True
,
batch_size
=
1
,
seed
=
10
,
data_augmentation
=
None
,
...
...
@@ -73,7 +75,6 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
labels
=
labels
,
input_shape
=
input_shape
,
input_dtype
=
input_dtype
,
scale
=
scale
,
batch_size
=
batch_size
,
seed
=
seed
,
data_augmentation
=
data_augmentation
,
...
...
@@ -86,26 +87,7 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
self
.
data
=
self
.
data
.
astype
(
input_dtype
)
self
.
total_identities
=
total_identities
self
.
first_batch
=
True
def
get_random_batch
(
self
):
"""
Get a random triplet
**Parameters**
is_target_set_train: Defining the target set to get the batch
**Return**
"""
data_a
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_p
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_n
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
for
i
in
range
(
self
.
shape
[
0
]):
data_a
[
i
,
...],
data_p
[
i
,
...],
data_n
[
i
,
...]
=
self
.
get_one_triplet
(
self
.
data
,
self
.
labels
)
return
[
data_a
,
data_p
,
data_n
]
self
.
batch_increase_factor
=
4
def
get_batch
(
self
):
"""
...
...
@@ -117,54 +99,27 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
**Return**
"""
if
self
.
first_batch
:
self
.
first_batch
=
False
return
self
.
get_random_batch
()
# Selecting the classes used in the selection
indexes
=
numpy
.
random
.
choice
(
len
(
self
.
possible_labels
),
self
.
total_identities
,
replace
=
False
)
samples_per_identity
=
self
.
batch_size
/
self
.
total_identities
anchor_labels
=
numpy
.
ones
(
samples_per_identity
)
*
indexes
[
0
]
samples_per_identity
=
numpy
.
ceil
(
self
.
batch_size
/
float
(
self
.
total_identities
))
anchor_labels
=
numpy
.
ones
(
samples_per_identity
)
*
self
.
possible_labels
[
indexes
[
0
]]
for
i
in
range
(
1
,
self
.
total_identities
):
anchor_labels
=
numpy
.
hstack
((
anchor_labels
,
numpy
.
ones
(
samples_per_identity
)
*
indexes
[
i
]))
anchor_labels
=
numpy
.
hstack
((
anchor_labels
,
numpy
.
ones
(
samples_per_identity
)
*
self
.
possible_labels
[
indexes
[
i
]
]
))
anchor_labels
=
anchor_labels
[
0
:
self
.
batch_size
]
data_a
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_p
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
data_n
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
# Fetching the anchors
for
i
in
range
(
self
.
shape
[
0
]):
data_a
[
i
,
...]
=
self
.
get_anchor
(
anchor_labels
[
i
])
features_a
=
self
.
project
(
data_a
)
samples_a
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
# Computing the embedding
for
i
in
range
(
self
.
shape
[
0
]):
label
=
anchor_labels
[
i
]
#anchor = self.get_anchor(label)
positive
,
distance_anchor_positive
=
self
.
get_positive
(
label
,
features_a
[
i
])
negative
=
self
.
get_negative
(
label
,
features_a
[
i
],
distance_anchor_positive
)
data_p
[
i
,
...]
=
positive
data_n
[
i
,
...]
=
negative
# Applying the data augmentation
if
self
.
data_augmentation
is
not
None
:
for
i
in
range
(
data_a
.
shape
[
0
]):
d
=
self
.
bob2skimage
(
self
.
data_augmentation
(
self
.
skimage2bob
(
data_a
[
i
,
...])))
data_a
[
i
,
...]
=
d
d
=
self
.
bob2skimage
(
self
.
data_augmentation
(
self
.
skimage2bob
(
data_p
[
i
,
...])))
data_p
[
i
,
...]
=
d
d
=
self
.
bob2skimage
(
self
.
data_augmentation
(
self
.
skimage2bob
(
data_n
[
i
,
...])))
data_n
[
i
,
...]
=
d
samples_a
[
i
,
...]
=
self
.
get_anchor
(
anchor_labels
[
i
])
embedding_a
=
self
.
project
(
samples_a
)
# Scaling
data_a
=
self
.
normalize_sample
(
data_a
)
data_p
=
self
.
normalize_sample
(
data_p
)
data_n
=
self
.
normalize_sample
(
data_n
)
# Getting the positives
samples_p
,
embedding_p
,
d_anchor_positive
=
self
.
get_positives
(
anchor_labels
,
embedding_a
)
samples_n
=
self
.
get_negative
(
anchor_labels
,
embedding_a
,
d_anchor_positive
)
return
data_a
,
data_p
,
data
_n
return
samples_a
,
samples_p
,
samples
_n
def
get_anchor
(
self
,
label
):
"""
...
...
@@ -175,61 +130,67 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
indexes
=
numpy
.
where
(
self
.
labels
==
label
)[
0
]
numpy
.
random
.
shuffle
(
indexes
)
return
self
.
data
[
indexes
[
0
],
...]
return
self
.
normalize_sample
(
self
.
data
[
indexes
[
0
],
...]
)
def
get_positive
(
self
,
label
,
anchor_feature
):
def
get_positive
s
(
self
,
anchor_
label
s
,
embedding_a
):
"""
Get the best positive sample given the anchor.
The best positive sample for the anchor is the farthest from the anchor
Get the a random set of positive pairs
"""
samples_p
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
for
i
in
range
(
self
.
shape
[
0
]):
l
=
anchor_labels
[
i
]
indexes
=
numpy
.
where
(
self
.
labels
==
l
)[
0
]
numpy
.
random
.
shuffle
(
indexes
)
samples_p
[
i
,
...]
=
self
.
normalize_sample
(
self
.
data
[
indexes
[
0
],
...])
# Projecting the anchor
#anchor_feature = self.feature_extractor(self.reshape_for_deploy(anchor), session=self.session)
indexes
=
numpy
.
where
(
self
.
labels
==
label
)[
0
]
numpy
.
random
.
shuffle
(
indexes
)
indexes
=
indexes
[
0
:
self
.
batch_size
]
# Limiting to the batch size, otherwise the number of comparisons will explode
distances
=
[]
positive_features
=
self
.
project
(
self
.
data
[
indexes
,
...])
embedding_p
=
self
.
project
(
samples_p
)
# Projecting the positive instances
for
p
in
positive_features
:
distances
.
append
(
euclidean
(
anchor_feature
,
p
))
# Computing the distances
d_anchor_positive
=
[]
for
i
in
range
(
self
.
shape
[
0
]):
d_anchor_positive
.
append
(
euclidean
(
embedding_a
[
i
,
:],
embedding_p
[
i
,
:]))
# Geting the max
index
=
numpy
.
argmax
(
distances
)
return
self
.
data
[
indexes
[
index
],
...],
distances
[
index
]
return
samples_p
,
embedding_p
,
d_anchor_positive
def
get_negative
(
self
,
label
,
anchor_feature
,
distance
_anchor_positive
):
def
get_negative
(
self
,
anchor_labels
,
embedding_a
,
d
_anchor_positive
):
"""
Get the
best negative sample for a pair anchor-posi
tive
Get the
the semi-hard nega
tive
"""
# Projecting the anchor
#anchor_feature = self.feature_extractor(self.reshape_for_deploy(anchor), session=self.session)
# S
electing the negative samples
indexes
=
numpy
.
where
(
self
.
labels
!=
label
)[
0
]
# S
huffling all the dataset
indexes
=
range
(
len
(
self
.
labels
))
numpy
.
random
.
shuffle
(
indexes
)
indexes
=
indexes
[
0
:
self
.
batch_size
]
# Limiting to the batch size, otherwise the number of comparisons will explode
negative_features
=
self
.
project
(
self
.
data
[
indexes
,
...])
distances
=
[]
for
n
in
negative_features
:
d
=
euclidean
(
anchor_feature
,
n
)
negative_samples_search
=
self
.
batch_size
*
self
.
batch_increase_factor
# Limiting to the batch size, otherwise the number of comparisons will explode
indexes
=
indexes
[
0
:
negative_samples_search
]
# Loading samples for the semi-hard search
shape
=
tuple
([
len
(
indexes
)]
+
list
(
self
.
shape
[
1
:]))
temp_samples_n
=
numpy
.
zeros
(
shape
=
shape
,
dtype
=
'float32'
)
samples_n
=
numpy
.
zeros
(
shape
=
self
.
shape
,
dtype
=
'float32'
)
for
i
in
range
(
shape
[
0
]):
temp_samples_n
[
i
,
...]
=
self
.
normalize_sample
(
self
.
data
[
indexes
[
i
],
...])
# Computing all the embeddings
embedding_temp_n
=
self
.
project
(
temp_samples_n
)
# Semi-hard samples criteria
if
d
>
distance_anchor_positive
:
distances
.
append
(
d
)
else
:
distances
.
append
(
numpy
.
inf
)
# Computing the distances
d_anchor_negative
=
cdist
(
embedding_a
,
embedding_temp_n
,
metric
=
'euclidean'
)
# Selecting the negative samples
for
i
in
range
(
self
.
shape
[
0
]):
label
=
anchor_labels
[
i
]
possible_candidates
=
[
d
if
d
>
d_anchor_positive
[
i
]
else
numpy
.
inf
for
d
in
d_anchor_negative
[
i
]]
# Getting the minimum negative sample as the reference for the pair
index
=
numpy
.
argmin
(
distances
)
for
j
in
numpy
.
argsort
(
possible_candidates
):
# if the semi-hardest is inf take the first
if
numpy
.
isinf
(
distances
[
index
]):
index
=
0
# Checking if they don't have the same label
if
self
.
labels
[
indexes
[
j
]]
!=
label
:
samples_n
[
i
,
...]
=
temp_samples_n
[
j
,
...]
if
numpy
.
isinf
(
possible_candidates
[
j
]):
logger
.
info
(
"SEMI-HARD negative not found, took the first one"
)
break
return
s
elf
.
data
[
indexes
[
index
],
...]
return
s
amples_n
bob/learn/tensorflow/loss/TripletFisherLoss.py
View file @
f63f7274
...
...
@@ -15,8 +15,8 @@ class TripletFisherLoss(BaseLoss):
"""
"""
def
__init__
(
self
,
margin
=
0.2
):
self
.
margin
=
margin
def
__init__
(
self
):
pass
def
__call__
(
self
,
anchor_embedding
,
positive_embedding
,
negative_embedding
):
...
...
@@ -31,6 +31,7 @@ class TripletFisherLoss(BaseLoss):
tf
.
reduce_mean
(
negative_embedding
,
axis
=
0
)),
2
)
length
=
anchor_embedding
.
get_shape
().
as_list
()[
0
]
dim
=
anchor_embedding
.
get_shape
().
as_list
()[
1
]
split_positive
=
tf
.
unstack
(
positive_embedding
,
num
=
length
,
axis
=
0
)
split_negative
=
tf
.
unstack
(
negative_embedding
,
num
=
length
,
axis
=
0
)
...
...
@@ -40,11 +41,11 @@ class TripletFisherLoss(BaseLoss):
positive
=
s
[
0
]
negative
=
s
[
1
]
buffer_sw
=
tf
.
reshape
(
tf
.
subtract
(
positive
,
average_class
),
shape
=
(
2
,
1
))
buffer_sw
=
tf
.
matmul
(
buffer_sw
,
tf
.
reshape
(
buffer_sw
,
shape
=
(
1
,
2
)))
buffer_sw
=
tf
.
reshape
(
tf
.
subtract
(
positive
,
average_class
),
shape
=
(
dim
,
1
))
buffer_sw
=
tf
.
matmul
(
buffer_sw
,
tf
.
reshape
(
buffer_sw
,
shape
=
(
1
,
dim
)))
buffer_sb
=
tf
.
reshape
(
tf
.
subtract
(
negative
,
average_total
),
shape
=
(
2
,
1
))
buffer_sb
=
tf
.
matmul
(
buffer_sb
,
tf
.
reshape
(
buffer_sb
,
shape
=
(
1
,
2
)))
buffer_sb
=
tf
.
reshape
(
tf
.
subtract
(
negative
,
average_total
),
shape
=
(
dim
,
1
))
buffer_sb
=
tf
.
matmul
(
buffer_sb
,
tf
.
reshape
(
buffer_sb
,
shape
=
(
1
,
dim
)))
if
Sw
is
None
:
Sw
=
buffer_sw
...
...
bob/learn/tensorflow/network/VGG16.py
View file @
f63f7274
...
...
@@ -225,17 +225,22 @@ class VGG16(SequenceNetwork):
))
self
.
add
(
MaxPooling
(
name
=
"pooling5"
,
strides
=
[
1
,
2
,
2
,
1
]))
self
.
add
(
FullyConnected
(
name
=
"fc6"
,
output_dim
=
fc6_output
,
activation
=
tf
.
nn
.
relu
,
weights_initialization
=
Xavier
(
seed
=
seed
,
use_gpu
=
self
.
use_gpu
),
bias_initialization
=
Constant
(
use_gpu
=
self
.
use_gpu
)
))
self
.
add
(
Dropout
(
name
=
"dropout"
,
keep_prob
=
0.5
))
self
.
add
(
FullyConnected
(
name
=
"fc7"
,
output_dim
=
fc7_output
,
activation
=
tf
.
nn
.
relu
,
weights_initialization
=
Xavier
(
seed
=
seed
,
use_gpu
=
self
.
use_gpu
),
bias_initialization
=
Constant
(
use_gpu
=
self
.
use_gpu
)
))
self
.
add
(
Dropout
(
name
=
"dropout"
,
keep_prob
=
0.5
))
self
.
add
(
FullyConnected
(
name
=
"fc8"
,
output_dim
=
n_classes
,
activation
=
None
,
...
...
bob/learn/tensorflow/script/train.py
View file @
f63f7274
...
...
@@ -43,7 +43,7 @@ def main():
prefetch
=
PREFETCH
,
learning_rate
=
config
.
learning_rate
,
temp_dir
=
OUTPUT_DIR
,
snapshot
=
10
,
snapshot
=
10
0
,
model_from_file
=
PRETRAINED_NET
,
use_gpu
=
USE_GPU
)
...
...
bob/learn/tensorflow/utils/session.py
View file @
f63f7274
...
...
@@ -11,8 +11,9 @@ from tensorflow.python import debug as tf_debug
class
Session
(
object
):
def
__init__
(
self
):
config
=
tf
.
ConfigProto
(
log_device_placement
=
True
,
gpu_options
=
tf
.
GPUOptions
(
per_process_gpu_memory_fraction
=
0.333
))
config
=
tf
.
ConfigProto
(
log_device_placement
=
False
,
allow_soft_placement
=
True
,
gpu_options
=
tf
.
GPUOptions
(
per_process_gpu_memory_fraction
=
0.5
))
config
.
gpu_options
.
allow_growth
=
True
self
.
session
=
tf
.
Session
()
#self.session = tf_debug.LocalCLIDebugWrapperSession(self.session)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment