Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
bob.learn.tensorflow
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
bob
bob.learn.tensorflow
Commits
44e40482
Commit
44e40482
authored
6 years ago
by
Amir MOHAMMADI
Browse files
Options
Downloads
Patches
Plain Diff
Implement model saving in bob tf eval. Fixes
#54
parent
73aa5774
No related branches found
No related tags found
1 merge request
!52
Implement model saving in bob tf eval. Fixes #54
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
bob/learn/tensorflow/script/eval.py
+86
-19
86 additions, 19 deletions
bob/learn/tensorflow/script/eval.py
bob/learn/tensorflow/test/test_estimator_scripts.py
+35
-2
35 additions, 2 deletions
bob/learn/tensorflow/test/test_estimator_scripts.py
with
121 additions
and
21 deletions
bob/learn/tensorflow/script/eval.py
+
86
−
19
View file @
44e40482
...
...
@@ -4,20 +4,86 @@
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
click
import
logging
import
os
import
time
import
six
import
shutil
import
sys
import
tensorflow
as
tf
import
time
from
glob
import
glob
from
collections
import
defaultdict
from
..utils.eval
import
get_global_step
import
click
from
bob.extension.scripts.click_helper
import
(
verbosity_option
,
ConfigCommand
,
ResourceOption
)
from
bob.io.base
import
create_directories_safe
logger
=
logging
.
getLogger
(
__name__
)
def save_best_n_models(train_dir, save_dir, evaluated_file, keep_n_best_models):
    """Keep only the ``keep_n_best_models`` best checkpoints in ``save_dir``.

    Reads the ``evaluated`` file to rank global steps (higher accuracy first;
    entries without accuracy are ranked by loss, ascending), deletes saved
    checkpoint files that fell out of the top N, and copies the winning
    checkpoints over from ``train_dir`` if not already present.
    """
    create_directories_safe(save_dir)
    evaluated = read_evaluated_file(evaluated_file)

    def _rank(item):
        # item is a (global_step, metrics) pair; rank by negated accuracy
        # when available, otherwise by loss (missing loss counts as 0).
        metrics = item[1]
        accuracy = metrics.get('accuracy')
        loss = metrics.get('loss') or 0
        if accuracy is not None:
            return accuracy * -1
        return loss

    ranked = sorted(evaluated.items(), key=_rank)
    best_models = dict(ranked[:keep_n_best_models])

    # Group the checkpoint files already saved in save_dir by global step.
    saved_models = defaultdict(list)
    for path in glob('{}/model.ckpt-*'.format(save_dir)):
        step = path.split('model.ckpt-')[1].split('.')[0]
        saved_models[step].append(path)

    # Remove checkpoints that are no longer among the top N.
    for step, paths in saved_models.items():
        if step in best_models:
            continue
        for path in paths:
            logger.info("Deleting `%s'", path)
            os.remove(path)

    # Bring over the best checkpoints, skipping files already copied.
    for step in best_models:
        for path in glob('{}/model.ckpt-{}.*'.format(train_dir, step)):
            dst = os.path.join(save_dir, os.path.basename(path))
            if os.path.isfile(dst):
                continue
            logger.info("Copying `%s' over to `%s'", path, dst)
            shutil.copy(path, dst)
def read_evaluated_file(path):
    """Parse an ``evaluated`` file into a dict keyed by global step.

    Each line has the shape ``<global_step> k1 = v1, k2 = v2, ...``.
    Metric values are stored as floats, except keys containing
    ``global_step``, which are stored as ints. The top-level keys are the
    global-step strings taken verbatim from each line's first column.
    """
    evaluated = {}
    with open(path) as f:
        for line in f:
            # First space separates the step from the metric list.
            step, metrics_part = line.split(' ', 1)
            metrics = {}
            for pair in metrics_part.strip().split(', '):
                key, raw_value = pair.split(' = ')
                value = float(raw_value)
                if 'global_step' in key:
                    value = int(value)
                metrics[key] = value
            evaluated[step] = metrics
    return evaluated
def append_evaluated_file(path, evaluations):
    """Append one evaluation summary line to ``path``.

    The line has the shape ``<global_step> k1 = v1, k2 = v2, ...`` with the
    metrics sorted by key, matching what ``read_evaluated_file`` parses.

    Parameters
    ----------
    path : str
        File to append to (created if it does not exist).
    evaluations : dict
        Metric name to value mapping; must contain a ``global_step`` key.

    Returns
    -------
    str
        The formatted ``k = v`` summary (without the global-step prefix).
    """
    # dict.items() behaves identically on Python 2 and 3 here;
    # six.iteritems was an unnecessary third-party dependency.
    str_evaluations = ', '.join(
        '%s = %s' % (k, v) for k, v in sorted(evaluations.items()))
    with open(path, 'a') as f:
        f.write('{} {}\n'.format(
            evaluations['global_step'], str_evaluations))
    return str_evaluations
@click.command
(
entry_point_group
=
'
bob.learn.tensorflow.config
'
,
cls
=
ConfigCommand
)
@click.option
(
'
--estimator
'
,
'
-e
'
,
required
=
True
,
cls
=
ResourceOption
,
...
...
@@ -28,12 +94,14 @@ logger = logging.getLogger(__name__)
entry_point_group
=
'
bob.learn.tensorflow.hook
'
)
@click.option
(
'
--run-once
'
,
cls
=
ResourceOption
,
default
=
False
,
show_default
=
True
)
@click.option
(
'
--eval-interval-secs
'
,
cls
=
ResourceOption
,
type
=
click
.
types
.
INT
,
@click.option
(
'
--eval-interval-secs
'
,
cls
=
ResourceOption
,
type
=
click
.
INT
,
default
=
60
,
show_default
=
True
)
@click.option
(
'
--name
'
,
cls
=
ResourceOption
)
@click.option
(
'
--keep-n-best-models
'
,
'
-K
'
,
type
=
click
.
INT
,
cls
=
ResourceOption
,
default
=
0
,
show_default
=
True
)
@verbosity_option
(
cls
=
ResourceOption
)
def
eval
(
estimator
,
eval_input_fn
,
hooks
,
run_once
,
eval_interval_secs
,
name
,
**
kwargs
):
keep_n_best_models
,
**
kwargs
):
"""
Evaluates networks using Tensorflow estimators.
\b
...
...
@@ -76,18 +144,16 @@ def eval(estimator, eval_input_fn, hooks, run_once, eval_interval_secs, name,
logger
.
debug
(
'
run_once: %s
'
,
run_once
)
logger
.
debug
(
'
eval_interval_secs: %s
'
,
eval_interval_secs
)
logger
.
debug
(
'
name: %s
'
,
name
)
logger
.
debug
(
'
keep_n_best_models: %s
'
,
keep_n_best_models
)
logger
.
debug
(
'
kwargs: %s
'
,
kwargs
)
if
name
:
real_name
=
'
eval_
'
+
name
else
:
real_name
=
'
eval
'
evaluated_file
=
os
.
path
.
join
(
estimator
.
model_dir
,
real_name
,
'
evaluated
'
)
real_name
=
'
eval_
'
+
name
if
name
else
'
eval
'
eval_dir
=
os
.
path
.
join
(
estimator
.
model_dir
,
real_name
)
evaluated_file
=
os
.
path
.
join
(
eval_dir
,
'
evaluated
'
)
while
True
:
evaluated_steps
=
[]
evaluated_steps
=
{}
if
os
.
path
.
exists
(
evaluated_file
):
with
open
(
evaluated_file
)
as
f
:
evaluated_steps
=
[
line
.
split
()[
0
]
for
line
in
f
]
evaluated_steps
=
read_evaluated_file
(
evaluated_file
)
ckpt
=
tf
.
train
.
get_checkpoint_state
(
estimator
.
model_dir
)
if
(
not
ckpt
)
or
(
not
ckpt
.
model_checkpoint_path
):
...
...
@@ -113,14 +179,15 @@ def eval(estimator, eval_input_fn, hooks, run_once, eval_interval_secs, name,
name
=
name
,
)
str_evaluations
=
'
,
'
.
join
(
'
%s = %s
'
%
(
k
,
v
)
for
k
,
v
in
sorted
(
six
.
iteritems
(
evaluations
)))
print
(
str_evaluations
)
str_evaluations
=
append_evaluated_file
(
evaluated_file
,
evaluations
)
click
.
echo
(
str_evaluations
)
sys
.
stdout
.
flush
()
with
open
(
evaluated_file
,
'
a
'
)
as
f
:
f
.
write
(
'
{} {}
\n
'
.
format
(
evaluations
[
'
global_step
'
],
str_evaluations
))
# Save the best N models into the eval directory
save_best_n_models
(
estimator
.
model_dir
,
eval_dir
,
evaluated_file
,
keep_n_best_models
)
if
run_once
:
break
time
.
sleep
(
eval_interval_secs
)
This diff is collapsed.
Click to expand it.
bob/learn/tensorflow/test/test_estimator_scripts.py
+
35
−
2
View file @
44e40482
from
__future__
import
print_function
import
os
import
shutil
from
glob
import
glob
from
tempfile
import
mkdtemp
from
click.testing
import
CliRunner
from
bob.io.base.test_utils
import
datafile
...
...
@@ -122,7 +123,7 @@ def _create_checkpoint(tmpdir, model_dir, dummy_tfrecord):
result
.
exc_info
,
result
.
output
,
result
.
exception
)
def
_eval
(
tmpdir
,
model_dir
,
dummy_tfrecord
):
def
_eval
(
tmpdir
,
model_dir
,
dummy_tfrecord
,
extra_args
=
[]
):
config
=
CONFIG
%
{
'
model_dir
'
:
model_dir
,
'
tfrecord_filenames
'
:
dummy_tfrecord
...
...
@@ -131,7 +132,7 @@ def _eval(tmpdir, model_dir, dummy_tfrecord):
with
open
(
config_path
,
'
w
'
)
as
f
:
f
.
write
(
config
)
runner
=
CliRunner
()
result
=
runner
.
invoke
(
eval_script
,
args
=
[
config_path
])
result
=
runner
.
invoke
(
eval_script
,
args
=
[
config_path
]
+
extra_args
)
assert
result
.
exit_code
==
0
,
'
%s
\n
%s
\n
%s
'
%
(
result
.
exc_info
,
result
.
output
,
result
.
exception
)
...
...
@@ -179,3 +180,35 @@ def test_eval():
shutil
.
rmtree
(
tmpdir
)
except
Exception
:
pass
def test_eval_keep_n_model():
    """End-to-end check that ``eval -K 1`` keeps only the best checkpoint."""
    tmpdir = mkdtemp(prefix='bob_')
    try:
        model_dir = os.path.join(tmpdir, 'model_dir')
        eval_dir = os.path.join(model_dir, 'eval')

        print('\nCreating a dummy tfrecord')
        dummy_tfrecord = _create_tfrecord(tmpdir)

        print('Training a dummy network')
        _create_checkpoint(tmpdir, model_dir, dummy_tfrecord)

        print('Evaluating a dummy network')
        # -K 1 requests that only the single best model be kept.
        _eval(tmpdir, model_dir, dummy_tfrecord, ['-K', '1'])

        evaluated_path = os.path.join(eval_dir, 'evaluated')
        assert os.path.exists(evaluated_path), evaluated_path
        with open(evaluated_path) as f:
            contents = f.read()
        assert '1' in contents, contents
        assert '200' in contents, contents
        # Exactly three files expected for the one kept checkpoint
        # (presumably the data/index/meta triple — confirm if TF changes).
        kept = glob('{}/model.ckpt-*'.format(eval_dir))
        assert len(kept) == 3, os.listdir(eval_dir)
    finally:
        try:
            shutil.rmtree(tmpdir)
        except Exception:
            pass
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment