Commit 10496287 authored by Tiago de Freitas Pereira

Merge branch 'smallfix' into 'master'

[predict_bio] Some fixes and new features

Closes #71

See merge request !72
parents 8653354d 5166896e
Pipeline #26082 passed with stages in 29 minutes and 35 seconds
@@ -100,7 +100,7 @@ class Regressor(estimator.Estimator):
# Some layer like tf.layers.batch_norm need this:
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
with tf.control_dependencies(update_ops), tf.name_scope('train'):
# Calculate Loss
self.loss = self._get_loss(
...
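For context, the hunk above touches the standard TF1 batch-norm idiom: the moving-average updates live in the UPDATE_OPS collection and must be made dependencies of the train op, while the added tf.name_scope('train') only groups the resulting ops in TensorBoard. A minimal sketch of the pattern, assuming TensorFlow 1.x (not part of the merge):

import tensorflow as tf  # TensorFlow 1.x assumed

x = tf.placeholder(tf.float32, [None, 10])
# tf.layers.batch_normalization registers its moving-average updates in the
# UPDATE_OPS collection instead of running them automatically
net = tf.layers.batch_normalization(x, training=True)
loss = tf.reduce_mean(tf.square(net))

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops), tf.name_scope('train'):
    # minimize() now also runs the batch-norm updates, and the resulting ops
    # show up grouped under 'train' in TensorBoard
    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)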
@@ -271,7 +271,9 @@ def slim_architecture(
weights_regularizer=slim.l2_regularizer(weight_decay),
normalizer_fn=slim.batch_norm,
normalizer_params=batch_norm_params):
with tf.variable_scope('SimpleCNN', reuse=reuse):
with tf.variable_scope('SimpleCNN', reuse=reuse), slim.arg_scope(
[slim.batch_norm, slim.dropout],
is_training=mode == tf.estimator.ModeKeys.TRAIN):
initializer = tf.contrib.layers.xavier_initializer()
name = 'conv1'
...
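The extra with-clause added here is the usual slim idiom for flipping batch norm and dropout between training and inference from a single switch. A rough sketch, assuming TensorFlow 1.x with tf.contrib.slim (layer names and sizes are illustrative):

import tensorflow as tf

slim = tf.contrib.slim

def simple_cnn(images, mode, reuse=None):
    # one is_training switch drives both batch_norm (batch statistics vs.
    # moving averages) and dropout (active vs. identity) inside the scope
    with tf.variable_scope('SimpleCNN', reuse=reuse), slim.arg_scope(
            [slim.batch_norm, slim.dropout],
            is_training=mode == tf.estimator.ModeKeys.TRAIN):
        net = slim.conv2d(images, 32, [3, 3],
                          normalizer_fn=slim.batch_norm, scope='conv1')
        net = slim.dropout(net, 0.5, scope='dropout1')
        return net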
@@ -22,6 +22,20 @@ from bob.io.base import create_directories_safe
logger = logging.getLogger(__name__)
def copy_one_step(train_dir, global_step, save_dir):
for path in glob('{}/model.ckpt-{}.*'.format(train_dir, global_step)):
dst = os.path.join(save_dir, os.path.basename(path))
if os.path.isfile(dst):
continue
try:
shutil.copy(path, dst)
logger.info("Copied `%s' over to `%s'", path, dst)
except OSError:
logger.warning(
"Failed to copy `%s' over to `%s'", path, dst,
exc_info=True)
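copy_one_step factors out the copy loop that previously lived inside save_n_best_models (see the hunk below). For reference, a TF1 checkpoint at a given global step is usually spread over several files, all matched by the glob above; a hypothetical call with made-up paths:

# files typically present for global step 5000:
#   model.ckpt-5000.index
#   model.ckpt-5000.meta
#   model.ckpt-5000.data-00000-of-00001
copy_one_step('/path/to/train_dir', 5000, '/path/to/save_dir')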
def save_n_best_models(train_dir, save_dir, evaluated_file,
keep_n_best_models, sort_by):
create_directories_safe(save_dir)
@@ -53,17 +67,7 @@ def save_n_best_models(train_dir, save_dir, evaluated_file,
# copy over the best models if not already there
for global_step in best_models:
for path in glob('{}/model.ckpt-{}.*'.format(train_dir, global_step)):
dst = os.path.join(save_dir, os.path.basename(path))
if os.path.isfile(dst):
continue
try:
shutil.copy(path, dst)
logger.info("Copied `%s' over to `%s'", path, dst)
except OSError:
logger.warning(
"Failed to copy `%s' over to `%s'", path, dst,
exc_info=True)
copy_one_step(train_dir, global_step, save_dir)
# create a checkpoint file indicating to the best existing model:
# 1. filter non-existing models first
@@ -139,6 +143,7 @@ def append_evaluated_file(path, evaluations):
cls=ResourceOption,
default=False,
show_default=True,
is_flag=True,
help='If given, the model will be evaluated only once.')
@click.option(
'--eval-interval-secs',
@@ -170,9 +175,15 @@ def append_evaluated_file(path, evaluations):
default=-1,
show_default=True,
help='If given, the maximum number of intervals waiting for new training checkpoint.')
@click.option(
'--force-re-run',
is_flag=True,
default=False,
help='A debugging flag. Do not use!')
@verbosity_option(cls=ResourceOption)
def eval(estimator, eval_input_fn, hooks, run_once, eval_interval_secs, name,
keep_n_best_models, sort_by, max_wait_intervals, **kwargs):
keep_n_best_models, sort_by, max_wait_intervals, force_re_run,
**kwargs):
"""Evaluates networks using Tensorflow estimators."""
log_parameters(logger)
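The is_flag=True additions matter because, without it, click treats --run-once as an option that expects a value; with it, the bare switch toggles the boolean. A self-contained sketch of that behaviour with plain click (the command name here is made up):

import click

@click.command()
@click.option('--run-once', is_flag=True, default=False, show_default=True,
              help='If given, the model will be evaluated only once.')
@click.option('--force-re-run', is_flag=True, default=False,
              help='A debugging flag. Do not use!')
def demo(run_once, force_re_run):
    # `demo --run-once` sets run_once=True without requiring a value
    click.echo('run_once=%s force_re_run=%s' % (run_once, force_re_run))

if __name__ == '__main__':
    demo()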
@@ -216,9 +227,13 @@ def eval(estimator, eval_input_fn, hooks, run_once, eval_interval_secs, name,
print('Failed to find global_step for checkpoint_path {}, '
'skipping ...'.format(checkpoint_path))
continue
if global_step in evaluated_steps:
if global_step in evaluated_steps and not force_re_run:
continue
# copy over the checkpoint before evaluating since it might
# disappear after evaluation.
copy_one_step(estimator.model_dir, global_step, eval_dir)
# Evaluate
try:
evaluations = estimator.evaluate(
...
@@ -18,6 +18,10 @@ from bob.io.base import create_directories_safe
from bob.bio.base.utils import save
from bob.bio.base.tools.grid import indices
from bob.learn.tensorflow.dataset.bio import BioGenerator
try:
import bob.bio.video
except ModuleNotFoundError:
pass
logger = logging.getLogger(__name__)
@@ -51,11 +55,18 @@ def non_existing_files(paths, force=False):
yield i
def save_predictions(pool, output_dir, key, pred_buffer):
def save_predictions(pool, output_dir, key, pred_buffer, video_container):
outpath = make_output_path(output_dir, key)
create_directories_safe(os.path.dirname(outpath))
logger.debug("Saving predictions for %s", key)
pool.apply_async(save, (np.mean(pred_buffer[key], axis=0), outpath))
if video_container:
fc = bob.bio.video.FrameContainer()
for i, v in enumerate(pred_buffer[key]):
fc.add(i, v)
data = fc
else:
data = np.mean(pred_buffer[key], axis=0)
pool.apply_async(save, (data, outpath))
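To summarise the new video_container switch: by default the per-sample predictions collected in pred_buffer[key] are averaged into a single vector, whereas with --video-container every prediction is kept as one frame of a bob.bio.video.FrameContainer (the fc.add(i, v) loop above). A tiny sketch of the default averaging path, with made-up numbers:

import numpy as np

# three per-frame probability vectors collected for one key
pred_buffer = {'subject1/video1': [np.array([0.2, 0.8]),
                                   np.array([0.4, 0.6]),
                                   np.array([0.3, 0.7])]}
# default behaviour: one averaged vector per key -> [0.3, 0.7]
data = np.mean(pred_buffer['subject1/video1'], axis=0)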
@click.command(
@@ -136,7 +147,7 @@ def save_predictions(pool, output_dir, key, pred_buffer):
cls=ResourceOption,
help='If provided, it assumes that the db interface returns '
'several samples from a biofile. This option can be used '
'when you are working with')
'when you are working with videos.')
@click.option(
'--array',
'-t',
@@ -151,10 +162,17 @@ def save_predictions(pool, output_dir, key, pred_buffer):
is_flag=True,
cls=ResourceOption,
help='Whether to overwrite existing predictions.')
@click.option(
'--video-container',
'-vc',
is_flag=True,
cls=ResourceOption,
help='If provided, the predictions will be written in FrameContainers from'
' bob.bio.video. You need to install bob.bio.video as well.')
@verbosity_option(cls=ResourceOption)
def predict_bio(estimator, database, biofiles, bio_predict_input_fn,
output_dir, load_data, hooks, predict_keys, checkpoint_path,
multiple_samples, array, force, **kwargs):
multiple_samples, array, force, video_container, **kwargs):
"""Saves predictions or embeddings of tf.estimators.
This script works with bob.bio.base databases. This script works with
@@ -188,7 +206,7 @@ def predict_bio(estimator, database, biofiles, bio_predict_input_fn,
dataset = dataset.batch(10**3)
images, labels, keys = dataset.make_one_shot_iterator().get_next()
return {'data': images, 'keys': keys}, labels
return {'data': images, 'key': keys}, labels
return input_fn
"""
log_parameters(logger, ignore=('biofiles', ))
@@ -221,9 +239,21 @@ def predict_bio(estimator, database, biofiles, bio_predict_input_fn,
predict_input_fn = bio_predict_input_fn(generator, generator.output_types,
generator.output_shapes)
logger.info("Saving the predictions of %d files in %s", len(generator),
output_dir)
generic_predict(
estimator, predict_input_fn, output_dir, predict_keys=predict_keys,
checkpoint_path=checkpoint_path, hooks=hooks,
video_container=video_container)
def generic_predict(estimator, predict_input_fn, output_dir, predict_keys=None,
checkpoint_path=None, hooks=None, video_container=False):
# if the checkpoint_path is a directory, pick the latest checkpoint from
# that directory
if checkpoint_path:
if os.path.isdir(checkpoint_path):
ckpt = tf.train.get_checkpoint_state(estimator.model_dir)
ckpt = tf.train.get_checkpoint_state(checkpoint_path)
if ckpt and ckpt.model_checkpoint_path:
checkpoint_path = ckpt.model_checkpoint_path
@@ -236,8 +266,12 @@ def predict_bio(estimator, database, biofiles, bio_predict_input_fn,
checkpoint_path=checkpoint_path,
)
logger.info("Saving the predictions of %d files in %s", len(generator),
output_dir)
if video_container:
try:
import bob.bio.video
except ModuleNotFoundError:
raise click.ClickException(
'Could not import bob.bio.video. Have you installed it?')
pool = Pool()
try:
@@ -247,7 +281,8 @@ def predict_bio(estimator, database, biofiles, bio_predict_input_fn,
# key is in bytes format in Python 3
if sys.version_info >= (3, ):
key = key.decode(errors='replace')
prob = pred.get('probabilities', pred.get('embeddings', pred.get('predictions')))
prob = pred.get('probabilities', pred.get(
'embeddings', pred.get('predictions')))
assert prob is not None
pred_buffer[key].append(prob)
if i == 0:
@@ -255,10 +290,11 @@ def predict_bio(estimator, database, biofiles, bio_predict_input_fn,
if last_key == key:
continue
else:
save_predictions(pool, output_dir, last_key, pred_buffer)
save_predictions(
pool, output_dir, last_key, pred_buffer, video_container)
last_key = key
# save the final returned key as well:
save_predictions(pool, output_dir, key, pred_buffer)
save_predictions(pool, output_dir, key, pred_buffer, video_container)
finally:
pool.close()
pool.join()
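generic_predict is the new reusable entry point split out of predict_bio; it wraps estimator.predict plus the saving loop above. A hedged usage sketch, where estimator and my_input_fn are placeholders rather than names from this package:

# `estimator` is any tf.estimator.Estimator and `my_input_fn` any function
# returning ({'data': ..., 'key': ...}, labels); both are assumptions here.
generic_predict(
    estimator, my_input_fn, '/path/to/predictions',
    checkpoint_path=None,   # or a checkpoint file / directory
    video_container=False)  # True requires bob.bio.video to be installed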
import os
from bob.bio.base.test.dummy.database import database
from bob.bio.base.utils import read_original_data
...
import tensorflow as tf
model_dir = "%(model_dir)s"
learning_rate = 0.00001
def architecture(images):
images = tf.cast(images, tf.float32)
logits = tf.reshape(images, [-1, 92 * 112])
logits = tf.contrib.slim.fully_connected(inputs=logits, num_outputs=20)
return logits
def model_fn(features, labels, mode, config):
key = features['key']
features = features['data']
logits = architecture(features)
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=logits, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
"key": key,
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Calculate Loss (for both TRAIN and EVAL modes)
loss = tf.losses.sparse_softmax_cross_entropy(
logits=logits, labels=labels)
accuracy = tf.metrics.accuracy(
labels=labels, predictions=predictions["classes"])
metrics = {'accuracy': accuracy}
# Configure the training op
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(
learning_rate=learning_rate)
train_op = optimizer.minimize(
loss=loss, global_step=tf.train.get_or_create_global_step())
# Log accuracy and loss
with tf.name_scope('train_metrics'):
tf.summary.scalar('accuracy', accuracy[1])
tf.summary.scalar('loss', loss)
else:
train_op = None
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions,
loss=loss,
train_op=train_op,
eval_metric_ops=metrics)
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)
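The config above is a template: its model_dir is a %-formatting placeholder that the tests fill in before loading (see estimator_atnt_faces_config() further down). A hypothetical substitution, just to show the mechanism:

# the test helper reads the file and fills the %(model_dir)s placeholder
# with a temporary directory before handing the config to the scripts
with open('estimator_atnt_faces_config.py') as f:
    config = f.read() % {'model_dir': '/tmp/model_dir'}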
from bob.learn.tensorflow.dataset.bio import BioGenerator
from bob.learn.tensorflow.utils import to_channels_last
import tensorflow as tf
batch_size = 2
epochs = 2
def input_fn(mode):
from bob.bio.base.test.dummy.database import database as db
if mode == tf.estimator.ModeKeys.TRAIN:
groups = 'world'
elif mode == tf.estimator.ModeKeys.EVAL:
groups = 'dev'
files = db.objects(groups=groups)
# construct integer labels for each identity in the database
CLIENT_IDS = (str(f.client_id) for f in files)
CLIENT_IDS = list(set(CLIENT_IDS))
CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))
def biofile_to_label(f):
return CLIENT_IDS[str(f.client_id)]
def load_data(database, f):
img = f.load(database.original_directory, database.original_extension)
# make a channels_first image (bob format) with 1 channel
img = img.reshape(1, 112, 92)
return img
generator = BioGenerator(db, files, load_data, biofile_to_label)
dataset = tf.data.Dataset.from_generator(
generator, generator.output_types, generator.output_shapes)
def transform(image, label, key):
# convert to channels last
image = to_channels_last(image)
# per_image_standardization
image = tf.image.per_image_standardization(image)
return (image, label, key)
dataset = dataset.map(transform)
if mode == tf.estimator.ModeKeys.TRAIN:
# since we are caching to memory, caching only in training makes sense.
dataset = dataset.cache()
dataset = dataset.repeat(epochs)
dataset = dataset.batch(batch_size)
data, label, key = dataset.make_one_shot_iterator().get_next()
return {'data': data, 'key': key}, label
def train_input_fn():
return input_fn(tf.estimator.ModeKeys.TRAIN)
def eval_input_fn():
return input_fn(tf.estimator.ModeKeys.EVAL)
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=50)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
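The TrainSpec/EvalSpec pair at the end of this config is what tf.estimator.train_and_evaluate consumes. A one-line sketch, assuming the estimator from the previous config and the specs above are loaded into the same namespace:

import tensorflow as tf

# `estimator` would come from estimator_atnt_faces_config.py, `train_spec`
# and `eval_spec` from this file; the call alternates training and
# evaluation until max_steps (50 here) is reached.
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)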
import tensorflow as tf
from bob.bio.base.test.dummy.database import database
biofiles = database.all_files(['dev'])
def bio_predict_input_fn(generator, output_types, output_shapes):
def input_fn():
dataset = tf.data.Dataset.from_generator(
generator, output_types, output_shapes)
# apply all kinds of transformations here, process the data
# even further if you want.
dataset = dataset.prefetch(1)
dataset = dataset.batch(10**3)
images, labels, keys = dataset.make_one_shot_iterator().get_next()
return {'data': images, 'key': keys}, labels
return input_fn
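The input_fn built by bio_predict_input_fn above is what estimator.predict is fed inside generic_predict. A rough sketch of the consuming side, where estimator and input_fn are assumed to already exist:

import sys

# `estimator` is a tf.estimator.Estimator and `input_fn` the function built
# by bio_predict_input_fn above; both are assumptions here.
for pred in estimator.predict(input_fn):
    key = pred['key']
    if sys.version_info >= (3, ):
        # keys come back as bytes under Python 3, as in the main loop above
        key = key.decode(errors='replace')
    prob = pred.get('probabilities',
                    pred.get('embeddings', pred.get('predictions')))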
import tensorflow as tf
from bob.learn.tensorflow.dataset.tfrecords import shuffle_data_and_labels, \
batch_data_and_labels
tfrecord_filenames = ['%(tfrecord_filenames)s']
data_shape = (1, 112, 92) # size of atnt images
data_type = tf.uint8
batch_size = 2
epochs = 2
def train_input_fn():
return shuffle_data_and_labels(tfrecord_filenames, data_shape, data_type,
batch_size, epochs=epochs)
def eval_input_fn():
return batch_data_and_labels(tfrecord_filenames, data_shape, data_type,
batch_size, epochs=1)
# config for train_and_evaluate
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=200)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
@@ -3,14 +3,15 @@ import shutil
import pkg_resources
import tempfile
from click.testing import CliRunner
import bob.io.base
from bob.learn.tensorflow.script.db_to_tfrecords import db_to_tfrecords, describe_tf_record
from bob.io.base import create_directories_safe
from bob.learn.tensorflow.script.db_to_tfrecords import (
db_to_tfrecords, describe_tf_record)
from bob.learn.tensorflow.utils import load_mnist, create_mnist_tfrecord
regenerate_reference = False
dummy_config = pkg_resources.resource_filename(
'bob.learn.tensorflow', 'test/data/dummy_verify_config.py')
'bob.learn.tensorflow', 'test/data/db_to_tfrecords_config.py')
def test_db_to_tfrecords():
@@ -52,20 +53,21 @@ def test_db_to_tfrecords_size_estimate():
def test_tfrecord_counter():
tfrecord_train = "./tf-train-test/train_mnist.tfrecord"
shape = (3136,) # I'm saving the thing as float
shape = (3136,) # I'm saving the thing as float
batch_size = 1000
try:
train_data, train_labels, validation_data, validation_labels = load_mnist()
bob.io.base.create_directories_safe(os.path.dirname(tfrecord_train))
train_data, train_labels, validation_data, validation_labels = \
load_mnist()
create_directories_safe(os.path.dirname(tfrecord_train))
create_mnist_tfrecord(
tfrecord_train, train_data, train_labels, n_samples=6000)
n_samples, n_labels = describe_tf_record(os.path.dirname(tfrecord_train), shape, batch_size)
n_samples, n_labels = describe_tf_record(
os.path.dirname(tfrecord_train), shape, batch_size)
assert n_samples == 6000
assert n_labels == 10
finally:
shutil.rmtree(os.path.dirname(tfrecord_train))
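describe_tf_record, imported above, scans the tfrecord files under the given directory and returns the sample and label counts that the test asserts on. A hypothetical standalone call with the same shape and batch size:

from bob.learn.tensorflow.script.db_to_tfrecords import describe_tf_record

# shape and batch_size as in the test above; the directory is assumed to
# contain the *.tfrecord files written by create_mnist_tfrecord
n_samples, n_labels = describe_tf_record('./tf-train-test', (3136,), 1000)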
@@ -4,144 +4,67 @@ import shutil
from glob import glob
from tempfile import mkdtemp
from click.testing import CliRunner
from bob.extension.scripts.click_helper import assert_click_runner_result
from bob.io.base.test_utils import datafile
from bob.learn.tensorflow.script.db_to_tfrecords import db_to_tfrecords
from bob.learn.tensorflow.script.train import train
from bob.learn.tensorflow.script.eval import eval as eval_script
from bob.learn.tensorflow.script.train_and_evaluate import train_and_evaluate
from bob.learn.tensorflow.script.predict_bio import predict_bio
dummy_tfrecord_config = datafile('dummy_verify_config.py', __name__)
CONFIG = '''
import tensorflow as tf
from bob.learn.tensorflow.dataset.tfrecords import shuffle_data_and_labels, \
batch_data_and_labels
model_dir = "%(model_dir)s"
tfrecord_filenames = ['%(tfrecord_filenames)s']
data_shape = (1, 112, 92) # size of atnt images
data_type = tf.uint8
batch_size = 2
epochs = 2
learning_rate = 0.00001
run_once = True
def train_input_fn():
return shuffle_data_and_labels(tfrecord_filenames, data_shape, data_type,
batch_size, epochs=epochs)
def eval_input_fn():
return batch_data_and_labels(tfrecord_filenames, data_shape, data_type,
batch_size, epochs=1)
# config for train_and_evaluate
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=200)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
def architecture(images):
images = tf.cast(images, tf.float32)
logits = tf.reshape(images, [-1, 92 * 112])
logits = tf.layers.dense(inputs=logits, units=20,
activation=tf.nn.relu)
return logits
def model_fn(features, labels, mode, params, config):
key = features['key']
features = features['data']
logits = architecture(features)
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=logits, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
"key": key,
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Calculate Loss (for both TRAIN and EVAL modes)
loss = tf.losses.sparse_softmax_cross_entropy(
logits=logits, labels=labels)
accuracy = tf.metrics.accuracy(
labels=labels, predictions=predictions["classes"])
metrics = {'accuracy': accuracy}
# Configure the training op
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(
learning_rate=learning_rate)
train_op = optimizer.minimize(
loss=loss, global_step=tf.train.get_or_create_global_step())
# Log accuracy and loss
with tf.name_scope('train_metrics'):
tf.summary.scalar('accuracy', accuracy[1])
tf.summary.scalar('loss', loss)
else:
train_op = None
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions,
loss=loss,
train_op=train_op,
eval_metric_ops=metrics)
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)
'''
db_to_tfrecords_config = datafile('db_to_tfrecords_config.py', __name__)
input_predict_bio_config = datafile('input_predict_bio_config.py', __name__)
input_biogenerator_config = datafile('input_biogenerator_config.py', __name__)
def input_tfrecords_config(tfrecord_path):
with open(datafile('input_tfrecords_config.py', __name__)) as f:
doc = '\n' + f.read() + '\n'
return doc % {'tfrecord_filenames': tfrecord_path}
def estimator_atnt_faces_config(model_dir):
with open(datafile('estimator_atnt_faces_config.py', __name__)) as f:
doc = '\n' + f.read() + '\n'
return doc % {'model_dir': model_dir}
def _create_tfrecord(test_dir):
config_path = os.path.join(test_dir, 'tfrecordconfig.py')
with open(dummy_tfrecord_config) as f, open(config_path, 'w') as f2:
f2.write(f.read().replace('TEST_DIR', test_dir))
output = os.path.join(test_dir, 'dev.tfrecords')
runner = CliRunner()
result = runner.invoke(
db_to_tfrecords, args=[dummy_tfrecord_config, '--output', output])
assert result.exit_code == 0, '%s\n%s\n%s' % (
result.exc_info, result.output, result.exception)
db_to_tfrecords, args=[db_to_tfrecords_config, '--output', output])
assert_click_runner_result(result)
return output
def _create_checkpoint(tmpdir, model_dir, dummy_tfrecord):
config = CONFIG % {
'model_dir': model_dir,
'tfrecord_filenames': dummy_tfrecord
}
def _create_checkpoint(tmpdir, model_dir, tfrecord_path):
config = input_tfrecords_config(
tfrecord_path) + estimator_atnt_faces_config(model_dir)
config_path = os.path.join(tmpdir, 'train_config.py')
with open(config_path, 'w') as f:
f.write(config)