Commit e2841635 authored by Amir MOHAMMADI's avatar Amir MOHAMMADI
Browse files

re-organize estimator script tests

parent b89aa8ab
Pipeline #26004 failed with stage
in 28 minutes and 2 seconds
import tensorflow as tf
# Template placeholder: the test harness fills this in via `doc % {...}`
# before executing this config.
model_dir = "%(model_dir)s"
# SGD learning rate consumed by GradientDescentOptimizer in model_fn below.
learning_rate = 0.00001
def architecture(images):
    """Flatten the input images and apply one fully connected layer.

    Returns logits with 20 output units.
    """
    floats = tf.cast(images, tf.float32)
    flat = tf.reshape(floats, [-1, 92 * 112])
    return tf.contrib.slim.fully_connected(inputs=flat, num_outputs=20)
def model_fn(features, labels, mode, config):
    """Model function for a tf.estimator.Estimator.

    `features` is expected to be a dict with 'data' (the images) and
    'key' (a per-sample identifier that is forwarded into predictions).
    """
    key = features['key']
    features = features['data']
    logits = architecture(features)
    predictions = {
        # Generate predictions (for PREDICT and EVAL mode)
        "classes": tf.argmax(input=logits, axis=1),
        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
        # `logging_hook`.
        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
        # Forward the sample key so predictions can be matched to inputs.
        "key": key,
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.losses.sparse_softmax_cross_entropy(
        logits=logits, labels=labels)
    accuracy = tf.metrics.accuracy(
        labels=labels, predictions=predictions["classes"])
    metrics = {'accuracy': accuracy}
    # Configure the training op
    if mode == tf.estimator.ModeKeys.TRAIN:
        # `learning_rate` is the module-level constant defined above.
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_or_create_global_step())
        # Log accuracy and loss
        with tf.name_scope('train_metrics'):
            # accuracy[1] is the update op of the (value, update_op) pair
            # returned by tf.metrics.accuracy.
            tf.summary.scalar('accuracy', accuracy[1])
            tf.summary.scalar('loss', loss)
    else:
        train_op = None
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metrics)
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)
from bob.learn.tensorflow.dataset.bio import BioGenerator
from bob.learn.tensorflow.utils import to_channels_last
import tensorflow as tf
# Mini-batch size and number of passes over the training data.
batch_size = 2
epochs = 2
def input_fn(mode):
    """Build the estimator input pipeline from the dummy bob database.

    Returns a ``({'data': images, 'key': keys}, labels)`` pair for the
    'world' group in TRAIN mode and the 'dev' group in EVAL mode.

    Raises:
        ValueError: if ``mode`` is neither TRAIN nor EVAL.
    """
    from bob.bio.base.test.dummy.database import database as db

    if mode == tf.estimator.ModeKeys.TRAIN:
        groups = 'world'
    elif mode == tf.estimator.ModeKeys.EVAL:
        groups = 'dev'
    else:
        # Previously any other mode fell through and raised a confusing
        # NameError on `groups`; fail explicitly instead.
        raise ValueError('Unsupported mode: {}'.format(mode))

    files = db.objects(groups=groups)

    # construct integer labels for each identity in the database;
    # sort so the id -> label mapping is deterministic across runs
    # (plain list(set(...)) ordering is not reproducible).
    CLIENT_IDS = (str(f.client_id) for f in files)
    CLIENT_IDS = sorted(set(CLIENT_IDS))
    CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))

    def biofile_to_label(f):
        # integer label for this file's identity
        return CLIENT_IDS[str(f.client_id)]

    def load_data(database, f):
        img = f.load(database.original_directory, database.original_extension)
        # make a channels_first image (bob format) with 1 channel
        img = img.reshape(1, 112, 92)
        return img

    generator = BioGenerator(db, files, load_data, biofile_to_label)
    dataset = tf.data.Dataset.from_generator(
        generator, generator.output_types, generator.output_shapes)

    def transform(image, label, key):
        # convert to channels last
        image = to_channels_last(image)
        # per_image_standardization
        image = tf.image.per_image_standardization(image)
        return (image, label, key)

    dataset = dataset.map(transform)
    if mode == tf.estimator.ModeKeys.TRAIN:
        # since we are caching to memory, caching only in training makes sense.
        dataset = dataset.cache()
        dataset = dataset.repeat(epochs)
    dataset = dataset.batch(batch_size)
    data, label, key = dataset.make_one_shot_iterator().get_next()
    return {'data': data, 'key': key}, label
def train_input_fn():
    """Training input pipeline (the 'world' group, see input_fn)."""
    return input_fn(tf.estimator.ModeKeys.TRAIN)
def eval_input_fn():
    """Evaluation input pipeline (the 'dev' group, see input_fn)."""
    return input_fn(tf.estimator.ModeKeys.EVAL)
# Specs consumed by tf.estimator.train_and_evaluate; training stops
# after 50 global steps.
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=50)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
import tensorflow as tf
from bob.bio.base.test.dummy.database import database
biofiles = database.all_files(['dev'])
def bio_predict_input_fn(generator, output_types, output_shapes):
    """Return an ``input_fn`` closure over *generator* for prediction."""
    def input_fn():
        # apply all kinds of transformations here, process the data
        # even further if you want.
        ds = tf.data.Dataset.from_generator(
            generator, output_types, output_shapes).prefetch(1).batch(10**3)
        images, labels, keys = ds.make_one_shot_iterator().get_next()
        return {'data': images, 'key': keys}, labels
    return input_fn
import tensorflow as tf
from bob.learn.tensorflow.dataset.tfrecords import shuffle_data_and_labels, \
batch_data_and_labels
# Placeholder filled in by the test harness before this config is executed.
tfrecord_filenames = ['%(tfrecord_filenames)s']
data_shape = (1, 112, 92) # size of atnt images
data_type = tf.uint8
# Mini-batch size and number of passes over the training data.
batch_size = 2
epochs = 2
def train_input_fn():
    """Shuffled training batches read from the tfrecord files."""
    return shuffle_data_and_labels(tfrecord_filenames, data_shape, data_type,
                                   batch_size, epochs=epochs)
def eval_input_fn():
    """Unshuffled evaluation batches; a single pass over the data."""
    return batch_data_and_labels(tfrecord_filenames, data_shape, data_type,
                                 batch_size, epochs=1)
# config for train_and_evaluate: stop training after 200 global steps.
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=200)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
......@@ -6,11 +6,11 @@ from click.testing import CliRunner
import bob.io.base
from bob.learn.tensorflow.script.db_to_tfrecords import db_to_tfrecords, describe_tf_record
from bob.learn.tensorflow.utils import load_mnist, create_mnist_tfrecord
regenerate_reference = False

# Path to the tfrecord config shipped with the package test data.
# The scraped diff kept both the pre- and post-change argument lines of
# this call; only the post-change one ('test/data/tfrecord_config.py')
# is retained here.
dummy_config = pkg_resources.resource_filename(
    'bob.learn.tensorflow', 'test/data/tfrecord_config.py')
def test_db_to_tfrecords():
......@@ -62,7 +62,7 @@ def test_tfrecord_counter():
tfrecord_train, train_data, train_labels, n_samples=6000)
n_samples, n_labels = describe_tf_record(os.path.dirname(tfrecord_train), shape, batch_size)
assert n_samples == 6000
assert n_labels == 10
......
......@@ -14,127 +14,35 @@ from bob.learn.tensorflow.script.train_and_evaluate import train_and_evaluate
from bob.learn.tensorflow.script.predict_bio import predict_bio
dummy_tfrecord_config = datafile('dummy_verify_config.py', __name__)
# Inline config template: %(model_dir)s and %(tfrecord_filenames)s are
# filled in via `CONFIG % {...}`.  The string body is executed as Python
# by the scripts under test, so it must stay exactly as-is.
CONFIG = '''
import tensorflow as tf
from bob.learn.tensorflow.dataset.tfrecords import shuffle_data_and_labels, \
batch_data_and_labels
model_dir = "%(model_dir)s"
tfrecord_filenames = ['%(tfrecord_filenames)s']
data_shape = (1, 112, 92) # size of atnt images
data_type = tf.uint8
batch_size = 2
epochs = 2
learning_rate = 0.00001
run_once = True
def train_input_fn():
return shuffle_data_and_labels(tfrecord_filenames, data_shape, data_type,
batch_size, epochs=epochs)
def eval_input_fn():
return batch_data_and_labels(tfrecord_filenames, data_shape, data_type,
batch_size, epochs=1)
# config for train_and_evaluate
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=200)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
def architecture(images):
images = tf.cast(images, tf.float32)
logits = tf.reshape(images, [-1, 92 * 112])
logits = tf.layers.dense(inputs=logits, units=20,
activation=tf.nn.relu)
return logits
def model_fn(features, labels, mode, params, config):
key = features['key']
features = features['data']
logits = architecture(features)
predictions = {
# Generate predictions (for PREDICT and EVAL mode)
"classes": tf.argmax(input=logits, axis=1),
# Add `softmax_tensor` to the graph. It is used for PREDICT and by the
# `logging_hook`.
"probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
"key": key,
}
if mode == tf.estimator.ModeKeys.PREDICT:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
# Calculate Loss (for both TRAIN and EVAL modes)
loss = tf.losses.sparse_softmax_cross_entropy(
logits=logits, labels=labels)
accuracy = tf.metrics.accuracy(
labels=labels, predictions=predictions["classes"])
metrics = {'accuracy': accuracy}
# Configure the training op
if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.GradientDescentOptimizer(
learning_rate=learning_rate)
train_op = optimizer.minimize(
loss=loss, global_step=tf.train.get_or_create_global_step())
# Log accuracy and loss
with tf.name_scope('train_metrics'):
tf.summary.scalar('accuracy', accuracy[1])
tf.summary.scalar('loss', loss)
else:
train_op = None
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions,
loss=loss,
train_op=train_op,
eval_metric_ops=metrics)
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)
# predict bio config options:
from bob.bio.base.test.dummy.database import database
biofiles = database.all_files(['dev'])
def bio_predict_input_fn(generator, output_types, output_shapes):
def input_fn():
dataset = tf.data.Dataset.from_generator(
generator, output_types, output_shapes)
# apply all kinds of transformations here, process the data
# even further if you want.
dataset = dataset.prefetch(1)
dataset = dataset.batch(10**3)
images, labels, keys = dataset.make_one_shot_iterator().get_next()
return {'data': images, 'key': keys}, labels
return input_fn
'''
# Config snippets shipped as package test data; they are passed to (or
# concatenated for) the CLI commands exercised below.
db_to_tfrecords_config = datafile('db_to_tfrecords_config.py', __name__)
input_predict_bio_config = datafile('input_predict_bio_config.py', __name__)
input_biogenerator_config = datafile('input_biogenerator_config.py', __name__)
def input_tfrecords_config(tfrecord_path):
    """Load the tfrecords input config template with the path filled in."""
    template_path = datafile('input_tfrecords_config.py', __name__)
    with open(template_path) as stream:
        template = '\n' + stream.read() + '\n'
    return template % {'tfrecord_filenames': tfrecord_path}
def estimator_atnt_faces_config(model_dir):
    """Load the estimator config template with model_dir filled in."""
    template_path = datafile('estimator_atnt_faces_config.py', __name__)
    with open(template_path) as stream:
        template = '\n' + stream.read() + '\n'
    return template % {'model_dir': model_dir}
def _create_tfrecord(test_dir):
    """Run the db_to_tfrecords CLI and return the created tfrecord path.

    The scraped diff contained both the pre- and post-change invocation
    lines; only the post-change one (using db_to_tfrecords_config) is
    kept, restoring valid syntax.
    """
    # NOTE(review): config_path is still written but no longer passed to
    # the CLI below -- looks vestigial; confirm before removing.
    config_path = os.path.join(test_dir, 'tfrecordconfig.py')
    with open(dummy_tfrecord_config) as f, open(config_path, 'w') as f2:
        f2.write(f.read().replace('TEST_DIR', test_dir))
    output = os.path.join(test_dir, 'dev.tfrecords')
    runner = CliRunner()
    result = runner.invoke(
        db_to_tfrecords, args=[db_to_tfrecords_config, '--output', output])
    assert_click_runner_result(result)
    return output
def _create_checkpoint(tmpdir, model_dir, dummy_tfrecord):
config = CONFIG % {
'model_dir': model_dir,
'tfrecord_filenames': dummy_tfrecord
}
def _create_checkpoint(tmpdir, model_dir, tfrecord_path):
config = input_tfrecords_config(
tfrecord_path) + estimator_atnt_faces_config(model_dir)
config_path = os.path.join(tmpdir, 'train_config.py')
with open(config_path, 'w') as f:
f.write(config)
......@@ -143,11 +51,9 @@ def _create_checkpoint(tmpdir, model_dir, dummy_tfrecord):
assert_click_runner_result(result)
def _eval(tmpdir, model_dir, dummy_tfrecord, extra_args=[]):
config = CONFIG % {
'model_dir': model_dir,
'tfrecord_filenames': dummy_tfrecord
}
def _eval(tmpdir, model_dir, tfrecord_path, extra_args=['--run-once']):
config = input_tfrecords_config(
tfrecord_path) + estimator_atnt_faces_config(model_dir)
config_path = os.path.join(tmpdir, 'eval_config.py')
with open(config_path, 'w') as f:
f.write(config)
......@@ -156,11 +62,9 @@ def _eval(tmpdir, model_dir, dummy_tfrecord, extra_args=[]):
assert_click_runner_result(result)
def _train_and_evaluate(tmpdir, model_dir, dummy_tfrecord):
config = CONFIG % {
'model_dir': model_dir,
'tfrecord_filenames': dummy_tfrecord
}
def _train_and_evaluate(tmpdir, model_dir, tfrecord_path):
config = input_tfrecords_config(
tfrecord_path) + estimator_atnt_faces_config(model_dir)
config_path = os.path.join(tmpdir, 'train_config.py')
with open(config_path, 'w') as f:
f.write(config)
......@@ -168,16 +72,16 @@ def _train_and_evaluate(tmpdir, model_dir, dummy_tfrecord):
runner.invoke(train_and_evaluate, args=[config_path])
def _predict_bio(tmpdir, model_dir, tfrecord_path, extra_options=tuple()):
    """Write a combined config and invoke the predict_bio CLI.

    The scraped diff interleaved the old and new versions of this helper;
    this is the complete post-change version (configs built from the
    packaged data files, input_predict_bio_config passed to the CLI).
    Returns the click Result of the invocation.
    """
    config = input_tfrecords_config(
        tfrecord_path) + estimator_atnt_faces_config(model_dir)
    config_path = os.path.join(tmpdir, 'train_config.py')
    with open(config_path, 'w') as f:
        f.write(config)
    runner = CliRunner()
    return runner.invoke(
        predict_bio,
        args=[config_path, input_predict_bio_config] + list(extra_options))
def test_eval():
......@@ -187,13 +91,13 @@ def test_eval():
eval_dir = os.path.join(model_dir, 'eval')
print('\nCreating a dummy tfrecord')
dummy_tfrecord = _create_tfrecord(tmpdir)
tfrecord_path = _create_tfrecord(tmpdir)
print('Training a dummy network')
_create_checkpoint(tmpdir, model_dir, dummy_tfrecord)
_create_checkpoint(tmpdir, model_dir, tfrecord_path)
print('Evaluating a dummy network')
_eval(tmpdir, model_dir, dummy_tfrecord)
_eval(tmpdir, model_dir, tfrecord_path)
evaluated_path = os.path.join(eval_dir, 'evaluated')
assert os.path.exists(evaluated_path), evaluated_path
......@@ -204,7 +108,7 @@ def test_eval():
assert '200' in doc, doc
print('Train and evaluate a dummy network')
_train_and_evaluate(tmpdir, model_dir, dummy_tfrecord)
_train_and_evaluate(tmpdir, model_dir, tfrecord_path)
finally:
try:
......@@ -220,13 +124,13 @@ def test_eval_keep_n_model():
eval_dir = os.path.join(model_dir, 'eval')
print('\nCreating a dummy tfrecord')
dummy_tfrecord = _create_tfrecord(tmpdir)
tfrecord_path = _create_tfrecord(tmpdir)
print('Training a dummy network')
_create_checkpoint(tmpdir, model_dir, dummy_tfrecord)
_create_checkpoint(tmpdir, model_dir, tfrecord_path)
print('Evaluating a dummy network')
_eval(tmpdir, model_dir, dummy_tfrecord, ['-K', '1'])
_eval(tmpdir, model_dir, tfrecord_path, ['-K', '1', '--run-once'])
evaluated_path = os.path.join(eval_dir, 'evaluated')
assert os.path.exists(evaluated_path), evaluated_path
......@@ -249,12 +153,12 @@ def test_predict_bio():
try:
model_dir = os.path.join(tmpdir, 'model_dir')
dummy_tfrecord = _create_tfrecord(tmpdir)
_create_checkpoint(tmpdir, model_dir, dummy_tfrecord)
tfrecord_path = _create_tfrecord(tmpdir)
_create_checkpoint(tmpdir, model_dir, tfrecord_path)
# Run predict_bio
result = _predict_bio(
tmpdir, model_dir, dummy_tfrecord, ['-o', tmpdir, '-vvv'])
tmpdir, model_dir, tfrecord_path, ['-o', tmpdir, '-vvv'])
assert_click_runner_result(result)
finally:
......@@ -270,8 +174,8 @@ def test_predict_bio_empty_eval():
model_dir = os.path.join(tmpdir, 'model_dir')
eval_dir = os.path.join(model_dir, 'eval')
dummy_tfrecord = _create_tfrecord(tmpdir)
_create_checkpoint(tmpdir, model_dir, dummy_tfrecord)
tfrecord_path = _create_tfrecord(tmpdir)
_create_checkpoint(tmpdir, model_dir, tfrecord_path)
# Make an empty eval folder
os.makedirs(eval_dir)
......@@ -279,7 +183,7 @@ def test_predict_bio_empty_eval():
# Run predict_bio
result = _predict_bio(
tmpdir, model_dir, dummy_tfrecord,
tmpdir, model_dir, tfrecord_path,
['-o', tmpdir, '-c', eval_dir, '-vvv'])
# the command should fail when the checkpoint path is empty
assert_click_runner_result(result, -1)
......@@ -289,3 +193,34 @@ def test_predict_bio_empty_eval():
shutil.rmtree(tmpdir)
except Exception:
pass
# uncomment to run this test locally
# def test_eval_too_many_open_files_with_biogenerator():
# tmpdir = mkdtemp(prefix='bob_')
# try:
# # create estimator config file
# model_dir = os.path.join(tmpdir, 'model_dir')
# estimator_config = os.path.join(tmpdir, 'estimator_config.py')
# with open(estimator_config, 'w') as f:
# f.write(estimator_atnt_faces_config(model_dir))
# runner = CliRunner()
# # train and eval with biogenerators
# result = runner.invoke(
# train, args=[estimator_config, input_biogenerator_config])
# assert_click_runner_result(result)
# print("This test will not stop running. You should kill the process!")
# result = runner.invoke(
# eval_script, args=[estimator_config,
# input_biogenerator_config,
# '--force-re-run'])
# assert_click_runner_result(result)
# finally:
# try:
# shutil.rmtree(tmpdir)
# except Exception:
# pass
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment