Commit c2b538b0 authored by Amir MOHAMMADI

Merge branch 'tf2' into 'master'

Porting to TF2

Closes #75

See merge request !85
parents 3a6a2531 9e3b0c00
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/timothycrosley/isort
rev: 4.3.21-2
hooks:
- id: isort
args: [-sl]
- repo: https://github.com/psf/black
rev: stable
hooks:
- id: black
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.0.0
hooks:
- id: check-ast
- id: check-case-conflict
- id: trailing-whitespace
- id: end-of-file-fixer
- id: debug-statements
- id: check-added-large-files
- id: flake8
- repo: local
hooks:
- id: sphinx-build
name: sphinx build
entry: python -m sphinx.cmd.build
args: [-a, -E, -W, doc, sphinx]
language: system
files: ^doc/
types: [file]
pass_filenames: false
- id: sphinx-doctest
name: sphinx doctest
entry: python -m sphinx.cmd.build
args: [-a, -E, -b, doctest, doc, sphinx]
language: system
files: ^doc/
types: [file]
pass_filenames: false
# see https://docs.python.org/3/library/pkgutil.html
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
# see https://docs.python.org/3/library/pkgutil.html
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
@@ -3,4 +3,5 @@ def get_config():
Returns a string containing the configuration information.
"""
import bob.extension
return bob.extension.get_config(__name__)
import json
import os
import tensorflow as tf
from tensorflow.keras import callbacks
class CustomBackupAndRestore(tf.keras.callbacks.experimental.BackupAndRestore):
"""This callback is experimental and might be removed in future.
See :any:`add_backup_callback`
"""
def __init__(self, callbacks, backup_dir, **kwargs):
super().__init__(backup_dir=backup_dir, **kwargs)
self.callbacks = callbacks
self.callbacks_backup_path = os.path.join(self.backup_dir, "callbacks.json")
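# Note: backup()/restore() below persist only the plain int/float attributes of
# each callback (e.g. counters and best values) to a JSON file; tensors and
# other objects are intentionally skipped.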
def backup(self):
variables = {}
for cb_name, cb in self.callbacks.items():
variables[cb_name] = {}
for k, v in cb.__dict__.items():
if not isinstance(v, (int, float)):
continue
variables[cb_name][k] = v
with open(self.callbacks_backup_path, "w") as f:
json.dump(variables, f, indent=4, sort_keys=True)
def restore(self):
if not os.path.isfile(self.callbacks_backup_path):
return False
with open(self.callbacks_backup_path, "r") as f:
variables = json.load(f)
for cb_name, cb in self.callbacks.items():
if cb_name not in variables:
continue
for k, v in cb.__dict__.items():
if k in variables[cb_name]:
cb.__dict__[k] = variables[cb_name][k]
return True
def on_train_begin(self, logs=None):
super().on_train_begin(logs=logs)
if self.restore():
print(f"Restored callbacks from {self.callbacks_backup_path}")
else:
print("Did not restore callbacks")
def on_epoch_end(self, epoch, logs=None):
super().on_epoch_end(epoch, logs=logs)
self.backup()
def on_train_end(self, logs=None):
# do not delete backups
pass
def add_backup_callback(callbacks, backup_dir, **kwargs):
"""Adds a backup callback to your callbacks to restore the training process
if it is interrupted.
.. warning::
This function is experimental and may be removed or changed in the future.
Examples
--------
>>> CHECKPOINT = "checkpoints"
>>> callbacks = {
... "best": tf.keras.callbacks.ModelCheckpoint(
... f"{CHECKPOINT}/best",
... monitor="val_acc",
... save_best_only=True,
... mode="max",
... verbose=1,
... ),
... "tensorboard": tf.keras.callbacks.TensorBoard(
... log_dir=f"{CHECKPOINT}/logs",
... update_freq=15,
... write_graph=False,
... ),
... }
>>> callbacks = add_backup_callback(callbacks, f"{CHECKPOINT}/backup")
>>> # callbacks will be a list that can be given to model.fit
>>> isinstance(callbacks, list)
True
"""
if not isinstance(callbacks, dict):
raise ValueError(
"Please provide a dictionary of callbacks where "
"keys are simple names for your callbacks!"
)
cb = CustomBackupAndRestore(callbacks=callbacks, backup_dir=backup_dir, **kwargs)
callbacks = list(callbacks.values())
callbacks.append(cb)
return callbacks
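# Hedged usage sketch: `model` and `dataset` below are placeholders for a
# compiled Keras model and a tf.data.Dataset (not defined in this module); the
# returned list can be passed straight to fit():
#
#   callbacks = add_backup_callback(
#       {"best": tf.keras.callbacks.ModelCheckpoint("checkpoints/best")},
#       backup_dir="checkpoints/backup",
#   )
#   model.fit(dataset, epochs=10, callbacks=callbacks)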
"""
Example using inception resnet v1
"""
import tensorflow as tf
# -- architecture
from bob.learn.tensorflow.network import inception_resnet_v1_batch_norm
architecture = inception_resnet_v1_batch_norm
# --checkpoint-dir
from bob.extension import rc
checkpoint_dir = rc['bob.bio.face_ongoing.inception-v1_batchnorm_rgb']
# --style-end-points and --content-end-points
style_end_points = ["Conv2d_1a_3x3", "Conv2d_2b_3x3"]
content_end_points = ["Block8"]
scopes = {"InceptionResnetV1/":"InceptionResnetV1/"}
# --style-image-paths
style_image_paths = ["vincent_van_gogh.jpg",
"vincent_van_gogh2.jpg"]
# --preprocess-fn
preprocess_fn = tf.image.per_image_standardization
"""
Example using inception resnet v2
"""
import tensorflow as tf
# -- architecture
from bob.learn.tensorflow.network import inception_resnet_v2_batch_norm
architecture = inception_resnet_v2_batch_norm
# --checkpoint-dir
from bob.extension import rc
checkpoint_dir = rc['bob.bio.face_ongoing.inception-v2_batchnorm_rgb']
# --style-end-points and --content-end-points
style_end_points = ["Conv2d_1a_3x3", "Conv2d_2b_3x3"]
content_end_points = ["Block8"]
scopes = {"InceptionResnetV2/":"InceptionResnetV2/"}
# --style-image-paths
style_image_paths = ["vincent_van_gogh.jpg",
"vincent_van_gogh2.jpg"]
# --preprocess-fn
preprocess_fn = tf.image.per_image_standardization
"""
Example using VGG19
"""
from bob.learn.tensorflow.network import vgg_19
# --architecture
architecture = vgg_19
import numpy
# -- checkpoint-dir
# YOU CAN DOWNLOAD THE CHECKPOINTS FROM HERE
# https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models
checkpoint_dir = "/PATH-TO/vgg_19.ckpt"
# --style-end-points and --content-end-points
content_end_points = ['vgg_19/conv4/conv4_2', 'vgg_19/conv5/conv5_2']
style_end_points = ['vgg_19/conv1/conv1_2',
'vgg_19/conv2/conv2_1',
'vgg_19/conv3/conv3_1',
'vgg_19/conv4/conv4_1',
'vgg_19/conv5/conv5_1'
]
scopes = {"vgg_19/":"vgg_19/"}
style_image_paths = ["/PATH/TO/vincent_van_gogh.jpg"]
# --preprocess-fn and --un-preprocess-fn
# Taken from VGG19
def mean_norm(tensor):
return tensor - numpy.array([ 123.68 , 116.779, 103.939])
def un_mean_norm(tensor):
return tensor + numpy.array([ 123.68 , 116.779, 103.939])
preprocess_fn = mean_norm
un_preprocess_fn = un_mean_norm
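# Quick sanity check (illustrative only): un_mean_norm undoes mean_norm, since
# it simply adds back the per-channel means subtracted above.
#
#   img = numpy.random.rand(224, 224, 3) * 255
#   assert numpy.allclose(un_mean_norm(mean_norm(img)), img)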
from .Base import Base, normalize_checkpoint_path
from .Generic import Generic
from .Estimator import Estimator
from .generator import Generator, dataset_using_generator
from .tfrecords import dataset_to_tfrecord, dataset_from_tfrecord, TFRECORDS_EXT
# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
@@ -9,7 +7,7 @@ def __appropriate__(*args):
Parameters:
*args: An iterable of objects to modify
Resolves `Sphinx referencing issues
<https://github.com/sphinx-doc/sphinx/issues/3048>`
@@ -20,8 +18,6 @@ def __appropriate__(*args):
__appropriate__(
Base,
Generic,
Estimator,
Generator,
)
__all__ = [_ for _ in dir() if not _.startswith("_")]
import logging
import random
import tensorflow as tf
logger = logging.getLogger(__name__)
@@ -30,7 +31,14 @@ class Generator:
The shapes of the returned samples.
"""
def __init__(
self,
samples,
reader,
multiple_samples=False,
shuffle_on_epoch_end=False,
**kwargs
):
super().__init__(**kwargs)
self.reader = reader
self.samples = list(samples)
@@ -55,8 +63,8 @@ class Generator:
break
# Creating a "fake" dataset just to get the types and shapes
dataset = tf.data.Dataset.from_tensors(dlk)
self._output_types = tf.compat.v1.data.get_output_types(dataset)
self._output_shapes = tf.compat.v1.data.get_output_shapes(dataset)
logger.info(
"Initializing a dataset with %d %s and %s types and %s shapes",
"""Utilities for TFRecords
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import tensorflow as tf
TFRECORDS_EXT = ".tfrecords"
def tfrecord_name_and_json_name(output):
output = normalize_tfrecords_path(output)
json_output = output[: -len(TFRECORDS_EXT)] + ".json"
return output, json_output
def normalize_tfrecords_path(output):
if not output.endswith(TFRECORDS_EXT):
output += TFRECORDS_EXT
return output
def bytes_feature(value):
if isinstance(value, type(tf.constant(0))):
value = value.numpy() # BytesList won't unpack a string from an EagerTensor.
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
def dataset_to_tfrecord(dataset, output):
"""Writes a tf.data.Dataset into a TFRecord file.
Parameters
----------
dataset : ``tf.data.Dataset``
The tf.data.Dataset that you want to write into a TFRecord file.
output : str
Path to the TFRecord file. Besides this file, a .json file is also created.
This json file is needed when you want to convert the TFRecord file back into
a dataset.
Returns
-------
``tf.Operation``
A tf.Operation that, when run, writes contents of dataset to a file. When
running in eager mode, calling this function will write the file. Otherwise, you
have to call session.run() on the returned operation.
"""
output, json_output = tfrecord_name_and_json_name(output)
# dump the structure so that we can read it back
meta = {
"output_types": repr(tf.compat.v1.data.get_output_types(dataset)),
"output_shapes": repr(tf.compat.v1.data.get_output_shapes(dataset)),
}
with open(json_output, "w") as f:
json.dump(meta, f)
# create a custom map function that serializes the dataset
def serialize_example_pyfunction(*args):
feature = {}
for i, f in enumerate(args):
key = f"feature{i}"
feature[key] = bytes_feature(f)
example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
return example_proto.SerializeToString()
def tf_serialize_example(*args):
args = tf.nest.flatten(args)
args = [tf.io.serialize_tensor(f) for f in args]
tf_string = tf.py_function(serialize_example_pyfunction, args, tf.string)
return tf.reshape(tf_string, ()) # The result is a scalar
dataset = dataset.map(tf_serialize_example)
writer = tf.data.experimental.TFRecordWriter(output)
return writer.write(dataset)
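# A hedged usage sketch (eager mode assumed; the dataset and output path are
# illustrative, not part of this module):
#
#   ds = tf.data.Dataset.from_tensor_slices((tf.range(4), tf.range(4) * 2))
#   dataset_to_tfrecord(ds, "/tmp/example.tfrecords")  # also writes /tmp/example.json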
def dataset_from_tfrecord(tfrecord, num_parallel_reads=None):
"""Reads TFRecords and returns a dataset.
The TFRecord file must have been created using the :any:`dataset_to_tfrecord`
function.
Parameters
----------
tfrecord : str or list
Path to the TFRecord file. Pass a list if you are sure several tfrecords need
the same map function.
num_parallel_reads: int
A `tf.int64` scalar representing the number of files to read in parallel.
Defaults to reading files sequentially.
Returns
-------
``tf.data.Dataset``
A dataset that contains the data from the TFRecord file.
"""
# these imports are needed so that eval can work
from tensorflow import TensorShape
if isinstance(tfrecord, str):
tfrecord = [tfrecord]
tfrecord = [tfrecord_name_and_json_name(path) for path in tfrecord]
json_output = tfrecord[0][1]
tfrecord = [path[0] for path in tfrecord]
raw_dataset = tf.data.TFRecordDataset(
tfrecord, num_parallel_reads=num_parallel_reads
)
with open(json_output) as f:
meta = json.load(f)
for k, v in meta.items():
meta[k] = eval(v)
output_types = tf.nest.flatten(meta["output_types"])
output_shapes = tf.nest.flatten(meta["output_shapes"])
feature_description = {}
for i in range(len(output_types)):
key = f"feature{i}"
feature_description[key] = tf.io.FixedLenFeature([], tf.string)
def _parse_function(example_proto):
# Parse the input tf.Example proto using the dictionary above.
args = tf.io.parse_single_example(
serialized=example_proto, features=feature_description
)
args = tf.nest.flatten(args)
args = [tf.io.parse_tensor(v, t) for v, t in zip(args, output_types)]
args = [tf.reshape(v, s) for v, s in zip(args, output_shapes)]
return tf.nest.pack_sequence_as(meta["output_types"], args)
return raw_dataset.map(_parse_function)
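# Round-trip sketch (assumes a file written as in the example above; elements
# come back with their original nested structure, dtypes, and shapes):
#
#   ds = dataset_from_tfrecord("/tmp/example.tfrecords")
#   for x, y in ds:
#       print(x.numpy(), y.numpy())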
import tensorflow as tf
import numpy
import os
import bob.io.base
DEFAULT_FEATURE = {
"data": tf.FixedLenFeature([], tf.string),
"label": tf.FixedLenFeature([], tf.int64),
"key": tf.FixedLenFeature([], tf.string),
}
def from_hdf5file_to_tensor(filename):
import bob.io.image
data = bob.io.image.to_matplotlib(bob.io.base.load(filename))
# reshaping to ndim == 3
if data.ndim == 2:
data = numpy.reshape(data, (data.shape[0], data.shape[1], 1))
data = data.astype("float32")
return data
def from_filename_to_tensor(filename, extension=None):
"""
Read a file and convert it to a tensor.
If the file extension is one that tensorflow understands (.jpg, .bmp, .tif, ...),
`tf.image.decode_image` is used; otherwise `bob.io.base.load` is used.
"""
if extension == "hdf5":
return tf.py_func(from_hdf5file_to_tensor, [filename], [tf.float32])
else:
return tf.cast(tf.image.decode_image(tf.read_file(filename)), tf.float32)
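# Illustrative calls (placeholder filenames; tf.read_file/tf.py_func here are
# TF1-style graph APIs):
#
#   image = from_filename_to_tensor("subject_01.png")
#   hdf5_image = from_filename_to_tensor("subject_01.hdf5", extension="hdf5")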
def append_image_augmentation(
image,
gray_scale=False,
output_shape=None,
random_flip=False,
random_brightness=False,
random_contrast=False,
random_saturation=False,
random_rotate=False,
per_image_normalization=True,
random_gamma=False,
random_crop=False,
):
"""
Append random image augmentation operations to the current tensor.
**Parameters**
gray_scale:
Convert to gray scale?
output_shape:
If set, will randomly crop the image given the output shape
random_flip:
Randomly flip an image horizontally (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
random_brightness:
Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
random_contrast:
Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
random_saturation:
Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
random_rotate:
Randomly rotate face images between -5 and 5 degrees
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
"""
# Changing the range from 0-255 to 0-1
image = tf.cast(image, tf.float32) / 255
# FORCING A SEED FOR THE RANDOM OPERATIONS
tf.set_random_seed(0)
if output_shape is not None:
assert len(output_shape) == 2
if random_crop:
image = tf.random_crop(image, size=list(output_shape) + [3])
else:
image = tf.image.resize_image_with_crop_or_pad(
image, output_shape[0], output_shape[1]
)
if random_flip:
image = tf.image.random_flip_left_right(image)
if random_brightness:
image = tf.image.random_brightness(image, max_delta=0.15)
image = tf.clip_by_value(image, 0, 1)
if random_contrast:
image = tf.image.random_contrast(image, lower=0.85, upper=1.15)
image = tf.clip_by_value(image, 0, 1)
if random_saturation:
image = tf.image.random_saturation(image, lower=0.85, upper=1.15)
image = tf.clip_by_value(image, 0, 1)
if random_gamma:
image = tf.image.adjust_gamma(
image, gamma=tf.random.uniform(shape=[], minval=0.85, maxval=1.15)
)
image = tf.clip_by_value(image, 0, 1)
if random_rotate:
# from https://stackoverflow.com/a/53855704/1286165
degree = 0.08726646259971647 # math.pi * 5 /180
random_angles = tf.random.uniform(shape=(1,), minval=-degree, maxval=degree)
image = tf.contrib.image.transform(
image,
tf.contrib.image.angles_to_projective_transforms(
random_angles,
tf.cast(tf.shape(image)[-3], tf.float32),
tf.cast(tf.shape(image)[-2], tf.float32),
),
)