Commit e0ea47db authored by Tiago de Freitas Pereira

Implemented style transfer algorithm

parent 53e9d390
Pipeline #21832 passed with stage in 20 minutes and 25 seconds
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
import logging
import tensorflow as tf
# reduce is no longer a builtin under Python 3; import it explicitly
from functools import reduce

logger = logging.getLogger("bob.learn.tensorflow")
def content_loss(noises, content_features):
    """
    Implements the content loss from:

    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).

    For a noise signal :math:`n` and a content image :math:`c`, both convolved with the DCNN :math:`\phi` up to layer :math:`l`, the content loss is defined as:

    :math:`L(n,c) = \sum_{l}(\phi^l(n) - \phi^l(c))^2`

    Parameters
    ----------

    noises: list
        A list of tf.Tensor with the convolved features of the noise signal

    content_features: list
        A list of numpy.array with the convolved features of the content image

    """
    content_losses = []
    for n, c in zip(noises, content_features):
        content_losses.append(2 * tf.nn.l2_loss(n - c) / c.size)

    return reduce(tf.add, content_losses)
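
# Illustrative sketch (this helper is never called by the library): each
# per-layer term above, 2 * tf.nn.l2_loss(n - c) / c.size, is simply the mean
# squared difference between the noise and content feature maps, since
# 2 * l2_loss(x) == sum(x ** 2).
def _content_term_example():
    import numpy
    n = numpy.random.rand(1, 8, 8, 16)
    c = numpy.random.rand(1, 8, 8, 16)
    term = numpy.sum((n - c) ** 2) / c.size
    assert numpy.isclose(term, numpy.mean((n - c) ** 2))
    return term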
def linear_gram_style_loss(noises, gram_style_features):
    """
    Implements the style loss from:

    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).

    For a noise signal :math:`n` and a style image :math:`s`, both convolved with the DCNN :math:`\phi` up to layer :math:`l`, the style loss is defined as:

    :math:`L(n,s) = \sum_{l}\frac{(\phi^l(n)^T\phi^l(n) - \phi^l(s)^T\phi^l(s))^2}{NM}`

    Parameters
    ----------

    noises: list
        A list of tf.Tensor containing the Gram matrices of the convolved noise features

    gram_style_features: list
        A list of numpy.array containing the Gram matrices of the convolved style features

    """
    style_losses = []
    for n, s in zip(noises, gram_style_features):
        style_losses.append((2 * tf.nn.l2_loss(n - s)) / s.size)

    return reduce(tf.add, style_losses)
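
# Illustrative sketch (this helper is never called by the library) of why Gram
# matrices capture "style": a Gram matrix only measures channel-to-channel
# correlations, so it is invariant to where the features occur spatially.
def _gram_permutation_example():
    import numpy
    features = numpy.random.rand(8 * 8, 16)                    # (W*H, C)
    gram = numpy.matmul(features.T, features) / features.size  # (C, C)
    shuffled = features[numpy.random.permutation(features.shape[0])]
    gram_shuffled = numpy.matmul(shuffled.T, shuffled) / shuffled.size
    # Permuting the spatial positions leaves the Gram matrix unchanged
    assert numpy.allclose(gram, gram_shuffled)
    return gram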
def denoising_loss(noise):
    """
    Computes the total-variation denoising loss commonly used in implementations of:

    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).

    Parameters
    ----------

    noise: tf.Tensor
        Input noise

    """
    def _tensor_size(tensor):
        from operator import mul
        return reduce(mul, (d.value for d in tensor.get_shape()), 1)

    shape = noise.get_shape().as_list()
    noise_y_size = _tensor_size(noise[:, 1:, :, :])
    noise_x_size = _tensor_size(noise[:, :, 1:, :])
    denoise_loss = 2 * ((tf.nn.l2_loss(noise[:, 1:, :, :] - noise[:, :shape[1]-1, :, :]) / noise_y_size) +
                        (tf.nn.l2_loss(noise[:, :, 1:, :] - noise[:, :, :shape[2]-1, :]) / noise_x_size))

    return denoise_loss
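
# Illustrative sketch (this helper is never called by the library): the
# expression above is an anisotropic total-variation penalty, i.e. the mean
# squared difference between vertically and horizontally neighbouring pixels,
# so a constant image yields a zero loss. In plain numpy it reduces to:
def _denoising_loss_example():
    import numpy
    x = numpy.random.rand(1, 16, 16, 3)
    dy = x[:, 1:, :, :] - x[:, :-1, :, :]
    dx = x[:, :, 1:, :] - x[:, :, :-1, :]
    # 2 * l2_loss(d) / d.size == mean(d ** 2)
    return numpy.mean(dy ** 2) + numpy.mean(dx ** 2)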
from .BaseLoss import mean_cross_entropy_loss, mean_cross_entropy_center_loss
from .ContrastiveLoss import contrastive_loss
from .TripletLoss import triplet_loss, triplet_average_loss, triplet_fisher_loss
from .StyleLoss import linear_gram_style_loss, content_loss, denoising_loss
# gets sphinx autodoc done right - don't remove it
#!/usr/bin/env python
"""Trains networks using Tensorflow estimators.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import click
import tensorflow as tf
from bob.extension.scripts.click_helper import (verbosity_option,
ConfigCommand, ResourceOption)
import bob.io.image
import bob.io.base
import numpy
import bob.ip.base
import bob.ip.color
import sys
from bob.learn.tensorflow.style_transfer import compute_features, compute_gram
from bob.learn.tensorflow.loss import linear_gram_style_loss, content_loss, denoising_loss
logger = logging.getLogger(__name__)
def normalize4save(img):
    """Rescales an image to the [0, 255] range and casts it to uint8 so it can be saved."""
    return (255 * ((img - numpy.min(img)) / (numpy.max(img) - numpy.min(img)))).astype("uint8")
@click.command(
entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand)
@click.argument('content_image', required=True)
@click.argument('style_image', required=True)
@click.argument('output', required=True)
@click.option('--architecture',
'-a',
required=True,
cls=ResourceOption,
entry_point_group='bob.learn.tensorflow.architecture',
help='The base architecture.')
@click.option('--checkpoint-dir',
'-c',
required=True,
cls=ResourceOption,
help='The directory containing the checkpoint of the base architecture.')
@click.option('--iterations',
'-i',
type=click.types.INT,
help='Number of optimization iterations.',
default=1000)
@click.option('--learning_rate',
'-r',
type=click.types.FLOAT,
help='Learning rate.',
default=1.)
@click.option('--content-weight',
type=click.types.FLOAT,
help='Weight of the content loss.',
default=1.)
@click.option('--style-weight',
type=click.types.FLOAT,
help='Weight of the style loss.',
default=1000.)
@click.option('--denoise-weight',
type=click.types.FLOAT,
help='Weight of the denoising loss.',
default=100.)
@click.option('--content-end-points',
cls=ResourceOption,
multiple=True,
entry_point_group='bob.learn.tensorflow.end_points',
help='List of the end_points used to encode the content')
@click.option('--style-end-points',
cls=ResourceOption,
multiple=True,
entry_point_group='bob.learn.tensorflow.end_points',
help='List of the end_points used to encode the style')
@click.option('--scopes',
cls=ResourceOption,
entry_point_group='bob.learn.tensorflow.scopes',
help='Dictionary mapping the variable scopes in the checkpoint to the scopes in the current graph',
required=True)
@verbosity_option(cls=ResourceOption)
def style_transfer(content_image, style_image, output,
                   architecture, checkpoint_dir,
                   iterations, learning_rate,
                   content_weight, style_weight, denoise_weight, content_end_points,
                   style_end_points, scopes, **kwargs):
    """
    Trains neural style transfer from:

    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
    """

    # Reading and converting to the tensorflow format
    content_image = bob.io.image.to_matplotlib(bob.io.base.load(content_image))
    style_image = bob.io.image.to_matplotlib(bob.io.base.load(style_image))

    # Reshaping to NxWxHxC
    content_image = numpy.reshape(content_image, (1, content_image.shape[0],
                                                  content_image.shape[1],
                                                  content_image.shape[2]))
    style_image = numpy.reshape(style_image, (1, style_image.shape[0],
                                              style_image.shape[1],
                                              style_image.shape[2]))

    # Base content features
    content_features = compute_features(content_image, architecture, checkpoint_dir, content_end_points)

    # Base style features
    # TODO: Enable a set of style images
    style_features = compute_features(style_image, architecture, checkpoint_dir, style_end_points)
    style_grams = compute_gram(style_features)

    # Organizing the trainer
    with tf.Graph().as_default():

        # Random noise
        noise = tf.Variable(tf.random_normal(shape=content_image.shape),
                            trainable=True)
        _, end_points = architecture(noise,
                                     mode=tf.estimator.ModeKeys.PREDICT,
                                     trainable_variables=[])

        # Computing content loss
        content_noises = []
        for c in content_end_points:
            content_noises.append(end_points[c])
        c_loss = content_loss(content_noises, content_features)

        # Computing style_loss
        style_gram_noises = []
        for c in style_end_points:
            layer = end_points[c]
            _, height, width, number = map(lambda i: i.value, layer.get_shape())
            size = height * width * number
            features = tf.reshape(layer, (-1, number))
            style_gram_noises.append(tf.matmul(tf.transpose(features), features) / size)
        s_loss = linear_gram_style_loss(style_gram_noises, style_grams)

        # Variation denoise
        d_loss = denoising_loss(noise)

        # Total loss
        total_loss = content_weight * c_loss + style_weight * s_loss + denoise_weight * d_loss

        solver = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

        tf.contrib.framework.init_from_checkpoint(tf.train.latest_checkpoint(checkpoint_dir),
                                                  scopes)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(iterations):
                _, loss = sess.run([solver, total_loss])
                print("Iteration {0}, loss {1}".format(i, loss))
                sys.stdout.flush()

            style_image = sess.run(noise)[0, :, :, :]
            style_image = bob.io.image.to_bob(style_image)
            bob.io.base.save(normalize4save(style_image), output)
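
# Usage sketch (illustrative only): once the 'style_transfer' entry point
# declared in setup.py is installed, an invocation could look roughly like
#
#   style_transfer content.png style.png rendered.png \
#       -a <architecture> -c /path/to/checkpoint \
#       --content-end-points <content-end-point> \
#       --style-end-points <style-end-point> \
#       --scopes <scope-mapping>
#
# The names in angle brackets are placeholders; they must match resources
# registered under 'bob.learn.tensorflow.architecture',
# 'bob.learn.tensorflow.end_points' and 'bob.learn.tensorflow.scopes'.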
from .neural_transfer import compute_features, compute_gram
# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
    """Says object was actually declared here, and not in the import module.
    Parameters:
      *args: An iterable of objects to modify
    Resolves `Sphinx referencing issues
    <https://github.com/sphinx-doc/sphinx/issues/3048>`
    """
    for obj in args:
        obj.__module__ = __name__

__appropriate__(
)
__all__ = [_ for _ in dir() if not _.startswith('_')]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
import tensorflow as tf
import numpy
def compute_features(input_image, architecture, checkpoint_dir, target_end_points):
    """
    Convolves the input image with the DCNN up to the given end points and returns the corresponding features.

    Parameters
    ----------

    input_image: numpy.array
        Input image in the format NxWxHxC

    architecture:
        Pointer to the architecture function

    checkpoint_dir: str
        DCNN checkpoint directory

    target_end_points: list
        List with the names of the end points whose features should be returned

    """
    input_pl = tf.placeholder('float32', shape=(1, input_image.shape[1],
                                                input_image.shape[2],
                                                input_image.shape[3]))

    # TODO: Think on how to abstract this normalization operation
    normalized_input = tf.stack([tf.image.per_image_standardization(i) for i in tf.unstack(input_pl)])
    _, end_points = architecture(normalized_input, mode=tf.estimator.ModeKeys.PREDICT, trainable_variables=None)

    with tf.Session() as sess:
        # Restoring the checkpoint for the given architecture
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))

        features = []
        for ep in target_end_points:
            feature = sess.run(end_points[ep], feed_dict={input_pl: input_image})
            features.append(feature)

    # Killing the graph
    tf.reset_default_graph()
    return features
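
# Usage sketch (hypothetical names, for illustration only): given an
# architecture function and a matching checkpoint, the activations of selected
# end points could be extracted as
#
#   features = compute_features(image, architecture_fn, "/path/to/checkpoint",
#                               ["conv4_2"])
#
# where `architecture_fn` and the end point name depend on the network that was
# actually trained and registered.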
def compute_gram(features):
    """
    Given a list of features (as numpy.arrays), computes the Gram matrix of each one,
    flattening the spatial dimensions and keeping the channel dimension, as in:

    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).

    Parameters
    ----------

    features: list
        List of convolved features, each in the format NxWxHxC

    """
    grams = []
    for f in features:
        f = numpy.reshape(f, (-1, f.shape[3]))
        grams.append(numpy.matmul(f.T, f) / f.size)

    return grams
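
# Quick illustrative check of the helper above (never executed by the library):
# for a feature map with C channels the result is a symmetric C x C matrix.
def _compute_gram_example():
    features = numpy.random.rand(1, 8, 8, 16)
    gram, = compute_gram([features])
    assert gram.shape == (16, 16)
    assert numpy.allclose(gram, gram.T)
    return gram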
@@ -56,6 +56,7 @@ setup(
'predict_bio = bob.learn.tensorflow.script.predict_bio:predict_bio',
'train = bob.learn.tensorflow.script.train:train',
'train_and_evaluate = bob.learn.tensorflow.script.train_and_evaluate:train_and_evaluate',
'style_transfer = bob.learn.tensorflow.script.style_transfer:style_transfer'
],
},