Commit 1991d94b authored by Tiago de Freitas Pereira

Moved style transfer to a function

parent 03b8ea60
Pipeline #22472 failed with stage in 16 minutes and 38 seconds
@@ -16,21 +16,11 @@ import bob.ip.base
import bob.ip.color
import sys
import os
from bob.learn.tensorflow.style_transfer import compute_features, compute_gram
from bob.learn.tensorflow.loss import linear_gram_style_loss, content_loss, denoising_loss
from bob.learn.tensorflow.style_transfer import do_style_transfer
logger = logging.getLogger(__name__)
def wise_shape(shape):
if len(shape)==2:
return (1, shape[0], shape[1], 1)
else:
return (1, shape[0], shape[1], shape[2])
def normalize4save(img):
return (255 * ((img - numpy.min(img)) / (numpy.max(img)-numpy.min(img)))).astype("uint8")
@click.command(
entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand)
@@ -41,7 +31,7 @@ def normalize4save(img):
required=True,
multiple=True,
entry_point_group='bob.learn.tensorflow.style_images',
help='List of images that encods the style.')
              help='List of images that encode the style.')
@click.option('--architecture',
'-a',
required=True,
@@ -52,11 +42,11 @@ def normalize4save(img):
'-c',
required=True,
cls=ResourceOption,
help='The base architecure.')
help='CNN checkpoint path')
@click.option('--iterations',
'-i',
type=click.types.INT,
help='Number of steps for which to train model.',
help='Number of iterations to generate the image',
default=1000)
@click.option('--learning_rate',
              '-r',
@@ -119,7 +109,7 @@ def style_transfer(content_image_path, output_path, style_image_paths,
\b
If you want run a style transfer using InceptionV2 as basis folo
    If you want to run a style transfer using InceptionV2 as a basis, use the following template
    Below follows a CONFIG template
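For illustration only, a minimal sketch of what such a config file could define (the actual template is elided in this view; the network builder, checkpoint path, scope mapping, and end-point names below are placeholders, not values shipped with the package):

# Sketch of a style-transfer CONFIG file; all concrete values are
# placeholders and must be adapted to your checkpoint and architecture
from bob.learn.tensorflow.network import inception_resnet_v2_batch_norm  # hypothetical base CNN

architecture = inception_resnet_v2_batch_norm
checkpoint_dir = "/path/to/your/checkpoint"            # placeholder path
scopes = {"InceptionResnetV2/": "InceptionResnetV2/"}  # checkpoint-to-model scope mapping

content_end_points = ["Conv2d_3b_1x1"]                 # placeholder end-point names
style_end_points = ["Conv2d_1a_3x3", "Conv2d_2b_3x3"]  # placeholder end-point names

content_weight = 5.
style_weight = 500.
denoise_weight = 500.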
@@ -178,108 +168,24 @@ def style_transfer(content_image_path, output_path, style_image_paths,
"""
logger.info("Style transfer, content_image={0}, style_image={1}".format(content_image_path, style_image_paths))
# Loading content image
content_image = bob.io.base.load(content_image_path)
# Reading and converting to the tensorflow format
content_image = bob.io.image.to_matplotlib(bob.io.base.load(content_image_path)).astype("float32")
style_images = []
for path in style_image_paths:
image = bob.io.image.to_matplotlib(bob.io.base.load(path)).astype("float32")
style_images.append(numpy.reshape(image, wise_shape(image.shape)))
# Reshaping to NxWxHxC
content_image = numpy.reshape(content_image, wise_shape(content_image.shape))
# Base content features
logger.info("Computing content features")
content_features = compute_features(content_image, architecture, checkpoint_dir,
content_end_points, preprocess_fn)
# Base style features
logger.info("Computing style features")
style_grams = []
for image in style_images:
style_features = compute_features(image, architecture, checkpoint_dir,
style_end_points, preprocess_fn)
style_grams.append(compute_gram(style_features))
# Organizing the trainer
logger.info("Training.....")
with tf.Graph().as_default():
tf.set_random_seed(0)
# Random noise
noise = tf.Variable(tf.random_normal(shape=content_image.shape),
trainable=True) * 0.256
_, end_points = architecture(noise,
mode=tf.estimator.ModeKeys.PREDICT,
trainable_variables=[])
# Computing content loss
content_noises = []
for c in content_end_points:
content_noises.append(end_points[c])
c_loss = content_loss(content_noises, content_features)
# Computing style_loss
style_gram_noises = []
s_loss = 0
for grams_per_image in style_grams:
for c in style_end_points:
layer = end_points[c]
_, height, width, number = map(lambda i: i.value, layer.get_shape())
size = height * width * number
features = tf.reshape(layer, (-1, number))
style_gram_noises.append(tf.matmul(tf.transpose(features), features) / size)
s_loss += linear_gram_style_loss(style_gram_noises, grams_per_image)
# Variation denoise
d_loss = denoising_loss(noise)
#Total loss
total_loss = content_weight*c_loss + style_weight*s_loss + denoise_weight*d_loss
solver = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
tf.contrib.framework.init_from_checkpoint(tf.train.latest_checkpoint(checkpoint_dir) if os.path.isdir(checkpoint_dir) else checkpoint_dir, scopes)
# Training
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(iterations):
_, loss = sess.run([solver, total_loss])
logger.info("Iteration {0}, loss {1}".format(i, loss))
sys.stdout.flush()
# Saving generated image
raw_style_image = sess.run(noise)[0, :, :,:]
# Unpreprocessing the signal
if un_preprocess_fn is not None:
raw_style_image = un_preprocess_fn(raw_style_image)
raw_style_image = bob.io.image.to_bob(raw_style_image)
normalized_style_image = normalize4save(raw_style_image)
if pure_noise:
if normalized_style_image.shape[0] == 1:
bob.io.base.save(normalized_style_image[0, :, :], output_path)
else:
bob.io.base.save(normalized_style_image, output_path)
else:
# Original output
if normalized_style_image.shape[0] == 1:
normalized_style_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(normalized_style_image[0,:,:]))
# Loading the content image and clipping from 0-255 in case is in another scale
scaled_content_image = normalize4save(bob.io.base.load(content_image_path).astype("float32")).astype("float64")
content_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(scaled_content_image))
else:
normalized_style_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(bob.ip.color.rgb_to_gray(normalized_style_image)))
content_image_yuv = bob.ip.color.rgb_to_yuv(bob.io.base.load(content_image_path))
output_image = numpy.zeros(shape=content_image_yuv.shape, dtype="uint8")
output_image[0,:,:] = normalized_style_image_yuv[0,:,:]
output_image[1,:,:] = content_image_yuv[1,:,:]
output_image[2,:,:] = content_image_yuv[2,:,:]
output_image = bob.ip.color.yuv_to_rgb(output_image)
bob.io.base.save(output_image, output_path)
style_images.append(bob.io.base.load(path))
output = do_style_transfer(content_image, style_images,
architecture, checkpoint_dir, scopes,
content_end_points, style_end_points,
preprocess_fn=preprocess_fn, un_preprocess_fn=un_preprocess_fn,
pure_noise=pure_noise,
iterations=iterations, learning_rate=learning_rate,
content_weight=content_weight, style_weight=style_weight,
denoise_weight=denoise_weight)
bob.io.base.save(output, output_path)
from .neural_transfer import compute_features, compute_gram
from .neural_transfer import compute_features, compute_gram, do_style_transfer
# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
......
@@ -6,6 +6,13 @@
import tensorflow as tf
import numpy
import os
from bob.learn.tensorflow.loss import linear_gram_style_loss, content_loss, denoising_loss
import bob.io.image
import bob.ip.color
import logging
logger = logging.getLogger(__name__)
def compute_features(input_image, architecture, checkpoint_dir, target_end_points, preprocess_fn=None):
"""
@@ -82,3 +89,181 @@ def compute_gram(features):
return grams
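For intuition, the Gram matrix computed for each end-point is the channel-by-channel inner product of the flattened feature maps, normalized by the feature-map size; this is what both compute_gram and the in-graph style loss below build. A minimal numpy sketch of the same computation (the function name is illustrative):

import numpy

def gram_matrix(feature_map):
    # feature_map: array of shape (1, height, width, channels),
    # i.e. one end-point activation for a single image
    _, height, width, channels = feature_map.shape
    features = feature_map.reshape(-1, channels)  # (height*width, channels)
    # Channel-by-channel inner products, normalized by the feature-map size
    return numpy.dot(features.T, features) / (height * width * channels)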
def do_style_transfer(content_image, style_images,
architecture, checkpoint_dir, scopes,
content_end_points, style_end_points,
preprocess_fn=None, un_preprocess_fn=None, pure_noise=False,
iterations=1000, learning_rate=0.1,
content_weight=5., style_weight=500., denoise_weight=500.):
"""
Trains neural style transfer using the approach presented in:
Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
Parameters
----------
content_image: numpy.array
Content image in the Bob format (C x W x H)
    style_images: :any:`list`
        List of numpy.array (Bob format (C x W x H)) that encode the style

    architecture:
        Pointer to a function with the base architecture

    checkpoint_dir:
        CNN checkpoint path

    scopes:
        Dictionary mapping the variable scopes of the checkpoint to the scopes of the model (passed to tf.contrib.framework.init_from_checkpoint)

    content_end_points:
        List of end_points (from the architecture) used to encode the content

    style_end_points:
        List of end_points (from the architecture) used to encode the style

    preprocess_fn:
        Preprocess function. Pointer to a function that preprocesses the INPUT signal

    un_preprocess_fn:
        Un-preprocess function. Pointer to a function that un-preprocesses the OUTPUT signal

    pure_noise:
        If set, the raw noisy generated image will be returned.
        If not set, the output will be RGB = stylizedYUV.Y, originalYUV.U, originalYUV.V

    iterations:
        Number of iterations to generate the image

    learning_rate:
        Adam learning rate

    content_weight:
        Weight of the content loss

    style_weight:
        Weight of the style loss

    denoise_weight:
        Weight of the denoising loss
"""
    def wise_shape(shape):
        # Adds the batch dimension (and the channel dimension for gray-scale images)
        if len(shape) == 2:
            return (1, shape[0], shape[1], 1)
        else:
            return (1, shape[0], shape[1], shape[2])

    def normalize4save(img):
        # Min-max normalizes the image to the [0, 255] range and casts to uint8
        return (255 * ((img - numpy.min(img)) / (numpy.max(img) - numpy.min(img)))).astype("uint8")
    # Keeping the original content image (Bob format); it is needed at the
    # end for the YUV color blending
    bob_content_image = content_image

    # Reshaping to NxWxHxC and converting to the tensorflow format
    # content
    content_image = bob.io.image.to_matplotlib(content_image).astype("float32")
    content_image = numpy.reshape(content_image, wise_shape(content_image.shape))
# and style
for i in range(len(style_images)):
image = bob.io.image.to_matplotlib(style_images[i])
image = numpy.reshape(image, wise_shape(image.shape))
style_images[i] = image
# Base content features
logger.info("Computing content features")
content_features = compute_features(content_image, architecture, checkpoint_dir,
content_end_points, preprocess_fn)
# Base style features
logger.info("Computing style features")
style_grams = []
for image in style_images:
style_features = compute_features(image, architecture, checkpoint_dir,
style_end_points, preprocess_fn)
style_grams.append(compute_gram(style_features))
# Organizing the trainer
logger.info("Training.....")
with tf.Graph().as_default():
tf.set_random_seed(0)
# Random noise
noise = tf.Variable(tf.random_normal(shape=content_image.shape),
trainable=True) * 0.256
_, end_points = architecture(noise,
mode=tf.estimator.ModeKeys.PREDICT,
trainable_variables=[])
# Computing content loss
content_noises = []
for c in content_end_points:
content_noises.append(end_points[c])
c_loss = content_loss(content_noises, content_features)
# Computing style_loss
        s_loss = 0
        for grams_per_image in style_grams:
            # Rebuilding the gram matrices of the generated image per style
            # image, so they pair one-to-one with that image's target grams
            style_gram_noises = []
            for c in style_end_points:
                layer = end_points[c]
                _, height, width, number = map(lambda i: i.value, layer.get_shape())
                size = height * width * number
                features = tf.reshape(layer, (-1, number))
                style_gram_noises.append(tf.matmul(tf.transpose(features), features) / size)
            s_loss += linear_gram_style_loss(style_gram_noises, grams_per_image)
# Variation denoise
d_loss = denoising_loss(noise)
        # Total loss
total_loss = content_weight*c_loss + style_weight*s_loss + denoise_weight*d_loss
solver = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
        checkpoint = tf.train.latest_checkpoint(checkpoint_dir) if os.path.isdir(checkpoint_dir) else checkpoint_dir
        tf.contrib.framework.init_from_checkpoint(checkpoint, scopes)
# Training
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(iterations):
_, loss = sess.run([solver, total_loss])
logger.info("Iteration {0}, loss {1}".format(i, loss))
# Saving generated image
raw_style_image = sess.run(noise)[0, :, :,:]
# Unpreprocessing the signal
if un_preprocess_fn is not None:
raw_style_image = un_preprocess_fn(raw_style_image)
raw_style_image = bob.io.image.to_bob(raw_style_image)
normalized_style_image = normalize4save(raw_style_image)
if pure_noise:
if normalized_style_image.shape[0] == 1:
return normalized_style_image[0, :, :]
else:
return normalized_style_image
else:
# Original output
if normalized_style_image.shape[0] == 1:
normalized_style_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(normalized_style_image[0,:,:]))
                    # Using the original content image, clipped to 0-255 in case it is in another scale
                    scaled_content_image = normalize4save(bob_content_image.astype("float32")).astype("float64")
                    content_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(scaled_content_image))
else:
normalized_style_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(bob.ip.color.rgb_to_gray(normalized_style_image)))
                    content_image_yuv = bob.ip.color.rgb_to_yuv(bob_content_image)
output_image = numpy.zeros(shape=content_image_yuv.shape, dtype="uint8")
output_image[0,:,:] = normalized_style_image_yuv[0,:,:]
output_image[1,:,:] = content_image_yuv[1,:,:]
output_image[2,:,:] = content_image_yuv[2,:,:]
output_image = bob.ip.color.yuv_to_rgb(output_image)
return output_image
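For reference, a minimal sketch of calling do_style_transfer directly from Python; the network builder, checkpoint path, scope mapping, and end-point names below are placeholders, not values shipped with the package:

import bob.io.base
from bob.learn.tensorflow.network import inception_resnet_v2_batch_norm  # hypothetical base CNN

# Content and style images in the Bob format (C x W x H)
content = bob.io.base.load("/path/to/content.png")
styles = [bob.io.base.load("/path/to/style.png")]

output = do_style_transfer(content, styles,
                           inception_resnet_v2_batch_norm,
                           "/path/to/checkpoint",                 # placeholder checkpoint
                           scopes={"InceptionResnetV2/": "InceptionResnetV2/"},
                           content_end_points=["Conv2d_3b_1x1"],  # placeholder end-points
                           style_end_points=["Conv2d_1a_3x3", "Conv2d_2b_3x3"],
                           iterations=500, learning_rate=0.1)

bob.io.base.save(output, "/path/to/output.png")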