Created
December 16, 2017 01:23
-
-
Save dpiponi/fda76b777727083672e6459e2207d74b to your computer and use it in GitHub Desktop.
Neural style transfer based on https://arxiv.org/abs/1508.06576. I don't vouch for the current set of parameters in this code...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# See https://arxiv.org/abs/1508.06576 | |
import imageio
import numpy
import scipy.io
import scipy.misc
import tensorflow as tf
# Report the TensorFlow build in use. The single-argument call form prints
# exactly the same text under both Python 2 and Python 3, whereas the bare
# print statement is a syntax error on Python 3.
print("Tensorflow Version %s" % tf.__version__)
# Options
content_filename = 'briggs.jpg'
style_filename = 'cappuccino.jpg'
# Weight of the content term in the total error.
alpha = 100.0/5e4
# Weight of the style term in the total error.
beta = 2000*0.25/6e6  # was 2000*...
# Weight of the total-variation term, expressed relative to alpha.
gamma = 5000*alpha  # 1000*alpha # was 5000*...
# Path of the VGG-19 weights file (a .mat file read with scipy.io.loadmat).
vgg19 = "/input/imagenet-vgg-verydeep-19.mat"
# Number of optimiser steps to run.
num_iterations = 2500
# Layers whose Gram matrices define the style target.
style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
# Layer whose activations define the content target.
content_layer = 'conv4_2'
# End of options

# Single-argument print calls emit identical text on Python 2 and Python 3.
print("Content image = %s" % content_filename)
print("Style image = %s" % style_filename)
# Three-component offset (one value per colour channel) removed from images
# before they are handed to the network and restored before saving.
MEAN_PIXEL = numpy.array([123.68, 116.779, 103.939])


def preprocess(image):
    """Return `image` shifted down by MEAN_PIXEL.

    MEAN_PIXEL has three entries, so NumPy broadcasting applies it along the
    last axis of `image`.
    """
    centred = image - MEAN_PIXEL
    return centred


def unprocess(image):
    """Undo `preprocess` by adding MEAN_PIXEL back onto `image`."""
    restored = image + MEAN_PIXEL
    return restored
def _load_rgb(filename):
    """Read `filename` with imageio and return it as an (h, w, 3) array.

    A two-dimensional (single-channel) image is replicated across three
    channels, and any channels beyond the third (for example an alpha
    channel) are dropped. Without this the reshape to
    (1, height, width, 3) below fails on such files. Three-channel images
    pass through unchanged.
    """
    pixels = numpy.asarray(imageio.imread(filename))
    if pixels.ndim == 2:
        pixels = pixels[:, :, numpy.newaxis]
    if pixels.shape[2] < 3:
        # Fewer than three channels: treat the first one as grey.
        pixels = numpy.repeat(pixels[:, :, :1], 3, axis=2)
    return pixels[:, :, :3]


# Content image: its size fixes the size of everything that follows.
content_image = _load_rgb(content_filename)
height, width, _ = content_image.shape
content_image = content_image.reshape(1, height, width, 3)
content_image = preprocess(content_image)

# Style image: resized to the content image's size so that both can be fed
# through the same fixed-shape placeholder.
# NOTE: scipy.misc must be imported explicitly (importing scipy.io alone does
# not guarantee the attribute exists). imresize is only present in older
# SciPy releases -- TODO confirm the installed version still provides it.
style_image = _load_rgb(style_filename)
style_image = scipy.misc.imresize(style_image, (height, width))
style_image = style_image.reshape(1, height, width, 3)
style_image = preprocess(style_image)
# This section of code derived from material... | |
# Copyright (c) 2015-2016 Anish Athalye. Released under GPLv3. | |
# Note, we're not training this net. | |
# Just recreating and then evaluating it. | |
# Loaded once at module level; vgg_net() reads its layer weights from here.
vgg_data = scipy.io.loadmat(vgg19)


def vgg_net(input_image):
    """Recreate the VGG-19 feature layers on top of `input_image`.

    The convolution kernels are taken from the module-level `vgg_data` and
    embedded as constants, so the network is only evaluated, never trained.

    Args:
        input_image: tensor (placeholder or variable) the first convolution
            is applied to.

    Returns:
        A dict mapping every layer name in the fixed layer list ('conv1_1',
        'relu1_1', ..., 'relu5_4') to the tensor produced by that layer.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
        'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2',
        'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
        'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4'
    )
    # The 'normalization' entry of the .mat file is deliberately not read:
    # its value was never used here (mean subtraction happens in
    # preprocess()), and looking it up would raise KeyError on a weights
    # file that lacks that top-level key.
    weights = vgg_data['layers'][0]
    net = {}
    current = input_image
    for i, name in enumerate(layers):
        # The first four characters of the name identify the layer type.
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = numpy.transpose(kernels, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            conv = tf.nn.conv2d(current,
                                tf.constant(kernels),
                                strides=(1, 1, 1, 1), padding='SAME')
            current = tf.nn.bias_add(conv, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            # 2x2 max pooling with stride 2.
            current = tf.nn.max_pool(current,
                                     ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
                                     padding='SAME')
        net[name] = current
    # Every listed layer must have produced exactly one entry.
    assert len(net) == len(layers)
    return net
# Back to my code again... | |
# Probably ought to do |del . image|... | |
def total_variation(image):
    """Return the size-normalised sum of absolute neighbour differences.

    Differences are taken between adjacent entries along axis 2 and along
    axis 1 of the 4-D `image`. The absolute values are summed over axes
    1, 2 and 3, so the first (batch) axis is kept in the result. The sum is
    divided by the module-level `width*height`.
    """
    step_axis2 = tf.abs(image[:, :, 1:, :] - image[:, :, :-1, :])
    step_axis1 = tf.abs(image[:, 1:, :, :] - image[:, :-1, :, :])
    summed_axes = (1, 2, 3)
    variation = (tf.reduce_sum(step_axis2, axis=summed_axes)
                 + tf.reduce_sum(step_axis1, axis=summed_axes))
    # Slightly inaccurate (presumably because the number of differences
    # summed is a little smaller than width*height -- TODO confirm intent).
    return variation / (width*height)
# CONTENT
# Placeholder through which the fixed input photographs are fed to a copy of
# the network that is only ever evaluated.
image_in = tf.placeholder(tf.float32, shape=(1, height, width, 3))
net = vgg_net(image_in)

# Evaluate the content layer once on the content image; the resulting array
# is the fixed target the generated image is compared against.
with tf.Session() as sess:
    content_target = sess.run(
        net[content_layer],
        feed_dict={image_in: content_image})

# The image being optimised, initialised with low-amplitude Gaussian noise.
initial_noise = tf.random_normal([1, height, width, 3], stddev=0.1)
image = tf.Variable(initial_noise, name='image')

# Second copy of the network, this time built on the trainable image.
content_net2 = vgg_net(image)

# Mean squared difference between generated and target content activations.
content_difference = content_net2[content_layer] - content_target
content_error = tf.reduce_mean(tf.square(content_difference))
# STYLE | |
def gram(x):
    """Return the Gram matrix of the last axis of the 4-D tensor `x`.

    Entry (l, m) is the sum over the first three axes of
    x[..., l] * x[..., m], divided by the module-level `height*width`
    (the input image size, not the spatial size of `x` itself).
    """
    channel_products = tf.einsum("ijkl,ijkm->lm", x, x)
    normaliser = height*width
    return channel_products/normaliser
# Evaluate the Gram matrix of every style layer once on the style image;
# the resulting arrays are the fixed style targets.
with tf.Session() as sess:
    style_gs = {style_layer: gram(net[style_layer]) for style_layer in style_layers}
    style_targets = sess.run(style_gs, feed_dict={image_in: style_image})

# Reuse the network already built on `image` for the content term instead of
# calling vgg_net(image) again: a second call would duplicate every VGG op in
# the graph while producing exactly the same activations.
style_net2 = content_net2

# Sum over the style layers of the mean squared Gram-matrix difference.
# Each term is already a scalar, so no further reduction is applied.
style_error = sum(
    [tf.reduce_mean(
        tf.square(
            gram(style_net2[style_layer])-style_targets[style_layer]))
     for style_layer in style_layers])
# TRANSFER
tv = total_variation(image)
# Weighted sum of the content, style and total-variation terms. `tv` keeps
# the batch axis, so `error` has shape (1,) and is printed as a one-element
# array below.
error = alpha*content_error+beta*style_error+gamma*tv
optimizer = tf.train.AdamOptimizer(1.0, beta1=0.9)
train_step = optimizer.minimize(error)
init_op = tf.global_variables_initializer()
# Using Adam optimizer because I can't get L-BFGS to work in tensorflow
with tf.Session() as sess:
    sess.run(init_op)
    for i in range(num_iterations):
        # One optimiser step; also fetch the current image and error value.
        [_, im2, e] = sess.run([train_step, image, error])
        # Single-argument print calls produce the same text on Python 2 and
        # Python 3 (the double space after "Iteration" matches the output of
        # the original Python 2 print statement).
        print("Iteration  %s error = %s" % (i, e))
        # Write a snapshot every 100th iteration.
        if (i+1) % 100 == 0:
            filename = '/output/out.%d.png' % i
            print("writing to %s" % filename)
            print(im2.shape)
            # Drop the batch axis, add the mean pixel back and clamp to the
            # valid 8-bit range before saving.
            im2 = unprocess(im2[0, :, :, :])
            im2 = numpy.clip(im2, 0, 255)
            imageio.imwrite(filename, im2.astype(numpy.uint8))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment