Skip to content

Instantly share code, notes, and snippets.

@dpiponi
Created December 16, 2017 01:23
Show Gist options
  • Save dpiponi/fda76b777727083672e6459e2207d74b to your computer and use it in GitHub Desktop.
Save dpiponi/fda76b777727083672e6459e2207d74b to your computer and use it in GitHub Desktop.
Neural style transfer based on https://arxiv.org/abs/1508.06576. I don't vouch for the current set of parameters in this code...
# See https://arxiv.org/abs/1508.06576
import tensorflow as tf
import numpy
import imageio
import scipy.io
# Report the TensorFlow build in use.  A single pre-formatted argument keeps
# this call valid, with identical output, on both Python 2 and Python 3.
print("Tensorflow Version %s" % tf.__version__)

# Options
content_filename = 'briggs.jpg'
style_filename = 'cappuccino.jpg'
# content: weight applied to the content error in the total loss
alpha = 100.0/5e4
# style: weight applied to the style error in the total loss
beta = 2000*0.25/6e6 # was 2000*...
# total variation: weight applied to the smoothness term in the total loss
gamma = 5000*alpha # 1000*alpha # was 5000*...
# Location of the pretrained VGG-19 weights, read with scipy.io.loadmat.
vgg19 = "/input/imagenet-vgg-verydeep-19.mat"
# Number of optimizer steps to run.
num_iterations = 2500
# Layers whose Gram matrices define the style target.
style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
# Layer whose activations define the content target.
content_layer = 'conv4_2'
# End of options

print("Content image = %s" % content_filename)
print("Style image = %s" % style_filename)
# Per-channel offset removed from an image before it is fed to the network
# and added back before the result is written out.
MEAN_PIXEL = numpy.array([123.68 , 116.779, 103.939])


def preprocess(image):
    """Return `image` with MEAN_PIXEL subtracted.

    MEAN_PIXEL has three entries, so it broadcasts against the trailing
    (length-3) channel axis of `image`.  The input is not modified.
    """
    return image - MEAN_PIXEL


def unprocess(image):
    """Undo `preprocess` by adding MEAN_PIXEL back onto `image`."""
    return image + MEAN_PIXEL
# `scipy.misc` has to be imported explicitly: `import scipy.io` alone does
# not guarantee that the `scipy.misc` submodule is loaded.
# NOTE: `scipy.misc.imresize` was deprecated in SciPy 1.0 and removed in
# SciPy 1.3, so this script needs an older SciPy to run.
import scipy.misc

# Load the content image, add a leading batch axis and subtract the mean
# pixel.  Its height and width fix the size of everything that follows.
content_image = imageio.imread(content_filename)
height, width, _ = content_image.shape
content_image = content_image.reshape(1, height, width, 3)
content_image = preprocess(content_image)

# Load the style image and resize it to the content image's size so both
# can be given the same (1, height, width, 3) shape.
style_image = imageio.imread(style_filename)
style_image = scipy.misc.imresize(style_image, (height, width))
style_image = style_image.reshape(1, height, width, 3)
style_image = preprocess(style_image)
# This section of code derived from material...
# Copyright (c) 2015-2016 Anish Athalye. Released under GPLv3.
# Note, we're not training this net.
# Just recreating and then evaluating it.
# Pretrained VGG-19 parameters, loaded once and shared by every vgg_net call.
vgg_data = scipy.io.loadmat(vgg19)


def vgg_net(input_image):
    """Rebuild the VGG-19 feature layers on top of `input_image`.

    Kernels are read from the module-level `vgg_data` and embedded in the
    graph as constants; biases are passed as plain arrays.  No variables are
    created here, so the network itself is never trained.

    Args:
        input_image: tensor handed to the first convolution (callers in this
            file pass tensors of shape (1, height, width, 3)).

    Returns:
        A dict mapping every layer name in `layers` to that layer's output
        tensor.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2',
        'conv3_3', 'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2',
        'conv4_3', 'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2',
        'conv5_3', 'relu5_3', 'conv5_4', 'relu5_4'
    )
    # Entry i of `weights` is looked up for layer i of `layers`.
    weights = vgg_data['layers'][0]
    net = {}
    current = input_image
    for i, name in enumerate(layers):
        # The first four characters of the name select the layer type.
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = numpy.transpose(kernels, (1, 0, 2, 3))
            bias = bias.reshape(-1)
            conv = tf.nn.conv2d(current,
                                tf.constant(kernels),
                                strides=(1, 1, 1, 1), padding='SAME')
            current = tf.nn.bias_add(conv, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            current = tf.nn.max_pool(current,
                                     ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
                                     padding='SAME')
        # Record the output of every layer, whatever its type.
        net[name] = current
    # Sanity check: every layer name produced exactly one entry.
    assert len(net) == len(layers)
    return net
# Back to my code again...
# Probably ought to do |del . image|...
def total_variation(image):
    """Return a smoothness penalty for `image`.

    Sums the absolute differences between neighbours along axis 2 and along
    axis 1, separately for each entry of axis 0, and divides the result by
    the module-level `width*height`.
    """
    step_along_width = image[:, :, 1:, :] - image[:, :, :-1, :]
    step_along_height = image[:, 1:, :, :] - image[:, :-1, :, :]
    # Slightly inaccurate
    reduce_axes = (1, 2, 3)
    width_term = tf.reduce_sum(tf.abs(step_along_width), axis=reduce_axes)
    height_term = tf.reduce_sum(tf.abs(step_along_height), axis=reduce_axes)
    return (width_term + height_term)/(width*height)
# CONTENT
# Placeholder network: used only to evaluate fixed target activations for
# images that are fed in.
_batch_shape = (1, height, width, 3)
image_in = tf.placeholder(tf.float32, shape=_batch_shape)
net = vgg_net(image_in)

# Evaluate the content layer once on the content image; the result is a
# plain array that serves as a constant target below.
with tf.Session() as sess:
    content_target = sess.run(
        net[content_layer],
        feed_dict={image_in: content_image})

# The image being optimised, initialised with small Gaussian noise.
image = tf.Variable(
    tf.random_normal(list(_batch_shape), stddev=0.1),
    name='image')
content_net2 = vgg_net(image)

# Mean squared difference between the generated image's content-layer
# activations and the target.
_content_delta = content_net2[content_layer] - content_target
content_error = tf.reduce_mean(tf.square(_content_delta))
# STYLE
def gram(x):
    """Return the Gram matrix of the last axis of the 4-D tensor `x`.

    Entry (l, m) is the sum over the first three axes of
    x[..., l] * x[..., m], divided by the module-level `height*width`.
    """
    channel_products = tf.einsum("ijkl,ijkm->lm", x, x)
    return channel_products/(height*width)
# Build the Gram-matrix ops on the placeholder network, then evaluate them
# once on the style image to get fixed target arrays, one per style layer.
style_gs = {style_layer: gram(net[style_layer]) for style_layer in style_layers}
with tf.Session() as sess:
    style_targets = sess.run(style_gs, feed_dict={image_in: style_image})

# Reuse the network already built on `image` for the content loss.  Calling
# vgg_net(image) again would add a second, identical copy of every VGG layer
# (including its constant weights) to the graph.
style_net2 = content_net2

# For each style layer: mean squared difference between the generated
# image's Gram matrix and the target, summed over the style layers.
style_error = tf.reduce_mean(sum(
    [tf.reduce_mean(
        tf.square(
            gram(style_net2[style_layer])-style_targets[style_layer]))
     for style_layer in style_layers]))
# TRANSFER
# Total loss: weighted sum of the content, style and total-variation terms.
tv = total_variation(image)
error = alpha*content_error+beta*style_error+gamma*tv
optimizer = tf.train.AdamOptimizer(1.0, beta1=0.9)
train_step = optimizer.minimize(error)
init_op = tf.global_variables_initializer()

# Using Adam optimizer because I can't get L-BFGS to work in tensorflow
with tf.Session() as sess:
    sess.run(init_op)
    for i in range(num_iterations):
        # One optimizer step; also fetch the image and the loss value.
        [_, im2, e] = sess.run([train_step, image, error])
        # Single pre-formatted arguments keep the output identical on
        # Python 2 and Python 3.
        print("Iteration  %d error = %s" % (i, e))
        # Write a snapshot every 100th iteration.
        if (i+1) % 100 == 0:
            filename = '/output/out.%d.png' % i
            print("writing to %s" % filename)
            print(im2.shape)
            # Drop the batch axis, add the mean pixel back and clamp to the
            # valid 8-bit range before saving.
            im2 = unprocess(im2[0, :, :, :])
            im2 = numpy.clip(im2, 0, 255)
            imageio.imwrite(filename, im2.astype(numpy.uint8))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment