-- Original DeepDream code by github.com/jcjohnson
-- Simultaneous DeepDream and style transfer modifications by github.com/ProGamerGov
-- The FC layers as content layers feature, and the label file features come from:
-- github.com/htoyryla's gist.github.com/htoyryla/806ca4d978f0528114282cd00022ad71
-- Independent layer weights from: https://github.com/htoyryla/neural-style/blob/master/neural_style.lua
-- Torch model and architecture support from https://github.com/szagoruyko/neural-style/tree/torch
-- A script like this one is required to make Torch models compatible: https://gist.github.com/szagoruyko/8828e09cc4687afd324d
-- Matting laplacian code from: github.com/martinbenson/deep-photo-styletransfer/
-- Generate your laplacians with: https://gist.github.com/ProGamerGov/290f26afccc5e013d1a8425ef6a594f2
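--
-- Example invocation (illustrative only; the script filename and the chosen
-- flag values are assumptions, but every flag used here is defined below):
-- th deepdream_style.lua -content_image examples/inputs/tubingen.jpg \
-- -style_image examples/inputs/seated-nude.jpg \
-- -deepdream_layers relu4_2 -deepdream_weights 10 \
-- -gpu 0 -backend cudnn -image_size 512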
require 'torch'
require 'nn'
require 'image'
require 'optim'
require 'loadcaffe'
local cmd = torch.CmdLine()
-- Basic options
cmd:option('-style_image', 'examples/inputs/seated-nude.jpg',
'Style target image')
cmd:option('-style_blend_weights', 'nil', 'Comma-separated blend weights for multiple style images; "nil" means equal weights')
cmd:option('-content_image', 'examples/inputs/tubingen.jpg',
'Content target image')
cmd:option('-image_size', 512, 'Maximum height / width of generated image')
cmd:option('-gpu', '0', 'Zero-indexed ID of the GPU to use; for CPU mode set -gpu = -1')
cmd:option('-multigpu_strategy', '', 'Index of layers to split the network across GPUs')
-- Optimization options
cmd:option('-content_weight', 5e0)
cmd:option('-style_weight', 1e2)
cmd:option('-fc_weight', 5e2)
cmd:option('-tv_weight', 1e-3)
cmd:option('-num_iterations', 1000)
cmd:option('-normalize_gradients', false)
cmd:option('-init', 'random', 'random|image')
cmd:option('-init_image', '')
cmd:option('-optimizer', 'lbfgs', 'lbfgs|adam')
cmd:option('-learning_rate', 1e1)
cmd:option('-lbfgs_num_correction', 0)
-- Output options
cmd:option('-print_iter', 50)
cmd:option('-save_iter', 100)
cmd:option('-output_image', 'out.png')
-- Other options
cmd:option('-style_scale', 1.0)
cmd:option('-original_colors', 0)
cmd:option('-pooling', 'max', 'max|avg')
cmd:option('-proto_file', 'models/VGG_ILSVRC_19_layers_deploy.prototxt')
cmd:option('-model_file', 'models/VGG_ILSVRC_19_layers.caffemodel')
cmd:option('-label_file', '')
cmd:option('-backend', 'nn', 'nn|cudnn|clnn')
cmd:option('-cudnn_autotune', false)
cmd:option('-seed', -1)
cmd:option('-content_layers', 'relu4_2', 'layers for content')
cmd:option('-style_layers', 'relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', 'layers for style')
-- Options for DeepDream
cmd:option('-deepdream_layers', '', 'Comma-separated list of layers to apply DeepDream to')
cmd:option('-deepdream_weights', '', 'Single DeepDream strength applied to every listed layer')
cmd:option('-deepdream_layer_weights', 'nil', 'Comma-separated per-layer DeepDream strengths; "nil" falls back to -deepdream_weights')
-- Experimental Feature(s) From github.com/ProGamerGov
cmd:option('-time', false)
-- Experimental leongatys/NeuralImageSynthesis features
cmd:option('-padding', 'default', 'default|reflect|replicate')
cmd:option('-l2_weight', 0)
-- Experimental photorealism-related parameters
cmd:option('-laplacian', '', 'Laplacian generated from your content image')
-- Local affine params
cmd:option('-lambda', 1e4)
cmd:option('-patch', 3)
cmd:option('-eps', 1e-7)
-- Reconstruct best local affine using joint bilateral smoothing
cmd:option('-f_radius', 7)
cmd:option('-f_edge', 0.05)
cmd:option('-index', 1)
cmd:option('-serial', 'affine_outputs')
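-- Comma-separated flags pair up positionally; e.g. (hypothetical values) two
-- styles blended 70/30 with per-layer DeepDream strengths:
-- -style_image a.jpg,b.jpg -style_blend_weights 7,3
-- -deepdream_layers relu4_1,relu5_1 -deepdream_layer_weights 5,10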
total_time = nil
labels = {}
local function main(params)
if params.time then
total_time = torch.Timer()
print 'Time Keeping Enabled'
end
if params.label_file ~= '' then
f = io.open(params.label_file)
if f then
for line in f:lines() do
table.insert(labels, line)
end
end
end
local dtype, multigpu = setup_gpu(params)
local loadcaffe_backend = params.backend
if params.backend == 'clnn' then loadcaffe_backend = 'nn' end
local cnn
local is_caffemodel = params.model_file:find'caffemodel'
if is_caffemodel then
cnn = loadcaffe.load(params.proto_file, params.model_file, loadcaffe_backend):type(dtype)
else
cnn = torch.load(params.model_file):type(dtype)
if cnn.unpack then cnn = cnn:unpack() end
end
local content_image = image.load(params.content_image, 3)
content_image = image.scale(content_image, params.image_size, 'bilinear')
preprocess = is_caffemodel and preprocess_caffe or preprocess_torch
deprocess = is_caffemodel and deprocess_caffe or deprocess_torch
print(cnn.transform)
local content_image_caffe = preprocess(content_image, cnn.transform):float()
local style_size = math.ceil(params.style_scale * params.image_size)
local style_image_list = params.style_image:split(',')
local style_images_caffe = {}
for _, img_path in ipairs(style_image_list) do
local img = image.load(img_path, 3)
img = image.scale(img, style_size, 'bilinear')
local img_caffe = preprocess(img, cnn.transform):float()
table.insert(style_images_caffe, img_caffe)
end
local init_image = nil
if params.init_image ~= '' then
init_image = image.load(params.init_image, 3)
local H, W = content_image:size(2), content_image:size(3)
init_image = image.scale(init_image, W, H, 'bilinear')
init_image = preprocess(init_image, cnn.transform):float()
end
local CSR
local c, h, w
if params.laplacian ~= '' then
-- load matting laplacian
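-- File format expected by the parser below (produced by the laplacian
-- generation script linked in the header): the first line holds the number
-- of entries, and each subsequent line is one "rowIdx,colIdx,value" triple
-- of the sparse matting laplacian.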
local CSR_fn = params.laplacian
print('loading matting laplacian...', CSR_fn)
local csvFile = io.open(CSR_fn, 'r')
local ROWS = tonumber(csvFile:read())
CSR = torch.Tensor(ROWS, 3)
local i = 0
for line in csvFile:lines('*l') do
i = i + 1
local l = line:split(',')
for key, val in ipairs(l) do
CSR[i][key] = val
end
end
csvFile:close()
paths.mkdir(tostring(params.serial))
print('Exp serial:', params.serial)
c, h, w = content_image:size(1), content_image:size(2), content_image:size(3)
require 'libcuda_utils'
end
-- Handle style blending weights for multiple style inputs
local style_blend_weights = nil
if params.style_blend_weights == 'nil' then
-- Style blending not specified, so use equal weighting
style_blend_weights = {}
for i = 1, #style_image_list do
table.insert(style_blend_weights, 1.0)
end
else
style_blend_weights = params.style_blend_weights:split(',')
assert(#style_blend_weights == #style_image_list,
'-style_blend_weights and -style_images must have the same number of elements')
end
-- Normalize the style blending weights so they sum to 1
local style_blend_sum = 0
for i = 1, #style_blend_weights do
style_blend_weights[i] = tonumber(style_blend_weights[i])
style_blend_sum = style_blend_sum + style_blend_weights[i]
end
for i = 1, #style_blend_weights do
style_blend_weights[i] = style_blend_weights[i] / style_blend_sum
end
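-- e.g. (hypothetical flag values) -style_blend_weights 7,3 normalizes to
-- weights 0.7 and 0.3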
local content_layers = params.content_layers:split(",")
local style_layers = params.style_layers:split(",")
local deepdream_layers = params.deepdream_layers:split(",")
local deepdream_layer_weights = {}
if params.deepdream_layer_weights ~= 'nil' then
deepdream_layer_weights = params.deepdream_layer_weights:split(',')
assert(#deepdream_layer_weights == #deepdream_layers,
'-deepdream_layer_weights and -deepdream_layers must have the same number of elements')
end
-- Set up the network, inserting style and content loss modules
local content_losses, style_losses, deepdream_losses = {}, {}, {}
local next_content_idx, next_style_idx, next_deepdream_idx = 1, 1, 1
local net = nn.Sequential()
if params.tv_weight > 0 then
local tv_mod = nn.TVLoss(params.tv_weight):type(dtype)
net:add(tv_mod)
end
if params.l2_weight ~= 0 then
local l2_mod = nn.L2Penalty(params.l2_weight):type(dtype)
net:add(l2_mod)
end
for i = 1, #cnn do
local layer = cnn:get(i)
-- Makes FC Layers usable as content layers.
if (torch.type(layer) == "nn.View") then
local addlayer = nn.SpatialAdaptiveMaxPooling(7,7):type(dtype)
net:add(addlayer)
end
-- Keep walking the net while any content, style, or DeepDream layer remains
if next_content_idx <= #content_layers or next_style_idx <= #style_layers
or next_deepdream_idx <= #deepdream_layers then
local name = is_caffemodel and layer.name or tostring(i)
local layer_type = torch.type(layer)
-- reflectance padding option from leongatys/NeuralImageSynthesis
local is_convolution = (layer_type == 'cudnn.SpatialConvolution' or layer_type == 'nn.SpatialConvolution')
if is_convolution and params.padding ~= 'default' then
local padW, padH = layer.padW, layer.padH
local pad_layer
if params.padding == 'reflect' then
pad_layer = nn.SpatialReflectionPadding(padW, padW, padH, padH):type(dtype)
net:add(pad_layer)
elseif params.padding == 'replicate' then
pad_layer = nn.SpatialReplicationPadding(padW, padW, padH, padH):type(dtype)
net:add(pad_layer)
else
error('Unknown padding type')
end
layer.padW = 0
layer.padH = 0
end
local is_pooling = (layer_type == 'cudnn.SpatialMaxPooling' or layer_type == 'nn.SpatialMaxPooling')
if is_pooling and params.pooling == 'avg' then
assert(layer.padW == 0 and layer.padH == 0)
local kW, kH = layer.kW, layer.kH
local dW, dH = layer.dW, layer.dH
local avg_pool_layer = nn.SpatialAveragePooling(kW, kH, dW, dH):type(dtype)
local msg = 'Replacing max pooling at layer %d with average pooling'
print(string.format(msg, i))
net:add(avg_pool_layer)
else
if params.label_file ~= '' then
if layer_type ~= "nn.Dropout" then
layer:type(dtype)
net:add(layer)
end
else
net:add(layer)
end
end
if name == content_layers[next_content_idx] then
print("Setting up content layer", i, ":", layer.name)
local norm = params.normalize_gradients
local cweight = params.content_weight
if is_caffemodel then
local pos, _ = string.find(layer.name, "fc")
if pos == 1 then -- this is an fc layer
cweight = params.fc_weight
end
end
local loss_module = nn.ContentLoss(cweight, norm):type(dtype)
net:add(loss_module)
table.insert(content_losses, loss_module)
next_content_idx = next_content_idx + 1
end
if name == style_layers[next_style_idx] then
print("Setting up style layer ", i, ":", layer.name)
local norm = params.normalize_gradients
local loss_module = nn.StyleLoss(params.style_weight, norm):type(dtype)
net:add(loss_module)
table.insert(style_losses, loss_module)
next_style_idx = next_style_idx + 1
end
if name == deepdream_layers[next_deepdream_idx] then
print("Setting up Deepdream layer ", i, ":", layer.name)
local dweight
-- Checks whether multiple DeepDream weights are needed
if params.deepdream_layer_weights ~= 'nil' then
dweight = deepdream_layer_weights[next_deepdream_idx]
else
dweight = params.deepdream_weights
end
if is_caffemodel then
local pos, _ = string.find(layer.name, "fc")
if pos == 1 then -- this is an fc layer
dweight = params.fc_weight
end
end
local loss_module = nn.DeepDreamLoss(dweight):type(dtype)
net:add(loss_module)
table.insert(deepdream_losses, loss_module)
next_deepdream_idx = next_deepdream_idx + 1
end
else
if params.label_file ~= '' then
if torch.type(layer) ~= "nn.Dropout" then
layer:type(dtype)
net:add(layer)
end
end
end
end
if params.label_file ~= '' then
-- The VGG16 Places model lacks a softmax at the end, so insert one
local prob = nn.SoftMax():type(dtype)
net:add(prob)
end
if multigpu then
net = setup_multi_gpu(net, params)
end
net:type(dtype)
-- Capture content targets
for i = 1, #content_losses do
content_losses[i].mode = 'capture'
end
print 'Capturing content targets'
print(net)
content_image_caffe = content_image_caffe:type(dtype)
net:forward(content_image_caffe:type(dtype))
-- Capture style targets (content losses are switched off first)
for i = 1, #content_losses do
content_losses[i].mode = 'none'
end
for i = 1, #style_images_caffe do
print(string.format('Capturing style target %d', i))
for j = 1, #style_losses do
style_losses[j].mode = 'capture'
style_losses[j].blend_weight = style_blend_weights[i]
end
net:forward(style_images_caffe[i]:type(dtype))
end
-- DeepDream losses need no target capture; they act directly on activations
-- Set all loss modules to loss mode
for i = 1, #content_losses do
content_losses[i].mode = 'loss'
end
for i = 1, #style_losses do
style_losses[i].mode = 'loss'
end
-- Print Elapsed Time
if params.time then
elapsed_time = total_time:time().real
print(string.format('Setup time: %.2f seconds', elapsed_time))
end
-- For Caffe models we no longer need the base CNN, so free it to save memory.
-- Torch models keep cnn around because cnn.transform is used when deprocessing.
if is_caffemodel then
cnn = nil
end
for i=1, #net.modules do
local module = net.modules[i]
if torch.type(module) == 'nn.SpatialConvolutionMM' then
-- remove these; they are not used but occupy GPU memory
module.gradWeight = nil
module.gradBias = nil
end
end
collectgarbage()
-- Initialize the image
if params.seed >= 0 then
torch.manualSeed(params.seed)
end
local img = nil
if params.init == 'random' then
img = torch.randn(content_image:size()):float():mul(0.001)
elseif params.init == 'image' then
if init_image then
img = init_image:clone()
else
img = content_image_caffe:clone()
end
else
error('Invalid init type')
end
img = img:type(dtype)
if params.label_file ~= '' then
-- Run the content image through the network to report detected features
local cimg = content_image_caffe:clone():type(dtype)
if f then
local p = net:forward(cimg)
print("----------- Detected Features --------------")
for i=1, #labels do
if p[i] > 0.03 then
print(string.format("%.4f %s", p[i], labels[i]))
end
end
end
end
local mean_pixel
local meanImage
if params.laplacian ~= '' then
mean_pixel = torch.CudaTensor({103.939, 116.779, 123.68})
meanImage = mean_pixel:view(3, 1, 1):expandAs(content_image_caffe)
end
-- Run it through the network once to get the proper size for the gradient
-- All the gradients will come from the extra loss modules, so we just pass
-- zeros into the top of the net on the backward pass.
local y = net:forward(img)
local dy = img.new(#y):zero()
-- Declaring this here lets us access it in maybe_print
local optim_state = nil
if params.optimizer == 'lbfgs' then
optim_state = {
maxIter = params.num_iterations,
verbose=true,
tolX=-1,
tolFun=-1,
}
if params.lbfgs_num_correction > 0 then
optim_state.nCorrection = params.lbfgs_num_correction
end
elseif params.optimizer == 'adam' then
optim_state = {
learningRate = params.learning_rate,
}
else
error(string.format('Unrecognized optimizer "%s"', params.optimizer))
end
local function maybe_print(t, loss, p)
local verbose = (params.print_iter > 0 and t % params.print_iter == 0)
if verbose then
print(string.format('Iteration %d / %d', t, params.num_iterations))
for i, loss_module in ipairs(content_losses) do
print(string.format(' Content %d loss: %f', i, loss_module.loss))
end
for i, loss_module in ipairs(style_losses) do
print(string.format(' Style %d loss: %f', i, loss_module.loss))
end
print(string.format(' Total loss: %f', loss))
-- Print Elapsed Time
if params.time then
elapsed_time = total_time:time().real
print(string.format(' Elapsed time: %.2f seconds', elapsed_time))
end
if f then
for i=1, #labels do
if p[i] > 0.03 then
print(string.format("%.4f %s", p[i], labels[i]))
end
end
end
end
end
local function maybe_save(t, img_mean)
local should_save = params.save_iter > 0 and t % params.save_iter == 0
should_save = should_save or t == params.num_iterations
if should_save then
local disp = deprocess(img:double(), img_mean)
disp = image.minmax{tensor=disp, min=0, max=1}
local filename = build_filename(params.output_image, t)
if t == params.num_iterations then
filename = params.output_image
end
-- Maybe perform postprocessing for color-independent style transfer
if params.original_colors == 1 then
disp = original_colors(content_image, disp)
end
image.save(filename, disp)
end
end
-- Function to evaluate loss and gradient. We run the net forward and
-- backward to get the gradient, and sum up losses from the loss modules.
-- optim.lbfgs internally handles iteration and calls this function many
-- times, so we manually count the number of iterations to handle printing
-- and saving intermediate results.
local num_calls = 0
local function feval(x)
num_calls = num_calls + 1
local p
local grad
if params.laplacian ~= '' then
local output = torch.add(img, meanImage)
local input = torch.add(content_image_caffe, meanImage)
net:forward(img)
local gradient_VggNetwork = net:updateGradInput(img, dy)
local gradient_LocalAffine = MattingLaplacian(output, CSR, h, w):mul(params.lambda)
if num_calls % params.save_iter == 0 then
local best = SmoothLocalAffine(output, input, params.eps, params.patch, h, w, params.f_radius, params.f_edge)
local fn = params.serial .. '/best' .. tostring(params.index) .. '_t_' .. tostring(num_calls) .. '.png'
image.save(fn, best)
end
grad = torch.add(gradient_VggNetwork, gradient_LocalAffine)
else
net:forward(x)
grad = net:updateGradInput(x, dy)
end
if params.label_file ~= '' then
p = net:forward(x)
end
local loss = 0
for _, mod in ipairs(content_losses) do
loss = loss + mod.loss
end
for _, mod in ipairs(style_losses) do
loss = loss + mod.loss
end
if params.deepdream_layers ~= '' then
for _, mod in ipairs(deepdream_losses) do
loss = loss + mod.loss
end
end
if params.label_file ~= '' then
maybe_print(num_calls, loss, p)
else
maybe_print(num_calls, loss)
end
if is_caffemodel then
maybe_save(num_calls)
else
maybe_save(num_calls, cnn.transform)
end
collectgarbage()
-- optim.lbfgs expects a vector for gradients
return loss, grad:view(grad:nElement())
end
-- Run optimization.
if params.optimizer == 'lbfgs' then
print('Running optimization with L-BFGS')
local x, losses = optim.lbfgs(feval, img, optim_state)
elseif params.optimizer == 'adam' then
print('Running optimization with ADAM')
for t = 1, params.num_iterations do
local x, losses = optim.adam(feval, img, optim_state)
end
end
end
function setup_gpu(params)
local multigpu = false
if params.gpu:find(',') then
multigpu = true
params.gpu = params.gpu:split(',')
for i = 1, #params.gpu do
params.gpu[i] = tonumber(params.gpu[i]) + 1
end
else
params.gpu = tonumber(params.gpu) + 1
end
local dtype = 'torch.FloatTensor'
if multigpu or params.gpu > 0 then
if params.backend ~= 'clnn' then
require 'cutorch'
require 'cunn'
if multigpu then
cutorch.setDevice(params.gpu[1])
else
cutorch.setDevice(params.gpu)
end
dtype = 'torch.CudaTensor'
else
require 'clnn'
require 'cltorch'
if multigpu then
cltorch.setDevice(params.gpu[1])
else
cltorch.setDevice(params.gpu)
end
dtype = torch.Tensor():cl():type()
end
else
params.backend = 'nn'
end
if params.backend == 'cudnn' then
require 'cudnn'
if params.cudnn_autotune then
cudnn.benchmark = true
end
cudnn.SpatialConvolution.accGradParameters = nn.SpatialConvolutionMM.accGradParameters -- ie: nop
end
return dtype, multigpu
end
function setup_multi_gpu(net, params)
local DEFAULT_STRATEGIES = {
[2] = {3},
}
local gpu_splits = nil
if params.multigpu_strategy == '' then
-- Use a default strategy
gpu_splits = DEFAULT_STRATEGIES[#params.gpu]
-- Offset the default strategy by one if we are using TV
if params.tv_weight > 0 then
for i = 1, #gpu_splits do gpu_splits[i] = gpu_splits[i] + 1 end
end
else
-- Use the user-specified multigpu strategy
gpu_splits = params.multigpu_strategy:split(',')
for i = 1, #gpu_splits do
gpu_splits[i] = tonumber(gpu_splits[i])
end
end
assert(gpu_splits ~= nil, 'Must specify -multigpu_strategy')
local gpus = params.gpu
local cur_chunk = nn.Sequential()
local chunks = {}
for i = 1, #net do
cur_chunk:add(net:get(i))
if i == gpu_splits[1] then
table.remove(gpu_splits, 1)
table.insert(chunks, cur_chunk)
cur_chunk = nn.Sequential()
end
end
table.insert(chunks, cur_chunk)
assert(#chunks == #gpus)
local new_net = nn.Sequential()
for i = 1, #chunks do
local out_device = nil
if i == #chunks then
out_device = gpus[1]
end
new_net:add(nn.GPU(chunks[i], gpus[i], out_device))
end
return new_net
end
-- Matting Laplacian Related Functions:
function MattingLaplacian(output, CSR, h, w)
local N, c = CSR:size(1), CSR:size(2)
local CSR_rowIdx = torch.CudaIntTensor(N):copy(torch.round(CSR[{{1,-1},1}]))
local CSR_colIdx = torch.CudaIntTensor(N):copy(torch.round(CSR[{{1,-1},2}]))
local CSR_val = torch.CudaTensor(N):copy(CSR[{{1,-1},3}])
local output01 = torch.div(output, 256.0)
local grad = cuda_utils.matting_laplacian(output01, h, w, CSR_rowIdx, CSR_colIdx, CSR_val, N)
grad:div(256.0)
return grad
end
function SmoothLocalAffine(output, input, epsilon, patch, h, w, f_r, f_e)
local output01 = torch.div(output, 256.0)
local input01 = torch.div(input, 256.0)
local filter_radius = f_r
local sigma1, sigma2 = filter_radius / 3, f_e
local best01= cuda_utils.smooth_local_affine(output01, input01, epsilon, patch, h, w, filter_radius, sigma1, sigma2)
return best01
end
function ErrorMapLocalAffine(output, input, epsilon, patch, h, w)
local output01 = torch.div(output, 256.0)
local input01 = torch.div(input, 256.0)
local err_map, best01, Mt_M, invMt_M = cuda_utils.error_map_local_affine(output01, input01, epsilon, patch, h, w)
return err_map, best01
end
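-- Note: ErrorMapLocalAffine is carried over from martinbenson's
-- deep-photo-styletransfer code but is not called anywhere in this script.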
function build_filename(output_image, iteration)
local ext = paths.extname(output_image)
local basename = paths.basename(output_image, ext)
local directory = paths.dirname(output_image)
return string.format('%s/%s_%d.%s',directory, basename, iteration, ext)
end
-- Preprocess an image before passing it to a Caffe model.
-- We need to rescale from [0, 1] to [0, 256], convert from RGB to BGR,
-- and subtract the mean pixel.
function preprocess_caffe(img)
local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68})
local perm = torch.LongTensor{3, 2, 1}
img = img:index(1, perm):mul(256.0)
mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img)
img:add(-1, mean_pixel)
return img
end
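-- Worked example: an RGB pixel (1.0, 0.5, 0.0) is permuted to BGR
-- (0.0, 0.5, 1.0), scaled by 256 to (0, 128, 256), then mean-subtracted to
-- (-103.939, 11.221, 132.32).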
-- Undo the above preprocessing.
function deprocess_caffe(img)
local mean_pixel = torch.DoubleTensor({103.939, 116.779, 123.68})
mean_pixel = mean_pixel:view(3, 1, 1):expandAs(img)
img = img + mean_pixel
local perm = torch.LongTensor{3, 2, 1}
img = img:index(1, perm):div(256.0)
return img
end
function preprocess_torch(img, img_mean)
local im = img:clone()
for i=1,3 do im[i]:add(-img_mean.mean[i]):div(img_mean.std[i]) end
return im
end
function deprocess_torch(img, img_mean)
local im = img:clone()
for i=1,3 do im[i]:mul(img_mean.std[i]):add(img_mean.mean[i]) end
return im
end
-- Combine the Y channel of the generated image and the UV channels of the
-- content image to perform color-independent style transfer.
function original_colors(content, generated)
local generated_y = image.rgb2yuv(generated)[{{1, 1}}]
local content_uv = image.rgb2yuv(content)[{{2, 3}}]
return image.yuv2rgb(torch.cat(generated_y, content_uv, 1))
end
-- Define an nn Module to compute content loss in-place
local ContentLoss, parent = torch.class('nn.ContentLoss', 'nn.Module')
function ContentLoss:__init(strength, normalize)
parent.__init(self)
self.strength = strength
self.target = torch.Tensor()
self.normalize = normalize or false
self.loss = 0
self.crit = nn.MSECriterion()
self.mode = 'none'
end
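-- Each loss module is driven by a three-state 'mode' flag set from main():
-- 'capture' stores the current activations as the target, 'loss' penalizes
-- deviation from that target, and 'none' makes the module a pass-through
-- while other targets are being captured.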
function ContentLoss:updateOutput(input)
if self.mode == 'loss' then
self.loss = self.crit:forward(input, self.target) * self.strength
elseif self.mode == 'capture' then
self.target:resizeAs(input):copy(input)
end
self.output = input
return self.output
end
function ContentLoss:updateGradInput(input, gradOutput)
if self.mode == 'loss' then
if input:nElement() == self.target:nElement() then
self.gradInput = self.crit:backward(input, self.target)
end
if self.normalize then
self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
end
self.gradInput:mul(self.strength)
self.gradInput:add(gradOutput)
else
self.gradInput:resizeAs(gradOutput):copy(gradOutput)
end
return self.gradInput
end
local Gram, parent = torch.class('nn.GramMatrix', 'nn.Module')
function Gram:__init()
parent.__init(self)
end
function Gram:updateOutput(input)
assert(input:dim() == 3)
local C, H, W = input:size(1), input:size(2), input:size(3)
local x_flat = input:view(C, H * W)
self.output:resize(C, C)
self.output:mm(x_flat, x_flat:t())
return self.output
end
function Gram:updateGradInput(input, gradOutput)
assert(input:dim() == 3 and input:size(1))
local C, H, W = input:size(1), input:size(2), input:size(3)
local x_flat = input:view(C, H * W)
self.gradInput:resize(C, H * W):mm(gradOutput, x_flat)
self.gradInput:addmm(gradOutput:t(), x_flat)
self.gradInput = self.gradInput:view(C, H, W)
return self.gradInput
end
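-- GramMatrix computes G = F * F^T for the C x (H*W) activation matrix F;
-- StyleLoss below divides G by the number of input elements so the statistic
-- does not depend on image size.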
-- Define an nn Module to compute style loss in-place
local StyleLoss, parent = torch.class('nn.StyleLoss', 'nn.Module')
function StyleLoss:__init(strength, normalize)
parent.__init(self)
self.normalize = normalize or false
self.strength = strength
self.target = torch.Tensor()
self.mode = 'none'
self.loss = 0
self.gram = nn.GramMatrix()
self.blend_weight = nil
self.G = nil
self.crit = nn.MSECriterion()
end
function StyleLoss:updateOutput(input)
self.G = self.gram:forward(input)
self.G:div(input:nElement())
if self.mode == 'capture' then
if self.blend_weight == nil then
self.target:resizeAs(self.G):copy(self.G)
elseif self.target:nElement() == 0 then
self.target:resizeAs(self.G):copy(self.G):mul(self.blend_weight)
else
self.target:add(self.blend_weight, self.G)
end
elseif self.mode == 'loss' then
self.loss = self.strength * self.crit:forward(self.G, self.target)
end
self.output = input
return self.output
end
function StyleLoss:updateGradInput(input, gradOutput)
if self.mode == 'loss' then
local dG = self.crit:backward(self.G, self.target)
dG:div(input:nElement())
self.gradInput = self.gram:backward(input, dG)
if self.normalize then
self.gradInput:div(torch.norm(self.gradInput, 1) + 1e-8)
end
self.gradInput:mul(self.strength)
self.gradInput:add(gradOutput)
else
self.gradInput = gradOutput
end
return self.gradInput
end
local DeepDreamLoss, parent = torch.class('nn.DeepDreamLoss', 'nn.Module')
-- Deepdream from jcjohnson/fast-neural-style
function DeepDreamLoss:__init(strength, max_grad)
parent.__init(self)
self.strength = strength or 1e-5
self.max_grad = max_grad or 100.0
self.clipped = torch.Tensor()
self.loss = 0
end
function DeepDreamLoss:updateOutput(input)
self.output = input
-- Contrast fix
self.output = self.output:mul(1/self.output:max())
return self.output
end
function DeepDreamLoss:updateGradInput(input, gradOutput)
self.gradInput:resizeAs(gradOutput):copy(gradOutput)
self.clipped:resizeAs(input):clamp(input, -self.max_grad, self.max_grad)
self.gradInput:add(-self.strength, self.clipped)
return self.gradInput
end
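-- No target capture is needed above: the backward pass adds
-- -strength * clamp(input, -max_grad, max_grad) to the gradient, so gradient
-- descent on the total loss performs gradient ascent on this layer's
-- activations, amplifying whatever features the layer already responds to.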
local TVLoss, parent = torch.class('nn.TVLoss', 'nn.Module')
function TVLoss:__init(strength)
parent.__init(self)
self.strength = strength
self.x_diff = torch.Tensor()
self.y_diff = torch.Tensor()
end
function TVLoss:updateOutput(input)
self.output = input
return self.output
end
-- TV loss backward pass inspired by kaishengtai/neuralart
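-- For L = strength/2 * sum of squared forward differences, the gradient at
-- pixel (i, j) collects +x_diff + y_diff, while its right and lower
-- neighbours receive -x_diff and -y_diff; the slice updates below implement
-- exactly that.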
function TVLoss:updateGradInput(input, gradOutput)
self.gradInput:resizeAs(input):zero()
local C, H, W = input:size(1), input:size(2), input:size(3)
self.x_diff:resize(3, H - 1, W - 1)
self.y_diff:resize(3, H - 1, W - 1)
self.x_diff:copy(input[{{}, {1, -2}, {1, -2}}])
self.x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}])
self.y_diff:copy(input[{{}, {1, -2}, {1, -2}}])
self.y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}])
self.gradInput[{{}, {1, -2}, {1, -2}}]:add(self.x_diff):add(self.y_diff)
self.gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, self.x_diff)
self.gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, self.y_diff)
self.gradInput:mul(self.strength)
self.gradInput:add(gradOutput)
return self.gradInput
end
local L2Penalty, parent = torch.class('nn.L2Penalty','nn.Module')
--This module acts as an L2 latent state regularizer, adding the
--[gradOutput] to the gradient of the L2 loss. The [input] is copied to
--the [output].
-- L2Penalty module from leongatys/NeuralImageSynthesis
function L2Penalty:__init(l2weight, sizeAverage, provideOutput)
parent.__init(self)
self.l2weight = l2weight
self.sizeAverage = sizeAverage or false
if provideOutput == nil then
self.provideOutput = true
else
self.provideOutput = provideOutput
end
end
function L2Penalty:updateOutput(input)
local m = self.l2weight
if self.sizeAverage == true then
m = m/input:nElement()
end
local loss = m*input:norm(2)/2
self.loss = loss
self.output = input
return self.output
end
function L2Penalty:updateGradInput(input, gradOutput)
local m = self.l2weight
if self.sizeAverage == true then
m = m/input:nElement()
end
self.gradInput:resizeAs(input):copy(input):mul(m)
if self.provideOutput == true then
self.gradInput:add(gradOutput)
end
return self.gradInput
end
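-- Standalone CUDA TV-gradient helper, also carried over from the
-- deep-photo-styletransfer code; not called anywhere in this script.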
function TVGradient(input, gradOutput, strength)
local C, H, W = input:size(1), input:size(2), input:size(3)
local gradInput = torch.CudaTensor(C, H, W):zero()
local x_diff = torch.CudaTensor()
local y_diff = torch.CudaTensor()
x_diff:resize(3, H - 1, W - 1)
y_diff:resize(3, H - 1, W - 1)
x_diff:copy(input[{{}, {1, -2}, {1, -2}}])
x_diff:add(-1, input[{{}, {1, -2}, {2, -1}}])
y_diff:copy(input[{{}, {1, -2}, {1, -2}}])
y_diff:add(-1, input[{{}, {2, -1}, {1, -2}}])
gradInput[{{}, {1, -2}, {1, -2}}]:add(x_diff):add(y_diff)
gradInput[{{}, {1, -2}, {2, -1}}]:add(-1, x_diff)
gradInput[{{}, {2, -1}, {1, -2}}]:add(-1, y_diff)
gradInput:mul(strength)
gradInput:add(gradOutput)
return gradInput
end
local params = cmd:parse(arg)
main(params)