d02k01 · December 13, 2015 12:12
diff --git a/invert.py b/invert.py
 #!/usr/bin/env python

 import numpy as np
 from PIL import Image

 import chainer
 from chainer import cuda
 from chainer import optimizers
 import chainer.functions as F
 from chainer.links import caffe

 gpu = 0

 if gpu >= 0:
    cuda.check_cuda_available()
 xp = cuda.cupy if gpu >= 0 else np

 # Take a few minutes
 func = caffe.CaffeFunction('bvlc_reference_caffenet.caffemodel')

 if gpu >= 0:
    cuda.get_device(gpu).use()
    func.to_gpu()

 image_size = 256
 in_size = 227

 def extract_feature(x, layer):
    y, = func(inputs={'data': x}, outputs=[layer], train=False)
    return y

 mean_image = np.load('ilsvrc_2012_mean.npy').astype('f')

 cropwidth = image_size - in_size
 start = cropwidth // 2
 stop = start + in_size
 mean_image = mean_image[:, start:stop, start:stop].copy()

 def preprocess(pil_image, resize=True):
    if resize:
        pil_image = pil_image.resize((in_size, in_size))
    pil_image = pil_image.convert('RGB')
    in_ = np.asarray(pil_image, dtype='f')
    in_ = in_.transpose(2, 0, 1)
    in_ = in_[::-1]
    in_ -= mean_image
    return in_

 def deprocess(in_):
    in_ = in_.copy()
    in_ += mean_image
    in_ = in_[::-1]
    in_ = in_.transpose(1, 2, 0)
    pil_image = Image.fromarray(np.clip(in_, 0, 255).astype(np.uint8))
    return pil_image

 image_path = 'dataset/ILSVRC2012_val_00000013.JPEG'
 # image_path = 'dataset/ILSVRC2012_val_00002012.JPEG'
 in_ = preprocess(Image.open(image_path))

 # x0_sigma: average Euclidean norm of natural images in a training set.
 # You can compute this as follows:
 #     norms = [np.linalg.norm(preprocess(pil_image)) for pil_image in IMAGES]
 #     x0_sigma = np.mean(norms).astype('f')
 # In this case, use the value distributed by the author:
 # https://github.com/aravindhm/deep-goggle/blob/master/experiments/x0_sigma.mat
 x0_sigma = 27098.11571533

 # Initialize parameters
 x0_data = xp.asarray(in_[np.newaxis].copy())
 x0 = chainer.Variable(x0_data, volatile=True)

 x_data = np.random.randn(*x0_data.shape).astype('f')
 x_data = x_data / np.linalg.norm(x_data) * x0_sigma
 x_data = xp.asarray(x_data)

 # Extract feature from target image
 layer = 'conv3'
 y0 = extract_feature(x0, layer)
 y0.volatile = False
 y0_sigma = np.linalg.norm(cuda.to_cpu(y0.data))

 # Define hyper-parameters
 learning_rate = 0.004 * np.array([1] * 100
                                 + [0.1] * 200
                                 + [0.01] * 100, dtype='f')
 momentum = 0.9

 lambda_euc = x0_sigma**2 / y0_sigma**2

 beta = 2
 lambda_tv = 0.5
 if layer in ['conv1', 'pool1', 'norm1', 'conv2']:
    lambda_tv *= 1
 elif layer in ['pool2', 'norm2', 'conv3', 'conv4']:
    lambda_tv *= 10
 elif layer in ['conv5', 'pool5', 'fc6', 'fc7', 'fc8']:
    lambda_tv *= 100
 else:
    print('The layer does not exist.')

 # Total variation can be expressed as a combination of the two filters
 Wh_data = xp.array([[[[1],[-1]]]], dtype='f')
 Ww_data = xp.array([[[[1, -1]]]], dtype='f')

 Wh = chainer.Variable(Wh_data)
 Ww = chainer.Variable(Ww_data)

 tvh = lambda x: F.convolution_2d(x, W=Wh)
 tvw = lambda x: F.convolution_2d(x, W=Ww)

 def tv_norm(x, beta=2):
    diffh = tvh(F.reshape(x, (3, 1, in_size, in_size)))
    diffw = tvw(F.reshape(x, (3, 1, in_size, in_size)))

    tv = (F.sum(diffh**2) + F.sum(diffw**2))**(beta / 2.)
    return tv

 p = 6
 lambda_lp = 4e-10

 def lossfun(x, layer):
    y = extract_feature(x, layer)
    loss = (lambda_euc * float(y0.data.size) * F.mean_squared_error(y0, y)
            + lambda_tv * tv_norm(x, beta)
            + lambda_lp * F.sum(x**p))
    return loss

 opt = optimizers.MomentumSGD(momentum=momentum)
 lr_prev = 0

 x_link = chainer.links.Parameter(x_data)
 opt.setup(x_link)

 for lr in learning_rate:
    if lr != lr_prev:
        opt.lr = lr
        lr_prev = lr
        opt.momentum = 0
    else:
        opt.momentum = 0.9

    x = x_link.W
    opt.update(lossfun, x, layer)

 result = deprocess(cuda.to_cpu(x.data)[0])
 result.save('result.png')
	#!/usr/bin/env python

	import numpy as np
	from PIL import Image

	import chainer
	from chainer import cuda
	from chainer import optimizers
	import chainer.functions as F
	from chainer.links import caffe

	gpu = 0

	if gpu >= 0:
	cuda.check_cuda_available()
	xp = cuda.cupy if gpu >= 0 else np

	# Take a few minutes
	func = caffe.CaffeFunction('bvlc_reference_caffenet.caffemodel')

	if gpu >= 0:
	cuda.get_device(gpu).use()
	func.to_gpu()

	image_size = 256
	in_size = 227

	def extract_feature(x, layer):
	y, = func(inputs={'data': x}, outputs=[layer], train=False)
	return y

	mean_image = np.load('ilsvrc_2012_mean.npy').astype('f')

	cropwidth = image_size - in_size
	start = cropwidth // 2
	stop = start + in_size
	mean_image = mean_image[:, start:stop, start:stop].copy()

	def preprocess(pil_image, resize=True):
	if resize:
	pil_image = pil_image.resize((in_size, in_size))
	pil_image = pil_image.convert('RGB')
	in_ = np.asarray(pil_image, dtype='f')
	in_ = in_.transpose(2, 0, 1)
	in_ = in_[::-1]
	in_ -= mean_image
	return in_

	def deprocess(in_):
	in_ = in_.copy()
	in_ += mean_image
	in_ = in_[::-1]
	in_ = in_.transpose(1, 2, 0)
	pil_image = Image.fromarray(np.clip(in_, 0, 255).astype(np.uint8))
	return pil_image

	image_path = 'dataset/ILSVRC2012_val_00000013.JPEG'
	# image_path = 'dataset/ILSVRC2012_val_00002012.JPEG'
	in_ = preprocess(Image.open(image_path))

	# x0_sigma: average Euclidean norm of natural images in a training set.
	# You can compute this as follows:
	# norms = [np.linalg.norm(preprocess(pil_image)) for pil_image in IMAGES]
	# x0_sigma = np.mean(norms).astype('f')
	# In this case, use the value distributed by the author:
	# https://github.com/aravindhm/deep-goggle/blob/master/experiments/x0_sigma.mat
	x0_sigma = 27098.11571533

	# Initialize parameters
	x0_data = xp.asarray(in_[np.newaxis].copy())
	x0 = chainer.Variable(x0_data, volatile=True)

	x_data = np.random.randn(*x0_data.shape).astype('f')
	x_data = x_data / np.linalg.norm(x_data) * x0_sigma
	x_data = xp.asarray(x_data)

	# Extract feature from target image
	layer = 'conv3'
	y0 = extract_feature(x0, layer)
	y0.volatile = False
	y0_sigma = np.linalg.norm(cuda.to_cpu(y0.data))

	# Define hyper-parameters
	learning_rate = 0.004 * np.array([1] * 100
	+ [0.1] * 200
	+ [0.01] * 100, dtype='f')
	momentum = 0.9

	lambda_euc = x0_sigma2 / y0_sigma2

	beta = 2
	lambda_tv = 0.5
	if layer in ['conv1', 'pool1', 'norm1', 'conv2']:
	lambda_tv *= 1
	elif layer in ['pool2', 'norm2', 'conv3', 'conv4']:
	lambda_tv *= 10
	elif layer in ['conv5', 'pool5', 'fc6', 'fc7', 'fc8']:
	lambda_tv *= 100
	else:
	print('The layer does not exist.')

	# Total variation can be expressed as a combination of the two filters
	Wh_data = xp.array([[[[1],[-1]]]], dtype='f')
	Ww_data = xp.array([[[[1, -1]]]], dtype='f')

	Wh = chainer.Variable(Wh_data)
	Ww = chainer.Variable(Ww_data)

	tvh = lambda x: F.convolution_2d(x, W=Wh)
	tvw = lambda x: F.convolution_2d(x, W=Ww)

	def tv_norm(x, beta=2):
	diffh = tvh(F.reshape(x, (3, 1, in_size, in_size)))
	diffw = tvw(F.reshape(x, (3, 1, in_size, in_size)))

	tv = (F.sum(diffh2) + F.sum(diffw2))**(beta / 2.)
	return tv

	p = 6
	lambda_lp = 4e-10

	def lossfun(x, layer):
	y = extract_feature(x, layer)
	loss = (lambda_euc * float(y0.data.size) * F.mean_squared_error(y0, y)
	+ lambda_tv * tv_norm(x, beta)
	+ lambda_lp * F.sum(x**p))
	return loss

	opt = optimizers.MomentumSGD(momentum=momentum)
	lr_prev = 0

	x_link = chainer.links.Parameter(x_data)
	opt.setup(x_link)

	for lr in learning_rate:
	if lr != lr_prev:
	opt.lr = lr
	lr_prev = lr
	opt.momentum = 0
	else:
	opt.momentum = 0.9

	x = x_link.W
	opt.update(lossfun, x, layer)

	result = deprocess(cuda.to_cpu(x.data)[0])
	result.save('result.png')