Deepdream
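A self-contained Python 2 port of the Google DeepDream example: it loads the BVLC GoogLeNet model through Caffe's Python bindings and amplifies whatever the network "sees" in input.jpg by gradient ascent on a chosen layer's activations.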
#!/usr/bin/python2
import numpy as np
import scipy.ndimage as nd
import PIL.Image
import sys
sys.path.append('../caffe/distribute/python')  # pycaffe bindings from a local Caffe build
import caffe

# a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
    # HxWxRGB uint8 image -> Caffe's CxHxW BGR float layout, mean-subtracted
    return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']

def deprocess(net, img):
    # invert preprocess: re-add the mean and restack channels into HxWxRGB
    return np.dstack((img + net.transformer.mean['data'])[::-1])

def make_step(net, step_size=1.5, end='inception_4c/output', jitter=32, clip=True):
    '''Basic gradient ascent step.'''
    src = net.blobs['data']  # input image is stored in Net's 'data' blob
    dst = net.blobs[end]

    ox, oy = np.random.randint(-jitter, jitter + 1, 2)
    src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2)  # apply jitter shift

    net.forward(end=end)
    dst.diff[:] = dst.data  # specify the optimization objective
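    # setting the top gradient equal to the activations themselves means the
    # backward pass computes the gradient of (1/2) * sum(dst.data ** 2), i.e.
    # gradient ascent maximizes the L2 norm of the chosen layer's activations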
    net.backward(start=end)
    g = src.diff[0]
    # apply normalized ascent step to the input image
    src.data[:] += step_size / np.abs(g).mean() * g

    src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2)  # unshift image

    if clip:
        bias = net.transformer.mean['data']
        src.data[:] = np.clip(src.data, -bias, 255 - bias)
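
# deepdream runs gradient ascent over an image pyramid of "octaves": the base
# image is repeatedly downscaled by octave_scale, dreaming starts on the
# smallest copy, and the detail generated at each level is upscaled and
# re-added before dreaming continues at the next resolution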
def deepdream(net, base_img, iter_n=10, octave_n=4, octave_scale=1.4, end='inception_4c/output', clip=True, **step_params):
    # prepare base images for all octaves
    octaves = [preprocess(net, base_img)]
    for i in xrange(octave_n - 1):
        octaves.append(nd.zoom(octaves[-1], (1, 1.0 / octave_scale, 1.0 / octave_scale), order=1))

    src = net.blobs['data']
    detail = np.zeros_like(octaves[-1])  # allocate image for network-produced details
    for octave, octave_base in enumerate(octaves[::-1]):
        h, w = octave_base.shape[-2:]
        if octave > 0:
            # upscale details from the previous octave
            h1, w1 = detail.shape[-2:]
            detail = nd.zoom(detail, (1, 1.0 * h / h1, 1.0 * w / w1), order=1)

        src.reshape(1, 3, h, w)  # resize the network's input image size
        src.data[0] = octave_base + detail
        print("octave %d %s" % (octave, end))
        for i in xrange(iter_n):
            make_step(net, end=end, clip=clip, **step_params)
            sys.stdout.write("%d " % i)
            sys.stdout.flush()
        print("")

        # extract details produced on the current octave
        detail = src.data[0] - octave_base
    # return the resulting image
    return deprocess(net, src.data[0])
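
# NOTE: the GoogLeNet weights are not bundled with the Caffe source tree; they
# can be fetched with Caffe's scripts/download_model_binary.py models/bvlc_googlenet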
model_path = '../caffe/models/bvlc_googlenet/'  # substitute your path here
# NOTE: you *must* add "force_backward: true" to deploy.prototxt for this to work properly.
net_fn = model_path + 'deploy.prototxt'
param_fn = model_path + 'bvlc_googlenet.caffemodel'
net = caffe.Classifier(net_fn, param_fn,
                       mean=np.float32([104.0, 116.0, 122.0]),  # ImageNet mean, training set dependent
                       channel_swap=(2, 1, 0))  # the reference model has channels in BGR order instead of RGB

img = np.asarray(PIL.Image.open('input.jpg'))
frame = deepdream(net, img, iter_n=20, octave_n=6)
PIL.Image.fromarray(np.uint8(frame)).save("output.jpg")
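
This script dreams once and writes output.jpg. The original DeepDream notebook also iterates the process, feeding each output back in with a slight zoom to render a frame sequence. A minimal sketch of that loop on top of the net and deepdream defined above (the frames/ directory and the 0.05 scale coefficient are illustrative choices, not part of this script):

frame = img
h, w = frame.shape[:2]
s = 0.05  # zoom applied between frames; illustrative value
for i in xrange(10):
    frame = deepdream(net, frame)
    PIL.Image.fromarray(np.uint8(frame)).save("frames/%04d.jpg" % i)
    # zoom toward the center before dreaming on the next frame
    frame = nd.affine_transform(frame, [1 - s, 1 - s, 1], [h * s / 2, w * s / 2, 0], order=1)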