This code trains a neural network for the Twitter account @StreetViewBot.
#
# Copyright (c) 2015, Alex J. Champandard.
#
# This code is CC-BY-SA https://creativecommons.org/licenses/by-sa/2.0/
#
#
# Learning to evaluate random 640x320 photos from Google Street View to determine
# if they are beautiful landscapes or not!
#
import os
import bz2
import pickle
import random
import numpy
import scipy.misc
from sknn.platform import gpu32
from sknn.mlp import Regressor, Layer, Convolution

def prepare_data(count=3000):
    """Load manually annotated images from disk and populate two arrays: one for the
    input images (Xs) and the other for the target ratings (Ys).
    """
    print('Reading images...', end='', flush=True)

    # Pre-allocate the arrays and incrementally write into them. Requires a bit of swap!
    Xs = numpy.zeros((count, 320, 640, 3), dtype=numpy.float32)
    Ys = numpy.zeros((count,), dtype=numpy.float32)
    Fs = []

    mean_color = numpy.array([119.68165173, 120.75284179, 111.68898161], dtype=numpy.float32)

    # Randomly select from all the annotated photos. Files with a rating start with
    # 'r'; the others are still to be rated with a separate application.
    files = [f for f in os.listdir('out') if f.startswith('r')]
    random.shuffle(files)

    for i, f in enumerate(files[:count]):
        img = scipy.misc.imread('out/'+f)[:320,:]
        Ys[i] = float(f.split('_')[0][1])    # Rating digit encoded in the filename, e.g. 'r3_...'.
        Xs[i] = (img - mean_color) / 255.0   # Center by the mean color, then scale down.
        Fs.append(f)

    print('\rReading images done.')
    # Standardize the ratings to zero mean and unit variance for regression.
    return Xs, (Ys - Ys.mean()) / Ys.std()
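
# NOTE: mean_color in prepare_data() above was presumably precomputed over the
# annotated photo set. A minimal sketch of how such a per-channel mean could be
# derived (this helper is an illustration, not part of the original pipeline):
def compute_mean_color(files):
    acc = numpy.zeros(3, dtype=numpy.float64)
    for f in files:
        img = scipy.misc.imread('out/'+f)[:320,:]
        acc += img.reshape(-1, 3).mean(axis=0)   # Average each of R, G, B over all pixels.
    return acc / len(files)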

def mirror(X):
    # Randomly flip the training sample in place, a cheap data augmentation that
    # doubles the effective variety of landscapes seen during training.
    if random.choice([True, False]):
        X[:] = X[:,:,::-1]

# This neural network structure is inspired by the popular and successful VGG architecture. It's much
# smaller though, since this is a simpler problem and it needs to run on a server. Convolution layers
# are filters that operate over the entire image, and each pooling layer reduces the image size by 2x:
# 640x320 -> 320x160 -> 160x80 -> 80x40 -> 40x20, so by conv4_2 the data is only 40x20 with 64 channels.
nn = Regressor(
    layers=[
        Convolution("Rectifier", channels=32, kernel_shape=(3,3), border_mode='same'),                   # conv1_1
        Convolution("Rectifier", channels=32, kernel_shape=(3,3), pool_shape=(2,2), border_mode='same'), # conv1_2
        Convolution("Rectifier", channels=64, kernel_shape=(3,3), border_mode='same'),                   # conv2_1
        Convolution("Rectifier", channels=64, kernel_shape=(3,3), pool_shape=(2,2), border_mode='same'), # conv2_2
        Convolution("Rectifier", channels=96, kernel_shape=(3,3), border_mode='same'),                   # conv3_1
        Convolution("Rectifier", channels=96, kernel_shape=(3,3), pool_shape=(2,2), border_mode='same'), # conv3_2
        Convolution("Rectifier", channels=64, kernel_shape=(3,3), border_mode='same'),                   # conv4_1
        Convolution("Rectifier", channels=64, kernel_shape=(3,3), pool_shape=(2,2), border_mode='same'), # conv4_2
        Layer("Rectifier", units=96),
        Layer("Rectifier", units=64, dropout=0.2),
        Layer("Rectifier", units=32, dropout=0.1),
        Layer("Linear", dropout=0.0)],
    learning_rate=0.01,
    learning_rule='adagrad',  # The more recent and powerful 'adam' training rule doesn't do well here.
    dropout_rate=0.3,         # IMPORTANT: This is the most important parameter to tune.
    mutator=mirror,
    batch_size=15,            # How many examples to train on at once; faster but uses more memory!
    verbose=True,
    n_iter=25,                # One iteration takes 280s, and training continues to improve even at the end.
    n_stable=5)
# A single pass suffices here; increase the range to iterate over multiple random
# subsets if the data is too big to fit into memory.
for _ in range(1):
    Xs, Ys = prepare_data()
    nn.fit(Xs, Ys)
# Serialize with slightly different parameters so it runs well on the server.
with bz2.open('landscape.nn', 'wb') as f:
    nn.dropout_rate = None   # Dropout is only used during training.
    nn.batch_size = 5        # Smaller batches need less memory at prediction time.
    nn.verbose = False
    nn.mutator = None        # No augmentation is wanted when predicting.
    pickle.dump(nn, f)
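
# Example of how the serialized network might be loaded on the server side. This
# is a sketch rather than part of the original bot: the filename 'photo.png' is
# hypothetical, and mean_color repeats the constant from prepare_data() above.
#
#   with bz2.open('landscape.nn', 'rb') as f:
#       nn = pickle.load(f)
#   img = scipy.misc.imread('photo.png')[:320,:]
#   score = nn.predict(((img - mean_color) / 255.0).reshape(1, 320, 640, 3))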