Created
March 2, 2016 22:53
-
-
Save jburroni/48d0d7673a6dbee6f12a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Author: Javier Burroni
### Creation: March 2016
### Please give credit when using this code
import matplotlib | |
matplotlib.use('Agg') | |
import pandas as pd | |
import numpy as np | |
import seaborn as sns | |
from matplotlib import pyplot as plt | |
import cPickle, gzip, numpy | |
import multiprocessing as mp | |
sns.set(style='whitegrid') | |
class Logistic(object):
    """Logistic (sigmoid) activation together with its first derivative."""

    def activation(self, x):
        """Return the sigmoid ``1 / (1 + exp(-x))`` of ``x``.

        ``x`` may be a scalar or a NumPy array; arrays are processed
        element-wise by ``np.exp``.
        """
        denominator = 1 + np.exp(-x)
        return 1.0 / denominator

    def derivative(self, x):
        """Return the sigmoid's slope at ``x``, i.e. ``s(x) * (1 - s(x))``."""
        sigmoid = self.activation(x)
        return sigmoid * (1 - sigmoid)
class Layer(object):
    """Fully connected layer whose weight matrix carries one extra input row.

    ``weights`` has shape ``(input_size + 1, neurons)``, so every input handed
    to this layer must already have ``input_size + 1`` columns (the network
    appends a constant 1 column before calling the layer).
    """

    def __init__(self, input_size, neurons, function=None):
        """Draw random normal weights and bind the activation pair.

        ``function`` is any object exposing ``activation`` and ``derivative``;
        when it is omitted (or falsy) a ``Logistic`` instance is used.
        """
        chosen = function or Logistic()
        self.weights = np.random.randn(input_size + 1, neurons)
        self.activation = chosen.activation
        self.derivative = chosen.derivative

    def net_input(self, input):
        """Return the pre-activation values ``input . weights``."""
        pre_activation = input.dot(self.weights)
        return pre_activation

    def forward(self, input):
        """Return the layer output: the activation applied to the net input."""
        pre_activation = self.net_input(input)
        return self.activation(pre_activation)

    def compute_local_gradient(self, inputs, next_gradient):
        """Scale the incoming gradient by the activation slope at ``inputs``.

        The slope is transposed so that it lines up with ``next_gradient``,
        which is laid out with one row per neuron.
        """
        slope = self.derivative(self.net_input(inputs))
        return slope.T * next_gradient

    def weighted_local_gradient(self, local_gradient):
        """Project the local gradient back through the current weights."""
        projected = self.weights.dot(local_gradient)
        return projected

    def update_weights(self, alpha, inputs, next_gradient):
        """Apply one weight update and return the gradient for the previous layer.

        The returned gradient is computed with the weights as they were
        *before* the update, and its last row (the one matching the extra
        input column) is dropped.
        """
        local_gradient = self.compute_local_gradient(inputs, next_gradient)
        # Must be evaluated before self.weights is replaced below.
        upstream = self.weighted_local_gradient(local_gradient)[:-1, :]
        step = alpha * inputs.T.dot(local_gradient.T)
        self.weights = self.weights + step
        return upstream
class Network(object):
    """Feed-forward network made of a chain of ``Layer`` objects.

    A constant 1 column is appended to the data before every layer, matching
    the extra weight row each ``Layer`` allocates.
    """

    def __init__(self, topology):
        """Create one layer per consecutive pair of sizes in ``topology``.

        ``topology`` is a sequence of layer widths ordered from the input
        size to the output size, e.g. ``[784, 30, 10]``.
        """
        self.layers = []
        last = topology[0]
        for dim in topology[1:]:
            self.layers.append(Layer(last, dim))
            last = dim

    def forward_and_inputs(self, input):
        """Run a forward pass and keep what each layer was fed.

        ``input`` is either a single 1-D sample or a 2-D array with one
        sample per row. Returns ``(output, inputs)`` where ``inputs[i]`` is
        the bias-augmented array that was passed to ``self.layers[i]``.
        """
        input = input[np.newaxis, :] if len(input.shape) < 2 else input
        answer = input
        inputs = []
        for layer in self.layers:
            # One bias entry per row so that batches work as well as single
            # samples; dtype=int keeps the same type promotion as appending
            # a literal integer 1.
            bias = np.ones((answer.shape[0], 1), dtype=int)
            answer = np.append(answer, bias, axis=1)
            inputs.append(answer)
            answer = layer.forward(answer)
        return answer, inputs

    def forward(self, input):
        """Return only the network output for ``input``."""
        return self.forward_and_inputs(input)[0]

    def backprop(self, input, expected, alpha=0.5):
        """Perform one backpropagation step towards ``expected``.

        The output error ``expected - observed`` is pushed through the
        layers from last to first; each layer updates its own weights with
        learning rate ``alpha`` and returns the gradient for the layer
        before it.
        """
        observed, layered_inputs = self.forward_and_inputs(input)
        next_gradient = (expected - observed).T
        # list() makes the pairs reversible on Python 3, where zip is lazy.
        pairs = list(zip(self.layers, layered_inputs))
        for layer, layer_input in reversed(pairs):
            next_gradient = layer.update_weights(alpha, layer_input, next_gradient)
def compute_precision(net, inputs, values):
    """Return the fraction of samples that ``net`` classifies correctly.

    A sample counts as correct when the index of the largest value in
    ``net.forward(inputs[i])`` equals ``values[i]``. ``inputs`` must expose
    ``shape[0]`` as its number of samples.

    Returns 0.0 when there are no samples, instead of dividing by zero.
    """
    total = inputs.shape[0]
    if total == 0:
        return 0.0
    hits = sum(
        1 for i in range(total)
        if net.forward(inputs[i]).argmax() == values[i]
    )
    return hits / float(total)
def train(args):
    """Train one network and record its accuracy after every sampling round.

    Takes a single dict so it can be dispatched through ``Pool.map``.

    Keys of ``args``:
        topology: list of hidden layer sizes; a 784-wide input layer and a
            10-wide output layer are added around it.
        train_set: pair ``(inputs, labels)`` used for training.
        valid_set: pair ``(inputs, labels)`` used for validation.
        rounds: optional number of sampling rounds (default 20).
        sample_size: optional number of training samples drawn without
            replacement in each round (default 5000). It is capped at the
            size of the training set.

    Returns a ``pandas.DataFrame`` with ``validation`` and ``training``
    columns, indexed by the cumulative number of samples seen. The training
    figure is measured on the samples drawn in that round only.
    """
    topology = args['topology']
    train_set = args['train_set']
    valid_set = args['valid_set']
    rounds = args.get('rounds', 20)
    sample_size = args.get('sample_size', 5000)

    inputs = train_set[0]
    values = train_set[1]
    # Drawing without replacement fails when asked for more rows than exist.
    sample_size = min(sample_size, inputs.shape[0])

    topology = [784] + list(topology) + [10]
    print(topology)
    net = Network(topology)

    val_prec = []
    train_prec = []
    indexes = []
    for k in range(rounds):
        selection = np.random.choice(inputs.shape[0], size=sample_size, replace=False)
        for i in selection:
            net.backprop(inputs[i], number_to_array(values[i]))
        indexes.append((k + 1) * sample_size)
        val_prec.append(compute_precision(net, valid_set[0], valid_set[1]))
        train_prec.append(compute_precision(net, inputs[selection], values[selection]))
    print('end of {}'.format(topology))
    return pd.DataFrame({'validation': val_prec, 'training': train_prec}, index=indexes)
def number_to_array(n, size=10):
    """Return a one-hot float vector of length ``size`` with a 1 at index ``n``.

    ``size`` defaults to 10, the number of digit classes used by this script.
    Raises ``IndexError`` when ``n`` is outside the vector.
    """
    one_hot = np.zeros(size)
    one_hot[n] = 1
    return one_hot
if __name__ == "__main__":
    import os

    # Load the dataset.
    # NOTE(security): unpickling can execute arbitrary code; only use an
    # mnist.pkl.gz obtained from a trusted source.
    with gzip.open('mnist.pkl.gz', 'rb') as f:
        train_set, valid_set, test_set = cPickle.load(f)

    # savefig does not create missing directories.
    if not os.path.isdir('plots'):
        os.makedirs('plots')

    N = 784
    # Floor division keeps the layer sizes integral on Python 2 and 3 alike.
    hidden_sizes = [N // (2**i) for i in range(1, 8)]

    pool = mp.Pool(15)
    try:
        # Experiment 1: a single hidden layer of decreasing width.
        history, index = [], []
        precisions = pool.map(train, [
            {'topology': [size], 'train_set': train_set, 'valid_set': valid_set}
            for size in hidden_sizes
        ])
        for k, df in zip(hidden_sizes, precisions):
            # DataFrame.plot() draws on a new figure, so the title has to be
            # set on the axes it returns rather than before the call.
            ax = df.plot()
            ax.set_title("analysis with {} nodes in the hidden layer".format(k))
            ax.figure.savefig("plots/{}.pdf".format(k))
            plt.close(ax.figure)
            history.append(df.validation.max())
            index.append(k)

        ax = pd.Series(history, index=index, name='performance').plot()
        ax.set_title('performance as a function of hidden layer size')
        ax.figure.savefig('plots/performance.pdf')
        plt.close(ax.figure)

        # Experiment 2: progressively deeper stacks of those same widths.
        topologies = []
        last = []
        for size in hidden_sizes:
            last = last + [size]
            topologies.append(last)
        precisions = pool.map(train, [
            {'topology': topology, 'train_set': train_set, 'valid_set': valid_set}
            for topology in topologies
        ])
        # Best validation score per depth. These are kept apart from the
        # width results above and keyed by the number of hidden layers
        # (previously a stale width value was recorded). Not plotted.
        depth_history, depth_index = [], []
        for i, (topology, df) in enumerate(zip(topologies, precisions)):
            ax = df.plot()
            ax.set_title("analysis with the following hidden layers: {}".format(topology))
            ax.figure.savefig("plots/hidden_{}.pdf".format(i))
            plt.close(ax.figure)
            depth_history.append(df.validation.max())
            depth_index.append(len(topology))
    finally:
        # Release the worker processes even if training or plotting failed.
        pool.close()
        pool.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment