Neural network using numpy on binary data
import numpy as np
from itertools import product
from timeit import default_timer as timer
import matplotlib.pyplot as plt

# Deterministic runs
np.random.seed(1234)

# Data: all binary vectors of length dim
dim = 4
data_size = 2**dim
X = np.empty((data_size, dim))
Y = np.empty(data_size)
for n, x in enumerate(product(*([[0, 1]] * dim))):
    target = int(sum(x) == 2)  # the function we want to predict: 1 iff the input has exactly two ones
    X[n] = x
    Y[n] = target
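
# For dim = 4 there are C(4, 2) = 6 inputs with exactly two ones,
# so 6 of the 16 targets are 1 and the remaining 10 are 0.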

# Training data
training_size = data_size  # must be <= data_size
target1 = np.array(Y[:training_size])
input1 = np.array(X[:training_size])

# Model parameters
num_epochs = 100000
echos = 10
echo_freq = min(num_epochs // echos, 5000)
etha = 0.1  # learning rate
precision = 1e-4  # stopping criterion
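
# With num_epochs = 100000 and echos = 10, echo_freq = min(10000, 5000) = 5000,
# so progress is reported at most once every 5000 epochs.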

# Some activation functions and their derivatives
def sigmoid(a):
    return 1/(1 + np.exp(-a))

def sigmoid_prime(a):
    return sigmoid(a)*(1 - sigmoid(a))

def id(z):
    return z

def id_prime(z):
    return 1

def ReLU(a):
    # Element-wise max(x, 0)
    return np.maximum(a, 0)

def ReLU_prime(a):
    # 1 where the input is positive, 0 elsewhere
    return (a > 0).astype(a.dtype)
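
# ReLU and ReLU_prime are defined but unused below: the network is wired with
# sigmoid hidden layers and an identity output (see activation_functions).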

# Activation function
h = sigmoid
h_prime = sigmoid_prime
# Per-layer activations and their derivatives
activation_functions = [sigmoid, sigmoid, id]
act_prime = [sigmoid_prime, sigmoid_prime, id_prime]

# The output dimension of each layer,
# including the input and output layer
layer_shape = [dim, 5, 10, 1]
num_layers = len(layer_shape) - 1
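
# The resulting network is fully connected, 4 -> 5 -> 10 -> 1:
# two sigmoid hidden layers of widths 5 and 10 and a single linear output unit.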

# Initializing layers with uniform random weights and biases
layers = []
biases = []
for l in range(num_layers):
    layers.append(np.random.uniform(size=(layer_shape[l+1], layer_shape[l])))
    biases.append(np.random.uniform(size=layer_shape[l+1]))

def compute(inp):
    # Forward pass: returns the list of layer inputs [inp, z_0, ..., z_{L-1}]
    # and the final activation
    output = [inp]
    a = inp
    for l in range(num_layers):
        # at each layer, apply the affine map followed by that layer's activation
        h = activation_functions[l]
        w = layers[l]
        z = np.dot(w, a) + biases[l]
        output.append(z)
        a = h(z)
    return output, a
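
# Example (illustrative sanity check, kept commented out): a single forward pass
# on the first input returns num_layers + 1 stored layer inputs and a length-1 output:
#   out, y = compute(X[0])
#   assert len(out) == num_layers + 1
#   assert y.shape == (1,)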

# Training, i.e. minimizing the error function with stochastic gradient descent (SGD)
start = timer()
errors = []
wprev = layers.copy()
bprev = biases.copy()
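
# Per-sample update performed in the inner loop (plain backpropagation, with the
# learning rate etha folded into delta at the output layer):
#   delta  = etha * (result - target)               # identity output => derivative 1
#   w_l   <- w_l - outer(delta, a_{l-1})            # a_{l-1}: activation feeding layer l
#   b_l   <- b_l - delta
#   delta <- w_l^T delta * h'_{l-1}(z_{l-1})        # propagate to the previous layer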
for epoch in range(num_epochs):
    shuffled = np.arange(training_size)
    np.random.shuffle(shuffled)
    sumerr = 0
    for t in shuffled:
        # Calculate the output of each layer
        output, result = compute(input1[t])
        # The sign is important
        error = result - target1[t]
        # Using the l2-norm
        sumerr += np.sqrt(sum(np.square(error)))
        # Backpropagation: update the weights and biases
        # w <- w - etha * dE(w)/dw
        delta = etha*error  # the output activation is the identity, so its derivative is 1
        for l in reversed(range(num_layers)):
            w = layers[l]
            z = output[l]  # input to layer l: the raw input for l == 0, z_{l-1} otherwise
            # Activation feeding layer l
            a_prev = activation_functions[l-1](z) if l > 0 else z
            layers[l] = w - np.outer(delta, a_prev)
            biases[l] = biases[l] - delta
            if l > 0:
                # Propagate the error signal to the previous layer
                delta = w.T.dot(delta) * act_prime[l-1](z)
    sumerr = sumerr/training_size
    errors.append(sumerr)
    # if len(errors) > 2 and errors[-1] - errors[-2] < 1e-5:
    #     print(f'\nStopping at epoch {epoch} with error {sumerr}')
    #     break
    if sumerr < precision:
        print(f'\nStopping at epoch {epoch} with error {sumerr}')
        break
    if epoch % echo_freq == 0:
        print(f'Epoch {epoch} - current error: {sumerr}')
        # Norm of the parameter change since the last report
        diff_w = [np.linalg.norm(layers[l] - wprev[l]) for l in range(num_layers)]
        diff_b = [np.linalg.norm(biases[l] - bprev[l]) for l in range(num_layers)]
        diff = np.linalg.norm(diff_w) + np.linalg.norm(diff_b)
        print(f'Difference {diff}')
        wprev = layers.copy()
        bprev = biases.copy()
end_training = timer()
print(f'\nTraining took {end_training - start}s\n')

# Plotting convergence
plt.plot(errors)
plt.show()

# Testing
for i in range(data_size):
    output, result = compute(X[i])
    test_error = np.sum(np.abs(result - Y[i]))
    if i == training_size:
        print("---Testdata---")
    print(f'Test f({X[i]}) = {result}, target = {Y[i]}, error = {test_error}')
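
# Note: with training_size == data_size every sample above was seen during training,
# so the "---Testdata---" header never appears; choosing training_size < data_size
# would hold out the remaining inputs as a test set.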