@stormxuwz
Created September 20, 2016 04:03
neural network
from __future__ import division
import numpy as np
import chainer  # only used to load the MNIST data in this script
def softmax(z):
    # z is a vector
    return np.exp(z) / np.sum(np.exp(z))
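# Added sketch (not part of the original gist): np.exp can overflow for large z.
# A numerically more stable variant subtracts the maximum before exponentiating;
# it returns the same probabilities and is shown here only for reference.
def softmax_stable(z):
    # z is a vector; shifting by max(z) does not change the softmax output
    e = np.exp(z - np.max(z))
    return e / np.sum(e)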
def sigmoid(x):
    # x can be a vector
    return 1.0/(1.0+np.exp(-x))
def sigmoid_gradient(x):
    # x can be a vector
    return sigmoid(x)*(1-sigmoid(x))
def ReLU(x):
    # x can be a vector
    return np.maximum(x,0)
def ReLU_gradient(x):
    # x can be a vector
    return 1.0*(x>0)
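# Added note (not in the original gist): the gradient at x == 0 is taken to be 0
# here, the usual convention for the ReLU subgradient.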
class myNeuralNetwork(object):
    def __init__(self, unitsPerHiddenLayer=[3], outputFunc="softmax", hiddenLayerFunc="ReLU", learningRate=0.1, epochs=3):
        '''
        Currently, only a single hidden layer is implemented
        '''
        self.hiddenLayerNum = len(unitsPerHiddenLayer)
        self.unitsPerHiddenLayer = unitsPerHiddenLayer
        if outputFunc == "softmax":
            self.outputFunc = softmax
        else:
            raise ValueError("not implemented")
        if hiddenLayerFunc == "sigmoid":
            self.hiddenLayerFunc = sigmoid
            self.hiddenLayerFunc_gradient = sigmoid_gradient
        elif hiddenLayerFunc == "ReLU":
            self.hiddenLayerFunc = ReLU
            self.hiddenLayerFunc_gradient = ReLU_gradient
        else:
            raise ValueError("not implemented")
        self.LR = learningRate
        self.epochs = epochs
        self.w = []  # w[0] and b[0] map the data (a[0]) to z[0]; w[1] and b[1] map a[1] to z[1]
        self.b = []
        self.a = []  # a[0] is the data (the output of the input layer), a[1] is the output of the first hidden layer
        self.z = []  # z[0] is the input of the first hidden layer
    def fit(self, X, Y):
        d = X.shape[1]   # data feature dimension
        n = X.shape[0]   # number of data samples
        k = len(set(Y))  # number of classes
        # print d,n,k
        # initialize the parameters
        # initialize the input layer
        self.a.append(np.zeros(d))
        # initialize the hidden layers
        for i in range(self.hiddenLayerNum):
            L = self.unitsPerHiddenLayer[i]
            self.w.append((np.random.rand(L, d) - 0.5) / 3.0)  # -0.5 centers the initial weights in [-0.5, 0.5]; /3.0 shrinks them further
            self.b.append((np.random.rand(L) - 0.5) / 3.0)
            self.z.append(np.zeros((L, 1)))
            self.a.append(np.zeros((L, 1)))
        # initialize the output layer
        self.w.append((np.random.rand(k, L) - 0.5) / 3.0)
        self.b.append((np.random.rand(k) - 0.5) / 3.0)
        self.z.append(np.zeros(k))
        # start fitting using SGD
        for e in range(self.epochs):
            # print "epochs:", e
            sampleIndex = np.random.choice(n, n, replace=False)  # visit every sample once per epoch, in random order
            for j in sampleIndex:
                f = self.feedForward(X[j])
                delta = self.backPropagation(f, X[j], Y[j])
                # print "finish delta"
                # update the parameters
                for i in range(self.hiddenLayerNum + 1):  # +1 to update the output layer
                    self.w[i] += self.LR * delta["w"][i]
                    self.b[i] += self.LR * delta["b"][i]
    def feedForward(self, x):
        # feedForward function to evaluate one sample
        self.a[0] = x.T
        for i in range(self.hiddenLayerNum):
            self.z[i] = np.dot(self.w[i], self.a[i]) + self.b[i]  # w[0] shape: (L, d); a[0] shape: (d,) for the first hidden layer
            self.a[i+1] = self.hiddenLayerFunc(self.z[i])
        self.z[-1] = np.dot(self.w[-1], self.a[-1]) + self.b[-1]  # hidden layer output to the output layer
        output = self.outputFunc(self.z[-1])  # probability of each class
        return output
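    # Added note (not in the original gist): for one MNIST sample with L hidden
    # units and k = 10 classes, the shapes flowing through feedForward are
    #   a[0]: (784,) -> w[0]: (L, 784), z[0]: (L,), a[1]: (L,)
    #   w[1]: (10, L), z[-1]: (10,), output: (10,)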
    def backPropagation(self, f, X, y):
        '''
        Back-propagation to compute the gradients for one sample,
        used with stochastic gradient descent.
        Currently only works for a single hidden layer.
        # f is the softmax output (probability of each class)
        # X is the sample
        # y is the label, one of 0, 1, ..., k-1
        '''
        delta3 = -f                # f has shape (k,)
        delta3[y] = delta3[y] + 1  # delta3 = one_hot(y) - f
        deltaW2 = np.dot(delta3.reshape(-1, 1), self.a[1].reshape(1, -1))  # shape (k, 1) dot shape (1, L)
        deltab2 = delta3
        # print "delta3.shape", delta3.shape
        # print "deltaW2", deltaW2.shape
        # print self.z[0]
        # print self.w[1]
        # print np.dot(self.w[1],delta3)
        delta2 = np.dot(self.w[1].T, delta3) * self.hiddenLayerFunc_gradient(self.z[0])  # shape (L,)
        deltaW1 = np.dot(delta2.reshape(-1, 1), X.reshape(1, -1))
        deltab1 = delta2
        return {"w": [deltaW1, deltaW2], "b": [deltab1, deltab2]}
    def predict(self, X):
        return np.array([np.argmax(self.feedForward(sample)) for sample in X])
    def performanceEval(self, testX, testY):
        return sum(self.predict(testX) == testY) / len(testY)
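# Example usage (added sketch, not in the original gist): train on a tiny
# synthetic dataset without downloading MNIST; the data below is illustrative only.
#   X = np.random.rand(30, 4)                 # 30 samples, 4 features
#   Y = np.array([i % 3 for i in range(30)])  # 3 classes, all present
#   nn = myNeuralNetwork(unitsPerHiddenLayer=[5], hiddenLayerFunc="ReLU", learningRate=0.05, epochs=10)
#   nn.fit(X, Y)
#   print nn.predict(X)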
def test(uN, f, LR, epochs):
    print "start"
    nn = myNeuralNetwork(unitsPerHiddenLayer=[uN], hiddenLayerFunc=f, learningRate=LR, epochs=epochs)
    train, test = chainer.datasets.get_mnist()
    # train[i][0] is the image, train[i][1] is the label
    trainX = np.array([sample[0].flatten() for sample in train])
    trainY = np.array([sample[1] for sample in train])
    testX = np.array([sample[0].flatten() for sample in test])
    testY = np.array([sample[1] for sample in test])
    nn.fit(X=trainX, Y=trainY)
    print "units Num: %d, hidden layer func: %s, learning rate: %f, epochs: %d, accuracy: %f" % (uN, f, LR, epochs, nn.performanceEval(testX, testY))
def main():
    # test(50, "sigmoid", 0.07, 1)
    # test(50, "sigmoid", 0.07, 5)
    # test(100, "sigmoid", 0.07, 1)
    # test(100, "sigmoid", 0.07, 5)
    # test(200, "sigmoid", 0.07, 1)
    # test(200, "sigmoid", 0.07, 5)
    # test(50, "ReLU", 0.07, 1)
    # test(50, "ReLU", 0.07, 5)
    # test(100, "ReLU", 0.07, 1)
    # test(100, "ReLU", 0.07, 5)
    # test(200, "ReLU", 0.07, 1)
    # test(200, "ReLU", 0.07, 5)
    # test(50, "sigmoid", 0.05, 5)
    # test(50, "sigmoid", 0.1, 5)
    # test(50, "sigmoid", 0.5, 5)
    # test(50, "ReLU", 0.05, 5)
    # test(50, "ReLU", 0.1, 5)
    # test(50, "ReLU", 0.5, 5)
    # test(50, "ReLU", 0.01, 5)
    test(50, "sigmoid", 0.01, 5)
    test(10, "sigmoid", 0.01, 5)
if __name__ == '__main__':
    main()