neural network
from __future__ import division
import numpy as np
import chainer  # only used to load the MNIST data in this script
def softmax(z):
    # z is a vector
    return np.exp(z) / np.sum(np.exp(z))

def sigmoid(x):
    # x can be a vector
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_gradient(x):
    # x can be a vector
    return sigmoid(x) * (1 - sigmoid(x))

def ReLU(x):
    # x can be a vector
    return np.maximum(x, 0)

def ReLU_gradient(x):
    # x can be a vector
    return 1.0 * (x > 0)
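# A few identities worth keeping in mind (sanity notes added here, not in the original gist):
#   softmax(z) returns a probability vector: its entries are positive and sum to 1
#   sigmoid_gradient uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
#   ReLU_gradient is the indicator 1[x > 0] (the derivative at x = 0 is taken as 0 here)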
class myNeuralNetwork(object):
    def __init__(self, unitsPerHiddenLayer=[3], outputFunc="softmax", hiddenLayerFunc="ReLU", learningRate=0.1, epochs=3):
        '''
        Currently, only a single hidden layer is implemented
        '''
        self.hiddenLayerNum = len(unitsPerHiddenLayer)
        self.unitsPerHiddenLayer = unitsPerHiddenLayer

        if outputFunc == "softmax":
            self.outputFunc = softmax
        else:
            raise ValueError("not implemented")

        if hiddenLayerFunc == "sigmoid":
            self.hiddenLayerFunc = sigmoid
            self.hiddenLayerFunc_gradient = sigmoid_gradient
        elif hiddenLayerFunc == "ReLU":
            self.hiddenLayerFunc = ReLU
            self.hiddenLayerFunc_gradient = ReLU_gradient
        else:
            raise ValueError("not implemented")

        self.LR = learningRate
        self.epochs = epochs

        self.w = []  # w[0] and b[0] map the data (a[0]) to z[0]; w[1] and b[1] map a[1] to z[1]
        self.b = []
        self.a = []  # a[0] is the input data (the output of the input layer), a[1] is the output of the first hidden layer
        self.z = []  # z[0] is the pre-activation input of the first hidden layer
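    # Parameter shapes for one hidden layer with L units, input dimension d and k classes
    # (summary added for readability, not in the original gist; see fit() below):
    #   w[0]: (L, d)   b[0]: (L,)   -> hidden layer
    #   w[1]: (k, L)   b[1]: (k,)   -> output layer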
    def fit(self, X, Y):
        d = X.shape[1]   # data feature dimension
        n = X.shape[0]   # number of data samples
        k = len(set(Y))  # number of classes

        # initialize the parameters
        # input layer
        self.a.append(np.zeros(d))

        # hidden layers
        for i in range(self.hiddenLayerNum):
            L = self.unitsPerHiddenLayer[i]
            self.w.append((np.random.rand(L, d) - 0.5) / 3.0)  # -0.5 shifts the initial weights to [-0.5, 0.5]; /3 shrinks them further
            self.b.append((np.random.rand(L) - 0.5) / 3.0)
            self.z.append(np.zeros((L, 1)))
            self.a.append(np.zeros((L, 1)))

        # output layer
        self.w.append((np.random.rand(k, L) - 0.5) / 3.0)
        self.b.append((np.random.rand(k) - 0.5) / 3.0)
        self.z.append(np.zeros(k))

        # start fitting using SGD
        for e in range(self.epochs):
            sampleIndex = np.random.choice(n, n, replace=False)  # visit the samples in a random order each epoch
            for j in sampleIndex:
                f = self.feedForward(X[j])
                delta = self.backPropagation(f, X[j], Y[j])
                # update the parameters (delta holds the negative gradients, so += is a descent step)
                for i in range(self.hiddenLayerNum + 1):  # +1 to also update the output layer
                    self.w[i] += self.LR * delta["w"][i]
                    self.b[i] += self.LR * delta["b"][i]
    def feedForward(self, x):
        # feed-forward pass to evaluate the network on one sample
        self.a[0] = x.T
        for i in range(self.hiddenLayerNum):
            self.z[i] = np.dot(self.w[i], self.a[i]) + self.b[i]  # w[0] shape: (L, d); a[0] shape: (d,) for the first hidden layer
            self.a[i + 1] = self.hiddenLayerFunc(self.z[i])
        self.z[-1] = np.dot(self.w[-1], self.a[-1]) + self.b[-1]  # hidden-layer output to the output layer
        output = self.outputFunc(self.z[-1])  # probability of each class
        return output
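    # Forward pass in equations (notation summary added here, not in the original gist):
    #   z[0] = w[0] a[0] + b[0],   a[1] = g(z[0]),   z[1] = w[1] a[1] + b[1],   f = softmax(z[1])
    # where g is the hidden-layer activation (sigmoid or ReLU) and a[0] = x is the input sample.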
    def backPropagation(self, f, X, y):
        '''
        Back-propagation for one sample, used with stochastic gradient descent.
        Currently only written for a single hidden layer.
        # f is the output of the softmax (a probability vector)
        # X is the sample
        # y is the label, an integer in 0, 1, ..., k-1
        '''
        delta3 = -f  # shape (k,)
        delta3[y] = delta3[y] + 1  # delta3 = onehot(y) - f
        deltaW2 = np.dot(delta3.reshape(-1, 1), self.a[1].reshape(1, -1))  # outer product: (k, 1) dot (1, L) -> (k, L)
        deltab2 = delta3
        delta2 = np.dot(self.w[1].T, delta3) * self.hiddenLayerFunc_gradient(self.z[0])  # shape (L,)
        deltaW1 = np.dot(delta2.reshape(-1, 1), X.reshape(1, -1))
        deltab1 = delta2
        return {"w": [deltaW1, deltaW2], "b": [deltab1, deltab2]}
    def predict(self, X):
        return np.array([np.argmax(self.feedForward(sample)) for sample in X])

    def performanceEval(self, testX, testY):
        return sum(self.predict(testX) == testY) / len(testY)
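# Example usage of the class (illustrative sketch based on test() below, not in the original gist):
#   nn = myNeuralNetwork(unitsPerHiddenLayer=[50], hiddenLayerFunc="ReLU", learningRate=0.05, epochs=3)
#   nn.fit(X=trainX, Y=trainY)          # trainX: (n, d) float array, trainY: (n,) integer labels 0..k-1
#   accuracy = nn.performanceEval(testX, testY)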
def test(uN, f, LR, epochs):
    print("start")
    nn = myNeuralNetwork(unitsPerHiddenLayer=[uN], hiddenLayerFunc=f, learningRate=LR, epochs=epochs)
    train, test = chainer.datasets.get_mnist()
    # train[i][0] is the image, train[i][1] is the label
    trainX = np.array([sample[0].flatten() for sample in train])
    trainY = np.array([sample[1] for sample in train])
    testX = np.array([sample[0].flatten() for sample in test])
    testY = np.array([sample[1] for sample in test])
    nn.fit(X=trainX, Y=trainY)
    print("units Num: %d, hidden layer func: %s, learning rate: %f, epochs: %d, accuracy: %f" % (uN, f, LR, epochs, nn.performanceEval(testX, testY)))
def main():
    # test(50, "sigmoid", 0.07, 1)
    # test(50, "sigmoid", 0.07, 5)
    # test(100, "sigmoid", 0.07, 1)
    # test(100, "sigmoid", 0.07, 5)
    # test(200, "sigmoid", 0.07, 1)
    # test(200, "sigmoid", 0.07, 5)
    # test(50, "ReLU", 0.07, 1)
    # test(50, "ReLU", 0.07, 5)
    # test(100, "ReLU", 0.07, 1)
    # test(100, "ReLU", 0.07, 5)
    # test(200, "ReLU", 0.07, 1)
    # test(200, "ReLU", 0.07, 5)
    # test(50, "sigmoid", 0.05, 5)
    # test(50, "sigmoid", 0.1, 5)
    # test(50, "sigmoid", 0.5, 5)
    # test(50, "ReLU", 0.05, 5)
    # test(50, "ReLU", 0.1, 5)
    # test(50, "ReLU", 0.5, 5)
    # test(50, "ReLU", 0.01, 5)
    test(50, "sigmoid", 0.01, 5)
    test(10, "sigmoid", 0.01, 5)

if __name__ == '__main__':
    main()