A Simple Neural Net in Python
import numpy as np
import csv
import matplotlib.pyplot as plt
#fix random seed for reproducibility
np.random.seed(1)
#read dataset
iris = open('iris.csv','r')
iris = csv.reader(iris, delimiter=',')
iris = np.array(list(iris)).astype(np.float64)
#shuffle dataset
np.random.shuffle(iris)
#all columns except the last are features; the last column is the label
features = iris[:,:-1]
#standardize each feature column to zero mean and unit variance
features = (features - features.mean(axis=0))/features.std(axis=0)
#make a column of ones, one per example
biasPad = np.ones((features.shape[0],1), dtype=features.dtype)
#pad a 1 on the right of each feature vector to act as the bias input
features = np.concatenate((features, biasPad), axis=1)
#create a one-hot matrix representation of the labels
label = np.array(iris[:,-1], dtype=int).reshape(-1)
label = np.eye(3)[label]
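#illustrative note (added for clarity, not part of the original gist):
#np.eye(3)[label] builds the one-hot matrix by fancy-indexing rows of the
#3x3 identity, e.g. np.eye(3)[[0, 2, 1]] -> [[1,0,0],[0,0,1],[0,1,0]]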
#split into training and testing sets with an 80/20 split
M = features.shape[0]
splitIdx = int(0.8*M)
XTest = features[splitIdx:,:]
XTrain = features[:splitIdx,:]
YTest = label[splitIdx:,:]
YTrain = label[:splitIdx,:]
#neurons
#number of input neurons equals the size of a feature vector
inputCount = features.shape[1]
#basic case
hiddenCount = inputCount
#3-class classification, so 3 output neurons
outputCount = 3
#activations for each layer of neurons
ai = np.ones((inputCount,1))
ah = np.ones((hiddenCount,1))
ao = np.ones((outputCount,1))
#neuron weights
#He-style initialization: zero-mean Gaussian scaled by sqrt(2/fan_in)
#weights from each input neuron to each hidden neuron
wih = np.random.randn(inputCount, hiddenCount)*np.sqrt(2./inputCount)
#weights from each hidden neuron to each output neuron
who = np.random.randn(hiddenCount, outputCount)*np.sqrt(2./hiddenCount)
#update caches for momentum updates
cih = np.zeros((inputCount, hiddenCount))
cho = np.zeros((hiddenCount, outputCount))
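#illustrative shape check (assumption, not in the original gist): with the
#4 iris features plus the bias column, the network is 5 -> 5 -> 3, so
#wih.shape == (5, 5) and who.shape == (5, 3)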
#function for the feed-forward pass
def feedFwd(featureMat):
    global ai,ah,ao,wih,who
    #input activations
    ai = featureMat
    #hidden activations: vectorized matrix multiply, ai dot wih
    ah = np.dot(ai, wih)
    #vectorized tanh
    ah = np.tanh(ah)
    #output activations: ah dot who
    ao = np.dot(ah, who)
    #vectorized tanh
    ao = np.tanh(ao)
    #alternative sigmoid output: ao = 1.0/(1.0 + np.exp(-ao))
    return ao
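#quick shape check (illustrative, not part of the original gist): one forward
#pass over two padded rows should give a (2, 3) matrix of class scores
assert feedFwd(XTrain[:2,:]).shape == (2, 3)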
#function for backpropagation
def backProp(X,label,output,N,batchSize=1,beta=0.0009):
    '''N: learning rate, beta: momentum coefficient'''
    global ai,ah,ao,wih,who,cih,cho
    #error at the output layer: dE/dz = (output - label)*tanh'(z)
    #for the 0.5*(label - output)^2 loss with a tanh output layer
    delOut = (output - label)*(1.0 - output**2)
    dwho = np.dot(ah.T, delOut)/batchSize
    #propagate the error back through the hidden tanh layer
    delHidden = np.dot(delOut, who.T)*(1.0 - ah**2)
    dwih = np.dot(X.T, delHidden)/batchSize
    '''weight updates with momentum: cho/cih hold the previous gradients'''
    who -= N*dwho + beta*cho
    cho[:] = dwho
    wih -= N*dwih + beta*cih
    cih[:] = dwih
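#derivation sketch (added for clarity, assuming the 0.5*sum((y-a)^2) loss
#used in train() below): with z_o = ah.who and a_o = tanh(z_o),
#   dE/dz_o = (a_o - y) * (1 - a_o^2)          -> delOut
#   dE/dwho = ah^T . delOut                    -> dwho
#   dE/dz_h = (delOut . who^T) * (1 - ah^2)    -> delHidden
#   dE/dwih = X^T . delHidden                  -> dwih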
def train(X,Y,iteration=1000,learningRate=0.001,batchSize=1,beta=0.099,decayRate=0.0005):
    errorTimeline = []
    epochList = []
    #train for the given number of epochs
    for epoch in range(iteration):
        #for each mini-batch
        for i in range(0, X.shape[0], batchSize):
            #slice the dataset into mini-batches
            batchSplit = min(i+batchSize, X.shape[0])
            XminiBatch = X[i:batchSplit,:]
            YminiBatch = Y[i:batchSplit,:]
            #calculate a forward pass through the network
            output = feedFwd(XminiBatch)
            #calculate mean squared error over the actual mini-batch size
            error = 0.5*np.sum((YminiBatch-output)**2)/XminiBatch.shape[0]
            #backprop and update weights
            backProp(XminiBatch,YminiBatch,output,learningRate,XminiBatch.shape[0],beta)
        #every 50 epochs decay the learning rate and momentum
        #decaying the momentum helps reduce the chance of overshooting a convergence point
        if epoch%50 == 0 and epoch > 0:
            learningRate *= 1./(1. + (decayRate * epoch))
            beta *= 1./(1. + (decayRate * epoch))
            #store error for plotting the graph
            errorTimeline.append(error)
            epochList.append(epoch)
            print('Epoch :', epoch, ', Error :', error, ', alpha :', learningRate)
    return errorTimeline, epochList
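#note on the schedule (added for clarity): every 50 epochs both the learning
#rate and momentum are scaled by 1/(1 + decayRate*epoch), e.g. at epoch 1000
#with decayRate=0.0005 each is multiplied by 1/1.5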
#Work it, make it, do it,
#Makes us harder, better, faster, stronger!
learningRate = 0.0001
beta = 0.099
#batchSize=M (>= number of training rows) means full-batch training
errorTimeline,epochList = train(XTrain,YTrain,2000,learningRate,M,beta)
#How tough are ya?
#get output for the test features
predOutput = feedFwd(XTest)
#vectorized count: compare the predicted and true class indices along rows
#and count how many rows match
count = np.sum(np.argmax(predOutput,axis=1) == np.argmax(YTest,axis=1))
#print accuracy
print('Accuracy : ', float(count)/float(YTest.shape[0]))
#plot graph
plt.plot(epochList, errorTimeline)
plt.xlabel('Number of epochs')
plt.ylabel('Training Error')
plt.savefig('loss-function.png')
plt.show()
#mow the lawn, take out garbage, have a good night's sleep
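#to reproduce (assumption about the setup, not stated in the original gist):
#place iris.csv next to this script with numeric feature columns, an integer
#class label (0-2) in the last column and no header row, then run with
#Python 3, numpy and matplotlib installed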