#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 5 07:13:29 2020

@author: wangyu

A small fully connected neural network trained on MNIST with stochastic
gradient descent, evaluated by accuracy and macro-F1.
"""
import random
import gzip

import numpy as np
import matplotlib.pyplot as plt
def parse_image(name):
    """Parse a gzipped IDX image file into float32 rows scaled to [0, 1]."""
    dim = 28 * 28
    file = gzip.open(name, 'rb')
    data = file.read()[16:]  # skip the 16-byte IDX header
    assert len(data) % dim == 0, "file length of %s is not a multiple of the image dimension" % name
    result = np.frombuffer(data, dtype=np.uint8).reshape(len(data) // dim, dim)
    return np.float32(result) / 255.0
def parse_label(name):
    """Parse a gzipped IDX label file into a uint8 array of digit labels."""
    file = gzip.open(name, 'rb')
    data = file.read()[8:]  # skip the 8-byte IDX header
    result = np.frombuffer(data, dtype=np.uint8)
    return result
def turn_into_vector(value):
    """One-hot encode a digit label as a (10, 1) column vector."""
    assert 0 <= value < 10, "label %d out of range" % value
    vec = np.zeros((10, 1))
    vec[value] = 1.0
    return vec
def conv(images, labels):
    """Pair each image (as a (784, 1) column) with its one-hot label."""
    first = [np.reshape(x, (784, 1)) for x in images]
    second = [turn_into_vector(y) for y in labels]
    return list(zip(first, second))
train_images = parse_image('./train-images-idx3-ubyte.gz')
train_labels = parse_label('./train-labels-idx1-ubyte.gz')
test_images = parse_image('./t10k-images-idx3-ubyte.gz')
test_labels = parse_label('./t10k-labels-idx1-ubyte.gz')

train_set = conv(train_images, train_labels)
test_set = conv(test_images, test_labels)
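# With the standard MNIST archives above, train_set holds 60000 (x, y)
# pairs and test_set 10000; each x is a float32 (784, 1) column in [0, 1]
# and each y a one-hot (10, 1) vector.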
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def d_sigmoid(z):
    # Derivative of the sigmoid: sigma'(z) = sigma(z) * (1 - sigma(z)).
    return sigmoid(z) * (1 - sigmoid(z))

def copy_as_zero(arr):
    # Zero arrays with the same shapes as the given list of arrays.
    return [np.zeros(x.shape) for x in arr]
class NN(object):
    def __init__(self, shape):
        # shape[0] is the input dimension, shape[-1] the output dimension;
        # everything in between is a hidden layer.
        self.shape = shape
        self.depth = len(shape) - 1
        self.in_dim = shape[0]
        self.out_dim = shape[-1]
        self.biases = []
        self.weights = []
        self.train_history = []
        for i in range(self.depth):
            self.biases.append(np.random.randn(shape[i + 1], 1))
            self.weights.append(np.random.randn(shape[i + 1], shape[i]))
    def train(self, train_set, verify_set, iterations, report_period, batch_size, alpha):
        self.train_history = []
        acc = self.accuracy(verify_set)
        self.train_history.append((0, 1 - acc))
        print("iteration %d, accuracy: %.2f%%" % (0, 100 * acc))
        try:
            for i in range(iterations):
                self.stochastic_gradient_descent(train_set, batch_size, alpha)
                if (i + 1) % report_period == 0 or i + 1 == iterations:
                    acc = self.accuracy(verify_set)
                    self.train_history.append((i + 1, 1 - acc))
                    print("iteration %d, accuracy: %.2f%%" % (i + 1, 100 * acc))
        except KeyboardInterrupt:
            # Allow Ctrl-C to stop training early while keeping the model so far.
            print("")
    def plot(self):
        x_axis = []
        y_axis = []
        for (x, y) in self.train_history:
            x_axis.append(x)
            y_axis.append(y)
        plt.scatter(x_axis, y_axis, s=10, marker="o", color="blue")
        plt.xlabel("Number of Iterations of Gradient Descent")
        plt.ylabel("Error Rate")
        plt.title("Iterations vs Error Rate")
        plt.show()
    def inference(self, a):
        # Forward pass: each layer applies an affine map followed by sigmoid.
        for i in range(self.depth):
            a = sigmoid(np.dot(self.weights[i], a) + self.biases[i])
        return a
    def stochastic_gradient_descent(self, train_set, batch_size, alpha):
        # One SGD step: average the per-sample gradients over a random
        # mini-batch, then move the parameters by -alpha * average gradient.
        random_batch = random.sample(train_set, batch_size)
        grad_b = copy_as_zero(self.biases)
        grad_w = copy_as_zero(self.weights)
        for (x, y) in random_batch:
            grad_b_per_sample, grad_w_per_sample = self.back_propagation(x, y)
            for i in range(self.depth):
                grad_b[i] += grad_b_per_sample[i]
                grad_w[i] += grad_w_per_sample[i]
        for i in range(self.depth):
            self.biases[i] -= alpha * grad_b[i] / batch_size
            self.weights[i] -= alpha * grad_w[i] / batch_size
    def back_propagation(self, x, y):
        # Per-sample gradients of the quadratic cost 0.5 * ||a - y||^2.
        grad_b = copy_as_zero(self.biases)
        grad_w = copy_as_zero(self.weights)
        a_record = []
        z_record = []
        a = x
        a_record.append(a)
        for i in range(self.depth):
            z = np.dot(self.weights[i], a) + self.biases[i]
            a = sigmoid(z)
            a_record.append(a)
            z_record.append(z)
        # Output layer: error = (a - y) * sigma'(z).
        current_layer = self.depth - 1
        error = (a_record[current_layer + 1] - y) * d_sigmoid(z_record[current_layer])
        grad_b[current_layer] = error
        grad_w[current_layer] = np.dot(error, a_record[current_layer].transpose())
        # Propagate the error backwards through the hidden layers.
        while current_layer >= 1:
            current_layer -= 1
            error = np.dot(self.weights[current_layer + 1].transpose(), error) * \
                d_sigmoid(z_record[current_layer])
            grad_b[current_layer] = error
            grad_w[current_layer] = np.dot(error, a_record[current_layer].transpose())
        return (grad_b, grad_w)
    def accuracy(self, test_set):
        correct = 0
        for (x, y) in test_set:
            predicted = np.argmax(self.inference(x))
            actual = np.argmax(y)
            if predicted == actual:
                correct += 1
        return correct / len(test_set)
    def macroF1(self, test_set):
        # Per-class true/false positives and negatives, one-vs-rest.
        TP = [0] * self.out_dim
        TN = [0] * self.out_dim
        FP = [0] * self.out_dim
        FN = [0] * self.out_dim
        F1 = [0] * self.out_dim
        for (x, y) in test_set:
            predicted = np.argmax(self.inference(x))
            actual = np.argmax(y)
            for i in range(self.out_dim):
                actual_binary = 1 if actual == i else 0
                predicted_binary = 1 if predicted == i else 0
                if actual_binary == 1:
                    if predicted_binary == 1:
                        TP[i] += 1
                    else:
                        FN[i] += 1
                else:
                    if predicted_binary == 0:
                        TN[i] += 1
                    else:
                        FP[i] += 1
        # A tiny epsilon keeps precision and recall well-defined (no division
        # by zero) when a class is never predicted or never occurs.
        zero = 1e-30
        for i in range(self.out_dim):
            precision = TP[i] / (zero + TP[i] + FP[i]) + zero
            recall = TP[i] / (zero + TP[i] + FN[i]) + zero
            F1[i] = 2 / (1 / precision + 1 / recall)
        print("TP:", TP)
        print("TN:", TN)
        print("FP:", FP)
        print("FN:", FN)
        print("F1 for each class:", F1)
        macroF1 = sum(F1) / self.out_dim
        return macroF1
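# Optional sanity-check sketch: gradient_check is an illustrative helper
# (not part of the original gist) that compares one entry of the analytic
# gradient from back_propagation against a centered finite difference of
# the quadratic cost 0.5 * ||a - y||^2 that the backward pass assumes.
# Probing index (0, 0) of the first weight matrix is an arbitrary choice.
def gradient_check(net, x, y, eps=1e-5):
    grad_b, grad_w = net.back_propagation(x, y)

    def cost():
        a = net.inference(x)
        return 0.5 * float(np.sum((a - y) ** 2))

    old = net.weights[0][0, 0]
    net.weights[0][0, 0] = old + eps
    c_plus = cost()
    net.weights[0][0, 0] = old - eps
    c_minus = cost()
    net.weights[0][0, 0] = old  # restore the original weight
    numeric = (c_plus - c_minus) / (2 * eps)
    print("analytic: %.10f  numeric: %.10f" % (grad_w[0][0, 0], numeric))
# Example (once nn and train_set exist): gradient_check(nn, *train_set[0])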
np.random.seed(123456)
random.seed(123456)

nn = NN([784, 50, 10])
# 40000 SGD iterations, reported every 200, batch size 20, learning rate 1.5;
# progress is measured on the first 2000 training samples as a verify set.
nn.train(train_set, train_set[:2000], 40000, 200, 20, 1.5)
print("accuracy=", nn.accuracy(test_set))
print("macroF1=", nn.macroF1(test_set))
nn.plot()
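# A quick visual spot check (illustrative addition, not from the original
# gist): show one test digit next to the trained network's prediction,
# reusing the matplotlib import above. Index 0 is an arbitrary choice; any
# index below len(test_set) works.
x, y = test_set[0]
predicted = np.argmax(nn.inference(x))
actual = np.argmax(y)
plt.imshow(x.reshape(28, 28), cmap="gray")
plt.title("predicted %d / actual %d" % (predicted, actual))
plt.show()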