Last active
November 11, 2018 06:18
-
-
Save yonghanjung/5dbd75b9819464f4e53a613f0f323344 to your computer and use it in GitHub Desktop.
SGD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
import numpy as np | |
import gzip | |
import copy | |
import matplotlib.pyplot as plt | |
from sklearn.linear_model import LogisticRegression | |
''' Preprocessing: Train, Test ''' | |
# Train and Test | |
def read_idx(filename):
    """Read a gzip-compressed IDX file and return its contents as an array.

    The 4-byte header is parsed as two zero bytes, a one-byte data-type code
    and a one-byte dimension count; it is followed by one big-endian uint32
    per dimension and then the raw element bytes.

    :param filename: path to the ``.gz`` IDX file
    :return: writable ``numpy.uint8`` array shaped according to the header
    :raises ValueError: if the header declares a data type other than
        unsigned byte (code 0x08), which is the only type decoded here
    """
    with gzip.open(filename, 'rb') as f:
        _zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if data_type != 0x08:
            raise ValueError(
                'unsupported IDX data type 0x%02x in %s (only uint8 is handled)'
                % (data_type, filename))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for _ in range(dims))
        # np.fromstring is deprecated for binary input; frombuffer is the
        # supported replacement.  It returns a read-only view of the bytes,
        # so copy to hand callers a writable array as before.
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape).copy()
def Preprocess(whatmode, mytrainingsize, datapath, featuretype):
    """Load one MNIST split and turn it into a design matrix plus labels.

    Each image is flattened to one row, scaled from [0, 255] to [0, 1], and a
    trailing column of ones is appended to every row.

    :param whatmode: ``'train'`` or ``'test'``
    :param mytrainingsize: number of leading samples to keep in ``'train'``
        mode; ignored in ``'test'`` mode, where the whole file is used
    :param datapath: prefix prepended to the data file names (include the
        trailing separator, e.g. ``'data/'``)
    :param featuretype: ``1`` for raw pixels, ``2`` for images reduced with
        ``maxpool`` before flattening
    :return: ``(features, labels)`` where ``features`` has one row per sample
    :raises ValueError: on an unknown ``whatmode`` or ``featuretype``
    """
    if whatmode == 'train':
        img_name = 'train-images-idx3-ubyte.gz'
        label_name = 'train-labels-idx1-ubyte.gz'
    elif whatmode == 'test':
        img_name = 't10k-images-idx3-ubyte.gz'
        label_name = 't10k-labels-idx1-ubyte.gz'
    else:
        raise ValueError("whatmode must be 'train' or 'test', got %r" % (whatmode,))
    if featuretype not in (1, 2):
        raise ValueError("featuretype must be 1 or 2, got %r" % (featuretype,))

    images = np.array(read_idx(datapath + img_name))
    labels = read_idx(datapath + label_name)
    if whatmode == 'train':
        images = images[:mytrainingsize]
        labels = labels[:mytrainingsize]

    if featuretype == 2:
        images = np.array([maxpool(img) for img in images])

    # Flatten 2D to 1D.  The row count comes from the data actually loaded
    # rather than a fixed 10000 / the requested size, so a shorter file or a
    # smaller slice does not break the reshape.
    n_samples = len(images)
    n_pixels = int(np.prod(images.shape[1:]))
    features = images.reshape(n_samples, n_pixels)

    # Normalize to [0,1]
    features = features / 255.0
    # Append a constant-one column (lets the last weight act as an intercept).
    features = np.concatenate((features, np.ones((n_samples, 1))), axis=1)
    return features, labels
# Only for Train | |
def GenNewLabel(trainlabel, mylabel):
    """Build one-vs-rest targets: 1 where the label equals mylabel, else 0.

    :param trainlabel: array of class labels
    :param mylabel: the class treated as the positive one
    :return: integer 0/1 array with the same shape as trainlabel
    """
    is_target = trainlabel == mylabel
    # Multiplying the boolean mask by 1 converts it to integers.
    return is_target * 1
def maxpool(mat, K=2, L=2):
    """Downsample a 2-D array by taking the maximum over K x L tiles.

    Tiles do not overlap.  Trailing rows/columns that do not fill a whole
    tile are dropped.  The input size is taken from ``mat`` itself, so any
    2-D image works (a 28 x 28 input with the default window gives 14 x 14).

    :param mat: 2-D array-like to reduce
    :param K: tile height (default 2)
    :param L: tile width (default 2)
    :return: array of shape ``(rows // K, cols // L)``
    """
    mat = np.asarray(mat)
    M, N = mat.shape
    MK = M // K
    NL = N // L
    # View the cropped image as (MK, K, NL, L) so each tile occupies axes 1
    # and 3, then reduce over those two axes.
    return mat[:MK * K, :NL * L].reshape(MK, K, NL, L).max(axis=(1, 3))
def ShuffleIdx(N, n):
    """Return the first n entries of a random ordering of 0 .. N-1.

    Uses NumPy's global random state, so results follow ``np.random.seed``.

    :param N: size of the index range to shuffle
    :param n: how many shuffled indices to return
    :return: 1-D integer array of length ``min(n, N)`` for non-negative n
    """
    order = np.arange(N)
    np.random.shuffle(order)  # in-place shuffle
    picked = order[:n]
    return picked
def mysigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated with NumPy.

    :param z: scalar or array
    :return: value(s) of the sigmoid at z
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def stochasticGradientDescent(X, y, theta, alpha, num_epoch, regular, reglambda):
    """Fit logistic-regression weights with per-sample gradient steps.

    Every epoch visits all samples once in a freshly shuffled order.
    Training stops early when the squared change of ``theta`` over one epoch
    falls below ``1 / n_features``.

    :param X: feature matrix, one row per sample
    :param y: 0/1 target for each row of X
    :param theta: initial weight vector; it is updated in place and also
        returned
    :param alpha: learning rate
    :param num_epoch: maximum number of passes over the data
    :param regular: if truthy, add the ``reglambda * theta`` penalty term to
        each gradient step
    :param reglambda: regularisation strength (used only when ``regular``)
    :return: the fitted weight vector (same object as ``theta``)
    """
    # Take the feature count from the X that was passed in, not from a
    # module-level variable, so the function works outside the script too.
    threshold = float(X.shape[1])
    for epoch in range(num_epoch):
        shuffled = ShuffleIdx(len(X), len(X))
        X = X[shuffled]
        y = y[shuffled]
        # Snapshot the weights so the epoch's total movement can be measured.
        prev_theta = copy.copy(theta)
        for xi, yi in zip(X, y):
            yhat_i = mysigmoid(np.dot(xi, theta))
            if regular:
                theta -= alpha * ((yhat_i - yi) * xi + reglambda * theta)
            else:
                theta -= alpha * (yhat_i - yi) * xi
        if np.sum(np.square(theta - prev_theta)) < 1 / threshold:
            return theta
    return theta
def ComputeAccuracy(mytestlabel, testlabel):
    """Fraction of predictions that equal the ground-truth label.

    :param mytestlabel: predicted labels (array)
    :param testlabel: ground-truth labels (array)
    :return: number of matches divided by ``len(testlabel)``, as a float
    """
    n_correct = np.sum((mytestlabel == testlabel) * 1)
    n_total = len(testlabel)
    return float(n_correct) / float(n_total)
def ComputeF1(mytestlabel, testlabel, listUniqLabel):
    '''
    Per-label F1 score of a set of predictions.

    For each label, precision is (correct predictions whose truth is the
    label) / (predictions equal to the label) and recall is the same count
    divided by (truths equal to the label).  Any ratio whose denominator is
    zero is reported as 0.
    :param mytestlabel: predicted labels (array)
    :param testlabel: ground-truth labels (array)
    :param listUniqLabel: the labels to score, in output order
    :return: array holding the F1 score of each label in listUniqLabel
    '''
    labels = np.array(listUniqLabel)  # accept plain lists as well
    n_labels = len(labels)
    Prec = np.zeros(n_labels)
    Recall = np.zeros(n_labels)
    F1 = np.zeros(n_labels)

    for pos, label in enumerate(labels):
        n_predicted = np.sum((mytestlabel == label) * 1)
        n_actual = np.sum((testlabel == label) * 1)
        # Correct predictions among the samples whose truth is this label.
        true_pos = float(np.sum(((mytestlabel == testlabel) * 1) * (testlabel == label) * 1))

        if n_predicted != 0:
            Prec[pos] = true_pos / float(n_predicted)
        else:
            Prec[pos] = 0

        if n_actual != 0:
            Recall[pos] = true_pos / float(n_actual)
        else:
            Recall[pos] = 0

        pr_sum = Prec[pos] + Recall[pos]
        if pr_sum == 0:
            F1[pos] = 0
        else:
            F1[pos] = 2 * (Prec[pos] * Recall[pos]) / float(pr_sum)
    return F1
# Script entry point: train ten one-vs-rest logistic-regression classifiers
# on MNIST with SGD, then report accuracy and mean F1 on the test split.
if __name__ == "__main__":
    # --- Tuning the hyperparameters ---
    datapath = 'data/'
    training_size = 10000
    test_size = 10000
    featuretype = 1
    alpha = 0.0001
    num_epoch = 100
    reglambda = 0.1
    regular = False
    np.random.seed(1)

    trainfeature, trainlabel = Preprocess('train', training_size, datapath, featuretype)
    testfeature, testlabel = Preprocess('test', test_size, datapath, featuretype)

    # --- Training: one binary classifier per digit ---
    theta_box = list()
    for target_label in range(10):
        target_trainlabel = GenNewLabel(trainlabel, target_label)
        theta = np.zeros(trainfeature.shape[1])
        theta = stochasticGradientDescent(trainfeature, target_trainlabel, theta,
                                          alpha, num_epoch, regular, reglambda)
        theta_box.append(theta)

    # --- Estimation: score every test sample against all ten classifiers ---
    theta_matrix = np.array(theta_box)  # one row of weights per digit
    # Scores are rounded to 3 decimals before the argmax, as in the original
    # per-sample loop; ties go to the lowest digit.
    scores = np.round(mysigmoid(np.dot(testfeature, theta_matrix.T)), 3)
    est_label = np.argmax(scores, axis=1)

    acc = ComputeAccuracy(est_label, testlabel)
    f1 = ComputeF1(est_label, testlabel, np.unique(testlabel))
    avgf1 = np.mean(f1)
    # str.format keeps the output identical under both Python 2 and 3.
    print("Test accuracy: {}".format(np.round(acc, 4)))
    print("Test F1 score: {}".format(np.round(avgf1, 4)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment