Last active
November 11, 2018 06:19
-
-
Save yonghanjung/ef5b091db3e69580dbb22973c7e2eadd to your computer and use it in GitHub Desktop.
SGD experiment
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import copy
import gzip
import pickle
import struct

import numpy as np
''' Preprocessing: Train, Test ''' | |
# Train and Test | |
def read_idx(filename):
    """Load a gzip-compressed IDX file (the container format used by MNIST).

    The header is two zero bytes, a one-byte element-type code and a one-byte
    dimension count, followed by one big-endian uint32 per dimension. The
    remaining bytes are the array payload.

    :param filename: path of the ``.gz`` IDX file
    :return: a writable ndarray in native byte order with the header's shape
    :raises ValueError: if the element-type code in the header is unknown
    """
    # IDX element-type codes mapped to (big-endian) NumPy dtypes.
    idx_dtypes = {
        0x08: np.dtype('u1'),
        0x09: np.dtype('i1'),
        0x0B: np.dtype('>i2'),
        0x0C: np.dtype('>i4'),
        0x0D: np.dtype('>f4'),
        0x0E: np.dtype('>f8'),
    }
    with gzip.open(filename, 'rb') as f:
        _zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if data_type not in idx_dtypes:
            raise ValueError(
                'unsupported IDX data type 0x%02X in %s' % (data_type, filename))
        shape = struct.unpack('>' + 'I' * dims, f.read(4 * dims))
        payload = f.read()
    dtype = idx_dtypes[data_type]
    # frombuffer replaces the deprecated binary mode of np.fromstring; it
    # returns a read-only view, so astype() is used to hand back a writable,
    # native-endian copy like the original function did.
    return np.frombuffer(payload, dtype=dtype).reshape(shape).astype(
        dtype.newbyteorder('='))
def train2vali(num_train, trainimg, trainlabel):
    """Randomly split a training set into a training part and a validation part.

    Each of the ``num_train`` samples is independently kept for training with
    probability 0.9 and moved to the validation set otherwise.

    :param num_train: number of samples; must match the length of both arrays
    :param trainimg: 2-D array with one sample per row
    :param trainlabel: 1-D array of labels aligned with ``trainimg``
    :return: (train images, train labels, validation images, validation labels)
    """
    # One draw per sample: 1 -> stays in training, 0 -> goes to validation.
    draws = np.random.choice([0, 1], size=(num_train,), p=[0.1, 0.9])
    keep_mask = draws == 1
    hold_mask = np.logical_not(keep_mask)
    return (
        trainimg[keep_mask, :],
        trainlabel[keep_mask],
        trainimg[hold_mask, :],
        trainlabel[hold_mask],
    )
def _flatten_features(images, featuretype):
    """Turn a stack of 2-D images into [0, 1]-scaled rows with a bias column."""
    if featuretype == 2:
        # Shrink every image with the file's max-pooling helper first.
        images = np.array([maxpool(img) for img in images])
    elif featuretype != 1:
        raise ValueError('featuretype must be 1 or 2, got %r' % (featuretype,))
    # Use the number of images actually present instead of a hard-coded count
    # so a short (or differently sized) data file cannot break the reshape.
    num_pixels = int(np.prod(images.shape[1:]))
    flat = images.reshape(len(images), num_pixels) / 255.0
    # The trailing column of ones lets the weight vector carry a bias term.
    return np.concatenate((flat, np.ones((len(flat), 1))), axis=1)


def Preprocess(whatmode, mytrainingsize, datapath, featuretype):
    """Load the image/label files and build the feature matrix.

    :param whatmode: ``'train'`` or ``'test'``
    :param mytrainingsize: number of leading samples to keep in ``'train'``
        mode; ignored in ``'test'`` mode, where the whole file is used
    :param datapath: prefix prepended verbatim to the file names, so it must
        end with a path separator
    :param featuretype: 1 for raw pixels, 2 for max-pooled pixels
    :return: (features, labels); features hold one flattened image per row,
        scaled by 1/255, plus a final column of ones
    :raises ValueError: if ``whatmode`` or ``featuretype`` is not recognised
    """
    if whatmode == 'train':
        images = np.array(read_idx(datapath+'train-images-idx3-ubyte.gz'))
        images = images[:mytrainingsize]
        features = _flatten_features(images, featuretype)
        labels = read_idx(datapath+'train-labels-idx1-ubyte.gz')
        labels = labels[:mytrainingsize]
        return features, labels
    if whatmode == 'test':
        images = np.array(read_idx(datapath+'t10k-images-idx3-ubyte.gz'))
        features = _flatten_features(images, featuretype)
        labels = read_idx(datapath+'t10k-labels-idx1-ubyte.gz')
        return features, labels
    raise ValueError("whatmode must be 'train' or 'test', got %r" % (whatmode,))
# Only for Train | |
def GenNewLabel(trainlabel,mylabel):
    """Build one-vs-rest targets: 1 where the label equals ``mylabel``, else 0.

    :param trainlabel: array of class labels
    :param mylabel: the class treated as the positive one
    :return: integer array of 0/1 flags with the same shape as ``trainlabel``
    """
    is_target = trainlabel == mylabel
    # Multiplying by 1 converts the boolean mask to integers.
    return is_target * 1
def maxpool(mat, K=2, L=2):
    """Non-overlapping max pooling over a 2-D array.

    The array is cut into ``K`` x ``L`` tiles and each tile is replaced by its
    maximum. Trailing rows/columns that do not fill a whole tile are dropped.
    The input size is taken from ``mat`` itself rather than assumed to be
    28 x 28, so any 2-D array works.

    :param mat: 2-D array
    :param K: tile height (default 2)
    :param L: tile width (default 2)
    :return: array of shape ``(rows // K, cols // L)``
    """
    M, N = mat.shape
    MK = M // K
    NL = N // L
    # Reshape to (tile_row, K, tile_col, L) and reduce over the in-tile axes.
    return mat[:MK * K, :NL * L].reshape(MK, K, NL, L).max(axis=(1, 3))
def ShuffleIdx(N, n):
    """Return the first ``n`` entries of a random permutation of ``range(N)``.

    :param N: size of the index range to permute
    :param n: how many shuffled indices to return
    :return: 1-D integer array of ``min(n, N)`` distinct indices
    """
    order = np.arange(N)
    np.random.shuffle(order)  # shuffles in place using the global NumPy RNG
    head = order[:n]
    return head
def mysigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied element-wise.

    :param z: scalar or array
    :return: value(s) in the interval (0, 1), same shape as ``z``
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def stochasticGradientDescent(X, y, theta, alpha, num_epoch, regular, reglambda):
    """Fit logistic-regression weights with per-sample (stochastic) updates.

    Every epoch visits all samples once in a fresh random order. Training
    stops early when the squared change of ``theta`` over one full epoch
    falls below ``1 / n_features``.

    :param X: 2-D feature array, one sample per row
    :param y: 1-D array of 0/1 targets aligned with ``X``
    :param theta: initial float weight array; it is updated in place and
        also returned
    :param alpha: learning rate
    :param num_epoch: maximum number of passes over the data
    :param regular: truthy to add the ``reglambda * theta`` penalty term
    :param reglambda: regularisation strength (used only when ``regular``)
    :return: the fitted weight array
    """
    # The tolerance comes from the data passed in; the original read the
    # module-level ``trainfeature`` here, which only exists when the file is
    # run as the experiment script.
    threshold = float(X.shape[1])
    for _ in range(num_epoch):
        # permutation(n) is arange(n) followed by an in-place shuffle, i.e.
        # the same draw the file's ShuffleIdx(n, n) helper performs.
        order = np.random.permutation(len(X))
        X = X[order]
        y = y[order]
        prev_theta = np.copy(theta)
        for xi, yi in zip(X, y):
            yhat_i = 1 / (1 + np.exp(-np.dot(xi, theta)))
            if regular:
                theta -= alpha * ((yhat_i - yi) * xi + reglambda * theta)
            else:
                theta -= alpha * (yhat_i - yi) * xi
        if np.sum(np.square(theta - prev_theta)) < 1 / threshold:
            return theta
    return theta
def ComputeAccuracy(mytestlabel, testlabel):
    """Fraction of predictions that equal the ground-truth label.

    :param mytestlabel: predicted labels
    :param testlabel: ground-truth labels
    :return: accuracy as a float in [0, 1]
    """
    hits = (mytestlabel == testlabel) * 1
    num_correct = float(np.sum(hits))
    num_total = float(len(testlabel))
    return num_correct / num_total
def ComputeF1(mytestlabel, testlabel, listUniqLabel):
    '''
    Compute the per-label F1 score.

    Precision, recall and F1 for a label are each defined as 0 whenever
    their denominator would be 0.

    :param mytestlabel: predicted labels
    :param testlabel: ground-truth labels
    :param listUniqLabel: list (or array) of the unique labels to score
    :return: array holding the F1 score of each label, in the given order
    '''
    labels = np.array(listUniqLabel)  # accept plain lists as well
    num_labels = len(labels)
    Prec = np.zeros(num_labels)
    Recall = np.zeros(num_labels)
    F1 = np.zeros(num_labels)

    # 1 where the prediction is right, 0 elsewhere; shared by all labels.
    correct = (mytestlabel == testlabel) * 1

    for pos, label in enumerate(labels):
        num_predicted = np.sum((mytestlabel == label) * 1)
        num_actual = np.sum((testlabel == label) * 1)
        # True positives: correct predictions whose true label is `label`.
        true_pos = float(np.sum(correct * (testlabel == label) * 1))

        if num_predicted != 0:
            Prec[pos] = true_pos / float(num_predicted)
        if num_actual != 0:
            Recall[pos] = true_pos / float(num_actual)

        pr_sum = Prec[pos] + Recall[pos]
        if pr_sum != 0:
            F1[pos] = 2*(Prec[pos]*Recall[pos])/float(pr_sum)
    return F1
def _predict_labels(features, theta_box):
    """Predict one class per row: argmax of the rounded one-vs-rest scores."""
    est_label = np.zeros(len(features))
    for idx, elem_feature in enumerate(features):
        # Scores are rounded to 3 decimals before the argmax, so classes whose
        # rounded scores tie resolve to the lowest class index.
        est_wbox = [np.round(mysigmoid(np.dot(elem_feature, theta)), 3)
                    for theta in theta_box]
        est_label[idx] = np.argmax(est_wbox)
    return est_label


def _select_acc(memory, featuretype, regular):
    """Accuracies recorded for one (featuretype, regular) pair at alpha=0.001, lambda=0.1."""
    return [elem[5] for elem in memory
            if elem[0] == featuretype and elem[1] == regular
            and elem[3] == 0.001 and elem[4] == 0.1]


# The guard must compare against "__main__"; the previous value "main__"
# never matched, so the experiment could not be started from the command line.
if __name__ == "__main__":
    # --- Tuning the hyperparameters ---
    datapath = 'data/'
    training_size = 10000
    test_size = 10000
    np.random.seed(1)

    # --- Hyperparameter tuning AND experiment ---
    list_type = [1, 2]
    list_regular = [False, True]
    list_epoch = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
    list_alpha = [0.00001, 0.0001, 0.001, 0.005, 0.01]  # learning rate
    list_reglambda = [0.1, 0.3, 0.5, 0.7, 0.9]

    # Each record is (featuretype, regular, num_epoch, alpha, reglambda, acc).
    list_train_memory = list()
    list_test_memory = list()
    for featuretype in list_type:
        # The features depend only on the feature type, so they are loaded
        # once per type instead of once per hyperparameter combination.
        trainfeature, trainlabel = Preprocess('train', training_size, datapath, featuretype)
        testfeature, testlabel = Preprocess('test', test_size, datapath, featuretype)
        for regular in list_regular:
            for num_epoch in list_epoch:
                for alpha in list_alpha:
                    for reglambda in list_reglambda:
                        # Train one one-vs-rest classifier per digit.
                        theta_box = list()
                        for wval in range(10):
                            target_trainlabel = GenNewLabel(trainlabel, wval)
                            theta = np.random.rand(trainfeature.shape[1]) * 0.1
                            theta = stochasticGradientDescent(
                                trainfeature, target_trainlabel, theta,
                                alpha, num_epoch, regular, reglambda)
                            theta_box.append(theta)

                        # Training accuracy
                        est_label = _predict_labels(trainfeature, theta_box)
                        acc = ComputeAccuracy(est_label, trainlabel)
                        print('train',(featuretype, regular, num_epoch, alpha, reglambda, acc))
                        list_train_memory.append((featuretype, regular, num_epoch, alpha, reglambda, acc))

                        # Test accuracy
                        est_label = _predict_labels(testfeature, theta_box)
                        acc = ComputeAccuracy(est_label, testlabel)
                        print('test', (featuretype, regular, num_epoch, alpha, reglambda, acc))
                        list_test_memory.append((featuretype, regular, num_epoch, alpha, reglambda, acc))

    # Persist the results; `with` closes the files even if pickling fails.
    with open('train_SGD.pkl', 'wb') as fh:
        pickle.dump(list_train_memory, fh)
    with open('test_SGD.pkl', 'wb') as fh:
        pickle.dump(list_test_memory, fh)

    # NOTE: pickle.load must only be used on trusted files; these are the two
    # files written just above.
    with open('train_SGD.pkl', 'rb') as fh:
        list_train_memory = pickle.load(fh)
    with open('test_SGD.pkl', 'rb') as fh:
        list_test_memory = pickle.load(fh)

    # --- Identify the best hyperparameters ---
    # NOTE(review): the averages below run over both feature types; nothing
    # here filters on elem[0].
    ## regular
    rF = [elem[5] for elem in list_train_memory if not elem[1]]
    rT = [elem[5] for elem in list_train_memory if elem[1]]
    print (np.mean(rF), np.mean(rT))

    ## num_epoch
    dict_epoch = dict()
    for epoch in list_epoch:
        dict_epoch[epoch] = [elem[5] for elem in list_train_memory if elem[1] and elem[2] == epoch]
        print(epoch,np.mean(dict_epoch[epoch]))

    ## alpha // 0.001
    dict_alpha = dict()
    for alpha in list_alpha:
        dict_alpha[alpha] = [elem[5] for elem in list_train_memory if elem[1] and elem[3] == alpha]
        print(alpha, np.mean(dict_alpha[alpha]))

    ## reglambda // 0.1
    dict_reglambda = dict()
    for reglambda in list_reglambda:
        dict_reglambda[reglambda] = [elem[5] for elem in list_train_memory if elem[1] and elem[4] == reglambda]
        print(reglambda, np.mean(dict_reglambda[reglambda]))

    ## num_epoch with alpha, reglambda // 100
    dict_train_epoch = dict()
    for epoch in list_epoch:
        dict_train_epoch[epoch] = [elem[5] for elem in list_train_memory if elem[3] == 0.001 and elem[4] == 0.1 and elem[2] == epoch]
        print(epoch, np.mean(dict_train_epoch[epoch]))
    dict_test_epoch = dict()
    for epoch in list_epoch:
        dict_test_epoch[epoch] = [elem[5] for elem in list_test_memory if elem[3] == 0.001 and elem[4] == 0.1 and elem[2] == epoch]
        print(epoch, np.mean(dict_test_epoch[epoch]))

    # --- Draw figure ---
    # Imported here so that training does not require matplotlib.
    import matplotlib.pyplot as plt

    # Record layout -- 0: featuretype, 1: regularizing, 2: epoch, 3: alpha, 4: lambda
    train_f1_F = _select_acc(list_train_memory, 1, False)
    train_f1_T = _select_acc(list_train_memory, 1, True)
    train_f2_F = _select_acc(list_train_memory, 2, False)
    train_f2_T = _select_acc(list_train_memory, 2, True)
    test_f1_F = _select_acc(list_test_memory, 1, False)
    test_f1_T = _select_acc(list_test_memory, 1, True)
    test_f2_F = _select_acc(list_test_memory, 2, False)
    test_f2_T = _select_acc(list_test_memory, 2, True)

    f, axarr = plt.subplots(2, 2, sharex='col', sharey='row')
    f.suptitle('Accuracies per Number of Epoch', fontsize=40)

    # (subplot position, title, train curve, test curve). The last panel now
    # shows test_f2_T; it previously re-plotted test_f2_F by mistake.
    panels = [
        ((0, 0), "Regularizer False / Feature type 1", train_f1_F, test_f1_F),
        ((0, 1), "Regularizer True / Feature type 1", train_f1_T, test_f1_T),
        ((1, 0), "Regularizer False / Feature type 2", train_f2_F, test_f2_F),
        ((1, 1), "Regularizer True / Feature type 2", train_f2_T, test_f2_T),
    ]
    for position, title, train_curve, test_curve in panels:
        ax = axarr[position]
        ax.set_title(title, fontsize=20)
        ax.plot(list_epoch, train_curve, 'r--', label='Train')
        ax.plot(list_epoch, test_curve, 'b', label='Test')
        ax.legend(loc='best')

    for ax in axarr.flat:
        ax.set(xlabel='Num Epoch', ylabel='Accuracy')
        ax.grid()
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment