yonghanjung · November 11, 2018 06:18
diff --git a/SGD.py b/SGD.py
 import struct
 import numpy as np
 import gzip
 import copy
 import matplotlib.pyplot as plt
 from sklearn.linear_model import LogisticRegression

 ''' Preprocessing: Train, Test '''
 # Train and Test
 def read_idx(filename):
    """Load a gzip-compressed IDX file (the container format used by MNIST).

    The header is a 4-byte magic number (two zero bytes, a data-type code and
    the number of dimensions) followed by one big-endian 32-bit size per
    dimension; the raw element bytes come after it.

    :param filename: path of the ``.gz`` IDX file
    :return: writable ``uint8`` array shaped as declared in the header
    :raises ValueError: if the header does not describe unsigned-byte data
    """
    with gzip.open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        # Only type code 0x08 (unsigned byte) can be decoded as uint8 below.
        if zero != 0 or data_type != 0x08:
            raise ValueError('unsupported IDX header in %s' % (filename,))
        shape = tuple(struct.unpack('>I', f.read(4))[0] for _ in range(dims))
        # Binary-mode np.fromstring is deprecated. frombuffer returns a
        # read-only view, so copy to keep handing back a writable array.
        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape).copy()

 def _to_feature_matrix(images, featuretype):
    """Turn a stack of 28x28 images into scaled feature rows plus a bias column.

    :param images: array of images indexed by sample on the first axis
    :param featuretype: 1 for raw pixels (784 columns), 2 for 2x2 max-pooled
        pixels (196 columns)
    :return: float array with one row per image and a trailing column of ones
    :raises ValueError: if ``featuretype`` is neither 1 nor 2
    """
    if featuretype == 1:
        flat = images.reshape(len(images), 28 * 28)
    elif featuretype == 2:
        pooled = np.array([maxpool(img) for img in images])
        flat = pooled.reshape(len(images), 14 * 14)
    else:
        raise ValueError('featuretype must be 1 or 2, got %r' % (featuretype,))
    # Normalize to [0, 1]
    flat = flat / 255.0
    # Constant column so the weight vector also carries the intercept.
    return np.concatenate((flat, np.ones((len(flat), 1))), axis=1)

 def Preprocess(whatmode, mytrainingsize, datapath, featuretype):
    """Load MNIST images and labels and build the feature matrix.

    :param whatmode: 'train' or 'test'
    :param mytrainingsize: number of leading training samples to keep; it is
        not used in 'test' mode, where every sample in the file is returned
    :param datapath: prefix prepended to the MNIST file names (e.g. 'data/')
    :param featuretype: 1 for raw pixels, 2 for 2x2 max-pooled pixels
    :return: (features, labels); features are divided by 255 and end with a
        column of ones
    :raises ValueError: on an unknown ``whatmode`` or ``featuretype``
    """
    if whatmode == 'train':
        prefix, limit = 'train', mytrainingsize
    elif whatmode == 'test':
        prefix, limit = 't10k', None
    else:
        raise ValueError("whatmode must be 'train' or 'test', got %r" % (whatmode,))

    images = np.array(read_idx(datapath + prefix + '-images-idx3-ubyte.gz'))[:limit]
    labels = read_idx(datapath + prefix + '-labels-idx1-ubyte.gz')[:limit]
    # The row count is taken from the data actually loaded, so asking for more
    # samples than the file holds (or a test file that is not 10000 samples)
    # no longer breaks the reshape.
    return _to_feature_matrix(images, featuretype), labels

 # Only for Train
 def GenNewLabel(trainlabel, mylabel):
    """Build one-vs-rest targets: 1 where the label equals ``mylabel``, else 0.

    :param trainlabel: class labels (elementwise comparison assumes a NumPy array)
    :param mylabel: the class treated as positive
    :return: 0/1 integer flags, one per entry of ``trainlabel``
    """
    is_target = trainlabel == mylabel
    return is_target * 1

 def maxpool(mat, K=2, L=2):
    """Down-sample a 2-D array by taking the maximum of each K x L tile.

    Trailing rows and columns that do not fill a complete tile are dropped.
    The array size is read from ``mat`` itself instead of assuming 28x28.

    :param mat: 2-D array (for example a 28x28 image)
    :param K: tile height, 2 by default
    :param L: tile width, 2 by default
    :return: array of shape (rows // K, cols // L) holding the tile maxima
    """
    M, N = mat.shape
    MK = M // K
    NL = N // L
    # Splitting each axis into (tile index, offset inside tile) lets a single
    # max() over the two offset axes reduce every tile at once.
    return mat[:MK * K, :NL * L].reshape(MK, K, NL, L).max(axis=(1, 3))

 def ShuffleIdx(N, n):
    """Return the first ``n`` entries of a random permutation of 0..N-1.

    The shuffle draws from NumPy's global random state, so the result is
    reproducible after ``np.random.seed``.

    :param N: size of the index range to permute
    :param n: how many of the shuffled indices to return
    :return: array holding the leading ``n`` shuffled indices
    """
    order = np.arange(N)
    np.random.shuffle(order)
    picked = order[:n]
    return picked

 def mysigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    :param z: scalar or array of real values
    :return: NumPy float scalar for scalar input, otherwise an array with the
        shape of ``z``
    """
    z = np.asarray(z, dtype=float)
    # exp() only ever receives a non-positive argument here, so it cannot
    # overflow; the direct formula warns for large negative z.
    decay = np.exp(-np.abs(z))
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
    return out[()]

 def stochasticGradientDescent(X, y, theta, alpha, num_epoch, regular, reglambda):
    """Fit logistic-regression weights with per-sample gradient steps.

    Every epoch visits all samples once in a freshly shuffled order. Training
    stops early once the squared change of the weights over one epoch drops
    below ``1 / number_of_columns_of_X``.

    :param X: feature matrix, one row per sample
    :param y: targets aligned with the rows of ``X`` (0/1 in this script)
    :param theta: initial weight vector; it is copied, not modified
    :param alpha: learning rate
    :param num_epoch: maximum number of passes over the data
    :param regular: if true, add the term ``reglambda * theta`` to each step
    :param reglambda: regularization strength (used only when ``regular``)
    :return: the fitted weight vector
    """
    X = np.asarray(X)
    y = np.asarray(y)
    # Work on a float copy: the caller's array stays untouched and an
    # integer-typed start vector cannot break the in-place updates.
    theta = np.array(theta, dtype=float)
    # The tolerance comes from the data passed in, not from a module global.
    tolerance = 1.0 / X.shape[1]
    for _ in range(num_epoch):
        shuffled = ShuffleIdx(len(X), len(X))
        X = X[shuffled]
        y = y[shuffled]
        prev_theta = theta.copy()
        for xi, yi in zip(X, y):
            yhat_i = mysigmoid(np.dot(xi, theta))
            if regular:
                theta -= alpha * ((yhat_i - yi) * xi + reglambda * theta)
            else:
                theta -= alpha * (yhat_i - yi) * xi
        # Converged: the weights barely moved during this epoch.
        if np.sum(np.square(theta - prev_theta)) < tolerance:
            break
    return theta

 def ComputeAccuracy(mytestlabel, testlabel):
    """Fraction of predictions that equal the ground-truth label.

    :param mytestlabel: predicted labels (array or plain sequence)
    :param testlabel: ground-truth labels, same length as ``mytestlabel``
    :return: accuracy as a float in [0, 1]; 0.0 when there are no labels
    """
    predicted = np.asarray(mytestlabel)
    truth = np.asarray(testlabel)
    # Without this guard an empty label set divides by zero.
    if truth.size == 0:
        return 0.0
    return float(np.count_nonzero(predicted == truth)) / float(len(truth))

 def ComputeF1(mytestlabel, testlabel, listUniqLabel):
    '''
    Per-label F1 score (harmonic mean of precision and recall).

    A label that is never predicted gets precision 0, a label that never
    occurs in the ground truth gets recall 0, and F1 is 0 when both are 0.

    :param mytestlabel: predicted labels
    :param testlabel: ground-truth labels
    :param listUniqLabel: labels to score
    :return: array with one F1 value per entry of listUniqLabel
    '''
    labels = np.array(listUniqLabel)  # accept any sequence of labels
    scores = np.zeros(len(labels))

    for pos, label in enumerate(labels):
        n_predicted = np.sum((mytestlabel == label) * 1)
        n_actual = np.sum((testlabel == label) * 1)

        # Precision: correct predictions of this label / all predictions of it
        precision = 0.0
        if n_predicted != 0:
            hits = float(np.sum(((mytestlabel == testlabel) * 1) * (testlabel == label) * 1))
            precision = hits / float(n_predicted)

        # Recall: correct predictions of this label / all true occurrences
        recall = 0.0
        if n_actual != 0:
            hits = float(np.sum(((mytestlabel == testlabel) * 1) * (testlabel == label) * 1))
            recall = hits / float(n_actual)

        if (precision + recall) != 0:
            scores[pos] = 2 * (precision * recall) / float(precision + recall)
    return scores

 if __name__ == "__main__":
    # Script entry point. The guard previously compared against "main__",
    # which never matches, so nothing below was ever executed.

    # Hyperparameters
    datapath = 'data/'
    training_size = 10000
    test_size = 10000
    featuretype = 1
    alpha = 0.0001
    num_epoch = 100
    reglambda = 0.1
    regular = False
    np.random.seed(1)

    trainfeature, trainlabel = Preprocess('train', training_size, datapath, featuretype)
    testfeature, testlabel = Preprocess('test', test_size, datapath, featuretype)

    # Training: one binary (one-vs-rest) classifier per digit 0..9
    theta_box = list()
    for wval in range(10):
        target_label = wval
        target_trainlabel = GenNewLabel(trainlabel, target_label)
        theta = np.zeros(trainfeature.shape[1])
        theta = stochasticGradientDescent(trainfeature, target_trainlabel, theta, alpha, num_epoch, regular, reglambda)
        theta_box.append(theta)

    # Estimation: predict the digit whose classifier gives the highest score.
    # Scores are rounded to 3 decimals first, so ties go to the lowest digit.
    est_label = np.zeros(len(testlabel))
    for idx, elem_feature in enumerate(testfeature):
        est_wbox = np.zeros(10)
        for wval in range(10):
            est_wbox[wval] = np.round(mysigmoid(np.dot(elem_feature, theta_box[wval])), 3)
        est_label[idx] = np.argmax(est_wbox)

    acc = ComputeAccuracy(est_label, testlabel)
    f1 = ComputeF1(est_label, testlabel, np.unique(testlabel))
    avgf1 = np.mean(f1)

    # Single-string form prints the same text on Python 2 and Python 3.
    print("Test accuracy: %s" % np.round(acc, 4))
    print("Test F1 score: %s" % np.round(avgf1, 4))
	import struct
	import numpy as np
	import gzip
	import copy
	import matplotlib.pyplot as plt
	from sklearn.linear_model import LogisticRegression

	''' Preprocessing: Train, Test '''
	# Train and Test
	def read_idx(filename):
	    """Load a gzip-compressed IDX file (the container format used by MNIST).

	    The header is a 4-byte magic number (two zero bytes, a data-type code and
	    the number of dimensions) followed by one big-endian 32-bit size per
	    dimension; the raw element bytes come after it.

	    :param filename: path of the ``.gz`` IDX file
	    :return: writable ``uint8`` array shaped as declared in the header
	    :raises ValueError: if the header does not describe unsigned-byte data
	    """
	    with gzip.open(filename, 'rb') as f:
	        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
	        # Only type code 0x08 (unsigned byte) can be decoded as uint8 below.
	        if zero != 0 or data_type != 0x08:
	            raise ValueError('unsupported IDX header in %s' % (filename,))
	        shape = tuple(struct.unpack('>I', f.read(4))[0] for _ in range(dims))
	        # Binary-mode np.fromstring is deprecated. frombuffer returns a
	        # read-only view, so copy to keep handing back a writable array.
	        return np.frombuffer(f.read(), dtype=np.uint8).reshape(shape).copy()

	def _to_feature_matrix(images, featuretype):
	    """Turn a stack of 28x28 images into scaled feature rows plus a bias column.

	    :param images: array of images indexed by sample on the first axis
	    :param featuretype: 1 for raw pixels (784 columns), 2 for 2x2 max-pooled
	        pixels (196 columns)
	    :return: float array with one row per image and a trailing column of ones
	    :raises ValueError: if ``featuretype`` is neither 1 nor 2
	    """
	    if featuretype == 1:
	        flat = images.reshape(len(images), 28 * 28)
	    elif featuretype == 2:
	        pooled = np.array([maxpool(img) for img in images])
	        flat = pooled.reshape(len(images), 14 * 14)
	    else:
	        raise ValueError('featuretype must be 1 or 2, got %r' % (featuretype,))
	    # Normalize to [0, 1]
	    flat = flat / 255.0
	    # Constant column so the weight vector also carries the intercept.
	    return np.concatenate((flat, np.ones((len(flat), 1))), axis=1)

	def Preprocess(whatmode, mytrainingsize, datapath, featuretype):
	    """Load MNIST images and labels and build the feature matrix.

	    :param whatmode: 'train' or 'test'
	    :param mytrainingsize: number of leading training samples to keep; it is
	        not used in 'test' mode, where every sample in the file is returned
	    :param datapath: prefix prepended to the MNIST file names (e.g. 'data/')
	    :param featuretype: 1 for raw pixels, 2 for 2x2 max-pooled pixels
	    :return: (features, labels); features are divided by 255 and end with a
	        column of ones
	    :raises ValueError: on an unknown ``whatmode`` or ``featuretype``
	    """
	    if whatmode == 'train':
	        prefix, limit = 'train', mytrainingsize
	    elif whatmode == 'test':
	        prefix, limit = 't10k', None
	    else:
	        raise ValueError("whatmode must be 'train' or 'test', got %r" % (whatmode,))

	    images = np.array(read_idx(datapath + prefix + '-images-idx3-ubyte.gz'))[:limit]
	    labels = read_idx(datapath + prefix + '-labels-idx1-ubyte.gz')[:limit]
	    # The row count is taken from the data actually loaded, so asking for more
	    # samples than the file holds (or a test file that is not 10000 samples)
	    # no longer breaks the reshape.
	    return _to_feature_matrix(images, featuretype), labels

	# Only for Train
	def GenNewLabel(trainlabel, mylabel):
	    """Build one-vs-rest targets: 1 where the label equals ``mylabel``, else 0.

	    :param trainlabel: class labels (elementwise comparison assumes a NumPy array)
	    :param mylabel: the class treated as positive
	    :return: 0/1 integer flags, one per entry of ``trainlabel``
	    """
	    is_target = trainlabel == mylabel
	    return is_target * 1

	def maxpool(mat, K=2, L=2):
	    """Down-sample a 2-D array by taking the maximum of each K x L tile.

	    Trailing rows and columns that do not fill a complete tile are dropped.
	    The array size is read from ``mat`` itself instead of assuming 28x28.

	    :param mat: 2-D array (for example a 28x28 image)
	    :param K: tile height, 2 by default
	    :param L: tile width, 2 by default
	    :return: array of shape (rows // K, cols // L) holding the tile maxima
	    """
	    M, N = mat.shape
	    MK = M // K
	    NL = N // L
	    # Splitting each axis into (tile index, offset inside tile) lets a single
	    # max() over the two offset axes reduce every tile at once.
	    return mat[:MK * K, :NL * L].reshape(MK, K, NL, L).max(axis=(1, 3))

	def ShuffleIdx(N, n):
	    """Return the first ``n`` entries of a random permutation of 0..N-1.

	    The shuffle draws from NumPy's global random state, so the result is
	    reproducible after ``np.random.seed``.

	    :param N: size of the index range to permute
	    :param n: how many of the shuffled indices to return
	    :return: array holding the leading ``n`` shuffled indices
	    """
	    order = np.arange(N)
	    np.random.shuffle(order)
	    return order[:n]

	def mysigmoid(z):
	    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

	    :param z: scalar or array of real values
	    :return: NumPy float scalar for scalar input, otherwise an array with the
	        shape of ``z``
	    """
	    z = np.asarray(z, dtype=float)
	    # exp() only ever receives a non-positive argument here, so it cannot
	    # overflow; the direct formula warns for large negative z.
	    decay = np.exp(-np.abs(z))
	    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
	    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
	    return out[()]

	def stochasticGradientDescent(X, y, theta, alpha, num_epoch, regular, reglambda):
	    """Fit logistic-regression weights with per-sample gradient steps.

	    Every epoch visits all samples once in a freshly shuffled order. Training
	    stops early once the squared change of the weights over one epoch drops
	    below ``1 / number_of_columns_of_X``.

	    :param X: feature matrix, one row per sample
	    :param y: targets aligned with the rows of ``X`` (0/1 in this script)
	    :param theta: initial weight vector; it is copied, not modified
	    :param alpha: learning rate
	    :param num_epoch: maximum number of passes over the data
	    :param regular: if true, add the term ``reglambda * theta`` to each step
	    :param reglambda: regularization strength (used only when ``regular``)
	    :return: the fitted weight vector
	    """
	    X = np.asarray(X)
	    y = np.asarray(y)
	    # Work on a float copy: the caller's array stays untouched and an
	    # integer-typed start vector cannot break the in-place updates.
	    theta = np.array(theta, dtype=float)
	    # The tolerance comes from the data passed in, not from a module global.
	    tolerance = 1.0 / X.shape[1]
	    for _ in range(num_epoch):
	        shuffled = ShuffleIdx(len(X), len(X))
	        X = X[shuffled]
	        y = y[shuffled]
	        prev_theta = theta.copy()
	        for xi, yi in zip(X, y):
	            yhat_i = mysigmoid(np.dot(xi, theta))
	            if regular:
	                theta -= alpha * ((yhat_i - yi) * xi + reglambda * theta)
	            else:
	                theta -= alpha * (yhat_i - yi) * xi
	        # Converged: the weights barely moved during this epoch.
	        if np.sum(np.square(theta - prev_theta)) < tolerance:
	            break
	    return theta

	def ComputeAccuracy(mytestlabel, testlabel):
	    """Fraction of predictions that equal the ground-truth label.

	    :param mytestlabel: predicted labels (array or plain sequence)
	    :param testlabel: ground-truth labels, same length as ``mytestlabel``
	    :return: accuracy as a float in [0, 1]; 0.0 when there are no labels
	    """
	    predicted = np.asarray(mytestlabel)
	    truth = np.asarray(testlabel)
	    # Without this guard an empty label set divides by zero.
	    if truth.size == 0:
	        return 0.0
	    return float(np.count_nonzero(predicted == truth)) / float(len(truth))

	def ComputeF1(mytestlabel, testlabel, listUniqLabel):
	    '''
	    Per-label F1 score (harmonic mean of precision and recall).

	    A label that is never predicted gets precision 0, a label that never
	    occurs in the ground truth gets recall 0, and F1 is 0 when both are 0.

	    :param mytestlabel: predicted labels
	    :param testlabel: ground-truth labels
	    :param listUniqLabel: labels to score
	    :return: array with one F1 value per entry of listUniqLabel
	    '''
	    labels = np.array(listUniqLabel)  # accept any sequence of labels
	    scores = np.zeros(len(labels))

	    for pos, label in enumerate(labels):
	        n_predicted = np.sum((mytestlabel == label) * 1)
	        n_actual = np.sum((testlabel == label) * 1)

	        # Precision: correct predictions of this label / all predictions of it
	        precision = 0.0
	        if n_predicted != 0:
	            hits = float(np.sum(((mytestlabel == testlabel) * 1) * (testlabel == label) * 1))
	            precision = hits / float(n_predicted)

	        # Recall: correct predictions of this label / all true occurrences
	        recall = 0.0
	        if n_actual != 0:
	            hits = float(np.sum(((mytestlabel == testlabel) * 1) * (testlabel == label) * 1))
	            recall = hits / float(n_actual)

	        if (precision + recall) != 0:
	            scores[pos] = 2 * (precision * recall) / float(precision + recall)
	    return scores

	if __name__ == "__main__":
	    # Script entry point. The guard previously compared against "main__",
	    # which never matches, so nothing below was ever executed.

	    # Hyperparameters
	    datapath = 'data/'
	    training_size = 10000
	    test_size = 10000
	    featuretype = 1
	    alpha = 0.0001
	    num_epoch = 100
	    reglambda = 0.1
	    regular = False
	    np.random.seed(1)

	    trainfeature, trainlabel = Preprocess('train', training_size, datapath, featuretype)
	    testfeature, testlabel = Preprocess('test', test_size, datapath, featuretype)

	    # Training: one binary (one-vs-rest) classifier per digit 0..9
	    theta_box = list()
	    for wval in range(10):
	        target_label = wval
	        target_trainlabel = GenNewLabel(trainlabel, target_label)
	        theta = np.zeros(trainfeature.shape[1])
	        theta = stochasticGradientDescent(trainfeature, target_trainlabel, theta, alpha, num_epoch, regular, reglambda)
	        theta_box.append(theta)

	    # Estimation: predict the digit whose classifier gives the highest score.
	    # Scores are rounded to 3 decimals first, so ties go to the lowest digit.
	    est_label = np.zeros(len(testlabel))
	    for idx, elem_feature in enumerate(testfeature):
	        est_wbox = np.zeros(10)
	        for wval in range(10):
	            est_wbox[wval] = np.round(mysigmoid(np.dot(elem_feature, theta_box[wval])), 3)
	        est_label[idx] = np.argmax(est_wbox)

	    acc = ComputeAccuracy(est_label, testlabel)
	    f1 = ComputeF1(est_label, testlabel, np.unique(testlabel))
	    avgf1 = np.mean(f1)

	    # Single-string form prints the same text on Python 2 and Python 3.
	    print("Test accuracy: %s" % np.round(acc, 4))
	    print("Test F1 score: %s" % np.round(avgf1, 4))