Logistic regression classifier
import numpy as np


def logistic_regression(data, label, iteration, learning_rate):
    '''
    Logistic regression classifier trained with batch gradient descent.

    Arguments
        data: The training data with shape (n, d): n samples, each with d features.
        label: The training labels with shape (n, 1); each entry is +1 or -1.
        iteration: The number of gradient descent iterations.
        learning_rate: The learning rate for the weight update.

    Returns
        w: The learned separator with shape (d, 1).
    '''
    dim = data.shape[1]
    w = np.zeros((dim, 1))  # weights, updated by gradient descent on the cross-entropy error
    for i in range(iteration):
        g = np.zeros((dim, 1))
        for j in range(data.shape[0]):  # accumulate the gradient of the cross-entropy error
            gradient = (-1 * label[j] * data[j]) / (1 + np.exp(label[j] * np.dot(np.transpose(w), data[j])))
            g = g + gradient[:, np.newaxis]
        g = (1 / data.shape[0]) * g
        # move in the direction v = -learning_rate * gradient(w(t))
        v = -learning_rate * g
        # update weights: w(t + 1) = w(t) + v
        w = w + v
    return w
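

# A minimal vectorized sketch of the same batch update, assuming the shapes
# documented above (data: (n, d), label: (n, 1) with entries +1/-1). The name
# logistic_regression_vectorized is illustrative and not part of the original gist;
# it computes the same average gradient of the cross-entropy error
#     E(w) = (1/n) * sum_i ln(1 + exp(-y_i * w^T x_i))
# with matrix operations instead of the inner Python loop.
def logistic_regression_vectorized(data, label, iteration, learning_rate):
    n, dim = data.shape
    w = np.zeros((dim, 1))
    for _ in range(iteration):
        margins = label * (data @ w)                           # (n, 1): y_i * w^T x_i
        g = -(data.T @ (label / (1 + np.exp(margins)))) / n    # (d, 1): average gradient
        w = w - learning_rate * g
    return w
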
def sig(z):
    return 1 / (1 + np.exp(-z))  # logistic sigmoid; np.exp is used since only numpy is imported


def accuracy(x, y, w):
    '''
    This computes the accuracy of a logistic regression model.

    Arguments
        x: The input data with shape (n, d): n data samples, each with d features.
        y: The corresponding labels of x with shape (n, 1); each entry is +1 or -1.
        w: The separator produced by logistic_regression, with shape (d, 1).

    Returns
        accuracy: The fraction of correctly classified samples.
            The threshold is 0.5: if the predicted probability is at least 0.5,
            the classification is +1, otherwise it is -1.
    '''
    correct = 0
    for i in range(x.shape[0]):
        wt = np.transpose(w)
        prediction = sig(np.dot(wt, x[i]))
        if (prediction >= 0.5 and y[i] == 1) or (prediction < 0.5 and y[i] == -1):  # classify and compare with the label
            correct = correct + 1
    return correct / x.shape[0]  # fraction of correct classifications
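

# A minimal usage sketch, assuming two Gaussian clusters as synthetic data; the
# sample sizes, means, and hyperparameters below are illustrative choices and
# not values from the original gist.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    pos = rng.normal(loc=2.0, scale=1.0, size=(50, 2))            # class +1 samples
    neg = rng.normal(loc=-2.0, scale=1.0, size=(50, 2))           # class -1 samples
    data = np.vstack([pos, neg])                                  # (100, 2)
    label = np.vstack([np.ones((50, 1)), -np.ones((50, 1))])      # (100, 1), entries +1/-1
    w = logistic_regression(data, label, iteration=100, learning_rate=0.1)
    print("training accuracy:", accuracy(data, label, w))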