logistic regression
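The function below fits a logistic regression classifier by gradient ascent on the log likelihood of the observed labels. For each coefficient $w_j$ the partial derivative is

$$\frac{\partial \ell(\mathbf{w})}{\partial w_j} = \sum_i h_j(\mathbf{x}_i)\,\bigl(\mathbf{1}[y_i = +1] - P(y_i = +1 \mid \mathbf{x}_i, \mathbf{w})\bigr),$$

where $h_j(\mathbf{x}_i)$ is column $j$ of `feature_matrix`, and each coefficient is updated as $w_j \leftarrow w_j + \eta\,\partial\ell/\partial w_j$ with step size $\eta$.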
import numpy as np

def logistic_regression(feature_matrix, sentiment, initial_coefficients, step_size, max_iter):
    coefficients = np.array(initial_coefficients, dtype=float)  # make sure it's a float numpy array
    for itr in range(max_iter):
        # Predict P(y_i = +1 | x_i, w) using the predict_probability() function
        predictions = predict_probability(feature_matrix, coefficients)

        # Compute indicator value for (y_i = +1)
        indicator = (sentiment == +1)

        # Compute the errors as indicator - predictions
        errors = indicator - predictions

        for j in range(len(coefficients)):  # loop over each coefficient
            # feature_matrix[:, j] is the feature column associated with coefficients[j].
            # Compute the derivative of the log likelihood with respect to coefficients[j].
            derivative = feature_derivative(errors, feature_matrix[:, j])

            # Gradient ascent: add the step size times the derivative to coefficients[j]
            coefficients[j] = coefficients[j] + step_size * derivative

        # Check whether the log likelihood is increasing
        if itr <= 15 or (itr <= 100 and itr % 10 == 0) or (itr <= 1000 and itr % 100 == 0) \
                or (itr <= 10000 and itr % 1000 == 0) or itr % 10000 == 0:
            lp = compute_log_likelihood(feature_matrix, sentiment, coefficients)
            print('iteration %*d: log likelihood of observed labels = %.8f' %
                  (int(np.ceil(np.log10(max_iter))), itr, lp))

    return coefficients
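The helpers predict_probability, feature_derivative, and compute_log_likelihood are not defined in this gist. A minimal sketch of what they are assumed to do, using the standard sigmoid link and the (simplified) log likelihood of the observed +1/-1 labels:

import numpy as np

def predict_probability(feature_matrix, coefficients):
    # P(y_i = +1 | x_i, w) = sigmoid(w . h(x_i)), computed for every row at once
    scores = np.dot(feature_matrix, coefficients)
    return 1.0 / (1.0 + np.exp(-scores))

def feature_derivative(errors, feature):
    # Partial derivative of the log likelihood w.r.t. one coefficient:
    # the dot product of the errors with that feature column
    return np.dot(errors, feature)

def compute_log_likelihood(feature_matrix, sentiment, coefficients):
    # Simplified log likelihood of the observed +1/-1 labels
    indicator = (sentiment == +1).astype(float)
    scores = np.dot(feature_matrix, coefficients)
    return np.sum((indicator - 1.0) * scores - np.log(1.0 + np.exp(-scores)))

With these in place the function can be exercised on a small synthetic example, e.g. logistic_regression(np.array([[1., 2.], [1., -1.]]), np.array([+1, -1]), np.zeros(2), 1e-2, 200).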