Skip to content

Instantly share code, notes, and snippets.

@davidmpaz
Last active June 12, 2016 19:30
Show Gist options
  • Save davidmpaz/e95d8f95632e79820494909b952afc29 to your computer and use it in GitHub Desktop.
Save davidmpaz/e95d8f95632e79820494909b952afc29 to your computer and use it in GitHub Desktop.
logistic regression
def logistic_regression(feature_matrix, sentiment, initial_coefficients, step_size, max_iter):
coefficients = np.array(initial_coefficients) # make sure it's a numpy array
for itr in xrange(max_iter):
# Predict P(y_i = +1|x_i,w) using your predict_probability() function
# YOUR CODE HERE
predictions = predict_probability(feature_matrix, coefficients)
# Compute indicator value for (y_i = +1)
indicator = (sentiment==+1)
# Compute the errors as indicator - predictions
errors = indicator - predictions
for j in xrange(len(coefficients)): # loop over each coefficient
# Recall that feature_matrix[:,j] is the feature column associated with coefficients[j].
# Compute the derivative for coefficients[j]. Save it in a variable called derivative
# YOUR CODE HERE
derivative = feature_derivative(errors, feature_matrix[:,j])
# add the step size times the derivative to the current coefficient
## YOUR CODE HERE
coefficients = coefficients + (step_size * derivative)
# Checking whether log likelihood is increasing
if itr <= 15 or (itr <= 100 and itr % 10 == 0) or (itr <= 1000 and itr % 100 == 0) \
or (itr <= 10000 and itr % 1000 == 0) or itr % 10000 == 0:
lp = compute_log_likelihood(feature_matrix, sentiment, coefficients)
print 'iteration %*d: log likelihood of observed labels = %.8f' % \
(int(np.ceil(np.log10(max_iter))), itr, lp)
return coefficients
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment