Last active
August 29, 2015 14:14
-
-
Save emhart/b326b62f786452574c79 to your computer and use it in GitHub Desktop.
Gradient Descent algorithm for a linear model of p parameters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' A function to do gradient descent on arbitrarily sized linear models | |
#' @param Y a vector of response variables | |
#' @param X a design matrix of predictor variables | |
#' @param alpha the gradient descent step size | |
#' @param init the seed to start off for estimating parameters, must be the same size as the columns in X | |
#' @param failure the number of iterations after which to say convergence has failed | |
#' @param epsilon the difference in size between loss function iterations after which to say convergence has been reached. | |
linear_gd <- function(Y, X, alpha, init = runif(dim(X)[2],-10,10),failure = 10000,epsilon = 0.0001){ | |
theta_new <- init | |
j <- 2 | |
cost <- rep(NA,failure) | |
cost[1] <- (1/(2*dim(X)[2]))*sum((X%*%theta_new-Y)^2) | |
## Little hack to kick off the while loop | |
cost[2] <- cost[1] + epsilon + 1 | |
while(abs(cost[j - 1] - cost[j]) > epsilon){ | |
j <- j+1 | |
if(failure <= j){stop(paste("algorithm failed to converge after",failure, "iterations",sep=" "))} | |
theta_old <- theta_new | |
for(i in 1:length(theta_old)){ | |
theta_new[i] <- theta_old[i] - (alpha*mean((X%*%theta_old-Y)*X[,i])) | |
} | |
cost[j] <- (1/(2*dim(X)[2]))*sum((X%*%theta_new-Y)^2) | |
} | |
cat(paste("Algorithm converged after",j,"iterations.",sep=" ")) | |
cost <- cost[!is.na(cost)] | |
return(list(fit=theta_new,loss=cost)) | |
} | |
### Simple example | |
# Generate some data | |
X <- cbind(rep(1,100),matrix(rnorm(300),ncol=3,nrow=100)) | |
p1 <- c(2,3,-3,4) | |
Y <- X%*%p1 | |
# Plot for fun | |
plot(Y~X[,2]) | |
# Fit | |
out <- linear_gd(Y,X,.1,init = c(1,1,1,1)) | |
# Compare outputs to fit | |
cbind(out$fit,p1) | |
# plot loss function | |
plot(out$loss,type='l') | |
### Ok now let's take it up a notch where our design matrix is 100 X 100 | |
X <- cbind(rep(1,100), matrix(rnorm(9900),ncol=99,nrow=100)) | |
p1 <- runif(100,-5,5) | |
Y <- X%*%p1 | |
plot(Y~X[,2]) | |
out <- linear_gd(Y,X,.1) | |
plot(out$loss,type='l') | |
cbind(round(out$fit,2),round(p1,2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment