Simple brute-force gradient descent for linear regression in R
# Set up a random data frame of three features
set.seed(11111)
x <- data.frame(a = rnorm(n = 15) * 5,
                b = rnorm(n = 15) * 3 + 1,
                c = rnorm(n = 15) * 2 + 2)
# Set up the (noiseless) target: linear in the polynomial features built below
y <- 2 + (x[, 1] * 2) + (x[, 2] * 3) + (x[, 3] * 4) + (x[, 3] ^ 2) + (x[, 1] * x[, 2])
# Set up polynomial features (squares and pairwise products)
columns <- ncol(x)
for (i in 1:columns) {
  # j starts at i, so the squared term x_i * x_i is created here as well
  for (j in i:columns) {
    x[, paste0(colnames(x)[i], "X", colnames(x)[j])] <- x[, i] * x[, j]
  }
}
# Rename columns ("*k" suffixes mark the true coefficients used to build y)
# and add an intercept column
colnames(x) <- c("a*2", "b*3", "c*4", "aXa", "aXb*1", "aXc", "bXb", "bXc", "cXc*1")
x <- as.matrix(cbind(Intercept = 1, x))
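# For reference (an addition, not part of the original gist): the parameter
# vector implied by the construction of y above, in column order
# (Intercept, a, b, c, aXa, aXb, aXc, bXb, bXc, cXc), is:
true_param <- c(2, 2, 3, 4, 0, 1, 0, 0, 0, 1)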
# Brute-force gradient descent: explicit loops over iterations, features,
# and observations. param = initial coefficients, eta = learning rate,
# iters = number of iterations.
GradientDescent_brute <- function(x, y, param, eta, iters) {
  # Uncomment the commented lines (here and below) to monitor the cost
  # browser()
  # cost <- rep(0, iters)
  # Loop over gradient descent iterations
  for (k in 1:iters) {
    # Initialize the gradient vector to zero
    grad <- rep(0, ncol(x))
    # Loop through each feature (column)
    for (i in 1:ncol(x)) {
      # Loop through each observation (row)
      for (j in 1:nrow(x)) {
        # Accumulate this observation's gradient contribution for this feature
        # Squared Error = ((x %*% param) - y) ^ 2
        # Its gradient w.r.t. param[i] is 2 * ((x %*% param) - y) * x[, i]
        grad[i] <- grad[i] + 2 * (((x[j, ] %*% param) - y[j]) * x[j, i])
      }
    }
    # Scale the gradient by the learning rate and divide by the observation
    # count to take a mean (full-batch) gradient step
    param <- param - (eta * grad / nrow(x))
    # Cost-monitoring routines (err avoids reusing the name grad):
    # err <- ((x %*% param) - y) ^ 2
    # cost[k] <- sum(err) / nrow(x)
  }
  print(param)
  return(param)
}
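# For reference, the same full-batch update can be written without the inner
# loops using matrix algebra. A sketch (not part of the original gist), taking
# the same arguments as GradientDescent_brute:
GradientDescent_vectorized <- function(x, y, param, eta, iters) {
  for (k in 1:iters) {
    # Residuals: predictions minus targets, as a plain vector
    residual <- as.vector(x %*% param) - y
    # Full-batch squared-error gradient, summed over observations
    grad <- 2 * as.vector(t(x) %*% residual)
    # Same mean gradient step as in the brute-force version
    param <- param - (eta * grad / nrow(x))
  }
  return(param)
}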
# Run gradient descent: 10 zero-initialized parameters (intercept + 9 features),
# learning rate 0.0001, 100 iterations
GradientDescent_brute(x, y, rep(0, 10), 0.0001, 100)
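# Sanity check (an addition, not in the original gist): since the target is
# noiseless, the closed-form least-squares fit recovers the exact generating
# coefficients; "- 1" drops lm()'s own intercept because x already contains
# an Intercept column.
coef(lm(y ~ x - 1))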