Skip to content

Instantly share code, notes, and snippets.

@Laurae2
Last active February 25, 2017 20:20
Show Gist options
  • Save Laurae2/c6e9d11b7e99176dc3327695be528db4 to your computer and use it in GitHub Desktop.
Save Laurae2/c6e9d11b7e99176dc3327695be528db4 to your computer and use it in GitHub Desktop.
L2 Regularizaion Regression example in R
# Setting up random matrix
set.seed(11111)
x <- data.frame(a = rnorm(n = 15) * 5,
b = rnorm(n = 15) * 3 + 1,
c = rnorm(n = 15) * 2 + 2)
# Setting up the (perfect) linear relationship
y <- 2 + (x[, 1] * 2) + (x[, 2] * 3) + (x[, 3] * 4) + (x[, 3] ^ 2) + (x[, 1] * x[, 2])
# Setting up polynomial features
columns <- ncol(x)
for (i in 1:columns) {
x[, paste0(colnames(x)[i], "X", colnames(x)[i])] <- x[, i] * x[, i]
for (j in i:columns) {
x[, paste0(colnames(x)[i], "X", colnames(x)[j])] <- x[, i] * x[, j]
}
}
# Add column names and intercept
colnames(x) <- c("a*2", "b*3", "c*4", "aXa", "aXb*1", "aXc", "bXb", "bXc", "cXc*1")
x <- as.matrix(cbind(Intercept = 1, x))
# Calculate Mean Squared Error cost
cost <- function(x, y, param) {mean(((x %*% param)- y) ^ 2)}
grad <- function(x, y, param, l2) {
gradient <- rep(0, length(param))
pre_sum <- ((x %*% param) - y)
for (i in 1:length(param)) {
# Squared Error = (x - y) ^ 2
# Squared Error Gradient: 2 * (x - y)
gradient[i] <- 2 * mean(pre_sum * x[, i])
}
# Add L2 Regularization
gradient <- c(gradient[1], gradient[2:length(gradient)] + (l2 * sum(param[-1] ^ 2)))
return(gradient)
}
L2_Regularization <- function(x, y, init, iters, eta, cost, grad, lambda) {
param <- data.frame(matrix(nrow = iters + 1, ncol = length(init) + 1))
colnames(param) <- c(colnames(x), "Loss")
param[1, ] <- c(init, cost(x, y, init))
for(i in 1:iters) {
param[i + 1, 1:length(init)] <- as.numeric(param[i, 1:length(init)]) - eta * grad(x, y, as.numeric(param[i, 1:length(init)]), lambda)
param[i + 1, length(init) + 1] <- cost(x, y, as.numeric(param[i + 1, 1:length(init)]))
}
cat("Final cost: ", sprintf("%10.07f", param[nrow(param), ncol(param)]), "\n", sep = "")
cat("Parameters:", as.numeric(param[nrow(param), 1:(length(init))]), sep = " ")
param <- cbind(Iteration = 0:(nrow(param) - 1), param)
return(param)
}
param <- L2_Regularization(x = x,
y = y,
init = rep(0, 10),
iters = 10000,
eta = 0.0005,
cost = cost,
grad = grad,
lambda = 1)
melted_param <- reshape::melt(param[(1:((nrow(param) - 1) / 10)) * 10 + 1, 1:(ncol(param) - 1)], id = c("Iteration"))
#car::scatterplot(value ~ Iteration | variable, data = melted_param)
lattice::xyplot(value ~ Iteration | variable, data = melted_param, type = "l", panel = function(...) {panel.xyplot(...); panel.abline(h = 0)})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment