# Logistic Regression + Elastic Net Regularization example in R
# Setting up random matrix
set.seed(11111)
x <- data.frame(a = rnorm(n = 15) * 5,
                b = rnorm(n = 15) * 3 + 1,
                c = rnorm(n = 15) * 2 + 2)
# Setting up an imperfect relationship (non-linear: the target depends on c^2 and the a*b interaction)
y <- as.integer((2 + (x[, 1] * 2) + (x[, 2] * 3) + (x[, 3] * 4) + (x[, 3] ^ 2) + (x[, 1] * x[, 2])) > 20)
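# Illustrative sanity check (not needed for the fit): inspect how the
# threshold at 20 splits the 15 samples between the two classes
table(y)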
# Setting up polynomial features
columns <- ncol(x)
for (i in 1:columns) {
  # j starts at i, so the squared terms (aXa, bXb, cXc) are generated too
  for (j in i:columns) {
    x[, paste0(colnames(x)[i], "X", colnames(x)[j])] <- x[, i] * x[, j]
  }
}
# Rename columns to embed the true weights from the generating formula
# (e.g. "a*2": a has true weight 2; names without a suffix have true weight 0)
colnames(x) <- c("a*2", "b*3", "c*4", "aXa", "aXb*1", "aXc", "bXb", "bXc", "cXc*1")
x <- as.matrix(cbind(Intercept = 1, x))
# Setup activation function
activation <- function(x) {
  return(1 / (1 + exp(-x))) # Sigmoid function
}
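# Illustrative sanity check (not part of the fit): the sigmoid maps 0 to 0.5
# and stays strictly inside (0, 1)
stopifnot(abs(activation(0) - 0.5) < 1e-12,
          all(activation(c(-10, 10)) > 0 & activation(c(-10, 10)) < 1))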
# Calculate Logloss cost
cost <- function(x, y, param, eps = 1e-15) {
  # Clip the predictions (not the labels) away from 0 and 1 to avoid log(0)
  new_x <- pmin(pmax(activation(x %*% param), eps), 1 - eps)
  return(mean(-y * log(new_x) - (1 - y) * log(1 - new_x)))
}
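# Useful reference point: with all-zero parameters the model predicts 0.5 for
# every sample, so the starting logloss should be exactly log(2) ~ 0.6931
stopifnot(abs(cost(x, y, rep(0, ncol(x))) - log(2)) < 1e-12)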
grad <- function(x, y, param, l1, l2) {
  # One-shot (vectorized) gradient of the mean logloss for gradient descent
  # See the Linear Regression example for the (identical) approach
  gradient <- as.numeric(t(activation(x %*% param) - y) %*% x) / nrow(x)
  # This is the same as doing this:
  # for (i in 1:ncol(x)) {
  #   gradient[i] <- mean((activation(x %*% param) - y) * x[, i])
  # }
  # Add L1/L2 regularization to every weight except the intercept, using the
  # penalty gradients: d/dw (l1 * |w|) = l1 * sign(w), d/dw (l2 * w^2) = 2 * l2 * w
  gradient[-1] <- gradient[-1] + l1 * sign(param[-1]) + 2 * l2 * param[-1]
  return(gradient)
}
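# Illustrative verification of the analytic gradient against a central finite
# difference; num_grad is a helper introduced for this check, not original code
num_grad <- function(x, y, param, h = 1e-6) {
  sapply(seq_along(param), function(j) {
    up <- param; up[j] <- up[j] + h
    down <- param; down[j] <- down[j] - h
    (cost(x, y, up) - cost(x, y, down)) / (2 * h)
  })
}
theta0 <- rep(0.01, ncol(x))
# With both penalties off the two should agree up to finite-difference error
max(abs(grad(x, y, theta0, l1 = 0, l2 = 0) - num_grad(x, y, theta0)))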
EN_Regularization <- function(x, y, init, iters, eta, cost, grad, alpha, lambda) {
  # alpha is passed to grad() as the L1 weight, lambda as the L2 weight
  param <- data.frame(matrix(nrow = iters + 1, ncol = length(init) + 1))
  colnames(param) <- c(colnames(x), "Loss")
  param[1, ] <- c(init, cost(x, y, init))
  for (i in 1:iters) {
    # One gradient descent step, then record the new loss
    param[i + 1, 1:length(init)] <- as.numeric(param[i, 1:length(init)]) -
      eta * grad(x, y, as.numeric(param[i, 1:length(init)]), alpha, lambda)
    param[i + 1, length(init) + 1] <- cost(x, y, as.numeric(param[i + 1, 1:length(init)]))
  }
  cat("Final cost: ", sprintf("%10.07f", param[nrow(param), ncol(param)]), "\n", sep = "")
  cat("Parameters:", as.numeric(param[nrow(param), 1:length(init)]), "\n", sep = " ")
  param <- cbind(Iteration = 0:(nrow(param) - 1), param)
  return(param)
}
param <- EN_Regularization(x = x,
                           y = y,
                           init = rep(0, 10),
                           iters = 1000,
                           eta = 0.01,
                           cost = cost,
                           grad = grad,
                           alpha = 0,
                           lambda = 0)
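# Illustrative follow-up: turn the final coefficients into class predictions
# (the 0.5 cutoff is an assumption for this check, not something the gist specifies)
coef_fit <- as.numeric(param[nrow(param), 2:(ncol(param) - 1)])
pred <- as.integer(activation(x %*% coef_fit) > 0.5)
mean(pred == y)  # share of the 15 training points classified correctly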
# Keep every 10th iteration and melt to long format for plotting
melted_param <- reshape::melt(param[(1:((nrow(param) - 1) / 10)) * 10 + 1, 1:(ncol(param) - 1)], id = c("Iteration"))
#car::scatterplot(value ~ Iteration | variable, data = melted_param)
# Plot each parameter's path over iterations, with a reference line at 0
lattice::xyplot(value ~ Iteration | variable, data = melted_param, type = "l",
                panel = function(...) {lattice::panel.xyplot(...); lattice::panel.abline(h = 0)})
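# For comparison, if the glmnet package is available: it fits the same kind of
# elastic-net logistic regression, but with the penalty parameterized as
# lambda * (alpha * L1 + (1 - alpha) / 2 * L2), so the alpha/lambda values
# below are arbitrary illustrations and the coefficients will not match exactly
if (requireNamespace("glmnet", quietly = TRUE)) {
  fit <- glmnet::glmnet(x[, -1], y, family = "binomial", alpha = 0.5, lambda = 0.01)
  print(coef(fit))
}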