Laurae2 · February 25, 2017 20:21
diff --git a/L1_regularization_regression.R b/L1_regularization_regression.R
 # Setting up random matrix
 set.seed(11111)
 x <- data.frame(a = rnorm(n = 15) * 5,
                b = rnorm(n = 15) * 3 + 1,
                c = rnorm(n = 15) * 2 + 2)

 # Setting up the (perfect) linear relationship
 y <- 2 + (x[, 1] * 2) + (x[, 2] * 3) + (x[, 3] * 4) + (x[, 3] ^ 2) + (x[, 1] * x[, 2])

 # Setting up polynomial features
 columns <- ncol(x)
 for (i in 1:columns) {
  x[, paste0(colnames(x)[i], "X", colnames(x)[i])] <- x[, i] * x[, i]
  for (j in i:columns) {
    x[, paste0(colnames(x)[i], "X", colnames(x)[j])] <- x[, i] * x[, j]
  }
 }

 # Add column names and intercept
 colnames(x) <- c("a*2", "b*3", "c*4", "aXa", "aXb*1", "aXc", "bXb", "bXc", "cXc*1")
 x <- as.matrix(cbind(Intercept = 1, x))

 # Calculate Mean Squared Error cost
 cost <- function(x, y, param) {mean(((x %*% param)- y) ^ 2)}
 grad <- function(x, y, param, l1) {
  gradient <- rep(0, length(param))
  pre_sum <- ((x %*% param) - y)
  for (i in 1:length(param)) {
    # Squared Error = (x - y) ^ 2
    # Squared Error Gradient: 2 * (x - y)
    gradient[i] <- 2 * mean(pre_sum * x[, i])
  }
  # Add L1 Regularization
  gradient <- c(gradient[1], gradient[2:length(gradient)] + (l1 * sum(abs(param[-1]))))
  return(gradient)
 }

 L1_Regularization <- function(x, y, init, iters, eta, cost, grad, alpha) {
  
  param <- data.frame(matrix(nrow = iters + 1, ncol = length(init) + 1))
  colnames(param) <- c(colnames(x), "Loss")
  param[1, ] <- c(init, cost(x, y, init))
  
  for(i in 1:iters) {
    param[i + 1, 1:length(init)] <- as.numeric(param[i, 1:length(init)]) - eta * grad(x, y, as.numeric(param[i, 1:length(init)]), alpha)
    param[i + 1, length(init) + 1] <- cost(x, y, as.numeric(param[i + 1, 1:length(init)]))
  }
  
  cat("Final cost: ", sprintf("%10.07f", param[nrow(param), ncol(param)]), "\n", sep = "")
  cat("Parameters:", as.numeric(param[nrow(param), 1:(length(init))]), sep = " ")
  
  param <- cbind(Iteration = 0:(nrow(param) - 1), param)
  
  return(param)
  
 }

 param <- L1_Regularization(x = x,
                           y = y,
                           init = rep(0, 10),
                           iters = 10000,
                           eta = 0.0005,
                           cost = cost,
                           grad = grad,
                           alpha = 1)

 melted_param <- reshape::melt(param[(1:((nrow(param) - 1) / 10)) * 10 + 1, 1:(ncol(param) - 1)], id = c("Iteration"))
 #car::scatterplot(value ~ Iteration | variable, data = melted_param)
 lattice::xyplot(value ~ Iteration | variable, data = melted_param, type = "l", panel = function(...) {panel.xyplot(...); panel.abline(h = 0)})
	# Setting up random matrix
	set.seed(11111)
	x <- data.frame(a = rnorm(n = 15) * 5,
	b = rnorm(n = 15) * 3 + 1,
	c = rnorm(n = 15) * 2 + 2)

	# Setting up the (perfect) linear relationship
	y <- 2 + (x[, 1] * 2) + (x[, 2] * 3) + (x[, 3] * 4) + (x[, 3] ^ 2) + (x[, 1] * x[, 2])

	# Setting up polynomial features
	columns <- ncol(x)
	for (i in 1:columns) {
	x[, paste0(colnames(x)[i], "X", colnames(x)[i])] <- x[, i] * x[, i]
	for (j in i:columns) {
	x[, paste0(colnames(x)[i], "X", colnames(x)[j])] <- x[, i] * x[, j]
	}
	}

	# Add column names and intercept
	colnames(x) <- c("a2", "b3", "c4", "aXa", "aXb1", "aXc", "bXb", "bXc", "cXc*1")
	x <- as.matrix(cbind(Intercept = 1, x))

	# Calculate Mean Squared Error cost
	cost <- function(x, y, param) {mean(((x %*% param)- y) ^ 2)}
	grad <- function(x, y, param, l1) {
	gradient <- rep(0, length(param))
	pre_sum <- ((x %*% param) - y)
	for (i in 1:length(param)) {
	# Squared Error = (x - y) ^ 2
	# Squared Error Gradient: 2 * (x - y)
	gradient[i] <- 2 * mean(pre_sum * x[, i])
	}
	# Add L1 Regularization
	gradient <- c(gradient[1], gradient[2:length(gradient)] + (l1 * sum(abs(param[-1]))))
	return(gradient)
	}

	L1_Regularization <- function(x, y, init, iters, eta, cost, grad, alpha) {

	param <- data.frame(matrix(nrow = iters + 1, ncol = length(init) + 1))
	colnames(param) <- c(colnames(x), "Loss")
	param[1, ] <- c(init, cost(x, y, init))

	for(i in 1:iters) {
	param[i + 1, 1:length(init)] <- as.numeric(param[i, 1:length(init)]) - eta * grad(x, y, as.numeric(param[i, 1:length(init)]), alpha)
	param[i + 1, length(init) + 1] <- cost(x, y, as.numeric(param[i + 1, 1:length(init)]))
	}

	cat("Final cost: ", sprintf("%10.07f", param[nrow(param), ncol(param)]), "\n", sep = "")
	cat("Parameters:", as.numeric(param[nrow(param), 1:(length(init))]), sep = " ")

	param <- cbind(Iteration = 0:(nrow(param) - 1), param)

	return(param)

	}

	param <- L1_Regularization(x = x,
	y = y,
	init = rep(0, 10),
	iters = 10000,
	eta = 0.0005,
	cost = cost,
	grad = grad,
	alpha = 1)

	melted_param <- reshape::melt(param[(1:((nrow(param) - 1) / 10)) * 10 + 1, 1:(ncol(param) - 1)], id = c("Iteration"))
	#car::scatterplot(value ~ Iteration \| variable, data = melted_param)
	lattice::xyplot(value ~ Iteration \| variable, data = melted_param, type = "l", panel = function(...) {panel.xyplot(...); panel.abline(h = 0)})