Laurae2 · February 27, 2017 23:24
diff --git a/logistic_regression_EN.R b/logistic_regression_EN.R
 # Setting up random matrix
 set.seed(11111)
 x <- data.frame(a = rnorm(n = 15) * 5,
                b = rnorm(n = 15) * 3 + 1,
                c = rnorm(n = 15) * 2 + 2)

 # Setting up an imperfect relationship (non-linear problem)
 y <- as.integer((2 + (x[, 1] * 2) + (x[, 2] * 3) + (x[, 3] * 4) + (x[, 3] ^ 2) + (x[, 1] * x[, 2])) > 20)

 # Setting up polynomial features
 columns <- ncol(x)
 for (i in 1:columns) {
  x[, paste0(colnames(x)[i], "X", colnames(x)[i])] <- x[, i] * x[, i]
  for (j in i:columns) {
    x[, paste0(colnames(x)[i], "X", colnames(x)[j])] <- x[, i] * x[, j]
  }
 }

 # Add column names and intercept
 colnames(x) <- c("a*2", "b*3", "c*4", "aXa", "aXb*1", "aXc", "bXb", "bXc", "cXc*1")
 x <- as.matrix(cbind(Intercept = 1, x))

 # Setup activation function
 activation <- function(x) {
  return(1 / (1 + exp(-x))) # Sigmoid function
 }

 # Calculate Logloss cost
 cost <- function(x, y, param, eps = 1e-15) {
  new_x <- activation(x %*% param)
  new_y <- pmin(pmax(y, eps), 1 - eps)
  return(mean(-new_y * log(new_x) - (1 - new_y) * log(1 - new_x)))
 }
 grad <- function(x, y, param, l1, l2) {
  # One-shot approach to Gradient Boosting for logloss
  # See Linear Regression for the (identical) example
  gradient <- (t((activation(x %*% param) - y)) %*% x) / nrow(x)
  
  # This is the same as doing this
  # for (i in 1:ncol(x)) {
  #   gradient[i] <- mean((activation(x %*% param) - y) * x[, i])
  # }
  
  # Add L1/L2 Regularization
  gradient <- c(gradient[1], gradient[2:length(gradient)] + (l1 * sum(abs(param[-1]))) + (l2 * sum(param[-1] ^ 2)))
  return(gradient)
 }

 EN_Regularization <- function(x, y, init, iters, eta, cost, grad, alpha, lambda) {
  
  param <- data.frame(matrix(nrow = iters + 1, ncol = length(init) + 1))
  colnames(param) <- c(colnames(x), "Loss")
  param[1, ] <- c(init, cost(x, y, init))
  
  for(i in 1:iters) {
    param[i + 1, 1:length(init)] <- as.numeric(param[i, 1:length(init)]) - eta * grad(x, y, as.numeric(param[i, 1:length(init)]), alpha, lambda)
    param[i + 1, length(init) + 1] <- cost(x, y, as.numeric(param[i + 1, 1:length(init)]))
  }
  
  cat("Final cost: ", sprintf("%10.07f", param[nrow(param), ncol(param)]), "\n", sep = "")
  cat("Parameters:", as.numeric(param[nrow(param), 1:(length(init))]), sep = " ")
  
  param <- cbind(Iteration = 0:(nrow(param) - 1), param)
  
  return(param)
  
 }

 param <- EN_Regularization(x = x,
                           y = y,
                           init = rep(0, 10),
                           iters = 1000,
                           eta = 0.01,
                           cost = cost,
                           grad = grad,
                           alpha = 0,
                           lambda = 0)

 melted_param <- reshape::melt(param[(1:((nrow(param) - 1) / 10)) * 10 + 1, 1:(ncol(param) - 1)], id = c("Iteration"))
 #car::scatterplot(value ~ Iteration | variable, data = melted_param)
 lattice::xyplot(value ~ Iteration | variable, data = melted_param, type = "l", panel = function(...) {panel.xyplot(...); panel.abline(h = 0)})
	# Setting up random matrix
	set.seed(11111)
	x <- data.frame(a = rnorm(n = 15) * 5,
	b = rnorm(n = 15) * 3 + 1,
	c = rnorm(n = 15) * 2 + 2)

	# Setting up an imperfect relationship (non-linear problem)
	y <- as.integer((2 + (x[, 1] * 2) + (x[, 2] * 3) + (x[, 3] * 4) + (x[, 3] ^ 2) + (x[, 1] * x[, 2])) > 20)

	# Setting up polynomial features
	columns <- ncol(x)
	for (i in 1:columns) {
	x[, paste0(colnames(x)[i], "X", colnames(x)[i])] <- x[, i] * x[, i]
	for (j in i:columns) {
	x[, paste0(colnames(x)[i], "X", colnames(x)[j])] <- x[, i] * x[, j]
	}
	}

	# Add column names and intercept
	colnames(x) <- c("a2", "b3", "c4", "aXa", "aXb1", "aXc", "bXb", "bXc", "cXc*1")
	x <- as.matrix(cbind(Intercept = 1, x))

	# Setup activation function
	activation <- function(x) {
	return(1 / (1 + exp(-x))) # Sigmoid function
	}

	# Calculate Logloss cost
	cost <- function(x, y, param, eps = 1e-15) {
	new_x <- activation(x %*% param)
	new_y <- pmin(pmax(y, eps), 1 - eps)
	return(mean(-new_y * log(new_x) - (1 - new_y) * log(1 - new_x)))
	}
	grad <- function(x, y, param, l1, l2) {
	# One-shot approach to Gradient Boosting for logloss
	# See Linear Regression for the (identical) example
	gradient <- (t((activation(x %% param) - y)) %% x) / nrow(x)

	# This is the same as doing this
	# for (i in 1:ncol(x)) {
	# gradient[i] <- mean((activation(x %% param) - y) x[, i])
	# }

	# Add L1/L2 Regularization
	gradient <- c(gradient[1], gradient[2:length(gradient)] + (l1 * sum(abs(param[-1]))) + (l2 * sum(param[-1] ^ 2)))
	return(gradient)
	}

	EN_Regularization <- function(x, y, init, iters, eta, cost, grad, alpha, lambda) {

	param <- data.frame(matrix(nrow = iters + 1, ncol = length(init) + 1))
	colnames(param) <- c(colnames(x), "Loss")
	param[1, ] <- c(init, cost(x, y, init))

	for(i in 1:iters) {
	param[i + 1, 1:length(init)] <- as.numeric(param[i, 1:length(init)]) - eta * grad(x, y, as.numeric(param[i, 1:length(init)]), alpha, lambda)
	param[i + 1, length(init) + 1] <- cost(x, y, as.numeric(param[i + 1, 1:length(init)]))
	}

	cat("Final cost: ", sprintf("%10.07f", param[nrow(param), ncol(param)]), "\n", sep = "")
	cat("Parameters:", as.numeric(param[nrow(param), 1:(length(init))]), sep = " ")

	param <- cbind(Iteration = 0:(nrow(param) - 1), param)

	return(param)

	}

	param <- EN_Regularization(x = x,
	y = y,
	init = rep(0, 10),
	iters = 1000,
	eta = 0.01,
	cost = cost,
	grad = grad,
	alpha = 0,
	lambda = 0)

	melted_param <- reshape::melt(param[(1:((nrow(param) - 1) / 10)) * 10 + 1, 1:(ncol(param) - 1)], id = c("Iteration"))
	#car::scatterplot(value ~ Iteration \| variable, data = melted_param)
	lattice::xyplot(value ~ Iteration \| variable, data = melted_param, type = "l", panel = function(...) {panel.xyplot(...); panel.abline(h = 0)})