logit
#' Euclidean Norm
#'
#' @param x numeric vector
norm_vec <- function(x) {
  return(sqrt(sum(x^2)))
}
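## Quick sanity check (not part of the original gist): the Euclidean norm of
## the vector (3, 4) should be exactly 5.
stopifnot(abs(norm_vec(c(3, 4)) - 5) < 1e-12)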
#' Gradient Step
#'
#' @param gradf handle to function that returns gradient of objective function
#' @param t step-size
#' @param x current parameter estimate
gradient_step <- function(gradf, t, x) {
  return(x - t * gradf(x))
}
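## Illustrative check (not part of the original gist): one gradient step on the
## toy quadratic f(x) = x^2 (gradient 2x) from x = 1 with t = 0.1 gives 0.8.
stopifnot(abs(gradient_step(function(x) 2 * x, 0.1, 1) - 0.8) < 1e-12)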
#' Gradient Descent (Fixed Step-Size)
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param t step-size
#' @param x0 initial parameter estimate
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance on the change in objective value
gradient_descent_fixed <- function(fx, gradf, t, x0, max_iter=1e2, tol=1e-3) {
  xtrace <- numeric(0)
  ytrace <- numeric(0)
  vals <- numeric(0)
  b <- x0
  for (i in 1:max_iter) {
    b[i+1] <- gradient_step(gradf, t, b[i])
    xtrace[i] <- b[i+1] / b[i]           # ratio of successive iterates
    ytrace[i] <- fx(b[i+1]) / fx(b[i])   # ratio of successive objective values
    vals[i] <- fx(b[i+1]) - fx(b[i])     # change in objective value
    if (is.finite(vals[i]) && abs(vals[i]) <= tol) {
      break
    }
  }
  ## final iterate, objective at every iterate, final gradient norm, traces
  result <- list(b[length(b)], sapply(b, fx), norm_vec(gradf(b[length(b)])),
                 xtrace, ytrace, vals)
  return(result)
}
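## Minimal usage sketch (not from the original gist): minimize the made-up
## quadratic f(x) = (x - 3)^2 with a fixed step. The first list element is the
## final iterate and should land near the minimizer 3.
res_quad <- gradient_descent_fixed(function(x) (x - 3)^2,
                                   function(x) 2 * (x - 3),
                                   t = 0.1, x0 = 0)
stopifnot(abs(res_quad[[1]] - 3) < 0.1)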
#' Backtracking Line Search
#'
#' @param fx handle to function that returns objective function values
#' @param t current step-size
#' @param x current parameter estimate
#' @param df handle to function that returns gradient of objective function
#' @param alpha the backtracking (sufficient-decrease) parameter
#' @param beta the decrementing multiplier
#' @param max_iter maximum number of shrinking steps
backtrack <- function(fx, t, x, df, alpha=0.5, beta=0.9, max_iter=100) {
  grad <- df(x)
  for (i in 1:max_iter) {
    ## shrink t until the Armijo sufficient-decrease condition holds
    if (fx(x - t * grad) > fx(x) - alpha * t * norm_vec(grad)^2) {
      t <- beta * t
    } else {
      break
    }
  }
  return(t)
}
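## Illustrative check (not from the original gist): on the made-up steep
## quadratic f(x) = 10 * x^2, starting from t = 1 the Armijo condition forces
## the returned step-size to shrink well below 1.
t_bt <- backtrack(function(x) 10 * x^2, t = 1, x = 1, df = function(x) 20 * x)
stopifnot(t_bt < 1)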
#' Gradient Descent (Backtracking Step-Size)
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance on the change in objective value
gradient_descent_backtrack <- function(fx, gradf, x0, max_iter=1e2, tol=1e-3) {
  xtrace <- numeric(0)
  ytrace <- numeric(0)
  vals <- numeric(0)
  t <- 1
  b <- x0
  for (i in 1:max_iter) {
    step <- backtrack(fx, t, b[i], gradf)
    b[i+1] <- gradient_step(gradf, step, b[i])
    xtrace[i] <- b[i+1] / b[i]           # ratio of successive iterates
    ytrace[i] <- fx(b[i+1]) / fx(b[i])   # ratio of successive objective values
    vals[i] <- fx(b[i+1]) - fx(b[i])     # change in objective value
    if (is.finite(vals[i]) && abs(vals[i]) <= tol) {
      break
    }
  }
  ## final iterate, objective at every iterate, final gradient norm, traces
  result <- list(b[length(b)], sapply(b, fx), norm_vec(gradf(b[length(b)])),
                 xtrace, ytrace, vals)
  return(result)
}
#' Gradient Descent
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param t step-size (if NULL, a backtracking line search is used)
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance
gradient_descent <- function(fx, gradf, x0, t=NULL, max_iter=1e2, tol=1e-3) {
  ## wrapper: dispatch on whether a fixed step-size was supplied
  if (is.null(t)) {
    return(gradient_descent_backtrack(fx, gradf, x0, max_iter, tol))
  } else {
    return(gradient_descent_fixed(fx, gradf, t, x0, max_iter, tol))
  }
}
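## Illustrative use of the wrapper (not from the original gist): with t supplied
## it dispatches to the fixed-step solver, with t = NULL to the backtracking
## solver. fq and gq are a made-up quadratic and its gradient.
fq <- function(x) (x - 3)^2
gq <- function(x) 2 * (x - 3)
res_fixed <- gradient_descent(fq, gq, x0 = 0, t = 0.1)   # fixed step-size branch
res_bt <- gradient_descent(fq, gq, x0 = 0)               # backtracking branch
stopifnot(abs(res_fixed[[1]] - 3) < 0.1, abs(res_bt[[1]] - 3) < 0.1)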
#' Objective Function for Logistic Regression
#'
#' @param y binary response
#' @param X predictor values (a single-predictor vector, as used below)
#' @param beta regression coefficient
#' @param lambda regularization parameter
fx_logistic <- function(y, X, beta, lambda=0) {
  n <- length(X)
  val <- 0
  for (i in 1:n) {
    ## negative log-likelihood contribution of observation i
    val <- val - (y[i] * X[i] * beta) + log(1 + exp(X[i] * beta))
  }
  ## ridge penalty (lambda/2)*||beta||^2, matching the lambda*beta term in the gradient
  return(val + (lambda / 2) * norm_vec(beta)^2)
}
#' Gradient for Logistic Regression
#'
#' @param y binary response
#' @param X predictor values (a single-predictor vector, as used below)
#' @param beta regression coefficient
#' @param lambda regularization parameter
gradf_logistic <- function(y, X, beta, lambda=0) {
  n <- length(X)
  grad <- 0
  for (i in 1:n) {
    ## (sigmoid(X[i]*beta) - y[i]) * X[i]
    grad <- grad + ((exp(X[i] * beta) / (1 + exp(X[i] * beta)) - y[i]) * X[i])
  }
  return(grad + lambda * beta)
}
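## Quick numerical sanity check (not part of the original gist): compare the
## analytic gradient to a central finite difference of the objective at an
## arbitrary coefficient. x_chk, y_chk, and b_chk are made up for this check.
set.seed(1)
x_chk <- rnorm(10)
y_chk <- rbinom(10, 1, 0.5)
b_chk <- 0.3
h <- 1e-6
fd <- (fx_logistic(y_chk, x_chk, b_chk + h) -
       fx_logistic(y_chk, x_chk, b_chk - h)) / (2 * h)
stopifnot(abs(gradf_logistic(y_chk, x_chk, b_chk) - fd) < 1e-4)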
## step 7
## initialize data for testing
set.seed(12345)
n <- 100
p <- 2
X <- matrix(rnorm(n * p), n, p)
x1 <- X[, 1]
x2 <- X[, 2]
beta0 <- matrix(rnorm(p), p, 1)
beta1 <- beta0[1]
beta2 <- beta0[2]
y <- (runif(n) <= plogis(X %*% beta0)) + 0
#' Wrapper for the Logistic Regression Objective (single predictor x1, lambda = 0)
#'
fx <- function(beta) {
  return(fx_logistic(y, x1, beta, lambda=0))
}
#' Wrapper for the Logistic Regression Gradient (single predictor x1, lambda = 0)
#'
gradf <- function(beta) {
  return(gradf_logistic(y, x1, beta, lambda=0))
}
t <- 1
x0 <- x1[1]
result1 <- gradient_descent_fixed(fx, gradf, t, x0, max_iter=1e2, tol=1e-3)
## objective value at each iterate, measured relative to the final value
L1 <- result1[[2]] - result1[[2]][length(result1[[2]])]
plot(1:length(L1), L1, xlab="iterations", ylab="Logistic Result 1", type="o", col="blue")
##
## step 8
result2 <- gradient_descent_backtrack(fx, gradf, x0)
## objective value at each iterate, measured relative to the final value
L2 <- result2[[2]] - result2[[2]][length(result2[[2]])]
plot(1:length(L2), L2, xlab="iterations", ylab="Logistic Result 2", type="o", col="red")
## step 9
#' Wrapper for the Regularized Logistic Regression Objective (lambda = 10)
#'
fx <- function(beta) {
  return(fx_logistic(y, x1, beta, lambda=10))
}
#' Wrapper for the Regularized Logistic Regression Gradient (lambda = 10)
#'
gradf <- function(beta) {
  return(gradf_logistic(y, x1, beta, lambda=10))
}
## with lambda = 10 a fixed step of t = 1 overshoots and diverges, so a smaller
## fixed step is used for this run
t <- 0.01
result3 <- gradient_descent_fixed(fx, gradf, t, x0, max_iter=1e3)
## objective value at each iterate, measured relative to the final value
L3 <- result3[[2]] - result3[[2]][length(result3[[2]])]
plot(1:length(L3), L3, xlab="iterations", ylab="Logistic Result 3", type="o", col="green")