logit
#' Euclidean Norm
#'
#' @param x numeric vector
norm_vec <- function(x) {
  return(sqrt(sum(x^2)))
}
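## Quick sanity check (not part of the original gist): the Euclidean norm of
## the vector (3, 4) should be exactly 5.
stopifnot(abs(norm_vec(c(3, 4)) - 5) < 1e-12)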
#' Gradient Step
#'
#' @param gradf handle to function that returns gradient of objective function
#' @param t step-size
#' @param x current parameter estimate
gradient_step <- function(gradf, t, x) {
  return(x - t * gradf(x))
}
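## Illustrative check (not part of the original gist): one gradient step on the
## toy quadratic f(x) = x^2 (gradient 2x) from x = 1 with t = 0.1 gives 0.8.
stopifnot(abs(gradient_step(function(x) 2 * x, 0.1, 1) - 0.8) < 1e-12)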
#' Gradient Descent (Fixed Step-Size)
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param t step-size
#' @param x0 initial parameter estimate
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance on the change in objective value
gradient_descent_fixed <- function(fx, gradf, t, x0, max_iter=1e2, tol=1e-3) {
  xtrace <- numeric(0)
  ytrace <- numeric(0)
  vals <- numeric(0)
  b <- x0
  for (i in 1:max_iter) {
    b[i+1] <- gradient_step(gradf, t, b[i])
    xtrace[i] <- b[i+1] / b[i]           # ratio of successive iterates
    ytrace[i] <- fx(b[i+1]) / fx(b[i])   # ratio of successive objective values
    vals[i] <- fx(b[i+1]) - fx(b[i])     # change in objective value
    if (is.finite(vals[i]) && abs(vals[i]) <= tol) {
      break
    }
  }
  ## final iterate, objective at every iterate, final gradient norm, traces
  result <- list(b[length(b)], sapply(b, fx), norm_vec(gradf(b[length(b)])),
                 xtrace, ytrace, vals)
  return(result)
}
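## Minimal usage sketch (not from the original gist): minimize the made-up
## quadratic f(x) = (x - 3)^2 with a fixed step. The first list element is the
## final iterate and should land near the minimizer 3.
res_quad <- gradient_descent_fixed(function(x) (x - 3)^2,
                                   function(x) 2 * (x - 3),
                                   t = 0.1, x0 = 0)
stopifnot(abs(res_quad[[1]] - 3) < 0.1)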
#' Backtracking Line Search
#'
#' @param fx handle to function that returns objective function values
#' @param t current step-size
#' @param x current parameter estimate
#' @param df handle to function that returns gradient of objective function
#' @param alpha the backtracking (sufficient-decrease) parameter
#' @param beta the decrementing multiplier
#' @param max_iter maximum number of shrinking steps
backtrack <- function(fx, t, x, df, alpha=0.5, beta=0.9, max_iter=100) {
  grad <- df(x)
  for (i in 1:max_iter) {
    ## shrink t until the Armijo sufficient-decrease condition holds
    if (fx(x - t * grad) > fx(x) - alpha * t * norm_vec(grad)^2) {
      t <- beta * t
    } else {
      break
    }
  }
  return(t)
}
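## Illustrative check (not from the original gist): on the made-up steep
## quadratic f(x) = 10 * x^2, starting from t = 1 the Armijo condition forces
## the returned step-size to shrink well below 1.
t_bt <- backtrack(function(x) 10 * x^2, t = 1, x = 1, df = function(x) 20 * x)
stopifnot(t_bt < 1)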
#' Gradient Descent (Backtracking Step-Size)
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance on the change in objective value
gradient_descent_backtrack <- function(fx, gradf, x0, max_iter=1e2, tol=1e-3) {
  xtrace <- numeric(0)
  ytrace <- numeric(0)
  vals <- numeric(0)
  t <- 1
  b <- x0
  for (i in 1:max_iter) {
    step <- backtrack(fx, t, b[i], gradf)
    b[i+1] <- gradient_step(gradf, step, b[i])
    xtrace[i] <- b[i+1] / b[i]           # ratio of successive iterates
    ytrace[i] <- fx(b[i+1]) / fx(b[i])   # ratio of successive objective values
    vals[i] <- fx(b[i+1]) - fx(b[i])     # change in objective value
    if (is.finite(vals[i]) && abs(vals[i]) <= tol) {
      break
    }
  }
  ## final iterate, objective at every iterate, final gradient norm, traces
  result <- list(b[length(b)], sapply(b, fx), norm_vec(gradf(b[length(b)])),
                 xtrace, ytrace, vals)
  return(result)
}
#' Gradient Descent
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param t step-size (if NULL, a backtracking line search is used)
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance
gradient_descent <- function(fx, gradf, x0, t=NULL, max_iter=1e2, tol=1e-3) {
  ## wrapper: dispatch on whether a fixed step-size was supplied
  if (is.null(t)) {
    return(gradient_descent_backtrack(fx, gradf, x0, max_iter, tol))
  } else {
    return(gradient_descent_fixed(fx, gradf, t, x0, max_iter, tol))
  }
}
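## Illustrative use of the wrapper (not from the original gist): with t supplied
## it dispatches to the fixed-step solver, with t = NULL to the backtracking
## solver. fq and gq are a made-up quadratic and its gradient.
fq <- function(x) (x - 3)^2
gq <- function(x) 2 * (x - 3)
res_fixed <- gradient_descent(fq, gq, x0 = 0, t = 0.1)   # fixed step-size branch
res_bt <- gradient_descent(fq, gq, x0 = 0)               # backtracking branch
stopifnot(abs(res_fixed[[1]] - 3) < 0.1, abs(res_bt[[1]] - 3) < 0.1)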
#' Objective Function for Logistic Regression
#'
#' @param y binary response
#' @param X predictor values (a single-predictor vector, as used below)
#' @param beta regression coefficient
#' @param lambda regularization parameter
fx_logistic <- function(y, X, beta, lambda=0) {
  n <- length(X)
  val <- 0
  for (i in 1:n) {
    ## negative log-likelihood contribution of observation i
    val <- val - (y[i] * X[i] * beta) + log(1 + exp(X[i] * beta))
  }
  ## ridge penalty (lambda/2)*||beta||^2, matching the lambda*beta term in the gradient
  return(val + (lambda / 2) * norm_vec(beta)^2)
}
#' Gradient for Logistic Regression
#'
#' @param y binary response
#' @param X predictor values (a single-predictor vector, as used below)
#' @param beta regression coefficient
#' @param lambda regularization parameter
gradf_logistic <- function(y, X, beta, lambda=0) {
  n <- length(X)
  grad <- 0
  for (i in 1:n) {
    ## (sigmoid(X[i]*beta) - y[i]) * X[i]
    grad <- grad + ((exp(X[i] * beta) / (1 + exp(X[i] * beta)) - y[i]) * X[i])
  }
  return(grad + lambda * beta)
}
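## Quick numerical sanity check (not part of the original gist): compare the
## analytic gradient to a central finite difference of the objective at an
## arbitrary coefficient. x_chk, y_chk, and b_chk are made up for this check.
set.seed(1)
x_chk <- rnorm(10)
y_chk <- rbinom(10, 1, 0.5)
b_chk <- 0.3
h <- 1e-6
fd <- (fx_logistic(y_chk, x_chk, b_chk + h) -
       fx_logistic(y_chk, x_chk, b_chk - h)) / (2 * h)
stopifnot(abs(gradf_logistic(y_chk, x_chk, b_chk) - fd) < 1e-4)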
## step 7
## initialize data for testing
set.seed(12345)
n <- 100
p <- 2
X <- matrix(rnorm(n * p), n, p)
x1 <- X[, 1]
x2 <- X[, 2]
beta0 <- matrix(rnorm(p), p, 1)
beta1 <- beta0[1]
beta2 <- beta0[2]
y <- (runif(n) <= plogis(X %*% beta0)) + 0
#' Wrapper for the Logistic Regression Objective (single predictor x1, lambda = 0)
#'
fx <- function(beta) {
  return(fx_logistic(y, x1, beta, lambda=0))
}
#' Wrapper for the Logistic Regression Gradient (single predictor x1, lambda = 0)
#'
gradf <- function(beta) {
  return(gradf_logistic(y, x1, beta, lambda=0))
}
t <- 1
x0 <- x1[1]
result1 <- gradient_descent_fixed(fx, gradf, t, x0, max_iter=1e2, tol=1e-3)
## objective value at each iterate, measured relative to the final value
L1 <- result1[[2]] - result1[[2]][length(result1[[2]])]
plot(1:length(L1), L1, xlab="iterations", ylab="Logistic Result 1", type="o", col="blue")
##
## step 8
result2 <- gradient_descent_backtrack(fx, gradf, x0)
## objective value at each iterate, measured relative to the final value
L2 <- result2[[2]] - result2[[2]][length(result2[[2]])]
plot(1:length(L2), L2, xlab="iterations", ylab="Logistic Result 2", type="o", col="red")
## step 9
#' Wrapper for the Regularized Logistic Regression Objective (lambda = 10)
#'
fx <- function(beta) {
  return(fx_logistic(y, x1, beta, lambda=10))
}
#' Wrapper for the Regularized Logistic Regression Gradient (lambda = 10)
#'
gradf <- function(beta) {
  return(gradf_logistic(y, x1, beta, lambda=10))
}
## with lambda = 10 a fixed step of t = 1 overshoots and diverges, so a smaller
## fixed step is used for this run
t <- 0.01
result3 <- gradient_descent_fixed(fx, gradf, t, x0, max_iter=1e3)
## objective value at each iterate, measured relative to the final value
L3 <- result3[[2]] - result3[[2]][length(result3[[2]])]
plot(1:length(L3), L3, xlab="iterations", ylab="Logistic Result 3", type="o", col="green")