## logit finished
## (gist by @dendisuhubdy, created February 16, 2016)
#' Euclidean Norm
#'
#' @param x numeric vector
norm_vec <- function(x) {
  return(sqrt(sum(x^2)))
}
#' Gradient Step
#'
#' @param gradf handle to function that returns gradient of objective function
#' @param t step-size
#' @param x current parameter estimate
gradient_step <- function(gradf, t, x) {
  return(x - t * gradf(x))
}
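## Quick worked example (illustrative, not part of the original gist): for
## f(x) = x^2 the gradient is 2 * x, so one step from x = 5 with t = 0.1
## lands at 5 - 0.1 * 10 = 4.
gradient_step(function(x) 2 * x, t = 0.1, x = 5)  # returns 4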
#' Gradient Descent (Fixed Step-Size)
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param t step-size
#' @param x0 initial parameter estimate
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance
gradient_descent_fixed <- function(fx, gradf, t, x0, max_iter=1e2, tol=1e-3) {
  xtrace <- 0
  ytrace <- 0
  vals <- 0
  b <- 0
  b[1] <- x0
  for (i in 1:max_iter) {
    ## pass the gradient function itself, not its evaluated value, to gradient_step
    b[i+1] <- gradient_step(gradf, t, b[i])
    xtrace[i] <- b[i+1] / b[i]
    ytrace[i] <- fx(b[i+1]) / fx(b[i])
    vals[i] <- fx(b[i+1]) - fx(b[i])
    ## stop once the change in the objective falls below the tolerance
    if (abs(vals[i]) <= tol) {
      break
    }
  }
  ## result: final iterate, objective value at every iterate, gradient norm at
  ## the final iterate, ratio traces of iterates and objective values, objective differences
  fvals <- sapply(b, fx)
  result <- list(b[length(b)], fvals, norm_vec(gradf(b[length(b)])), xtrace, ytrace, vals)
  return(result)
}
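## Sanity check on a simple convex problem (illustrative sketch, not from the
## original gist; quad_fx, quad_gradf, and quad_fixed are names introduced here):
## minimize f(x) = (x - 3)^2, whose gradient is 2 * (x - 3). With a fixed step
## of 0.1 the final iterate should be close to the minimizer x = 3.
quad_fx <- function(x) (x - 3)^2
quad_gradf <- function(x) 2 * (x - 3)
quad_fixed <- gradient_descent_fixed(quad_fx, quad_gradf, t = 0.1, x0 = 0)
quad_fixed[[1]]  # approximately 3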
#' Backtracking Line Search
#'
#' Shrinks the step-size t by a factor of beta until the sufficient-decrease
#' (Armijo) condition fx(x - t * df(x)) <= fx(x) - alpha * t * ||df(x)||^2 holds.
#'
#' @param fx handle to function that returns objective function values
#' @param t current step-size
#' @param x current parameter estimate
#' @param df handle to function that returns gradient of objective function
#' @param alpha the backtracking parameter
#' @param beta the decrementing multiplier
#' @param max_iter maximum number of backtracking steps
backtrack <- function(fx, t, x, df, alpha=0.5, beta=0.9, max_iter=100) {
  gx <- df(x)
  for (i in 1:max_iter) {
    ## shrink t until the sufficient-decrease condition is satisfied
    if (fx(x - t * gx) >= fx(x) - alpha * t * norm_vec(gx)^2) {
      t <- beta * t
    } else {
      break
    }
  }
  return(t)
}
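## Illustrative call (not from the original gist): on f(x) = x^4 a unit step
## from x = 2 overshoots badly (it would jump to x = -30), so backtrack()
## should return a much smaller step-size.
backtrack(function(x) x^4, t = 1, x = 2, df = function(x) 4 * x^3)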
#' Gradient Descent (Backtracking Step-Size)
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance
gradient_descent_backtrack <- function(fx, gradf, x0, max_iter=1e2, tol=1e-3) {
  xtrace <- 0
  ytrace <- 0
  vals <- 0
  t <- 1
  b <- 0
  b[1] <- x0
  for (i in 1:max_iter) {
    step <- backtrack(fx, t, b[i], gradf)
    ## pass the gradient function itself, not its evaluated value, to gradient_step
    b[i+1] <- gradient_step(gradf, step, b[i])
    xtrace[i] <- b[i+1] / b[i]
    ytrace[i] <- fx(b[i+1]) / fx(b[i])
    vals[i] <- fx(b[i+1]) - fx(b[i])
    ## stop once the change in the objective falls below the tolerance
    if (abs(vals[i]) <= tol) {
      break
    }
  }
  ## result layout matches gradient_descent_fixed()
  fvals <- sapply(b, fx)
  result <- list(b[length(b)], fvals, norm_vec(gradf(b[length(b)])), xtrace, ytrace, vals)
  return(result)
}
#' Gradient Descent
#'
#' @param fx handle to function that returns objective function values
#' @param gradf handle to function that returns gradient of objective function
#' @param x0 initial parameter estimate
#' @param t step-size (NULL to use backtracking line search)
#' @param max_iter maximum number of iterations
#' @param tol convergence tolerance
gradient_descent <- function(fx, gradf, x0, t=NULL, max_iter=1e2, tol=1e-3) {
  ## wrapper: dispatch on whether a fixed step-size was supplied
  if (is.null(t)) {
    return(gradient_descent_backtrack(fx, gradf, x0, max_iter, tol))
  } else {
    return(gradient_descent_fixed(fx, gradf, t, x0, max_iter, tol))
  }
}
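## Illustrative dispatch check (sketch, not from the original gist): with t left
## NULL the wrapper uses the backtracking routine, otherwise the fixed-step one;
## both should land near the minimizer x = 3 of f(x) = (x - 3)^2.
gradient_descent(function(x) (x - 3)^2, function(x) 2 * (x - 3), x0 = 0)[[1]]           # backtracking
gradient_descent(function(x) (x - 3)^2, function(x) 2 * (x - 3), x0 = 0, t = 0.1)[[1]]  # fixed step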
#' Objective Function for Logistic Regression
#'
#' @param y binary response
#' @param X predictor values (a single column of the design matrix)
#' @param beta regression coefficient
#' @param lambda regularization parameter
fx_logistic <- function(y, X, beta, lambda=0) {
  iter <- length(X)
  sum <- 0
  for (i in 1:iter) {
    ## negative log-likelihood contribution of observation i
    sum <- sum - (y[i] * X[i] * beta) + log(1 + exp(X[i] * beta))
  }
  ## the ridge penalty uses the squared norm, matching the lambda * beta gradient term below
  return(sum + (lambda / 2) * norm_vec(beta)^2)
}
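## In this scalar-predictor form the objective above is
##   f(beta) = sum_i [ -y_i * x_i * beta + log(1 + exp(x_i * beta)) ] + (lambda / 2) * beta^2,
## i.e. the logistic negative log-likelihood plus a ridge penalty.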
#' Gradient for Logistic Regression
#'
#' @param y binary response
#' @param X predictor values (a single column of the design matrix)
#' @param beta regression coefficient
#' @param lambda regularization parameter
gradf_logistic <- function(y, X, beta, lambda=0) {
  iter <- length(X)
  sum <- 0
  for (i in 1:iter) {
    ## (predicted probability - observed response) * predictor
    sum <- sum + ((exp(X[i] * beta) / (1 + exp(X[i] * beta))) - y[i]) * X[i]
  }
  return(sum + lambda * beta)
}
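## The corresponding gradient, which the loop above accumulates, is
##   grad f(beta) = sum_i ( exp(x_i * beta) / (1 + exp(x_i * beta)) - y_i ) * x_i + lambda * beta,
## i.e. the derivative of the penalized objective defined in fx_logistic().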
## step 7
## initialize data sequences for testing
set.seed(12345)
n <- 100
p <- 2
X <- matrix(rnorm(n*p),n,p)
x1 <- X[,1]
x2 <- X[,2]
beta0 <- matrix(rnorm(p),p,1)
beta1 <- beta0[1]
beta2 <- beta0[2]
y <- (runif(n) <= plogis(X%*%beta0)) + 0
#' Wrapper for the Logistic Regression Objective
#'
fx <- function(beta) {
  return(fx_logistic(y, x1, beta, lambda))
}
#' Wrapper for the Logistic Regression Gradient
#'
gradf <- function(beta) {
  return(gradf_logistic(y, x1, beta, lambda))
}
## fx and gradf look up lambda in the global environment at call time, so
## re-assigning lambda below changes the penalized problem they define
lambda <- 0
t <- 1
x0 <- x1[1]
result1 <- gradient_descent_fixed(fx, gradf, t, x0, max_iter=1e2, tol=1e-3)
## suboptimality of the objective at each iterate relative to the final iterate
L1 <- result1[[2]] - result1[[2]][length(result1[[2]])]
plot(1:length(result1[[2]]), L1, xlab="iterations", ylab="Logistic Result 1", type="o", col="blue")
## the fluctuations are fairly large and random
## step 8
result2 <- gradient_descent_backtrack(fx, gradf, x0)
## suboptimality relative to the final iterate
L2 <- result2[[2]] - result2[[2]][length(result2[[2]])]
plot(1:length(result2[[2]]), L2, xlab="iterations", ylab="Logistic Result 2", type="o", col="red")
## the fluctuations are fairly structured and neat
## step 9
lambda <- 1.0
t <- 1
x0 <- x1[1]
result3 <- gradient_descent_fixed(fx, gradf, t, x0, max_iter=1e2, tol=1e-3)
## suboptimality relative to the final iterate
L3 <- result3[[2]] - result3[[2]][length(result3[[2]])]
plot(1:length(result3[[2]]), L3, xlab="iterations", ylab="Logistic Result 3", type="o", col="green")
## with a larger lambda the objective converges faster over the first iterations