Created
May 31, 2017 01:35
-
-
Save jtrive84/b609fe6f949db9e0deea1976a6730df9 to your computer and use it in GitHub Desktop.
Implementation of Fisher Scoring algorithm used to determine Logistic Regression coefficients when modeling a dataset with a dichotomous response.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### `getLogisticCoefficients_1` tracks the development of the coefficent vector and | |
### Information matrix across iterations, and identifies them as `information_progression` | |
### and coefficients_progression` in the returned list `returnList` ===> | |
getLogisticCoefficients_1 <- function(design_matrix, response_vector, epsilon=.0001) { | |
# =================================================================== | |
# design_matrix (X) => n-by-(p+1) | | |
# response_vector (y) => n-by-1 | | |
# probability_vector (p) => n-by-1 | | |
# weights_matrix (W) => n-by-n | | |
# =================================================================== | |
# n => # of observations | | |
# (p + 1) => # of params, +1 for intercept term | | |
# =================================================================== | |
# U => First derivative of Log-Likelihood with respect to | | |
# each beta_i. The `Score Function`: X_transpose * (y - p) | | |
# | | |
# I => Second derivative of Log-Likelihood with respect to | | |
# each beta_i. The `Information Matrix`: (X_transpose * W * X) | | |
# | | |
# X^T*W*X results in a (p+1)-by-(p+1) matrix | | |
# X^T(y - p) results in a (p+1)-by-1 matrix | | |
# (X^T*W*X)^-1 * X^T(y - p) results in a (p+1)-by-1 matrix | | |
# ==================================================================| | |
I_progression <- list() | |
coeffs_progression <- list() | |
X <- as.matrix(design_matrix) | |
y <- as.matrix(response_vector) | |
# initialize functions used in iteration => | |
U <- function(X_, y_, p_) return(t(X_) %*% (y_ - p_)) | |
I <- function(X_, W_) return(t(X_) %*% W_ %*% X_) | |
pi_i <- function(v_) return(exp(v_)/(1 + exp(v_))) | |
p <- function(X_, beta_) return(pi_i(X_ %*% beta_)) | |
W <- function(p_) return(diag(as.vector(p_*(1-p_)))) | |
# initialize beta_0, p_0, W_0, I_0 & U_0 => | |
beta_0 <- matrix(rep(0, ncol(X)), nrow=ncol(X), ncol=1, byrow=FALSE, dimnames=NULL) | |
p_0 <- p(X_=X, beta_=beta_0) | |
W_0 <- W(p_=p_0) | |
I_0 <- I(X_=X, W_=W_0) | |
U_0 <- U(X_=X, y_=y, p_=p_0) | |
# initialize variables for iteration => | |
beta_old <- beta_0 | |
iter_I <- I_0 | |
iter_U <- U_0 | |
iter_p <- p_0 | |
iter_W <- W_0 | |
fisher_scoring_iterations <- 0 | |
#I_progression[[length(I_progression)+1]] <- iter_I | |
coeffs_progression[[length(coeffs_progression)+1]] <- beta_old | |
# iterate until difference between beta's is less than epsilon => | |
while(TRUE) { | |
# Fisher Scoring Update Step => | |
fisher_scoring_iterations <- fisher_scoring_iterations + 1 | |
beta_new <- beta_old + solve(iter_I) %*% iter_U | |
coeffs_progression[[length(coeffs_progression)+1]] <- beta_new | |
I_progression[[length(I_progression)+1]] <- iter_I | |
if (all(abs(beta_new - beta_old) < epsilon)) { | |
model_parameters <- beta_new | |
fitted_values <- pi_i(X %*% model_parameters) | |
covariance_matrix <- solve(iter_I) | |
break | |
} else { | |
iter_p <- p(X_=X, beta_=beta_new) | |
iter_W <- W(p_=iter_p) | |
iter_I <- I(X_=X, W_=iter_W) | |
iter_U <- U(X_=X, y_=y, p_=iter_p) | |
beta_old <- beta_new | |
} | |
} | |
returnList <- list( | |
'model_parameters'=model_parameters, | |
'covariance_matrix'=covariance_matrix, | |
'fitted_values'=fitted_values, | |
'nbr_iterations'=fisher_scoring_iterations, | |
'information_progression'=I_progression, | |
'coefficients_progression'=coeffs_progression | |
) | |
return(returnList) | |
} | |
### `getLogisticCoefficients_2` does not track the development of the coefficent vector | |
### or the Information matrix across iterations, but is otherwise returns the same | |
### summary statistics as `getLogisticCoefficients_1` ===> | |
getLogisticCoefficients_2 <- function(design_matrix, response_vector, epsilon=.0001) { | |
# ========================================================================= | |
# design_matrix `X` => n-by-(p+1) | | |
# response_vector `y` => n-by-1 | | |
# probability_vector `p` => n-by-1 | | |
# weights_matrix `W` => n-by-n | | |
# epsilon => threshold above which iteration continues | | |
# ========================================================================= | |
# n => # of observations | | |
# (p + 1) => # of parameterss, +1 for intercept term | | |
# ========================================================================= | |
# U => First derivative of Log-Likelihood with respect to | | |
# each beta_i, i.e. `Score Function`: X_transpose * (y - p) | | |
# | | |
# I => Second derivative of Log-Likelihood with respect to | | |
# each beta_i. The `Information Matrix`: (X_transpose * W * X) | | |
# | | |
# X^T*W*X results in a (p+1)-by-(p+1) matrix | | |
# X^T(y - p) results in a (p+1)-by-1 matrix | | |
# (X^T*W*X)^-1 * X^T(y - p) results in a (p+1)-by-1 matrix | | |
# ========================================================================| | |
X <- as.matrix(design_matrix) | |
y <- as.matrix(response_vector) | |
# initialize logistic function used for Scoring calculations => | |
pi_i <- function(v) return(exp(v)/(1 + exp(v))) | |
# initialize beta_0, p_0, W_0, I_0 & U_0 => | |
beta_0 <- matrix(rep(0, ncol(X)), nrow=ncol(X), ncol=1, byrow=FALSE, dimnames=NULL) | |
p_0 <- pi_i(X %*% beta_0) | |
W_0 <- diag(as.vector(p_0*(1-p_0))) | |
I_0 <- t(X) %*% W_0 %*% X | |
U_0 <- t(X) %*% (y - p_0) | |
# initialize variables for iteration => | |
beta_old <- beta_0 | |
iter_I <- I_0 | |
iter_U <- U_0 | |
iter_p <- p_0 | |
iter_W <- W_0 | |
fisher_scoring_iterations <- 0 | |
# iterate until difference between abs(beta_new - beta_old) < epsilon => | |
while(TRUE) { | |
# Fisher Scoring Update Step => | |
fisher_scoring_iterations <- fisher_scoring_iterations + 1 | |
beta_new <- beta_old + solve(iter_I) %*% iter_U | |
if (all(abs(beta_new - beta_old) < epsilon)) { | |
model_parameters <- beta_new | |
fitted_values <- pi_i(X %*% model_parameters) | |
covariance_matrix <- solve(iter_I) | |
break | |
} else { | |
iter_p <- pi_i(X %*% beta_new) | |
iter_W <- diag(as.vector(iter_p*(1-iter_p))) | |
iter_I <- t(X) %*% iter_W %*% X | |
iter_U <- t(X) %*% (y - iter_p) | |
beta_old <- beta_new | |
} | |
} | |
summaryList <- list( | |
'model_parameters'=model_parameters, | |
'covariance_matrix'=covariance_matrix, | |
'fitted_values'=fitted_values, | |
'number_iterations'=fisher_scoring_iterations | |
) | |
return(summaryList) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment