Last active
June 4, 2020 02:54
-
-
Save ammaraziz/be2b002a000a6b40a7818b4b38afe1d5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mle <- function(data,start=NULL,eps=10**(-9),logit=FALSE,cov=NULL){ | |
# INPUT: | |
# data: data matrix with | |
# 1) observed outcome D (0: no; 1: yes) | |
# 2) observed types at risk for (0: no; 1: yes) | |
# start: starting vector for the algorithm | |
# eps: accuracy parameter to stop the algorithm | |
# logit: logical to indicate whether the logit model is used | |
# (i.e. correct for a covariate) | |
# cov: covariate vector to correct for (only if logit=TRUE) | |
# | |
# OUTPUT: | |
# mle: vector with estimated parameter values | |
# results: table with results (nr HPV+, MLE, 95% CI, nr lesions, AF) | |
# cov: p-value for the slope of the logit-model (only if logit=TRUE) | |
K <- length(data[1,-1]); n <- length(data[,1]) | |
if (is.null(start)){ | |
start <- colSums(data[,-1])/n; start[start==0] <- 0.01; start[start==1] <- 0.99 | |
if (logit){ | |
start <- c(log(start/(1-start)),0) | |
} | |
} | |
est <- as.matrix(start) | |
ll <- numeric(0) | |
m <- 1; diff1 <- diff2 <- 1 | |
while (max(abs(diff1),abs(diff2))>eps){ | |
ll.m <- log_lik(est[,m],data,logit,cov) | |
if (((m>1)&&(ll.m$ll<ll[m-1]))||(ll.m$ll=="NaN")){ | |
x0 <- est[,m-1] | |
ll.0 <- log_lik(x0,data,logit,cov) | |
ll0 <-ll.0$ll | |
dir <- ll.0$H.inv%*%ll.0$g | |
est.n <- line_search(x0,ll0,dir,data,logit,cov) | |
ll.m <- log_lik(est.n,data,logit,cov) | |
est <- cbind(est[,-m],est.n) | |
m <- m-1 | |
} else { | |
ll <- c(ll,ll.m$ll) | |
est <- cbind(est,est[,m]-ll.m$H.inv%*%ll.m$g) | |
} | |
if (logit){ | |
est[est[,m+1]<(-15),m+1] <- -15; est[est[,m+1]>10,m+1] <- 10 | |
diff1 <- 1/(1+exp(-est[,m+1]))-1/(1+exp(-est[,m])); | |
} else { | |
est[est[,m+1]<=eps*10**(-3),m+1] <- 10**(-5) | |
est[est[,m+1]>=1-eps*10**(-3),m+1] <- 1-10**(-5) | |
diff1 <- est[,m+1]-est[,m] | |
} | |
if (m>1){ diff2 <- ll[m]-ll[m-1] } | |
m <- m+1 | |
} | |
ll.m <- log_lik(est[,m-1],data,logit,cov) | |
se <- sqrt(-diag(ll.m$H.inv)) | |
if (logit) { mle <- c(est[1:K,m-1]-mean(cov)*est[K+1,m-1],est[K+1,m-1]) | |
} else { mle <- est[,m-1] } | |
if (logit){ | |
a <- mle[1:K]; b <- mle[K+1]; x <- mean(cov) | |
mle.C <- a+b*x | |
y1 <- mle.C-qnorm(0.975)*se[-(K+1)]; y2 <- mle.C+qnorm(0.975)*se[-(K+1)] | |
CI <- cbind(1/(1+exp(-y1)),1/(1+exp(-y2))) | |
mle.C <- 1/(1+exp(-mle.C)) | |
} else { | |
CI <- cbind(mle-qnorm(0.975)*se,mle+qnorm(0.975)*se) | |
} | |
CI[CI[,1]<0,1] <- 10**(-5); CI[CI[,2]>1,2] <- 1-10**(-5) | |
Wald.score <- abs(mle-rep(0,K+logit))/se | |
p.val <- 2*(1-pnorm(Wald.score)) | |
if (logit) { nr <- colSums(data[,-1])*mle.C | |
} else { nr <- colSums(data[,-1])*mle } | |
AF <- nr/sum(nr) | |
if (logit){ | |
out <- cbind(colSums(data[,-1]),round(mle.C,3),round(CI,3),round(nr,1),round(AF,3)) | |
dimnames(out)[[2]] <- c("#HPV+","MLE","95% CI","","#lesions","AF") | |
list(mle=mle,results=out,cov=round(p.val[K+1],3)) | |
} else { | |
out <- cbind(colSums(data[,-1]),round(mle,3),round(CI,3),round(nr,1),round(AF,3)) | |
dimnames(out)[[2]] <- c("#HPV+","MLE","95% CI","","#lesions","AF") | |
list(mle=mle,results=out) | |
} | |
} | |
log_lik <- function(par,data,logit=FALSE,cov=NULL){ | |
# INPUT: | |
# par: parameter vector to determine the log-likelihood at | |
# data / logit / cov: as in mle-function | |
# | |
# OUTPUT: | |
# ll: value of the log-likelihood at par | |
# g: gradient vector with partial derivative of log-likelihood | |
# H: Hessian matrix with the second partial derivative of log-likelihood | |
# H.inv: inverse of H | |
K <- length(data[1,-1]); N <- length(data[,1]) | |
d <- data[,1]; Y <- data[,-1] | |
if (logit){ X <- cov-mean(cov) } | |
if (logit){ | |
pred <- matrix(rep(par[1:K],N),byrow=TRUE,ncol=K)+par[K+1]*X | |
pi <- 1/(1+exp(-pred)) | |
} else { | |
pi <- matrix(rep(par,N),byrow=TRUE,ncol=K) | |
} | |
prod <- apply((1-pi)**Y,1,prod) | |
if (logit){ | |
sum.1 <- apply(1-(1-pi)**Y,1,sum) | |
sum.2 <- apply((pi**Y)*(1-pi**Y),1,sum) | |
} | |
ll <- sum(d*log(1-prod)+(1-d)*log(prod)) | |
g <- numeric(K+logit); H <- matrix(0,ncol=K+logit,nrow=K+logit) | |
for (k in 1:K){ | |
ind.k <- which(Y[,k]==1) | |
if (logit){ | |
g[k] <- sum((d*pi[,k]/(1-prod)-pi[,k])[ind.k]) | |
H[k,k] <- sum((d*pi[,k]*(1-pi[,k]-prod)/(1-prod)**2-pi[,k]*(1-pi[,k]))[ind.k]) | |
H[k,K+1] <- H[K+1,k] <- sum((X*(d*pi[,k]*((1-pi[,k])*(1-prod)-prod*sum.1)/(1-prod)**2 | |
-pi[,k]*(1-pi[,k])))[ind.k]) | |
} else { | |
g[k] <- sum((d*prod/((1-pi[,k])*(1-prod))-(1-d)/(1-pi[,k]))[ind.k]) | |
H[k,k] <- -sum((d*prod**2/((1-prod)**2*(1-pi[,k])**2)+(1-d)/(1-pi[,k])**2)[ind.k]) | |
} | |
for (l in min((k+1),K):K){ | |
if (k!=l){ | |
ind.kl<- which((Y[,k]==1)&(Y[,l]==1)) | |
if (logit){ | |
H[k,l] <- H[l,k] <- sum((d*pi[,k]*pi[,l]*prod/(1-prod)**2)[ind.kl]) | |
} else { | |
H[k,l] <- H[l,k] <- -sum((d*prod**2/((1-prod)**2*(1-pi[,k])*(1-pi[,l])))[ind.kl]) | |
} | |
} | |
} | |
} | |
if (logit){ | |
g[K+1] <- sum(X*(d*sum.1/(1-prod)-sum.1)) | |
H[K+1,K+1] <- sum(X**2*(d*((1-prod)*sum.2-prod*sum.1**2)/(1-prod)**2-sum.2)) | |
} | |
if (logit){ | |
# invert H via block-inversion for case of "singularity" due to precision | |
A <- H[1:K,1:K]; B <- H[K+1,1:K]; C <- H[1:K,K+1]; D <- H[K+1,K+1] | |
A.inv <- solve(A); A.inv_B <- A.inv%*%B; C_A.inv <- C%*%A.inv | |
L1 <- A.inv + (A.inv_B%*%L4)%*%C_A.inv; L2 <- -A.inv_B%*%L4 | |
L3 <- -L4%*%C_A.inv; L4 <- 1/(D-C%*%A.inv%*%B) | |
H.inv <- rbind(cbind(L1,L2),cbind(L3,L4)) | |
} else { | |
H.inv <- solve(H) | |
} | |
list(ll=ll,g=g,H=H,H.inv=H.inv) | |
} | |
line_search <- function(x0, ll0, dir, data, logit=FALSE, cov=NULL) { | |
# INPUT: | |
# x0: current estimate to start line search algorithm from | |
# ll0: log-likelihood value at x0 | |
# dir: direction for line search (H**(-1)*g) | |
# data / logit / cov: as in mle-function | |
# | |
# OUTPUT: | |
# x.n: new estimate based on line search | |
lambda <- 1 | |
ll.n <- log_lik(x0,data,logit,cov) | |
# determine the correction direction | |
x.d <- x0-0.01*dir; | |
if (logit) { x.d[x.d>10] <- 10 | |
x.d[-x.d>15] <- -15 | |
} else { x.d[x.d<=0] <- 10**(-5) | |
x.d[x.d>=1] <- 1-10**(-5) } | |
ll.d <- log_lik(x.d,data,logit,cov) | |
if (ll.d$ll<ll.n$ll) {dir <- -dir} | |
ll.n$ll <- ll.n$ll-1 | |
while(ll.n$ll<ll0){ | |
lambda <- lambda/2 | |
x.n <- x0-lambda*dir; | |
if (logit){ x.n[x.n>10] <- 10 | |
x.n[-x.n>15] <- -15 | |
} else { x.n[x.n<=0] <- 10**(-5) | |
x.n[x.n>=1] <- 1-10**(-5) } | |
ll.n <- log_lik(x.n,data,logit,cov)} | |
return(x.n) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment