explodecomputer · May 8, 2015 18:10
diff --git a/lapla2_chd.R b/lapla2_chd.R
 two_sample_iv <- function(b_exp, b_out, se_exp, se_out, n=10000)
 {
 	b <- sum(b_exp*b_out / se_out^2) / sum(b_exp^2/se_out^2)
 	se <- sqrt(1 / sum(b_exp^2/se_out^2))
 	pval <- pt(b / se, df = n, low=FALSE)
 	return(list(b=b, se=se, pval=pval))
 }

 two_sample_iv_delta <- function(b_exp, b_out, se_exp, se_out, n=10000, cov=1)
 {
 	b <- b_out / b_exp
 	se <- sqrt((se_out^2/b_exp^2) + (b_out^2/b_exp^4)*se_exp^2 - 2*(b_out/b_out^3)*cov)
 	pval <- pt(b / se, df = n, low=FALSE)
 	return(list(b=b, se=se, pval=pval))
 }

 two_sample_iv_ml <- function(b_exp, b_out, se_exp, se_out, n=10000)
 {
 	loglikelihood <- function(param) {
 		return(1/2*sum((b_exp-param[1:length(b_exp)])^2/se_exp^2)+1/2*sum((b_out-param[length(b_exp)+1]*param[1:length(b_exp)])^2/se_out^2))
 	}
 	opt <- optim(
 		c(b_exp, sum(b_exp*b_out/se_out^2)/sum(b_exp^2/se_out^2)),
 		loglikelihood, 
 		hessian=TRUE, 
 		control = list(maxit=25000))

 	b <- opt$par[length(b_exp)+1]
 	se <- sqrt(solve(opt$hessian)[length(b_exp)+1,length(b_exp)+1])
 	pval <- pt(b / se, df = n, low=FALSE)
 	return(list(b=b, se=se, pval=pval))
 }

 cohens_d <- function(m1, m2, s1, s2, n1, n2)
 {
 	spooled <- sqrt((s1^2 + s2^2)/2)
 	d <- (m1 - m2) / spooled

 	s <- sqrt(((n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2 - 2))) * ((n1 + n2) / (n1 + n2 - 2)))

 	return(list(eff = d, s = s))
 }

 percent_to_lor <- function(a, b, c, d)
 {
 	lor <- log((a * d) / (b * c))
 	se <- sqrt(1/a + 1/b + 1/c + 1/d)
 	return(list(eff = lor, s = se))
 }

 pool_means <- function(n, m, s)
 {
 	w <- n / sum(n)
 	mp <- sum(m * w)
 	sp <- sqrt(sum((n - 1) * s^2) / sum(n - 1))
 	return(list(m = mp, s = sp))
 }

 eff_se_2_or_ci <- function(eff, se)
 {
 	or <- exp(eff)
 	cid <- se * 2 * 1.96
 	ci <- paste(formatC((or - cid/2), digits=3, width=0, format="f"), " - ", formatC((or + cid/2), digits=3, width=0, format="f"), sep="")
 	return(list(or = or, ci = ci))
 }

 mr_table <- function(b, se, pval, exposure, outcome, note)
 {
 	note <- as.data.frame(note)
 	d <- data.frame(note, exposure, outcome, paste(formatC(b, digits=3), " (", ifelse(is.numeric(se), formatC(se, digits=3), se), ")", sep=""), pval)
 	names(d) <- c(rep("-", ncol(note)), "Exposure", "Outcome", "IV estimate", "P-value")
 	return(d)
 }


 # exposed group
 # bad
 a = 10 * 0 + 85 * 0.047
 # good
 b = 10 * 1 + 85 * (1-0.047)

 # control group
 # bad
 c = 8554 * 0.091 + 2301 * 0.068
 # good
 d = 8554 * (1-0.091) + 2301 * (1-0.068)

 b_out <- percent_to_lor(a, b, c, d)

 a1 <- pool_means(c(8554, 2301), c(238.5, 199.5), c(60.7, 54.5))
 a2 <- pool_means(c(10, 85), c(84.6, 113.2), c(49.4, 32.8))
 m1 <- a1$m
 m2 <- a2$m
 s1 <- a1$s
 s2 <- a2$s
 n1 <- 8554 + 2301
 n2 <- 10 + 85

 b_exp <- cohens_d(m1, m2, s1, s2, n1, n2)

 tsiv <- two_sample_iv(b_exp$eff, b_out$eff, b_exp$s, b_out$s, n=n1+n2)
 eff_se_2_or_ci(tsiv$b, tsiv$se)

 eff_se_2_or_ci(b_out$eff, b_out$s)



 b_exp_ae <- cohens_d(8554, 10, 238.5, 84.6, 60.7, 49.4)
 b_exp_aa <- cohens_d(2301, 85, 199.5, 113.2, 54.5, 32.8)

 b_out_ae <- percent_to_lor(1, 11, 8555 * 0.091, 8555 * 0.909)
 b_out_aa <- percent_to_lor(85 * 0.047, 85 * (1-0.047), 2301 * 0.068, 2301 * (1-0.068))


 tsiv_ae <- two_sample_iv(b_exp_ae$eff, b_out_ae$eff, b_exp_ae$s, b_out_ae$s, n=n1)
 tsiv_aa <- two_sample_iv(b_exp_aa$eff, b_out_aa$eff, b_exp_aa$s, b_out_aa$s, n=n2)

 tsiv_ae <- two_sample_iv_delta(b_exp_ae$eff, b_out_ae$eff, b_exp_ae$s, b_out_ae$s, n=n1, cov=0)

 eff_se_2_or_ci(tsiv_aa$b, tsiv_aa$se)
 eff_se_2_or_ci(tsiv_ae$b, tsiv_ae$se)

 p1_ncar <- rnorm(8554, 238.5, 60.7)
 p1_car <- rnorm(10, 84.6, 49.4)
 p2_ncar <- rnorm(2301, 199.5, 54.5)
 p2_car <- rnorm(85, 113.2, 32.8)

 chd1_ncar <- sample(c(0, 1), 8554, replace=TRUE, prob=c(90.9, 9.1))
 chd1_car <- sample(c(0, 1), 10, replace=T, prob=c(1, 0))

 chd2_ncar <- sample(c(0, 1), 2301, replace=TRUE, prob=c(100-6.8, 6.8))
 chd2_car <- sample(c(0, 1), 85, replace=T, prob=c(100-4.7, 4.7))

 dat <- data.frame(exposure = c(p1_ncar, p2_ncar, p1_car, p2_car), geno = rep(c("ncar", "car"), c(8554+2301, 10 + 85)), outcome = c(chd1_ncar, chd2_ncar, chd1_car, chd2_car), popn = rep(c("ae", "aa", "ae", "aa"), c(8554, 2301, 10, 85)))

 library(systemfit)
 m1 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat, popn=="ae"))))
 m2 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat, popn=="aa"))))
 m3 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat))))
 m4 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure + popn, inst = ~ geno, method="2SLS", data=subset(dat))))

 library(plyr)
 d2 <- list()
 for(i in 1:50)
 d2[[i]] <- dat
 d2 <- rbind.fill(d2)

 summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2, popn=="ae")))
 summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2, popn=="aa")))
 summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2)))
 summary(systemfit(as.factor(outcome) ~ exposure + popn, inst = ~ geno, method="2SLS", data=subset(d2)))

 r3 <- eff_se_2_or_ci(tsiv$b, tsiv$se)
 r4 <- eff_se_2_or_ci(m4[2,1], m4[2,2])

 tab <- rbind(
 	mr_table(r3$or, r3$ci, tsiv$pval, "LAPLA2 (nmol/min/ml)", "CHD (OR)", list("Two sample MR", "No adjustment", n1+n2)),
 	mr_table(r4$or, r4$ci, m4[2,4], "", "", list("Simulation", "Adjusted for population", n1+n2))
 )

 write.csv(tab, file="~/Desktop/lapla2.csv")
	two_sample_iv <- function(b_exp, b_out, se_exp, se_out, n=10000)
	{
	b <- sum(b_exp*b_out / se_out^2) / sum(b_exp^2/se_out^2)
	se <- sqrt(1 / sum(b_exp^2/se_out^2))
	pval <- pt(b / se, df = n, low=FALSE)
	return(list(b=b, se=se, pval=pval))
	}

	two_sample_iv_delta <- function(b_exp, b_out, se_exp, se_out, n=10000, cov=1)
	{
	b <- b_out / b_exp
	se <- sqrt((se_out^2/b_exp^2) + (b_out^2/b_exp^4)se_exp^2 - 2(b_out/b_out^3)*cov)
	pval <- pt(b / se, df = n, low=FALSE)
	return(list(b=b, se=se, pval=pval))
	}

	two_sample_iv_ml <- function(b_exp, b_out, se_exp, se_out, n=10000)
	{
	loglikelihood <- function(param) {
	return(1/2sum((b_exp-param[1:length(b_exp)])^2/se_exp^2)+1/2sum((b_out-param[length(b_exp)+1]*param[1:length(b_exp)])^2/se_out^2))
	}
	opt <- optim(
	c(b_exp, sum(b_exp*b_out/se_out^2)/sum(b_exp^2/se_out^2)),
	loglikelihood,
	hessian=TRUE,
	control = list(maxit=25000))

	b <- opt$par[length(b_exp)+1]
	se <- sqrt(solve(opt$hessian)[length(b_exp)+1,length(b_exp)+1])
	pval <- pt(b / se, df = n, low=FALSE)
	return(list(b=b, se=se, pval=pval))
	}

	cohens_d <- function(m1, m2, s1, s2, n1, n2)
	{
	spooled <- sqrt((s1^2 + s2^2)/2)
	d <- (m1 - m2) / spooled

	s <- sqrt(((n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2 - 2))) * ((n1 + n2) / (n1 + n2 - 2)))

	return(list(eff = d, s = s))
	}

	percent_to_lor <- function(a, b, c, d)
	{
	lor <- log((a * d) / (b * c))
	se <- sqrt(1/a + 1/b + 1/c + 1/d)
	return(list(eff = lor, s = se))
	}

	pool_means <- function(n, m, s)
	{
	w <- n / sum(n)
	mp <- sum(m * w)
	sp <- sqrt(sum((n - 1) * s^2) / sum(n - 1))
	return(list(m = mp, s = sp))
	}

	eff_se_2_or_ci <- function(eff, se)
	{
	or <- exp(eff)
	cid <- se * 2 * 1.96
	ci <- paste(formatC((or - cid/2), digits=3, width=0, format="f"), " - ", formatC((or + cid/2), digits=3, width=0, format="f"), sep="")
	return(list(or = or, ci = ci))
	}

	mr_table <- function(b, se, pval, exposure, outcome, note)
	{
	note <- as.data.frame(note)
	d <- data.frame(note, exposure, outcome, paste(formatC(b, digits=3), " (", ifelse(is.numeric(se), formatC(se, digits=3), se), ")", sep=""), pval)
	names(d) <- c(rep("-", ncol(note)), "Exposure", "Outcome", "IV estimate", "P-value")
	return(d)
	}


	# exposed group
	# bad
	a = 10 * 0 + 85 * 0.047
	# good
	b = 10 * 1 + 85 * (1-0.047)

	# control group
	# bad
	c = 8554 * 0.091 + 2301 * 0.068
	# good
	d = 8554 * (1-0.091) + 2301 * (1-0.068)

	b_out <- percent_to_lor(a, b, c, d)

	a1 <- pool_means(c(8554, 2301), c(238.5, 199.5), c(60.7, 54.5))
	a2 <- pool_means(c(10, 85), c(84.6, 113.2), c(49.4, 32.8))
	m1 <- a1$m
	m2 <- a2$m
	s1 <- a1$s
	s2 <- a2$s
	n1 <- 8554 + 2301
	n2 <- 10 + 85

	b_exp <- cohens_d(m1, m2, s1, s2, n1, n2)

	tsiv <- two_sample_iv(b_exp$eff, b_out$eff, b_exp$s, b_out$s, n=n1+n2)
	eff_se_2_or_ci(tsiv$b, tsiv$se)

	eff_se_2_or_ci(b_out$eff, b_out$s)



	b_exp_ae <- cohens_d(8554, 10, 238.5, 84.6, 60.7, 49.4)
	b_exp_aa <- cohens_d(2301, 85, 199.5, 113.2, 54.5, 32.8)

	b_out_ae <- percent_to_lor(1, 11, 8555 * 0.091, 8555 * 0.909)
	b_out_aa <- percent_to_lor(85 * 0.047, 85 * (1-0.047), 2301 * 0.068, 2301 * (1-0.068))


	tsiv_ae <- two_sample_iv(b_exp_ae$eff, b_out_ae$eff, b_exp_ae$s, b_out_ae$s, n=n1)
	tsiv_aa <- two_sample_iv(b_exp_aa$eff, b_out_aa$eff, b_exp_aa$s, b_out_aa$s, n=n2)

	tsiv_ae <- two_sample_iv_delta(b_exp_ae$eff, b_out_ae$eff, b_exp_ae$s, b_out_ae$s, n=n1, cov=0)

	eff_se_2_or_ci(tsiv_aa$b, tsiv_aa$se)
	eff_se_2_or_ci(tsiv_ae$b, tsiv_ae$se)

	p1_ncar <- rnorm(8554, 238.5, 60.7)
	p1_car <- rnorm(10, 84.6, 49.4)
	p2_ncar <- rnorm(2301, 199.5, 54.5)
	p2_car <- rnorm(85, 113.2, 32.8)

	chd1_ncar <- sample(c(0, 1), 8554, replace=TRUE, prob=c(90.9, 9.1))
	chd1_car <- sample(c(0, 1), 10, replace=T, prob=c(1, 0))

	chd2_ncar <- sample(c(0, 1), 2301, replace=TRUE, prob=c(100-6.8, 6.8))
	chd2_car <- sample(c(0, 1), 85, replace=T, prob=c(100-4.7, 4.7))

	dat <- data.frame(exposure = c(p1_ncar, p2_ncar, p1_car, p2_car), geno = rep(c("ncar", "car"), c(8554+2301, 10 + 85)), outcome = c(chd1_ncar, chd2_ncar, chd1_car, chd2_car), popn = rep(c("ae", "aa", "ae", "aa"), c(8554, 2301, 10, 85)))

	library(systemfit)
	m1 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat, popn=="ae"))))
	m2 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat, popn=="aa"))))
	m3 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat))))
	m4 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure + popn, inst = ~ geno, method="2SLS", data=subset(dat))))

	library(plyr)
	d2 <- list()
	for(i in 1:50)
	d2[[i]] <- dat
	d2 <- rbind.fill(d2)

	summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2, popn=="ae")))
	summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2, popn=="aa")))
	summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2)))
	summary(systemfit(as.factor(outcome) ~ exposure + popn, inst = ~ geno, method="2SLS", data=subset(d2)))

	r3 <- eff_se_2_or_ci(tsiv$b, tsiv$se)
	r4 <- eff_se_2_or_ci(m4[2,1], m4[2,2])

	tab <- rbind(
	mr_table(r3$or, r3$ci, tsiv$pval, "LAPLA2 (nmol/min/ml)", "CHD (OR)", list("Two sample MR", "No adjustment", n1+n2)),
	mr_table(r4$or, r4$ci, m4[2,4], "", "", list("Simulation", "Adjusted for population", n1+n2))
	)

	write.csv(tab, file="~/Desktop/lapla2.csv")