Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created May 8, 2015 18:10
Show Gist options
  • Save explodecomputer/ed197dab8884f4905335 to your computer and use it in GitHub Desktop.
Save explodecomputer/ed197dab8884f4905335 to your computer and use it in GitHub Desktop.
Two sample IV of LAPLA2 and CHD
two_sample_iv <- function(b_exp, b_out, se_exp, se_out, n=10000)
{
b <- sum(b_exp*b_out / se_out^2) / sum(b_exp^2/se_out^2)
se <- sqrt(1 / sum(b_exp^2/se_out^2))
pval <- pt(b / se, df = n, low=FALSE)
return(list(b=b, se=se, pval=pval))
}
two_sample_iv_delta <- function(b_exp, b_out, se_exp, se_out, n=10000, cov=1)
{
b <- b_out / b_exp
se <- sqrt((se_out^2/b_exp^2) + (b_out^2/b_exp^4)*se_exp^2 - 2*(b_out/b_out^3)*cov)
pval <- pt(b / se, df = n, low=FALSE)
return(list(b=b, se=se, pval=pval))
}
two_sample_iv_ml <- function(b_exp, b_out, se_exp, se_out, n=10000)
{
loglikelihood <- function(param) {
return(1/2*sum((b_exp-param[1:length(b_exp)])^2/se_exp^2)+1/2*sum((b_out-param[length(b_exp)+1]*param[1:length(b_exp)])^2/se_out^2))
}
opt <- optim(
c(b_exp, sum(b_exp*b_out/se_out^2)/sum(b_exp^2/se_out^2)),
loglikelihood,
hessian=TRUE,
control = list(maxit=25000))
b <- opt$par[length(b_exp)+1]
se <- sqrt(solve(opt$hessian)[length(b_exp)+1,length(b_exp)+1])
pval <- pt(b / se, df = n, low=FALSE)
return(list(b=b, se=se, pval=pval))
}
cohens_d <- function(m1, m2, s1, s2, n1, n2)
{
spooled <- sqrt((s1^2 + s2^2)/2)
d <- (m1 - m2) / spooled
s <- sqrt(((n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2 - 2))) * ((n1 + n2) / (n1 + n2 - 2)))
return(list(eff = d, s = s))
}
percent_to_lor <- function(a, b, c, d)
{
lor <- log((a * d) / (b * c))
se <- sqrt(1/a + 1/b + 1/c + 1/d)
return(list(eff = lor, s = se))
}
pool_means <- function(n, m, s)
{
w <- n / sum(n)
mp <- sum(m * w)
sp <- sqrt(sum((n - 1) * s^2) / sum(n - 1))
return(list(m = mp, s = sp))
}
eff_se_2_or_ci <- function(eff, se)
{
or <- exp(eff)
cid <- se * 2 * 1.96
ci <- paste(formatC((or - cid/2), digits=3, width=0, format="f"), " - ", formatC((or + cid/2), digits=3, width=0, format="f"), sep="")
return(list(or = or, ci = ci))
}
mr_table <- function(b, se, pval, exposure, outcome, note)
{
note <- as.data.frame(note)
d <- data.frame(note, exposure, outcome, paste(formatC(b, digits=3), " (", ifelse(is.numeric(se), formatC(se, digits=3), se), ")", sep=""), pval)
names(d) <- c(rep("-", ncol(note)), "Exposure", "Outcome", "IV estimate", "P-value")
return(d)
}
# exposed group
# bad
a = 10 * 0 + 85 * 0.047
# good
b = 10 * 1 + 85 * (1-0.047)
# control group
# bad
c = 8554 * 0.091 + 2301 * 0.068
# good
d = 8554 * (1-0.091) + 2301 * (1-0.068)
b_out <- percent_to_lor(a, b, c, d)
a1 <- pool_means(c(8554, 2301), c(238.5, 199.5), c(60.7, 54.5))
a2 <- pool_means(c(10, 85), c(84.6, 113.2), c(49.4, 32.8))
m1 <- a1$m
m2 <- a2$m
s1 <- a1$s
s2 <- a2$s
n1 <- 8554 + 2301
n2 <- 10 + 85
b_exp <- cohens_d(m1, m2, s1, s2, n1, n2)
tsiv <- two_sample_iv(b_exp$eff, b_out$eff, b_exp$s, b_out$s, n=n1+n2)
eff_se_2_or_ci(tsiv$b, tsiv$se)
eff_se_2_or_ci(b_out$eff, b_out$s)
b_exp_ae <- cohens_d(8554, 10, 238.5, 84.6, 60.7, 49.4)
b_exp_aa <- cohens_d(2301, 85, 199.5, 113.2, 54.5, 32.8)
b_out_ae <- percent_to_lor(1, 11, 8555 * 0.091, 8555 * 0.909)
b_out_aa <- percent_to_lor(85 * 0.047, 85 * (1-0.047), 2301 * 0.068, 2301 * (1-0.068))
tsiv_ae <- two_sample_iv(b_exp_ae$eff, b_out_ae$eff, b_exp_ae$s, b_out_ae$s, n=n1)
tsiv_aa <- two_sample_iv(b_exp_aa$eff, b_out_aa$eff, b_exp_aa$s, b_out_aa$s, n=n2)
tsiv_ae <- two_sample_iv_delta(b_exp_ae$eff, b_out_ae$eff, b_exp_ae$s, b_out_ae$s, n=n1, cov=0)
eff_se_2_or_ci(tsiv_aa$b, tsiv_aa$se)
eff_se_2_or_ci(tsiv_ae$b, tsiv_ae$se)
p1_ncar <- rnorm(8554, 238.5, 60.7)
p1_car <- rnorm(10, 84.6, 49.4)
p2_ncar <- rnorm(2301, 199.5, 54.5)
p2_car <- rnorm(85, 113.2, 32.8)
chd1_ncar <- sample(c(0, 1), 8554, replace=TRUE, prob=c(90.9, 9.1))
chd1_car <- sample(c(0, 1), 10, replace=T, prob=c(1, 0))
chd2_ncar <- sample(c(0, 1), 2301, replace=TRUE, prob=c(100-6.8, 6.8))
chd2_car <- sample(c(0, 1), 85, replace=T, prob=c(100-4.7, 4.7))
dat <- data.frame(exposure = c(p1_ncar, p2_ncar, p1_car, p2_car), geno = rep(c("ncar", "car"), c(8554+2301, 10 + 85)), outcome = c(chd1_ncar, chd2_ncar, chd1_car, chd2_car), popn = rep(c("ae", "aa", "ae", "aa"), c(8554, 2301, 10, 85)))
library(systemfit)
m1 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat, popn=="ae"))))
m2 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat, popn=="aa"))))
m3 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(dat))))
m4 <- coefficients(summary(systemfit(as.factor(outcome) ~ exposure + popn, inst = ~ geno, method="2SLS", data=subset(dat))))
library(plyr)
d2 <- list()
for(i in 1:50)
d2[[i]] <- dat
d2 <- rbind.fill(d2)
summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2, popn=="ae")))
summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2, popn=="aa")))
summary(systemfit(as.factor(outcome) ~ exposure, inst = ~ geno, method="2SLS", data=subset(d2)))
summary(systemfit(as.factor(outcome) ~ exposure + popn, inst = ~ geno, method="2SLS", data=subset(d2)))
r3 <- eff_se_2_or_ci(tsiv$b, tsiv$se)
r4 <- eff_se_2_or_ci(m4[2,1], m4[2,2])
tab <- rbind(
mr_table(r3$or, r3$ci, tsiv$pval, "LAPLA2 (nmol/min/ml)", "CHD (OR)", list("Two sample MR", "No adjustment", n1+n2)),
mr_table(r4$or, r4$ci, m4[2,4], "", "", list("Simulation", "Adjusted for population", n1+n2))
)
write.csv(tab, file="~/Desktop/lapla2.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment