Created
November 17, 2016 21:14
-
-
Save explodecomputer/f8b392bf192267c0658bd20ea5611be2 to your computer and use it in GitHub Desktop.
risk, liability, heritability, prevalence
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(pROC) | |
library(ggplot2) | |
library(dplyr) | |
library(tidyr) | |
make_geno <- function(nid, nsnp, maf) | |
{ | |
return(matrix(rbinom(nid * nsnp, 2, maf), nid, nsnp)) | |
} | |
range01 <- function(x) | |
{ | |
(x-min(x))/(max(x)-min(x)) | |
} | |
gx_to_gp <- function(gx, h2x, prev) | |
{ | |
x_prime <- qnorm(prev, 0, 1) | |
p <- pnorm(x_prime, mean=gx, sd = sqrt(1 - h2x), lower.tail=FALSE) | |
return(p) | |
} | |
cross_plot <- function(x, o, xlab="Values (low to high)", ylab="", title="", xname="GRS", oname="Disease") | |
{ | |
d <- data.frame( | |
value = c(range01(o), range01(x)), | |
key = c(rep(oname, length(o)), rep(xname, length(x))), | |
gr = rep(1:length(x), times=2) | |
) | |
d$key <- factor(d$key, levels=c(xname, oname)) | |
ggplot(d, aes(x=value, y=key)) + | |
geom_line(aes(group=gr), alpha=0.1) + | |
geom_point(aes(colour=key)) + | |
labs(x=xlab,y=ylab,title=title) + | |
scale_colour_discrete(guide=FALSE) + | |
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) | |
} | |
# G should scaled | |
# var(eff) is the h2x | |
simulate <- function(G, eff, prevalence, prop_discovered) | |
{ | |
nid <- nrow(G) | |
nsnp <- ncol(G) | |
h2x <- var(eff) | |
gx_true <- as.numeric(scale(G %*% eff)) * sqrt(h2x) | |
prob_disease <- gx_to_gp(gx_true, h2x, 1-prevalence) | |
disease <- rbinom(nid, 1, prob_disease) | |
eff_pred <- eff | |
eff_pred[sample(1:nsnp, nsnp * (1-prop_discovered))] <- 0 | |
gx_pred <- as.numeric(G %*% eff_pred / sqrt(nsnp)) | |
dat <- data.frame(gx_true=gx_true, gx_pred=gx_pred, prob_disease=prob_disease, disease=disease) | |
return(dat) | |
} | |
# h2l <- h2o * k^2 * (1-k)^2 / (p * (1 - p) * dnorm(threshold)^2) | |
# Calculating disease risk on observed scale | |
# Requires some | |
# Check that gx to go works | |
d <- expand.grid( | |
g = seq(-4,4,by=0.1), | |
h2x = c(0.35, 0.5, 1), | |
prev = c(0.004, 0.2, 0.5) | |
) | |
d$gr <- 1:nrow(d) | |
d$x_prime <- qnorm(d$prev, 0, 1) | |
d$e2x <- 1 - d$h2x | |
d$z <- dnorm(d$x_prime, mean=d$g, sd = sqrt(d$e2x)) | |
d$p <- pnorm(d$x_prime, mean=d$g, sd = sqrt(d$e2x), lower.tail=FALSE) | |
d$p1 <- gx_to_gp(d$g, d$h2x, d$prev) | |
d <- group_by(d, h2x, prev) %>% | |
mutate(p = range01(p), g = range01(g)) | |
d1 <- gather(d, key, value, g, p) | |
ggplot(d1, aes(x=value, y=key)) + | |
geom_point(aes(colour=key)) + | |
geom_line(aes(group=gr)) + | |
facet_grid(h2x ~ prev) | |
# Model 1 | |
# Common variant-common disease | |
nid <- 1000 | |
nsnp <- 1000 | |
h2x <- 0.3 | |
prev <- 0.5 | |
G_cdcv <- scale(make_geno(nid, nsnp, 0.5)) | |
eff_cdcv <- rnorm(nsnp, sd=sqrt(h2x)) | |
dat_cdcv <- simulate( | |
G=G_cdcv, | |
eff=eff_cdcv, | |
prevalence=prev, | |
prop_discovered=0.1 | |
) | |
plot(roc(disease ~ gx_true, dat_cdcv)) | |
table(dat_cdcv$disease) | |
var(dat_cdcv$gx_true) | |
plot(roc(disease ~ gx_true, dat_cdcv)) | |
plot(roc(disease ~ gx_pred, dat_cdcv)) | |
cross_plot(o=dat_cdcv$prob_disease, x=dat_cdcv$gx_true) | |
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_true, title="Genetic values mapped to disease") | |
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_pred, title="Genetic predictor of disease") | |
nid <- 100 | |
nsnp <- 1000 | |
h2x <- 0.8 | |
prev <- 0.5 | |
G_cdcv <- scale(make_geno(nid, nsnp, 0.5)) | |
eff_cdcv <- rnorm(nsnp, sd=sqrt(h2x)) | |
dat_cdcv <- simulate( | |
G=G_cdcv, | |
eff=eff_cdcv, | |
prevalence=prev, | |
prop_discovered=0.1 | |
) | |
pdf(file="roc_0.8_0.5.pdf") | |
plot(roc(disease ~ gx_true, dat_cdcv)) | |
dev.off() | |
pdf(file="roc_0.8_0.5_0.1.pdf") | |
plot(roc(disease ~ gx_pred, dat_cdcv)) | |
dev.off() | |
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_true, title="Genetic values mapped to disease") | |
ggsave(file="cp_0.8_0.5.pdf", width=8,height=3) | |
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_pred, title="Genetic predictor of disease using only 10% of causal variants") | |
ggsave(file="cp_0.8_0.5_0.1.pdf", width=8,height=3) | |
nid <- 1000 | |
nsnp <- 1000 | |
h2x <- 0.3 | |
prev <- 0.5 | |
G_cdcv <- scale(make_geno(nid, nsnp, 0.5)) | |
eff_cdcv <- rnorm(nsnp, sd=sqrt(h2x)) | |
dat_cdcv <- simulate( | |
G=G_cdcv, | |
eff=eff_cdcv, | |
prevalence=prev, | |
prop_discovered=0.1 | |
) | |
pdf(file="roc_0.3_0.5.pdf") | |
plot(roc(disease ~ gx_true, dat_cdcv)) | |
dev.off() | |
pdf(file="roc_0.3_0.5_0.1.pdf") | |
plot(roc(disease ~ gx_pred, dat_cdcv)) | |
dev.off() | |
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_pred, title="Genetic predictor of disease using only 10% of causal variants") | |
nid <- 10000 | |
nsnp <- 1000 | |
h2x <- 0.3 | |
prev <- 0.01 | |
G_cdcv <- scale(make_geno(nid, nsnp, 0.5)) | |
eff_cdcv <- rnorm(nsnp, sd=sqrt(h2x)) | |
dat_cdcv <- simulate( | |
G=G_cdcv, | |
eff=eff_cdcv, | |
prevalence=prev, | |
prop_discovered=0.1 | |
) | |
pdf(file="roc_0.3_0.01.pdf") | |
plot(roc(disease ~ gx_true, dat_cdcv)) | |
dev.off() | |
# Model 2 | |
# Every case has a specific mutation | |
# Rare variant-common disease | |
nid <- 1000 | |
nsnp <- 1000 | |
h2x <- 0.3 | |
prev <- 0.5 | |
G_cdrv <- diag(nid) | |
diag(G_cdrv)[1:(nid/2)] <- 0 | |
eff_cdrv <- scale(rnorm(nid, diag(G_cdrv), 0.001)) * sqrt(h2x) | |
dat_cdrv <- simulate( | |
G=G_cdrv, | |
eff=eff_cdrv, | |
prevalence=prev, | |
prop_discovered=0.1 | |
) | |
table(dat_cdrv$disease) | |
var(dat_cdrv$gx_true) | |
pdf("roc_0.3_0.5_rare.pdf") | |
plot(roc(disease ~ gx_true, dat_cdrv)) | |
dev.off() | |
pdf("roc_0.3_0.5_0.1_rare.pdf") | |
plot(roc(disease ~ gx_pred, dat_cdrv)) | |
dev.off() | |
cross_plot(o=dat_cdrv$prob_disease, x=dat_cdrv$gx_true) | |
cross_plot(o=dat_cdrv$disease, x=dat_cdrv$gx_true, title="Genetic values mapped to disease") | |
cross_plot(o=dat_cdrv$disease, x=dat_cdrv$gx_pred, title="Genetic predictor of disease") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment