Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created November 17, 2016 21:14
Show Gist options
  • Save explodecomputer/f8b392bf192267c0658bd20ea5611be2 to your computer and use it in GitHub Desktop.
Save explodecomputer/f8b392bf192267c0658bd20ea5611be2 to your computer and use it in GitHub Desktop.
risk, liability, heritability, prevalence
library(pROC)
library(ggplot2)
library(dplyr)
library(tidyr)
make_geno <- function(nid, nsnp, maf)
{
return(matrix(rbinom(nid * nsnp, 2, maf), nid, nsnp))
}
range01 <- function(x)
{
(x-min(x))/(max(x)-min(x))
}
gx_to_gp <- function(gx, h2x, prev)
{
x_prime <- qnorm(prev, 0, 1)
p <- pnorm(x_prime, mean=gx, sd = sqrt(1 - h2x), lower.tail=FALSE)
return(p)
}
cross_plot <- function(x, o, xlab="Values (low to high)", ylab="", title="", xname="GRS", oname="Disease")
{
d <- data.frame(
value = c(range01(o), range01(x)),
key = c(rep(oname, length(o)), rep(xname, length(x))),
gr = rep(1:length(x), times=2)
)
d$key <- factor(d$key, levels=c(xname, oname))
ggplot(d, aes(x=value, y=key)) +
geom_line(aes(group=gr), alpha=0.1) +
geom_point(aes(colour=key)) +
labs(x=xlab,y=ylab,title=title) +
scale_colour_discrete(guide=FALSE) +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank())
}
# G should scaled
# var(eff) is the h2x
simulate <- function(G, eff, prevalence, prop_discovered)
{
nid <- nrow(G)
nsnp <- ncol(G)
h2x <- var(eff)
gx_true <- as.numeric(scale(G %*% eff)) * sqrt(h2x)
prob_disease <- gx_to_gp(gx_true, h2x, 1-prevalence)
disease <- rbinom(nid, 1, prob_disease)
eff_pred <- eff
eff_pred[sample(1:nsnp, nsnp * (1-prop_discovered))] <- 0
gx_pred <- as.numeric(G %*% eff_pred / sqrt(nsnp))
dat <- data.frame(gx_true=gx_true, gx_pred=gx_pred, prob_disease=prob_disease, disease=disease)
return(dat)
}
# h2l <- h2o * k^2 * (1-k)^2 / (p * (1 - p) * dnorm(threshold)^2)
# Calculating disease risk on observed scale
# Requires some
# Check that gx to go works
d <- expand.grid(
g = seq(-4,4,by=0.1),
h2x = c(0.35, 0.5, 1),
prev = c(0.004, 0.2, 0.5)
)
d$gr <- 1:nrow(d)
d$x_prime <- qnorm(d$prev, 0, 1)
d$e2x <- 1 - d$h2x
d$z <- dnorm(d$x_prime, mean=d$g, sd = sqrt(d$e2x))
d$p <- pnorm(d$x_prime, mean=d$g, sd = sqrt(d$e2x), lower.tail=FALSE)
d$p1 <- gx_to_gp(d$g, d$h2x, d$prev)
d <- group_by(d, h2x, prev) %>%
mutate(p = range01(p), g = range01(g))
d1 <- gather(d, key, value, g, p)
ggplot(d1, aes(x=value, y=key)) +
geom_point(aes(colour=key)) +
geom_line(aes(group=gr)) +
facet_grid(h2x ~ prev)
# Model 1
# Common variant-common disease
nid <- 1000
nsnp <- 1000
h2x <- 0.3
prev <- 0.5
G_cdcv <- scale(make_geno(nid, nsnp, 0.5))
eff_cdcv <- rnorm(nsnp, sd=sqrt(h2x))
dat_cdcv <- simulate(
G=G_cdcv,
eff=eff_cdcv,
prevalence=prev,
prop_discovered=0.1
)
plot(roc(disease ~ gx_true, dat_cdcv))
table(dat_cdcv$disease)
var(dat_cdcv$gx_true)
plot(roc(disease ~ gx_true, dat_cdcv))
plot(roc(disease ~ gx_pred, dat_cdcv))
cross_plot(o=dat_cdcv$prob_disease, x=dat_cdcv$gx_true)
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_true, title="Genetic values mapped to disease")
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_pred, title="Genetic predictor of disease")
nid <- 100
nsnp <- 1000
h2x <- 0.8
prev <- 0.5
G_cdcv <- scale(make_geno(nid, nsnp, 0.5))
eff_cdcv <- rnorm(nsnp, sd=sqrt(h2x))
dat_cdcv <- simulate(
G=G_cdcv,
eff=eff_cdcv,
prevalence=prev,
prop_discovered=0.1
)
pdf(file="roc_0.8_0.5.pdf")
plot(roc(disease ~ gx_true, dat_cdcv))
dev.off()
pdf(file="roc_0.8_0.5_0.1.pdf")
plot(roc(disease ~ gx_pred, dat_cdcv))
dev.off()
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_true, title="Genetic values mapped to disease")
ggsave(file="cp_0.8_0.5.pdf", width=8,height=3)
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_pred, title="Genetic predictor of disease using only 10% of causal variants")
ggsave(file="cp_0.8_0.5_0.1.pdf", width=8,height=3)
nid <- 1000
nsnp <- 1000
h2x <- 0.3
prev <- 0.5
G_cdcv <- scale(make_geno(nid, nsnp, 0.5))
eff_cdcv <- rnorm(nsnp, sd=sqrt(h2x))
dat_cdcv <- simulate(
G=G_cdcv,
eff=eff_cdcv,
prevalence=prev,
prop_discovered=0.1
)
pdf(file="roc_0.3_0.5.pdf")
plot(roc(disease ~ gx_true, dat_cdcv))
dev.off()
pdf(file="roc_0.3_0.5_0.1.pdf")
plot(roc(disease ~ gx_pred, dat_cdcv))
dev.off()
cross_plot(o=dat_cdcv$disease, x=dat_cdcv$gx_pred, title="Genetic predictor of disease using only 10% of causal variants")
nid <- 10000
nsnp <- 1000
h2x <- 0.3
prev <- 0.01
G_cdcv <- scale(make_geno(nid, nsnp, 0.5))
eff_cdcv <- rnorm(nsnp, sd=sqrt(h2x))
dat_cdcv <- simulate(
G=G_cdcv,
eff=eff_cdcv,
prevalence=prev,
prop_discovered=0.1
)
pdf(file="roc_0.3_0.01.pdf")
plot(roc(disease ~ gx_true, dat_cdcv))
dev.off()
# Model 2
# Every case has a specific mutation
# Rare variant-common disease
nid <- 1000
nsnp <- 1000
h2x <- 0.3
prev <- 0.5
G_cdrv <- diag(nid)
diag(G_cdrv)[1:(nid/2)] <- 0
eff_cdrv <- scale(rnorm(nid, diag(G_cdrv), 0.001)) * sqrt(h2x)
dat_cdrv <- simulate(
G=G_cdrv,
eff=eff_cdrv,
prevalence=prev,
prop_discovered=0.1
)
table(dat_cdrv$disease)
var(dat_cdrv$gx_true)
pdf("roc_0.3_0.5_rare.pdf")
plot(roc(disease ~ gx_true, dat_cdrv))
dev.off()
pdf("roc_0.3_0.5_0.1_rare.pdf")
plot(roc(disease ~ gx_pred, dat_cdrv))
dev.off()
cross_plot(o=dat_cdrv$prob_disease, x=dat_cdrv$gx_true)
cross_plot(o=dat_cdrv$disease, x=dat_cdrv$gx_true, title="Genetic values mapped to disease")
cross_plot(o=dat_cdrv$disease, x=dat_cdrv$gx_pred, title="Genetic predictor of disease")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment