Created
February 10, 2017 18:15
-
-
Save explodecomputer/e18d5b40c6ee03ba80a48183c2637fc5 to your computer and use it in GitHub Desktop.
ld score regression simplistic simulation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
# Generate some LD blocks for 1000000 SNPs | |
# Some blocks have more SNPs than others | |
ld <- sample(1:1000, 1000000, replace=TRUE, prob=runif(1000)) | |
range(table(ld)) | |
# Generate some xsq stats for each SNP | |
xsq <- rchisq(1000000, df=1) | |
# How many SNPs per LX block? | |
# This is the LD score | |
counts <- as.data.frame(table(ld)) | |
dat <- data.frame(ld=ld, xsq=xsq) | |
dat <- merge(dat, counts, by="ld") | |
# Generate the "estimated" xsq - that is the combined xsq of all SNPs | |
dat <- group_by(dat, ld) %>% | |
mutate(xsqsum = sum(xsq)) | |
# The estimate of all SNPs is the same as an estimate of a random set of SNPs | |
summary(lm(xsqsum ~ Freq, dat)) | |
summary(lm(xsqsum ~ Freq, dat[sample(1:1000000, 100),])) | |
# Choose the best SNP from each LD block | |
dattop <- group_by(dat, ld) %>% | |
dplyr::summarise(Freq=first(Freq), xsqsum = max(xsq)) | |
# much lower | |
summary(lm(xsqsum ~ Freq, dattop)) | |
# Choose the worst SNP from each LD block | |
datbottom <- group_by(dat, ld) %>% | |
dplyr::summarise(Freq=first(Freq), xsqsum = min(xsq)) | |
# Lower still | |
summary(lm(xsqsum ~ Freq, datbottom)) | |
# Choose a random SNP from each LD block | |
datrand <- group_by(dat, ld) %>% | |
dplyr::summarise(Freq=first(Freq), xsqsum = first(xsq)) | |
# between top and bottom | |
summary(lm(xsqsum ~ Freq, datrand)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment