Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created February 10, 2017 18:15
Show Gist options
  • Save explodecomputer/e18d5b40c6ee03ba80a48183c2637fc5 to your computer and use it in GitHub Desktop.
Save explodecomputer/e18d5b40c6ee03ba80a48183c2637fc5 to your computer and use it in GitHub Desktop.
ld score regression simplistic simulation
library(dplyr)
# Generate some LD blocks for 1000000 SNPs
# Some blocks have more SNPs than others
ld <- sample(1:1000, 1000000, replace=TRUE, prob=runif(1000))
range(table(ld))
# Generate some xsq stats for each SNP
xsq <- rchisq(1000000, df=1)
# How many SNPs per LX block?
# This is the LD score
counts <- as.data.frame(table(ld))
dat <- data.frame(ld=ld, xsq=xsq)
dat <- merge(dat, counts, by="ld")
# Generate the "estimated" xsq - that is the combined xsq of all SNPs
dat <- group_by(dat, ld) %>%
mutate(xsqsum = sum(xsq))
# The estimate of all SNPs is the same as an estimate of a random set of SNPs
summary(lm(xsqsum ~ Freq, dat))
summary(lm(xsqsum ~ Freq, dat[sample(1:1000000, 100),]))
# Choose the best SNP from each LD block
dattop <- group_by(dat, ld) %>%
dplyr::summarise(Freq=first(Freq), xsqsum = max(xsq))
# much lower
summary(lm(xsqsum ~ Freq, dattop))
# Choose the worst SNP from each LD block
datbottom <- group_by(dat, ld) %>%
dplyr::summarise(Freq=first(Freq), xsqsum = min(xsq))
# Lower still
summary(lm(xsqsum ~ Freq, datbottom))
# Choose a random SNP from each LD block
datrand <- group_by(dat, ld) %>%
dplyr::summarise(Freq=first(Freq), xsqsum = first(xsq))
# between top and bottom
summary(lm(xsqsum ~ Freq, datrand))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment