Created
June 29, 2017 18:40
-
-
Save explodecomputer/6069a794dbc6bc7581403fd8fea1a76d to your computer and use it in GitHub Desktop.
r2pred
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Get the GLGC data | |
system("wget http://csg.sph.umich.edu/abecasis/public/lipids2013/jointGwasMc_HDL.txt.gz") | |
# Get 1000 genomes frequencies | |
system("plink1.90 --bfile /panfs/panasas01/shared/alspac/studies/latest/alspac/genetic/variants/arrays/gwas/imputed/1000genomes/released/2015-10-30/data/derived/filtered/bestguess/maf0.01_info0.8/data_chr01 --freq --out chr1") | |
# Predict effect sizes and standard errors | |
library(data.table) | |
hdlz <- fread("HDL_Z.txt") | |
hdl <- fread("zcat jointGwasMc_HDL.txt.gz") | |
names(hdlz)[1] <- "SNP" | |
# Sample size per SNP is tricky - not clear what it should be for the imputed variants? | |
# For the sake of comparison just use an approx value for all | |
hdlz$rsq <- hdlz$`Z-score`^2 / (100000 - hdlz$`Z-score`^2) | |
# Merge with allele frequencies | |
fr <- fread("chr1.frq") | |
hdlz <- merge(hdlz, fr, by="SNP") | |
# Estimate effect size and se | |
hdlz$b_pred <- sqrt(hdlz$rsq / (2 * hdlz$MAF * (1 - hdlz$MAF))) | |
hdlz$se_pred <- hdlz$b_pred / hdlz$`Z-score` | |
# Compare to true beta and se | |
hdlz <- merge(hdlz, hdl, by.x="SNP", by.y="rsid") | |
cor(hdlz$b_pred, hdlz$beta) | |
cor(hdlz$se_pred, hdlz$se, use="pair") | |
png("beta_comp.png") | |
plot(b_pred ~ beta, hdlz) | |
dev.off() | |
png("se_comp.png") | |
plot(se_pred ~ se, hdlz) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment