Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Last active December 18, 2020 12:35
Show Gist options
  • Save explodecomputer/2348bc9f00d332834106db504070417c to your computer and use it in GitHub Desktop.
Save explodecomputer/2348bc9f00d332834106db504070417c to your computer and use it in GitHub Desktop.
#===================================#
# GWAS Practical #
# iBSC Genomic Medicine #
# Lavinia Paternoster #
# 08/11/2018 #
#===================================#
# Run the GWAS without PCs but using the cleaned genetic data
cd ~/ibsc_unit2/data
plink \
--bfile geno_qc \
--pheno phen_clean.txt \
--pheno-name CRP \
--covar covs.txt \
--covar-name age,sex \
--linear \
--out ../output/gwas/CRP_nopc
awk 'NR==1 || /ADD/' ../output/gwas/precomputed/CRP_nopc.assoc.linear > ../output/gwas/CRP_nopc.assoc.linear.add
# Run the GWAS with PCs
plink \
--bfile geno_qc \
--pheno phen_clean.txt \
--pheno-name CRP \
--covar covs.txt \
--covar-name PC1-PC10,age,sex \
--linear \
--out ../output/gwas/CRP_allcovs
awk 'NR==1 || /ADD/' ../output/gwas/precomputed/CRP_allcovs.assoc.linear > ../output/gwas/CRP_allcovs.assoc.linear.add
# See which hits are independent using 'clump'
plink \
--bfile geno_qc \
--clump ../output/gwas/precomputed/CRP_allcovs.assoc.linear.add \
--clump-kb 10000 \
--clump-r2 0.001 \
--clump-p1 5e-8 \
--clump-p2 5e-8 \
--out ../output/gwas/CRP_allcovs.assoc.linear.add
# Generate allele freqs for var explained formula
plink \
--bfile geno_qc \
--freq \
--out geno_qc_freq
#=====================#
# Interpreting a GWAS #
# Lavinia Paternoster #
# 08/11/2018 #
#=====================#
install.packages("qqman")
install.packages("GenABEL")
#==================#
#load GWAS results #
#==================#
results <- read.table("~/Documents/ibsc_unit2/output/gwas/precomputed/CRP_nopc.assoc.linear.add", header=T)
#============#
# Make plots #
#============#
#make QQ and manhattan
library(qqman)
png('~/Documents/ibsc_unit2/output/gwas/CRP_man_nopc.png')
manhattan(results, ylim=c(0,20))
dev.off()
png('~/Documents/ibsc_unit2/output/gwas/CRP_qq_nopc.png')
qq(results$P)
dev.off()
#estimate lambda
library(GenABEL)
estlambda(results$P, method="median")
#===========================#
#load adjusted GWAS results #
#===========================#
results <- read.table("~/Documents/ibsc_unit2/output/gwas/precomputed/CRP_allcovs.assoc.linear.add", header=T)
#==============================#
# Make plots for adjusted GWAS #
#==============================#
#make QQ and manhattan
library(qqman)
png('~/Documents/ibsc_unit2/output/gwas/CRP_man_withpc.png')
manhattan(results, ylim=c(0,20))
dev.off()
png('~/Documents/ibsc_unit2/output/gwas/CRP_qq_withpc.png')
qq(results$P)
dev.off()
#estimate lambda
library(GenABEL)
estlambda(results$P, method="median")
#=============================#
# Look at significant results #
#=============================#
#how many significant results are there?
length(results$P[which(results$P<5e-8)])
#what is the top signal?
results[which(results$P<5e-8), ]
# I now want to save this as a table that I can output
table <- results[which(results$P<5e-8), ]
write.table(table, file="~/Documents/ibsc_unit2/output/gwas/CRP_results_table.txt")
#==================#
# Independent hits #
#==================#
# We now want to look at only the clumped results - Run plink --clump command
# load in the clumped results
crp_clumped <- read.table("~/Documents/ibsc_unit2/output/gwas/CRP_allcovs.assoc.linear.add.clumped", header=T)
#subset the results to only include the independent results
clumped_results <- subset(results, SNP %in% crp_clumped$SNP)
#have a look at the new table
clumped_results
#output this table
table2 <- clumped_results[which(clumped_results$P<5e-8), ]
write.table(table2, file="~/Documents/ibsc_unit2/output/gwas/crp_clumped_results_table.txt")
#==================================#
# calculate variance explained #
#==================================#
# You use the formula from the lecture:
# r2 = (2p(1-p)beta^2)/Var(P)
# so we need to find each item (p, beta and Var(P))
# calculate Var(P)
# load in phen_clean & then calculate variance of CRP
phen_clean <- read.table("~/Documents/ibsc_unit2/data/phen_clean.txt", header=T)
var <- var(phen_clean$CRP, na.rm=TRUE) ## This doesn't change between SNPs
# For each SNP you need the beta - already in the clumped_results object
# For each SNP you need p - we will need to generate this in plink and then load in.
# Plink code:
# plink \
# --bfile ~/Documents/ibsc_unit2/data/geno_qc \
# --freq \
# --out ~/Documents/ibsc_unit2/data/geno_qc_freq
# Then load in the freqs
freqs <- read.table("~/Documents/ibsc_unit2/data/geno_qc_freq.frq", header=T)
# get just the freqs in the clumped set
clumped_freqs <- subset(freqs, SNP %in% crp_clumped$SNP)
# now we have everything we need to run the formula
# we'll create a new r2 column in clumped_results
clumped_results$r2 <- (clumped_results$BETA^2*2*clumped_freqs$MAF*(1- clumped_freqs$MAF))/var
clumped_results ###take a look at the new column
# we can add up the r2s to get total r2 explained by all independent significant SNPs
sum(clumped_results$r2)
# you should get 0.025 - the SNPs together explain 2.5% of the variance in CRP
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment