Created
April 4, 2018 10:36
-
-
Save cth/3c994ea94fb19103f9456321f10dfb27 to your computer and use it in GitHub Desktop.
Genetic Risk Score power simulator with support for imputed genotypes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Genetic Risk Score power simulator with support for imputed genotypes. | |
# The motivating idea was to check whether inclusion of (badly) imputed SNPs still improves power. | |
# Christian Theil Have, 2018. | |
# Simulated annealing to generate permutation of genotypes that minimizes fn | |
genetic.optim <- function(genotypes,fn) { | |
randomswap <- function(genotypes) { | |
i <- sample(length(genotypes),1) | |
j <- sample(which(genotypes!=genotypes[i]),1) | |
tmp <- genotypes[i] | |
genotypes[i] <- genotypes[j] | |
genotypes[j] <- tmp | |
genotypes | |
} | |
current.score <- fn(genotypes) | |
max.iter=length(genotypes)*2 | |
iter <- 0 | |
repeat { | |
permuted <- randomswap(genotypes) | |
permuted.score <- fn(permuted) | |
if(permuted.score < current.score) { | |
current.score <- permuted.score | |
genotypes <- permuted | |
} | |
iter <- iter + 1 | |
if (iter >= max.iter) | |
break | |
} | |
genotypes | |
} | |
# pairs diploid-alleles assorted according hardy-weinberg equibrilium principles iff alleles are in random order | |
alleles2genotypes <- function(alleles) | |
alleles[seq(1,length(alleles)-1,2)] + alleles[seq(2,length(alleles),2)] | |
sample.genotypes.normal <- function(phenotypes, beta, n, maf) { | |
num.variant.alleles <- round(maf*2*n) | |
allele.pool <- sample(c(rep(T,num.variant.alleles), rep(F, (2*n)-num.variant.alleles)), 2*n) | |
genotypes <- alleles2genotypes(allele.pool) | |
optim.func <- function(x) abs(beta-lm(phenotypes~x)$coefficients[2]) | |
genetic.optim(genotypes, optim.func) | |
} | |
scale.to.dosages <- function(unscaled) { | |
minima <- min(unscaled) | |
maxima <- max(unscaled) | |
2 * (unscaled + abs(minima)) / (maxima+abs(minima) - (minima+abs(minima))) | |
} | |
# Sample dosages from genotypes using fast binary search for r-squared | |
# It will result in approximately correct r-squared, but have a tendency to "look" to nice.. | |
sample.dosages <- function(genotypes,info,epsilon=0.01) { | |
jitter.amount = 0 | |
dosages <- genotypes | |
repeat { | |
rsq <- summary(lm(genotypes ~ dosages))$r.squared | |
diff <- abs(rsq - info) | |
if(diff < epsilon) | |
break | |
else if (rsq > info) | |
jitter.amount <- jitter.amount + (diff / 2) | |
else | |
jitter.amount <- jitter.amount - (diff / 2) | |
dosages <- scale.to.dosages(jitter(genotypes,amount=jitter.amount)) | |
} | |
list(genotypes = genotypes, dosages = dosages,r.squared = rsq) | |
} | |
################################################################################################ | |
# pwr.grs calculates the power to detect of a GRS to detect a signal at given significance level | |
# using a GRS weights in a certain sample size and with given imputatation quality. | |
# Note: It assumes that the weights used are accurate estimates of SNP effects. | |
# | |
# ARGUMENTS: | |
# snps: A dataframe with the following columns: | |
# - SNP: Unique name of SNP | |
# - Weight: a weight associated with the _minor_ allele of SNP. Weights should be normalized | |
# relative to a standard normal distribution (mean 0, variance 1). | |
# - EAF: Effect allele frequency | |
# - INFO: Estimated R-squared correlation of imputation with actual genotype. | |
# For imputed genotypes this will be [0-1] and for directly genotyped SNPs it should be 1. | |
# n: The number of individuals to include in the GRS | |
# max.iter: The number of iterations to use for the power calculation. This affects the precision | |
# of the calculated power. Should be at least 100. | |
################################################################################################ | |
pwr.grs <- function(snps, n, alpha = 0.05, max.iter=100) { | |
phenotypes <- rnorm(n) | |
dosages <- list() | |
grs <- rep(0,n) | |
pvalues <- sapply(1:max.iter, function(i) { | |
for(snp_idx in 1:nrow(snps)) { | |
weight <- snps[snp_idx,]$Weight | |
genotypes <- sample.genotypes.normal(phenotypes,weight,n, snps[snp_idx,]$EAF) | |
dosages <- sample.dosages(genotypes, snps[snp_idx,]$INFO)$dosages | |
grs <- grs + dosages*weight | |
} | |
summary(lm(phenotypes~grs))$coefficients[2,4] | |
}) | |
# Power is the fraction of succesfull tests | |
sum(pvalues<alpha) / max.iter | |
} | |
example.grs.data <- data.frame( | |
SNP = c("rs1", "rs2", "rs3"), | |
Weight = c(0.01, 0.01, 0.2), | |
EAF = c(0.42, 0.42, 0.42), | |
INFO= c(0.9, 0.9, 0.1)) | |
pwr.grs(example.grs.data,n=100, alpha = 0.05) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment