Skip to content

Instantly share code, notes, and snippets.

@cth
Created April 4, 2018 10:36
Show Gist options
  • Save cth/3c994ea94fb19103f9456321f10dfb27 to your computer and use it in GitHub Desktop.
Save cth/3c994ea94fb19103f9456321f10dfb27 to your computer and use it in GitHub Desktop.
Genetic Risk Score power simulator with support for imputed genotypes
## Genetic Risk Score power simulator with support for imputed genotypes.
# The motivating idea was to check whether inclusion of (badly) imputed SNPs still improves power.
# Christian Theil Have, 2018.
# Simulated annealing to generate permutation of genotypes that minimizes fn
genetic.optim <- function(genotypes,fn) {
randomswap <- function(genotypes) {
i <- sample(length(genotypes),1)
j <- sample(which(genotypes!=genotypes[i]),1)
tmp <- genotypes[i]
genotypes[i] <- genotypes[j]
genotypes[j] <- tmp
genotypes
}
current.score <- fn(genotypes)
max.iter=length(genotypes)*2
iter <- 0
repeat {
permuted <- randomswap(genotypes)
permuted.score <- fn(permuted)
if(permuted.score < current.score) {
current.score <- permuted.score
genotypes <- permuted
}
iter <- iter + 1
if (iter >= max.iter)
break
}
genotypes
}
# pairs diploid-alleles assorted according hardy-weinberg equibrilium principles iff alleles are in random order
alleles2genotypes <- function(alleles)
alleles[seq(1,length(alleles)-1,2)] + alleles[seq(2,length(alleles),2)]
sample.genotypes.normal <- function(phenotypes, beta, n, maf) {
num.variant.alleles <- round(maf*2*n)
allele.pool <- sample(c(rep(T,num.variant.alleles), rep(F, (2*n)-num.variant.alleles)), 2*n)
genotypes <- alleles2genotypes(allele.pool)
optim.func <- function(x) abs(beta-lm(phenotypes~x)$coefficients[2])
genetic.optim(genotypes, optim.func)
}
scale.to.dosages <- function(unscaled) {
minima <- min(unscaled)
maxima <- max(unscaled)
2 * (unscaled + abs(minima)) / (maxima+abs(minima) - (minima+abs(minima)))
}
# Sample dosages from genotypes using fast binary search for r-squared
# It will result in approximately correct r-squared, but have a tendency to "look" to nice..
sample.dosages <- function(genotypes,info,epsilon=0.01) {
jitter.amount = 0
dosages <- genotypes
repeat {
rsq <- summary(lm(genotypes ~ dosages))$r.squared
diff <- abs(rsq - info)
if(diff < epsilon)
break
else if (rsq > info)
jitter.amount <- jitter.amount + (diff / 2)
else
jitter.amount <- jitter.amount - (diff / 2)
dosages <- scale.to.dosages(jitter(genotypes,amount=jitter.amount))
}
list(genotypes = genotypes, dosages = dosages,r.squared = rsq)
}
################################################################################################
# pwr.grs calculates the power to detect of a GRS to detect a signal at given significance level
# using a GRS weights in a certain sample size and with given imputatation quality.
# Note: It assumes that the weights used are accurate estimates of SNP effects.
#
# ARGUMENTS:
# snps: A dataframe with the following columns:
# - SNP: Unique name of SNP
# - Weight: a weight associated with the _minor_ allele of SNP. Weights should be normalized
# relative to a standard normal distribution (mean 0, variance 1).
# - EAF: Effect allele frequency
# - INFO: Estimated R-squared correlation of imputation with actual genotype.
# For imputed genotypes this will be [0-1] and for directly genotyped SNPs it should be 1.
# n: The number of individuals to include in the GRS
# max.iter: The number of iterations to use for the power calculation. This affects the precision
# of the calculated power. Should be at least 100.
################################################################################################
pwr.grs <- function(snps, n, alpha = 0.05, max.iter=100) {
phenotypes <- rnorm(n)
dosages <- list()
grs <- rep(0,n)
pvalues <- sapply(1:max.iter, function(i) {
for(snp_idx in 1:nrow(snps)) {
weight <- snps[snp_idx,]$Weight
genotypes <- sample.genotypes.normal(phenotypes,weight,n, snps[snp_idx,]$EAF)
dosages <- sample.dosages(genotypes, snps[snp_idx,]$INFO)$dosages
grs <- grs + dosages*weight
}
summary(lm(phenotypes~grs))$coefficients[2,4]
})
# Power is the fraction of succesfull tests
sum(pvalues<alpha) / max.iter
}
example.grs.data <- data.frame(
SNP = c("rs1", "rs2", "rs3"),
Weight = c(0.01, 0.01, 0.2),
EAF = c(0.42, 0.42, 0.42),
INFO= c(0.9, 0.9, 0.1))
pwr.grs(example.grs.data,n=100, alpha = 0.05)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment