cth · April 4, 2018 10:36
diff --git a/PowerGRS.R b/PowerGRS.R
 ## Genetic Risk Score power simulator with support for imputed genotypes.
 # The motivating idea was to check whether inclusion of (badly) imputed SNPs still improves power.
 # Christian Theil Have, 2018.

 # Simulated annealing to generate permutation of genotypes that minimizes fn
 genetic.optim <- function(genotypes,fn) {
  randomswap <- function(genotypes) {
    i <- sample(length(genotypes),1)
    j <- sample(which(genotypes!=genotypes[i]),1)
    tmp <- genotypes[i]
    genotypes[i] <- genotypes[j]
    genotypes[j] <- tmp
    genotypes
  }
  current.score <- fn(genotypes)
  max.iter=length(genotypes)*2
  iter <- 0
  repeat {
    permuted <- randomswap(genotypes)
    permuted.score <- fn(permuted)
    if(permuted.score < current.score) {
      current.score <- permuted.score
      genotypes <- permuted
    }
    iter <- iter + 1
    if (iter >= max.iter)
      break
  }
  genotypes
 }

 # pairs diploid-alleles assorted according hardy-weinberg equibrilium principles iff alleles are in random order
 alleles2genotypes <- function(alleles)
  alleles[seq(1,length(alleles)-1,2)] + alleles[seq(2,length(alleles),2)]

 sample.genotypes.normal <- function(phenotypes, beta, n, maf) {
  num.variant.alleles <- round(maf*2*n)
  allele.pool <- sample(c(rep(T,num.variant.alleles), rep(F, (2*n)-num.variant.alleles)), 2*n)
  genotypes <- alleles2genotypes(allele.pool)
  optim.func <- function(x) abs(beta-lm(phenotypes~x)$coefficients[2])
  genetic.optim(genotypes, optim.func)
 }

 scale.to.dosages <- function(unscaled) {
  minima <- min(unscaled) 
  maxima <- max(unscaled)
  2 * (unscaled + abs(minima)) / (maxima+abs(minima) - (minima+abs(minima)))
 }

 # Sample dosages from genotypes using fast binary search for r-squared 
 # It will result in  approximately correct r-squared, but have a tendency to "look" to nice..
 sample.dosages <- function(genotypes,info,epsilon=0.01) {
  jitter.amount = 0
  dosages <- genotypes
  repeat {
    rsq <- summary(lm(genotypes ~ dosages))$r.squared
    diff <- abs(rsq - info)
    if(diff < epsilon)
      break
    else if (rsq > info)
      jitter.amount <- jitter.amount + (diff / 2)
    else
      jitter.amount <- jitter.amount - (diff / 2)
    dosages <- scale.to.dosages(jitter(genotypes,amount=jitter.amount))
  }
  list(genotypes = genotypes, dosages = dosages,r.squared = rsq)
 }

 ################################################################################################
 # pwr.grs calculates the power to detect of a GRS to detect a signal at given significance level
 # using a GRS  weights in a certain sample size and with given imputatation quality. 
 # Note: It assumes that the weights used are accurate estimates of SNP effects. 
 # 
 # ARGUMENTS:
 # snps: A dataframe with the following columns:
 # - SNP: Unique name of SNP
 # - Weight: a weight associated with the _minor_ allele of SNP. Weights should be normalized
 #   relative to a standard normal distribution (mean 0, variance 1).
 # - EAF: Effect allele frequency 
 # - INFO: Estimated R-squared correlation of imputation with actual genotype. 
 #   For imputed genotypes this will be [0-1] and for directly genotyped SNPs it should be 1. 
 # n: The number of individuals to include in the GRS
 # max.iter: The number of iterations to use for the power calculation. This affects the precision
 #           of the calculated power. Should be at least 100. 
 ################################################################################################
 pwr.grs <- function(snps, n, alpha = 0.05, max.iter=100) {
  phenotypes <- rnorm(n)
  dosages <- list()
  grs <- rep(0,n)
  pvalues <- sapply(1:max.iter, function(i) {
      for(snp_idx in 1:nrow(snps)) {
        weight <- snps[snp_idx,]$Weight
        genotypes <- sample.genotypes.normal(phenotypes,weight,n, snps[snp_idx,]$EAF)
        dosages <- sample.dosages(genotypes, snps[snp_idx,]$INFO)$dosages
        grs <- grs + dosages*weight
      }
      summary(lm(phenotypes~grs))$coefficients[2,4]
  })
  
  # Power is the fraction of succesfull tests
  sum(pvalues<alpha) / max.iter
 }

 example.grs.data <- data.frame(
  SNP = c("rs1", "rs2", "rs3"),
  Weight = c(0.01, 0.01, 0.2),
  EAF = c(0.42, 0.42, 0.42),
  INFO= c(0.9, 0.9, 0.1))

 pwr.grs(example.grs.data,n=100, alpha = 0.05)
	## Genetic Risk Score power simulator with support for imputed genotypes.
	# The motivating idea was to check whether inclusion of (badly) imputed SNPs still improves power.
	# Christian Theil Have, 2018.

	# Simulated annealing to generate permutation of genotypes that minimizes fn
	genetic.optim <- function(genotypes,fn) {
	randomswap <- function(genotypes) {
	i <- sample(length(genotypes),1)
	j <- sample(which(genotypes!=genotypes[i]),1)
	tmp <- genotypes[i]
	genotypes[i] <- genotypes[j]
	genotypes[j] <- tmp
	genotypes
	}
	current.score <- fn(genotypes)
	max.iter=length(genotypes)*2
	iter <- 0
	repeat {
	permuted <- randomswap(genotypes)
	permuted.score <- fn(permuted)
	if(permuted.score < current.score) {
	current.score <- permuted.score
	genotypes <- permuted
	}
	iter <- iter + 1
	if (iter >= max.iter)
	break
	}
	genotypes
	}

	# pairs diploid-alleles assorted according hardy-weinberg equibrilium principles iff alleles are in random order
	alleles2genotypes <- function(alleles)
	alleles[seq(1,length(alleles)-1,2)] + alleles[seq(2,length(alleles),2)]

	sample.genotypes.normal <- function(phenotypes, beta, n, maf) {
	num.variant.alleles <- round(maf2n)
	allele.pool <- sample(c(rep(T,num.variant.alleles), rep(F, (2n)-num.variant.alleles)), 2n)
	genotypes <- alleles2genotypes(allele.pool)
	optim.func <- function(x) abs(beta-lm(phenotypes~x)$coefficients[2])
	genetic.optim(genotypes, optim.func)
	}

	scale.to.dosages <- function(unscaled) {
	minima <- min(unscaled)
	maxima <- max(unscaled)
	2 * (unscaled + abs(minima)) / (maxima+abs(minima) - (minima+abs(minima)))
	}

	# Sample dosages from genotypes using fast binary search for r-squared
	# It will result in approximately correct r-squared, but have a tendency to "look" to nice..
	sample.dosages <- function(genotypes,info,epsilon=0.01) {
	jitter.amount = 0
	dosages <- genotypes
	repeat {
	rsq <- summary(lm(genotypes ~ dosages))$r.squared
	diff <- abs(rsq - info)
	if(diff < epsilon)
	break
	else if (rsq > info)
	jitter.amount <- jitter.amount + (diff / 2)
	else
	jitter.amount <- jitter.amount - (diff / 2)
	dosages <- scale.to.dosages(jitter(genotypes,amount=jitter.amount))
	}
	list(genotypes = genotypes, dosages = dosages,r.squared = rsq)
	}

	################################################################################################
	# pwr.grs calculates the power to detect of a GRS to detect a signal at given significance level
	# using a GRS weights in a certain sample size and with given imputatation quality.
	# Note: It assumes that the weights used are accurate estimates of SNP effects.
	#
	# ARGUMENTS:
	# snps: A dataframe with the following columns:
	# - SNP: Unique name of SNP
	# - Weight: a weight associated with the _minor_ allele of SNP. Weights should be normalized
	# relative to a standard normal distribution (mean 0, variance 1).
	# - EAF: Effect allele frequency
	# - INFO: Estimated R-squared correlation of imputation with actual genotype.
	# For imputed genotypes this will be [0-1] and for directly genotyped SNPs it should be 1.
	# n: The number of individuals to include in the GRS
	# max.iter: The number of iterations to use for the power calculation. This affects the precision
	# of the calculated power. Should be at least 100.
	################################################################################################
	pwr.grs <- function(snps, n, alpha = 0.05, max.iter=100) {
	phenotypes <- rnorm(n)
	dosages <- list()
	grs <- rep(0,n)
	pvalues <- sapply(1:max.iter, function(i) {
	for(snp_idx in 1:nrow(snps)) {
	weight <- snps[snp_idx,]$Weight
	genotypes <- sample.genotypes.normal(phenotypes,weight,n, snps[snp_idx,]$EAF)
	dosages <- sample.dosages(genotypes, snps[snp_idx,]$INFO)$dosages
	grs <- grs + dosages*weight
	}
	summary(lm(phenotypes~grs))$coefficients[2,4]
	})

	# Power is the fraction of succesfull tests
	sum(pvalues<alpha) / max.iter
	}

	example.grs.data <- data.frame(
	SNP = c("rs1", "rs2", "rs3"),
	Weight = c(0.01, 0.01, 0.2),
	EAF = c(0.42, 0.42, 0.42),
	INFO= c(0.9, 0.9, 0.1))

	pwr.grs(example.grs.data,n=100, alpha = 0.05)