Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Last active September 17, 2024 03:42
Show Gist options
  • Save sashagusev/cc56c7e772611457fccaac5d96195ff8 to your computer and use it in GitHub Desktop.
Save sashagusev/cc56c7e772611457fccaac5d96195ff8 to your computer and use it in GitHub Desktop.
set.seed(42)
# Library for GLMM computations
library(PQLseq2)
quiet <- function(x) {
sink(tempfile())
on.exit(sink())
invisible(force(x))
}
# We will simulate two populations with drift
# Simulation for the past 10k years
gens = as.integer(10e3/25)
# Use 500 neutral SNPs for the GRM calculation
M = 500
# No. of SNPs to test under selection
M_selected = 100
# 100 individuals in each population
samp_0 = 100
samp_1 = 100
# 50% MAF in the starting population
pstart = 0.5
# ---
# Simulate Ne=10k and very large
for ( Ne in c(10e3,1e6) ) {
cat("\n\nSimulating Ne:",Ne,'\n')
# Compute drift variance
drift_var = pstart * (1-pstart) * (gens / (2*Ne))
# First population
p0 = rep(pstart,M)
# Second population
p1 = p0 + rnorm(M,0,sqrt(drift_var))
p1[ p1 < 0 ] = 0
p1[ p1 > 1 ] = 1
# Matrices for the null SNPs
snp0 = matrix(NA,nrow=samp_0,ncol=M)
snp1 = matrix(NA,nrow=samp_1,ncol=M)
for ( i in 1:M ) {
# generate samples from the two populations
snp0[,i] = rbinom(samp_0,2,p0[i])
snp1[,i] = rbinom(samp_1,2,p1[i])
}
# compute the GRM
genos = scale(rbind(snp0,snp1))
grm = genos %*% t(genos) / (M-1)
# compute PC1
pc1 = prcomp(t(genos) )$rotation[,1]
# check correlation with pop label
pop_lbl = c(rep(0,samp_0),rep(1,samp_1))
cat("Correlation w/ PC1",cor(pc1,pop_lbl),'\n')
# --- Now generate new SNPs under selection and test them
# Selection coefficients
for ( s in c(0,0.001,0.002) ) {
# compute target_maf for this s iteratively (there has to be a derivation for this)
p_trajectory = rep(NA,gens)
p_trajectory[1] = pstart
for ( g in 2:gens ) {
p_trajectory[g] = p_trajectory[g-1] + s*p_trajectory[g-1]*(1-p_trajectory[g-1])/(1+2*s*p_trajectory[g-1])
}
##plot(1:gens,p_trajectory,type="l",ylim=c(0,0.5),las=1)
target_maf = tail(p_trajectory,1)
cat("---\ns =" , s , ", starting MAF =", pstart , ", ending MAF =" , target_maf , '\n')
p0 = rep(0.5,M_selected)
p1 = target_maf + rnorm(M,0,sqrt(drift_var))
p1[ p1 < 0 ] = 0
p1[ p1 > 1 ] = 1
chisq_waples = rep(NA,M_selected)
z_lm = rep(NA,M_selected)
z_lm_pc = rep(NA,M_selected)
z_glm = rep(NA,M_selected)
z_glm_pc = rep(NA,M_selected)
z_glmm = rep(NA,M_selected)
z_glmm_pc = rep(NA,M_selected)
for ( i in 1:M_selected ) {
# test with GLMM model
cur_snp0 = rbinom(samp_0,2,p0[i])
cur_snp1 = rbinom(samp_1,2,p1[i])
test_y = matrix(c(cur_snp0,cur_snp1),nrow=1)
rownames(test_y) = "snp1"
test_total = matrix(rep(2,length(test_y)),nrow=1)
# test for association with number of generations apart
test_x = pop_lbl*gens
# test with standard regression ( should be inflated due to drift )
reg = summary(lm(c(test_y) ~ test_x))
z_lm[i] = reg$coef[2,3]
# test standard regression with pc covariate
reg = summary(lm(c(test_y) ~ test_x + pc1))
z_lm_pc[i] = reg$coef[2,3]
# test with standard glm ( should be inflated due to drift )
reg = summary(glm( cbind(c(test_y),2-c(test_y)) ~ test_x,family="binomial"))
z_glm[i] = reg$coef[2,3]
# test with standard glm with pc covariate
reg = summary(glm( cbind(c(test_y),2-c(test_y)) ~ test_x + pc1 ,family="binomial"))
z_glm_pc[i] = reg$coef[2,3]
# test with a GLMM
# hide outputs
reg = quiet(pqlseq2( test_y , test_x , grm , lib_size = test_total , filter=F , verbose=F , model="BMM" ))
z_glmm[i] = reg$beta / reg$se_beta
# compute the Waples statistic (assuming we know Ne)
cur_p0 = mean(cur_snp0/2)
cur_p1 = mean(cur_snp1/2)
cur_drift_var = cur_p1 * (1-cur_p1) * ( 1/(2*samp_0) + 1/(2*samp_1) + (gens / (2*Ne))*(1-1/(2*samp_1)) )
chisq_waples[i] = (cur_p1 - cur_p0)^2 / ( cur_drift_var )
}
cat("Power Waples",mean(chisq_waples > 1.96^2),'\n')
cat("Power LM",mean(z_lm^2 > 1.96^2),'\n')
cat("Power LM with PCs",mean(z_lm_pc^2 > 1.96^2),'\n')
cat("Power GLM",mean(z_glm^2 > 1.96^2),'\n')
cat("Power GLM with PCs",mean(z_glm_pc^2 > 1.96^2),'\n')
cat("Power GLLM (Akbari etal. method)",mean(z_glmm^2 > 1.96^2),'\n')
}
}
# Expected Output:
# Simulating Ne: 10000
# Correlation w/ PC1 -0.8192212
# ---
# s = 0 , starting MAF = 0.5 , ending MAF = 0.5
# Power Waples 0.05
# Power LM 0.24
# Power LM with PCs 0.15
# Power GLM 0.24
# Power GLM with PCs 0.14
# Power GLLM (Akbari etal. method) 0.22
# ---
# s = 0.001 , starting MAF = 0.5 , ending MAF = 0.5983469
# Power Waples 0.22
# Power LM 0.52
# Power LM with PCs 0.26
# Power GLM 0.52
# Power GLM with PCs 0.23
# Power GLLM (Akbari etal. method) 0.51
# ---
# s = 0.002 , starting MAF = 0.5 , ending MAF = 0.6891725
# Power Waples 0.64
# Power LM 0.87
# Power LM with PCs 0.56
# Power GLM 0.87
# Power GLM with PCs 0.55
# Power GLLM (Akbari etal. method) 0.84
#
#
# Simulating Ne: 1e+06
# Correlation w/ PC1 0.02093607
# ---
# s = 0 , starting MAF = 0.5 , ending MAF = 0.5
# Power Waples 0.05
# Power LM 0.05
# Power LM with PCs 0.05
# Power GLM 0.05
# Power GLM with PCs 0.05
# Power GLLM (Akbari etal. method) 0.05
# ---
# s = 0.001 , starting MAF = 0.5 , ending MAF = 0.5983469
# Power Waples 0.64
# Power LM 0.66
# Power LM with PCs 0.66
# Power GLM 0.64
# Power GLM with PCs 0.64
# Power GLLM (Akbari etal. method) 0.61
# ---
# s = 0.002 , starting MAF = 0.5 , ending MAF = 0.6891725
# Power Waples 0.95
# Power LM 0.95
# Power LM with PCs 0.95
# Power GLM 0.94
# Power GLM with PCs 0.94
# Power GLLM (Akbari etal. method) 0.94
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment