Last active
September 17, 2024 03:42
-
-
Save sashagusev/cc56c7e772611457fccaac5d96195ff8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
set.seed(42) | |
# Library for GLMM computations | |
library(PQLseq2) | |
quiet <- function(x) { | |
sink(tempfile()) | |
on.exit(sink()) | |
invisible(force(x)) | |
} | |
# We will simulate two populations with drift | |
# Simulation for the past 10k years | |
gens = as.integer(10e3/25) | |
# Use 500 neutral SNPs for the GRM calculation | |
M = 500 | |
# No. of SNPs to test under selection | |
M_selected = 100 | |
# 100 individuals in each population | |
samp_0 = 100 | |
samp_1 = 100 | |
# 50% MAF in the starting population | |
pstart = 0.5 | |
# --- | |
# Simulate Ne=10k and very large | |
for ( Ne in c(10e3,1e6) ) { | |
cat("\n\nSimulating Ne:",Ne,'\n') | |
# Compute drift variance | |
drift_var = pstart * (1-pstart) * (gens / (2*Ne)) | |
# First population | |
p0 = rep(pstart,M) | |
# Second population | |
p1 = p0 + rnorm(M,0,sqrt(drift_var)) | |
p1[ p1 < 0 ] = 0 | |
p1[ p1 > 1 ] = 1 | |
# Matrices for the null SNPs | |
snp0 = matrix(NA,nrow=samp_0,ncol=M) | |
snp1 = matrix(NA,nrow=samp_1,ncol=M) | |
for ( i in 1:M ) { | |
# generate samples from the two populations | |
snp0[,i] = rbinom(samp_0,2,p0[i]) | |
snp1[,i] = rbinom(samp_1,2,p1[i]) | |
} | |
# compute the GRM | |
genos = scale(rbind(snp0,snp1)) | |
grm = genos %*% t(genos) / (M-1) | |
# compute PC1 | |
pc1 = prcomp(t(genos) )$rotation[,1] | |
# check correlation with pop label | |
pop_lbl = c(rep(0,samp_0),rep(1,samp_1)) | |
cat("Correlation w/ PC1",cor(pc1,pop_lbl),'\n') | |
# --- Now generate new SNPs under selection and test them | |
# Selection coefficients | |
for ( s in c(0,0.001,0.002) ) { | |
# compute target_maf for this s iteratively (there has to be a derivation for this) | |
p_trajectory = rep(NA,gens) | |
p_trajectory[1] = pstart | |
for ( g in 2:gens ) { | |
p_trajectory[g] = p_trajectory[g-1] + s*p_trajectory[g-1]*(1-p_trajectory[g-1])/(1+2*s*p_trajectory[g-1]) | |
} | |
##plot(1:gens,p_trajectory,type="l",ylim=c(0,0.5),las=1) | |
target_maf = tail(p_trajectory,1) | |
cat("---\ns =" , s , ", starting MAF =", pstart , ", ending MAF =" , target_maf , '\n') | |
p0 = rep(0.5,M_selected) | |
p1 = target_maf + rnorm(M,0,sqrt(drift_var)) | |
p1[ p1 < 0 ] = 0 | |
p1[ p1 > 1 ] = 1 | |
chisq_waples = rep(NA,M_selected) | |
z_lm = rep(NA,M_selected) | |
z_lm_pc = rep(NA,M_selected) | |
z_glm = rep(NA,M_selected) | |
z_glm_pc = rep(NA,M_selected) | |
z_glmm = rep(NA,M_selected) | |
z_glmm_pc = rep(NA,M_selected) | |
for ( i in 1:M_selected ) { | |
# test with GLMM model | |
cur_snp0 = rbinom(samp_0,2,p0[i]) | |
cur_snp1 = rbinom(samp_1,2,p1[i]) | |
test_y = matrix(c(cur_snp0,cur_snp1),nrow=1) | |
rownames(test_y) = "snp1" | |
test_total = matrix(rep(2,length(test_y)),nrow=1) | |
# test for association with number of generations apart | |
test_x = pop_lbl*gens | |
# test with standard regression ( should be inflated due to drift ) | |
reg = summary(lm(c(test_y) ~ test_x)) | |
z_lm[i] = reg$coef[2,3] | |
# test standard regression with pc covariate | |
reg = summary(lm(c(test_y) ~ test_x + pc1)) | |
z_lm_pc[i] = reg$coef[2,3] | |
# test with standard glm ( should be inflated due to drift ) | |
reg = summary(glm( cbind(c(test_y),2-c(test_y)) ~ test_x,family="binomial")) | |
z_glm[i] = reg$coef[2,3] | |
# test with standard glm with pc covariate | |
reg = summary(glm( cbind(c(test_y),2-c(test_y)) ~ test_x + pc1 ,family="binomial")) | |
z_glm_pc[i] = reg$coef[2,3] | |
# test with a GLMM | |
# hide outputs | |
reg = quiet(pqlseq2( test_y , test_x , grm , lib_size = test_total , filter=F , verbose=F , model="BMM" )) | |
z_glmm[i] = reg$beta / reg$se_beta | |
# compute the Waples statistic (assuming we know Ne) | |
cur_p0 = mean(cur_snp0/2) | |
cur_p1 = mean(cur_snp1/2) | |
cur_drift_var = cur_p1 * (1-cur_p1) * ( 1/(2*samp_0) + 1/(2*samp_1) + (gens / (2*Ne))*(1-1/(2*samp_1)) ) | |
chisq_waples[i] = (cur_p1 - cur_p0)^2 / ( cur_drift_var ) | |
} | |
cat("Power Waples",mean(chisq_waples > 1.96^2),'\n') | |
cat("Power LM",mean(z_lm^2 > 1.96^2),'\n') | |
cat("Power LM with PCs",mean(z_lm_pc^2 > 1.96^2),'\n') | |
cat("Power GLM",mean(z_glm^2 > 1.96^2),'\n') | |
cat("Power GLM with PCs",mean(z_glm_pc^2 > 1.96^2),'\n') | |
cat("Power GLLM (Akbari etal. method)",mean(z_glmm^2 > 1.96^2),'\n') | |
} | |
} | |
# Expected Output: | |
# Simulating Ne: 10000 | |
# Correlation w/ PC1 -0.8192212 | |
# --- | |
# s = 0 , starting MAF = 0.5 , ending MAF = 0.5 | |
# Power Waples 0.05 | |
# Power LM 0.24 | |
# Power LM with PCs 0.15 | |
# Power GLM 0.24 | |
# Power GLM with PCs 0.14 | |
# Power GLLM (Akbari etal. method) 0.22 | |
# --- | |
# s = 0.001 , starting MAF = 0.5 , ending MAF = 0.5983469 | |
# Power Waples 0.22 | |
# Power LM 0.52 | |
# Power LM with PCs 0.26 | |
# Power GLM 0.52 | |
# Power GLM with PCs 0.23 | |
# Power GLLM (Akbari etal. method) 0.51 | |
# --- | |
# s = 0.002 , starting MAF = 0.5 , ending MAF = 0.6891725 | |
# Power Waples 0.64 | |
# Power LM 0.87 | |
# Power LM with PCs 0.56 | |
# Power GLM 0.87 | |
# Power GLM with PCs 0.55 | |
# Power GLLM (Akbari etal. method) 0.84 | |
# | |
# | |
# Simulating Ne: 1e+06 | |
# Correlation w/ PC1 0.02093607 | |
# --- | |
# s = 0 , starting MAF = 0.5 , ending MAF = 0.5 | |
# Power Waples 0.05 | |
# Power LM 0.05 | |
# Power LM with PCs 0.05 | |
# Power GLM 0.05 | |
# Power GLM with PCs 0.05 | |
# Power GLLM (Akbari etal. method) 0.05 | |
# --- | |
# s = 0.001 , starting MAF = 0.5 , ending MAF = 0.5983469 | |
# Power Waples 0.64 | |
# Power LM 0.66 | |
# Power LM with PCs 0.66 | |
# Power GLM 0.64 | |
# Power GLM with PCs 0.64 | |
# Power GLLM (Akbari etal. method) 0.61 | |
# --- | |
# s = 0.002 , starting MAF = 0.5 , ending MAF = 0.6891725 | |
# Power Waples 0.95 | |
# Power LM 0.95 | |
# Power LM with PCs 0.95 | |
# Power GLM 0.94 | |
# Power GLM with PCs 0.94 | |
# Power GLLM (Akbari etal. method) 0.94 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment