Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created September 20, 2024 18:45
Show Gist options
  • Save sashagusev/06764448e14b6f6246c3f9364e7dbdb3 to your computer and use it in GitHub Desktop.
Save sashagusev/06764448e14b6f6246c3f9364e7dbdb3 to your computer and use it in GitHub Desktop.
set.seed(1)
# Library for GLMM computations
library(PQLseq2)
# Library for mixed model / Wald test
library("GMMAT")
# convenience function to suppress prints
quiet <- function(x) {
sink(tempfile())
on.exit(sink())
invisible(force(x))
}
RUN_LMM = TRUE
RUN_LMM2 = TRUE
RUN_GLMM = TRUE
# Simulation for the past 10k years / 400 generations
gens = as.integer(10e3/25)
# No. of neutral SNPs for the GRM calculation
M = 1e3
# No. of SNPs to test under selection
M_selected = 200
# No. of individuals to sample
samp_n = 200
# ---
# Simulate different Nes for different levels of drift
for ( Ne in c(1e3,10e3,1e9) ) {
# Set the selection parameter (0=neutral)
for ( s in c(0,0.002) ) {
cat("\nSimulating Ne:",Ne,"and s:",s,'\n')
# Neutral variants
neutral_mafs = matrix(NA,nrow=gens,ncol=M)
neutral_mafs[1,] = runif(M,0.1,0.9)
for ( g in 2:gens ) {
keep = neutral_mafs[g-1,] > 0 & neutral_mafs[g-1,] < 1
# Add drift
neutral_mafs[g,keep] = neutral_mafs[g-1,keep] + rnorm(sum(keep),0,sqrt( neutral_mafs[g-1,keep] * (1-neutral_mafs[g-1,keep]) / (2*Ne) ))
neutral_mafs[ g:gens , which(neutral_mafs[g,] < 0) ] = 0
neutral_mafs[ g:gens , which(neutral_mafs[g,] > 1) ] = 1
}
neutral_mafs[ neutral_mafs < 0 ] = 0
neutral_mafs[ neutral_mafs > 1 ] = 1
# Selected variants
selected_mafs = matrix(NA,nrow=gens,ncol=M_selected)
selected_mafs[1,] = runif(M_selected,0.1,0.9)
for ( g in 2:gens ) {
keep = selected_mafs[g-1,] > 0 & selected_mafs[g-1,] < 1
# Add drift
prev_maf = selected_mafs[g-1,keep]
selected_mafs[g,keep] = prev_maf + s*prev_maf*(1-prev_maf)/(1+2*s*prev_maf) + rnorm(sum(keep),0,sqrt( prev_maf * (1-prev_maf) / (2*Ne) ))
selected_mafs[ g:gens , which(selected_mafs[g,] < 0) ] = 0
selected_mafs[ g:gens , which(selected_mafs[g,] > 1) ] = 1
}
selected_mafs[ selected_mafs < 0 ] = 0
selected_mafs[ selected_mafs > 1 ] = 1
# set the sampling time (when each aDNA sample was collected)
sample_time = sample(1:gens,samp_n)
# generate neutral variants
# Matrix for the null SNPs
snp_neutral = matrix(NA,nrow=samp_n,ncol=M)
for ( i in 1:M ) {
# for each individual, take the allele frequeny from *their* sampling time
snp_neutral[,i] = rbinom(samp_n,2,neutral_mafs[sample_time,i])
}
# generate variants under selection
snp_selected = matrix(NA,nrow=samp_n,ncol=M_selected)
for ( i in 1:M_selected ) {
# for each individual, take the allele frequeny from *their* sampling time
snp_selected[,i] = rbinom(samp_n,2,selected_mafs[sample_time,i])
}
# --- make the neutral snp GRM
# center and scale the genotypes
# genos = scale(rbind(snp0,snp1))
# center and scale using frequencies
genos = snp_neutral
mafs = apply(genos,2,mean)/2
for ( i in 1:M ) {
genos[,i] = (genos[,i] - 2*mafs[i])/sqrt(2*mafs[i]*(1-mafs[i]))
}
# compute the GRM
grm = genos %*% t(genos) / M
# add a small ridge to make this matrix non-singular
grm = 0.9 * grm + 0.1 * diag(nrow(grm))
# As a sanity check, compute PCs
eigenvec = prcomp( grm )$rotation
# check correlation with sampling time
cat("Correlation w/ PC1",cor(eigenvec[,1],sample_time),'\n')
# Selection scan
test_population_time = sample_time + rnorm(length(sample_time),0,sqrt(0.1*var(sample_time)))
z_lm = rep(NA,M_selected)
z_glmm = rep(NA,M_selected)
# --- Now test the SNPs under selection
# Selection coefficients
for ( i in 1:M_selected ) {
test_snp_alleles = matrix(snp_selected[,i],nrow=1)
rownames(test_snp_alleles) = "snp1"
test_total_alleles = matrix(rep(2,length(test_snp_alleles)),nrow=1)
# test with a standard LM (should be inflated by drift)
reg = summary(lm( test_population_time ~ snp_selected[,i] ))
z_lm[i] = reg$coef[2,3]
if ( RUN_GLMM ) {
# test with a GLMM
# hide outputs
reg = quiet(pqlseq2( Y=test_snp_alleles , x=test_population_time , K=grm , lib_size = test_total_alleles , filter=F , verbose=F , model="BMM" , check_K=T , fix_h2eq1=T ))
# Same run but with the original pqlseq
# reg = quiet(pqlseq( RawCountDataSet=test_snp_alleles , Phenotypes=test_population_time , RelatednessMatrix=grm , LibSize = test_total_alleles , filtering=F , fit.model="BMM" ))
if ( is.na(reg$h2) || reg$h2 == 0 || reg$converged == FALSE ) {
z_glmm[i] = NA
} else {
z_glmm[i] = reg$beta / reg$se_beta
}
}
}
if ( RUN_LMM ) {
# Prepare data for GMMAT/LMM analysis, note a temp file has to be saved here for the method to load it
rownames(snp_selected) = paste("sample_",1:nrow(snp_selected),sep='')
colnames(snp_selected) = paste("snp_",1:ncol(snp_selected),sep='')
write.table( t(snp_selected) , row.names=T, col.names=F, sep='\t' , quote=F , file="tmp.geno.txt")
gmmat_df = data.frame("disease" = test_population_time , "id" = rownames(snp_selected) )
rownames(grm) = rownames(snp_selected)
colnames(grm) = rownames(snp_selected)
# Run GMMAT analysis
reg = glmm.wald( fixed = disease ~ 1 , data = gmmat_df , id = "id" , kins = grm , family = gaussian(link = "identity") , infile = "tmp.geno.txt" , is.dosage=TRUE, snps=colnames(snp_selected) , verbose=FALSE )
z_lmm = reg$BETA / reg$SE
} else {
z_lmm = NA
}
if ( RUN_LMM2 ) {
# Prepare data for GMMAT/LMM analysis, note a temp file has to be saved here for the method to load it
z_lmm2 = rep(NA,ncol(snp_selected))
rownames(snp_selected) = paste("sample_",1:nrow(snp_selected),sep='')
colnames(snp_selected) = paste("snp_",1:ncol(snp_selected),sep='')
rownames(grm) = rownames(snp_selected)
colnames(grm) = rownames(snp_selected)
time_pseudo_snp = 2*(test_population_time - min(test_population_time)) / (max(test_population_time) - min(test_population_time))
write.table( t(time_pseudo_snp) , row.names="snp", col.names=F, sep='\t' , quote=F , file="tmp.geno.txt")
for ( i in 1:ncol(snp_selected) ) {
gmmat_df = data.frame("disease" = scale(snp_selected[,i]) , "id" = rownames(snp_selected) )
# Run GMMAT analysis
reg = glmm.wald( fixed = disease ~ 1 , data = gmmat_df , id = "id" , kins = grm , is.dosage=TRUE , family = gaussian(link = "identity") , infile = "tmp.geno.txt" , snps="snp" , verbose=FALSE )
z_lmm2[i] = reg$BETA / reg$SE
}
} else {
z_lmm2 = NA
}
cat("GLMM NaN Fail",mean(is.na(z_glmm)),'\n')
cat("Power LM",mean(z_lm^2 > 1.96^2),'\n')
cat("Power GLMM (geno ~ time)",mean(z_glmm^2 > 1.96^2,na.rm=T),'\n')
cat("Power LMM (geno ~ time)",mean(z_lmm2^2 > 1.96^2),'\n')
cat("Power LMM (time ~ geno)",mean(z_lmm^2 > 1.96^2),'\n')
cat( paste(mean(z_lm^2 > 1.96^2) , mean(z_glmm^2 > 1.96^2,na.rm=T) , mean(z_lmm2^2 > 1.96^2) , mean(z_lmm^2 > 1.96^2) ,sep=',') , '\n' )
}
}
# Expected Output
# Simulating Ne: 1000 and s: 0
# Correlation w/ PC1 -0.9767579
# GLMM NaN Fail 0.29
# Power LM 0.53
# Power GLMM (geno ~ time) 0.4788732
# Power LMM (geno ~ time) 0.34
# Power LMM (time ~ geno) 0.115
# 0.53,0.47887323943662,0.34,0.115
#
# Simulating Ne: 1000 and s: 0.002
# Correlation w/ PC1 0.9802674
# GLMM NaN Fail 0.365
# Power LM 0.68
# Power GLMM (geno ~ time) 0.5433071
# Power LMM (geno ~ time) 0.48
# Power LMM (time ~ geno) 0.135
# 0.68,0.543307086614173,0.48,0.135
#
# Simulating Ne: 10000 and s: 0
# Correlation w/ PC1 -0.7918637
# GLMM NaN Fail 0.53
# Power LM 0.155
# Power GLMM (geno ~ time) 0.1595745
# Power LMM (geno ~ time) 0.145
# Power LMM (time ~ geno) 0.06
# 0.155,0.159574468085106,0.145,0.06
#
# Simulating Ne: 10000 and s: 0.002
# Correlation w/ PC1 0.8033265
# GLMM NaN Fail 0.455
# Power LM 0.525
# Power GLMM (geno ~ time) 0.440367
# Power LMM (geno ~ time) 0.5
# Power LMM (time ~ geno) 0.305
# 0.525,0.440366972477064,0.5,0.305
#
# Simulating Ne: 1e+09 and s: 0
# Correlation w/ PC1 0.0171917
# GLMM NaN Fail 0.435
# Power LM 0.045
# Power GLMM (geno ~ time) 0.02654867
# Power LMM (geno ~ time) 0.04
# Power LMM (time ~ geno) 0.045
# 0.045,0.0265486725663717,0.04,0.045
#
# Simulating Ne: 1e+09 and s: 0.002
# Correlation w/ PC1 -0.0787399
# GLMM NaN Fail 0.505
# Power LM 0.43
# Power GLMM (geno ~ time) 0.4141414
# Power LMM (geno ~ time) 0.43
# Power LMM (time ~ geno) 0.43
# 0.43,0.414141414141414,0.43,0.43
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment