Last active
September 6, 2023 17:32
-
-
Save sashagusev/5998d81a9f0bcfeef041f0edaaac3c39 to your computer and use it in GitHub Desktop.
simple simulation of a phenotype in genetically distant populations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# reproducibility: | |
set.seed(42) | |
# number of causal variants: | |
M = 5e3 | |
# number of individuals | |
N = 10e3 | |
# heritability | |
hsq = 0.25 | |
# sample individuals with admixture | |
admix = TRUE | |
# allele frequencies computed from the 1000 Gnomes | |
# can be downloaded here: | |
# https://www.dropbox.com/scl/fi/7n2ou0pp9m2yp3vbdf4lc/1000G.EUR_AFR.frq?rlkey=pkatcvwfgtvgtr4xrrjlv22b7&dl=0 | |
frq = read.table("~/Downloads/1000G.EUR_AFR.frq",head=T,as.is=T) | |
# flip frequencies where the alleles don't match | |
flip = frq[,3] != frq[,6] | |
frq[flip,7] = 1 - frq[flip,7] | |
seeds = 20 | |
est = rep(NA,seeds) | |
cat( "variance explained by group label\n" ) | |
for ( s in 1:seeds ) { | |
# generate causal variants | |
effects = rnorm(M,0,1) | |
# sample some causal variants | |
M.causal = sample(1:nrow(frq),M) | |
# simulate some individuals | |
pop_EUR = matrix(NA,nrow=N,ncol=M) | |
pop_AFR = matrix(NA,nrow=N,ncol=M) | |
for ( i in 1:M ) { | |
pop_EUR[,i] = rbinom(N,2,frq[i,4]) | |
pop_AFR[,i] = rbinom(N,2,frq[i,7]) | |
} | |
# simulate some heritable phenotypes | |
gv_EUR = pop_EUR %*% effects | |
gv_AFR = pop_AFR %*% effects | |
# simulate african ancestry in african americans (based on GERA estimates) | |
if ( admix == TRUE ) { | |
afr_anc = rnorm(N,0.736,0.174) | |
afr_anc[ afr_anc < 0 ] = 0 | |
afr_anc[ afr_anc > 1 ] = 1 | |
# admix the phenotypes | |
gv_AFR2 = pop_EUR %*% effects | |
for ( i in 1:N ) { | |
gv_AFR[i] = afr_anc[i] * gv_AFR[i] + (1-afr_anc[i]) * gv_AFR2[i] | |
} | |
} | |
gv_both = scale(c(gv_EUR,gv_AFR)) * sqrt(hsq) | |
env_both = rnorm(N+N,0,sqrt(1-hsq)) | |
y_both = gv_both + env_both | |
pop_lbl = c(rep(TRUE,N),rep(FALSE,N)) | |
# total heritability check: | |
# cor(y_both,gv_both)^2 | |
# variance in outcome explained by group label | |
est[s] = cor(y_both,pop_lbl)^2 | |
cat( "iteration" , s , ":", mean(est,na.rm=T) , "s.e." , sd(est,na.rm=T)/sqrt(sum(!is.na(est))) , '\n' ) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment