Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created September 18, 2025 12:55
Show Gist options
  • Save sashagusev/b92fd98eb9e7a4f0cf93889ab38ac711 to your computer and use it in GitHub Desktop.
Save sashagusev/b92fd98eb9e7a4f0cf93889ab38ac711 to your computer and use it in GitHub Desktop.
Simulation of within-sibling admixture analysis for a diverging trait
set.seed(42)
options(digits=3)
# --- Parameters
# sibling pairs
N = 100e3
# unadmixed individuals (for population estimate)
N_unadmixed = 10e3
# number of variants
M = 100
# heritability
hsq = 0.8
# number of runs per architecture
seeds = 5
# frequency in pop1
frq_pop1 = 0.9
# frequency in pop2
frq_pop2 = 0.1
# ---
# For storing results
all_1cv_est = vector()
all_1cv_true = vector()
all_100cv_est = vector()
all_100cv_true = vector()
# Iterate across increasing levels of frequency differentiation
for ( frq_pop1 in seq(0.2,0.9,by=0.1) ) {
# For reference, calculate the expected phenotype difference based on allele frequencies and heritability
frq_mean = (frq_pop1 + frq_pop2)/2
expected_diff = 2 * ( frq_pop1 - frq_pop2 ) * sqrt ( hsq / (2*frq_mean*(1-frq_mean)) )
cat("p1" , frq_pop1 , "p2" , frq_pop2, "Expected absolute effect", expected_diff,'\n')
cat( "# causal" , "Pop Effect", "Effect SE" , "WF effect" , "WF SE", "BF effect" , "BF effect" , "BF SE\n", sep='\t' )
# Iterate over different numbers of causal variants
for ( M_causal in c(1,100) ) {
# for storing results
est_wf = rep(NA,seeds)
est_bf = rep(NA,seeds)
est_pop = rep(NA,seeds)
# -
for ( s in 1:seeds ) {
# Sample causal variants
i_causal = sample(M,M_causal)
# Default allele frequency
maf = 0.5
# sample 0/1 haplotypes for parents
mate1_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
mate1_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
mate2_hap_a = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
mate2_hap_b = matrix( rbinom(N*M,1,maf) , nrow=N , ncol=M )
# sample 0/1 ancestry for parents
mate1_anc_a = matrix( rbinom(N*M,1,0.5) , nrow=N , ncol=M )
mate1_anc_b = matrix( rbinom(N*M,1,0.5) , nrow=N , ncol=M )
mate2_anc_a = matrix( rbinom(N*M,1,0.5) , nrow=N , ncol=M )
mate2_anc_b = matrix( rbinom(N*M,1,0.5) , nrow=N , ncol=M )
# differentiate the causal variants between populations
if ( TRUE ) {
for ( i in i_causal ) {
# set population 1 to be all carriers
mate1_hap_a[mate1_anc_a[,i] == 1,i] = rbinom(sum(mate1_anc_a[,i] == 1),1,frq_pop1)
mate1_hap_b[mate1_anc_b[,i] == 1,i] = rbinom(sum(mate1_anc_b[,i] == 1),1,frq_pop1)
mate2_hap_a[mate2_anc_a[,i] == 1,i] = rbinom(sum(mate2_anc_a[,i] == 1),1,frq_pop1)
mate2_hap_b[mate2_anc_b[,i] == 1,i] = rbinom(sum(mate2_anc_b[,i] == 1),1,frq_pop1)
# set population 0 to be all non-carriers
mate1_hap_a[mate1_anc_a[,i] == 0,i] = rbinom(sum(mate1_anc_a[,i] == 0),1,frq_pop2)
mate1_hap_b[mate1_anc_b[,i] == 0,i] = rbinom(sum(mate1_anc_b[,i] == 0),1,frq_pop2)
mate2_hap_a[mate2_anc_a[,i] == 0,i] = rbinom(sum(mate2_anc_a[,i] == 0),1,frq_pop2)
mate2_hap_b[mate2_anc_b[,i] == 0,i] = rbinom(sum(mate2_anc_b[,i] == 0),1,frq_pop2)
}
}
# --- generate sibling 1
# hap a
child1_hap_a = mate1_hap_a
child1_anc_a = mate1_anc_a
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child1_hap_a[ !sampled_vars ] = mate1_hap_b[ !sampled_vars ]
child1_anc_a[ !sampled_vars ] = mate1_anc_b[ !sampled_vars ]
# hap b
child1_hap_b = mate2_hap_a
child1_anc_b = mate2_anc_a
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child1_hap_b[ !sampled_vars ] = mate2_hap_b[ !sampled_vars ]
child1_anc_b[ !sampled_vars ] = mate2_anc_b[ !sampled_vars ]
# --- generate sibling 2
# hap a
child2_hap_a = mate1_hap_a
child2_anc_a = mate1_anc_a
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child2_hap_a[ !sampled_vars ] = mate1_hap_b[ !sampled_vars ]
child2_anc_a[ !sampled_vars ] = mate1_anc_b[ !sampled_vars ]
# hap b
child2_hap_b = mate2_hap_a
child2_anc_b = mate2_anc_a
sampled_vars = matrix( as.logical(rbinom(N*M,1,0.5)) , nrow=N , ncol=M )
child2_hap_b[ !sampled_vars ] = mate2_hap_b[ !sampled_vars ]
child2_anc_b[ !sampled_vars ] = mate2_anc_b[ !sampled_vars ]
# --- add some people with 100% ancestry for a more conventional analysis (same code as before)
# sample 0/1 haplotypes for parents
mate1_hap_a = matrix( rbinom(N_unadmixed*M,1,maf) , nrow=N_unadmixed , ncol=M )
mate1_hap_b = matrix( rbinom(N_unadmixed*M,1,maf) , nrow=N_unadmixed , ncol=M )
mate2_hap_a = matrix( rbinom(N_unadmixed*M,1,maf) , nrow=N_unadmixed , ncol=M )
mate2_hap_b = matrix( rbinom(N_unadmixed*M,1,maf) , nrow=N_unadmixed , ncol=M )
# 100% ancestry
mate1_anc_a = matrix( 1 , nrow=N_unadmixed , ncol=M )
mate1_anc_b = matrix( 1 , nrow=N_unadmixed , ncol=M )
mate2_anc_a = matrix( 0 , nrow=N_unadmixed , ncol=M )
mate2_anc_b = matrix( 0 , nrow=N_unadmixed , ncol=M )
# fix the causal variants between populations
for ( i in i_causal ) {
# set population 1 to be all carriers
mate1_hap_a[mate1_anc_a[,i] == 1,i] = rbinom(sum(mate1_anc_a[,i] == 1),1,frq_pop1)
mate1_hap_b[mate1_anc_b[,i] == 1,i] = rbinom(sum(mate1_anc_b[,i] == 1),1,frq_pop1)
mate2_hap_a[mate2_anc_a[,i] == 1,i] = rbinom(sum(mate2_anc_a[,i] == 1),1,frq_pop1)
mate2_hap_b[mate2_anc_b[,i] == 1,i] = rbinom(sum(mate2_anc_b[,i] == 1),1,frq_pop1)
# set population 0 to be all non-carriers
mate1_hap_a[mate1_anc_a[,i] == 0,i] = rbinom(sum(mate1_anc_a[,i] == 0),1,frq_pop2)
mate1_hap_b[mate1_anc_b[,i] == 0,i] = rbinom(sum(mate1_anc_b[,i] == 0),1,frq_pop2)
mate2_hap_a[mate2_anc_a[,i] == 0,i] = rbinom(sum(mate2_anc_a[,i] == 0),1,frq_pop2)
mate2_hap_b[mate2_anc_b[,i] == 0,i] = rbinom(sum(mate2_anc_b[,i] == 0),1,frq_pop2)
}
# --- merge the admixed and unadmixed samples and label them
all_genos = rbind(child1_hap_a+child1_hap_b,child2_hap_a+child2_hap_b,mate1_hap_a+mate1_hap_b,mate2_hap_a+mate2_hap_b)
all_anc = rbind(child1_anc_a+child1_anc_b,child2_anc_a+child2_anc_b,mate1_anc_a+mate1_anc_b,mate2_anc_a+mate2_anc_b)
all_global_anc = apply( all_anc , 1 , sum ) / (2*M)
type = c( rep("siba",N) , rep("sibb",N) , rep("pop1",N_unadmixed) , rep("pop0",N_unadmixed) )
# --- generate heritable phenotype in the entire population with a random environment
b_causal = rnorm(M_causal,0,1)
all_y = scale( all_genos[,i_causal,drop=F] %*% b_causal )*sqrt(hsq) + rnorm(nrow(all_genos),0,sqrt(1-hsq))
# Run the regression analysis
# --- joint regression (ala Wang et al.)
fam_mean = (all_global_anc[type=="siba"] + all_global_anc[type=="sibb"])/2
# sibling regression
reg_wf = summary(lm( (all_y[type=="siba"]) ~ (all_global_anc[type=="siba"]) + (fam_mean) ))
# pop regression without admixture
reg_pop = summary(lm(all_y[(type=="pop1" | type=="pop0")] ~ all_global_anc[(type=="pop1" | type=="pop0")] ))
#est_wf[s] = reg$coef[2,1]
#est_bf[s] = reg$coef[3,1]
#est_pop[s] = reg$coef[2,1]
cat( M_causal , "\t" , reg_pop$coef[2,1] , "\t(" , reg_pop$coef[2,2] , ")\t" , reg_wf$coef[2,1] , "\t(" , reg_wf$coef[2,2] , ")\t" , reg_wf$coef[3,1] , "\t(" , reg_wf$coef[3,2] , ")\n" , sep='' )
# store all of the results for plotting
if ( M_causal == 1 ) {
all_1cv_est = c(all_1cv_est,reg_wf$coef[2,1])
all_1cv_true = c(all_1cv_true,reg_pop$coef[2,1])
} else if ( M_causal == 100 ) {
all_100cv_est = c(all_100cv_est,reg_wf$coef[2,1])
all_100cv_true = c(all_100cv_true,reg_pop$coef[2,1])
}
}
}
}
# Visualize the results from the two architectures
clr = c("#d53e4f","#fc8d59")
plot( all_1cv_true , all_1cv_est , xlim=c(-2.5,2.5) , ylim=c(-2.5,2.5) , type="n" , las=1 , bty="n" , xlab="True population difference" , ylab="Estimated Within-Family Effect")
abline(0,1,lty=2)
points( all_1cv_true , all_1cv_est , pch=19 , col=paste(clr[1],"50",sep='') )
points( all_1cv_true , all_1cv_est , col=clr[1] )
reg1 = (lm(all_1cv_est~all_1cv_true))
abline(reg1,col=clr[1],lty=3)
points( all_100cv_true , all_100cv_est , pch=19 , col=paste(clr[2],"50",sep=''))
points( all_100cv_true , all_100cv_est , col=clr[2])
reg100 = (lm(all_100cv_est~all_100cv_true))
abline(reg100,col=clr[2],lty=3)
legend("topleft",pch=19,col=clr,legend=c(paste("1 causal (slope=",round(reg1$coef[2],2)," s.e. ",round(summary(reg1)$coef[2,2],2),")",sep=''),paste("100 causal (slope=",round(reg100$coef[2],2)," s.e. ",round(summary(reg100)$coef[2,2],2),")",sep='')),bty="n",cex=0.75)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment