Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Last active December 26, 2024 18:31
Show Gist options
  • Save sashagusev/86c1323950be3284de9d083478f5f36c to your computer and use it in GitHub Desktop.
Save sashagusev/86c1323950be3284de9d083478f5f36c to your computer and use it in GitHub Desktop.
# Simulations and visualization of a heritable liability threshold model across generations and in families
# ---
# Liability functions (from Baselmans et al.)
# See: https://github.com/BartBaselmans/CHARRGe
risk_from_liability = function( prev , h2 ) {
# Threshold
Tx = -qnorm(prev,0,1)
z = dnorm(Tx)
i = z/prev
# One affected parent / sibling
a = 0.5
Tx1 = (Tx - a * i * h2) / (sqrt(1 - a * a * h2 * h2 * i * (i-Tx)))
Kx1 = 1 - pnorm(Tx1)
# Two affected parents
Tx2 = (Tx -i*h2) / sqrt(1-(0.5*h2*h2*i)*(i-Tx))
Kx2 = 1 - pnorm(Tx2)
return( c(Kx1,Kx2) )
}
# Set population-level parameters
prev = 0.01
thresh = qnorm(prev,lower.tail=F)
N = 10e6
seeds = 10
# --- Get the analytically derived risks
for ( hsq in c(0.1,0.2,0.5) ) {
derived_risk = risk_from_liability( prev , hsq )
# % of cases with 1 affected relative
pct_1 = prev * (1-prev) * 2 * derived_risk[1] / prev
# % of cases with 2 affected relative
pct_2 = prev * prev * derived_risk[2] / prev
# % of cases with 0 affected relatives
pct_0 = 1 - pct_1 - pct_2
cat( hsq , (pct_0 * prev) / ((1-prev) * (1-prev)) , derived_risk , '\n' )
}
# --- Barplots of risk in offspring
# 0.009916982 0.01406147 0.01948549
# 0.009810279 0.01926405 0.03516929
# 0.009304792 0.04372749 0.1456944
mat = matrix( c(0.009916982,0.009810279,0.009304792,0.01406147,0.01926405,0.04372749,0.01948549,0.03516929,0.1456944) , nrow=3 , byrow=T )
xval = barplot(mat,col=c("#ffffff","#9ecae1","#3182bd"),beside=T,names=c("10%","20%","50%"),xlab="Heritability",las=1,ylab="Lifetime risk in offspring",ylim=c(0,0.5),border=c("#3182bd"))
text( xval[1,] , mat[1,] , paste(format(mat[1,]*100,digits=2),"%",sep='') ,pos=3 )
text( xval[2,] , mat[2,] , paste(format(mat[2,]*100,digits=2),"%",sep='') ,pos=3 )
text( xval[3,] , mat[3,] , paste(format(mat[3,]*100,digits=2),"%",sep='') ,pos=3 )
abline(h=0.01,lty=3)
legend("topleft",legend=c("Zero affected parents","One affected parent/sibling","Both affected parents"),fill=c("#ffffff","#9ecae1","#3182bd"),bty="n")
# --- Barplots for distribution of cases
mat = matrix(NA,nrow=3,ncol=4)
i = 1
for ( hsq in c(1,0.5,0.2,0.1) ) {
derived_risk = risk_from_liability( prev , hsq )
# % of cases with 1 affected relative
pct_1 = prev * (1-prev) * 2 * derived_risk[1] / prev
# % of cases with 2 affected relative
pct_2 = prev * prev * derived_risk[2] / prev
# % of cases with 0 affected relatives
pct_0 = 1 - pct_1 - pct_2
mat[,i] = c(pct_0,pct_1,pct_2)
i = i+1
}
par(xpd=T)
yval = barplot(mat,las=1,horiz=T,xlim=c(0,1.1),col=c("#ffffff","#9ecae1","#3182bd"),xlab="Proportion of cases (for a condition with 1% prevalence)", ylab="Heritability",names=c("100%","50%","20%","10%"))
text( 0 , yval , paste(format(mat[1,]*100,digits=2),"%",sep='') , pos=4 , cex=0.75 )
text( mat[1,] , yval , paste(format(mat[2,]*100,digits=2),"%",sep='') , pos=2 , cex=0.75 , col="#9ecae1" )
text( 1 , yval , paste(format(mat[3,]*100,digits=1),"%",sep='') , pos=4 , cex=0.75 ,col="#3182bd")
legend("topleft",inset=c(0,-0.2),ncol=3,cex=0.9,legend=c("No affected parents","One affected parent","Both affected parents"),fill=c("#ffffff","#9ecae1","#3182bd"),bty="n")
# --- Confirm analytical risks with simulations
for ( hsq in c(0.1,0.2,0.5) ) {
est_0deg = rep(NA,seeds)
est_1deg = rep(NA,seeds)
est_2deg = rep(NA,seeds)
tot_0deg = rep(NA,seeds)
tot_1deg = rep(NA,seeds)
tot_2deg = rep(NA,seeds)
for ( s in 1:seeds ) {
gv_mum = rnorm(N,0,sqrt(hsq))
gv_dad = rnorm(N,0,sqrt(hsq))
mean_parent_gv = (gv_mum + gv_dad)/2
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq))
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq))
gv_kid1 = rnorm(N,0,sqrt(hsq/2)) + mean_parent_gv
y_kid1 = gv_kid1 + rnorm(N,0,sqrt(1-hsq))
# population risk
# mean( y_kid1 > thresh )
# risk given both parents are CONTROLS
est_0deg[s] = mean( y_kid1[ y_mum < thresh & y_dad < thresh ] > thresh )
tot_0deg[s] = sum( y_kid1[ y_mum < thresh & y_dad < thresh ] > thresh )
# risk given one parent or sibling
est_1deg[s] = mean( y_kid1[ y_mum > thresh | y_dad > thresh ] > thresh )
tot_1deg[s] = sum( y_kid1[ y_mum > thresh | y_dad > thresh ] > thresh )
# risk given both parents
est_2deg[s] = mean( y_kid1[ y_mum > thresh & y_dad > thresh ] > thresh )
tot_2deg[s] = sum( y_kid1[ y_mum > thresh & y_dad > thresh ] > thresh )
cat( s , mean(est_0deg,na.rm=T) , mean(est_1deg,na.rm=T) , mean(est_2deg,na.rm=T) , '\n' )
cat( s , mean(tot_0deg,na.rm=T) , mean(tot_1deg,na.rm=T) , mean(tot_2deg,na.rm=T) , '\n' )
}
}
# --- Simulate and visualize full liability distribution
liab_pop = vector()
liab_case = vector()
liab_0deg = vector()
liab_1deg = vector()
liab_2deg = vector()
hsq = 0.5
est_0deg = rep(NA,seeds)
est_1deg = rep(NA,seeds)
est_2deg = rep(NA,seeds)
for ( s in 1:seeds ) {
gv_mum = rnorm(N,0,sqrt(hsq))
gv_dad = rnorm(N,0,sqrt(hsq))
mean_parent_gv = (gv_mum + gv_dad)/2
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq))
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq))
gv_kid1 = rnorm(N,0,sqrt(hsq/2)) + mean_parent_gv
y_kid1 = gv_kid1 + rnorm(N,0,sqrt(1-hsq))
# population risk
# mean( y_kid1 > thresh )
liab_pop = c(liab_pop,sample(y_kid1,10e3))
# liability in cases
keep = y_kid1 > thresh
liab_case = c(liab_case,sample(y_kid1[ keep ],min(sum(keep),10e3)))
# risk given both parents are CONTROLS
keep = y_mum < thresh & y_dad < thresh
est_0deg[s] = mean( y_kid1[ keep ] > thresh )
liab_0deg = c(liab_0deg,sample(y_kid1[ keep ],min(sum(keep),10e3)))
# risk given one parent or sibling
keep = y_mum > thresh | y_dad > thresh
est_1deg[s] = mean( y_kid1[ keep ] > thresh )
liab_1deg = c(liab_1deg,sample(y_kid1[ keep ],min(sum(keep),10e3)))
# risk given both parents
keep = y_mum > thresh & y_dad > thresh
est_2deg[s] = mean( y_kid1[ keep ] > thresh )
liab_2deg = c(liab_2deg,sample(y_kid1[ keep ],min(sum(keep),10e3)))
cat( s , mean(est_0deg,na.rm=T) , mean(est_1deg,na.rm=T) , mean(est_2deg,na.rm=T) , '\n' )
}
plot(0,0,type="n",xlim=c(-4,6),ylim=c(0,0.5),xlab="Liability",yaxt="n",ylab="",bty="n")
polygon( density(liab_pop,adjust=2) , border="NA" , col="#eeeeee" )
lines( density(liab_1deg,adjust=2) , col="#9ecae1" , lwd=2 )
lines( density(liab_2deg,adjust=2) , col="#3182bd" , lwd=2 )
points( 0 , 0 , pch=19 , cex=1 , col="#bbbbbb")
points( mean(liab_1deg) , 0 , pch=19 , cex=1 , col="#9ecae1")
points( mean(liab_2deg) , 0 , pch=19 , cex=1 , col="#3182bd")
points( mean(liab_case) , 0 , pch=19 , cex=1 , col="#756bb1")
abline(v=thresh,lty=3)
legend("topleft",legend=c("Population","One Parent Affected","Both Parents Affected","Cases"),pch=19,col=c("#bbbbbb","#9ecae1","#3182bd","#756bb1"),bty="n")
# lines( density(liab_case,adjust=2) , col="#756bb1" , lwd=2 )
# --- Simulate genetically correlated traits
N = 10e6
thresh = qnorm(0.01,lower.tail=F)
seeds = 10
for ( hsq in c(0.5) ) {
for ( rg in c(0.3,0.7) ) {
est_1 = rep(NA,seeds)
est_2 = rep(NA,seeds)
for ( s in 1:seeds ) {
gv_mum = rnorm(N,0,sqrt(hsq))
gv_dad = rnorm(N,0,sqrt(hsq))
mean_parent_gv = (gv_mum + gv_dad)/2
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq))
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq))
mean_parent_gv = (gv_mum + gv_dad)/2
gv_kid = rnorm(N,0,sqrt(hsq/2)) + mean_parent_gv
y_kid = gv_kid + rnorm(N,0,sqrt(1-hsq))
# Offspring risk given one parent is a case
est_1[s] = mean( y_kid[ y_mum > thresh | y_dad > thresh ] > thresh )
# Generate correlated traits
y_mum1 = scale(scale(gv_mum) * rg + rnorm(N,0,sqrt(1-rg^2))) * sqrt(hsq) + rnorm(N,0,sqrt(1-hsq))
y_dad1 = scale(scale(gv_dad) * rg + rnorm(N,0,sqrt(1-rg^2))) * sqrt(hsq) + rnorm(N,0,sqrt(1-hsq))
# Offspring risk given at least one parent has BOTH traits
est_2[s] = mean( y_kid[ (y_mum > thresh & y_mum1 > thresh) | (y_dad > thresh & y_dad1 > thresh) ] > thresh )
cat( s , mean(est_1,na.rm=T) , mean(est_2,na.rm=T) , '\n' )
}
}
}
# Simulate correlated traits with a correlated environment
for ( hsq in c(0.5) ) {
for ( rg in c(0, 0.3,0.7) ) {
est_1 = rep(NA,seeds)
est_2 = rep(NA,seeds)
for ( s in 1:seeds ) {
gv_mum = rnorm(N,0,sqrt(hsq))
gv_dad = rnorm(N,0,sqrt(hsq))
mean_parent_gv = (gv_mum + gv_dad)/2
env_mum = rnorm(N,0,sqrt(1-hsq))
env_dad = rnorm(N,0,sqrt(1-hsq))
y_mum = gv_mum + env_mum
y_dad = gv_dad + env_dad
mean_parent_gv = (gv_mum + gv_dad)/2
gv_kid = rnorm(N,0,sqrt(hsq/2)) + mean_parent_gv
y_kid = gv_kid + rnorm(N,0,sqrt(1-hsq))
est_1[s] = mean( y_kid[ y_mum > thresh | y_dad > thresh ] > thresh )
y_mum1 = scale(scale(gv_mum) * rg + rnorm(N,0,sqrt(1-rg^2))) * sqrt(hsq) + env_mum
y_dad1 = scale(scale(gv_dad) * rg + rnorm(N,0,sqrt(1-rg^2))) * sqrt(hsq) + env_dad
est_2[s] = mean( y_kid[ (y_mum > thresh & y_mum1 > thresh) | (y_dad > thresh & y_dad1 > thresh) ] > thresh )
cat( s , mean(est_1,na.rm=T) , mean(est_2,na.rm=T) , '\n' )
}
}
}
# --- Barplot of correlated trait risk
# 0.0433239 0.05368054 0.06495996
mat = matrix( c(0.0433239,0.05368054,0.06495996,0.02742762 ,0.03327447,0.04161584 ) , nrow=2 , byrow=T )
xval = barplot(mat,col=c("#78c679","#238443"),beside=T,names=c("None","0.3","0.7"),xlab="Genetic correlation with second trait",las=1,ylab="Probability in offspring",ylim=c(0,0.5),border=c("#238443"))
text( xval[1,] , mat[1,] , paste(format(mat[1,]*100,digits=2),"%",sep='') ,pos=3 )
text( xval[2,] , mat[2,] , paste(format(mat[2,]*100,digits=2),"%",sep='') ,pos=3 )
abline(h=0.01,lty=3)
legend("topleft",legend=c("Independent environment","Correlated environment"),fill=c("#78c679","#238443"),bty="n")
# --- Simulate multi-generational selection
# Figures are generated during simulation
par(mfrow=c(1,3))
tot_gens = 10
hsq = 0.50
N = 10e6
prev = 0.0364378
thresh = qnorm(prev,lower.tail=F)
gv_mum = rnorm(N,0,sqrt(hsq))
gv_dad = rnorm(N,0,sqrt(hsq))
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq))
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq))
est_polygenic = rep(NA,tot_gens)
for ( g in 1:tot_gens ) {
N = length(gv_mum)
# random mating
reord = sample(N)
gv_mum = gv_mum[reord]
reord = sample(N)
gv_dad = gv_dad[reord]
mean_parent_gv = (gv_mum + gv_dad)/2
wf_hsq = 2*var(mean_parent_gv)
cat("WFHSQ",wf_hsq,'\n')
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq))
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq))
gv_kid1 = rnorm(N,0,sqrt(wf_hsq/2)) + mean_parent_gv
y_kid1 = gv_kid1 + rnorm(N,0,sqrt(1-hsq))
if ( g == 1 ) {
plot( 0,0,type="n",xlim=c(-4,6),ylim=c(0,0.6),bty="n",las=1,xlab="",ylab="",yaxt="n",main="Polygenic")
abline(v=thresh,lty=3)
lines(density(y_kid1))
} else {
lines(density(y_kid1),col="#756bb175")
}
est_polygenic[g] = mean( y_kid1 > thresh ,na.rm=T )
cat(g,est_polygenic[g],'\n')
gv_kid2 = rnorm(N,0,sqrt(wf_hsq/2)) + mean_parent_gv
# remove kids where parents are cases
no_kids = y_mum > thresh | y_dad > thresh
gv_mum = gv_kid1[!no_kids]
gv_dad = gv_kid2[!no_kids]
}
# Simulate binomial / monogenic trait
hsq = 0.50
N = 10e6
prev = 0.01
thresh = qnorm(prev,lower.tail=F)
frq = 0.05
snp_mum_a = rbinom(N,1,frq)
snp_mum_b = rbinom(N,1,frq)
snp_dad_a = rbinom(N,1,frq)
snp_dad_b = rbinom(N,1,frq)
denom = 2*frq*(1-frq)
est = rep(NA,tot_gens)
for ( g in 1:tot_gens ) {
N = length(snp_mum_a)
# random mating
reord = sample(N)
snp_mum_a = snp_mum_a[reord]
snp_mum_b = snp_mum_b[reord]
reord = sample(N)
snp_dad_a = snp_dad_a[reord]
snp_dad_b = snp_dad_b[reord]
cat("frequency",mean(c(snp_mum_a+snp_mum_b,snp_dad_a+snp_dad_b))/2,'\n')
#cur_mean = mean(c(snp_mum_a + snp_mum_b,snp_dad_a + snp_dad_b))
cur_mean = frq * 2
y_mum = sqrt(hsq) * (snp_mum_a + snp_mum_b - cur_mean)/sqrt(denom) + rnorm(N,0,sqrt(1-hsq))
y_dad = sqrt(hsq) * (snp_dad_a + snp_dad_b - cur_mean)/sqrt(denom) + rnorm(N,0,sqrt(1-hsq))
hap_inherit = rbinom(N,1,0.5) == 0
snp_kid1_a = snp_mum_a
snp_kid1_a[ ! hap_inherit ] = snp_mum_b[ ! hap_inherit ]
hap_inherit = rbinom(N,1,0.5) == 0
snp_kid1_b = snp_dad_a
snp_kid1_b[ ! hap_inherit ] = snp_dad_b[ ! hap_inherit ]
y_kid1 = sqrt(hsq) * (snp_kid1_a + snp_kid1_b - cur_mean)/sqrt(denom) + rnorm(N,0,sqrt(1-hsq))
hap_inherit = rbinom(N,1,0.5) == 0
snp_kid2_a = snp_mum_a
snp_kid2_a[ ! hap_inherit ] = snp_mum_b[ ! hap_inherit ]
hap_inherit = rbinom(N,1,0.5) == 0
snp_kid2_b = snp_dad_a
snp_kid2_b[ ! hap_inherit ] = snp_dad_b[ ! hap_inherit ]
if ( g == 1 ) {
plot( 0,0,type="n",xlim=c(-4,6),ylim=c(0,0.6),bty="n",las=1,xlab="",ylab="",yaxt="n",main="Monogenic")
abline(v=thresh,lty=3)
lines(density(y_kid1))
} else {
lines(density(y_kid1),col="#f03b2075")
}
est[g] = mean( y_kid1 > thresh ,na.rm=T )
cat(g,"incidence:",est[g],'\n')
# remove kids where parents are cases
no_kids = y_mum > thresh | y_dad > thresh
snp_mum_a = snp_kid1_a[ !no_kids ]
snp_mum_b = snp_kid1_b[ !no_kids ]
snp_dad_a = snp_kid2_a[ !no_kids ]
snp_dad_b = snp_kid2_b[ !no_kids ]
}
plot( 1:tot_gens , est_polygenic , type="n" , las=1,ylab="Incidence",xlab="Generations",bty="n",ylim=c(0,0.04))
lines( 1:tot_gens , est_polygenic , col="#756bb1" )
points( 1:tot_gens , est_polygenic , col="#756bb1" ,pch=19)
lines( 1:tot_gens , est , col="#f03b20" )
points( 1:tot_gens , est , col="#f03b20" ,pch=19)
legend("topright",legend=c("Polygenic","Monogenic"),pch=19,bty="n",col=c("#756bb1","#f03b20"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment