Last active
December 26, 2024 18:31
-
-
Save sashagusev/86c1323950be3284de9d083478f5f36c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulations and visualization of a heritable liability threshold model across generations and in families | |
# --- | |
# Liability functions (from Baselmans et al.) | |
# See: https://github.com/BartBaselmans/CHARRGe | |
risk_from_liability = function( prev , h2 ) { | |
# Threshold | |
Tx = -qnorm(prev,0,1) | |
z = dnorm(Tx) | |
i = z/prev | |
# One affected parent / sibling | |
a = 0.5 | |
Tx1 = (Tx - a * i * h2) / (sqrt(1 - a * a * h2 * h2 * i * (i-Tx))) | |
Kx1 = 1 - pnorm(Tx1) | |
# Two affected parents | |
Tx2 = (Tx -i*h2) / sqrt(1-(0.5*h2*h2*i)*(i-Tx)) | |
Kx2 = 1 - pnorm(Tx2) | |
return( c(Kx1,Kx2) ) | |
} | |
# Set population-level parameters | |
prev = 0.01 | |
thresh = qnorm(prev,lower.tail=F) | |
N = 10e6 | |
seeds = 10 | |
# --- Get the analytically derived risks | |
for ( hsq in c(0.1,0.2,0.5) ) { | |
derived_risk = risk_from_liability( prev , hsq ) | |
# % of cases with 1 affected relative | |
pct_1 = prev * (1-prev) * 2 * derived_risk[1] / prev | |
# % of cases with 2 affected relative | |
pct_2 = prev * prev * derived_risk[2] / prev | |
# % of cases with 0 affected relatives | |
pct_0 = 1 - pct_1 - pct_2 | |
cat( hsq , (pct_0 * prev) / ((1-prev) * (1-prev)) , derived_risk , '\n' ) | |
} | |
# --- Barplots of risk in offspring | |
# 0.009916982 0.01406147 0.01948549 | |
# 0.009810279 0.01926405 0.03516929 | |
# 0.009304792 0.04372749 0.1456944 | |
mat = matrix( c(0.009916982,0.009810279,0.009304792,0.01406147,0.01926405,0.04372749,0.01948549,0.03516929,0.1456944) , nrow=3 , byrow=T ) | |
xval = barplot(mat,col=c("#ffffff","#9ecae1","#3182bd"),beside=T,names=c("10%","20%","50%"),xlab="Heritability",las=1,ylab="Lifetime risk in offspring",ylim=c(0,0.5),border=c("#3182bd")) | |
text( xval[1,] , mat[1,] , paste(format(mat[1,]*100,digits=2),"%",sep='') ,pos=3 ) | |
text( xval[2,] , mat[2,] , paste(format(mat[2,]*100,digits=2),"%",sep='') ,pos=3 ) | |
text( xval[3,] , mat[3,] , paste(format(mat[3,]*100,digits=2),"%",sep='') ,pos=3 ) | |
abline(h=0.01,lty=3) | |
legend("topleft",legend=c("Zero affected parents","One affected parent/sibling","Both affected parents"),fill=c("#ffffff","#9ecae1","#3182bd"),bty="n") | |
# --- Barplots for distribution of cases | |
mat = matrix(NA,nrow=3,ncol=4) | |
i = 1 | |
for ( hsq in c(1,0.5,0.2,0.1) ) { | |
derived_risk = risk_from_liability( prev , hsq ) | |
# % of cases with 1 affected relative | |
pct_1 = prev * (1-prev) * 2 * derived_risk[1] / prev | |
# % of cases with 2 affected relative | |
pct_2 = prev * prev * derived_risk[2] / prev | |
# % of cases with 0 affected relatives | |
pct_0 = 1 - pct_1 - pct_2 | |
mat[,i] = c(pct_0,pct_1,pct_2) | |
i = i+1 | |
} | |
par(xpd=T) | |
yval = barplot(mat,las=1,horiz=T,xlim=c(0,1.1),col=c("#ffffff","#9ecae1","#3182bd"),xlab="Proportion of cases (for a condition with 1% prevalence)", ylab="Heritability",names=c("100%","50%","20%","10%")) | |
text( 0 , yval , paste(format(mat[1,]*100,digits=2),"%",sep='') , pos=4 , cex=0.75 ) | |
text( mat[1,] , yval , paste(format(mat[2,]*100,digits=2),"%",sep='') , pos=2 , cex=0.75 , col="#9ecae1" ) | |
text( 1 , yval , paste(format(mat[3,]*100,digits=1),"%",sep='') , pos=4 , cex=0.75 ,col="#3182bd") | |
legend("topleft",inset=c(0,-0.2),ncol=3,cex=0.9,legend=c("No affected parents","One affected parent","Both affected parents"),fill=c("#ffffff","#9ecae1","#3182bd"),bty="n") | |
# --- Confirm analytical risks with simulations | |
for ( hsq in c(0.1,0.2,0.5) ) { | |
est_0deg = rep(NA,seeds) | |
est_1deg = rep(NA,seeds) | |
est_2deg = rep(NA,seeds) | |
tot_0deg = rep(NA,seeds) | |
tot_1deg = rep(NA,seeds) | |
tot_2deg = rep(NA,seeds) | |
for ( s in 1:seeds ) { | |
gv_mum = rnorm(N,0,sqrt(hsq)) | |
gv_dad = rnorm(N,0,sqrt(hsq)) | |
mean_parent_gv = (gv_mum + gv_dad)/2 | |
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq)) | |
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq)) | |
gv_kid1 = rnorm(N,0,sqrt(hsq/2)) + mean_parent_gv | |
y_kid1 = gv_kid1 + rnorm(N,0,sqrt(1-hsq)) | |
# population risk | |
# mean( y_kid1 > thresh ) | |
# risk given both parents are CONTROLS | |
est_0deg[s] = mean( y_kid1[ y_mum < thresh & y_dad < thresh ] > thresh ) | |
tot_0deg[s] = sum( y_kid1[ y_mum < thresh & y_dad < thresh ] > thresh ) | |
# risk given one parent or sibling | |
est_1deg[s] = mean( y_kid1[ y_mum > thresh | y_dad > thresh ] > thresh ) | |
tot_1deg[s] = sum( y_kid1[ y_mum > thresh | y_dad > thresh ] > thresh ) | |
# risk given both parents | |
est_2deg[s] = mean( y_kid1[ y_mum > thresh & y_dad > thresh ] > thresh ) | |
tot_2deg[s] = sum( y_kid1[ y_mum > thresh & y_dad > thresh ] > thresh ) | |
cat( s , mean(est_0deg,na.rm=T) , mean(est_1deg,na.rm=T) , mean(est_2deg,na.rm=T) , '\n' ) | |
cat( s , mean(tot_0deg,na.rm=T) , mean(tot_1deg,na.rm=T) , mean(tot_2deg,na.rm=T) , '\n' ) | |
} | |
} | |
# --- Simulate and visualize full liability distribution | |
liab_pop = vector() | |
liab_case = vector() | |
liab_0deg = vector() | |
liab_1deg = vector() | |
liab_2deg = vector() | |
hsq = 0.5 | |
est_0deg = rep(NA,seeds) | |
est_1deg = rep(NA,seeds) | |
est_2deg = rep(NA,seeds) | |
for ( s in 1:seeds ) { | |
gv_mum = rnorm(N,0,sqrt(hsq)) | |
gv_dad = rnorm(N,0,sqrt(hsq)) | |
mean_parent_gv = (gv_mum + gv_dad)/2 | |
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq)) | |
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq)) | |
gv_kid1 = rnorm(N,0,sqrt(hsq/2)) + mean_parent_gv | |
y_kid1 = gv_kid1 + rnorm(N,0,sqrt(1-hsq)) | |
# population risk | |
# mean( y_kid1 > thresh ) | |
liab_pop = c(liab_pop,sample(y_kid1,10e3)) | |
# liability in cases | |
keep = y_kid1 > thresh | |
liab_case = c(liab_case,sample(y_kid1[ keep ],min(sum(keep),10e3))) | |
# risk given both parents are CONTROLS | |
keep = y_mum < thresh & y_dad < thresh | |
est_0deg[s] = mean( y_kid1[ keep ] > thresh ) | |
liab_0deg = c(liab_0deg,sample(y_kid1[ keep ],min(sum(keep),10e3))) | |
# risk given one parent or sibling | |
keep = y_mum > thresh | y_dad > thresh | |
est_1deg[s] = mean( y_kid1[ keep ] > thresh ) | |
liab_1deg = c(liab_1deg,sample(y_kid1[ keep ],min(sum(keep),10e3))) | |
# risk given both parents | |
keep = y_mum > thresh & y_dad > thresh | |
est_2deg[s] = mean( y_kid1[ keep ] > thresh ) | |
liab_2deg = c(liab_2deg,sample(y_kid1[ keep ],min(sum(keep),10e3))) | |
cat( s , mean(est_0deg,na.rm=T) , mean(est_1deg,na.rm=T) , mean(est_2deg,na.rm=T) , '\n' ) | |
} | |
plot(0,0,type="n",xlim=c(-4,6),ylim=c(0,0.5),xlab="Liability",yaxt="n",ylab="",bty="n") | |
polygon( density(liab_pop,adjust=2) , border="NA" , col="#eeeeee" ) | |
lines( density(liab_1deg,adjust=2) , col="#9ecae1" , lwd=2 ) | |
lines( density(liab_2deg,adjust=2) , col="#3182bd" , lwd=2 ) | |
points( 0 , 0 , pch=19 , cex=1 , col="#bbbbbb") | |
points( mean(liab_1deg) , 0 , pch=19 , cex=1 , col="#9ecae1") | |
points( mean(liab_2deg) , 0 , pch=19 , cex=1 , col="#3182bd") | |
points( mean(liab_case) , 0 , pch=19 , cex=1 , col="#756bb1") | |
abline(v=thresh,lty=3) | |
legend("topleft",legend=c("Population","One Parent Affected","Both Parents Affected","Cases"),pch=19,col=c("#bbbbbb","#9ecae1","#3182bd","#756bb1"),bty="n") | |
# lines( density(liab_case,adjust=2) , col="#756bb1" , lwd=2 ) | |
# --- Simulate genetically correlated traits | |
N = 10e6 | |
thresh = qnorm(0.01,lower.tail=F) | |
seeds = 10 | |
for ( hsq in c(0.5) ) { | |
for ( rg in c(0.3,0.7) ) { | |
est_1 = rep(NA,seeds) | |
est_2 = rep(NA,seeds) | |
for ( s in 1:seeds ) { | |
gv_mum = rnorm(N,0,sqrt(hsq)) | |
gv_dad = rnorm(N,0,sqrt(hsq)) | |
mean_parent_gv = (gv_mum + gv_dad)/2 | |
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq)) | |
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq)) | |
mean_parent_gv = (gv_mum + gv_dad)/2 | |
gv_kid = rnorm(N,0,sqrt(hsq/2)) + mean_parent_gv | |
y_kid = gv_kid + rnorm(N,0,sqrt(1-hsq)) | |
# Offspring risk given one parent is a case | |
est_1[s] = mean( y_kid[ y_mum > thresh | y_dad > thresh ] > thresh ) | |
# Generate correlated traits | |
y_mum1 = scale(scale(gv_mum) * rg + rnorm(N,0,sqrt(1-rg^2))) * sqrt(hsq) + rnorm(N,0,sqrt(1-hsq)) | |
y_dad1 = scale(scale(gv_dad) * rg + rnorm(N,0,sqrt(1-rg^2))) * sqrt(hsq) + rnorm(N,0,sqrt(1-hsq)) | |
# Offspring risk given at least one parent has BOTH traits | |
est_2[s] = mean( y_kid[ (y_mum > thresh & y_mum1 > thresh) | (y_dad > thresh & y_dad1 > thresh) ] > thresh ) | |
cat( s , mean(est_1,na.rm=T) , mean(est_2,na.rm=T) , '\n' ) | |
} | |
} | |
} | |
# Simulate correlated traits with a correlated environment | |
for ( hsq in c(0.5) ) { | |
for ( rg in c(0, 0.3,0.7) ) { | |
est_1 = rep(NA,seeds) | |
est_2 = rep(NA,seeds) | |
for ( s in 1:seeds ) { | |
gv_mum = rnorm(N,0,sqrt(hsq)) | |
gv_dad = rnorm(N,0,sqrt(hsq)) | |
mean_parent_gv = (gv_mum + gv_dad)/2 | |
env_mum = rnorm(N,0,sqrt(1-hsq)) | |
env_dad = rnorm(N,0,sqrt(1-hsq)) | |
y_mum = gv_mum + env_mum | |
y_dad = gv_dad + env_dad | |
mean_parent_gv = (gv_mum + gv_dad)/2 | |
gv_kid = rnorm(N,0,sqrt(hsq/2)) + mean_parent_gv | |
y_kid = gv_kid + rnorm(N,0,sqrt(1-hsq)) | |
est_1[s] = mean( y_kid[ y_mum > thresh | y_dad > thresh ] > thresh ) | |
y_mum1 = scale(scale(gv_mum) * rg + rnorm(N,0,sqrt(1-rg^2))) * sqrt(hsq) + env_mum | |
y_dad1 = scale(scale(gv_dad) * rg + rnorm(N,0,sqrt(1-rg^2))) * sqrt(hsq) + env_dad | |
est_2[s] = mean( y_kid[ (y_mum > thresh & y_mum1 > thresh) | (y_dad > thresh & y_dad1 > thresh) ] > thresh ) | |
cat( s , mean(est_1,na.rm=T) , mean(est_2,na.rm=T) , '\n' ) | |
} | |
} | |
} | |
# --- Barplot of correlated trait risk | |
# 0.0433239 0.05368054 0.06495996 | |
mat = matrix( c(0.0433239,0.05368054,0.06495996,0.02742762 ,0.03327447,0.04161584 ) , nrow=2 , byrow=T ) | |
xval = barplot(mat,col=c("#78c679","#238443"),beside=T,names=c("None","0.3","0.7"),xlab="Genetic correlation with second trait",las=1,ylab="Probability in offspring",ylim=c(0,0.5),border=c("#238443")) | |
text( xval[1,] , mat[1,] , paste(format(mat[1,]*100,digits=2),"%",sep='') ,pos=3 ) | |
text( xval[2,] , mat[2,] , paste(format(mat[2,]*100,digits=2),"%",sep='') ,pos=3 ) | |
abline(h=0.01,lty=3) | |
legend("topleft",legend=c("Independent environment","Correlated environment"),fill=c("#78c679","#238443"),bty="n") | |
# --- Simulate multi-generational selection | |
# Figures are generated during simulation | |
par(mfrow=c(1,3)) | |
tot_gens = 10 | |
hsq = 0.50 | |
N = 10e6 | |
prev = 0.0364378 | |
thresh = qnorm(prev,lower.tail=F) | |
gv_mum = rnorm(N,0,sqrt(hsq)) | |
gv_dad = rnorm(N,0,sqrt(hsq)) | |
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq)) | |
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq)) | |
est_polygenic = rep(NA,tot_gens) | |
for ( g in 1:tot_gens ) { | |
N = length(gv_mum) | |
# random mating | |
reord = sample(N) | |
gv_mum = gv_mum[reord] | |
reord = sample(N) | |
gv_dad = gv_dad[reord] | |
mean_parent_gv = (gv_mum + gv_dad)/2 | |
wf_hsq = 2*var(mean_parent_gv) | |
cat("WFHSQ",wf_hsq,'\n') | |
y_mum = gv_mum + rnorm(N,0,sqrt(1-hsq)) | |
y_dad = gv_dad + rnorm(N,0,sqrt(1-hsq)) | |
gv_kid1 = rnorm(N,0,sqrt(wf_hsq/2)) + mean_parent_gv | |
y_kid1 = gv_kid1 + rnorm(N,0,sqrt(1-hsq)) | |
if ( g == 1 ) { | |
plot( 0,0,type="n",xlim=c(-4,6),ylim=c(0,0.6),bty="n",las=1,xlab="",ylab="",yaxt="n",main="Polygenic") | |
abline(v=thresh,lty=3) | |
lines(density(y_kid1)) | |
} else { | |
lines(density(y_kid1),col="#756bb175") | |
} | |
est_polygenic[g] = mean( y_kid1 > thresh ,na.rm=T ) | |
cat(g,est_polygenic[g],'\n') | |
gv_kid2 = rnorm(N,0,sqrt(wf_hsq/2)) + mean_parent_gv | |
# remove kids where parents are cases | |
no_kids = y_mum > thresh | y_dad > thresh | |
gv_mum = gv_kid1[!no_kids] | |
gv_dad = gv_kid2[!no_kids] | |
} | |
# Simulate binomial / monogenic trait | |
hsq = 0.50 | |
N = 10e6 | |
prev = 0.01 | |
thresh = qnorm(prev,lower.tail=F) | |
frq = 0.05 | |
snp_mum_a = rbinom(N,1,frq) | |
snp_mum_b = rbinom(N,1,frq) | |
snp_dad_a = rbinom(N,1,frq) | |
snp_dad_b = rbinom(N,1,frq) | |
denom = 2*frq*(1-frq) | |
est = rep(NA,tot_gens) | |
for ( g in 1:tot_gens ) { | |
N = length(snp_mum_a) | |
# random mating | |
reord = sample(N) | |
snp_mum_a = snp_mum_a[reord] | |
snp_mum_b = snp_mum_b[reord] | |
reord = sample(N) | |
snp_dad_a = snp_dad_a[reord] | |
snp_dad_b = snp_dad_b[reord] | |
cat("frequency",mean(c(snp_mum_a+snp_mum_b,snp_dad_a+snp_dad_b))/2,'\n') | |
#cur_mean = mean(c(snp_mum_a + snp_mum_b,snp_dad_a + snp_dad_b)) | |
cur_mean = frq * 2 | |
y_mum = sqrt(hsq) * (snp_mum_a + snp_mum_b - cur_mean)/sqrt(denom) + rnorm(N,0,sqrt(1-hsq)) | |
y_dad = sqrt(hsq) * (snp_dad_a + snp_dad_b - cur_mean)/sqrt(denom) + rnorm(N,0,sqrt(1-hsq)) | |
hap_inherit = rbinom(N,1,0.5) == 0 | |
snp_kid1_a = snp_mum_a | |
snp_kid1_a[ ! hap_inherit ] = snp_mum_b[ ! hap_inherit ] | |
hap_inherit = rbinom(N,1,0.5) == 0 | |
snp_kid1_b = snp_dad_a | |
snp_kid1_b[ ! hap_inherit ] = snp_dad_b[ ! hap_inherit ] | |
y_kid1 = sqrt(hsq) * (snp_kid1_a + snp_kid1_b - cur_mean)/sqrt(denom) + rnorm(N,0,sqrt(1-hsq)) | |
hap_inherit = rbinom(N,1,0.5) == 0 | |
snp_kid2_a = snp_mum_a | |
snp_kid2_a[ ! hap_inherit ] = snp_mum_b[ ! hap_inherit ] | |
hap_inherit = rbinom(N,1,0.5) == 0 | |
snp_kid2_b = snp_dad_a | |
snp_kid2_b[ ! hap_inherit ] = snp_dad_b[ ! hap_inherit ] | |
if ( g == 1 ) { | |
plot( 0,0,type="n",xlim=c(-4,6),ylim=c(0,0.6),bty="n",las=1,xlab="",ylab="",yaxt="n",main="Monogenic") | |
abline(v=thresh,lty=3) | |
lines(density(y_kid1)) | |
} else { | |
lines(density(y_kid1),col="#f03b2075") | |
} | |
est[g] = mean( y_kid1 > thresh ,na.rm=T ) | |
cat(g,"incidence:",est[g],'\n') | |
# remove kids where parents are cases | |
no_kids = y_mum > thresh | y_dad > thresh | |
snp_mum_a = snp_kid1_a[ !no_kids ] | |
snp_mum_b = snp_kid1_b[ !no_kids ] | |
snp_dad_a = snp_kid2_a[ !no_kids ] | |
snp_dad_b = snp_kid2_b[ !no_kids ] | |
} | |
plot( 1:tot_gens , est_polygenic , type="n" , las=1,ylab="Incidence",xlab="Generations",bty="n",ylim=c(0,0.04)) | |
lines( 1:tot_gens , est_polygenic , col="#756bb1" ) | |
points( 1:tot_gens , est_polygenic , col="#756bb1" ,pch=19) | |
lines( 1:tot_gens , est , col="#f03b20" ) | |
points( 1:tot_gens , est , col="#f03b20" ,pch=19) | |
legend("topright",legend=c("Polygenic","Monogenic"),pch=19,bty="n",col=c("#756bb1","#f03b20")) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment