Created
December 9, 2022 11:31
-
-
Save explodecomputer/9ca3225f338ab01fcc59f997678f982b to your computer and use it in GitHub Desktop.
Simulate siblings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(dplyr) | |
#' Genarte genotypes for nuclear families | |
#' | |
#' @param af Vector of allele frequencies for SNPs (length = number of SNPs to simulate) | |
#' @param nfam Number of families to generate | |
#' | |
#' @return List of genotype matrices for each family member type (mum, dad, sib1, sib2) | |
make_families <- function(af, nfam) | |
{ | |
nsnp <- length(af) | |
dads <- matrix(0, nfam, nsnp) | |
mums <- matrix(0, nfam, nsnp) | |
sibs1 <- matrix(0, nfam, nsnp) | |
sibs2 <- matrix(0, nfam, nsnp) | |
ibd <- matrix(0, nfam, nsnp) | |
ibs <- matrix(0, nfam, nsnp) | |
for(i in 1:nsnp) | |
{ | |
dad1 <- rbinom(nfam, 1, af[i]) + 1 | |
dad2 <- (rbinom(nfam, 1, af[i]) + 1) * -1 | |
mum1 <- rbinom(nfam, 1, af[i]) + 1 | |
mum2 <- (rbinom(nfam, 1, af[i]) + 1) * -1 | |
dadindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE) | |
dadh <- rep(NA, nfam) | |
dadh[dadindex] <- dad1[dadindex] | |
dadh[!dadindex] <- dad2[!dadindex] | |
mumindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE) | |
mumh <- rep(NA, nfam) | |
mumh[mumindex] <- mum1[mumindex] | |
mumh[!mumindex] <- mum2[!mumindex] | |
sib1 <- cbind(dadh, mumh) | |
dadindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE) | |
dadh <- rep(NA, nfam) | |
dadh[dadindex] <- dad1[dadindex] | |
dadh[!dadindex] <- dad2[!dadindex] | |
mumindex <- sample(c(TRUE, FALSE), nfam, replace=TRUE) | |
mumh <- rep(NA, nfam) | |
mumh[mumindex] <- mum1[mumindex] | |
mumh[!mumindex] <- mum2[!mumindex] | |
sib2 <- cbind(dadh, mumh) | |
ibd[,i] <- (as.numeric(sib1[,1] == sib2[,1]) + as.numeric(sib1[,2] == sib2[,2])) / 2 | |
sibs1[,i] <- rowSums(abs(sib1) - 1) | |
sibs2[,i] <- rowSums(abs(sib2) - 1) | |
dads[,i] <- dad1 - 1 + abs(dad2) - 1 | |
mums[,i] <- mum1 - 1 + abs(mum2) - 1 | |
# l[[i]] <- (sum(sib1[,1] == sib2[,1]) / nsnp + sum(sib1[,2] == sib2[,2]) / nsnp) / 2 | |
} | |
# This may not be correct - getting some really large values | |
ibs <- scale(sibs1) * scale(sibs2) | |
# Just count how many alleles are in common | |
ibs_unw <- abs(abs(sibs1 - sibs2) - 2) / 2 | |
return(list(dads=dads, mums=mums, sibs1=sibs1, sibs2=sibs2, ibd=ibd, ibs=ibs, ibs_unw=ibs_unw)) | |
} | |
#' Generate phenotypes from genotypes | |
#' | |
#' @param direct_effect Direct effect of each SNP on the phenotype (must be same length as number of SNPs) | |
#' @param indirect_effect Indirect effect of parental phenotypes on sibling phenotypes (length = 1) | |
#' @param geno Output from \code{make_families} | |
#' | |
#' @return Data frame of phenotype values for each family member type | |
generate_phen <- function(direct_effect, indirect_effect, geno) | |
{ | |
n <- nrow(geno$dads) | |
tibble( | |
phen_dads = geno$dads %*% direct_effect + rnorm(n), | |
phen_mums = geno$dads %*% direct_effect + rnorm(n), | |
phen_sibs1 = geno$sibs1 %*% direct_effect + | |
(phen_dads * indirect_effect)/2 + | |
(phen_mums * indirect_effect)/2 + | |
rnorm(n), | |
phen_sibs2 = geno$sibs1 %*% direct_effect + | |
(phen_dads * indirect_effect)/2 + | |
(phen_mums * indirect_effect)/2 + | |
rnorm(n), | |
) | |
} | |
geno <- make_families(af=0.3, nfam=100000) | |
glimpse(geno) | |
phen <- generate_phen(direct_effect=0.1, indirect_effect=0.1, geno) | |
glimpse(geno) | |
cor(phen) | |
## To do | |
## Implement regression method that will take sibling genotypes and phenotypes, and then estimate transmitted and untransmitted effects | |
## ... | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment