|
simulate_two_correlated_snps <- function(n, p1, p2, r) |
|
{ |
|
D <- r * sqrt((1-p1) * (1-p2) * p1 * p2) |
|
p11 <- p1 * p2 + D |
|
p12 <- p1 * (1 - p2) - D |
|
p21 <- (1 - p1) * p2 - D |
|
p22 <- (1-p1) * (1-p2) + D |
|
|
|
if(any(c(p11, p12, p21, p22) < 0)) |
|
{ |
|
stop("Impossible r value for specified allele frequencies") |
|
} |
|
|
|
x1 <- sample(1:4, n, replace=TRUE, prob=c(p11, p12, p21, p22)) |
|
x2 <- sample(1:4, n, replace=TRUE, prob=c(p11, p12, p21, p22)) |
|
|
|
geno <- expand.grid(0:1,0:1) |
|
|
|
X1 <- geno[x1,] |
|
X2 <- geno[x2,] |
|
|
|
X <- X1 + X2 |
|
return(X) |
|
} |
|
|
|
|
|
simulate_snp_correlated_to_existing_snp <- function(x, p2, r) |
|
{ |
|
n <- length(x) |
|
p1 <- sum(x) / n / 2 |
|
|
|
D <- r * sqrt((1-p1) * (1-p2) * p1 * p2) |
|
p11 <- p1 * p2 + D |
|
p12 <- p1 * (1 - p2) - D |
|
p21 <- (1 - p1) * p2 - D |
|
p22 <- (1-p1) * (1-p2) + D |
|
|
|
if(any(c(p11, p12, p21, p22) < 0)) |
|
{ |
|
stop("Impossible r value for specified allele frequencies") |
|
} |
|
|
|
xh11 <- xh12 <- array(0, n) |
|
xh11[x == 0] <- 0 |
|
xh11[x == 2] <- 1 |
|
xh12[x == 0] <- 0 |
|
xh12[x == 2] <- 1 |
|
|
|
het <- which(x == 1) |
|
index <- sample(het, length(het)/2, replace=FALSE) |
|
xh11[index] <- 1 |
|
xh12[het[!het %in% index]] <- 1 |
|
|
|
xh21 <- xh22 <- array(0, n) |
|
|
|
indexh10 <- xh11 == 0 |
|
xh21[indexh10] <- sample(0:1, sum(indexh10), replace=TRUE, prob=c(p11/(p11+p12), p12/(p11+p12))) |
|
|
|
indexh11 <- xh11 == 1 |
|
xh21[indexh11] <- sample(0:1, sum(indexh11), replace=TRUE, prob=c(p21/(p21+p22), p22/(p21+p22))) |
|
|
|
indexh20 <- xh12 == 0 |
|
xh22[indexh20] <- sample(0:1, sum(indexh20), replace=TRUE, prob=c(p11/(p11+p12), p12/(p11+p12))) |
|
|
|
indexh21 <- xh12 == 1 |
|
xh22[indexh21] <- sample(0:1, sum(indexh21), replace=TRUE, prob=c(p21/(p21+p22), p22/(p21+p22))) |
|
|
|
return(xh21+xh22) |
|
} |