Last active
February 9, 2017 11:24
-
-
Save explodecomputer/888d1cd4e99cf0b40bbe59973a95cee2 to your computer and use it in GitHub Desktop.
simulate sib pairs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(ggplot2) | |
| library(reshape2) | |
| makePhen <- function(effs, indep, vy=1, vx=rep(1, length(effs))) | |
| { | |
| if(is.null(dim(indep))) indep <- cbind(indep) | |
| stopifnot(ncol(indep) == length(effs)) | |
| stopifnot(length(vx) == length(effs)) | |
| cors <- effs * vx / sqrt(vx) / sqrt(vy) | |
| stopifnot(sum(cors^2) <= 1) | |
| cors <- c(cors, sqrt(1-sum(cors^2))) | |
| indep <- t(t(scale(cbind(indep, rnorm(nrow(indep))))) * cors * c(vx, 1)) | |
| y <- drop(scale(rowSums(indep)) * sqrt(vy)) | |
| return(y) | |
| } | |
| base_population <- function(nparents, allele_frequency, effect_size) | |
| { | |
| nsnp <- length(allele_frequency) | |
| males1 <- matrix(FALSE, nparents, nsnp) | |
| males2 <- matrix(FALSE, nparents, nsnp) | |
| females1 <- matrix(FALSE, nparents, nsnp) | |
| females2 <- matrix(FALSE, nparents, nsnp) | |
| for(i in 1:nsnp) | |
| { | |
| males1[,i] <- rbinom(nparents, 1, allele_frequency[i]) | |
| males2[,i] <- rbinom(nparents, 1, allele_frequency[i]) | |
| females1[,i] <- rbinom(nparents, 1, allele_frequency[i]) | |
| females2[,i] <- rbinom(nparents, 1, allele_frequency[i]) | |
| } | |
| population <- list(males1=males1, males2=males2, females1=females1, females2=females2) | |
| return(population) | |
| } | |
| reproduce <- function(population) | |
| { | |
| # Make two children per family | |
| nparents <- nrow(population$males1) | |
| nchildren <- nparents * 2 | |
| population_size <- nchildren | |
| nsnp <- ncol(population$males1) | |
| male_partner <- 1:nparents | |
| female_partner <- 1:nparents | |
| x <- population$males1[male_partner, , drop=FALSE] | |
| locus <- sample(1:ncol(x), ncol(x)/2, replace=FALSE) | |
| x[, locus] <- population$males2[male_partner, locus, drop=FALSE] | |
| child_paternal1 <- x | |
| x <- population$males1[male_partner, , drop=FALSE] | |
| locus <- sample(1:ncol(x), ncol(x)/2, replace=FALSE) | |
| x[, locus] <- population$males2[male_partner, locus, drop=FALSE] | |
| child_paternal2 <- x | |
| x <- population$females1[female_partner, , drop=FALSE] | |
| locus <- sample(1:ncol(x), ncol(x)/2, replace=FALSE) | |
| x[, locus] <- population$females2[female_partner, locus, drop=FALSE] | |
| child_maternal1 <- x | |
| x <- population$females1[female_partner, , drop=FALSE] | |
| locus <- sample(1:ncol(x), ncol(x)/2, replace=FALSE) | |
| x[, locus] <- population$females2[female_partner, locus, drop=FALSE] | |
| child_maternal2 <- x | |
| fam <- data.frame( | |
| fid = rep(1:nparents, times=4), | |
| iid = c(1:(nparents*4)), | |
| pat = c(rep(0, nparents*2), rep(1:nparents, times=2)), | |
| mat = c(rep(0, nparents*2), rep(1:nparents+nparents, times=2)), | |
| sex = c(rep(1:2, each=nparents), rep(1:2, each=nparents)), | |
| phen = -9 | |
| ) | |
| paternal <- rbind( | |
| population$males1, | |
| population$females1, | |
| child_paternal1, | |
| child_paternal2 | |
| ) | |
| maternal <- rbind( | |
| population$males2, | |
| population$females2, | |
| child_maternal1, | |
| child_maternal2 | |
| ) | |
| return(list(pat = paternal, mat = maternal, id=fam)) | |
| } | |
| make_families <- function(nparents, allele_frequency, effect_size) | |
| { | |
| population <- base_population(nparents, allele_frequency, effect_size) | |
| return(reproduce(population)) | |
| } | |
| dat <- make_population(20000, runif(100), NULL) | |
| get_sibling_similarities <- function(dat) | |
| { | |
| nparents <- nrow(dat$id) / 4 | |
| # sib1 <- 1:nparents + nparents*2 | |
| # sib2 <- 1:nparents + nparents*3 | |
| sib1 <- 1:nparents | |
| sib2 <- 1:nparents + nparents | |
| ibd <- (dat$pat[sib1,] == dat$pat[sib2,]) + (dat$mat[sib1,] == dat$mat[sib2,]) | |
| pihat <- rowMeans(ibd) / 2 | |
| hist(pihat) | |
| } | |
| get_freqs <- function(generations, fitness_effects) | |
| { | |
| require(reshape2) | |
| require(lubridate) | |
| a <- matrix(sapply(generations, function(x) | |
| { | |
| a <- matrix(sapply(x, function(y) | |
| { | |
| colMeans(y) | |
| }), ncol=4) | |
| rowMeans(a) | |
| }), ncol=length(generations)) | |
| a <- melt(a, c("mutation", "generation"), value.name="freq") | |
| b <- data.frame(mutation=1:length(fitness_effects), fitness_effects=fitness_effects) | |
| a <- merge(a, b, by="mutation") | |
| a <- a[order(a$generation, a$mutation), ] | |
| # a$date <- Sys.Date() | |
| # a$date <- a$date + years(25 * (a$generation-1)) | |
| a$year <- 2016 + (a$generation - 1) * 25 | |
| names(a) <- c("mutation", "generation", "allele_frequency", "fitness_effects", "year") | |
| a$mutation <- paste0("position_", formatC(a$mutation, width=2, format="d", flag="0")) | |
| return(a) | |
| } | |
| run_generations <- function(generations, population_size, fitness_effects, number_of_generations, mutation_rate_per_generation_per_locus) | |
| { | |
| l <- length(generations) | |
| for(i in 1:number_of_generations) | |
| { | |
| generations[[i + l]] <- reproduce(generations[[l + i - 1]], population_size, fitness_effects, mutation_rate_per_generation_per_locus) | |
| } | |
| return(generations) | |
| } | |
| plot_people <- function(population) | |
| { | |
| dat1 <- data.frame(val=rowSums(population$males1) + rowSums(population$males2), sex="Male") | |
| dat1 <- dat1[order(dat1$val, decreasing=TRUE), ] | |
| dat1$id <- 1:nrow(dat1) | |
| d <- ceiling(sqrt(nrow(dat1))) | |
| dat1$x <- ceiling(dat1$id / d) | |
| dat1$y <- (dat1$id-1) %% d | |
| dat2 <- data.frame(val=rowSums(population$females1) + rowSums(population$females2), sex="Female") | |
| dat2 <- dat2[order(dat2$val, decreasing=TRUE), ] | |
| dat2$id <- 1:nrow(dat2) | |
| d <- ceiling(sqrt(nrow(dat2))) | |
| dat2$x <- ceiling(dat2$id / d) | |
| dat2$y <- (dat2$id-1) %% d | |
| dat <- rbind(dat1, dat2) | |
| p <- ggplot(dat, aes(x=x, y=y)) + | |
| geom_point(aes(colour=val), size=5) + | |
| scale_colour_gradient(low="red", high="blue") + | |
| facet_grid(. ~ sex) + | |
| theme(axis.text=element_blank(), axis.ticks=element_blank(), axis.title=element_blank(), legend.position="none") | |
| return(p) | |
| } | |
| plot_frequencies <- function(f) | |
| { | |
| p <- ggplot(f, aes(x=generation, y=freq)) + | |
| geom_line(aes(group=mutation, col=fitness_effects)) + | |
| ylim(c(0,1)) + | |
| scale_colour_gradientn(colours=c("blue", "black", "red")) | |
| return(p) | |
| } | |
| library(dplyr) | |
| make_families <- function(af, nfam) | |
| { | |
| nsnp <- length(af) | |
| dads <- list() | |
| mums <- list() | |
| sibs1 <- list() | |
| sibs2 <- list() | |
| for(i in 1:nfam) | |
| { | |
| dad1 <- rbinom(nsnp, 1, af1) | |
| dad2 <- rbinom(nsnp, 1, af1) | |
| mum1 <- rbinom(nsnp, 1, af1) | |
| mum2 <- rbinom(nsnp, 1, af1) | |
| dadindex <- sample(c(TRUE, FALSE), nsnp, replace=TRUE) | |
| dadh <- rep(NA, nsnp) | |
| dadh[dadindex] <- dad1[dadindex] | |
| dadh[!dadindex] <- dad2[!dadindex] | |
| mumindex <- sample(c(TRUE, FALSE), nsnp, replace=TRUE) | |
| mumh <- rep(NA, nsnp) | |
| mumh[mumindex] <- mum1[mumindex] | |
| mumh[!mumindex] <- mum2[!mumindex] | |
| sib1 <- cbind(dadh, mumh) | |
| dadindex <- sample(c(TRUE, FALSE), nsnp, replace=TRUE) | |
| dadh <- rep(NA, nsnp) | |
| dadh[dadindex] <- dad1[dadindex] | |
| dadh[!dadindex] <- dad2[!dadindex] | |
| mumindex <- sample(c(TRUE, FALSE), nsnp, replace=TRUE) | |
| mumh <- rep(NA, nsnp) | |
| mumh[mumindex] <- mum1[mumindex] | |
| mumh[!mumindex] <- mum2[!mumindex] | |
| sib2 <- cbind(dadh, mumh) | |
| sibs1[[i]] <- rowSums(sib1) | |
| sibs2[[i]] <- rowSums(sib2) | |
| dads[[i]] <- dad1 + dad2 | |
| mums[[i]] <- mum1 + mum2 | |
| # l[[i]] <- (sum(sib1[,1] == sib2[,1]) / nsnp + sum(sib1[,2] == sib2[,2]) / nsnp) / 2 | |
| } | |
| sibs1 <- do.call(rbind, sibs1) | |
| sibs2 <- do.call(rbind, sibs2) | |
| dads <- do.call(rbind, dads) | |
| mums <- do.call(rbind, mums) | |
| return(list(dads=dads, mums=mums, sibs1=sibs1, sibs2=sibs2)) | |
| } | |
| makePhen <- function(effs, indep, vy=1, vx=rep(1, length(effs)), my=0) | |
| { | |
| if(is.null(dim(indep))) indep <- cbind(indep) | |
| stopifnot(ncol(indep) == length(effs)) | |
| stopifnot(length(vx) == length(effs)) | |
| cors <- effs * vx / sqrt(vx) / sqrt(vy) | |
| stopifnot(sum(cors^2) <= 1) | |
| cors <- c(cors, sqrt(1-sum(cors^2))) | |
| indep <- t(t(scale(cbind(indep, rnorm(nrow(indep))))) * cors * c(vx, 1)) | |
| y <- drop(scale(rowSums(indep)) * sqrt(vy)) + my | |
| return(y) | |
| } | |
| chooseEffects <- function(nsnp, totvar, sqrt=TRUE) | |
| { | |
| eff <- rnorm(nsnp) | |
| aeff <- abs(eff) | |
| sc <- sum(aeff) / totvar | |
| out <- eff / sc | |
| if(sqrt) | |
| { | |
| out <- sqrt(abs(out)) * sign(out) | |
| } | |
| return(out) | |
| } | |
| make_phenotypes <- function(fam, eff_gx, eff_xy, vx, vy, mx, my) | |
| { | |
| lapply(fam, function(g) | |
| { | |
| u <- rnorm(nrow(g)) | |
| x <- makePhen(c(eff_gx), cbind(g), vy=vx, my=mx) | |
| y <- makePhen(c(eff_xy), cbind(x), vy=vy, my=my) | |
| return(data.frame(x=x, y=y)) | |
| }) | |
| } | |
| join_populations <- function(l) | |
| { | |
| dads <- do.call(rbind, lapply(l, function(x) x$dads)) | |
| mums <- do.call(rbind, lapply(l, function(x) x$mums)) | |
| sibs1 <- do.call(rbind, lapply(l, function(x) x$sibs1)) | |
| sibs2 <- do.call(rbind, lapply(l, function(x) x$sibs2)) | |
| return(list(dads=dads, mums=mums, sibs1=sibs1, sibs2=sibs2)) | |
| } | |
| a <- make_families(runif(90), 10000) | |
| b <- make_families(runif(90), 10000) | |
| eff <- chooseEffects(nsnp, 0.1) | |
| ap <- make_phenotypes(a, eff, sqrt(0), 4, 2, 40, 20) | |
| bp <- make_phenotypes(b, eff, sqrt(0), 4, 2, 23, 10) | |
| ab <- join_populations(list(a,b)) | |
| abp <- join_populations(list(ap,bp)) | |
| lapply(ap, cor) | |
| lapply(bp, cor) | |
| lapply(abp, cor) | |
| bx <- summary(lm(ap$dad$x ~ a$dad))$coefficients[-1,1] | |
| by <- summary(lm(ap$dad$y ~ a$dad))$coefficients[-1,1] | |
| plot(by ~ bx) | |
| summary(lm(by ~ bx)) | |
| bxab <- summary(lm(abp$dad$x ~ ab$dad))$coefficients[-1,1] | |
| by <- summary(lm(abp$dad$y ~ ab$dad))$coefficients[-1,1] | |
| plot(by ~ bx) | |
| summary(lm(by ~ bx)) | |
| bxab <- summary(lm(abp$mum$x ~ ab$mum))$coefficients[-1,1] | |
| by <- summary(lm(abp$mum$y ~ ab$mum))$coefficients[-1,1] | |
| plot(by ~ bx) | |
| summary(lm(by ~ bx)) | |
| sbx <- rep(NA, nsnp) | |
| sby <- rep(NA, nsnp) | |
| ssex <- rep(NA, nsnp) | |
| ssey <- rep(NA, nsnp) | |
| for(i in 1:nsnp) | |
| { | |
| sdiffgx <- a$sibs1[,i] * bx[i] - a$sibs2[,i] * bx[i] | |
| sdiffx <- ap$sibs1$x - ap$sibs2$x | |
| mod <- summary(lm(sdiffx ~ sdiffgx)) | |
| sbx[i] <- mod$coef[2,1] | |
| ssex[i] <- mod$coef[2,2] | |
| sdiffy <- ap$sibs1$y - ap$sibs2$y | |
| mod <- summary(lm(sdiffy ~ sdiffgx)) | |
| sby[i] <- mod$coef[2,1] | |
| ssey[i] <- mod$coef[2,2] | |
| } | |
| plot(sby ~ sbx) | |
| summary(lm(sby ~ sbx)) | |
| library(TwoSampleMR) | |
| mr_ivw(sbx, sby, ssex, ssey) | |
| plot(y = sby/sbx * sqrt(1/ssex), x=sqrt(1/ssex) ) | |
| summary(lm(sby/sbx * sqrt(1/ssex) ~ sqrt(1/ssex))) | |
| abline(lm(sby/sbx * sqrt(1/ssex) ~ sqrt(1/ssex))) | |
| summary(glm(rep(1:2, each=10000) ~ ab$dads)) | |
| get_pop_estimate <- function(ab, abp) | |
| { | |
| } | |
| # simulate a confounder | |
| n <- 100000 | |
| g1 <- rbinom(n, 2, 0.7) | |
| g2 <- rbinom(n, 2, 0.3) | |
| x1 <- 27 + g1 * 0.2 + rnorm(n, sd=4) | |
| x2 <- 27 + g2 * 0.2 + rnorm(n, sd=4) | |
| summary(lm(x1 ~ g1)) | |
| summary(lm(x2 ~ g2)) | |
| summary(lm(c(x1, x2) ~ c(g1, g2))) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment