explodecomputer · May 4, 2017 15:28
diff --git a/assortativemating. b/assortativemating.
 # Sample size
 nid <- 2000

 # Number of SNPs
 nsnp <- 1000

 # Variance explained by SNPs
 varexp <- 0.8

 # Effect sizes of SNPs
 eff <- rnorm(nsnp)

 # Make genotype matrix
 S <- matrix(rbinom(nid*nsnp, 2, 0.5), nid, nsnp)

 # Scale genotype matrix
 X <- scale(S)

 # Make phenotype
 g <- scale(X %*% eff) * sqrt(varexp)
 p <- g + rnorm(nid, sd=sqrt(1-varexp))

 # Phenotype should have variance of 1
 var(p)

 # This should be varexp
 cor(p, g)^2

 # Choose partners. 
 # Split into high and low values
 # Randomly pair amongst high and amongst low
 h <- which(p > median(p))
 l <- which(p <= median(p))
 hid1 <- h[1:(nid/4)]
 hid2 <- h[-c(1:(nid/4))]
 lid1 <- l[1:(nid/4)]
 lid2 <- l[-c(1:(nid/4))]
 id1 <- c(hid1, lid1)
 id2 <- c(hid2, lid2)

 # Check that the phenotypes correlate
 cor(p[id1], p[id2])
 summary(lm(p[id1] ~ p[id2]))

 # Does the genotype of id1 predict the phenotype of id2?
 summary(lm(p[id2] ~ g[id1]))

 # Do the genotypes correlate?
 cor(g[id1], g[id2])
 summary(lm(g[id1] ~ g[id2]))

 # Try new method, comparing kinships of partners against kinships of non-partners


 # Make weighted kinship matrix
 Xw <- t(t(X) * eff^2)
 cor(eff^2, apply(Xw, 2, sd)^2)

 K <- Xw %*% t(Xw) / nsnp
 partners <- K[cbind(id1, id2)]
 K[cbind(id1, id2)] <- NA
 K[diag(K)] <- NA
 strangers <- as.numeric(K[!is.na(K)])

 t.test(partners, strangers)

 hist(K)
 K[diag(K)]

 dat <- data.frame(
 	x = c(partners, c(strangers)),
 	y = c(rep("Partners", length(partners)), rep("Strangers", length(strangers)))
 )

 library(ggplot2)
 ggplot(dat, aes(x=x)) +
 geom_density(aes(fill=y), alpha=0.2)


 # Try without weighting the kinships

 K <- X %*% t(X)
 partners <- K[cbind(id1, id2)]
 K[cbind(id1, id2)] <- NA
 K[diag(K)] <- NA
 strangers <- as.numeric(K[!is.na(K)])

 t.test(partners, strangers)

 dat <- data.frame(
 	x = c(partners, c(strangers)),
 	y = c(rep("Partners", length(partners)), rep("Strangers", length(strangers)))
 )

 library(ggplot2)
 ggplot(dat, aes(x=x)) +
 geom_density(aes(fill=y), alpha=0.2)


 # Try with random weighting

 effrandom <- rnorm(nsnp)
 Xw <- t(t(X) * effrandom)
 K <- Xw %*% t(Xw)
 partners <- K[cbind(id1, id2)]
 K[cbind(id1, id2)] <- NA
 K[diag(K)] <- NA
 strangers <- as.numeric(K[!is.na(K)])

 t.test(partners, strangers)

 dat <- data.frame(
 	x = c(partners, c(strangers)),
 	y = c(rep("Partners", length(partners)), rep("Strangers", length(strangers)))
 )

 library(ggplot2)
 ggplot(dat, aes(x=x)) +
 geom_density(aes(fill=y), alpha=0.2)
	# Sample size
	nid <- 2000

	# Number of SNPs
	nsnp <- 1000

	# Variance explained by SNPs
	varexp <- 0.8

	# Effect sizes of SNPs
	eff <- rnorm(nsnp)

	# Make genotype matrix
	S <- matrix(rbinom(nid*nsnp, 2, 0.5), nid, nsnp)

	# Scale genotype matrix
	X <- scale(S)

	# Make phenotype
	g <- scale(X %% eff) sqrt(varexp)
	p <- g + rnorm(nid, sd=sqrt(1-varexp))

	# Phenotype should have variance of 1
	var(p)

	# This should be varexp
	cor(p, g)^2

	# Choose partners.
	# Split into high and low values
	# Randomly pair amongst high and amongst low
	h <- which(p > median(p))
	l <- which(p <= median(p))
	hid1 <- h[1:(nid/4)]
	hid2 <- h[-c(1:(nid/4))]
	lid1 <- l[1:(nid/4)]
	lid2 <- l[-c(1:(nid/4))]
	id1 <- c(hid1, lid1)
	id2 <- c(hid2, lid2)

	# Check that the phenotypes correlate
	cor(p[id1], p[id2])
	summary(lm(p[id1] ~ p[id2]))

	# Does the genotype of id1 predict the phenotype of id2?
	summary(lm(p[id2] ~ g[id1]))

	# Do the genotypes correlate?
	cor(g[id1], g[id2])
	summary(lm(g[id1] ~ g[id2]))

	# Try new method, comparing kinships of partners against kinships of non-partners


	# Make weighted kinship matrix
	Xw <- t(t(X) * eff^2)
	cor(eff^2, apply(Xw, 2, sd)^2)

	K <- Xw %*% t(Xw) / nsnp
	partners <- K[cbind(id1, id2)]
	K[cbind(id1, id2)] <- NA
	K[diag(K)] <- NA
	strangers <- as.numeric(K[!is.na(K)])

	t.test(partners, strangers)

	hist(K)
	K[diag(K)]

	dat <- data.frame(
	x = c(partners, c(strangers)),
	y = c(rep("Partners", length(partners)), rep("Strangers", length(strangers)))
	)

	library(ggplot2)
	ggplot(dat, aes(x=x)) +
	geom_density(aes(fill=y), alpha=0.2)


	# Try without weighting the kinships

	K <- X %*% t(X)
	partners <- K[cbind(id1, id2)]
	K[cbind(id1, id2)] <- NA
	K[diag(K)] <- NA
	strangers <- as.numeric(K[!is.na(K)])

	t.test(partners, strangers)

	dat <- data.frame(
	x = c(partners, c(strangers)),
	y = c(rep("Partners", length(partners)), rep("Strangers", length(strangers)))
	)

	library(ggplot2)
	ggplot(dat, aes(x=x)) +
	geom_density(aes(fill=y), alpha=0.2)


	# Try with random weighting

	effrandom <- rnorm(nsnp)
	Xw <- t(t(X) * effrandom)
	K <- Xw %*% t(Xw)
	partners <- K[cbind(id1, id2)]
	K[cbind(id1, id2)] <- NA
	K[diag(K)] <- NA
	strangers <- as.numeric(K[!is.na(K)])

	t.test(partners, strangers)

	dat <- data.frame(
	x = c(partners, c(strangers)),
	y = c(rep("Partners", length(partners)), rep("Strangers", length(strangers)))
	)

	library(ggplot2)
	ggplot(dat, aes(x=x)) +
	geom_density(aes(fill=y), alpha=0.2)