explodecomputer · August 19, 2020 13:20
diff --git a/compare allele frequencies b/compare allele frequencies
 # simulation: genotypes (0, 1 or 2 copies of the effect allele) for n
 # individuals in each of two populations, drawn with effect allele
 # frequencies 0.5 and 0.49
 n <- 10000
 a <- rbinom(n, 2, 0.5)
 b <- rbinom(n, 2, 0.49)

 # perform fisher's exact test to obtain odds ratio for two allele frequencies being different
 # 2 x 2 table of allele counts: rows are effect / other allele, columns are
 # populations a / b. Every individual carries two alleles, so sum(a) is the
 # effect allele count and 2 * n - sum(a) is the other allele count.
 # The effect allele goes in the first row so that the odds ratio has the
 # same orientation as the one reported by compare_af().
 cont <- matrix(
 	c(sum(a), 2 * n - sum(a), sum(b), 2 * n - sum(b)), 2, 2,
 	dimnames = list(allele = c("effect", "other"), population = c("a", "b")))

 fisher.test(cont)

 # can do this approximately with just allele frequency and sample size

 # Compare the effect allele frequency of two populations using only summary
 # data (allele frequency and sample size).
 #
 # make sure that the effect allele is the same for the two populations
 #
 # Arguments:
 #   eaf_pop1, eaf_pop2  effect allele frequency in each population; a single
 #                       number in [0, 1]
 #   n_pop1, n_pop2      sample size of each population; a single non-negative
 #                       number (each individual contributes two alleles)
 #
 # Returns the "htest" object produced by fisher.test() on the 2 x 2 table of
 # expected allele counts (rows: effect / other allele, columns: population
 # 1 / population 2). The odds ratio is available as the `estimate` element.
 compare_af <- function(eaf_pop1, eaf_pop2, n_pop1, n_pop2)
 {
 	# Fail early with a clear message instead of an opaque error (or silent
 	# recycling) further down in matrix() / fisher.test().
 	stopifnot(
 		is.numeric(eaf_pop1), length(eaf_pop1) == 1, is.finite(eaf_pop1),
 		is.numeric(eaf_pop2), length(eaf_pop2) == 1, is.finite(eaf_pop2),
 		eaf_pop1 >= 0, eaf_pop1 <= 1,
 		eaf_pop2 >= 0, eaf_pop2 <= 1,
 		is.numeric(n_pop1), length(n_pop1) == 1, is.finite(n_pop1),
 		is.numeric(n_pop2), length(n_pop2) == 1, is.finite(n_pop2),
 		n_pop1 >= 0, n_pop2 >= 0
 	)

 	# Expected allele counts. fisher.test() only works on whole counts and
 	# would round them itself with a warning, so round explicitly here.
 	allele_counts <- round(c(
 		2 * eaf_pop1 * n_pop1, 2 * (1-eaf_pop1) * n_pop1,
 		2 * eaf_pop2 * n_pop2, 2 * (1-eaf_pop2) * n_pop2))

 	# Filled column-wise: one column per population, effect allele in row 1.
 	contingency <- matrix(allele_counts, 2, 2)
 	fisher.test(contingency)
 }

 # example: run the comparison once, keep the result and print it
 x <- compare_af(0.2, 0.19, 10000, 10000)
 x

 # the odds ratio is stored in the `estimate` element of the result
 x[["estimate"]]

 # probably best to use the log odds ratio for any further analysis as it is symmetrical
 log(x[["estimate"]])
	# simulation: genotypes (0, 1 or 2 copies of the effect allele) for n
	# individuals in each of two populations, drawn with effect allele
	# frequencies 0.5 and 0.49
	n <- 10000
	a <- rbinom(n, 2, 0.5)
	b <- rbinom(n, 2, 0.49)

	# perform fisher's exact test to obtain odds ratio for two allele frequencies being different
	# 2 x 2 table of allele counts: rows are effect / other allele, columns are
	# populations a / b. Every individual carries two alleles, so sum(a) is the
	# effect allele count and 2 * n - sum(a) is the other allele count.
	# The effect allele goes in the first row so that the odds ratio has the
	# same orientation as the one reported by compare_af().
	cont <- matrix(
		c(sum(a), 2 * n - sum(a), sum(b), 2 * n - sum(b)), 2, 2,
		dimnames = list(allele = c("effect", "other"), population = c("a", "b")))

	fisher.test(cont)

	# can do this approximately with just allele frequency and sample size

	# Compare the effect allele frequency of two populations using only summary
	# data (allele frequency and sample size).
	#
	# make sure that the effect allele is the same for the two populations
	#
	# Arguments:
	#   eaf_pop1, eaf_pop2  effect allele frequency in each population; a single
	#                       number in [0, 1]
	#   n_pop1, n_pop2      sample size of each population; a single non-negative
	#                       number (each individual contributes two alleles)
	#
	# Returns the "htest" object produced by fisher.test() on the 2 x 2 table of
	# expected allele counts (rows: effect / other allele, columns: population
	# 1 / population 2). The odds ratio is available as the `estimate` element.
	compare_af <- function(eaf_pop1, eaf_pop2, n_pop1, n_pop2)
	{
		# Fail early with a clear message instead of an opaque error (or silent
		# recycling) further down in matrix() / fisher.test().
		stopifnot(
			is.numeric(eaf_pop1), length(eaf_pop1) == 1, is.finite(eaf_pop1),
			is.numeric(eaf_pop2), length(eaf_pop2) == 1, is.finite(eaf_pop2),
			eaf_pop1 >= 0, eaf_pop1 <= 1,
			eaf_pop2 >= 0, eaf_pop2 <= 1,
			is.numeric(n_pop1), length(n_pop1) == 1, is.finite(n_pop1),
			is.numeric(n_pop2), length(n_pop2) == 1, is.finite(n_pop2),
			n_pop1 >= 0, n_pop2 >= 0
		)

		# Expected allele counts. fisher.test() only works on whole counts and
		# would round them itself with a warning, so round explicitly here.
		allele_counts <- round(c(
			2 * eaf_pop1 * n_pop1, 2 * (1-eaf_pop1) * n_pop1,
			2 * eaf_pop2 * n_pop2, 2 * (1-eaf_pop2) * n_pop2))

		# Filled column-wise: one column per population, effect allele in row 1.
		contingency <- matrix(allele_counts, 2, 2)
		fisher.test(contingency)
	}

	# example: run the comparison once, keep the result and print it
	x <- compare_af(0.2, 0.19, 10000, 10000)
	x

	# the odds ratio is stored in the `estimate` element of the result
	x[["estimate"]]

	# probably best to use the log odds ratio for any further analysis as it is symmetrical
	log(x[["estimate"]])