explodecomputer · November 22, 2017 18:03
diff --git a/zscores.Rmd b/zscores.Rmd
 ---
 title: Converting Z scores to betas and log(OR)
 author: Gibran Hemani, Philip Haycock
 date: 22/11/2017
 output: html_document
 ---

 Use this formula to convert any effects and SE to the standardised scale:

 ```{r, echo=FALSE}
 library(knitr)
 opts_chunk$set(cache=TRUE)
 ```

 ```
 zscore = b / se
 b_p = zscore / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
 se_p = 1 / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
 ```

 Let's test how well it works.

 ## Effect sizes

 ```{r }

 dat <- expand.grid(
 	rep=1:10,
 	p=c(0.01, 0.1, 0.5), 
 	n=c(1000,10000,100000), 
 	eff=c(0.001, 0.01, 0.1), 
 	b=NA, se=NA, b_p=NA, se_p=NA
 )

 for(i in 1:nrow(dat))
 {
 	n <- dat$n[i]
 	g <- rbinom(n, 2, dat$p[i])
 	x <- g * dat$eff[i] + rnorm(n, sd=10)

 	mod <- summary(lm(x ~ g))$coefficients

 	zscore <- mod[2,1] / mod[2,2]
 	snp_freq <- sum(g) / (2 * n)
 	b_p <- zscore / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
 	se_p <- 1 / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))

 	dat$b[i] <- mod[2,1]
 	dat$se[i] <- mod[2,2]
 	dat$b_p[i] <- b_p * sd(x)
 	dat$se_p[i] <- se_p * sd(x)
 }
 ```

 Comparison of effect sizes

 ```{r }
 library(ggplot2)
 ggplot(dat, aes(x=b, y=b_p)) +
 geom_point(aes(colour=as.factor(n))) +
 facet_wrap(~ eff + p, scale="free")

 ```

 Comparison of standard errors

 ```{r }
 ggplot(dat, aes(x=se, y=se_p)) +
 geom_point(aes(colour=as.factor(n))) +
 facet_wrap(~ eff + p, scale="free")
 ```

 ## Odds ratios 1

 Same as before but this time convert phenotype to case/control and run logistic regression.

 ```{r }
 dat <- expand.grid(
 	rep=1:10,
 	p=c(0.01, 0.1, 0.5), 
 	n=c(1000,10000), 
 	eff=c(0.001, 0.01, 0.1),
 	quant=c(0.1, 0.5),
 	b=NA, se=NA, b_p=NA, se_p=NA,
 	xvar=NA
 )

 for(i in 1:nrow(dat))
 {
 	n <- dat$n[i]
 	g <- rbinom(n, 2, dat$p[i])
 	x <- g * dat$eff[i] + rnorm(n)
 	cc <- as.numeric(x > quantile(x, dat$quant[i]))

 	mod <- summary(glm(cc ~ g, family="binomial"))$coefficients

 	zscore <- mod[2,1] / mod[2,2]
 	snp_freq <- sum(g) / (2 * n)
 	neff <- 4 / (1 / sum(cc == 1) + 1 / sum(cc == 0))
 	b_p <- zscore / sqrt(2 * neff * snp_freq * (1-snp_freq))
 	se_p <- b_p / zscore

 	dat$b[i] <- mod[2,1]
 	dat$se[i] <- mod[2,2]
 	dat$b_p[i] <- b_p
 	dat$se_p[i] <- se_p
 	dat$xvar[i] <- var(x)
 }
 ```

 Effect sizes

 ```{r }
 ggplot(subset(dat, b < 5), aes(x=b, y=b_p)) +
 geom_point(aes(colour=as.factor(quant))) +
 facet_wrap(~ eff + p, scale="free")
 ```

 Standard errors

 ```{r }
 ggplot(subset(dat, b < 5), aes(x=se, y=se_p)) +
 geom_point(aes(colour=as.factor(quant))) +
 facet_wrap(~ eff + p, scale="free")
 ```


 ## Odds ratios 2

 Try using the formula

 ```
 neff = 4 / (1 / sum(cc == 1) + 1 / sum(cc == 0))
 log(OR) = zscore / sqrt(2 * neff * snp_freq * (1-snp_freq))
 ```

 ```{r }
 dat <- expand.grid(
 	rep=1:10,
 	p=c(0.01, 0.1, 0.5), 
 	n=c(1000,10000), 
 	eff=c(0.001, 0.01, 0.1),
 	quant=c(0.1, 0.5),
 	b=NA, se=NA, b_p=NA, se_p=NA,
 	xvar=NA
 )

 for(i in 1:nrow(dat))
 {
 	n <- dat$n[i]
 	g <- rbinom(n, 2, dat$p[i])
 	x <- g * dat$eff[i] + rnorm(n)
 	cc <- as.numeric(x > quantile(x, dat$quant[i]))

 	mod <- summary(glm(cc ~ g, family="binomial"))$coefficients

 	zscore <- mod[2,1] / mod[2,2]
 	snp_freq <- sum(g) / (2 * n)
 	b_p <- zscore / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
 	se_p <- 1 / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))


 	dat$xvar[i] <- var(x)
 	dat$b[i] <- mod[2,1]
 	dat$se[i] <- mod[2,2]
 	dat$b_p[i] <- b_p
 	dat$se_p[i] <- se_p
 }
 ```

 Effect sizes

 ```{r }
 ggplot(subset(dat, b < 5), aes(x=b, y=b_p)) +
 geom_point(aes(colour=as.factor(quant))) +
 facet_wrap(~ eff + p, scale="free")
 ```

 Standard errors

 ```{r }
 ggplot(subset(dat, b < 5), aes(x=se, y=se_p)) +
 geom_point(aes(colour=as.factor(quant))) +
 facet_wrap(~ eff + p, scale="free")
 ```
	---
	title: Converting Z scores to betas and log(OR)
	author: Gibran Hemani, Philip Haycock
	date: 22/11/2017
	output: html_document
	---

	Use this formula to convert any effects and SE to the standardised scale:

	```{r, echo=FALSE}
	library(knitr)
	opts_chunk$set(cache=TRUE)
	```

	```
	zscore = b / se
	b_p = zscore / sqrt(2snp_freq(1-snp_freq)*(n+zscore^2))
	se_p = 1 / sqrt(2snp_freq(1-snp_freq)*(n+zscore^2))
	```

	Let's test how well it works.

	## Effect sizes

	```{r }

	dat <- expand.grid(
	rep=1:10,
	p=c(0.01, 0.1, 0.5),
	n=c(1000,10000,100000),
	eff=c(0.001, 0.01, 0.1),
	b=NA, se=NA, b_p=NA, se_p=NA
	)

	for(i in 1:nrow(dat))
	{
	n <- dat$n[i]
	g <- rbinom(n, 2, dat$p[i])
	x <- g * dat$eff[i] + rnorm(n, sd=10)

	mod <- summary(lm(x ~ g))$coefficients

	zscore <- mod[2,1] / mod[2,2]
	snp_freq <- sum(g) / (2 * n)
	b_p <- zscore / sqrt(2snp_freq(1-snp_freq)*(n+zscore^2))
	se_p <- 1 / sqrt(2snp_freq(1-snp_freq)*(n+zscore^2))

	dat$b[i] <- mod[2,1]
	dat$se[i] <- mod[2,2]
	dat$b_p[i] <- b_p * sd(x)
	dat$se_p[i] <- se_p * sd(x)
	}
	```

	Comparison of effect sizes

	```{r }
	library(ggplot2)
	ggplot(dat, aes(x=b, y=b_p)) +
	geom_point(aes(colour=as.factor(n))) +
	facet_wrap(~ eff + p, scale="free")

	```

	Comparison of standard errors

	```{r }
	ggplot(dat, aes(x=se, y=se_p)) +
	geom_point(aes(colour=as.factor(n))) +
	facet_wrap(~ eff + p, scale="free")
	```

	## Odds ratios 1

	Same as before but this time convert phenotype to case/control and run logistic regression.

	```{r }
	dat <- expand.grid(
	rep=1:10,
	p=c(0.01, 0.1, 0.5),
	n=c(1000,10000),
	eff=c(0.001, 0.01, 0.1),
	quant=c(0.1, 0.5),
	b=NA, se=NA, b_p=NA, se_p=NA,
	xvar=NA
	)

	for(i in 1:nrow(dat))
	{
	n <- dat$n[i]
	g <- rbinom(n, 2, dat$p[i])
	x <- g * dat$eff[i] + rnorm(n)
	cc <- as.numeric(x > quantile(x, dat$quant[i]))

	mod <- summary(glm(cc ~ g, family="binomial"))$coefficients

	zscore <- mod[2,1] / mod[2,2]
	snp_freq <- sum(g) / (2 * n)
	neff <- 4 / (1 / sum(cc == 1) + 1 / sum(cc == 0))
	b_p <- zscore / sqrt(2 * neff * snp_freq * (1-snp_freq))
	se_p <- b_p / zscore

	dat$b[i] <- mod[2,1]
	dat$se[i] <- mod[2,2]
	dat$b_p[i] <- b_p
	dat$se_p[i] <- se_p
	dat$xvar[i] <- var(x)
	}
	```

	Effect sizes

	```{r }
	ggplot(subset(dat, b < 5), aes(x=b, y=b_p)) +
	geom_point(aes(colour=as.factor(quant))) +
	facet_wrap(~ eff + p, scale="free")
	```

	Standard errors

	```{r }
	ggplot(subset(dat, b < 5), aes(x=se, y=se_p)) +
	geom_point(aes(colour=as.factor(quant))) +
	facet_wrap(~ eff + p, scale="free")
	```


	## Odds ratios 2

	Try using the formula

	```
	neff = 4 / (1 / sum(cc == 1) + 1 / sum(cc == 0))
	log(OR) = zscore / sqrt(2 * neff * snp_freq * (1-snp_freq))
	```

	```{r }
	dat <- expand.grid(
	rep=1:10,
	p=c(0.01, 0.1, 0.5),
	n=c(1000,10000),
	eff=c(0.001, 0.01, 0.1),
	quant=c(0.1, 0.5),
	b=NA, se=NA, b_p=NA, se_p=NA,
	xvar=NA
	)

	for(i in 1:nrow(dat))
	{
	n <- dat$n[i]
	g <- rbinom(n, 2, dat$p[i])
	x <- g * dat$eff[i] + rnorm(n)
	cc <- as.numeric(x > quantile(x, dat$quant[i]))

	mod <- summary(glm(cc ~ g, family="binomial"))$coefficients

	zscore <- mod[2,1] / mod[2,2]
	snp_freq <- sum(g) / (2 * n)
	b_p <- zscore / sqrt(2snp_freq(1-snp_freq)*(n+zscore^2))
	se_p <- 1 / sqrt(2snp_freq(1-snp_freq)*(n+zscore^2))


	dat$xvar[i] <- var(x)
	dat$b[i] <- mod[2,1]
	dat$se[i] <- mod[2,2]
	dat$b_p[i] <- b_p
	dat$se_p[i] <- se_p
	}
	```

	Effect sizes

	```{r }
	ggplot(subset(dat, b < 5), aes(x=b, y=b_p)) +
	geom_point(aes(colour=as.factor(quant))) +
	facet_wrap(~ eff + p, scale="free")
	```

	Standard errors

	```{r }
	ggplot(subset(dat, b < 5), aes(x=se, y=se_p)) +
	geom_point(aes(colour=as.factor(quant))) +
	facet_wrap(~ eff + p, scale="free")
	```