Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created November 22, 2017 18:03
Show Gist options
  • Save explodecomputer/eb2627a52a7805da21e558484b0dccad to your computer and use it in GitHub Desktop.
Save explodecomputer/eb2627a52a7805da21e558484b0dccad to your computer and use it in GitHub Desktop.
z-score transformations
---
title: Converting Z scores to betas and log(OR)
author: Gibran Hemani, Philip Haycock
date: 22/11/2017
output: html_document
---
Use this formula to convert any effects and SE to the standardised scale:
```{r, echo=FALSE}
library(knitr)
opts_chunk$set(cache=TRUE)
```
```
zscore = b / se
b_p = zscore / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
se_p = 1 / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
```
Let's test how well it works.
## Effect sizes
```{r }
dat <- expand.grid(
rep=1:10,
p=c(0.01, 0.1, 0.5),
n=c(1000,10000,100000),
eff=c(0.001, 0.01, 0.1),
b=NA, se=NA, b_p=NA, se_p=NA
)
for(i in 1:nrow(dat))
{
n <- dat$n[i]
g <- rbinom(n, 2, dat$p[i])
x <- g * dat$eff[i] + rnorm(n, sd=10)
mod <- summary(lm(x ~ g))$coefficients
zscore <- mod[2,1] / mod[2,2]
snp_freq <- sum(g) / (2 * n)
b_p <- zscore / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
se_p <- 1 / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
dat$b[i] <- mod[2,1]
dat$se[i] <- mod[2,2]
dat$b_p[i] <- b_p * sd(x)
dat$se_p[i] <- se_p * sd(x)
}
```
Comparison of effect sizes
```{r }
library(ggplot2)
ggplot(dat, aes(x=b, y=b_p)) +
geom_point(aes(colour=as.factor(n))) +
facet_wrap(~ eff + p, scale="free")
```
Comparison of standard errors
```{r }
ggplot(dat, aes(x=se, y=se_p)) +
geom_point(aes(colour=as.factor(n))) +
facet_wrap(~ eff + p, scale="free")
```
## Odds ratios 1
Same as before but this time convert phenotype to case/control and run logistic regression.
```{r }
dat <- expand.grid(
rep=1:10,
p=c(0.01, 0.1, 0.5),
n=c(1000,10000),
eff=c(0.001, 0.01, 0.1),
quant=c(0.1, 0.5),
b=NA, se=NA, b_p=NA, se_p=NA,
xvar=NA
)
for(i in 1:nrow(dat))
{
n <- dat$n[i]
g <- rbinom(n, 2, dat$p[i])
x <- g * dat$eff[i] + rnorm(n)
cc <- as.numeric(x > quantile(x, dat$quant[i]))
mod <- summary(glm(cc ~ g, family="binomial"))$coefficients
zscore <- mod[2,1] / mod[2,2]
snp_freq <- sum(g) / (2 * n)
neff <- 4 / (1 / sum(cc == 1) + 1 / sum(cc == 0))
b_p <- zscore / sqrt(2 * neff * snp_freq * (1-snp_freq))
se_p <- b_p / zscore
dat$b[i] <- mod[2,1]
dat$se[i] <- mod[2,2]
dat$b_p[i] <- b_p
dat$se_p[i] <- se_p
dat$xvar[i] <- var(x)
}
```
Effect sizes
```{r }
ggplot(subset(dat, b < 5), aes(x=b, y=b_p)) +
geom_point(aes(colour=as.factor(quant))) +
facet_wrap(~ eff + p, scale="free")
```
Standard errors
```{r }
ggplot(subset(dat, b < 5), aes(x=se, y=se_p)) +
geom_point(aes(colour=as.factor(quant))) +
facet_wrap(~ eff + p, scale="free")
```
## Odds ratios 2
Try using the formula
```
neff = 4 / (1 / sum(cc == 1) + 1 / sum(cc == 0))
log(OR) = zscore / sqrt(2 * neff * snp_freq * (1-snp_freq))
```
```{r }
dat <- expand.grid(
rep=1:10,
p=c(0.01, 0.1, 0.5),
n=c(1000,10000),
eff=c(0.001, 0.01, 0.1),
quant=c(0.1, 0.5),
b=NA, se=NA, b_p=NA, se_p=NA,
xvar=NA
)
for(i in 1:nrow(dat))
{
n <- dat$n[i]
g <- rbinom(n, 2, dat$p[i])
x <- g * dat$eff[i] + rnorm(n)
cc <- as.numeric(x > quantile(x, dat$quant[i]))
mod <- summary(glm(cc ~ g, family="binomial"))$coefficients
zscore <- mod[2,1] / mod[2,2]
snp_freq <- sum(g) / (2 * n)
b_p <- zscore / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
se_p <- 1 / sqrt(2*snp_freq*(1-snp_freq)*(n+zscore^2))
dat$xvar[i] <- var(x)
dat$b[i] <- mod[2,1]
dat$se[i] <- mod[2,2]
dat$b_p[i] <- b_p
dat$se_p[i] <- se_p
}
```
Effect sizes
```{r }
ggplot(subset(dat, b < 5), aes(x=b, y=b_p)) +
geom_point(aes(colour=as.factor(quant))) +
facet_wrap(~ eff + p, scale="free")
```
Standard errors
```{r }
ggplot(subset(dat, b < 5), aes(x=se, y=se_p)) +
geom_point(aes(colour=as.factor(quant))) +
facet_wrap(~ eff + p, scale="free")
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment