Last active
August 29, 2015 14:24
-
-
Save aurora-mareviv/d32d2bf8a29ee389c8e2 to your computer and use it in GitHub Desktop.
Sample size and power calculation for mtDNA association studies. Based in Samuels et al., AJHG 2006, PMC1424681
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################ | |
### 1. FORMULA SAMUELS CALC #### | |
####### NCMIN-HAPLOGROUPS ###### | |
################################ | |
# Under GPL license | |
# AUTHOR: A. Baluja | |
# Original formulae in Samuels et al., AJHG 2006, PMC1424681. Written in R by A. Baluja. | |
# To determine the minimum number of cases (Ncmin) | |
# required to detect: either a change from p0 to p1, or a given OR, with a predefined confidence interval | |
# in a study with Nh haplogroups. | |
# Note: I assume that case-control equations are valid for a cohort. | |
# USAGE: | |
# p0: the frequency of the haplogroup in the control population, the controls among exposed. It depends on haplogroup baseline frequency. | |
# Nh: number of categories for haplogroups. Usually 10 haplogroups plus one category for rare haplogrs.: Nh <- 11 | |
# OR.cas.ctrl: (p1 / (1-p1)) / (p0 / (1-p0)) the OR you want to detect with your data. It can be either a single value, or a sequence: OR.cas.ctrl <- 2; OR.cas.ctrl <- seq(1.25,3 by=0.5) | |
# power: the power I want to detect any OR in my study. | |
# sig.level: error alpha accepted. Can take 3 possible values: 0.05, 0.01 and 0.001 (see [Table 2] of Samuels et al.) | |
# cases.min: number of cases or controls that I need to recruit. | |
# Gives the result in a data frame, easy to print in a plot | |
mydata <- mthacases(p0=p0, Nh=Nh, OR.cas.ctrl=OR.cas.ctrl, power=power, sig.level=sig.level) | |
# Calculates minimum number of cases required to detect an OR | |
mthacases <- | |
function(p0=p0, Nh=Nh, OR.cas.ctrl=OR.cas.ctrl, power=power, sig.level=sig.level){ | |
power <- power | |
p0 <- p0 | |
Nh <- Nh | |
ORcas.ctrl <- OR.cas.ctrl | |
sig.level <- sig.level | |
# Parameters related to sig.level, from [Table 2] of Samuels et al. | |
# For 90% power and alpha = .05, Nscaled = 8.5 | |
if (sig.level == 0.05){ | |
A <- -28 # Parameter A for alpha=.05 | |
x0 <- 2.6 # Parameter x0 for alpha=.05 | |
d <- 2.4 # Parameter d for alpha=.05 | |
} | |
if (sig.level == 0.01){ | |
A <- -13 # Parameter A for alpha=.01 | |
x0 <- 5 # Parameter x0 for alpha=.01 | |
d <- 2.5 # Parameter d for alpha=.01 | |
} | |
if (sig.level == 0.001){ | |
A <- -7 # Parameter A for alpha=.001 | |
x0 <- 7.4 # Parameter x0 for alpha=.001 | |
d <- 2.8 # Parameter d for alpha=.001 | |
} | |
cases.min <- NULL # initialize vector | |
out.cas <- NULL # initialize vector | |
for(OR.cas.ctrl in OR.cas.ctrl){ | |
logparen <- (power-A)/(100-power) # 1. CALCULATES Nscaled FROM FIXED PARAMETERS AND DESIRED POWER | |
Nscaled <- x0+d*log(logparen) | |
ORctrl.cas <- 1 / ORcas.ctrl # 2. CALCULATES P1 FROM A PREDEFINED P0, AND A DESIRED OR | |
OR <- ORctrl.cas | |
bracket.pw <- p0 / (OR - OR*p0) # obtained after isolating p1 in OR equation [3]. | |
p1 <- bracket.pw / (1 + bracket.pw) | |
Nh037 <- Nh^0.37 | |
nump1 <- p1*(1-p1) | |
nump0 <- p0*(1-p0) | |
num <- nump1+nump0 | |
den <- (p1-p0)^2 | |
paren <- num/den | |
cases.min <- Nscaled*Nh037*paren # 3. CALCULATES NCMIN | |
} # Number of cases or controls required to detect a given OR at a desired power. | |
out.cas <- data.frame(Nh, cases.min, p0, p1, ORctrl.cas, ORcas.ctrl, power, sig.level) | |
out.cas | |
round(out.cas,3) # Results given with 3 decimals | |
} | |
# EXAMPLE | |
mydata <- mthacases(p0=0.41, Nh=11, OR.cas.ctrl=c(1.25,1.5,1.75,2,2.25,2.5,2.75,3), power=80, sig.level=0.05) | |
mydata | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################ | |
##### 2. FORMULA SAMUELS ###### | |
########### POWER ############# | |
################################ | |
# Under GPL license | |
# AUTHOR: A. Baluja | |
# Original formulae in Samuels et al., AJHG 2006, PMC1424681. Written in R by A. Baluja. | |
# For a GIVEN STUDY SIZE Ncmin <- n.cases, | |
# use the equation to determine minimum value of change p0 to p1 (or minimum OR) | |
# that can be detected with desired power and significance level, | |
# in a study with Nh haplogroups | |
# Note: I assume that case-control equations are valid for cohors "divided" between cases and controls. | |
# USAGE: | |
# n.cases: number of cases or controls from the study. It can be either a single value, or a sequence: n.cases <- 300; n.cases <- seq(50,500 by=10) | |
# p0: the frequency of the haplogroup in the control population, the controls among exposed. It depends on haplogroup baseline frequency. | |
# Nh: number of categories for haplogroups. Usually 10 haplogroups plus one category for rare haplogrs.: Nh <- 11 | |
# OR.cas.ctrl: (p1 / (1-p1)) / (p0 / (1-p0)) the OR you want to detect with your data. | |
# sig.level: error alpha accepted. Can take 3 possible values: 0.05, 0.01 and 0.001 (see [Table 2] of Samuels et al.) | |
# Gives the result in a data frame, easy to print in a plot | |
mydata <- mthapower(n.cases=n.cases, p0=p0, Nh=Nh, OR.cas.ctrl=OR.cas.ctrl, sig.level=sig.level) | |
# Calculates power given number of cases and other parameters | |
mthapower <- | |
function(n.cases=ncases, p0=p0, Nh=Nh, OR.cas.ctrl=OR.cas.ctrl, sig.level=sig.level){ | |
ncases <- n.cases | |
p0 <- p0 | |
Nh <- Nh | |
OR.cas.ctrl <- OR.cas.ctrl | |
sig.level <- sig.level | |
# Parameters related to sig.level, from [Table 2] of Samuels et al. | |
# For 90% power and alpha = .05, Nscaled = 8.5 | |
if (sig.level == 0.05){ | |
A <- -28 # Parameter A for alpha=.05 | |
x0 <- 2.6 # Parameter x0 for alpha=.05 | |
d <- 2.4 # Parameter d for alpha=.05 | |
} | |
if (sig.level == 0.01){ | |
A <- -13 # Parameter A for alpha=.01 | |
x0 <- 5 # Parameter x0 for alpha=.01 | |
d <- 2.5 # Parameter d for alpha=.01 | |
} | |
if (sig.level == 0.001){ | |
A <- -7 # Parameter A for alpha=.001 | |
x0 <- 7.4 # Parameter x0 for alpha=.001 | |
d <- 2.8 # Parameter d for alpha=.001 | |
} | |
pow.ns <- NULL # initialize vector | |
out.pow <- NULL # initialize vector | |
for(n.cases in n.cases){ | |
OR.ctrl.cas <- 1 / OR.cas.ctrl # 1. CALCULATE P1 FROM A PREDEFINED P0, AND A DESIRED OR | |
OR <- OR.ctrl.cas | |
bracket.pw <- p0 / (OR - OR*p0) # obtained after isolating p1 in OR equation [3]. | |
p1 <- bracket.pw / (1 + bracket.pw) | |
Nh037 <- Nh^0.37 # 2. CALCULATE NSCALED | |
num.n <- ncases*((p1-p0)^2) | |
den.n <- (p1*(1-p1) + p0*(1-p0))*Nh037 | |
Nscaled <- num.n/den.n | |
num.power <- A - 100 # 3. CALCULATE POWER | |
den.power <- 1 + exp((Nscaled - x0)/d) | |
power <- 100 + (num.power/den.power) # The power I have to detect a given OR with my data, at a given alpha | |
} | |
out.pow <- data.frame(Nh,ncases, p0, p1, OR.ctrl.cas, OR.cas.ctrl, power, sig.level, Nscaled) | |
out.pow | |
round(out.pow,3) # Results given with 3 decimals | |
} | |
# EXAMPLES | |
mydata <- mthapower(n.cases=seq(50,1000,by=50), p0=0.44, Nh=11, OR.cas.ctrl=2.3, sig.level=0.05) | |
mydata | |
mydata <- mthapower(n.cases=203, p0=0.44, Nh=11, OR.cas.ctrl=2.3, sig.level=0.05) | |
mydata |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment