Skip to content

Instantly share code, notes, and snippets.

@aurora-mareviv
Last active August 29, 2015 14:24
Show Gist options
  • Save aurora-mareviv/d32d2bf8a29ee389c8e2 to your computer and use it in GitHub Desktop.
Save aurora-mareviv/d32d2bf8a29ee389c8e2 to your computer and use it in GitHub Desktop.
Sample size and power calculation for mtDNA association studies. Based in Samuels et al., AJHG 2006, PMC1424681
################################
### 1. FORMULA SAMUELS CALC ####
####### NCMIN-HAPLOGROUPS ######
################################
# Under GPL license
# AUTHOR: A. Baluja
# Original formulae in Samuels et al., AJHG 2006, PMC1424681. Written in R by A. Baluja.
# To determine the minimum number of cases (Ncmin)
# required to detect: either a change from p0 to p1, or a given OR, with a predefined confidence interval
# in a study with Nh haplogroups.
# Note: I assume that case-control equations are valid for a cohort.
# USAGE:
# p0: the frequency of the haplogroup in the control population, the controls among exposed. It depends on haplogroup baseline frequency.
# Nh: number of categories for haplogroups. Usually 10 haplogroups plus one category for rare haplogrs.: Nh <- 11
# OR.cas.ctrl: (p1 / (1-p1)) / (p0 / (1-p0)) the OR you want to detect with your data. It can be either a single value, or a sequence: OR.cas.ctrl <- 2; OR.cas.ctrl <- seq(1.25,3 by=0.5)
# power: the power I want to detect any OR in my study.
# sig.level: error alpha accepted. Can take 3 possible values: 0.05, 0.01 and 0.001 (see [Table 2] of Samuels et al.)
# cases.min: number of cases or controls that I need to recruit.
# Gives the result in a data frame, easy to print in a plot
mydata <- mthacases(p0=p0, Nh=Nh, OR.cas.ctrl=OR.cas.ctrl, power=power, sig.level=sig.level)
# Calculates minimum number of cases required to detect an OR
mthacases <-
function(p0=p0, Nh=Nh, OR.cas.ctrl=OR.cas.ctrl, power=power, sig.level=sig.level){
power <- power
p0 <- p0
Nh <- Nh
ORcas.ctrl <- OR.cas.ctrl
sig.level <- sig.level
# Parameters related to sig.level, from [Table 2] of Samuels et al.
# For 90% power and alpha = .05, Nscaled = 8.5
if (sig.level == 0.05){
A <- -28 # Parameter A for alpha=.05
x0 <- 2.6 # Parameter x0 for alpha=.05
d <- 2.4 # Parameter d for alpha=.05
}
if (sig.level == 0.01){
A <- -13 # Parameter A for alpha=.01
x0 <- 5 # Parameter x0 for alpha=.01
d <- 2.5 # Parameter d for alpha=.01
}
if (sig.level == 0.001){
A <- -7 # Parameter A for alpha=.001
x0 <- 7.4 # Parameter x0 for alpha=.001
d <- 2.8 # Parameter d for alpha=.001
}
cases.min <- NULL # initialize vector
out.cas <- NULL # initialize vector
for(OR.cas.ctrl in OR.cas.ctrl){
logparen <- (power-A)/(100-power) # 1. CALCULATES Nscaled FROM FIXED PARAMETERS AND DESIRED POWER
Nscaled <- x0+d*log(logparen)
ORctrl.cas <- 1 / ORcas.ctrl # 2. CALCULATES P1 FROM A PREDEFINED P0, AND A DESIRED OR
OR <- ORctrl.cas
bracket.pw <- p0 / (OR - OR*p0) # obtained after isolating p1 in OR equation [3].
p1 <- bracket.pw / (1 + bracket.pw)
Nh037 <- Nh^0.37
nump1 <- p1*(1-p1)
nump0 <- p0*(1-p0)
num <- nump1+nump0
den <- (p1-p0)^2
paren <- num/den
cases.min <- Nscaled*Nh037*paren # 3. CALCULATES NCMIN
} # Number of cases or controls required to detect a given OR at a desired power.
out.cas <- data.frame(Nh, cases.min, p0, p1, ORctrl.cas, ORcas.ctrl, power, sig.level)
out.cas
round(out.cas,3) # Results given with 3 decimals
}
# EXAMPLE
mydata <- mthacases(p0=0.41, Nh=11, OR.cas.ctrl=c(1.25,1.5,1.75,2,2.25,2.5,2.75,3), power=80, sig.level=0.05)
mydata
################################
##### 2. FORMULA SAMUELS ######
########### POWER #############
################################
# Under GPL license
# AUTHOR: A. Baluja
# Original formulae in Samuels et al., AJHG 2006, PMC1424681. Written in R by A. Baluja.
# For a GIVEN STUDY SIZE Ncmin <- n.cases,
# use the equation to determine minimum value of change p0 to p1 (or minimum OR)
# that can be detected with desired power and significance level,
# in a study with Nh haplogroups
# Note: I assume that case-control equations are valid for cohors "divided" between cases and controls.
# USAGE:
# n.cases: number of cases or controls from the study. It can be either a single value, or a sequence: n.cases <- 300; n.cases <- seq(50,500 by=10)
# p0: the frequency of the haplogroup in the control population, the controls among exposed. It depends on haplogroup baseline frequency.
# Nh: number of categories for haplogroups. Usually 10 haplogroups plus one category for rare haplogrs.: Nh <- 11
# OR.cas.ctrl: (p1 / (1-p1)) / (p0 / (1-p0)) the OR you want to detect with your data.
# sig.level: error alpha accepted. Can take 3 possible values: 0.05, 0.01 and 0.001 (see [Table 2] of Samuels et al.)
# Gives the result in a data frame, easy to print in a plot
mydata <- mthapower(n.cases=n.cases, p0=p0, Nh=Nh, OR.cas.ctrl=OR.cas.ctrl, sig.level=sig.level)
# Calculates power given number of cases and other parameters
mthapower <-
function(n.cases=ncases, p0=p0, Nh=Nh, OR.cas.ctrl=OR.cas.ctrl, sig.level=sig.level){
ncases <- n.cases
p0 <- p0
Nh <- Nh
OR.cas.ctrl <- OR.cas.ctrl
sig.level <- sig.level
# Parameters related to sig.level, from [Table 2] of Samuels et al.
# For 90% power and alpha = .05, Nscaled = 8.5
if (sig.level == 0.05){
A <- -28 # Parameter A for alpha=.05
x0 <- 2.6 # Parameter x0 for alpha=.05
d <- 2.4 # Parameter d for alpha=.05
}
if (sig.level == 0.01){
A <- -13 # Parameter A for alpha=.01
x0 <- 5 # Parameter x0 for alpha=.01
d <- 2.5 # Parameter d for alpha=.01
}
if (sig.level == 0.001){
A <- -7 # Parameter A for alpha=.001
x0 <- 7.4 # Parameter x0 for alpha=.001
d <- 2.8 # Parameter d for alpha=.001
}
pow.ns <- NULL # initialize vector
out.pow <- NULL # initialize vector
for(n.cases in n.cases){
OR.ctrl.cas <- 1 / OR.cas.ctrl # 1. CALCULATE P1 FROM A PREDEFINED P0, AND A DESIRED OR
OR <- OR.ctrl.cas
bracket.pw <- p0 / (OR - OR*p0) # obtained after isolating p1 in OR equation [3].
p1 <- bracket.pw / (1 + bracket.pw)
Nh037 <- Nh^0.37 # 2. CALCULATE NSCALED
num.n <- ncases*((p1-p0)^2)
den.n <- (p1*(1-p1) + p0*(1-p0))*Nh037
Nscaled <- num.n/den.n
num.power <- A - 100 # 3. CALCULATE POWER
den.power <- 1 + exp((Nscaled - x0)/d)
power <- 100 + (num.power/den.power) # The power I have to detect a given OR with my data, at a given alpha
}
out.pow <- data.frame(Nh,ncases, p0, p1, OR.ctrl.cas, OR.cas.ctrl, power, sig.level, Nscaled)
out.pow
round(out.pow,3) # Results given with 3 decimals
}
# EXAMPLES
mydata <- mthapower(n.cases=seq(50,1000,by=50), p0=0.44, Nh=11, OR.cas.ctrl=2.3, sig.level=0.05)
mydata
mydata <- mthapower(n.cases=203, p0=0.44, Nh=11, OR.cas.ctrl=2.3, sig.level=0.05)
mydata
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment