Skip to content

Instantly share code, notes, and snippets.

@bwiernik
Created December 6, 2020 13:42
Show Gist options
  • Save bwiernik/c96f7047cacb833765ad9fe1ef9718c2 to your computer and use it in GitHub Desktop.
Save bwiernik/c96f7047cacb833765ad9fe1ef9718c2 to your computer and use it in GitHub Desktop.
R functions to compute adjusted SMD (Cohen's d) from a regression or ANCOVA model
.cmicalc <- function (df) {
cmi <- ifelse(df <= 1, NA,
exp(lgamma(df/2) - log(sqrt(df/2)) - lgamma((df - 1)/2))
)
return(cmi)
}
smd_stats <-
function(diff = NULL,
se_diff = NULL,
df = NULL,
m1 = NULL, m2 = NULL,
n1 = NULL, n2 = NULL,
sd1 = NULL, sd2 = NULL,
sigma = NULL,
mse = NULL,
t_ncp = NULL,
R2_cov = 0,
q = 0,
method = c("smd_pooledSD", "smd_controlSD", "smd_aveSD"),
unbiased = FALSE,
conf.int = TRUE,
conf.level = 0.95,
conf_method = c("t", "norm"),
sd_method = c("unbiased", "ml"),
std_method = c("unbiased", "ml"),
se_var_method = c("ls", "unbiased"),
...) {
method <- match.arg(method, choices = c("smd_pooledSD", "smd_controlSD", "smd_aveSD"))
# Compute degrees of freedom
if (is.null(df)) {
if (is.null(n1) & is.null(n2)) {
stop("One of these must be provided:\n (1) `df`\n (2) `n1`, `n2`, `q`")
} else {
df <- n1 + n2 - 2 - q
}
}
# TODO: Add Satterwhite degrees of freedom
# Compute standardizer
if (all(is.null(mse),
is.null(sigma),
any(is.null(sd1),
is.null(sd2),
is.null(n1),
is.null(n2)))
) {
stop("One of these must be provided:\n (1) `sigma`\n (2)`mse`\n (3) `sd1`, `sd2`, `n1`, `n2`.")
} else if (is.null(mse)) {
if (!is.null(sigma)) {
mse <- sigma^2
} else {
sd_method <- match.arg(sd_method, choices = c("unbiased", "ml"))
std_method <- match.arg(std_method, choices = c("unbiased", "ml"))
if (method == "smd_pooledSD") {
if (sd_method == "unbiased") {
ssq1 <- (n1 - 1) * sd1^2
ssq2 <- (n2 - 1) * sd2^2
} else {
ssq1 <- n1 * sd1^2
ssq2 <- n2 * sd2^2
}
ssq <- ssq2 + ssq2
mse <-
ifelse(std_method == "unbiased",
ssq / df,
ssq / (n1 + n2)
)
} else if (method == "smd_controlSD") {
if (sd_method == "unbiased") {
ssq <- (n1 - 1) * sd1^2
} else {
ssq <- n1 * sd1^2
}
mse <-
ifelse(std_method == "unbiased",
ssq / df,
ssq / (n1 + n2)
)
} else {
if (sd_method == "unbiased") {
mse <-
ifelse(std_method == "unbiased",
(sd1^2 + sd2^2) / 2,
(sd1^2 * (n1 - 1) / n1 + sd2^2 * (n2 - 1) / n2) / 2
)
} else {
mse <-
ifelse(std_method == "unbiased",
(sd1^2 * n1 / (n1 - 1) + sd2^2 * n2 / (n2 - 1)) / 2,
(sd1^2 + sd2^2) / 2
)
}
}
}
}
# TODO: Implement large-sample vs unbiased sampling error variance
if (is.null(diff)) {
if (is.null(m1) & is.null(m2)) {
stop("One of these must be provided:\n (1) `diff`\n (2) `m1`, `m2`")
} else {
diff <- m2 - m1
}
}
# Adjust standardizer for covariates
A <- (1 - R2_cov)
mse <- mse / A
# Compute d
d <- diff / sqrt(mse)
# Compute variance of d
se_var <-
switch(method,
smd_pooledSD = A * (n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2)),
smd_controlSD = A * (1/n1 + (sd2^2 / sd1^2) / n2 + d^2 / (2 * n1)),
smd_aveSD = A * (d^2 / (8 * mse^2) * (sd1^4 / (n1 - 1) + sd2^4 / (n2 - 1)) + (sd1^2 / mse) / (n1 - 1) + (sd2^2 / mse) / (n2 - 1))
)
if (method != "smd_pooledSD" & R2_cov != 0) {
warning(paste0("Covariate-adjusted 'se_var' is only approximately accurate when `method` is ", method, "."))
}
# Small-sample bias correction
if (unbiased) {
J <- .cmicalc(df)
} else {
J <- 1
}
# Compute confidence interval
if (conf.int) {
conf_method <- match.arg(conf_method, choices = c("t", "norm"))
# TODO: Add alternate ci methods from Bonett 2008
if (method != "smd_pooledSD" & conf_method == "t") {
# TODO: Derive ncp-based CIs for glass and bonnett d
conf_method <- "norm"
message(paste0("`conf_method` == 't' not available when `method` == ", method, ".\n`conf_method` == 'norm' used instead."))
}
if (conf_method == "norm") {
limits <- d + c(-1, 1) * qnorm((1 - conf.level) / 2, lower.tail = FALSE) * sqrt(se_var)
} else {
if (is.null(t_ncp)) {
if (!is.null(se_diff)) {
t_ncp <- diff / se_diff
} else {
# Approximate NCP if neither `t_ncp` nor `se_diff` are given
n_eff <- (n1 * n2)/((n1 + n2) * A)
t_ncp <- d * sqrt(n_eff)
}
}
# TODO: Implement conf.limits.nct natively
limits <- unlist(MBESS::conf.limits.nct(t_ncp, df, conf.level, ...)[c("Lower.Limit", "Upper.Limit")])
if (!is.null(se_diff)) {
limits <- limits * se_diff / sqrt(mse)
} else {
limits <- limits / sqrt(n_eff)
}
}
names(limits) <- c("lower", "upper")
}
# Correct for small-sample bias
d <- d * J
se_var <- se_var * J^2
se <- sqrt(se_var)
if (conf.int) {
limits <- limits * J
out <- list(es = d, df = df, se = se, se_var = se_var, ci.lb = limits['lower'], ci.ub = limits['upper'])
} else {
out <- list(es = d, df = df, se = se, se_var = se_var)
}
out <- lapply(out, unname)
class(out) <- c("es_smd", "es", "list")
attr(out, "es") <- "SMD"
attr(out, "method") <- method
attr(out, "unbiased") <- unbiased
attr(out, "conf.level") <- conf.level
return(out)
}
print.es <- function(x, digits = 3, ...) {
if (attr(x, "es") == "SMD") {
if (attr(x, "method") == "smd_pooledSD") {
if (attr(x, "unbiased") == TRUE) {
cat("Standardized mean difference (Cohen's d)\n")
} else {
cat("Standardized mean difference (Hedges' g)\n")
}
cat("========================================")
} else if (attr(x, "method") == "smd_controlSD") {
if (attr(x, "unbiased") == TRUE) {
cat("Standardized mean difference (Glass' \u0394 [unbiased])\n")
cat("=======================================================")
} else {
cat("Standardized mean difference (Glass' \u0394)\n")
cat("============================================")
}
} else {
if (attr(x, "unbiased") == TRUE) {
cat("Standardized mean difference (Bonett's d [unbiased])\n")
cat("=======================================================")
} else {
cat("Standardized mean difference (Bonett's d)\n")
cat("============================================")
}
}
cat("\n\n")
df <- as.data.frame(x)
print.data.frame(df, digits = digits, row.names = FALSE)
}
invisible(x)
}
mod <- lm(mpg ~ factor(cyl, levels = c(4, 6, 8)) + hp, data = mtcars)
r2_cov <- summary(lm(mpg ~ hp, data = mtcars))$r.squared
print.es(
smd_stats(diff = summary(mod)$coef[2,1],
se_diff = summary(mod)$coef[2,2],
t_ncp = summary(mod)$coef[2,3],
df = mod$df.residual,
sigma = summary(mod)$sigma,
R2_cov = r2_cov,
n1 = table(model.frame(mod)[,2])[1],
n2 = table(model.frame(mod)[,2])[2],
conf.int = TRUE
)
)
# TODO: Add smd.lm() method to automatically extract factors and compute d values.
# Formula to use with lm output
# d_adj <- t * se_b / sigma * sqrt(1 - R2_cov)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment