Created
December 6, 2020 13:42
-
-
Save bwiernik/c96f7047cacb833765ad9fe1ef9718c2 to your computer and use it in GitHub Desktop.
R functions to compute adjusted SMD (Cohen's d) from a regression or ANCOVA model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.cmicalc <- function (df) { | |
cmi <- ifelse(df <= 1, NA, | |
exp(lgamma(df/2) - log(sqrt(df/2)) - lgamma((df - 1)/2)) | |
) | |
return(cmi) | |
} | |
smd_stats <- | |
function(diff = NULL, | |
se_diff = NULL, | |
df = NULL, | |
m1 = NULL, m2 = NULL, | |
n1 = NULL, n2 = NULL, | |
sd1 = NULL, sd2 = NULL, | |
sigma = NULL, | |
mse = NULL, | |
t_ncp = NULL, | |
R2_cov = 0, | |
q = 0, | |
method = c("smd_pooledSD", "smd_controlSD", "smd_aveSD"), | |
unbiased = FALSE, | |
conf.int = TRUE, | |
conf.level = 0.95, | |
conf_method = c("t", "norm"), | |
sd_method = c("unbiased", "ml"), | |
std_method = c("unbiased", "ml"), | |
se_var_method = c("ls", "unbiased"), | |
...) { | |
method <- match.arg(method, choices = c("smd_pooledSD", "smd_controlSD", "smd_aveSD")) | |
# Compute degrees of freedom | |
if (is.null(df)) { | |
if (is.null(n1) & is.null(n2)) { | |
stop("One of these must be provided:\n (1) `df`\n (2) `n1`, `n2`, `q`") | |
} else { | |
df <- n1 + n2 - 2 - q | |
} | |
} | |
# TODO: Add Satterwhite degrees of freedom | |
# Compute standardizer | |
if (all(is.null(mse), | |
is.null(sigma), | |
any(is.null(sd1), | |
is.null(sd2), | |
is.null(n1), | |
is.null(n2))) | |
) { | |
stop("One of these must be provided:\n (1) `sigma`\n (2)`mse`\n (3) `sd1`, `sd2`, `n1`, `n2`.") | |
} else if (is.null(mse)) { | |
if (!is.null(sigma)) { | |
mse <- sigma^2 | |
} else { | |
sd_method <- match.arg(sd_method, choices = c("unbiased", "ml")) | |
std_method <- match.arg(std_method, choices = c("unbiased", "ml")) | |
if (method == "smd_pooledSD") { | |
if (sd_method == "unbiased") { | |
ssq1 <- (n1 - 1) * sd1^2 | |
ssq2 <- (n2 - 1) * sd2^2 | |
} else { | |
ssq1 <- n1 * sd1^2 | |
ssq2 <- n2 * sd2^2 | |
} | |
ssq <- ssq2 + ssq2 | |
mse <- | |
ifelse(std_method == "unbiased", | |
ssq / df, | |
ssq / (n1 + n2) | |
) | |
} else if (method == "smd_controlSD") { | |
if (sd_method == "unbiased") { | |
ssq <- (n1 - 1) * sd1^2 | |
} else { | |
ssq <- n1 * sd1^2 | |
} | |
mse <- | |
ifelse(std_method == "unbiased", | |
ssq / df, | |
ssq / (n1 + n2) | |
) | |
} else { | |
if (sd_method == "unbiased") { | |
mse <- | |
ifelse(std_method == "unbiased", | |
(sd1^2 + sd2^2) / 2, | |
(sd1^2 * (n1 - 1) / n1 + sd2^2 * (n2 - 1) / n2) / 2 | |
) | |
} else { | |
mse <- | |
ifelse(std_method == "unbiased", | |
(sd1^2 * n1 / (n1 - 1) + sd2^2 * n2 / (n2 - 1)) / 2, | |
(sd1^2 + sd2^2) / 2 | |
) | |
} | |
} | |
} | |
} | |
# TODO: Implement large-sample vs unbiased sampling error variance | |
if (is.null(diff)) { | |
if (is.null(m1) & is.null(m2)) { | |
stop("One of these must be provided:\n (1) `diff`\n (2) `m1`, `m2`") | |
} else { | |
diff <- m2 - m1 | |
} | |
} | |
# Adjust standardizer for covariates | |
A <- (1 - R2_cov) | |
mse <- mse / A | |
# Compute d | |
d <- diff / sqrt(mse) | |
# Compute variance of d | |
se_var <- | |
switch(method, | |
smd_pooledSD = A * (n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2)), | |
smd_controlSD = A * (1/n1 + (sd2^2 / sd1^2) / n2 + d^2 / (2 * n1)), | |
smd_aveSD = A * (d^2 / (8 * mse^2) * (sd1^4 / (n1 - 1) + sd2^4 / (n2 - 1)) + (sd1^2 / mse) / (n1 - 1) + (sd2^2 / mse) / (n2 - 1)) | |
) | |
if (method != "smd_pooledSD" & R2_cov != 0) { | |
warning(paste0("Covariate-adjusted 'se_var' is only approximately accurate when `method` is ", method, ".")) | |
} | |
# Small-sample bias correction | |
if (unbiased) { | |
J <- .cmicalc(df) | |
} else { | |
J <- 1 | |
} | |
# Compute confidence interval | |
if (conf.int) { | |
conf_method <- match.arg(conf_method, choices = c("t", "norm")) | |
# TODO: Add alternate ci methods from Bonett 2008 | |
if (method != "smd_pooledSD" & conf_method == "t") { | |
# TODO: Derive ncp-based CIs for glass and bonnett d | |
conf_method <- "norm" | |
message(paste0("`conf_method` == 't' not available when `method` == ", method, ".\n`conf_method` == 'norm' used instead.")) | |
} | |
if (conf_method == "norm") { | |
limits <- d + c(-1, 1) * qnorm((1 - conf.level) / 2, lower.tail = FALSE) * sqrt(se_var) | |
} else { | |
if (is.null(t_ncp)) { | |
if (!is.null(se_diff)) { | |
t_ncp <- diff / se_diff | |
} else { | |
# Approximate NCP if neither `t_ncp` nor `se_diff` are given | |
n_eff <- (n1 * n2)/((n1 + n2) * A) | |
t_ncp <- d * sqrt(n_eff) | |
} | |
} | |
# TODO: Implement conf.limits.nct natively | |
limits <- unlist(MBESS::conf.limits.nct(t_ncp, df, conf.level, ...)[c("Lower.Limit", "Upper.Limit")]) | |
if (!is.null(se_diff)) { | |
limits <- limits * se_diff / sqrt(mse) | |
} else { | |
limits <- limits / sqrt(n_eff) | |
} | |
} | |
names(limits) <- c("lower", "upper") | |
} | |
# Correct for small-sample bias | |
d <- d * J | |
se_var <- se_var * J^2 | |
se <- sqrt(se_var) | |
if (conf.int) { | |
limits <- limits * J | |
out <- list(es = d, df = df, se = se, se_var = se_var, ci.lb = limits['lower'], ci.ub = limits['upper']) | |
} else { | |
out <- list(es = d, df = df, se = se, se_var = se_var) | |
} | |
out <- lapply(out, unname) | |
class(out) <- c("es_smd", "es", "list") | |
attr(out, "es") <- "SMD" | |
attr(out, "method") <- method | |
attr(out, "unbiased") <- unbiased | |
attr(out, "conf.level") <- conf.level | |
return(out) | |
} | |
print.es <- function(x, digits = 3, ...) { | |
if (attr(x, "es") == "SMD") { | |
if (attr(x, "method") == "smd_pooledSD") { | |
if (attr(x, "unbiased") == TRUE) { | |
cat("Standardized mean difference (Cohen's d)\n") | |
} else { | |
cat("Standardized mean difference (Hedges' g)\n") | |
} | |
cat("========================================") | |
} else if (attr(x, "method") == "smd_controlSD") { | |
if (attr(x, "unbiased") == TRUE) { | |
cat("Standardized mean difference (Glass' \u0394 [unbiased])\n") | |
cat("=======================================================") | |
} else { | |
cat("Standardized mean difference (Glass' \u0394)\n") | |
cat("============================================") | |
} | |
} else { | |
if (attr(x, "unbiased") == TRUE) { | |
cat("Standardized mean difference (Bonett's d [unbiased])\n") | |
cat("=======================================================") | |
} else { | |
cat("Standardized mean difference (Bonett's d)\n") | |
cat("============================================") | |
} | |
} | |
cat("\n\n") | |
df <- as.data.frame(x) | |
print.data.frame(df, digits = digits, row.names = FALSE) | |
} | |
invisible(x) | |
} | |
mod <- lm(mpg ~ factor(cyl, levels = c(4, 6, 8)) + hp, data = mtcars) | |
r2_cov <- summary(lm(mpg ~ hp, data = mtcars))$r.squared | |
print.es( | |
smd_stats(diff = summary(mod)$coef[2,1], | |
se_diff = summary(mod)$coef[2,2], | |
t_ncp = summary(mod)$coef[2,3], | |
df = mod$df.residual, | |
sigma = summary(mod)$sigma, | |
R2_cov = r2_cov, | |
n1 = table(model.frame(mod)[,2])[1], | |
n2 = table(model.frame(mod)[,2])[2], | |
conf.int = TRUE | |
) | |
) | |
# TODO: Add smd.lm() method to automatically extract factors and compute d values. | |
# Formula to use with lm output | |
# d_adj <- t * se_b / sigma * sqrt(1 - R2_cov) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment