I want to run a simulation to demonstrate how it could even be possible for the hierarchical model to have better power than an analysis of the exactly correct values.
Note: The original question and results are no longer applicable due to a few fatal flaws in my code. The corrected code follows below. But the oracle is optimal now...
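In symbols (my own notation, matching the parameter names used in the code below): each subject i gets a true value

trueval_i ~ Normal(true_mean + true_dif * [Group_i = Group2], true_sd^2)

and each of its meas_per_indiv repeated measurements is

measval_ij ~ Normal(trueval_i, true_sigma^2)

The analyses compared are a t-test on the true values (the oracle), a t-test on the per-subject means of the measured values, a regression with subject as a fixed effect on all measured values, and a mixed model with subject as a random effect on all measured values.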
library(tidyverse)
library(lme4)
library(lmerTest)
library(progress)
library(furrr)
set.seed(42)
true_mean <- 3
true_sd <- 1
true_sigma <- 0.6
true_dif <- 0.5
meas_per_indiv <- 5
simfunc <- function(true_mean, true_sd, true_sigma, true_dif, sample_size, meas_per_indiv) {
pb$tick()
dat <- tidyr::crossing(Subj = 1:sample_size,
Group = c("Group1", "Group2")) %>%
# Generate true values
mutate(trueval = rnorm(n(), true_mean, true_sd),
trueval = ifelse(Group=="Group2",
yes = trueval + true_dif,
no = trueval)) %>%
# Generate measured values around the true values
crossing(Meas = 1:meas_per_indiv) %>%
mutate(measval = rnorm(n(), trueval, sd=true_sigma)) %>%
# Make sure that the subjects are factors
mutate(Subj = paste0(Group, "_", Subj),
Subj = as.factor(Subj))
# Calculate summary data for individuals
indivdat <- dat %>%
group_by(Subj, Group) %>%
summarise(trueval = mean(trueval), # these are all the same value
measmean = mean(measval), # these are different values
.groups = "keep") %>%
ungroup()
# Oracle: t-test on the per-subject true values
true_t_test <- t.test(indivdat$trueval ~ indivdat$Group, var.equal=TRUE)
# t-test on the per-subject means of the measured values
meas_t_test <- t.test(indivdat$measmean ~ indivdat$Group, var.equal=TRUE)
# Fixed-effects regression using all measured values
meas_allval <- lm(measval ~ Group + Subj, data=dat)
# Mixed model using all measured values
meas_lme <- lmer(measval ~ Group + (1 | Subj), data=dat)
trueval_p <- true_t_test$p.value
measmean_p <- meas_t_test$p.value
allval_p <- summary(meas_allval)$coefficients[2,4]
lme_p <- summary(meas_lme)$coefficients[2,5]
out <- tibble::tibble(
true_p = trueval_p,
measmean_p = measmean_p,
allval_p = allval_p,
lme_p = lme_p,
)
return(out)
}
safely_simfunc <- safely(simfunc)
Let's perform a quick sanity check using the code from inside the function.
sample_size <- 10
dat <- tidyr::crossing(Subj = 1:sample_size,
Group = c("Group1", "Group2")) %>%
# Generate true values
mutate(trueval = rnorm(n(), true_mean, true_sd),
trueval = ifelse(Group=="Group2",
yes = trueval + true_dif,
no = trueval)) %>%
# Generate measured values around the true values
crossing(Meas = 1:meas_per_indiv) %>%
mutate(measval = rnorm(n(), trueval, sd=true_sigma)) %>%
# Make sure that the subjects are factors
mutate(Subj = paste0(Group, "_", Subj),
Subj = as.factor(Subj))
# Calculate summary data for individuals
indivdat <- dat %>%
group_by(Subj, Group) %>%
summarise(trueval = mean(trueval), # these are all the same value
measmean = mean(measval), # these are different values
.groups = "keep") %>%
ungroup()
# Oracle: t-test on the per-subject true values
true_t_test <- t.test(indivdat$trueval ~ indivdat$Group, var.equal=TRUE)
# t-test on the per-subject means of the measured values
meas_t_test <- t.test(indivdat$measmean ~ indivdat$Group, var.equal=TRUE)
# Fixed-effects regression using all measured values
meas_allval <- lm(measval ~ Group + Subj, data=dat)
# Mixed model using all measured values
meas_lme <- lmer(measval ~ Group + (1 | Subj), data=dat)
trueval_p <- true_t_test$p.value
true_t_test
##
## Two Sample t-test
##
## data: indivdat$trueval by indivdat$Group
## t = -1.0969, df = 18, p-value = 0.2871
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.2238057 0.3842405
## sample estimates:
## mean in group Group1 mean in group Group2
## 3.116625 3.536408
measmean_p <- meas_t_test$p.value
meas_t_test
##
## Two Sample t-test
##
## data: indivdat$measmean by indivdat$Group
## t = -1.0755, df = 18, p-value = 0.2964
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.3744837 0.4437371
## sample estimates:
## mean in group Group1 mean in group Group2
## 3.237863 3.703236
allval_p <- summary(meas_allval)$coefficients[2,4]
summary(meas_allval)
##
## Call:
## lm(formula = measval ~ Group + Subj, data = dat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.59909 -0.48665 -0.01459 0.41129 1.28871
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.3984 0.3156 10.769 < 2e-16 ***
## GroupGroup2 -0.1601 0.4463 -0.359 0.720645
## SubjGroup1_10 0.7158 0.4463 1.604 0.112656
## SubjGroup1_2 0.3257 0.4463 0.730 0.467636
## SubjGroup1_3 0.1451 0.4463 0.325 0.745978
## SubjGroup1_4 0.3687 0.4463 0.826 0.411118
## SubjGroup1_5 -0.6389 0.4463 -1.432 0.156139
## SubjGroup1_6 -0.2801 0.4463 -0.628 0.531957
## SubjGroup1_7 0.5522 0.4463 1.237 0.219595
## SubjGroup1_8 -1.3054 0.4463 -2.925 0.004478 **
## SubjGroup1_9 -1.4879 0.4463 -3.334 0.001298 **
## SubjGroup2_1 1.2585 0.4463 2.820 0.006052 **
## SubjGroup2_10 0.8224 0.4463 1.843 0.069049 .
## SubjGroup2_2 -0.1559 0.4463 -0.349 0.727794
## SubjGroup2_3 -1.5692 0.4463 -3.516 0.000724 ***
## SubjGroup2_4 0.1765 0.4463 0.396 0.693472
## SubjGroup2_5 -0.6731 0.4463 -1.508 0.135435
## SubjGroup2_6 2.3737 0.4463 5.319 9.26e-07 ***
## SubjGroup2_7 1.2131 0.4463 2.718 0.008042 **
## SubjGroup2_8 1.2042 0.4463 2.698 0.008499 **
## SubjGroup2_9 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7056 on 80 degrees of freedom
## Multiple R-squared: 0.6924, Adjusted R-squared: 0.6194
## F-statistic: 9.479 on 19 and 80 DF, p-value: 1.418e-13
lme_p <- summary(meas_lme)$coefficients[2,5]
summary(meas_lme)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: measval ~ Group + (1 | Subj)
## Data: dat
##
## REML criterion at convergence: 257.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.30973 -0.68794 0.07297 0.64398 1.76834
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subj (Intercept) 0.8367 0.9147
## Residual 0.4979 0.7056
## Number of obs: 100, groups: Subj, 20
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.2379 0.3060 18.0000 10.582 3.71e-09 ***
## GroupGroup2 0.4654 0.4327 18.0000 1.075 0.296
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## GroupGroup2 -0.707
out <- tibble::tibble(
true_p = trueval_p,
measmean_p = measmean_p,
allval_p = allval_p,
lme_p = lme_p,
)
knitr::kable(out)
| true_p | measmean_p | allval_p | lme_p |
|---|---|---|---|
| 0.2871447 | 0.2963731 | 0.7206447 | 0.2963731 |
Looks good!
simulations <- tidyr::crossing(
sample_size = c(10, 20, 40, 80),
dif = c(0, 0.5),
simrep = 1:1000)
pb <- progress_bar$new(total = nrow(simulations))
# plan(multisession, workers = 6)
simulations <- simulations %>%
mutate(res = map2(sample_size, dif, ~safely_simfunc(true_mean = true_mean,
true_sd = true_sd,
true_sigma = true_sigma,
true_dif = .y,
sample_size = .x,
meas_per_indiv = meas_per_indiv)))
saveRDS(simulations, "power_clarification_sim.rds")
simulations <- readRDS("power_clarification_sim.rds")
simulations <- simulations %>%
mutate(failure = map_lgl(res, ~!is.null(.x$error)))
sum(simulations$failure)
## [1] 0
simulations <- simulations %>%
mutate(res = map(res, "result"))
simulations_res <- simulations %>%
unnest(res) %>%
group_by(sample_size, dif) %>%
summarise(true_power = mean(true_p < 0.05),
measmean_power = mean(measmean_p < 0.05),
allval_power = mean(allval_p < 0.05),
lme_power = mean(lme_p < 0.05)) %>%
arrange(dif, sample_size)
## `summarise()` has grouped output by 'sample_size'. You can override using the `.groups` argument.
knitr::kable(simulations_res)
| sample_size | dif | true_power | measmean_power | allval_power | lme_power |
|---|---|---|---|---|---|
| 10 | 0.0 | 0.044 | 0.050 | 0.600 | 0.050 |
| 20 | 0.0 | 0.053 | 0.052 | 0.595 | 0.052 |
| 40 | 0.0 | 0.052 | 0.066 | 0.596 | 0.066 |
| 80 | 0.0 | 0.054 | 0.055 | 0.616 | 0.055 |
| 10 | 0.5 | 0.181 | 0.186 | 0.595 | 0.186 |
| 20 | 0.5 | 0.323 | 0.304 | 0.620 | 0.304 |
| 40 | 0.5 | 0.605 | 0.580 | 0.610 | 0.580 |
| 80 | 0.5 | 0.880 | 0.855 | 0.630 | 0.855 |
simulations_res %>%
gather(Outcome, Value, -sample_size, -dif) %>%
filter(grepl("_power", Outcome)) %>%
mutate(Outcome = case_when(
Outcome=="lme_power" ~ "LME: All measured data",
Outcome=="allval_power" ~ "FE Regression: All measured data",
Outcome=="true_power" ~ "T-test: Mean true values",
Outcome=="measmean_power" ~ "T-test: Mean measured values",
)) %>%
mutate(Test = ifelse(dif == 0,
yes = "False Positives",
no = "Power")) %>%
mutate(Outcome = fct_inorder(Outcome)) %>%
ggplot(aes(x=sample_size, y=Value, colour=Outcome)) +
geom_point(alpha = 0.5) +
geom_line(alpha = 0.5) +
labs(x="Sample Size (Individuals)", y="Power",
title="Power for calculating group means",
subtitle=paste0("Different methods for calculating group means\n",
"when each individual is measured 5 times")) +
facet_wrap(~Test)
I was looking at this code in your post on datamethods.org. A few things: `Subj` needs to be treated as a factor in the call to `lm()`, i.e., `meas_allval <- lm(measval ~ Group + factor(Subj), data=dat)`. Currently it's treating `Subj` like a continuous variable and estimating a single slope for it. When treating it as a factor, you get identical p-values to those from `lmer()`. In your post you only talk about power, but you will also notice that the type I error rate is massively inflated for the models that include `Subj` as a fixed or random effect. So the real question isn't why those models have higher power, it's why they produce such anti-conservative p-values. I don't exactly know the answer to that question.