JimGrange · October 18, 2024 06:00
diff --git a/sim_linear.r b/sim_linear.r
 library(tidyverse)
 library(faux)
 library(lme4)
 library(afex)
 options(dplyr.summarise.inform = FALSE)
 set.seed(123)


 sim_data <- function(n_subj, n_trials){
  
  # set all data-generating parameters
  beta_0  <- 800 # intercept; i.e., the grand mean
  beta_1  <-  50 # slope; i.e, effect of category
  omega_0 <-  80 # by-item random intercept sd
  tau_0   <- 100 # by-subject random intercept sd
  tau_1   <-  40 # by-subject random slope sd
  rho     <-  .2 # correlation between intercept and slope
  sigma   <- 200 # residual (error) sd
  
  n_subj <- n_subj
  n_trials <- n_trials
  
  
  items <- tibble(
    trial = seq_len(n_trials),
    condition = rep(c("A", "B"), c(n_trials / 2, n_trials / 2)),
    X_i = rep(c(-0.5, 0.5), c(n_trials / 2, n_trials / 2)),
    O_0i = rnorm(n = n_trials, mean = 0, sd = omega_0)
  )
  
  # simulate a sample of subjects
  
  # sample from a multivariate random distribution 
  subjects <- faux::rnorm_multi(
    n = n_subj, 
    mu = 0, # means for random effects are always 0
    sd = c(tau_0, tau_1), # set SDs
    r = rho, # set correlation, see ?rnorm_multi
    varnames = c("T_0s", "T_1s")
  ) |> 
    round(2)
  
  # add subject IDs
  subjects$subj_id <- seq_len(n_subj)
  
  # cross subject and item IDs; add an error term
  # nrow(.) is the number of rows in the table
  trials <- crossing(subjects, items)  %>%
    mutate(e_si = rnorm(nrow(.), mean = 0, sd = sigma))
  
  dat_sim <- trials %>%
    mutate(RT = beta_0 + T_0s + (beta_1 + T_1s) * X_i + e_si) %>%
    select(subj_id, trial, condition, X_i, RT)
  
  dat_sim <- dat_sim |> 
    select(id = subj_id, trial, condition, condition_coded = X_i, rt = RT)
  
  
  # model & check significance of aggregate data
  agg_data <- dat_sim |> 
    group_by(id, condition_coded) |> 
    summarise(mean_rt = mean(rt)) 
  agg_data_model <- lm(mean_rt ~ 1 + condition_coded, data = agg_data)
  p_value_agg <-  as.numeric(summary(agg_data_model)$coefficients[, "Pr(>|t|)"][2])
  
  
  # model & check significance of trial-level data
  trial_data_model <- lmer(rt ~ 1 + condition_coded + (1 + condition|id), 
                           data = dat_sim)
  p_value_trial <- as.numeric(summary(trial_data_model)$coefficients[, "Pr(>|t|)"][2])
  
  return(round(c(p_value_agg, p_value_trial), 3))
 }

 n_sims <- 100
 sims <- 1:n_sims
 n_subj <- c(10, 20, 30, 50, 100)
 n_trials <- c(100, 250, 1000)

 sim_results <- tibble(crossing(n_subj, n_trials, sims)) |> 
  mutate(p_agg = 0, p_lmm = 0)

 for(i in 1:nrow(sim_results)){
  print(i)
  sim <- sim_data(sim_results$n_subj[i], 
                  sim_results$n_trials[i])
  sim_results$p_agg[i] <- sim[1]
  sim_results$p_lmm[i] <- sim[2]
 }

 pd = position_dodge(3)

 sim_results |> 
  group_by(n_subj, n_trials) |> 
  summarise(power_agg = (sum(p_agg < 0.05) / n_sims), 
            power_lmm = (sum(p_lmm < 0.05) / n_sims)) |> 
  mutate(n_trials = as.factor(n_trials)) |> 
  pivot_longer(power_agg:power_lmm) |> 
  ggplot(aes(x = n_subj, y = value, group = n_trials)) +
  geom_hline(yintercept = 0.8, 
            colour = "grey", 
            linewidth = 1.2) + 
    geom_line(aes(colour = n_trials), 
              position = pd, 
              linewidth = 1) + 
    geom_point(aes(colour = n_trials), 
               position = pd, 
               size = 2.5) + 
  scale_x_continuous(breaks = c(10, 20, 30, 50, 100)) +
  facet_wrap(~name) + 
  labs(x = "Number of Subjects", 
       y = "Power") + 
  theme(text = element_text(size = 16))
	library(tidyverse)
	library(faux)
	library(lme4)
	library(afex)
	options(dplyr.summarise.inform = FALSE)
	set.seed(123)


	sim_data <- function(n_subj, n_trials){

	# set all data-generating parameters
	beta_0 <- 800 # intercept; i.e., the grand mean
	beta_1 <- 50 # slope; i.e, effect of category
	omega_0 <- 80 # by-item random intercept sd
	tau_0 <- 100 # by-subject random intercept sd
	tau_1 <- 40 # by-subject random slope sd
	rho <- .2 # correlation between intercept and slope
	sigma <- 200 # residual (error) sd

	n_subj <- n_subj
	n_trials <- n_trials


	items <- tibble(
	trial = seq_len(n_trials),
	condition = rep(c("A", "B"), c(n_trials / 2, n_trials / 2)),
	X_i = rep(c(-0.5, 0.5), c(n_trials / 2, n_trials / 2)),
	O_0i = rnorm(n = n_trials, mean = 0, sd = omega_0)
	)

	# simulate a sample of subjects

	# sample from a multivariate random distribution
	subjects <- faux::rnorm_multi(
	n = n_subj,
	mu = 0, # means for random effects are always 0
	sd = c(tau_0, tau_1), # set SDs
	r = rho, # set correlation, see ?rnorm_multi
	varnames = c("T_0s", "T_1s")
	) \|>
	round(2)

	# add subject IDs
	subjects$subj_id <- seq_len(n_subj)

	# cross subject and item IDs; add an error term
	# nrow(.) is the number of rows in the table
	trials <- crossing(subjects, items) %>%
	mutate(e_si = rnorm(nrow(.), mean = 0, sd = sigma))

	dat_sim <- trials %>%
	mutate(RT = beta_0 + T_0s + (beta_1 + T_1s) * X_i + e_si) %>%
	select(subj_id, trial, condition, X_i, RT)

	dat_sim <- dat_sim \|>
	select(id = subj_id, trial, condition, condition_coded = X_i, rt = RT)


	# model & check significance of aggregate data
	agg_data <- dat_sim \|>
	group_by(id, condition_coded) \|>
	summarise(mean_rt = mean(rt))
	agg_data_model <- lm(mean_rt ~ 1 + condition_coded, data = agg_data)
	p_value_agg <- as.numeric(summary(agg_data_model)$coefficients[, "Pr(>\|t\|)"][2])


	# model & check significance of trial-level data
	trial_data_model <- lmer(rt ~ 1 + condition_coded + (1 + condition\|id),
	data = dat_sim)
	p_value_trial <- as.numeric(summary(trial_data_model)$coefficients[, "Pr(>\|t\|)"][2])

	return(round(c(p_value_agg, p_value_trial), 3))
	}

	n_sims <- 100
	sims <- 1:n_sims
	n_subj <- c(10, 20, 30, 50, 100)
	n_trials <- c(100, 250, 1000)

	sim_results <- tibble(crossing(n_subj, n_trials, sims)) \|>
	mutate(p_agg = 0, p_lmm = 0)

	for(i in 1:nrow(sim_results)){
	print(i)
	sim <- sim_data(sim_results$n_subj[i],
	sim_results$n_trials[i])
	sim_results$p_agg[i] <- sim[1]
	sim_results$p_lmm[i] <- sim[2]
	}

	pd = position_dodge(3)

	sim_results \|>
	group_by(n_subj, n_trials) \|>
	summarise(power_agg = (sum(p_agg < 0.05) / n_sims),
	power_lmm = (sum(p_lmm < 0.05) / n_sims)) \|>
	mutate(n_trials = as.factor(n_trials)) \|>
	pivot_longer(power_agg:power_lmm) \|>
	ggplot(aes(x = n_subj, y = value, group = n_trials)) +
	geom_hline(yintercept = 0.8,
	colour = "grey",
	linewidth = 1.2) +
	geom_line(aes(colour = n_trials),
	position = pd,
	linewidth = 1) +
	geom_point(aes(colour = n_trials),
	position = pd,
	size = 2.5) +
	scale_x_continuous(breaks = c(10, 20, 30, 50, 100)) +
	facet_wrap(~name) +
	labs(x = "Number of Subjects",
	y = "Power") +
	theme(text = element_text(size = 16))