Created
November 22, 2024 01:00
-
-
Save paulgp/6d8a0c452bc8a0da7f4c054314d7e59e to your computer and use it in GitHub Desktop.
Simulations that follow Abadie et al.'s paper "When Should You Adjust Standard Errors for Clustering?"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set random seed for reproducibility | |
set.seed(123) | |
# Simulation parameters | |
n_clusters <- 100 # Number of clusters | |
cluster_size <- 100 # Units per cluster | |
n <- n_clusters * cluster_size # Total sample size | |
rho <- 0.5 # Within-cluster error correlation | |
iterations <- 1000 # Number of simulation iterations | |
# Function to generate clustered data with heterogeneous treatment effects | |
generate_data <- function() { | |
# Generate cluster IDs | |
cluster_id <- rep(1:n_clusters, each = cluster_size) | |
# Generate correlated errors within clusters | |
cluster_effect <- rep(rnorm(n_clusters, sd = sqrt(rho)), each = cluster_size) | |
individual_effect <- rnorm(n, sd = sqrt(1 - rho)) | |
error <- cluster_effect + individual_effect | |
# Random treatment assignment at individual level | |
treatment <- rbinom(n, 1, 0.5) | |
# Generate heterogeneous treatment effects | |
# Treatment effect varies by both cluster and individual | |
cluster_te <- rep(rnorm(n_clusters, mean = .1, sd = 0.5), each = cluster_size) | |
individual_te <- rnorm(n, mean = .1, sd = 0.5) | |
treatment_effect <- cluster_te + individual_te | |
# Outcome with heterogeneous treatment effects | |
y <- treatment * treatment_effect + error | |
data.frame( | |
y = y, | |
treatment = treatment, | |
cluster = cluster_id, | |
true_effect = treatment_effect | |
) | |
} | |
# Function to estimate model and return different standard errors | |
estimate_model <- function(data) { | |
# Fit the model | |
model <- lm(y ~ treatment, data = data) | |
# Get robust (HC2) standard errors | |
robust_se <- sqrt(diag(sandwich::vcovHC(model, type = "HC2")))[2] | |
# Get clustered standard errors | |
clustered_se <- sqrt(diag(sandwich::vcovCL(model, cluster = data$cluster)))[2] | |
# Return coefficient and both SEs | |
c( | |
coef = coef(model)[2], | |
robust_se = robust_se, | |
clustered_se = clustered_se | |
) | |
} | |
# Run simulation | |
results <- replicate(iterations, { | |
data <- generate_data() | |
# Calculate true average treatment effect for this sample | |
true_ate <- mean(data$true_effect) | |
c(estimate_model(data), true_ate = true_ate) | |
}) | |
# Calculate coverage rates | |
alpha <- 0.05 | |
coverage_robust <- mean(abs((results[1, ] - results[4, ]) / results[2, ]) < qnorm(1 - alpha / 2)) | |
coverage_clustered <- mean(abs((results[1, ] - results[4, ]) / results[3, ]) < qnorm(1 - alpha / 2)) | |
# Print results | |
cat("\nResults from", iterations, "iterations:\n") | |
cat("\nAverage estimated treatment effect:", mean(results[1, ])) | |
cat("\nAverage true treatment effect:", mean(results[4, ])) | |
cat("\n\nAverage SEs:\n") | |
cat("Robust SE: ", mean(results[2, ]), "\n") | |
cat("Clustered SE: ", mean(results[3, ]), "\n") | |
cat("\nCoverage rates for 95% confidence intervals:\n") | |
cat("Robust SE: ", round(coverage_robust * 100, 1), "%\n") | |
cat("Clustered SE: ", round(coverage_clustered * 100, 1), "%\n") | |
# Calculate ratio of clustered to robust SEs | |
se_ratio <- results[3, ] / results[2, ] | |
cat("\nAverage ratio of Clustered to Robust SEs:", mean(se_ratio), "\n") | |
# Calculate empirical standard deviation of estimates | |
cat("\nEmpirical SD of estimates:", sd(results[1, ]), "\n") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment