library(tidyverse)
library(wakefield)
library(rdrobust)
library(rddensity)
library(broom)
library(huxtable)
# Make fake data
set.seed(1234)
df <- r_data_frame(
n = 1000,
id,
attendance = rbeta(shape1 = 7, shape2 = 2)
) %>%
mutate(attendance = rescale(attendance, to = c(20, 100))) %>%
mutate(treatment = attendance < 80) %>%
mutate(grade = (200 * treatment) + (20 * attendance) + rnorm(1000, 600, 100)) %>%
mutate(grade = rescale(grade, to = c(0, 100)))
# Is this sharp or fuzzy? (It's sharp because I made it that way)
ggplot(df, aes(x = attendance, y = treatment, color = treatment)) +
geom_point(size = 0.5, alpha = 0.5) +
guides(color = FALSE) +
labs(x = "% attendance", y = "Received treatment") +
theme_minimal()

# Distribution of running variable looks okay and discontinuity-free
rddensity(df$attendance, c = 80) %>% summary()
#>
#> RD Manipulation Test using local polynomial density estimation.
#>
#> Number of obs = 1000
#> Model = unrestricted
#> Kernel = triangular
#> BW method = comb
#> VCE method = jackknife
#>
#> Cutoff c = 80 Left of c Right of c
#> Number of obs 553 447
#> Eff. Number of obs 263 284
#> Order est. (p) 2 2
#> Order bias (q) 3 3
#> BW est. (h) 10.783 9.638
#>
#> Method T P > |T|
#> Robust 0.3687 0.7123
rdplotdensity(rddensity(df$attendance, c = 80), df$attendance)

#> $Estl
#> Call: lpdensity
#>
#> Sample size 553
#> Polynomial order for point estimation (p=) 2
#> Order of derivative estimated (v=) 1
#> Polynomial order for confidence interval (q=) 3
#> Kernel function triangular
#> Scaling factor 0.552552552552553
#> Bandwidth method user provided
#>
#> Use summary(...) to show estimates.
#>
#> $Estr
#> Call: lpdensity
#>
#> Sample size 447
#> Polynomial order for point estimation (p=) 2
#> Order of derivative estimated (v=) 1
#> Polynomial order for confidence interval (q=) 3
#> Kernel function triangular
#> Scaling factor 0.446446446446446
#> Bandwidth method user provided
#>
#> Use summary(...) to show estimates.
#>
#> $Estplot

# Plot outcome and running variable
ggplot(df, aes(x = attendance, y = grade, color = treatment)) +
geom_point(size = 0.5, alpha = 0.5) +
geom_smooth(data = filter(df, attendance < 80), method = "lm") +
geom_smooth(data = filter(df, attendance >= 80), method = "lm") +
geom_vline(xintercept = 80) +
guides(color = FALSE) +
labs(x = "% attendance", y = "% final grade") +
theme_minimal()

# Non-parametric RD with different kernels
rdrobust(df$grade, df$attendance, c = 80, kernel = "triangular") %>% summary() # Default
#> Call: rdrobust
#>
#> Number of Obs. 1000
#> BW type mserd
#> Kernel Triangular
#> VCE method NN
#>
#> Number of Obs. 553 447
#> Eff. Number of Obs. 131 155
#> Order est. (p) 1 1
#> Order bias (p) 2 2
#> BW est. (h) 5.033 5.033
#> BW bias (b) 8.541 8.541
#> rho (h/b) 0.589 0.589
#>
#> =============================================================================
#> Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
#> =============================================================================
#> Conventional -13.644 1.402 -9.734 0.000 [-16.391 , -10.896]
#> Robust - - -8.783 0.000 [-17.321 , -11.001]
#> =============================================================================
rdrobust(df$grade, df$attendance, c = 80, kernel = "epanechnikov") %>% summary()
#> Call: rdrobust
#>
#> Number of Obs. 1000
#> BW type mserd
#> Kernel Epanechnikov
#> VCE method NN
#>
#> Number of Obs. 553 447
#> Eff. Number of Obs. 137 159
#> Order est. (p) 1 1
#> Order bias (p) 2 2
#> BW est. (h) 5.247 5.247
#> BW bias (b) 8.801 8.801
#> rho (h/b) 0.596 0.596
#>
#> =============================================================================
#> Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
#> =============================================================================
#> Conventional -13.307 1.370 -9.712 0.000 [-15.992 , -10.622]
#> Robust - - -8.577 0.000 [-16.829 , -10.568]
#> =============================================================================
rdrobust(df$grade, df$attendance, c = 80, kernel = "uniform") %>% summary()
#> Call: rdrobust
#>
#> Number of Obs. 1000
#> BW type mserd
#> Kernel Uniform
#> VCE method NN
#>
#> Number of Obs. 553 447
#> Eff. Number of Obs. 178 206
#> Order est. (p) 1 1
#> Order bias (p) 2 2
#> BW est. (h) 6.915 6.915
#> BW bias (b) 11.902 11.902
#> rho (h/b) 0.581 0.581
#>
#> =============================================================================
#> Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
#> =============================================================================
#> Conventional -12.252 1.187 -10.321 0.000 [-14.578 , -9.925]
#> Robust - - -8.866 0.000 [-15.186 , -9.688]
#> =============================================================================
# Check potential data-driven bandwith options
# This says +/- 5ish
rdbwselect(df$grade, df$attendance, c = 80) %>% summary()
#> Call: rdbwselect
#>
#> Number of Obs. 1000
#> BW type mserd
#> Kernel Triangular
#> VCE method NN
#>
#> Number of Obs. 553 447
#> Order est. (p) 1 1
#> Order bias (q) 2 2
#>
#> =======================================================
#> BW est. (h) BW bias (b)
#> Left of c Right of c Left of c Right of c
#> =======================================================
#> mserd 5.033 5.033 8.541 8.541
#> =======================================================
# Check best bandwidth + half bandwidth + twice bandwidth
rdrobust(df$grade, df$attendance, c = 80, h = 5.03) %>% summary()
#> Call: rdrobust
#>
#> Number of Obs. 1000
#> BW type Manual
#> Kernel Triangular
#> VCE method NN
#>
#> Number of Obs. 553 447
#> Eff. Number of Obs. 131 155
#> Order est. (p) 1 1
#> Order bias (p) 2 2
#> BW est. (h) 5.030 5.030
#> BW bias (b) 5.030 5.030
#> rho (h/b) 1.000 1.000
#>
#> =============================================================================
#> Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
#> =============================================================================
#> Conventional -13.645 1.402 -9.732 0.000 [-16.393 , -10.897]
#> Robust - - -8.123 0.000 [-18.606 , -11.372]
#> =============================================================================
rdrobust(df$grade, df$attendance, c = 80, h = 5.03 / 2) %>% summary()
#> Call: rdrobust
#>
#> Number of Obs. 1000
#> BW type Manual
#> Kernel Triangular
#> VCE method NN
#>
#> Number of Obs. 553 447
#> Eff. Number of Obs. 61 78
#> Order est. (p) 1 1
#> Order bias (p) 2 2
#> BW est. (h) 2.515 2.515
#> BW bias (b) 2.515 2.515
#> rho (h/b) 1.000 1.000
#>
#> =============================================================================
#> Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
#> =============================================================================
#> Conventional -15.155 1.744 -8.692 0.000 [-18.572 , -11.738]
#> Robust - - -9.484 0.000 [-21.080 , -13.859]
#> =============================================================================
rdrobust(df$grade, df$attendance, c = 80, h = 5.03 * 2) %>% summary()
#> Call: rdrobust
#>
#> Number of Obs. 1000
#> BW type Manual
#> Kernel Triangular
#> VCE method NN
#>
#> Number of Obs. 553 447
#> Eff. Number of Obs. 251 293
#> Order est. (p) 1 1
#> Order bias (p) 2 2
#> BW est. (h) 10.060 10.060
#> BW bias (b) 10.060 10.060
#> rho (h/b) 1.000 1.000
#>
#> =============================================================================
#> Method Coef. Std. Err. z P>|z| [ 95% C.I. ]
#> =============================================================================
#> Conventional -12.432 1.062 -11.705 0.000 [-14.514 , -10.351]
#> Robust - - -8.752 0.000 [-15.819 , -10.030]
#> =============================================================================
# Plot this (though not with ggplot ::sadface::)
rdplot(df$grade, df$attendance, c = 80)

# Parametric estimation
# If we center the variable it's easier to interpret the attendance coefficient
df <- df %>%
mutate(attendance_centered = attendance - 80)
# Regular regression
model1 <- lm(grade ~ attendance_centered + treatment, data = df)
# Narrower bandwidth with regular regression
model2 <- lm(grade ~ attendance + treatment,
data = filter(df, attendance_centered < 5, attendance_centered > -5))
# Polynomial regression
model3 <- lm(grade ~ attendance_centered + I(attendance_centered^2) + treatment, data = df)
model4 <- lm(grade ~ attendance_centered + I(attendance_centered^2) +
I(attendance_centered^3) + treatment, data = df)
# All together
huxreg(model1, model2, model3, model4)
─────────────────────────────────────────────────────────────────────────────────────
(1) (2) (3) (4)
────────────────────────────────────────────────────────────────
(Intercept) 65.839 *** -48.349 * 65.822 *** 66.023 ***
(0.324) (19.146) (0.438) (0.452)
attendance_centere 1.163 *** 1.165 *** 1.174 ***
d
(0.020) (0.035) (0.036)
treatmentTRUE 11.574 *** 12.781 *** 11.597 *** 11.726 ***
(0.589) (1.342) (0.725) (0.728)
attendance 1.421 ***
(0.232)
I(attendance_cente 0.000 -0.002
red^2)
(0.001) (0.002)
I(attendance_cente -0.000
red^3)
(0.000)
────────────────────────────────────────────────────────────────
N 1000 286 1000 1000
R2 0.823 0.279 0.823 0.824
logLik -3201.077 -897.957 -3201.075 -3199.508
AIC 6410.153 1803.915 6412.150 6411.016
─────────────────────────────────────────────────────────────────────────────────────
*** p < 0.001; ** p < 0.01; * p < 0.05.