Created
March 5, 2021 00:23
-
-
Save BioSciEconomist/7b0a3628cf70568fe158e140016cad8f to your computer and use it in GitHub Desktop.
Example comparing logistic regression to linear probability model in regression discontinuity model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME:ex RDD logistic.R | |
# | DATE: 3/4/21 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: | |
# *---------------------------------------------------------------- | |
# | PURPOSE: RDD with binary outcomes | |
# *---------------------------------------------------------------- | |
library(dplyr) | |
library(ggplot2) | |
library(rdd) | |
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation | |
# | |
# marginal effects function for logistic regression | |
# | |
mfx <- function(x){ | |
pdf <- mean(dlogis(predict(x, type = "link"))) | |
marginal.effects <- pdf*coef(x) | |
print("Marginal Effects") | |
return(marginal.effects) | |
} | |
# mfx(mylogit) # results | |
#------------------------------------ | |
# simulate data for logistic regression | |
#------------------------------------ | |
set.seed(567) | |
x <-runif(10000,1,100) # running varaible with cutoff | |
D <- ifelse(x > 50,1,0) | |
xb <- -1 + .50*D + .05*x | |
p <- 1/(1 + exp(-xb)) | |
y <- rbinom(n = 10000, size = 1, prob = p) | |
dat <- data.frame(x,D,p,y) | |
summary(dat) | |
# write.csv(dat, file = "/Users/mattbogard/Google Drive/Python Scripts/logit_RDD.csv", row.names = FALSE) | |
# visualize | |
ggplot(dat,aes(x,p,color = D)) + geom_point() # latent probabilities | |
ggplot(dat,aes(x,y,color = D)) + geom_point() # outcome | |
#-------------------------------------------- | |
# RDD estimate with optimal kernal bw | |
#------------------------------------------- | |
summary(RDestimate(y ~ x, dat, subset = NULL, cutpoint = 50, bw = NULL, | |
kernel = "triangular", se.type = "HC1", cluster = NULL, | |
verbose = FALSE, model = FALSE, frame = FALSE)) | |
#------------------------------------------------- | |
# logistic regression and LPM on full data | |
#------------------------------------------------ | |
mod <- glm(y ~ D + x, family = "binomial", data = dat) | |
summary(mod) | |
mfx(mod) # marginal effects | |
# just kicking the tires | |
dat$preds <- predict(mod, type = "response") | |
ggplot(dat,aes(x,preds,color = D)) + geom_point() # predicted probabilities | |
summary(lm(y ~ D + x, data = dat)) # linear probability model | |
#------------------------------------------------- | |
# logistic regression and LPM on restricted bandwidth (no kernel weighting) | |
#------------------------------------------------ | |
cutoff <- 50 | |
bw <- 2.29 | |
tmp <- dat[dat$x > (cutoff - bw) & dat$x < (cutoff + bw),] | |
mod <- glm(y ~ D + x, family = "binomial", data = tmp) | |
summary(mod) | |
mfx(mod) # marginal effects | |
summary(lm(y ~ D + x, data = tmp)) # linear probability model |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment