Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Created March 5, 2021 00:23
Show Gist options
  • Save BioSciEconomist/7b0a3628cf70568fe158e140016cad8f to your computer and use it in GitHub Desktop.
Save BioSciEconomist/7b0a3628cf70568fe158e140016cad8f to your computer and use it in GitHub Desktop.
Example comparing logistic regression to linear probability model in regression discontinuity model
# *-----------------------------------------------------------------
# | PROGRAM NAME:ex RDD logistic.R
# | DATE: 3/4/21
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: RDD with binary outcomes
# *----------------------------------------------------------------
library(dplyr)
library(ggplot2)
library(rdd)
options("scipen" =100, "digits" = 4) # override R's tendency to use scientific notation
#
# marginal effects function for logistic regression
#
mfx <- function(x){
pdf <- mean(dlogis(predict(x, type = "link")))
marginal.effects <- pdf*coef(x)
print("Marginal Effects")
return(marginal.effects)
}
# mfx(mylogit) # results
#------------------------------------
# simulate data for logistic regression
#------------------------------------
set.seed(567)
x <-runif(10000,1,100) # running varaible with cutoff
D <- ifelse(x > 50,1,0)
xb <- -1 + .50*D + .05*x
p <- 1/(1 + exp(-xb))
y <- rbinom(n = 10000, size = 1, prob = p)
dat <- data.frame(x,D,p,y)
summary(dat)
# write.csv(dat, file = "/Users/mattbogard/Google Drive/Python Scripts/logit_RDD.csv", row.names = FALSE)
# visualize
ggplot(dat,aes(x,p,color = D)) + geom_point() # latent probabilities
ggplot(dat,aes(x,y,color = D)) + geom_point() # outcome
#--------------------------------------------
# RDD estimate with optimal kernal bw
#-------------------------------------------
summary(RDestimate(y ~ x, dat, subset = NULL, cutpoint = 50, bw = NULL,
kernel = "triangular", se.type = "HC1", cluster = NULL,
verbose = FALSE, model = FALSE, frame = FALSE))
#-------------------------------------------------
# logistic regression and LPM on full data
#------------------------------------------------
mod <- glm(y ~ D + x, family = "binomial", data = dat)
summary(mod)
mfx(mod) # marginal effects
# just kicking the tires
dat$preds <- predict(mod, type = "response")
ggplot(dat,aes(x,preds,color = D)) + geom_point() # predicted probabilities
summary(lm(y ~ D + x, data = dat)) # linear probability model
#-------------------------------------------------
# logistic regression and LPM on restricted bandwidth (no kernel weighting)
#------------------------------------------------
cutoff <- 50
bw <- 2.29
tmp <- dat[dat$x > (cutoff - bw) & dat$x < (cutoff + bw),]
mod <- glm(y ~ D + x, family = "binomial", data = tmp)
summary(mod)
mfx(mod) # marginal effects
summary(lm(y ~ D + x, data = tmp)) # linear probability model
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment