Created
June 1, 2021 14:27
-
-
Save BioSciEconomist/b8051e3dd48a724fe5b5eecb4128b708 to your computer and use it in GitHub Desktop.
Simulating logistic regression marginal effects and divide by 4 rule
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# *----------------------------------------------------------------- | |
# | PROGRAM NAME: ex marginal effects and divide by 4.R | |
# | DATE: 6/1/21 | |
# | CREATED BY: MATT BOGARD | |
# | PROJECT FILE: | |
# *---------------------------------------------------------------- | |
# | PURPOSE: example calculations of marginal effects and divide by 4 rule | |
# *---------------------------------------------------------------- | |
# see also: http://econometricsense.blogspot.com/2016/05/divide-by-4-rule-for-marginal-effects.html | |
# https://stats.stackexchange.com/questions/415928/does-the-divide-by-4-rule-give-the-upper-bound-marginal-effect | |
# https://www.polyu.edu.hk/cbs/sjpolit/logisticregression.html | |
# https://data.library.virginia.edu/simulating-a-logistic-regression-model/ | |
#-------------------------- | |
# step by step simulation of data for logistic regression | |
#------------------------- | |
set.seed(1) | |
gender <- sample(c(0,1), size = 1000, replace = TRUE) # let gender (male = 1) be our treatment variable for calculating marginal effects | |
age <- round(runif(1000, 18, 80)) | |
xb <- -9 + 3.5*gender + 0.2*age # linear combination function | |
# probabilities from logistic function | |
p <- 1/(1 + exp(-xb)) | |
summary(p) | |
# our outcome based on assumptions and simulated data above | |
y <- rbinom(n = 1000, size = 1, prob = p) | |
summary(y) | |
# logistic regression model | |
mod <- glm(y ~ gender + age, family = "binomial") | |
summary(mod) | |
#------------------------------------ | |
# simulating data for logistic regresssion and marginal effects | |
#------------------------------------- | |
# all code at once | |
set.seed(1) | |
gender <- sample(c(0,1), size = 1000, replace = TRUE) | |
age <- round(runif(1000, 18, 80)) | |
xb <- -9 + 3.5*gender + 0.2*age | |
p <- 1/(1 + exp(-xb)) | |
y <- rbinom(n = 1000, size = 1, prob = p) | |
mod <- glm(y ~ gender + age, family = "binomial") | |
summary(mod) | |
mylogit <- glm(y ~ gender + age,family=binomial(link='logit')) | |
summary(mylogit) | |
#---------------------------------------- | |
# calculating marginal effects | |
#--------------------------------------- | |
# linear probability model | |
summary(lm(y ~ gender + age)) | |
# function for calculating marginal effects | |
mfx <- function(x){ | |
pdf <- mean(dlogis(predict(x, type = "link"))) | |
marginal.effects <- pdf*coef(x) | |
print("Marginal Effects") | |
return(marginal.effects) | |
} | |
mfx(mylogit) # results | |
# crude estimate of marginal effects at the mean | |
### averaging the meff for the whole data set | |
dat <- data.frame(y,gender,age) # create data frame | |
b1 <- 3.16 # from coefficient of interest from logistic regression model | |
b2 <- .1843 # coefficient on age | |
b0 <- -8.1954 # intercept from logistic regression model | |
dat$XB <- dat$gender*b1 + dat$age*b2 + b0 | |
meffx <- (exp(dat$XB)/((1+exp(dat$XB))^2))*b1 | |
summary(meffx) # get mean | |
### !!!!! Notice that the maximum value above .789 corresponds to 3.16/4 = .79 DIVIDE BY FOUR RULE in action | |
### !!!!! this illustrates that the divide by 4 rule provides the upper bound of estimated individual marginal | |
### !!!!! effects in the data set | |
# other | |
(OR <- exp(3.16)) # this gives us an odds ratio | |
(log(OR)) # this gets back the OR | |
# so | |
log(OR)/4 # upper bound on marginal effects |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment