Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Last active March 8, 2023 16:55
Show Gist options
  • Save BioSciEconomist/8cedac800119f515811bb76e86429f46 to your computer and use it in GitHub Desktop.
Save BioSciEconomist/8cedac800119f515811bb76e86429f46 to your computer and use it in GitHub Desktop.
Example marginal effects for logistic regression
# *-----------------------------------------------------------------
# | PROGRAM NAME: ex marginal effects.R
# | DATE: 2/26/21
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: example calculations of marginal effects
# *----------------------------------------------------------------
#------------------------------------
# simulating data for logistic regresssion and marginal effects
#-------------------------------------
# all code at once
set.seed(1)
gender <- sample(c(0,1), size = 10000, replace = TRUE)
age <- round(runif(10000, 18, 80))
xb <- -9 + 3.5*gender + 0.2*age
p <- 1/(1 + exp(-xb))
y <- rbinom(n = 10000, size = 1, prob = p)
mod <- glm(y ~ gender + age, family = "binomial")
summary(mod)
dat <- data.frame(y,gender,age)
#--------------------------------------------
# model of interest
#--------------------------------------------
mylogit <- glm(y ~ gender + age,family=binomial(link='logit'),data = dat)
summary(mylogit)
#-------------------------------
# linear probability model
#-------------------------------
summary(lm(y ~ gender + age, data = dat))
# notice in all the examples below the ME from the lpm above
# are nearly identical to the ME derived from logistic regression
#--------------------------------------------------
# function for calculating marginal effects
#--------------------------------------------------
# simplified based on: Alan Fernihough - https://econpapers.repec.org/paper/ucnwpaper/201122.htm
mfx <- function(x){
pdf <- mean(dlogis(predict(x, type = "link")))
marginal.effects <- pdf*coef(x)
print("Marginal Effects")
return(marginal.effects)
}
mfx(mylogit) # results
#----------------------------------------------------
# mfx with bootsrapped SEs
#----------------------------------------------------
# (ref: https://diffuseprior.wordpress.com/2012/04/23/probitlogit-marginal-effects-in-r-2/)
mfxboot <- function(modform,dist,data,boot=1000,digits=3){
x <- glm(modform, family=binomial(link=dist),data)
# get marginal effects
pdf <- ifelse(dist=="probit",
mean(dnorm(predict(x, type = "link"))),
mean(dlogis(predict(x, type = "link"))))
marginal.effects <- pdf*coef(x)
# start bootstrap
bootvals <- matrix(rep(NA,boot*length(coef(x))), nrow=boot)
set.seed(1111)
for(i in 1:boot){
samp1 <- data[sample(1:dim(data)[1],replace=T,dim(data)[1]),]
x1 <- glm(modform, family=binomial(link=dist),samp1)
pdf1 <- ifelse(dist=="probit",
mean(dnorm(predict(x, type = "link"))),
mean(dlogis(predict(x, type = "link"))))
bootvals[i,] <- pdf1*coef(x1)
}
res <- cbind(marginal.effects,apply(bootvals,2,sd),marginal.effects/apply(bootvals,2,sd))
if(names(x$coefficients[1])=="(Intercept)"){
res1 <- res[2:nrow(res),]
res2 <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep=""),res1)),nrow=dim(res1)[1])
rownames(res2) <- rownames(res1)
} else {
res2 <- matrix(as.numeric(sprintf(paste("%.",paste(digits,"f",sep=""),sep="")),nrow=dim(res)[1]))
rownames(res2) <- rownames(res)
}
colnames(res2) <- c("marginal.effect","standard.error","z.ratio")
return(res2)
}
# calculate MEs
mfx2 <- mfxboot(y ~ gender + age,"logit",dat)
# extract ME and SE
mfxdat <- data.frame(cbind(rownames(mfx2),mfx2))
mfxdat$me <- as.numeric(as.character(mfxdat$marginal.effect))
mfxdat$se <- as.numeric(as.character(mfxdat$standard.error))
# report
(rpt <- mfxdat[c("V1","me","se")])
#---------------------------------------
# using mfx package by Alan Fernihough
#--------------------------------------
library(mfx)
logitmfx(y ~ gender + age, dat, atmean = TRUE, robust = FALSE, clustervar1 = NULL,
clustervar2 = NULL, start = NULL, control = list())
#----------------------------------------------
# using new R margins package
#----------------------------------------------
library(margins)
margins(mylogit)
summary(margins(mylogit))
#-------------------------------------------------
# crude stimate of marginal effects at the mean
#-------------------------------------------------
### averaging the meff for the whole data set
summary(mylogit)
b1 <- 3.44 # from coefficient of interest from logistic regression model
b2 <- .200 # coefficient on age (control)
b0 <- -8.97 # intercept from logistic regression model
dat$XB <- dat$gender*b1 + dat$age*b2 + b0
meffx <- (exp(dat$XB)/((1+exp(dat$XB))^2))*b1
summary(meffx) # get mean
#--------------------------------------------------
# demonstrating divide by 4 rule for marginal effects
#--------------------------------------------------
### !!!!! Notice that the maximum value above .859 corresponds to b1/4 or 3.44/4 = .86 DIVIDE BY FOUR RULE in action
### !!!!! this illustrates that the divide by 4 rule provides the upper bound of estimated individual marginal
### !!!!! effects in the data set
# see http://econometricsense.blogspot.com/2016/05/divide-by-4-rule-for-marginal-effects.html
# other
(OR <- exp(3.44)) # this gives us an odds ratio
(log(OR)) # this gets back the OR
# so
log(OR)/4 # upper bound on marginal effects
#-----------------------------------------------------
# converting ORs to MEs
#------------------------------------------------------
# this function uses baseline rates (Pc) for the control group to convert ORs to marginal effects
# see: https://stats.stackexchange.com/questions/324410/converting-odds-ratio-to-percentage-increase-reduction
# this works for an unadjusted OR so to demonstrate we run a single variabel model
mylogit2 <- glm(y ~ gender,family=binomial(link='logit'),data = dat)
mfx(mylogit2) # get ME for comparison
summary(margins(mylogit2)) # ME for comparison
meffOR <- function(Pc,OR){
Pt <- (OR * Pc )/(1 + (OR*Pc) - Pc)
me <- (Pt - Pc)
print("Marginal Effects Based on OR")
return(me)
}
(OR <- exp(1.33)) # 3.78
# baseline for control
library(dplyr)
dat%>%
group_by(gender)%>%
summarize(tavg = mean(y)) # .574 (control group)
meffOR(.574,OR) # results. # .26 similar to results above
#---------------------------------------------
# studenmund's rule of thumb
#---------------------------------------------
# ref: Using Econometrics: A Practical Guide 5th Ed
# p. 458
# according to Studenmund this is approximating the derivitive of the logit
# the expected value of D (probability of y = 1) caused by a
# one unit increase in X holding constant the other independent variables
# in the equation = b1*D*(1-D) where D and b1 are estimates
# this seems to work in a single variable regression but not the full model
b1 <- 1.33 # from gender on single variable model mylogit 2
Di <- mean(y)
(me <- b1*(Di*(1-Di))) # .276
# does not work on gender from multivariable logit
b1 <- 3.45 # from gender on single variable model mylogit
Di <- mean(y)
(me <- b1*(Di*(1-Di)))
# does not work on age from multivariable logit
b1 <- .20 # from gender on single variable model mylogit
Di <- mean(y)
(me <- b1*(Di*(1-Di)))
# does not work well on age from single variable logit (running with age instead of gender)
b1 <- .1391 # from gender on single variable model mylogit
Di <- mean(y)
(me <- b1*(Di*(1-Di)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment