Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Created March 5, 2021 01:23
Show Gist options
  • Save BioSciEconomist/e5e6cd377ee8ccd565db967e76a58088 to your computer and use it in GitHub Desktop.
Save BioSciEconomist/e5e6cd377ee8ccd565db967e76a58088 to your computer and use it in GitHub Desktop.
example code for getting marginal effects from logistic regression using python
# *-----------------------------------------------------------------
# | PROGRAM NAME: ex logit marginal effects.py
# | DATE: 3/4/21
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: example code for getting marginal effects from logistic regression
# *----------------------------------------------------------------
import pandas as pd
import numpy as np
from matplotlib import style
from matplotlib import pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.api as sm
# simulate some data
df = pd.DataFrame(columns=['x','D', 'p','y'])
np.random.seed(123)
df['ID'] = np.random.uniform(1, 10000, 10000)
df['D'] = np.random.binomial(1, .5, 10000)
np.random.seed(123)
df['x'] = np.random.normal(30,1,10000)
xb = -9 + 3.5*df['D'] + .2*df['x'] #-9 + 3.5*gender + 0.2*age
df['p'] = 1/(1 + np.exp(-xb))
np.random.seed(123)
df['y'] = np.random.binomial(1, df['p'], 10000)
df.head()
df.describe()
style.use("fivethirtyeight")
#----------------------------------
# for a baseline, run linear probabiity model (LPM)
#----------------------------------
smf.ols('y ~ D + x', data=df ).fit().summary()
#---------------------------------------
# logistic regression
#---------------------------------------
model = smf.logit(formula='y ~ D + x', data=df).fit()
model.summary()
print(model.get_margeff(at ='overall').summary()) # get marginal effects
print(model.get_margeff(at ='mean').summary()) # get marginal effects
# mean marginal effects from logistic regressin are close to LPM model
# syntax
#get_margeff(at='overall', method='dydx', atexog=None, dummy=False, count=False)
# 'at' Options are:
# ‘overall’, The average of the marginal effects at each observation.
# ‘mean’, The marginal effects at the mean of each regressor.
# ‘median’, The marginal effects at the median of each regressor.
# ‘zero’, The marginal effects at zero for each regressor.
# ‘all’, The marginal effects at each observation. If at is all only margeff will be available from the returned object.
#-----------------------------------------
# logistic regression with glm
#-----------------------------------------
model = smf.glm('y ~ D + x', data=df, family=sm.families.Binomial(link = sm.genmod.families.links.logit))
result = model.fit()
print(result.summary())
#print(result.get_margeff().summary()) # this won't work with glm
# home grown marginal effects function for logit model with 2 variables
def mfx(result,mu1,mu2,par):
"""
result: model object from stats models logistic regression
ex: y ~ b0 + b1*x1 + b2*x2
mu1: mean value for first variable in model
mu2: mean value for 2nd variable in model
par: indicates index from 0 for model parameter you want to convert to a
marginal effect
note: this easily extends to more variables but does not handle predictors
with multiple categories (unless they are dummy coded)
"""
b0 = result.params[0]
b1 = result.params[1]
b2 = result.params[2]
XB = mu1*b1 + mu2*b2 + b0
return (np.exp(XB)/((1+np.exp(XB))**2))*result.params[par]
mfx(result,.5,30,1) # home grown function gives almost
# exact same result as get_margeff() above
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment