Example code for getting marginal effects from logistic regression using Python.
# *-----------------------------------------------------------------
# | PROGRAM NAME: ex logit marginal effects.py
# | DATE: 3/4/21
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: example code for getting marginal effects from logistic regression
# *----------------------------------------------------------------
import pandas as pd
import numpy as np
from matplotlib import style
from matplotlib import pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.api as sm
# simulate some data
df = pd.DataFrame()  # start from an empty frame; columns are added below

np.random.seed(123)
df['ID'] = np.random.uniform(1, 10000, 10000)
df['D'] = np.random.binomial(1, .5, 10000)

np.random.seed(123)
df['x'] = np.random.normal(30, 1, 10000)

xb = -9 + 3.5*df['D'] + .2*df['x']  # -9 + 3.5*gender + 0.2*age
df['p'] = 1/(1 + np.exp(-xb))

np.random.seed(123)
df['y'] = np.random.binomial(1, df['p'], 10000)

df.head()
df.describe()
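
# (optional sketch, not part of the original steps) quick sanity check on the
# simulated data: the raw outcome rate should be much higher when D = 1,
# consistent with the 3.5 coefficient on D in the simulation above
df.groupby('D')['y'].mean()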
style.use("fivethirtyeight")
#----------------------------------
# for a baseline, run a linear probability model (LPM)
#----------------------------------

smf.ols('y ~ D + x', data=df).fit().summary()
#---------------------------------------
# logistic regression
#---------------------------------------

model = smf.logit(formula='y ~ D + x', data=df).fit()
model.summary()

print(model.get_margeff(at='overall').summary())  # average marginal effects
print(model.get_margeff(at='mean').summary())     # marginal effects at the mean

# mean marginal effects from the logistic regression are close to the LPM coefficients
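
# (illustrative sketch; the names lpm and ame are not from the original code)
# a quick side-by-side check of the claim above: compare the LPM slope
# coefficients with the logit average marginal effects
lpm = smf.ols('y ~ D + x', data=df).fit()
ame = model.get_margeff(at='overall')
print(lpm.params[['D', 'x']].values)  # LPM coefficients for D and x
print(ame.margeff)                    # logit average marginal effects (constant excluded)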
# get_margeff syntax:
# get_margeff(at='overall', method='dydx', atexog=None, dummy=False, count=False)
#
# 'at' options are:
#   'overall' - the average of the marginal effects at each observation
#   'mean'    - the marginal effects at the mean of each regressor
#   'median'  - the marginal effects at the median of each regressor
#   'zero'    - the marginal effects at zero for each regressor
#   'all'     - the marginal effects at each observation; if at is 'all', only the
#               margeff attribute will be available from the returned object
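
# (illustrative sketch) two of the documented options in action: dummy=True
# treats the binary D as a discrete 0 -> 1 change rather than a derivative, and
# at='all' exposes per-observation effects through the margeff attribute
# (me_all is just an illustrative name)
print(model.get_margeff(at='overall', dummy=True).summary())
me_all = model.get_margeff(at='all')
print(me_all.margeff[:5])  # effects for the first 5 observations (columns: D, x)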
#-----------------------------------------
# logistic regression with glm
#-----------------------------------------

model = smf.glm('y ~ D + x', data=df,
                family=sm.families.Binomial(link=sm.genmod.families.links.logit()))  # note: link is instantiated
result = model.fit()
print(result.summary())

# print(result.get_margeff().summary())  # this won't work with glm
# home grown marginal effects function for a logit model with 2 variables
def mfx(result, mu1, mu2, par):
    """
    result: fitted model object from a statsmodels logistic regression
            ex: y ~ b0 + b1*x1 + b2*x2
    mu1: mean value for the first variable in the model
    mu2: mean value for the 2nd variable in the model
    par: index (from 0) of the model parameter you want to convert to a
         marginal effect
    note: this easily extends to more variables but does not handle predictors
          with multiple categories (unless they are dummy coded)
    """
    b0 = result.params.iloc[0]
    b1 = result.params.iloc[1]
    b2 = result.params.iloc[2]
    XB = b0 + b1*mu1 + b2*mu2
    # derivative of the logistic CDF at XB: p*(1-p) = exp(XB)/(1+exp(XB))^2
    return (np.exp(XB)/((1 + np.exp(XB))**2))*result.params.iloc[par]
mfx(result, .5, 30, 1)  # marginal effect of D evaluated at the means (D = .5, x = 30);
                        # nearly identical to get_margeff(at='mean') above
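
# (illustrative sketch; p_hat and dens are not from the original code) the same
# logic averaged over all observations gives average marginal effects from the
# glm fit, which should line up with get_margeff(at='overall') on the logit model
p_hat = result.predict()                 # fitted probabilities from the glm
dens = (p_hat * (1 - p_hat)).mean()      # average of the logistic density term p*(1-p)
print(dens * result.params[['D', 'x']])  # average marginal effects for D and x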