Example code for getting marginal effects from logistic regression using Python.
# *-----------------------------------------------------------------
# | PROGRAM NAME: ex logit marginal effects.py
# | DATE: 3/4/21
# | CREATED BY: MATT BOGARD
# | PROJECT FILE:
# *----------------------------------------------------------------
# | PURPOSE: example code for getting marginal effects from logistic regression
# *----------------------------------------------------------------
import pandas as pd
import numpy as np
from matplotlib import style
from matplotlib import pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.api as sm
# simulate some data
df = pd.DataFrame()  # start from an empty frame; columns are added below

np.random.seed(123)
df['ID'] = np.random.uniform(1, 10000, 10000)
df['D'] = np.random.binomial(1, .5, 10000)

np.random.seed(123)
df['x'] = np.random.normal(30, 1, 10000)

xb = -9 + 3.5*df['D'] + .2*df['x']  # -9 + 3.5*gender + 0.2*age
df['p'] = 1/(1 + np.exp(-xb))

np.random.seed(123)
df['y'] = np.random.binomial(1, df['p'], 10000)

df.head()
df.describe()
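
# (optional sketch, not part of the original steps) quick sanity check on the
# simulated data: the raw outcome rate should be much higher when D = 1,
# consistent with the 3.5 coefficient on D in the simulation above
df.groupby('D')['y'].mean()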
style.use("fivethirtyeight")
#----------------------------------
# for a baseline, run a linear probability model (LPM)
#----------------------------------

smf.ols('y ~ D + x', data=df).fit().summary()
#---------------------------------------
# logistic regression
#---------------------------------------

model = smf.logit(formula='y ~ D + x', data=df).fit()
model.summary()

print(model.get_margeff(at='overall').summary())  # average marginal effects
print(model.get_margeff(at='mean').summary())     # marginal effects at the mean

# mean marginal effects from the logistic regression are close to the LPM coefficients
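
# (illustrative sketch; the names lpm and ame are not from the original code)
# a quick side-by-side check of the claim above: compare the LPM slope
# coefficients with the logit average marginal effects
lpm = smf.ols('y ~ D + x', data=df).fit()
ame = model.get_margeff(at='overall')
print(lpm.params[['D', 'x']].values)  # LPM coefficients for D and x
print(ame.margeff)                    # logit average marginal effects (constant excluded)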
# get_margeff syntax:
# get_margeff(at='overall', method='dydx', atexog=None, dummy=False, count=False)
#
# 'at' options are:
#   'overall' - the average of the marginal effects at each observation
#   'mean'    - the marginal effects at the mean of each regressor
#   'median'  - the marginal effects at the median of each regressor
#   'zero'    - the marginal effects at zero for each regressor
#   'all'     - the marginal effects at each observation; if at is 'all', only the
#               margeff attribute will be available from the returned object
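
# (illustrative sketch) two of the documented options in action: dummy=True
# treats the binary D as a discrete 0 -> 1 change rather than a derivative, and
# at='all' exposes per-observation effects through the margeff attribute
# (me_all is just an illustrative name)
print(model.get_margeff(at='overall', dummy=True).summary())
me_all = model.get_margeff(at='all')
print(me_all.margeff[:5])  # effects for the first 5 observations (columns: D, x)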
#-----------------------------------------
# logistic regression with glm
#-----------------------------------------

model = smf.glm('y ~ D + x', data=df,
                family=sm.families.Binomial(link=sm.genmod.families.links.logit()))  # note: link is instantiated
result = model.fit()
print(result.summary())

# print(result.get_margeff().summary())  # this won't work with glm
# home grown marginal effects function for a logit model with 2 variables
def mfx(result, mu1, mu2, par):
    """
    result: fitted model object from a statsmodels logistic regression
            ex: y ~ b0 + b1*x1 + b2*x2
    mu1: mean value for the first variable in the model
    mu2: mean value for the 2nd variable in the model
    par: index (from 0) of the model parameter you want to convert to a
         marginal effect
    note: this easily extends to more variables but does not handle predictors
          with multiple categories (unless they are dummy coded)
    """
    b0 = result.params.iloc[0]
    b1 = result.params.iloc[1]
    b2 = result.params.iloc[2]
    XB = b0 + b1*mu1 + b2*mu2
    # derivative of the logistic CDF at XB: p*(1-p) = exp(XB)/(1+exp(XB))^2
    return (np.exp(XB)/((1 + np.exp(XB))**2))*result.params.iloc[par]
mfx(result, .5, 30, 1)  # marginal effect of D evaluated at the means (D = .5, x = 30);
                        # nearly identical to get_margeff(at='mean') above
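
# (illustrative sketch; p_hat and dens are not from the original code) the same
# logic averaged over all observations gives average marginal effects from the
# glm fit, which should line up with get_margeff(at='overall') on the logit model
p_hat = result.predict()                 # fitted probabilities from the glm
dens = (p_hat * (1 - p_hat)).mean()      # average of the logistic density term p*(1-p)
print(dens * result.params[['D', 'x']])  # average marginal effects for D and x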