BioSciEconomist · November 16, 2019 02:34
diff --git a/gamma model demo.py b/gamma model demo.py
 #  ------------------------------------------------------------------
 #  |PROGRAM NAME:gamma model demo.py
 #  |DATE: 11/13/19
 #  |CREATED BY: MATT BOGARD 
 #  |PROJECT FILE:  
 #  |----------------------------------------------------------------
 #  | PURPOSE: example analysis of cost data using GLM gamma model with log link
 #  |----------------------------------------------------------------

 # TO DO: add bootstrapping

 #-----------------------------------
 # read latest cohort file
 #-----------------------------------

 import pandas as pd
 import numpy as np

 # read simulated analysis file
 df = pd.read_csv('//data//simcost.csv')

 # inspect data
 df.columns 
 df.head()
 df.info()
 df.shape


 #---------------------------------------
 # cost analysis
 #---------------------------------------

 # visualization and descriptives
 df.cost.plot( kind='hist', normed=True, bins=50, range=(0,300000))
 df.cost.describe()

 # Create the boxplo
 import matplotlib.pyplot as plt
 df.boxplot(column='cost', by='treat', rot=90)

 # descriptive stats by treatment status
 df.groupby('treat')['cost'].mean()
 df.groupby('treat')['id'].count()


 df.groupby('treat')['cost'].mean()
 df.groupby('treat')['id'].count()


 #-----------------------------------------
 # analysis
 #----------------------------------------

 # benchmark against standard linear regression
 import statsmodels.api as sm # import stastmodels
 import statsmodels.formula.api as smf # this allows us to use an explicit formulation 

 # regression using smf
 results = smf.ols('cost ~ treat', data=df).fit()
 results.summary() # one of the problems with stats models I have read is you can't override scientific notation?

 del results # cleanup
               
 # glm with log link and gamma error on positive claims

 model = smf.glm('cost ~ treat', data=df, family=sm.families.Gamma(link = sm.genmod.families.links.log))
 result = model.fit()
 print(result.summary())

 import math

 math.exp(.5831) # 1.79 as ratio of treatment and control means

 25331/14139 # 1.79 same as exponentiated coefficient from gamma model
	# ------------------------------------------------------------------
	# \|PROGRAM NAME:gamma model demo.py
	# \|DATE: 11/13/19
	# \|CREATED BY: MATT BOGARD
	# \|PROJECT FILE:
	# \|----------------------------------------------------------------
	# \| PURPOSE: example analysis of cost data using GLM gamma model with log link
	# \|----------------------------------------------------------------

	# TO DO: add bootstrapping

	#-----------------------------------
	# read latest cohort file
	#-----------------------------------

	import pandas as pd
	import numpy as np

	# read simulated analysis file
	df = pd.read_csv('//data//simcost.csv')

	# inspect data
	df.columns
	df.head()
	df.info()
	df.shape


	#---------------------------------------
	# cost analysis
	#---------------------------------------

	# visualization and descriptives
	df.cost.plot( kind='hist', normed=True, bins=50, range=(0,300000))
	df.cost.describe()

	# Create the boxplo
	import matplotlib.pyplot as plt
	df.boxplot(column='cost', by='treat', rot=90)

	# descriptive stats by treatment status
	df.groupby('treat')['cost'].mean()
	df.groupby('treat')['id'].count()


	df.groupby('treat')['cost'].mean()
	df.groupby('treat')['id'].count()


	#-----------------------------------------
	# analysis
	#----------------------------------------

	# benchmark against standard linear regression
	import statsmodels.api as sm # import stastmodels
	import statsmodels.formula.api as smf # this allows us to use an explicit formulation

	# regression using smf
	results = smf.ols('cost ~ treat', data=df).fit()
	results.summary() # one of the problems with stats models I have read is you can't override scientific notation?

	del results # cleanup

	# glm with log link and gamma error on positive claims

	model = smf.glm('cost ~ treat', data=df, family=sm.families.Gamma(link = sm.genmod.families.links.log))
	result = model.fit()
	print(result.summary())

	import math

	math.exp(.5831) # 1.79 as ratio of treatment and control means

	25331/14139 # 1.79 same as exponentiated coefficient from gamma model