ttungl · June 24, 2022 08:02
diff --git a/iptw_causal_analysis.r b/iptw_causal_analysis.r
 ## Data analysis project - carry out an IPTW causal analysis


 install.packages("tableone")
 install.packages("Matching")
 install.packages("ipw")
 install.packages("survey")
 install.packages("MatchIt")

 library(tableone)
 library(Matching)
 library(ipw)
 library(survey)
 library(MatchIt)

 data("lalonde")

 View(lalonde)
 head(lalonde)

 dim(lalonde)

 mydata<-data.frame(lalonde)


 #covariates we will use (shorter list than you would use in practice)
 xvars<-c("age","educ","race","married","nodegree","re74","re75")

 #look at a table 1
 table11 <- CreateTableOne(vars=xvars, strata="treat", data=mydata, test=FALSE)
 ## include standardized mean difference (SMD)
 print(table11,smd=TRUE)

 ## Fit a propensity score model. Use a logistic regression model, 
 ## where the outcome is treatment. Include the 8 confounding variables in
 ## the model as predictors, with no interaction terms or non-linear terms 
 ## (such as squared terms). Obtain the propensity score for each subject. 
 ## Next, obtain the inverse probability of treatment weights for each subject.


 logit<- function(p){log(p) - log(1-p)}

 # fit a psm using logistic regression.
 psmodel <- glm(treat ~ age+educ+race+married+nodegree+re74+re75, data=mydata,
               family=binomial(link="logit"))

 # value of propensity score for each subject.
 ps <- predict(psmodel, type="response")

 # create weights
 weight <- ifelse(mydata$treat==1, 1/(ps), 1/(1-ps))

 # apply the weight to data to find the weighted mean and weighted variance 
 # for each group. 
 weighteddata <- svydesign(ids = ~ 1, data=mydata, weights= ~ weight)

 # create weighted table 1
 weightedtable <- svyCreateTableOne(vars=xvars, strata="treat",
                                   data = weighteddata, test = FALSE)

 # show table with SMD
 print(weightedtable, smd=TRUE)

 ####
 # Q1: What are the minimum and maximum weights? 
 ####
 min(weight); # 1.009163
 max(weight); # 40.07729

 #### SMD after weighting
 # Q2: Find the standardized differences for each

 # confounder on the weighted (pseudo) population. What is the standardized
 # difference for nodegree? 

 # get a weighted mean for a single covariate directly
 mean(weight[mydata$treat==1] * mydata$nodegree[mydata$treat==1])/(mean(weight[mydata$treat==1]))
 ## "1"= 0.5702415
 ## SMD = 0.11

 #### ATE after weighting
 # Q3: Using IPTW, find the estimate and 95% confidence
 # interval for the average causal effect. This can be obtained from svyglm
 # MSMs: get causal risk difference 
 weightmodel <- ipwpoint(exposure = treat, family="binomial", link="logit",
                        denominator = ~ age+educ+race+married+nodegree+re74+re75,
                        data=mydata)

 # numeric summary of weights
 summary(weightmodel$ipw.weights)
 # Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 # 1.009   1.052   1.170   1.905   1.623  40.077 

 ipwplot(weights=weightmodel$ipw.weights, logscale = FALSE,
        main="weights", xlim=c(0,22))
 mydata$wt <- weightmodel$ipw.weights

 # fit a marginal structural model (aka risk difference)
 # ATE after weighting

 msm <- (svyglm(re78 ~ treat, design = svydesign(~ 1, weights = ~ weightmodel$ipw.weights, data=mydata)))
 coef(msm)
 #(Intercept)       treat 
 #6422.8390    224.6763 
 confint(msm)
 #               2.5 %     97.5 %
 #  (Intercept)  5705.529 7140.149
 #  treat       -1562.856 2012.208

 # Q4: Now truncate the weights at the 1st and 99th
 # percentiles. This can be done with the trunc=0.01 option in svyglm. 
 ## Using IPTW with the truncated weights, find the
 ## estimate and 95% confidence interval for the average causal effect 

 ####
 # Q4: ATE after weight truncating
 # Now truncate the weights at the 1st and 99th
 # percentiles. This can be done with the trunc=0.01 option in svyglm. 
 # Using IPTW with the truncated weights, find the
 # estimate and 95% confidence interval for the average causal effect 

 weightmodel <- ipwpoint(exposure = treat, family = "binomial", link = "logit", numerator = ~ 1, 
                        denominator = ~ age + educ + race + married + nodegree + re74 + re75, 
                        data = mydata, trunc = 0.01)
 msm_trunc <- (svyglm(re78 ~ treat, design = svydesign(~ 1, weights = ~ weightmodel$weights.trunc, data=mydata)))
 coef(msm_trunc)
 # (Intercept)       treat 
 # 6422.8390    486.3557 
 confint(msm_trunc)
 #                 2.5 %   97.5 %
 #  (Intercept)  5705.529 7140.149
 #  treat       -1093.919 2066.631
	## Data analysis project - carry out an IPTW causal analysis


	install.packages("tableone")
	install.packages("Matching")
	install.packages("ipw")
	install.packages("survey")
	install.packages("MatchIt")

	library(tableone)
	library(Matching)
	library(ipw)
	library(survey)
	library(MatchIt)

	data("lalonde")

	View(lalonde)
	head(lalonde)

	dim(lalonde)

	mydata<-data.frame(lalonde)


	#covariates we will use (shorter list than you would use in practice)
	xvars<-c("age","educ","race","married","nodegree","re74","re75")

	#look at a table 1
	table11 <- CreateTableOne(vars=xvars, strata="treat", data=mydata, test=FALSE)
	## include standardized mean difference (SMD)
	print(table11,smd=TRUE)

	## Fit a propensity score model. Use a logistic regression model,
	## where the outcome is treatment. Include the 8 confounding variables in
	## the model as predictors, with no interaction terms or non-linear terms
	## (such as squared terms). Obtain the propensity score for each subject.
	## Next, obtain the inverse probability of treatment weights for each subject.


	logit<- function(p){log(p) - log(1-p)}

	# fit a psm using logistic regression.
	psmodel <- glm(treat ~ age+educ+race+married+nodegree+re74+re75, data=mydata,
	family=binomial(link="logit"))

	# value of propensity score for each subject.
	ps <- predict(psmodel, type="response")

	# create weights
	weight <- ifelse(mydata$treat==1, 1/(ps), 1/(1-ps))

	# apply the weight to data to find the weighted mean and weighted variance
	# for each group.
	weighteddata <- svydesign(ids = ~ 1, data=mydata, weights= ~ weight)

	# create weighted table 1
	weightedtable <- svyCreateTableOne(vars=xvars, strata="treat",
	data = weighteddata, test = FALSE)

	# show table with SMD
	print(weightedtable, smd=TRUE)

	####
	# Q1: What are the minimum and maximum weights?
	####
	min(weight); # 1.009163
	max(weight); # 40.07729

	#### SMD after weighting
	# Q2: Find the standardized differences for each

	# confounder on the weighted (pseudo) population. What is the standardized
	# difference for nodegree?

	# get a weighted mean for a single covariate directly
	mean(weight[mydata$treat==1] * mydata$nodegree[mydata$treat==1])/(mean(weight[mydata$treat==1]))
	## "1"= 0.5702415
	## SMD = 0.11

	#### ATE after weighting
	# Q3: Using IPTW, find the estimate and 95% confidence
	# interval for the average causal effect. This can be obtained from svyglm
	# MSMs: get causal risk difference
	weightmodel <- ipwpoint(exposure = treat, family="binomial", link="logit",
	denominator = ~ age+educ+race+married+nodegree+re74+re75,
	data=mydata)

	# numeric summary of weights
	summary(weightmodel$ipw.weights)
	# Min. 1st Qu. Median Mean 3rd Qu. Max.
	# 1.009 1.052 1.170 1.905 1.623 40.077

	ipwplot(weights=weightmodel$ipw.weights, logscale = FALSE,
	main="weights", xlim=c(0,22))
	mydata$wt <- weightmodel$ipw.weights

	# fit a marginal structural model (aka risk difference)
	# ATE after weighting

	msm <- (svyglm(re78 ~ treat, design = svydesign(~ 1, weights = ~ weightmodel$ipw.weights, data=mydata)))
	coef(msm)
	#(Intercept) treat
	#6422.8390 224.6763
	confint(msm)
	# 2.5 % 97.5 %
	# (Intercept) 5705.529 7140.149
	# treat -1562.856 2012.208

	# Q4: Now truncate the weights at the 1st and 99th
	# percentiles. This can be done with the trunc=0.01 option in svyglm.
	## Using IPTW with the truncated weights, find the
	## estimate and 95% confidence interval for the average causal effect

	####
	# Q4: ATE after weight truncating
	# Now truncate the weights at the 1st and 99th
	# percentiles. This can be done with the trunc=0.01 option in svyglm.
	# Using IPTW with the truncated weights, find the
	# estimate and 95% confidence interval for the average causal effect

	weightmodel <- ipwpoint(exposure = treat, family = "binomial", link = "logit", numerator = ~ 1,
	denominator = ~ age + educ + race + married + nodegree + re74 + re75,
	data = mydata, trunc = 0.01)
	msm_trunc <- (svyglm(re78 ~ treat, design = svydesign(~ 1, weights = ~ weightmodel$weights.trunc, data=mydata)))
	coef(msm_trunc)
	# (Intercept) treat
	# 6422.8390 486.3557
	confint(msm_trunc)
	# 2.5 % 97.5 %
	# (Intercept) 5705.529 7140.149
	# treat -1093.919 2066.631
No results found