ttungl · June 23, 2022 05:58 · ttungl · May 31, 2022
diff --git a/crashcoursecausal_data_example.r b/crashcoursecausal_data_example.r
 ###################
 #RHC Example

 #install packages
 install.packages("tableone") ## useful when you're doing a matched analysis.
 install.packages("Matching") ## carry out the actual matching.

 #load packages
 library(tableone)
 library(Matching)

 #read in data
 load(url("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/rhc.sav"))
 #view data
 View(rhc)

 #treatment variables is swang1
 #x variables that we will use
 #cat1: primary disease category
 #age
 #sex
 #meanbp1: mean blood pressure

 #create a data set with just these variables that will be used, for simplicity
 ARF<-as.numeric(rhc$cat1=='ARF')
 CHF<-as.numeric(rhc$cat1=='CHF')
 Cirr<-as.numeric(rhc$cat1=='Cirrhosis')
 colcan<-as.numeric(rhc$cat1=='Colon Cancer')
 Coma<-as.numeric(rhc$cat1=='Coma')
 COPD<-as.numeric(rhc$cat1=='COPD')
 lungcan<-as.numeric(rhc$cat1=='Lung Cancer')
 MOSF<-as.numeric(rhc$cat1=='MOSF w/Malignancy')
 sepsis<-as.numeric(rhc$cat1=='MOSF w/Sepsis')
 female<-as.numeric(rhc$sex=='Female')
 died<-as.numeric(rhc$death=='Yes')
 age<-rhc$age
 treatment<-as.numeric(rhc$swang1=='RHC')
 meanbp1<-rhc$meanbp1

 #new dataset
 mydata<-cbind(ARF,CHF,Cirr,colcan,Coma,lungcan,MOSF,sepsis,
              age,female,meanbp1,treatment,died)
 mydata<-data.frame(mydata)

 #covariates we will use (shorter list than you would use in practice)
 xvars<-c("ARF","CHF","Cirr","colcan","Coma","lungcan","MOSF","sepsis",
         "age","female","meanbp1")

 #look at a table 1 ## CreateTableOne is to summarize statistics for those variables in xvars. 
 # In this case, it also does the stratification on treatment because we want to look at the balance
 # between the treatment and control groups. 
 # test=False means that there is no need a significance test.

 table1<- CreateTableOne(vars=xvars,strata="treatment", data=mydata, test=FALSE)
 ## include standardized mean difference (SMD)
 print(table1,smd=TRUE)

 # the output will show the stratification by treatment. 
 # The output table indicates how much confounding you might be dealing with.

 ############################################
 #do greedy matching on Mahalanobis distance
 ############################################
 # M=1 is whether you want pair matched or maybe you're matched in more than one control for a treatment.
 # so for M=1 meaning one treated subject to one control subject.
 # replace=False meaning no replacement.
 greedymatch<-Match(Tr=treatment,M=1,X=mydata[xvars],replace=FALSE)
 matched<-mydata[unlist(greedymatch[c("index.treated","index.control")]), ]


 #get table 1 for matched data with standardized differences
 matchedtab1<-CreateTableOne(vars=xvars, strata ="treatment", 
                            data=matched, test = FALSE)
 print(matchedtab1, smd = TRUE)

 ######
 #outcome analysis
 ######
 y_trt<-matched$died[matched$treatment==1]
 y_con<-matched$died[matched$treatment==0]

 #pairwise difference
 diffy<-y_trt-y_con

 ## causal risk difference
 #paired t-test
 t.test(diffy)

 ## the output shows the mean of x = 0.0452, 
 ## that's a causal risk difference because the difference in outcomes and mean.
 ## Point estimate: 0.045; difference in probability of death if everyone received RHC 
 ## versus if no one received RHC is 0.045 (i.e. higher risk of death in RHC group)
 ## 95% CI (0.019, 0.072); P-value < 0.001;
 ## we got small p-value, and 95%CI gives you a sense of plausible range of true risk difference.
 ## this is only for illustration, we haven't controlled for all of the confounders that are in the dataset.

 ####
 # McNemar test
 ####
 ## paired outcome observations, it will show the counts of each type of pairs.
 table(y_trt,y_con)

 mcnemar.test(matrix(c(973,513,395,303),2,2))

 ## output p-value=0.001, hypothesis of no-treatment effect.
 ## 493+394 discordant pairs; 493 is when a treated person died and a control person did not.
 ## p-value is about the same as from paired t-test 0.001.
 ## we're concluding that treated subjects were at higher risk of death.

 ## Discussion:
 ## rcbalance package has many more matching options. (obsstudies.org/files/rcbalance_paper_v7r2.pdf)
 ## tableone package vignette has useful code for creating figures and tables.

 ##########################
 #propensity score matching
 #########################

 #fit a propensity score model. logistic regression

 psmodel<-glm(treatment~ARF+CHF+Cirr+colcan+Coma+lungcan+MOSF+
               sepsis+age+female+meanbp1+aps,
    family=binomial(),data=mydata)

 #show coefficients etc
 summary(psmodel)
 #create propensity score
 pscore<-psmodel$fitted.values


 #do greedy matching on logit(PS) using Match with a caliper

 logit <- function(p) {log(p)-log(1-p)}
 psmatch<-Match(Tr=mydata$treatment,M=1,X=logit(pscore),replace=FALSE,caliper=.2)
 matched<-mydata[unlist(psmatch[c("index.treated","index.control")]), ]
 xvars<-c("ARF","CHF","Cirr","colcan","Coma","lungcan","MOSF","sepsis",
         "age","female","meanbp1")

 #get standardized differences
 matchedtab1<-CreateTableOne(vars=xvars, strata ="treatment", 
                            data=matched, test = FALSE)
 print(matchedtab1, smd = TRUE)

 #outcome analysis
 y_trt<-matched$died[matched$treatment==1]
 y_con<-matched$died[matched$treatment==0]

 #pairwise difference
 diffy<-y_trt-y_con

 #paired t-test
 t.test(diffy)
	###################
	#RHC Example

	#install packages
	install.packages("tableone") ## useful when you're doing a matched analysis.
	install.packages("Matching") ## carry out the actual matching.

	#load packages
	library(tableone)
	library(Matching)

	#read in data
	load(url("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/rhc.sav"))
	#view data
	View(rhc)

	#treatment variables is swang1
	#x variables that we will use
	#cat1: primary disease category
	#age
	#sex
	#meanbp1: mean blood pressure

	#create a data set with just these variables that will be used, for simplicity
	ARF<-as.numeric(rhc$cat1=='ARF')
	CHF<-as.numeric(rhc$cat1=='CHF')
	Cirr<-as.numeric(rhc$cat1=='Cirrhosis')
	colcan<-as.numeric(rhc$cat1=='Colon Cancer')
	Coma<-as.numeric(rhc$cat1=='Coma')
	COPD<-as.numeric(rhc$cat1=='COPD')
	lungcan<-as.numeric(rhc$cat1=='Lung Cancer')
	MOSF<-as.numeric(rhc$cat1=='MOSF w/Malignancy')
	sepsis<-as.numeric(rhc$cat1=='MOSF w/Sepsis')
	female<-as.numeric(rhc$sex=='Female')
	died<-as.numeric(rhc$death=='Yes')
	age<-rhc$age
	treatment<-as.numeric(rhc$swang1=='RHC')
	meanbp1<-rhc$meanbp1

	#new dataset
	mydata<-cbind(ARF,CHF,Cirr,colcan,Coma,lungcan,MOSF,sepsis,
	age,female,meanbp1,treatment,died)
	mydata<-data.frame(mydata)

	#covariates we will use (shorter list than you would use in practice)
	xvars<-c("ARF","CHF","Cirr","colcan","Coma","lungcan","MOSF","sepsis",
	"age","female","meanbp1")

	#look at a table 1 ## CreateTableOne is to summarize statistics for those variables in xvars.
	# In this case, it also does the stratification on treatment because we want to look at the balance
	# between the treatment and control groups.
	# test=False means that there is no need a significance test.

	table1<- CreateTableOne(vars=xvars,strata="treatment", data=mydata, test=FALSE)
	## include standardized mean difference (SMD)
	print(table1,smd=TRUE)

	# the output will show the stratification by treatment.
	# The output table indicates how much confounding you might be dealing with.

	############################################
	#do greedy matching on Mahalanobis distance
	############################################
	# M=1 is whether you want pair matched or maybe you're matched in more than one control for a treatment.
	# so for M=1 meaning one treated subject to one control subject.
	# replace=False meaning no replacement.
	greedymatch<-Match(Tr=treatment,M=1,X=mydata[xvars],replace=FALSE)
	matched<-mydata[unlist(greedymatch[c("index.treated","index.control")]), ]


	#get table 1 for matched data with standardized differences
	matchedtab1<-CreateTableOne(vars=xvars, strata ="treatment",
	data=matched, test = FALSE)
	print(matchedtab1, smd = TRUE)

	######
	#outcome analysis
	######
	y_trt<-matched$died[matched$treatment==1]
	y_con<-matched$died[matched$treatment==0]

	#pairwise difference
	diffy<-y_trt-y_con

	## causal risk difference
	#paired t-test
	t.test(diffy)

	## the output shows the mean of x = 0.0452,
	## that's a causal risk difference because the difference in outcomes and mean.
	## Point estimate: 0.045; difference in probability of death if everyone received RHC
	## versus if no one received RHC is 0.045 (i.e. higher risk of death in RHC group)
	## 95% CI (0.019, 0.072); P-value < 0.001;
	## we got small p-value, and 95%CI gives you a sense of plausible range of true risk difference.
	## this is only for illustration, we haven't controlled for all of the confounders that are in the dataset.

	####
	# McNemar test
	####
	## paired outcome observations, it will show the counts of each type of pairs.
	table(y_trt,y_con)

	mcnemar.test(matrix(c(973,513,395,303),2,2))

	## output p-value=0.001, hypothesis of no-treatment effect.
	## 493+394 discordant pairs; 493 is when a treated person died and a control person did not.
	## p-value is about the same as from paired t-test 0.001.
	## we're concluding that treated subjects were at higher risk of death.

	## Discussion:
	## rcbalance package has many more matching options. (obsstudies.org/files/rcbalance_paper_v7r2.pdf)
	## tableone package vignette has useful code for creating figures and tables.

	##########################
	#propensity score matching
	#########################

	#fit a propensity score model. logistic regression

	psmodel<-glm(treatment~ARF+CHF+Cirr+colcan+Coma+lungcan+MOSF+
	sepsis+age+female+meanbp1+aps,
	family=binomial(),data=mydata)

	#show coefficients etc
	summary(psmodel)
	#create propensity score
	pscore<-psmodel$fitted.values


	#do greedy matching on logit(PS) using Match with a caliper

	logit <- function(p) {log(p)-log(1-p)}
	psmatch<-Match(Tr=mydata$treatment,M=1,X=logit(pscore),replace=FALSE,caliper=.2)
	matched<-mydata[unlist(psmatch[c("index.treated","index.control")]), ]
	xvars<-c("ARF","CHF","Cirr","colcan","Coma","lungcan","MOSF","sepsis",
	"age","female","meanbp1")

	#get standardized differences
	matchedtab1<-CreateTableOne(vars=xvars, strata ="treatment",
	data=matched, test = FALSE)
	print(matchedtab1, smd = TRUE)

	#outcome analysis
	y_trt<-matched$died[matched$treatment==1]
	y_con<-matched$died[matched$treatment==0]

	#pairwise difference
	diffy<-y_trt-y_con

	#paired t-test
	t.test(diffy)
No results found