battenr · April 8, 2024 13:38
diff --git a/prob_of_censor b/prob_of_censor
 # Title: Calculating Probability of Censoring

 # Description: Inverse probability of censoring weighting (IPCW) can be used to remove the potential selection bias 
 # caused by censoring. There are different ways to calculate the probability of censoring. The goal of this code was 
 # to examine whether the two methods produced the same result. 

 # Setup ----

 #... Libraries 

 library(tidyverse) # ol faithful
 library(simsurv) # used to simulate survival data
 library(survival) # used to fix Cox Proportional Hazards Model

 # Simulating Data ----

 # Simulating this data included starting with a very simple example. It can be expanded upon later if of interest. 

 # Variables

 # - Binary treatment 
 # - One continuous covariates (x1) and one binary covariate (x2)

 cov <- data.frame(id = 1:200,
                    trt = rbinom(200, 1, 0.5),
                    x1 = rnorm(n = 200, mean = 5, sd = 2),
                    x2 = rbinom(200, 1, 0.65))
                    
 # Simulating the survival data. This includes the event times and censoring. 
 # Arbitrarily chose 5 as the maximum follow-up time (assuming 5 years). 

 dat <- simsurv::simsurv(dist = "exponential", # using exponential distribution, commonly used for Cox PH models
                          lambdas = 0.1, # specifying the parameter needed
                          betas = c(trt = -0.5, x1 = -0.3), # betas x1 and trt. There is none chosen for x2
                          x = cov, # covariate dataframe from above 
                          maxt = 5) # max follow-up time
  
 # Merging the simulated survival data and covariates 

 dat <- merge(cov, dat)

 # Probability of Censoring ----

 #... Logistic Regression ----

 # One way to calculate the probability of censoring is to fit a logistic regression model. This method is 
 # similar to what is used for inverse probability of treatment weighting. Code for this method can be 
 # found in program 12.7 of Causal Inference: What If by Hernan & Robins. 

 # Fitting model for the denominator of the weights. This code only focuses on the denominator.
  
 denom.cens <- glm(status ~ trt + x1 + x2, # including both x1 and x2 for now 
                    family = binomial(link = "logit"), 
                    data = dat)
 
 # Using the above model, we calculate the probability of not being censored as 1 - Prob(Censor | Covariates)
 # Very similar methodology as IPTW. 
  
 pd.cens <- 1-predict(denom.cens, type = "response") # probability of not being censoring. 

 #... Cox Model ----

 # Another way to calculate this probability (which I wasn't aware of) is to fix a Cox PH model. To do this, we 
 # first fix a Cox PH model then predict the probability of not being censored using type = "expected". 

 coxmod <- survival::coxph(Surv(eventtime, status) ~ trt + x1 + x2, 
                          data = dat)

 coxp <- exp(-predict(coxmod, type = "expected"))

 # Comparing Methods ----

 # Making a dataframe, called probcensor, to compare the results from both methods. 

 pweights <- data.frame(coxp, pd.cens)

 # Viewing this dataframe shows the similarities/differences of the estimated probability using each method. An important point to keep
 # in mind, is that these methods give similar results but won't be exact. This is due to each method using a different model to 
 # estimate the probability (each with different assumptions to consider).
	# Title: Calculating Probability of Censoring

	# Description: Inverse probability of censoring weighting (IPCW) can be used to remove the potential selection bias
	# caused by censoring. There are different ways to calculate the probability of censoring. The goal of this code was
	# to examine whether the two methods produced the same result.

	# Setup ----

	#... Libraries

	library(tidyverse) # ol faithful
	library(simsurv) # used to simulate survival data
	library(survival) # used to fix Cox Proportional Hazards Model

	# Simulating Data ----

	# Simulating this data included starting with a very simple example. It can be expanded upon later if of interest.

	# Variables

	# - Binary treatment
	# - One continuous covariates (x1) and one binary covariate (x2)

	cov <- data.frame(id = 1:200,
	trt = rbinom(200, 1, 0.5),
	x1 = rnorm(n = 200, mean = 5, sd = 2),
	x2 = rbinom(200, 1, 0.65))

	# Simulating the survival data. This includes the event times and censoring.
	# Arbitrarily chose 5 as the maximum follow-up time (assuming 5 years).

	dat <- simsurv::simsurv(dist = "exponential", # using exponential distribution, commonly used for Cox PH models
	lambdas = 0.1, # specifying the parameter needed
	betas = c(trt = -0.5, x1 = -0.3), # betas x1 and trt. There is none chosen for x2
	x = cov, # covariate dataframe from above
	maxt = 5) # max follow-up time

	# Merging the simulated survival data and covariates

	dat <- merge(cov, dat)

	# Probability of Censoring ----

	#... Logistic Regression ----

	# One way to calculate the probability of censoring is to fit a logistic regression model. This method is
	# similar to what is used for inverse probability of treatment weighting. Code for this method can be
	# found in program 12.7 of Causal Inference: What If by Hernan & Robins.

	# Fitting model for the denominator of the weights. This code only focuses on the denominator.

	denom.cens <- glm(status ~ trt + x1 + x2, # including both x1 and x2 for now
	family = binomial(link = "logit"),
	data = dat)

	# Using the above model, we calculate the probability of not being censored as 1 - Prob(Censor \| Covariates)
	# Very similar methodology as IPTW.

	pd.cens <- 1-predict(denom.cens, type = "response") # probability of not being censoring.

	#... Cox Model ----

	# Another way to calculate this probability (which I wasn't aware of) is to fix a Cox PH model. To do this, we
	# first fix a Cox PH model then predict the probability of not being censored using type = "expected".

	coxmod <- survival::coxph(Surv(eventtime, status) ~ trt + x1 + x2,
	data = dat)

	coxp <- exp(-predict(coxmod, type = "expected"))

	# Comparing Methods ----

	# Making a dataframe, called probcensor, to compare the results from both methods.

	pweights <- data.frame(coxp, pd.cens)

	# Viewing this dataframe shows the similarities/differences of the estimated probability using each method. An important point to keep
	# in mind, is that these methods give similar results but won't be exact. This is due to each method using a different model to
	# estimate the probability (each with different assumptions to consider).
No results found