Last active
December 9, 2023 02:50
-
-
Save vincentarelbundock/4313644338740a0238933e16124607f9 to your computer and use it in GitHub Desktop.
Maringe et al. 2020
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Downloads data here: | |
# https://academic.oup.com/ije/article/49/5/1719/5835351#226179197 | |
############################################################################# | |
# TRIAL EMULATION: SURGERY WITHIN 6 MONTHS AMONG OLDER LUNG CANCER PATIENTS | |
# Author: Clemence Leyrat (from Camille Maringe's Stata code) | |
# Refactored by Vincent Arel-Bundock on 2023-12-08 | |
# Content: recoding the outcome and follow-up time for our emulated trial | |
# creating the censoring indicators and time uncensored | |
############################################################################# | |
# Packages | |
library(survival) # For survival analysis | |
library(tinytest) | |
library(boot) | |
set.seed(1024) | |
tab <- read.csv("ije-2019-08-1035-File008.csv", sep = ",", header = T) | |
############################################################################################### | |
# VARIABLES | |
# id: patient identifier | |
# fup_obs: observed follow-up time (time to death or 1 year if censored alive) | |
# death: observed event of interest (all-cause death) 1: dead, 0:alive | |
# timetosurgery: time to surgery (NA if no surgery) | |
# surgery: observed treatment 1 if the patient received surgery within 6 month, 0 otherwise | |
# age: age at diagnosis | |
# sex: patient's sex | |
# perf: performance status at diagnosis | |
# stage: stage at diagnosis | |
# deprivation: deprivation score | |
# charlson: Charlson's comorbidity index | |
# emergency: route to diagnosis | |
# Dataset: these variables are in a dataframe called "tab" (unavailable) | |
############################################################################################### | |
######################################################################## | |
# STEP 1-CLONING: CREATION OF OUTCOME AND FOLLOW-UP TIME IN EACH ARM | |
######################################################################## | |
# Note: Letters A, B... refer to the scenarios identified in our graph | |
# We will create two new variables: | |
# - fup: the follow-up time in the emulated trial (which can be different from the observed follow-up time) | |
# - outcome: the outcome in the emulated trial (which can be different from the observed follow-up time) | |
# These variables will be the outcome and follow up time in the outcome model | |
######################################################################################################################################### | |
clone <- function(tab, arm = "Control") { | |
# Create a copy of the dataset for the 'Control' arm | |
out <- tab | |
out$arm <- arm | |
# Case 1: Patients receive surgery within 6 months | |
idx <- which(out$surgery == 1 & out$timetosurgery <= 182.62) | |
out$outcome[idx] <- ifelse(out$arm[idx] == "Control", 0, out$death[idx]) | |
out$fup[idx] <- ifelse(out$arm[idx] == "Control", out$timetosurgery[idx], out$fup_obs[idx]) | |
# Case 2: Patients do not receive surgery within 6 months | |
if (arm == "Control") { | |
idx <- which(out$surgery == 0 | (out$surgery == 1 & out$timetosurgery > 182.62)) | |
out$outcome[idx] <- out$death[idx] | |
out$fup[idx] <- out$fup_obs[idx] | |
# Case 2: Patients die or are lost to follow-up before 6 months without having surgery | |
} else if (arm == "Surgery") { | |
idx <- which(out$surgery == 0 & out$fup_obs <= 182.62) | |
out$outcome[idx] <- out$death[idx] | |
out$fup[idx] <- out$fup_obs[idx] | |
} | |
# Case 3: Patients do not receive surgery within 6 months and are still alive or at risk at 6 months | |
if (arm == "Surgery") { | |
idx <- which((out$surgery == 0 & out$fup_obs > 182.62) | (out$surgery == 1 & out$timetosurgery > 182.62)) | |
out$outcome[idx] <- 0 | |
out$fup[idx] <- 182.62 | |
} | |
# Censoring status and follow-up time uncensored | |
# Case 1 | |
idx <- out$surgery == 1 & out$timetosurgery <= 182.62 | |
out$censoring[idx] <- ifelse(arm == "Control", 1, 0) | |
out$fup_uncensored[idx] <- out$timetosurgery[idx] | |
# Case 2 | |
idx <- out$surgery == 0 & out$fup_obs <= 182.62 | |
out$censoring[idx] <- 0 | |
out$fup_uncensored[idx] <- out$fup_obs[idx] | |
# Case 3 | |
idx <- (out$surgery == 0 & out$fup_obs > 182.62) | (out$surgery == 1 & out$timetosurgery > 182.62) | |
out$censoring[idx] <- ifelse(arm == "Surgery", 1, 0) | |
out$fup_uncensored[idx] <- 182.62 | |
return(out) | |
} | |
tab <- rbind( | |
clone(tab, "Control"), | |
clone(tab, "Surgery") | |
) | |
# Bootstrap function | |
fboot <- function(tab, indices) { | |
t <- tab[tab$arm == "Control", ] | |
t1 <- tab[tab$arm == "Surgery", ] | |
tab0 <- t[indices, ] # allows boot to select sample | |
select <- tab0$id | |
tab1 <- t1[t1$id %in% select, ] | |
tab <- rbind(tab0, tab1) | |
#################################################### | |
# STEP 2-SPLITTING THE DATASET AT EACH TIME OF EVENT | |
#################################################### | |
# Dataframe containing the time of events and an ID for the times of events | |
t_events <- sort(unique(tab$fup)) | |
times <- data.frame("tevent" = t_events, "ID_t" = seq(1:length(t_events))) | |
process_data <- function(tab, t_events, times, arm = "Surgery") { | |
# Filtering the dataset for the 'Surgery' arm | |
tab_s <- tab[tab$arm == arm, ] | |
# Creation of the entry variable (Tstart, 0 for everyone) | |
tab_s$Tstart <- 0 | |
# Splitting the dataset at each time of event until the event happens and sorting it | |
data.long <- survSplit(tab_s, | |
cut = t_events, end = "fup", | |
start = "Tstart", event = "outcome", id = "ID") | |
data.long <- data.long[order(data.long$ID, data.long$fup), ] | |
# Splitting the original dataset at each time of event and sorting it until censoring happens | |
data.long.cens <- survSplit(tab_s, | |
cut = t_events, end = "fup", | |
start = "Tstart", event = "censoring", id = "ID") | |
data.long.cens <- data.long.cens[order(data.long.cens$ID, data.long.cens$fup), ] | |
# Replacing the censoring variable in data.long by the censoring variable obtained in the second split dataset | |
data.long$censoring <- data.long.cens$censoring | |
# Creating Tstop (end of the interval) | |
data.long$Tstop <- data.long$fup | |
# Merge with 'times' data and sort | |
data.long <- merge(data.long, times, by.x = "Tstart", by.y = "tevent", all.x = TRUE) | |
data.long <- data.long[order(data.long$ID, data.long$fup), ] | |
data.long$ID_t[is.na(data.long$ID_t)] <- 0 | |
return(data.long) | |
} | |
data <- rbind( | |
process_data(tab, t_events, times, arm = "Surgery"), | |
process_data(tab, t_events, times, arm = "Control") | |
) | |
data_final <- merge(data, times, by = "ID_t", all.x = T) | |
data_final <- data_final[order(data_final$ID, data_final$fup), ] | |
############################################ | |
# STEP 3- ESTIMATING THE CENSORING WEIGHTS | |
############################################ | |
censoring_weights <- function(data_final, arm = "Surgery") { | |
data.long <- data_final[which(data_final$arm == arm), ] | |
# STEP 1: censoring model | |
# Cox model | |
ms_cens <- coxph(Surv(Tstart, Tstop, censoring) ~ age + sex + emergency + | |
stage + deprivation + charlson + perf, ties = "efron", data = data.long) | |
data.long$risk <- predict( | |
ms_cens, | |
newdata = data.long, | |
type = "risk", | |
reference = "zero") | |
# Estimating the cumulative hazard (when covariates=0) | |
dat.base <- data.frame(basehaz(ms_cens, centered = F)) | |
names(dat.base) <- c("hazard", "t") | |
dat.base <- unique(merge(dat.base, times, by.x = "t", by.y = "tevent", all.x = T)) | |
# Merging and reordering the dataset | |
data.long <- merge(data.long, dat.base, by = "ID_t", all.x = T) | |
data.long <- data.long[order(data.long$id, data.long$fup), ] | |
data.long$hazard[is.na(data.long$hazard)] <- 0 | |
# Estimating the probability of remaining uncensored at each time of event | |
data.long$P_uncens <- exp(-(data.long$hazard) * data.long$risk) | |
# IPC Weights are the inverse of the probability of remaining uncensored | |
data.long$weight_Cox <- 1 / data.long$P_uncens | |
return(data.long) | |
} | |
data.long.Cox <- rbind( | |
censoring_weights(data_final, arm = "Surgery"), | |
censoring_weights(data_final, arm = "Control") | |
) | |
############################################ | |
# STEP 3- ESTIMATING THE SURVIVOR FUNCTION | |
############################################ | |
################################################## | |
# Emulated trial with Cox weights (Kaplan-Meier) | |
################################################## | |
emul_Cox_s <- survfit(Surv(Tstart, Tstop, outcome) ~ 1, | |
data = data.long.Cox[data.long.Cox$arm == "Surgery", ], weights = weight_Cox) | |
S1 <- min(emul_Cox_s$surv) # 1 year survival in the surgery group | |
fit.tableM <- summary(emul_Cox_s, rmean = 365)$table | |
RMST1 <- fit.tableM["*rmean"] # Estimated RMST in the surgery group | |
emul_Cox_c <- survfit(Surv(Tstart, Tstop, outcome) ~ 1, | |
data = data.long.Cox[data.long.Cox$arm == "Control", ], weights = weight_Cox) | |
S0 <- min(emul_Cox_c$surv) # 1 year survival in the control group | |
fit.tableM2 <- summary(emul_Cox_c, rmean = 365)$table | |
RMST0 <- fit.tableM2["*rmean"] # Estimated RMST in the control group | |
Diff_surv <- S1 - S0 # Difference in 1 year survival | |
Diff_RMST <- RMST1 - RMST0 # Difference in RMST | |
################################################## | |
# Emulated trial with Cox weights (Cox model) | |
################################################## | |
Cox_w <- coxph(Surv(Tstart, Tstop, outcome) ~ arm, | |
data = data.long.Cox, weights = weight_Cox) | |
HR <- exp(Cox_w$coefficients) # Hazard ratio | |
res <- c(Diff_surv, Diff_RMST, HR) | |
names(res) <- c("Diff_surv", "Diff_RMST", "HR") | |
return(res) | |
} | |
############################################## | |
# CALLING THE BOOTSTRAP FUNCTION | |
############################################## | |
# Bootstrapping with 1000 replications | |
results <- boot(data = tab, statistic = fboot, R = 100) | |
expect_equal(unname(results$t0[1]), .12, tolerance = .01) |> print() | |
expect_equal(unname(results$t0[3]), 0.6072863, tolerance = .01) |> print() | |
# 95% confidence intervals for each measure | |
# boot.ci(results,type="norm",index=1) #Difference in survival | |
# boot.ci(results,type="norm",index=2) #Difference in RMST | |
# boot.ci(results,type="norm",index=3) #HR |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://academic.oup.com/ije/article/49/5/1719/5835351#226179197