Skip to content

Instantly share code, notes, and snippets.

@n8thangreen
Created November 6, 2013 12:03
Show Gist options
  • Save n8thangreen/7335023 to your computer and use it in GitHub Desktop.
Save n8thangreen/7335023 to your computer and use it in GitHub Desktop.
Create time-dependent long format time to event data for 3-state Death-Disease multistate model
tvCox <- function(survData, covariates, type){
##
## time-dependent hazard array representation
## can specify which time to use: cause specific or subdistributions
##
## survData: survival data subset of total.data with indicators
## covariates: e.g. c("age", "agegr", "cath", "surgical", "gender")
## type: type of hazard functions to use
## "" - Cause-specific
## "death" - subdistribution time-to-death
## "alive" - subdistribution time-to-discharge alive
if (type=="alive"){
time <- survData$time2disch # subdistribution discharge times
} else if (type=="death"){
time <- survData$time2death # subdistribution death times
} else if (type==""){
time <- survData$time} # raw times
rows.inf <- survData$infstatus==1
survData.inf <- survData[rows.inf,]
survData.mix <- survData[!rows.inf,]
## repeated pairs of row ids for infected cases
ninf <- rep(1:nrow(survData.inf), each=2)
epsilon <- 0.5
time.inf <- time[rows.inf] # subset of event times for infected cases
time.mix <- time[!rows.inf]
originalid <- 0 # original array single-line format patient row id
tvdata.inf <- data.frame(id=ninf,
tstart=NA,tstop=NA,inf=NA,disch=NA,death=NA,
survData.inf[ninf, c("infstatus",covariates)])
## check that column name is correct
## currently always 7th column but better this way
names(tvdata.inf)[grep("infstatus",names(tvdata.inf))] <- "infstatus"
numRows.new <- nrow(tvdata.inf)
if (numRows.new > 0){ # only for infected cases
for (i in seq(1, numRows.new,by=2)){ # fill two rows at a time
originalid <- originalid+1
spectime <- survData.inf$spectime[originalid]
time.inf.id <- time.inf[originalid]
# 1) admission -> infection
tvdata.inf[i,"tstart"]<- 0 # admission start time always zero
tvdata.inf[i,"tstop"] <- ifelse(spectime==0, epsilon, spectime) # so start<stop due to same day events
tvdata.inf[i,"disch"] <- 0 # discharge or censored
tvdata.inf[i,"inf"] <- 0 # not infected yet
tvdata.inf[i,"death"] <- 0 # death or censored
# 2) infection -> death/discharge
tvdata.inf[i+1,"tstart"] <- tvdata.inf[i,"tstop"] # start next state at the end time of first state
# stop time is the event time death, discharge or administrative censoring
# to ensure start<stop add epsilon due to same day events
tvdata.inf[i+1,"tstop"] <- ifelse(time.inf.id==spectime,
time.inf.id+epsilon,
time.inf.id)
## TODO ##
## what if spectime>time? table(survData$spectime>survData$time2disch)
# currently replaces with NA by default by R
# flag if theres a recorded event type and is the event type discharge
# otherwise censored
tvdata.inf[i+1,"disch"] <- (1-as.numeric(survData.inf$event[originalid])) & survData.inf$missingType[originalid]
tvdata.inf[i+1,"inf"] <- 1
# flag if there a recorded event type and is the event death
# otherwise censored
# censored at sink i.e not infection
## TODO ## why is event sometimes in a list??
tvdata.inf[i+1,"death"] <- unlist(survData.inf$event[originalid]) & survData.inf$missingType[originalid]
}
}
# construct an equivalent matrix for the uninfected patients
# include arbitrary unique IDs (otherwise bootstrapper ignores patient)
# split event into death and discharge and can still recover censored data
tvdata.mix <- data.frame(id=originalid+10+seq_along(survData.mix$time),
tstart=0,
tstop=ifelse(time.mix>0, time.mix, epsilon),
inf=0,
disch= (1-as.numeric(survData.mix[,"event"])) & survData.mix[,"missingType"],
death= unlist(survData.mix[,"event"]) & survData.mix[,"missingType"],
survData.mix[, c("infstatus",covariates)])
## check that column name is correct
## currently always 7th column but better this way
names(tvdata.mix)[grep("infstatus",names(tvdata.mix))] <- "infstatus"
tvdata <- rbind(tvdata.inf, tvdata.mix)
tvdata <- cbind(tvdata, status=tvdata$disch+(2*tvdata$death)) # single vector for censored (0), discharge (1) or death (2)
tvdata <- tvdata[!is.na(tvdata[,"tstop"]),] # remove empty rows
## rename some columns
names(tvdata)[which(names(tvdata)=="gender")] <- "sex"
names(tvdata)[which(names(tvdata)=="infstatus")] <- "originf"
tvdata
}
## FUNCTION END ##
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment