Created
November 6, 2013 12:03
-
-
Save n8thangreen/7335023 to your computer and use it in GitHub Desktop.
Create time-dependent long format time to event data for 3-state Death-Disease multistate model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
tvCox <- function(survData, covariates, type){ | |
## | |
## time-dependent hazard array representation | |
## can specify which time to use: cause specific or subdistributions | |
## | |
## survData: survival data subset of total.data with indicators | |
## covariates: e.g. c("age", "agegr", "cath", "surgical", "gender") | |
## type: type of hazard functions to use | |
## "" - Cause-specific | |
## "death" - subdistribution time-to-death | |
## "alive" - subdistribution time-to-discharge alive | |
if (type=="alive"){ | |
time <- survData$time2disch # subdistribution discharge times | |
} else if (type=="death"){ | |
time <- survData$time2death # subdistribution death times | |
} else if (type==""){ | |
time <- survData$time} # raw times | |
rows.inf <- survData$infstatus==1 | |
survData.inf <- survData[rows.inf,] | |
survData.mix <- survData[!rows.inf,] | |
## repeated pairs of row ids for infected cases | |
ninf <- rep(1:nrow(survData.inf), each=2) | |
epsilon <- 0.5 | |
time.inf <- time[rows.inf] # subset of event times for infected cases | |
time.mix <- time[!rows.inf] | |
originalid <- 0 # original array single-line format patient row id | |
tvdata.inf <- data.frame(id=ninf, | |
tstart=NA,tstop=NA,inf=NA,disch=NA,death=NA, | |
survData.inf[ninf, c("infstatus",covariates)]) | |
## check that column name is correct | |
## currently always 7th column but better this way | |
names(tvdata.inf)[grep("infstatus",names(tvdata.inf))] <- "infstatus" | |
numRows.new <- nrow(tvdata.inf) | |
if (numRows.new > 0){ # only for infected cases | |
for (i in seq(1, numRows.new,by=2)){ # fill two rows at a time | |
originalid <- originalid+1 | |
spectime <- survData.inf$spectime[originalid] | |
time.inf.id <- time.inf[originalid] | |
# 1) admission -> infection | |
tvdata.inf[i,"tstart"]<- 0 # admission start time always zero | |
tvdata.inf[i,"tstop"] <- ifelse(spectime==0, epsilon, spectime) # so start<stop due to same day events | |
tvdata.inf[i,"disch"] <- 0 # discharge or censored | |
tvdata.inf[i,"inf"] <- 0 # not infected yet | |
tvdata.inf[i,"death"] <- 0 # death or censored | |
# 2) infection -> death/discharge | |
tvdata.inf[i+1,"tstart"] <- tvdata.inf[i,"tstop"] # start next state at the end time of first state | |
# stop time is the event time death, discharge or administrative censoring | |
# to ensure start<stop add epsilon due to same day events | |
tvdata.inf[i+1,"tstop"] <- ifelse(time.inf.id==spectime, | |
time.inf.id+epsilon, | |
time.inf.id) | |
## TODO ## | |
## what if spectime>time? table(survData$spectime>survData$time2disch) | |
# currently replaces with NA by default by R | |
# flag if theres a recorded event type and is the event type discharge | |
# otherwise censored | |
tvdata.inf[i+1,"disch"] <- (1-as.numeric(survData.inf$event[originalid])) & survData.inf$missingType[originalid] | |
tvdata.inf[i+1,"inf"] <- 1 | |
# flag if there a recorded event type and is the event death | |
# otherwise censored | |
# censored at sink i.e not infection | |
## TODO ## why is event sometimes in a list?? | |
tvdata.inf[i+1,"death"] <- unlist(survData.inf$event[originalid]) & survData.inf$missingType[originalid] | |
} | |
} | |
# construct an equivalent matrix for the uninfected patients | |
# include arbitrary unique IDs (otherwise bootstrapper ignores patient) | |
# split event into death and discharge and can still recover censored data | |
tvdata.mix <- data.frame(id=originalid+10+seq_along(survData.mix$time), | |
tstart=0, | |
tstop=ifelse(time.mix>0, time.mix, epsilon), | |
inf=0, | |
disch= (1-as.numeric(survData.mix[,"event"])) & survData.mix[,"missingType"], | |
death= unlist(survData.mix[,"event"]) & survData.mix[,"missingType"], | |
survData.mix[, c("infstatus",covariates)]) | |
## check that column name is correct | |
## currently always 7th column but better this way | |
names(tvdata.mix)[grep("infstatus",names(tvdata.mix))] <- "infstatus" | |
tvdata <- rbind(tvdata.inf, tvdata.mix) | |
tvdata <- cbind(tvdata, status=tvdata$disch+(2*tvdata$death)) # single vector for censored (0), discharge (1) or death (2) | |
tvdata <- tvdata[!is.na(tvdata[,"tstop"]),] # remove empty rows | |
## rename some columns | |
names(tvdata)[which(names(tvdata)=="gender")] <- "sex" | |
names(tvdata)[which(names(tvdata)=="infstatus")] <- "originf" | |
tvdata | |
} | |
## FUNCTION END ## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment