n8thangreen · November 6, 2013 12:03
diff --git a/time-dept_array b/time-dept_array
 tvCox <- function(survData, covariates, type){
  ##
  ## time-dependent hazard array representation
  ## can specify which time to use: cause specific or subdistributions
  ##
  ## survData: survival data subset of total.data with indicators
  ## covariates: e.g. c("age", "agegr", "cath", "surgical", "gender")
  ## type: type of hazard functions to use
  ##         "" - Cause-specific
  ##		"death" - subdistribution time-to-death 
  ##		"alive" - subdistribution time-to-discharge alive 
  
  
  if (type=="alive"){
    time <- survData$time2disch	# subdistribution discharge times
  } else if (type=="death"){
    time <- survData$time2death  # subdistribution death times
  } else if (type==""){ 
    time <- survData$time}        # raw times
  
  rows.inf <- survData$infstatus==1
  
  survData.inf <- survData[rows.inf,]
  survData.mix <- survData[!rows.inf,]
  
  ## repeated pairs of row ids for infected cases
  ninf <- rep(1:nrow(survData.inf), each=2)
  epsilon <- 0.5
  
  time.inf <- time[rows.inf]   # subset of event times for infected cases
  time.mix <- time[!rows.inf]
  
  originalid <- 0							# original array single-line format patient row id
  tvdata.inf <- data.frame(id=ninf,
                           tstart=NA,tstop=NA,inf=NA,disch=NA,death=NA,
                           survData.inf[ninf, c("infstatus",covariates)])
  ## check that column name is correct
  ## currently always 7th column but better this way
  names(tvdata.inf)[grep("infstatus",names(tvdata.inf))] <- "infstatus"
  
  numRows.new <- nrow(tvdata.inf)
  
  if (numRows.new > 0){ # only for infected cases
    for (i in seq(1, numRows.new,by=2)){		# fill two rows at a time
      
      originalid <- originalid+1
      spectime <- survData.inf$spectime[originalid]
      time.inf.id <- time.inf[originalid]
      
      # 1) admission -> infection
      tvdata.inf[i,"tstart"]<- 0 		                              # admission start time always zero
      tvdata.inf[i,"tstop"] <- ifelse(spectime==0, epsilon, spectime)	# so start<stop due to same day events
      tvdata.inf[i,"disch"] <- 0		                              # discharge or censored
      tvdata.inf[i,"inf"] 	<- 0	                            	  # not infected yet
      tvdata.inf[i,"death"] <- 0	                              	# death or censored
      
      
      # 2) infection -> death/discharge
      tvdata.inf[i+1,"tstart"] <- tvdata.inf[i,"tstop"]     # start next state at the end time of first state
      
      # stop time is the event time death, discharge or administrative censoring
      # to ensure start<stop add epsilon due to same day events
      tvdata.inf[i+1,"tstop"]  <- ifelse(time.inf.id==spectime,
                                         time.inf.id+epsilon,
                                         time.inf.id)
      
      ## TODO ##
      ## what if spectime>time? table(survData$spectime>survData$time2disch)
      # currently replaces with NA by default by R
      
      # flag if theres a recorded event type and is the event type discharge
      # otherwise censored
      tvdata.inf[i+1,"disch"] <- (1-as.numeric(survData.inf$event[originalid])) & survData.inf$missingType[originalid]
      
      tvdata.inf[i+1,"inf"] 	<- 1
      
      # flag if there a recorded event type and is the event death
      # otherwise censored
      # censored at sink i.e not infection
      ## TODO ## why is event sometimes in a list??
      tvdata.inf[i+1,"death"] <- unlist(survData.inf$event[originalid]) & survData.inf$missingType[originalid]		
    }
  }
  
  # construct an equivalent matrix for the uninfected patients
  # include arbitrary unique IDs (otherwise bootstrapper ignores patient)
  # split event into death and discharge and can still recover censored data
  tvdata.mix <- data.frame(id=originalid+10+seq_along(survData.mix$time),
                           tstart=0,
                           tstop=ifelse(time.mix>0, time.mix, epsilon),
                           inf=0,
                           disch= (1-as.numeric(survData.mix[,"event"])) & survData.mix[,"missingType"],
                           death= unlist(survData.mix[,"event"]) & survData.mix[,"missingType"],
                           survData.mix[, c("infstatus",covariates)])
  
  ## check that column name is correct
  ## currently always 7th column but better this way
  names(tvdata.mix)[grep("infstatus",names(tvdata.mix))] <- "infstatus"
  
  tvdata <- rbind(tvdata.inf, tvdata.mix)
  tvdata <- cbind(tvdata, status=tvdata$disch+(2*tvdata$death)) # single vector for censored (0), discharge (1) or death (2)
  
  tvdata <- tvdata[!is.na(tvdata[,"tstop"]),]  	# remove empty rows
  
  ## rename some columns
  names(tvdata)[which(names(tvdata)=="gender")] <- "sex"
  names(tvdata)[which(names(tvdata)=="infstatus")] <- "originf"
  
 tvdata
 }
 ## FUNCTION END ##
	tvCox <- function(survData, covariates, type){
	##
	## time-dependent hazard array representation
	## can specify which time to use: cause specific or subdistributions
	##
	## survData: survival data subset of total.data with indicators
	## covariates: e.g. c("age", "agegr", "cath", "surgical", "gender")
	## type: type of hazard functions to use
	## "" - Cause-specific
	## "death" - subdistribution time-to-death
	## "alive" - subdistribution time-to-discharge alive


	if (type=="alive"){
	time <- survData$time2disch # subdistribution discharge times
	} else if (type=="death"){
	time <- survData$time2death # subdistribution death times
	} else if (type==""){
	time <- survData$time} # raw times

	rows.inf <- survData$infstatus==1

	survData.inf <- survData[rows.inf,]
	survData.mix <- survData[!rows.inf,]

	## repeated pairs of row ids for infected cases
	ninf <- rep(1:nrow(survData.inf), each=2)
	epsilon <- 0.5

	time.inf <- time[rows.inf] # subset of event times for infected cases
	time.mix <- time[!rows.inf]

	originalid <- 0 # original array single-line format patient row id
	tvdata.inf <- data.frame(id=ninf,
	tstart=NA,tstop=NA,inf=NA,disch=NA,death=NA,
	survData.inf[ninf, c("infstatus",covariates)])
	## check that column name is correct
	## currently always 7th column but better this way
	names(tvdata.inf)[grep("infstatus",names(tvdata.inf))] <- "infstatus"

	numRows.new <- nrow(tvdata.inf)

	if (numRows.new > 0){ # only for infected cases
	for (i in seq(1, numRows.new,by=2)){ # fill two rows at a time

	originalid <- originalid+1
	spectime <- survData.inf$spectime[originalid]
	time.inf.id <- time.inf[originalid]

	# 1) admission -> infection
	tvdata.inf[i,"tstart"]<- 0 # admission start time always zero
	tvdata.inf[i,"tstop"] <- ifelse(spectime==0, epsilon, spectime) # so start<stop due to same day events
	tvdata.inf[i,"disch"] <- 0 # discharge or censored
	tvdata.inf[i,"inf"] <- 0 # not infected yet
	tvdata.inf[i,"death"] <- 0 # death or censored


	# 2) infection -> death/discharge
	tvdata.inf[i+1,"tstart"] <- tvdata.inf[i,"tstop"] # start next state at the end time of first state

	# stop time is the event time death, discharge or administrative censoring
	# to ensure start<stop add epsilon due to same day events
	tvdata.inf[i+1,"tstop"] <- ifelse(time.inf.id==spectime,
	time.inf.id+epsilon,
	time.inf.id)

	## TODO ##
	## what if spectime>time? table(survData$spectime>survData$time2disch)
	# currently replaces with NA by default by R

	# flag if theres a recorded event type and is the event type discharge
	# otherwise censored
	tvdata.inf[i+1,"disch"] <- (1-as.numeric(survData.inf$event[originalid])) & survData.inf$missingType[originalid]

	tvdata.inf[i+1,"inf"] <- 1

	# flag if there a recorded event type and is the event death
	# otherwise censored
	# censored at sink i.e not infection
	## TODO ## why is event sometimes in a list??
	tvdata.inf[i+1,"death"] <- unlist(survData.inf$event[originalid]) & survData.inf$missingType[originalid]
	}
	}

	# construct an equivalent matrix for the uninfected patients
	# include arbitrary unique IDs (otherwise bootstrapper ignores patient)
	# split event into death and discharge and can still recover censored data
	tvdata.mix <- data.frame(id=originalid+10+seq_along(survData.mix$time),
	tstart=0,
	tstop=ifelse(time.mix>0, time.mix, epsilon),
	inf=0,
	disch= (1-as.numeric(survData.mix[,"event"])) & survData.mix[,"missingType"],
	death= unlist(survData.mix[,"event"]) & survData.mix[,"missingType"],
	survData.mix[, c("infstatus",covariates)])

	## check that column name is correct
	## currently always 7th column but better this way
	names(tvdata.mix)[grep("infstatus",names(tvdata.mix))] <- "infstatus"

	tvdata <- rbind(tvdata.inf, tvdata.mix)
	tvdata <- cbind(tvdata, status=tvdata$disch+(2*tvdata$death)) # single vector for censored (0), discharge (1) or death (2)

	tvdata <- tvdata[!is.na(tvdata[,"tstop"]),] # remove empty rows

	## rename some columns
	names(tvdata)[which(names(tvdata)=="gender")] <- "sex"
	names(tvdata)[which(names(tvdata)=="infstatus")] <- "originf"

	tvdata
	}
	## FUNCTION END ##