berndweiss · November 24, 2011 18:04
diff --git a/gistfile1.r b/gistfile1.r
 ## Episode splitting for time-varying covariates (TVCs): expands a
 ## one-row-per-subject data frame into one row per (start, stop] interval
 ## and adds one 0/1 dummy per TVC episode.
 ## Note: No missing values in tvc.start_/tvc.stop_ allowed!


 ## 0. Step: Make up some data
 ##          event:      event status
 ##          start:      starting time
 ##          stop:       ending time
 ##          tvc.start_: starting time of qualitative/categorical TVC
 ##          tvc.stop_:  ending time of qualitative/categorical TVC
 df <- data.frame(id=c(1, 2),
                  event=c(1, 0),
                  start=c(0, 0),
                  stop=c(12, 8),
                  tvc.start1=c(2, 4),
                  tvc.start2=c(3, 7),
                  tvc.stop1=c(12, 7),
                  tvc.stop2=c(7, 8)
                  )
 df

 ## Every column that can close an interval; "stop" has to stay first
 ## because Step 1 names the stacked column after the first entry
 endpoint.vars <- c("stop", "tvc.start1", "tvc.stop1",
                    "tvc.start2", "tvc.stop2")
 ## Enforce the note above: fail early instead of building intervals
 ## from missing endpoints
 stopifnot(!any(is.na(df[, endpoint.vars])))


 ## 1. Step: Reshape data frame
 ##          ALL potential endpoints need to be collapsed into one vector and,
 ##          by default, R uses the first name in varying=list(...) for the
 ##          name of this newly created vector (here: "stop")
 data.long <- reshape(data=df, direction="long", timevar="index",
                      varying=list(endpoint.vars))
 data.long
 ## Remove "index" variable (see reshape(..., timevar="index"))
 data.long <- data.long[, names(data.long)!="index"]



 ## 2. Step: Sort data by id and event time (here: "stop")
 data.long <- data.long[order(data.long$id, data.long$stop), ]
 data.long



 ## 3. Step: Correct starting times (i.e. generate left side of intervals)
 ##          Within each id the rows are sorted by endpoint, so an interval
 ##          starts where the previous one ended; the first one starts at 0.
 ##          ave() writes the result back row by row, so nothing depends on
 ##          the order in which the groups are processed.
 data.long$start <- ave(data.long$stop, data.long$id,
                        FUN=function(x) c(0, head(x, -1)))
 data.long



 ## 4. Step: Remove zero-length intervals (start == stop); they appear
 ##          whenever two endpoints coincide, e.g. a TVC episode that ends
 ##          exactly at the end of observation time
 data.long <- data.long[data.long$start != data.long$stop, ]
 data.long



 ## 5. Step: Merge long and wide data
 ##          Note: variable "stop" exists in both data sets, so merge()
 ##          returns "stop.x" (interval end) and "stop.y" (end of
 ##          observation time)
 data.long <- merge(data.long, df[, c("id", "stop", "tvc.start1", "tvc.start2",
                                      "tvc.stop1", "tvc.stop2")], by="id",
                    all.x=TRUE)
 names(data.long)[names(data.long)=="stop.x"] <- "stop"
 ## A TVC endpoint later than the end of observation time must not create
 ## an interval after the subject has left observation
 data.long <- data.long[data.long$stop <= data.long$stop.y, ]
 data.long



 ## 6. Step: Correct old "event" status variable
 ##          The original status is kept in "event2"; "event" is 1 only in
 ##          the interval that ends at the end of observation time
 data.long$event2 <- data.long$event
 data.long$event <- 0
 data.long$event[(data.long$stop == data.long$stop.y)
                 & (data.long$event2 == 1)] <- 1
 data.long



 ## 7. Step: Generate TVC dummies
 ##          A dummy is 1 when the interval lies completely inside the
 ##          corresponding TVC episode, 0 otherwise
 data.long$tvc.dummy1 <- 0
 data.long$tvc.dummy1[data.long$start >= data.long$tvc.start1
                      & data.long$stop <= data.long$tvc.stop1] <- 1
 data.long

 data.long$tvc.dummy2 <- 0
 data.long$tvc.dummy2[data.long$start >= data.long$tvc.start2
                      & data.long$stop <= data.long$tvc.stop2] <- 1
 data.long
	## Episode splitting for time-varying covariates (TVCs): expands a
	## one-row-per-subject data frame into one row per (start, stop] interval
	## and adds one 0/1 dummy per TVC episode.
	## Note: No missing values in tvc.start_/tvc.stop_ allowed!


	## 0. Step: Make up some data
	## event: event status
	## start: starting time
	## stop: ending time
	## tvc.start_: starting time of qualitative/categorical TVC
	## tvc.stop_: ending time of qualitative/categorical TVC
	df <- data.frame(id=c(1, 2),
	                 event=c(1, 0),
	                 start=c(0, 0),
	                 stop=c(12, 8),
	                 tvc.start1=c(2, 4),
	                 tvc.start2=c(3, 7),
	                 tvc.stop1=c(12, 7),
	                 tvc.stop2=c(7, 8)
	                 )
	df

	## Every column that can close an interval; "stop" has to stay first
	## because Step 1 names the stacked column after the first entry
	endpoint.vars <- c("stop", "tvc.start1", "tvc.stop1",
	                   "tvc.start2", "tvc.stop2")
	## Enforce the note above: fail early instead of building intervals
	## from missing endpoints
	stopifnot(!any(is.na(df[, endpoint.vars])))


	## 1. Step: Reshape data frame
	## ALL potential endpoints need to be collapsed into one vector and,
	## by default, R uses the first name in varying=list(...) for the
	## name of this newly created vector (here: "stop")
	data.long <- reshape(data=df, direction="long", timevar="index",
	                     varying=list(endpoint.vars))
	data.long
	## Remove "index" variable (see reshape(..., timevar="index"))
	data.long <- data.long[, names(data.long)!="index"]



	## 2. Step: Sort data by id and event time (here: "stop")
	data.long <- data.long[order(data.long$id, data.long$stop), ]
	data.long



	## 3. Step: Correct starting times (i.e. generate left side of intervals)
	## Within each id the rows are sorted by endpoint, so an interval
	## starts where the previous one ended; the first one starts at 0.
	## ave() writes the result back row by row, so nothing depends on
	## the order in which the groups are processed.
	data.long$start <- ave(data.long$stop, data.long$id,
	                       FUN=function(x) c(0, head(x, -1)))
	data.long



	## 4. Step: Remove zero-length intervals (start == stop); they appear
	## whenever two endpoints coincide, e.g. a TVC episode that ends
	## exactly at the end of observation time
	data.long <- data.long[data.long$start != data.long$stop, ]
	data.long



	## 5. Step: Merge long and wide data
	## Note: variable "stop" exists in both data sets, so merge()
	## returns "stop.x" (interval end) and "stop.y" (end of
	## observation time)
	data.long <- merge(data.long, df[, c("id", "stop", "tvc.start1", "tvc.start2",
	                                     "tvc.stop1", "tvc.stop2")], by="id",
	                   all.x=TRUE)
	names(data.long)[names(data.long)=="stop.x"] <- "stop"
	## A TVC endpoint later than the end of observation time must not create
	## an interval after the subject has left observation
	data.long <- data.long[data.long$stop <= data.long$stop.y, ]
	data.long



	## 6. Step: Correct old "event" status variable
	## The original status is kept in "event2"; "event" is 1 only in
	## the interval that ends at the end of observation time
	data.long$event2 <- data.long$event
	data.long$event <- 0
	data.long$event[(data.long$stop == data.long$stop.y)
	                & (data.long$event2 == 1)] <- 1
	data.long



	## 7. Step: Generate TVC dummies
	## A dummy is 1 when the interval lies completely inside the
	## corresponding TVC episode, 0 otherwise
	data.long$tvc.dummy1 <- 0
	data.long$tvc.dummy1[data.long$start >= data.long$tvc.start1
	                     & data.long$stop <= data.long$tvc.stop1] <- 1
	data.long

	data.long$tvc.dummy2 <- 0
	data.long$tvc.dummy2[data.long$start >= data.long$tvc.start2
	                     & data.long$stop <= data.long$tvc.stop2] <- 1
	data.long