Created
November 24, 2011 18:04
-
-
Save berndweiss/1391938 to your computer and use it in GitHub Desktop.
Episode splitting with qualitative covariates
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Episode splitting with qualitative (categorical) time-varying
## covariates (TVCs).
##
## Each subject's observation window (start, stop] is cut at every TVC
## start/stop time that lies inside the window. The result, data.long,
## holds one row per sub-episode with a corrected event indicator and
## one 0/1 dummy per TVC (tvc.dummy_) marking the sub-episodes that lie
## entirely within that TVC's [tvc.start_, tvc.stop_] interval.
##
## Note: No missing values in tvc.start_/tvc.stop_ allowed!

## 0. Step: Make up some data
##  event:      event status
##  start:      starting time
##  stop:       ending time
##  tvc.start_: starting time of qualitative/categorical TVC
##  tvc.stop_:  ending time of qualitative/categorical TVC
df <- data.frame(id=c(1, 2),
                 event=c(1, 0),
                 start=c(0, 0),
                 stop=c(12, 8),
                 tvc.start1=c(2, 4),
                 tvc.start2=c(3, 7),
                 tvc.stop1=c(12, 7),
                 tvc.stop2=c(7, 8)
                 )
df

## Derive the TVC column names from the data instead of hard-coding them,
## so that adding tvc.start3/tvc.stop3 etc. to df needs no further edits.
tvc.idx <- sub("^tvc\\.start", "",
               grep("^tvc\\.start", names(df), value=TRUE))
tvc.start.cols <- paste0("tvc.start", tvc.idx)
tvc.stop.cols <- paste0("tvc.stop", tvc.idx)
tvc.cols <- c(tvc.start.cols, tvc.stop.cols)

## Enforce the precondition stated above: a missing time would be sorted
## to the end in step 2 and yield NA rows when subsetting in step 4.
stopifnot(all(tvc.stop.cols %in% names(df)),
          !any(is.na(df[, c("start", "stop", tvc.cols)])))

## 1. Step: Reshape data frame
##  ALL potential endpoints need to be collapsed into one vector; it is
##  named "stop" (v.names), which is also what reshape() would pick by
##  default, namely the first name in varying=list(...).
data.long <- reshape(data=df, direction="long", timevar="index",
                     v.names="stop",
                     varying=list(c("stop", tvc.cols)))
data.long

## Remove "index" variable (see reshape(..., timevar="index"))
data.long <- data.long[, names(data.long)!="index"]

## 2. Step: Sort data by id and event time (here: "stop")
##  First drop cut points outside the observation window (start, stop]:
##  a TVC episode that begins at/before "start" or ends after "stop" must
##  not create sub-episodes outside the observed time. At this point
##  data.long$start still holds each subject's original starting time.
obs.end <- df$stop[match(data.long$id, df$id)]
data.long <- data.long[data.long$stop > data.long$start
                       & data.long$stop <= obs.end, ]
data.long <- data.long[order(data.long$id, data.long$stop), ]
data.long

## 3. Step: Correct starting times (i.e. generate left side of intervals)
##  Within each id, an episode starts where the previous one ended. The
##  first episode of a subject keeps the subject's own starting time.
##  ave() returns the lagged values in the original row order, so the
##  result does not depend on how rows happen to be grouped.
prev.stop <- ave(data.long$stop, data.long$id,
                 FUN=function(s) c(NA_real_, s[-length(s)]))
first.episode <- is.na(prev.stop)
prev.stop[first.episode] <- data.long$start[first.episode]
data.long$start <- prev.stop
data.long

## 4. Step: Remove zero-length records. They arise whenever two cut
##  points coincide (e.g. end of a TVC episode == end of observation
##  time). Exact comparison is fine here: the times are copied from df,
##  never computed.
data.long <- data.long[data.long$start != data.long$stop, ]
data.long

## 5. Step: Merge long and wide data
##  Note: variable "stop" exists in both data sets, so merge() renames
##  them to "stop.x" (episode end) and "stop.y" (end of observation).
data.long <- merge(data.long,
                   df[, c("id", "stop", tvc.start.cols, tvc.stop.cols)],
                   by="id", all.x=TRUE)
names(data.long)[names(data.long)=="stop.x"] <- "stop"
## merge() only sorts on "id"; restore the chronological order within id
## explicitly rather than relying on it being preserved.
data.long <- data.long[order(data.long$id, data.long$stop), ]
rownames(data.long) <- NULL
data.long

## 6. Step: Correct old "event" status variable
##  The original status is kept as "event2". Only the sub-episode that
##  ends at the end of observation ("stop.y") can carry the event.
data.long$event2 <- data.long$event
data.long$event <- as.numeric(data.long$stop == data.long$stop.y
                              & data.long$event2 == 1)
data.long

## 7. Step: Generate TVC dummies
##  tvc.dummy_ is 1 for every sub-episode that lies completely inside
##  [tvc.start_, tvc.stop_] and 0 otherwise.
for (k in tvc.idx) {
    tvc.on <- (data.long$start >= data.long[[paste0("tvc.start", k)]]
               & data.long$stop <= data.long[[paste0("tvc.stop", k)]])
    data.long[[paste0("tvc.dummy", k)]] <- as.numeric(tvc.on)
    print(data.long)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment