Created
January 25, 2016 11:09
-
-
Save jwaage/5a5da5b4405ebcc3984a to your computer and use it in GitHub Desktop.
Survival analysis and kaplan meyer plots
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Welcome to the COPSAC ggplot Kaplan-Meier template. It builds nice and very customizable KM curves as shown below. Please run the two functions to load into memory, it works like a SAS macro. | |
| # Load dependencies | |
| library(survival) | |
| library(ggplot2) | |
| library(cowplot) | |
| library(reshape2) | |
| # Run these functions to load into memory | |
| # Define custom function to create a survival data.frame. Credit http://www.ceb-institute.org/bbs/wp-content/uploads/2011/09/handout_ggplot2.pdf | |
| createSurvivalFrame <- function(f.survfit){ | |
| # initialise frame variable | |
| f.frame <- NULL | |
| # check if more then one strata | |
| if(length(names(f.survfit$strata)) == 0){ | |
| # create data.frame with data from survfit | |
| f.frame <- data.frame(time=f.survfit$time, n.risk=f.survfit$n.risk, n.event=f.survfit$n.event, n.censor = f.survfit$n.censor, surv=f.survfit$surv, upper=f.survfit$upper, lower=f.survfit$lower) | |
| # create first two rows (start at 1) | |
| f.start <- data.frame(time=c(0, f.frame$time[1]), n.risk=c(f.survfit$n, f.survfit$n), n.event=c(0,0), n.censor=c(0,0), surv=c(1,1), upper=c(1,1), lower=c(1,1)) | |
| # add first row to dataset | |
| f.frame <- rbind(f.start, f.frame) | |
| # remove temporary data | |
| rm(f.start) | |
| } | |
| else { | |
| # create vector for strata identification | |
| f.strata <- NULL | |
| print(f.survfit$strata) | |
| for(f.i in 1:length(f.survfit$strata)){ | |
| # add vector for one strata according to number of rows of strata | |
| f.strata <- c(f.strata, rep(names(f.survfit$strata)[f.i], f.survfit$strata[f.i])) | |
| } | |
| # create data.frame with data from survfit (create column for strata) | |
| f.frame <- data.frame(time=f.survfit$time, n.risk=f.survfit$n.risk, n.event=f.survfit$n.event, n.censor = f.survfit$n.censor, surv=f.survfit$surv, upper=f.survfit$upper, lower=f.survfit$lower, strata=factor(f.strata)) | |
| # remove temporary data | |
| rm(f.strata) | |
| # create first two rows (start at 1) for each strata | |
| for(f.i in 1:length(f.survfit$strata)){ | |
| # take only subset for this strata from data | |
| f.subset <- subset(f.frame, strata==names(f.survfit$strata)[f.i]) | |
| # create first two rows (time: 0, time of first event) | |
| f.start <- data.frame(time=c(0, f.subset$time[1]), n.risk=rep(f.survfit[f.i]$n, 2), n.event=c(0,0), n.censor=c(0,0), surv=c(1,1), upper=c(1,1), lower=c(1,1), strata=rep(names(f.survfit$strata)[f.i], 2)) | |
| # add first two rows to dataset | |
| f.frame <- rbind(f.start, f.frame) | |
| # remove temporary data | |
| rm(f.start, f.subset) | |
| } | |
| # reorder data | |
| f.frame <- f.frame[order(f.frame$strata, f.frame$time), ] | |
| # rename row.names | |
| rownames(f.frame) <- NULL | |
| } | |
| # return frame | |
| return(f.frame) | |
| } | |
| # Credit http://www.ceb-institute.org/bbs/wp-content/uploads/2011/09/handout_ggplot2.pdf | |
| qplot_survival <- function(f.frame, f.CI="default", f.shape=3){ | |
| # use different plotting commands dependig whether or not strata's are given | |
| if("strata" %in% names(f.frame) == FALSE){ | |
| # confidence intervals are drawn if not specified otherwise | |
| if(f.CI=="default" | f.CI==TRUE ){ | |
| # create plot with 4 layers (first 3 layers only events, last layer only censored) | |
| # hint: censoring data for multiple censoring events at timepoint are overplotted | |
| # (unlike in plot.survfit in survival package) | |
| ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") + geom_step(aes(x=time, y=upper), directions="hv", linetype=2) + geom_step(aes(x=time,y=lower), direction="hv", linetype=2) + geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape) | |
| } | |
| else { | |
| # create plot without confidence intervalls | |
| ggplot(data=f.frame) + geom_step(aes(x=time, y=surv), direction="hv") + | |
| geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape) | |
| } | |
| } | |
| else { | |
| if(f.CI=="default" | f.CI==FALSE){ | |
| # without CI | |
| ggplot(data=f.frame, aes(group=strata, colour=strata)) + geom_step(aes(x=time, y=surv), | |
| direction="hv") + geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape) | |
| } | |
| else { | |
| # with CI (hint: use alpha for CI) | |
| ggplot(data=f.frame, aes(colour=strata, group=strata)) + geom_step(aes(x=time, y=surv), direction="hv") + geom_step(aes(x=time, y=upper), directions="hv", linetype=2, alpha=0.5) + | |
| geom_step(aes(x=time,y=lower), direction="hv", linetype=2, alpha=0.5) + | |
| geom_point(data=subset(f.frame, n.censor==1), aes(x=time, y=surv), shape=f.shape) | |
| } | |
| } | |
| } | |
| # | |
| ## Now for a short example | |
| # | |
| ## Example Data, lung cancer study from the survival package. Read about it with ?lung | |
| # | |
| #data(lung) | |
| ## Variable description: | |
| ## time - survival time in days | |
| ## status - 1=censored, 2=dead | |
| ## ph.ecog - Performance Score, ability to perform normal daily activities. Takes values 0-3 in this dataset. We will remove lvl 3 (1 obs), and model as factor. | |
| # | |
| #lunge <- lung[lung$ph.ecog != 3, c("time","status","ph.ecog")] # Select interesting variables, remove PS 3, save in a new dataframe called "lunge" | |
| #lunge <- lunge[complete.cases(lunge),] # Remove missing data | |
| # | |
| ## Research question - does performance score influence survival time? | |
| # | |
| ## Fit a cox regession. Here you could add other variables to the model to adjust for potential confounders. | |
| #fit <- coxph(Surv(time, status == 2) ~ factor(ph.ecog), data=lunge) # Fit the model | |
| #summary(fit) # Hazard Ratios can be seen as "exp(coef)", PS 0 is the reference level. Model p value at the bottom. | |
| # | |
| ## Extract HR, CI, p as text, can be used for titles in plots. Here we'll use PS=2 as an example | |
| #resdf <- signif(coef(summary(fit)),3) | |
| #cidf <- round(exp(confint(fit)),2) | |
| #HRtext <- paste0("HR ", resdf[2,2], " [", cidf[2,1], "-",cidf[2,2],"], p = ", resdf[2,5]) | |
| # | |
| ## Fit a "survfit" (Kaplan-Meier) object. Only use what you want on graph as right-hand side factor (ie. only Performance Score) | |
| #sf <- survfit(Surv(time, status == 2) ~ factor(ph.ecog), data=lunge) | |
| # | |
| ## Use "macro" function to create plot-data | |
| #sdf <- createSurvivalFrame(sf) | |
| #levels(sdf$strata) # Inspect group levels | |
| #levels(sdf$strata) <- c("2", "1", "0") # Rename group levels | |
| # | |
| ## Optional - flip y axis to change survival rate to cumulative event rate | |
| #sdf_flip <- sdf | |
| #sdf_flip[,c("surv", "upper", "lower")] <- 1 - sdf_flip[,c("surv", "upper", "lower")] | |
| # | |
| ## Plot with the above template. Can be modified with ggplot commands too, eg. | |
| #qplot_survival(sdf) # Survival rate | |
| #qplot_survival(sdf_flip) # Cumulative event rate | |
| #qplot_survival(sdf_flip) + xlab("Time since diagnosis, days") | |
| #qplot_survival(sdf_flip, f.CI=TRUE) + xlab("Time since diagnosis, days") # Add confidence interval | |
| # | |
| ## Make a plot manually. Distinguish Performance Score strata by color. | |
| #ggplot(sdf_flip, aes(x=time, y=surv, color=strata)) + # Define and map data | |
| #geom_step(size=0.9) + # Adjust line thickness | |
| #xlab("Survival time, days") + # Set x-axis label | |
| #ylab("Cumulative risk of death") + # Set y axis label | |
| #scale_color_manual(name = "Performance Score", values=c("darkblue", "blue", "lightblue")) + # Set legend name and colors | |
| #theme_cowplot() + # Set base theme | |
| #theme(legend.position=c(1,0), legend.justification=c(1,0)) + # Set legend position and "anchor point"(x,y) | |
| #ggtitle("Lung cancer mortality") # Set a title of the plot | |
| # | |
| ## Alternative theme (standard ggplot2) | |
| #ggplot(sdf_flip, aes(x=time, y=surv, color=strata)) + # Define and map data | |
| #geom_step(size=0.5) + # Adjust line thickness | |
| #xlab("Survival time, days") + # Set x-axis label | |
| #ylab("Cumulative risk of death") + # Set y axis label | |
| #scale_color_manual(name = "Performance Score", values=c("red", "blue", "green")) + # Set legend name and colors | |
| #theme_grey() + # Set base theme | |
| #theme(legend.position=c(1,0), legend.justification=c(1,0)) + # Set legend position and "anchor point"(x,y) | |
| #ggtitle("Lung cancer mortality") # Set a title of the plot | |
| # | |
| ## Alternative, black and white lines with "black & white" theme. Distinguish Performance Score by line type | |
| #ggplot(sdf_flip, aes(x=time, y=surv, linetype=strata)) + # Define and map data | |
| #geom_step(size=0.2) + # Adjust line thickness | |
| #xlab("Survival time, days") + # Set x-axis label | |
| #ylab("Cumulative risk of death") + # Set y axis label | |
| #scale_linetype_manual(name = "Performance Score", values=c("solid", "dotted", "dashed")) + # Set legend name | |
| #theme_bw() + # Set base theme | |
| #theme(legend.position=c(1,0), legend.justification=c(1,0), legend.background=element_blank(), legend.direction="horizontal") + # Set legend position and "anchor point"(x,y), change to horizontal | |
| #ggtitle("Lung cancer mortality") # Set a title of the plot | |
| # | |
| # | |
| # | |
| ## Make extra plot with n at risk for each group in a lower subplot | |
| ## Vector of time points, define manually | |
| #timepoints <- c(0, 250, 500, 750, 1000) | |
| # | |
| ## Split dataframe into groups | |
| #nlist <- split(sdf_flip, list(sdf_flip$strata)) | |
| #getmax <- function(data, time) data[which.max(data$time[data$time <= time]),"n.risk"] | |
| #timenleft <- as.data.frame(lapply(timepoints, function(x) sapply(nlist, getmax, x) )) | |
| #names(timenleft) <- timepoints | |
| #timenleft$group <- rownames(timenleft) | |
| #plotdat <- melt(timenleft) | |
| #names(plotdat) <- c("group", "time", "n") | |
| # | |
| #KMplot <- ggplot(sdf_flip, aes(x=time, y=surv, linetype=strata)) + # Define and map data | |
| #geom_step(size=0.2) + # Adjust line thickness | |
| #xlab("Survival time, days") + # Set x-axis label | |
| #ylab("Cumulative risk of death") + | |
| #scale_linetype_manual(name = "Performance Score", values=c("solid", "dotted", "dashed")) + # Set legend name | |
| #theme(legend.position=c(1,0), legend.justification=c(1,0), legend.background=element_blank(), legend.direction="horizontal") + # Set legend position and "anchor point"(x,y), change to horizontal | |
| #ggtitle(HRtext) # Set a title of the plot | |
| # | |
| #nleftplot <- ggplot(plotdat, aes(x=as.numeric(time), y=factor(group), label=n)) + geom_text() + theme_cowplot() + xlab(NULL) + ylab("Performance\nScore") + theme(axis.ticks.x=element_blank(), axis.line.x=element_blank(), axis.text.x=element_blank()) | |
| # | |
| #plot_grid(KMplot, nleftplot, nrow=2, rel_heights=c(0.7,0.3)) | |
| # | |
| ## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment