Created
April 25, 2016 21:42
-
-
Save joseph-rickert/447346bbe24ccfdf730167499b90cb5f to your computer and use it in GitHub Desktop.
Code for post on Reading with R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Joseph Rickert | |
# April 2016 | |
# Some code for reading: | |
# LOGISTIC REGRESSION, SURVIVL ANALYSIS, AND THE KAPLAN-MEIER CURVE | |
# by Bradley Efron (1987) | |
#https://statistics.stanford.edu/sites/default/files/BIO%20115.pdf | |
library(survival) | |
library(ggplot2) | |
#-------------------------------------- | |
# Data for Efron's Table 1 | |
# Enter raw data for arm A | |
Adays <- c(7, 34, 42, 63, 64, 74, 83, 84, 91, | |
108, 112, 129, 133, 133, 139, 140, 140, 146, | |
149, 154, 157, 160, 160, 165, 173, 176, 185, | |
218, 225, 241, 248, 273, 277, 279, 297, 319, | |
405, 417, 420, 440, 523, 523, 583, 594, | |
1101, 1116, 1146, 1226, 1349, 1412, 1417) | |
Astatus <- rep(1,51) | |
Astatus[c(6,27,34,36,42,46,48,49,50)] <-0 | |
Aobj <- Surv(time = Adays, Astatus==1) | |
Aobj | |
# [1] 7 34 42 63 64 74+ 83 84 91 108 112 129 133 133 | |
# [15] 139 140 140 146 149 154 157 160 160 165 173 176 185+ 218 | |
# [29] 225 241 248 273 277 279+ 297 319+ 405 417 420 440 523 523+ | |
# [43] 583 594 1101 1116+ 1146 1226+ 1349+ 1412+ 1417 | |
#-------------------------- | |
# Data for Efron's Table 2 | |
# data for arm B | |
Bdays <- c(37, 84, 92, 94, | |
110, 112, 119, 127, 130, 133, 140, 146, 155, 159, | |
169, 173, 179, 194, 195, 209, 249, 281, 319, 339, | |
432, 469, 519, 528, 547, 613, 633, 725, 759, 817, | |
1092, 1245, 1331, 1557,1642, 1771, | |
1776, 1897, 2023, 2146, 2297) | |
Bstatus <- rep(1,45) | |
Bstatus[c(15,28,29,30,33,35,36,37,39,40,42,43,44,45)] <- 0 | |
Bobj <- Surv(Bdays, Bstatus == 1) | |
Bobj | |
# [1] 37 84 92 94 110 112 119 127 130 133 140 146 155 159 169+ 173 179 | |
# [18] 194 195 209 249 281 319 339 432 469 519 528+ 547+ 613+ 633 725 759+ 817 | |
# [35] 1092+ 1245+ 1331+ 1557 1642+ 1771+ 1776 1897+ 2023+ 2146+ 2297+ | |
#---------------------- | |
# Plot Kaplan-Meier Curves | |
Afit <- survfit(Aobj ~ 1, type="kaplan-meier") | |
Bfit <- survfit(Bobj ~ 1, type="kaplan-meier") | |
plot(Afit, conf.int = FALSE, col = "blue", | |
xlim = c(0, 2400), | |
xlab = "days", | |
ylab = "Proportion Surving", | |
main = " Kaplan-Meier Curves: Efron Fig 1") | |
lines(Bfit, conf.int = FALSE, col = "red") | |
legend("topright", # places a legend at the appropriate place | |
legend = c("Arm A","Arm B"), # puts text in the legend | |
lty=c(1,1), # gives the legend appropriate symbols (lines) | |
lwd=c(2.5,2.5), | |
cex=0.5, | |
col=c("Blue","Red")) # gives the legend lines the correct color and width | |
#------------------------------ | |
# Models for Arm A | |
m <- 30.348 #days per month | |
KM_A <- summary(Afit, times=seq(m,47*m,by=m),scale=m) | |
KM_A | |
# Call: survfit(formula = Aobj ~ 1, type = "kaplan-meier") | |
# time n.risk n.event survival std.err lower 95% CI upper 95% CI | |
# 1 50 1 0.980 0.0194 0.9431 1.000 | |
# 2 48 2 0.941 0.0329 0.8788 1.000 | |
# 3 42 5 0.842 0.0513 0.7470 0.949 | |
# 4 40 2 0.802 0.0562 0.6989 0.920 | |
# . | |
# . | |
# . | |
# 45 2 0 0.126 0.0528 0.0552 0.286 | |
# 46 2 0 0.126 0.0528 0.0552 0.286 | |
# Set up variables for models | |
ni <- c(51,KM_A$n.risk) # Number at risk in month i | |
si <- c(KM_A$n.event,1) # Number of deaths in month i | |
failure <- ni - si # Number of "failures" at risk who didn't die | |
n <- length(ni) | |
month <- 1:n | |
t <- (1:n - .5) # base time | |
t2 <- t^2 | |
t3 <- t^3 | |
t11 <- pmin((t-11),0) # Cubic spline time | |
t11.2 <- t11^2 | |
t11.3 <- t11^3 | |
#----------------------------- | |
# Life Table Model | |
hLT <- si/ni # hazard estimate | |
#----------------------------- | |
# Cubic Model | |
Y <- matrix(c(si, failure),n,2) # Response matrix | |
form <- formula(Y ~ t + t2 + t3) | |
logitC <- glm(form,family=binomial(link="logit")) | |
hc <- logitC$fitted.values # hazard | |
summary(logitC) | |
#----------------------------- | |
# Cubic Spline Model | |
formSpline <- formula(Y ~ t + t11.2 + t11.3) | |
logitCS <- glm(formSpline,family=binomial(link="logit")) | |
hs <- logitCS$fitted.values # hazard | |
summary(logitCS) | |
#----------------------------- | |
# Calculate Survival Functions for Arm A | |
G <- vector(); Gc <- vector(); Gs <- vector() | |
G[1] <- 1; Gc[1] <- 1; Gs[1] <- 1 | |
n <- length(hs) | |
for (i in 2:n){ | |
G[i] <- G[i-1]*(1 - hLT[i - 1]) # Life Tale | |
Gc[i] <- Gc[i-1]*(1 - hc[i - 1]) # Cubic Model | |
Gs[i] <- Gs[i-1]*( 1 - hs[i - 1]) # Cubic Spline Model | |
} | |
#------------------------- | |
# Recreate figure 3 from Efron's Paper | |
armA <- data.frame(month,ni,si,failure,hLT,t,t2,t3,t11,t11.2,t11.3) | |
head(armA,3) | |
# month ni si failure hLT t t2 t3 t11 t11.2 t11.3 | |
# 1 1 51 1 50 0.01960784 0.5 0.25 0.125 -10.5 110.25 -1157.625 | |
# 2 2 50 2 48 0.04000000 1.5 2.25 3.375 -9.5 90.25 -857.375 | |
# 3 3 48 5 43 0.10416667 2.5 6.25 15.625 -8.5 72.25 -614.125 | |
p <- ggplot(armA, aes(month)) | |
p + geom_line(aes(y = G)) + | |
geom_point(aes(y = Gc), colour = "blue", shape = 2) + | |
geom_point(aes(y = Gs), colour = "red", shape = 3) + | |
geom_line(aes(y = Gs), colour = "red") + | |
xlab("months") + | |
ylab("probability") + | |
ggtitle("Efron Figure 3") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment