Skip to content

Instantly share code, notes, and snippets.

View cimentadaj's full-sized avatar

Jorge Cimentada cimentadaj

View GitHub Profile
# (b) Use the same model fit to generate simulations of CD4 percentages at
# each of the time periods for a new child who was 4 years old at baseline.
# For that we have to first create the dataset with the hypothetical child:
# I'll assume 7 hypothetical dates
hyp_child <- data.frame(newpid = 120,
VDATE = sample(cd4$VDATE[!is.na(cd4$VDATE)], 7),
baseage = 4,
treatmnt = 1)
library(arm)
cd4 <- read.csv("http://www.stat.columbia.edu/~gelman/arm/examples/cd4/allvar.csv")
# Let's recreate the model from 12.2 (b)
cd4$VDATE <- as.Date(cd4$VDATE, format = "%m/%d/%Y")
mod2 <- lmer(CD4PCT ~ VDATE + treatmnt + baseage + (1 | newpid), data = cd4)
# (a) Use the model fit from Exercise 12.2(b) to generate simulation
# of predicted CD4 percentages for each child in the dataset at a
# hypothetical next time point.
library(ggplot2)
# (c) Investigate the change in partial pooling from (a)
# to (b) both graphically and numerically.
# Change in standard errors
# First and second model intercepts
df1 <- coef(mod1)$newpid[,1 , drop = F]
df2 <- coef(mod2)$newpid[,1 , drop = F]
names(df1) <- c("int")
# (b) Extend the model in (a) to include child-level predictors
# (that is, group-level predictors) for treatment and age at baseline.
# Fit using lmer() and interpret the coefficients on time, treatment,
# and age at baseline.
mod2 <- lmer(CD4PCT ~ VDATE + treatmnt + baseage + (1 | newpid), data = cd4)
display(mod2)
# coef.est coef.se
# (Intercept) 67.28 9.82
library(arm)
cd4 <- read.csv("http://www.stat.columbia.edu/~gelman/arm/examples/cd4/allvar.csv")
head(cd4)
# VISIT newpid VDATE CD4PCT arv visage treatmnt CD4CNT baseage
# 1 1 1 6/29/1988 18 0 3.910000 1 323 3.91
# 2 4 1 1/19/1989 37 0 4.468333 1 610 3.91
# 3 7 1 4/13/1989 13 0 4.698333 1 324 3.91
# 4 10 1 NA 0 5.005000 1 NA 3.91
# 5 13 1 11/30/1989 13 0 5.330833 1 626 3.91
# In R for logit coefficients and robust standard errors:
dat <- read_dta("http://www.stata-press.com/data/r9/quad1.dta")
mod1 <- glm(z ~ x1 + x2 + x3, dat, family = binomial)
robustse(mod1, coef = "logit")
# Estimate Std. Error z value Pr(>|z|)
# x1 0.024050 0.025869 0.9297 0.3525395
# x2 0.157143 0.089715 1.7516 0.0798448 .
# x3 0.190162 0.089418 2.1267 0.0334490 *
# This function estimates robust standad error for glm objects and
# returns coefficients as either logit, odd ratios or probabilities.
# logits are default
# argument x must be glm model.
# Credit to Achim here: http://stackoverflow.com/questions/27367974/different-robust-standard-errors-of-logit-regression-in-stata-and-r
# for the code in line 14 and 15
robustse <- function(x, coef = c("logit", "odd.ratio", "probs")) {
#
# a)
# The probs argument sets the probability of making a shot. In this case it'll be 0.60
thrower <- function(probs) {
vec <- replicate(2, rbinom(1, 1, probs)) # create a vector with two random numbers of either 1 or 0, with a probability of probs for 1
while ((vec[length(vec)] + vec[length(vec) - 1]) != 0) { # While the sum of the last and the second-last element is not 0
vec <- c(vec, rbinom(1, 1, probs)) # keep adding random shots with a probability of probs
}
return(vec)
}
stargazer2 <- function(model, odd.ratio = F, ...) {
if(!("list" %in% class(model))) model <- list(model)
if (odd.ratio) {
coefOR2 <- lapply(model, function(x) exp(coef(x)))
seOR2 <- lapply(model, function(x) exp(coef(x)) * summary(x)$coef[, 2])
p2 <- lapply(model, function(x) summary(x)$coefficients[, 4])
stargazer(model, coef = coefOR2, se = seOR2, p = p2, ...)
} else {
install.packages("stargazer") # in case you don't have this package
library(stargazer)
m1 <- glm(mtcars$vs ~ mtcars$hp + mtcars$mpg)
stargazer(m1, type = "text") # Our standard log odds
stargazer(m1, apply.coef = exp, type = "text") # The coefficients are correct, but look at the significance levels!