-
-
Save dsparks/4332698 to your computer and use it in GitHub Desktop.
# Compare the coefficient estimates of several regression models that share
# the same dependent variable in a single dot-and-whisker plot.

doInstall <- TRUE  # Set to FALSE to skip the installation step entirely
toInstall <- c("ggplot2")
if (doInstall) {
  # Only install what is not already available, so that re-running the script
  # does not re-download packages every time.
  isMissing <- !vapply(toInstall, requireNamespace, logical(1), quietly = TRUE)
  if (any(isMissing)) {
    install.packages(toInstall[isMissing],
                     repos = "http://cran.us.r-project.org")
  }
}
# invisible() keeps the list returned by lapply() from being printed
invisible(lapply(toInstall, library, character.only = TRUE))

ANES <- read.csv("http://www.oberlin.edu/faculty/cdesante/assets/downloads/ANES.csv")
# Limit to just 2008 respondents and remove some non-helpful variables.
# which() drops rows whose year is NA; indexing with the bare logical vector
# would turn each of them into a row consisting entirely of NAs.
ANES <- ANES[which(ANES$year == 2008), -c(1, 11, 17)]
head(ANES)

# Fit several models with the same DV:
model1 <- lm(pid7 ~ ideo7 + female + age + south, data = ANES)
model2 <- lm(pid7 ~ ideo7 + female + age + female:age, data = ANES)
model3 <- lm(pid7 ~ ideo7, data = ANES)  # These are just arbitrary examples

# Collect the estimates of one fitted model in a data.frame.
#   model: a fitted model whose summary() provides a coefficient matrix with
#          "Estimate" and "Std. Error" columns (as summary.lm() does)
#   label: the name under which the model appears in the plot legend
# Returns a data.frame with one row per term and the columns Variable,
# Coefficient, SE and modelName.
buildModelFrame <- function(model, label) {
  estimates <- coef(summary(model))  # summarise once instead of three times
  data.frame(Variable = rownames(estimates),
             Coefficient = estimates[, "Estimate"],
             SE = estimates[, "Std. Error"],
             modelName = label)
}

# Put model estimates into temporary data.frames:
model1Frame <- buildModelFrame(model1, "South Indicator")
model2Frame <- buildModelFrame(model2, "Age Interaction")
model3Frame <- buildModelFrame(model3, "Univariate")

# Combine these data.frames
allModelFrame <- rbind(model1Frame, model2Frame, model3Frame)  # etc.

# Specify the width of your confidence intervals
interval1 <- -qnorm((1 - 0.9) / 2)   # 90% multiplier
interval2 <- -qnorm((1 - 0.95) / 2)  # 95% multiplier

# Plot
zp1 <- ggplot(allModelFrame, aes(colour = modelName))
# Dashed reference line at zero
zp1 <- zp1 + geom_hline(yintercept = 0, colour = gray(1/2), lty = 2)
# Thick inner segment: 90% interval
zp1 <- zp1 + geom_linerange(aes(x = Variable,
                                ymin = Coefficient - SE*interval1,
                                ymax = Coefficient + SE*interval1),
                            lwd = 1, position = position_dodge(width = 1/2))
# Point estimate with thin outer segment: 95% interval
zp1 <- zp1 + geom_pointrange(aes(x = Variable, y = Coefficient,
                                 ymin = Coefficient - SE*interval2,
                                 ymax = Coefficient + SE*interval2),
                             lwd = 1/2, position = position_dodge(width = 1/2),
                             shape = 21, fill = "WHITE")
# Flip the axes so that the variable names are listed down the side
zp1 <- zp1 + coord_flip() + theme_bw()
zp1 <- zp1 + ggtitle("Comparing several models")
# The trick to these is position_dodge(): both layers use the same dodge
# width, so the estimates of the different models sit side by side.
print(zp1)
@stapial
You can use ggplot2's facet functions to split the models into columns:
# One column of panels per model
zp1 <- zp1 +
  facet_grid(. ~ modelName)
perfect! Thank you so much @ksorby!
This is exactly what I needed. Thank you!
Thank you so much for this walk-through! Now, just one question: How can I select only certain variables to depict in the figure? Or at least drop the intercept?
Thank you so much for this walk-through! Now, just one question: How can I select only certain variables to depict in the figure? Or at least drop the intercept?
I found a solution:
# Keep only the rows of the coefficients you want to plot
# (leaving "(Intercept)" out of the vector drops the intercept)
allModelFrame <- subset(allModelFrame,
                        Variable %in% c("variable1", "variable2"))
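Also, if you want heteroskedasticity-robust standard errors, compute them first, for example like this (coeftest() comes from the lmtest package and vcovHC() from the sandwich package): coff_model1 <- coeftest(model1, vcov = vcovHC)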
and plug it into the code:
# Point estimates come from the ordinary model summary; the standard errors
# are taken from the coeftest() result stored in coff_model1.
model1.Frame <- data.frame(
  Variable = rownames(coef(summary(model1))),
  Coefficient = coef(summary(model1))[, "Estimate"],
  SE = coff_model1[, "Std. Error"],
  modelName = "model1"
)
Hope this helps.
This is awesome, thanks!
Quick question: is there any way to do the same but have each model in a different column and not stacked? I have 6 binary regression models (they don't have the same dependent variable). It looks messy when I lump them all together. I would prefer to have a column for each model with its respective coefficients in the same plot. Any ideas on how to do this?
Thanks!