Last active
April 9, 2018 11:55
-
-
Save elliottmorris/905bd514110ea7d0b599cfca8a8e7332 to your computer and use it in GitHub Desktop.
Trump approval house effects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################## | |
# Code for house-correcting Trump approval polls | |
# Written by Alexander Agadjanian and G. Elliott Morris | |
# for use on Elliott's blog, TheCrosstab.com. | |
# @a_agadjanian @GElliottMorris | |
# | |
# Github Gist lives at https://gist.github.com/elliottmorris/905bd514110ea7d0b599cfca8a8e7332 | |
#################################### | |
# libraries #### | |
rm(list = ls()) | |
#setwd("/Users/Victor/Desktop/Blog-data/excel files/post election/") | |
library(matrixStats) | |
library(ggrepel) | |
library(maps) | |
library(ggthemes) | |
library(stringr) | |
library(gridExtra) | |
library(survey) | |
library(haven) | |
library(dplyr) | |
library(tidyr) | |
library(ggplot2) | |
library(lubridate) | |
library(broom) | |
library(plotly) | |
options(stringsAsFactors = FALSE) | |
library(stargazer) | |
library(utils) | |
library(ggthemes) | |
library(ggrepel) | |
library(png) | |
library(grid) | |
library(gridExtra) | |
library(knitr) | |
library(reshape) | |
source('~/Desktop/theme_elliott.R') | |
source('~/Desktop/theme_elliott.R') | |
exponent_weight <- function(i) { | |
exp(-0.2*i) | |
} | |
weight_data <- data.frame("Days" = c(1:14),"Weights" = exponent_weight(c(1:14))) | |
weight_data | |
# data #### | |
download.file("http://elections.huffingtonpost.com/pollster/api/v2/questions/00c%20-Pres-45-Trump%20-%20Job%20Approval%20-%20National/poll-responses-clean.tsv", | |
"approvalrating.tsv") | |
df <- read.table("approvalrating.tsv", sep = "\t", header = TRUE) | |
df1 <- df %>% | |
filter(sample_subpopulation == "Likely Voters" | | |
sample_subpopulation == "Adults" | | |
sample_subpopulation == "Registered Voters") | |
write.csv(df1,"cleaned_polls.csv",row.names = FALSE) | |
polls <- df1 %>% | |
mutate(net.approval = Approve - Disapprove, | |
population = ifelse(sample_subpopulation == "Adults", "1. Adults", | |
ifelse(sample_subpopulation == "Registered Voters", "2. Registered Voters", | |
ifelse(sample_subpopulation == "Likely Voters", "3. Likely Voters", NA))), | |
mode = ifelse(mode == "Live Phone", "1. Live Phone", | |
ifelse(mode == "IVR/Online", "2. IVR/Online", | |
ifelse(mode == "Internet", "3. Internet", NA))), | |
field.time = difftime(end_date, start_date), | |
sample = observations, | |
since.inaug = difftime(end_date, "2017-01-20"), | |
undecided.dk = 100 - (Approve + Disapprove)) | |
for (i in unique(polls$survey_house)){ | |
polls[[i]] <- ifelse(polls$survey_house == i, paste("2. ",i),"1. Other Polls") | |
} | |
#################### Approval & Disapproval ################################ | |
# + field.time + undecided.dk | |
approval.lm <- lm(Approve ~ population + mode + since.inaug + survey_house, data = polls) | |
summary(approval.lm) | |
df.approval.lm <- tidy(approval.lm) | |
# loop through to pull coefficients ********************** | |
# creating vector that contains index term | |
temp <- polls[21:length(polls)] | |
pollsters <- colnames(temp) | |
# only want pollsters with 3+ polls | |
for (i in pollsters){ | |
if(nrow(polls[polls$survey_house == i,]) < 2){ | |
pollsters <- pollsters[pollsters != i] | |
} | |
} | |
# creating empty data frame to which house effects will be attached | |
house <- data.frame(term = character(), | |
estimate = numeric()) | |
# looping through regressions for each individual pollster to get house effect for each | |
for (i in pollsters) { | |
approval.lm <- lm(polls$Approve ~ polls$population + polls$mode + polls$since.inaug + polls$field.time + polls$undecided.dk + polls[[i]]) | |
summary(approval.lm) | |
df.approval.lm <- tidy(approval.lm) %>% | |
select(term, estimate) | |
df.approval.lm <- df.approval.lm[nrow(df.approval.lm),] | |
if (df.approval.lm[1,]$term == "polls$undecided.dk"){next} | |
print(paste(i,df.approval.lm)) | |
house <- rbind(house, df.approval.lm) | |
} | |
# fixing data frame that contains house effects | |
house$term <- str_replace_all(house$term, "polls\\[\\[i\\]\\]2.", "") | |
house <- house %>% | |
select("Pollster" = term, "Effect" = estimate) | |
# order | |
house <- house[order(house$Effect),] | |
house$Pollster <- factor(house$Pollster, levels = house$Pollster[order(house$Effect)]) | |
house$Pollster # notice the changed order of factor levels | |
house$Bias <- ifelse(house$Effect <=0,"Democratic Bias","Republican Bias") | |
# + field.time + undecided.dk | |
disapproval.lm <- lm(Disapprove ~ population + mode + since.inaug + survey_house, data = polls) | |
summary(disapproval.lm) | |
disdf.approval.lm <- tidy(approval.lm) | |
# loop through to pull coefficients ********************** | |
# creating vector that contains index term | |
temp <- polls[21:length(polls)] | |
pollsters <- colnames(temp) | |
# creating empty data frame to which house effects will be attached | |
house_2 <- data.frame(term = character(), | |
estimate = numeric()) | |
# looping through regressions for each individual pollster to get house effect for each | |
for (i in pollsters) { | |
disapproval.lm <- lm(polls$Disapprove ~ polls$population + polls$mode + polls$since.inaug + polls$field.time + polls$undecided.dk + polls[[i]]) | |
summary(disapproval.lm) | |
df.disapproval.lm <- tidy(disapproval.lm) %>% | |
select(term, estimate) | |
df.disapproval.lm <- df.disapproval.lm[nrow(df.disapproval.lm),] | |
if (df.disapproval.lm[1,]$term == "polls$undecided.dk"){next} | |
print(paste(i,df.approval.lm)) | |
house_2 <- rbind(house_2, df.disapproval.lm) | |
} | |
# fixing data frame that contains house effects | |
house_2$term <- str_replace_all(house_2$term, "polls\\[\\[i\\]\\]2.", "") | |
house_2 <- house_2 %>% | |
select("Pollster" = term, "Effect" = estimate) | |
# order | |
house_2 <- house_2[order(house_2$Effect),] | |
house_2$Pollster <- factor(house_2$Pollster, levels = house_2$Pollster[order(house_2$Effect)]) | |
house_2$Pollster # notice the changed order of factor levels | |
house_2$Bias <- ifelse(house_2$Effect <=0,"Democratic Bias","Republican Bias") | |
names(house_2) <- c("Pollster","D_Effect","Bias") | |
house <- left_join(house, | |
house_2 %>% select(-Bias), | |
by="Pollster") | |
# creating graph for pollster effects #### | |
pollster_effect<- ggplot() + | |
geom_bar(data = house, aes(x = reorder(Pollster,Effect), y = Effect, fill = Effect),col = "#616161", stat = "identity") + | |
theme_elliott() + | |
labs(title = "House Effects for Trump Job Approval Polls", | |
subtitle = "After taking mode, population, and date of poll into account.\nOnly computed for polling firms that have released more than 2 polls of Trump's Job Approval", | |
x = "Pollster", | |
y = "House Effect") + | |
scale_fill_gradient2(low = "blue",mid="white",high="red") + | |
scale_y_continuous(breaks=c(-10,-5,0,5,10),labels=c("-10%","-5%","0","5%","10%")) + | |
coord_flip() | |
grid.arrange(pollster_effect,my_g,heights=c(9, .5)) | |
png("Trump_approval.png",unit="in",height=6,width=8,res=300) | |
grid.arrange(pollster_effect,my_g,heights=c(9, .5)) | |
dev.off() | |
kable(house,align="l",format="html",digits=2,row.names = FALSE) | |
## stargazing | |
approval.lm1 <- lm(net.approval ~ population + mode + since.inaug + field.time + undecided.dk, data = polls) | |
approval.lm2 <- lm(net.approval ~ population + mode + since.inaug + field.time + undecided.dk + survey_house, data = polls) | |
approval.lm3 <- lm(net.approval ~ population + mode + since.inaug + survey_house, data = polls) | |
# approve | |
approval.lm4 <- lm(Approve ~ population + mode + since.inaug + field.time + undecided.dk, data = polls) | |
approval.lm5 <- lm(Approve ~ population + mode + since.inaug + field.time + undecided.dk + survey_house, data = polls) | |
approval.lm6 <- lm(Approve ~ population + mode + since.inaug + survey_house, data = polls) | |
text<- stargazer(approval.lm1,approval.lm2,approval.lm3,approval.lm4,approval.lm5,approval.lm6, type = "html") | |
#box plots | |
polling <- df %>% | |
filter(sample_subpopulation == "Likely Voters" | | |
sample_subpopulation == "Adults" | | |
sample_subpopulation == "Registered Voters") | |
mode_effect <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,], aes(x = reorder(mode,Approve), y= Approve, col = mode)) + | |
geom_boxplot(width=.4) + | |
theme_elliott() + | |
coord_flip()+ | |
coord_flip() + | |
labs(title = "Mode Effects for Trump Job Approval Polls", | |
subtitle = "A simple comparison of averages over the past 3 weeks", | |
y = "Approval Rating (%)", | |
x = "Pollster") | |
grid.arrange(mode_effect,my_g,heights=c(9, .5)) | |
png("Trump_mode.png",unit="in",height=6,width=8,res=300) | |
grid.arrange(mode_effect,my_g,heights=c(9, .5)) | |
dev.off() | |
pop_effect <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,], aes(x = reorder(sample_subpopulation,Approve), y= Approve, color = sample_subpopulation)) + | |
geom_boxplot(width=.4) + | |
theme_elliott() + | |
coord_flip() + | |
labs(title = "Population Effects for Trump Job Approval Polls", | |
subtitle = "A simple comparison of averages over the past 3 weeks", | |
y = "Approval Rating (%)", | |
x = "Pollster") | |
grid.arrange(pop_effect,my_g,heights=c(9, .5)) | |
png("Trump_population.png",unit="in",height=6,width=8,res=300) | |
grid.arrange(pop_effect,my_g,heights=c(9, .5)) | |
dev.off() | |
avg_compare <- ggplot(polling[as.Date(polling$end_date) > as.Date(max(as.Date(polling$end_date))) - 21,], | |
aes(x = reorder(survey_house,Approve), y= Approve,color=survey_house)) + | |
geom_boxplot() + | |
theme_elliott() + | |
coord_flip() + | |
labs(title = "House Effects for Trump Job Approval Polls", | |
subtitle = "A simple comparison of averages over the past 3 weeks", | |
y = "Approval Rating (%)", | |
x = "Pollster") | |
grid.arrange(avg_compare,my_g,heights=c(9, .5)) | |
png("Trump_house_avg.png",unit="in",height=6,width=8,res=300) | |
grid.arrange(avg_compare,my_g,heights=c(9, .5)) | |
dev.off() | |
########## why not try a correction? ################### | |
df2 <- df1 %>% mutate(Adjusted = NA,Adjusted_Dis = NA) | |
house$Pollster_split <- substr(as.character(house$Pollster),3,10000) | |
for (i in unique(df2$survey_house)){ | |
if(nrow(house[house$Pollster_split == i,])>0){ | |
effect <- as.numeric(house[house$Pollster_split == i,]$Effect)}else{effect <- 0} | |
df2[df2$survey_house == i,]$Adjusted = df2[df2$survey_house == i,]$Approve - effect | |
if(nrow(house[house$Pollster_split == i,])>0){ | |
dis_effect <- as.numeric(house[house$Pollster_split == i,]$D_Effect)}else{dis_effect <- 0} | |
df2[df2$survey_house == i,]$Adjusted_Dis = df2[df2$survey_house == i,]$Disapprove - dis_effect | |
} | |
df2$Net <- df2$Adjusted - df2$Adjusted_Dis | |
df2<-df2[order(df2$end_date),] | |
# function for WMA | |
ma <- function(x,n=7){stats::filter(x,rep(1/n,n), sides=1)} | |
df3 <- as.data.frame(df2 %>% | |
group_by(end_date) %>% | |
summarise(Adjusted = mean(Adjusted), | |
Adjusted_Dis = mean(Adjusted_Dis), | |
Net = mean(Net))) | |
# for i in date | |
# all polls in last 2 weeks | |
# get days since date | |
# get weights | |
# make weighted average | |
# save average to that row | |
df3$M_Adjusted <- NA | |
df3$M_Adjusted_Dis <- NA | |
for (i in unique(df3$end_date)){ | |
temp <- df3 %>% | |
filter((as.Date(end_date) > as.Date(i) - 15) & as.Date(end_date)<= as.Date(i)) %>% | |
mutate(Days = as.numeric(difftime(as.Date(Sys.Date()),ymd(end_date))), | |
weight = exponent_weight(Days)) | |
df3[df3$end_date == i,]$M_Adjusted = weighted.mean(temp$Adjusted,temp$weight) | |
df3[df3$end_date == i,]$M_Adjusted_Dis = weighted.mean(temp$Adjusted_Dis,temp$weight) | |
} | |
df3$M_Net = df3$M_Adjusted - df3$M_Adjusted_Dis | |
df3 <- df3[complete.cases(df3),] | |
names(df3) <- as.character(names(df3)) | |
df3.m <- melt(df3,id.vars = c("end_date","Adjusted","Adjusted_Dis","Net","M_Net")) | |
df3.m$variable <- gsub("M_Adjusted","Approve",df3.m$variable) | |
df3.m$variable <- gsub("Approve_Dis","Disapprove",df3.m$variable) | |
# need the points for polls | |
df2.m <- melt(df2, id.vars = c(names(df2)[!(names(df2) %in% c("Adjusted","Adjusted_Dis"))])) | |
df2.m$variable <- gsub("Adjusted","Approve",df2.m$variable) | |
df2.m$variable <- gsub("Approve_Dis","Disapprove",df2.m$variable) | |
#avg today | |
disapprove <- df3[as.Date(df3$end_date) == max(as.Date(df3$end_date)),]$M_Adjusted_Dis | |
approve <- df3[as.Date(df3$end_date) == max(as.Date(df3$end_date)),]$M_Adjusted | |
trump_polls_ggplot <- ggplot(df3.m, | |
aes(x=as.Date(end_date),y=value,col = variable))+ | |
geom_point(data=df2.m, | |
aes(x=as.Date(end_date),y=value,col=variable,fill=survey_house, | |
label=value,label2=survey_house), | |
shape=1,alpha=.8)+ | |
geom_line(size=1) + | |
theme_elliott() + | |
labs(x="Date", | |
y="", | |
title = "Trump Job Approval Polls", | |
subtitle="Polls corrected for house effects and averaged with\nhigher weight given to most recent polls") + | |
geom_text(aes(x = as.Date(max(as.Date(df3$end_date)))+7, | |
y=disapprove, | |
label=paste0(round(disapprove,1),"% Disapprove"),col="Disapprove")) + | |
geom_text(aes(x = as.Date(max(as.Date(df3$end_date)))+7, | |
y=approve, | |
label=paste0(round(approve,1),"% Approve"),col="Approve")) + | |
scale_color_manual("",values = c("Approve" = "blue","Disapprove"="red")) + | |
coord_cartesian(xlim = c(as.Date("2017-01-20"),max(as.Date(df3$end_date))+15), | |
ylim = c(35,65)) | |
#geom_smooth(data=df2,aes(x=as.Date(end_date),y=Net),col="black",se=FALSE,span=.3) | |
grid.arrange(trump_polls_ggplot,my_g,heights=c(9, .5)) | |
png("MasterPlot.png",unit="in",height=15,width=15,res=400) | |
grid.arrange(trump_polls_ggplot, | |
grid.arrange( | |
pollster_effect, | |
avg_compare, | |
mode_effect, | |
pop_effect, | |
ncol = 2), | |
my_g, | |
heights=c(5,9, .5) | |
) | |
dev.off() | |
house$Pollster = substr(as.character(house$Pollster),3,100) | |
left_join(df2 %>% | |
select("Pollster" = survey_house, | |
"Date" = end_date, | |
Approve, | |
Disapprove, | |
"Approve (Adj.)" = Adjusted, | |
"Disapprove (Adj.)"= Adjusted_Dis) %>% | |
arrange(desc(Date)), | |
house %>% select(Pollster,"House Effect" = Effect),by="Pollster") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment