Skip to content

Instantly share code, notes, and snippets.

@jonrobinson2
Created September 23, 2024 17:22
Show Gist options
  • Save jonrobinson2/2ee9dffc525bd5cb7c78d8ac4de53eae to your computer and use it in GitHub Desktop.
Save jonrobinson2/2ee9dffc525bd5cb7c78d8ac4de53eae to your computer and use it in GitHub Desktop.
Simple Analysis of Alsobrooks vs Hogan Mode Effects
# Session setup.
# The options() call comes first so that gsubfn.engine is already set when
# sqldf (which pulls in gsubfn) is loaded below. scipen = 999 effectively turns
# off scientific notation in printed output; digits = 3 shortens printed
# numbers.
options(gsubfn.engine = "R", scipen = 999, digits = 3) # This stops the loading of XQuartz on Mac

# library() stops immediately with a clear error when a package is missing,
# whereas require() only warns and returns FALSE, deferring the failure to the
# first call that needs the package.
library(sqldf)
library(ggplot2)
library(scales)
library(reshape2) # NOTE(review): not referenced in the visible script; kept
library(lubridate)
library(ggrepel)
# `jr`: the shared ggplot2 theme added to the plots in this script.
# White panel with hairline black grid lines and "Helvetica Neue Thin" text.
#
# Line widths are given via `linewidth`; the `size` argument of
# element_rect()/element_line() is deprecated as of ggplot2 3.4.0, so this
# block requires ggplot2 >= 3.4.0.
#
# Built inside local() so the helper objects do not leak into the global
# environment; only `jr` is defined.
jr <- local({
  # Text element in the theme font; extra arguments (colour, face, ...) are
  # forwarded to element_text().
  thin_text <- function(size, ...) {
    element_text(size = size, family = "Helvetica Neue Thin", ...)
  }
  grid_line <- element_line(colour = "black", linewidth = 0.05)

  theme(
    panel.background = element_rect(
      fill = NA, colour = "black", linewidth = 0.25
    ),
    panel.border = element_blank(),
    panel.grid.major = grid_line,
    panel.grid.minor = grid_line,
    plot.title = thin_text(22),
    plot.subtitle = thin_text(12),
    axis.title.x = thin_text(18),
    axis.text.x = thin_text(18, colour = "black"),
    axis.title.y = thin_text(18),
    axis.text.y = thin_text(18, colour = "black"),
    strip.text.x = thin_text(22),
    strip.text.y = thin_text(18),
    legend.title = thin_text(18, face = "bold"),
    legend.text = thin_text(18),
    strip.background = element_rect(colour = "grey", fill = "white")
  )
})
# Load the FiveThirtyEight Senate polls file and reduce it to the Maryland
# rows used by the rest of the script. Produces:
#   md   - one row per poll/question/answer, with parsed `end_date` and `month`
#   meta - the (poll_id, question_id) pairs that contain no 'Trone' answer
md <- read.csv(
  url("https://projects.fivethirtyeight.com/polls-page/data/senate_polls.csv"),
  stringsAsFactors = FALSE # keep text columns as character on any R version
)

# %in% never yields NA, so rows with a missing state are dropped instead of
# turning into all-NA rows.
md <- md[md$state %in% "Maryland", ]

# Questions in which no answer row is 'Trone' (the average of the 0/1
# indicator is exactly 0).
meta <- sqldf("select poll_id, question_id,
avg(case when answer='Trone' then cast(1 as double) else 0 end) as trone
from md
group by 1,2
having avg(case when answer='Trone' then cast(1 as double) else 0 end) = 0
order by 1,2")

# Keep only rows belonging to those questions. The composite key uses an
# explicit separator: without one, distinct pairs such as (12, 345) and
# (123, 45) would both become "12345" and match each other.
md <- md[
  paste(md$poll_id, md$question_id, sep = "_") %in%
    paste(meta$poll_id, meta$question_id, sep = "_"),
]

# Parse month/day/year strings into Dates and derive the calendar month.
md$end_date <- mdy(md$end_date)
md$month <- month(md$end_date)

# Keep polls ending on or after 2024-03-01; rows whose date failed to parse
# are dropped rather than kept as all-NA rows.
md <- md[!is.na(md$end_date) & md$end_date >= lubridate::ymd("20240301"), ]

# Fold population code 'v' into 'lv'.
# NOTE(review): presumably 'v' means "voters" and is treated as likely voters
# here - confirm against the data dictionary.
md$population_full[md$population_full %in% "v"] <- "lv"
# Print, per pollster and end date, Alsobrooks' share of the combined
# Alsobrooks + Hogan percentage ("two-way" share).
#
# print() is explicit so the table is still shown when the script is run via
# source(), where top-level auto-printing is off by default.
# The 1.0 * factor forces floating-point division in SQLite: if `pct` were
# ever read as an integer column, integer / integer would truncate the share
# to 0 or 1.
print(sqldf("select pollster, end_date,
1.0 * sum(case when answer='Alsobrooks' then pct else 0 end)
  / sum(case when answer in ('Alsobrooks','Hogan') then pct else 0 end) as two_way
from md
group by 1,2
order by end_date, pollster"))
# md_agg: the regression input, one row per combination of methodology bucket,
# end date, pollster, sample size, population and month.
#   methodology - 'Live Phone' when the poll's methodology string contains
#                 "Live Phone", otherwise 'All Other Methods'
#   two_way     - Alsobrooks' share of the combined Alsobrooks + Hogan pct
#
# The 1.0 * factor forces floating-point division in SQLite: if `pct` were
# ever read as an integer column, integer / integer would truncate the share
# to 0 or 1.
md_agg <- sqldf("select case when methodology like '%Live Phone%' then 'Live Phone' else 'All Other Methods' end as methodology, end_date, pollster,
sample_size, population_full, month,
1.0 * sum(case when answer='Alsobrooks' then pct else 0 end)
  / sum(case when answer in ('Alsobrooks','Hogan') then pct else 0 end) as two_way
from md
group by 1,2,3,4,5,6
order by end_date,methodology")
# Ordinary least squares fit of the two-way share on survey mode, population
# type, sample size and calendar month.
reg <- lm(
  two_way ~ methodology + population_full + sample_size + month,
  data = md_agg
)

# One row per model term (row names are the term names): point estimate,
# standard error taken from the diagonal of the coefficient covariance
# matrix, and the term name as a plain column for plotting.
coefs <- data.frame(
  coef = coef(reg),
  se = sqrt(diag(vcov(reg))),
  var = names(coef(reg))
)
# Coefficient plot: one point per model term in `coefs` (intercept included),
# labelled with the estimate as a whole-number percent, with a horizontal bar
# spanning +/- one standard error and a dashed reference line at zero.
#
# Changes from the earlier form:
#   - `labels =` is spelled out instead of relying on partial matching of
#     `label`, and `accuracy =` is passed by name.
#   - the error bar width uses `linewidth`; `size` for lines is deprecated as
#     of ggplot2 3.4.0.
#   - print() is explicit so the figure is still drawn when the script is run
#     via source(), where top-level auto-printing is off by default.
print(
  ggplot(coefs, aes(toupper(var), coef)) +
    geom_point(size = 5, alpha = .35) +
    ggrepel::geom_text_repel(
      aes(label = percent(coef, accuracy = 1)),
      family = "Helvetica Neue Thin",
      size = 8
    ) +
    coord_flip() + # terms on the vertical axis, estimates on the horizontal
    scale_y_continuous(
      "Regression Coefficient",
      labels = percent_format(accuracy = 1)
    ) +
    geom_hline(yintercept = 0, linetype = 2) +
    jr +
    xlab("") +
    geom_errorbar(
      aes(ymin = coef - se, ymax = coef + se),
      width = 0,
      linewidth = 1
    ) +
    ggtitle(
      "Alsobrooks Does Better in Polls that Don't Use Live Callers",
      subtitle = "Polls Since March"
    )
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment