Created
September 23, 2024 17:22
-
-
Save jonrobinson2/2ee9dffc525bd5cb7c78d8ac4de53eae to your computer and use it in GitHub Desktop.
Simple Analysis of Alsobrooks vs Hogan Mode Effects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Session setup ----
# gsubfn.engine = "R" stops XQuartz from being loaded on Mac (per the original
# author's note). scipen = 999 and digits = 3 control how numbers are printed:
# no scientific notation, three significant digits.
options(gsubfn.engine = "R", scipen = 999, digits = 3)

# library() errors immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later in a less obvious place.
library(sqldf)
library(ggplot2)
library(scales)
library(reshape2)
library(lubridate)
library(ggrepel)
# Plot theme ----
# `jr` is a reusable ggplot2 theme fragment: a transparent panel with a thin
# black outline, hairline black grid lines, and "Helvetica Neue Thin" for all
# titles, axis text, strip text and legend text. Add it to a plot with `+ jr`.
#
# Built inside local() so the two small helpers below do not leak into the
# global environment; only `jr` is created.
#
# NOTE: line/rect thickness uses `linewidth`, which replaced `size` for these
# elements in ggplot2 3.4.0 (`size` now triggers a deprecation warning).
jr <- local({
  # Text element in the house font; extra arguments (colour, face) pass through.
  thin_text <- function(size, ...) {
    element_text(size = size, family = "Helvetica Neue Thin", ...)
  }
  # Major and minor grid lines share the same hairline style.
  grid_line <- element_line(colour = "black", linewidth = 0.05)

  theme(
    panel.background = element_rect(
      fill = NA, colour = "black", linewidth = 0.25
    ),
    panel.border = element_blank(),
    panel.grid.major = grid_line,
    panel.grid.minor = grid_line,
    plot.title = thin_text(22),
    plot.subtitle = thin_text(12),
    axis.title.x = thin_text(18),
    axis.text.x = thin_text(18, colour = "black"),
    axis.title.y = thin_text(18),
    axis.text.y = thin_text(18, colour = "black"),
    strip.text.x = thin_text(22),
    strip.text.y = thin_text(18),
    legend.title = thin_text(18, face = "bold"),
    legend.text = thin_text(18),
    strip.background = element_rect(colour = "grey", fill = "white")
  )
})
# Load and filter the polling data ----
# Download the FiveThirtyEight Senate polls file (requires network access).
# stringsAsFactors = FALSE keeps text columns as character on R < 4.0 so the
# recode of `population_full` further down cannot produce NA factor levels.
md <- read.csv(
  url("https://projects.fivethirtyeight.com/polls-page/data/senate_polls.csv"),
  stringsAsFactors = FALSE
)

# Keep Maryland rows only. %in% (unlike ==) never returns NA, so rows with a
# missing state are dropped instead of turning into all-NA rows.
md <- md[md$state %in% "Maryland", ]

# Drop every poll question in which 'Trone' appears as an answer option, so
# only questions without Trone remain. The (poll_id, question_id) key is built
# with a separator: plain paste0() would make e.g. 12 + 345 and 123 + 45
# collide on "12345".
question_key <- paste(md$poll_id, md$question_id, sep = "_")
trone_keys <- unique(question_key[md$answer %in% "Trone"])
md <- md[!(question_key %in% trone_keys), ]

# Parse the month/day/year end date and derive the calendar month (1-12).
md$end_date <- mdy(md$end_date)
md$month <- month(md$end_date)

# Keep polls ending on or after 2024-03-01; rows whose date failed to parse
# are excluded explicitly rather than surfacing as NA rows.
md <- md[!is.na(md$end_date) & md$end_date >= ymd("20240301"), ]

# Recode the population label 'v' to 'lv' so both fall in the same category.
md$population_full[md$population_full %in% "v"] <- "lv"
# Quick look: Alsobrooks' share of the two-way (Alsobrooks + Hogan) vote for
# each pollster and end date. The result is not assigned, so it is only shown
# when this top-level expression auto-prints.
# `* 1.0` forces real (not integer) division in SQLite should `pct` ever be
# stored as an integer column.
sqldf("
  select pollster,
         end_date,
         sum(case when answer = 'Alsobrooks' then pct else 0 end) * 1.0 /
           sum(case when answer in ('Alsobrooks', 'Hogan') then pct else 0 end)
           as two_way
  from md
  group by 1, 2
  order by end_date, pollster
")
# Build the regression data set ----
# One row per (methodology bucket, end_date, pollster, sample_size,
# population_full, month). Methodology is collapsed to two buckets: any value
# containing 'Live Phone' vs everything else. `two_way` is Alsobrooks' share of
# the combined Alsobrooks + Hogan percentage.
# `* 1.0` forces real (not integer) division in SQLite should `pct` ever be
# stored as an integer column.
md_agg <- sqldf("
  select case when methodology like '%Live Phone%' then 'Live Phone'
              else 'All Other Methods' end as methodology,
         end_date,
         pollster,
         sample_size,
         population_full,
         month,
         sum(case when answer = 'Alsobrooks' then pct else 0 end) * 1.0 /
           sum(case when answer in ('Alsobrooks', 'Hogan') then pct else 0 end)
           as two_way
  from md
  group by 1, 2, 3, 4, 5, 6
  order by end_date, methodology
")
# Mode-effect regression ----
# Ordinary least squares of the two-way Alsobrooks share on survey mode,
# population type, sample size and calendar month (month enters as a number).
reg <- lm(
  two_way ~ methodology + population_full + sample_size + month,
  data = md_agg
)

# Tidy table for plotting: point estimate, standard error (square root of the
# diagonal of the coefficient covariance matrix) and the term name.
reg_coef <- coef(reg)
coefs <- data.frame(
  coef = reg_coef,
  se = sqrt(diag(vcov(reg))),
  var = names(reg_coef)
)
# Coefficient plot ----
# One point per regression term (names upper-cased), labelled with the
# coefficient as a whole-number percentage, with +/- 1 standard error bars and
# a dashed reference line at zero. coord_flip() puts the terms on the vertical
# axis. Layers are added in the same order as before so draw order is
# unchanged.
#
# `labels` is spelled out (the original relied on partial matching of `label`)
# and the error bar thickness uses `linewidth`, which replaced `size` for line
# geoms in ggplot2 3.4.0.
ggplot(coefs, aes(toupper(var), coef)) +
  geom_point(size = 5, alpha = .35) +
  ggrepel::geom_text_repel(
    aes(label = percent(coef, accuracy = 1)),
    family = "Helvetica Neue Thin",
    size = 8
  ) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_errorbar(
    aes(ymin = coef - se, ymax = coef + se),
    width = 0,
    linewidth = 1
  ) +
  coord_flip() +
  scale_y_continuous(
    "Regression Coefficient",
    labels = percent_format(accuracy = 1)
  ) +
  xlab("") +
  jr +
  ggtitle(
    "Alsobrooks Does Better in Polls that Don't Use Live Callers",
    subtitle = "Polls Since March"
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment