Skip to content

Instantly share code, notes, and snippets.

@jonrobinson2
Created November 8, 2024 16:51
Show Gist options
  • Save jonrobinson2/5c6040d243093ea6fd887762379b8c4a to your computer and use it in GitHub Desktop.
Save jonrobinson2/5c6040d243093ea6fd887762379b8c4a to your computer and use it in GitHub Desktop.
apv_fna_json_scrape.R
require(jsonlite)
require(plyr)
require(sqldf)
require(reshape2)
require(openxlsx)
# Download the 2024 and 2020 general-election voter_analysis feeds from
# feeds-elections.foxnews.com into Downloads/ (relative to the current working
# directory), then parse each file and keep the second top-level element of
# the JSON document.

# download.file() fails when the destination directory is missing, so make
# sure it exists first; this is a no-op when Downloads/ is already there.
dir.create('Downloads', showWarnings = FALSE, recursive = TRUE)

download.file(url = 'https://feeds-elections.foxnews.com/voter_analysis/2024_Generals/US/US-P-00.json',
              destfile = 'Downloads/apv_24.json')
download.file(url = 'https://feeds-elections.foxnews.com/voter_analysis/2020_Generals/US/US-P-00.json',
              destfile = 'Downloads/apv_20.json')

# [[2]] is positional: the rest of the script reads `qcode`, `question` and
# `h_answers` from whatever sits in that slot of the parsed JSON.
apv_24 <- fromJSON('Downloads/apv_24.json')[[2]]
apv_20 <- fromJSON('Downloads/apv_20.json')[[2]]
# Flatten one parsed feed into a data frame with one row per (question, answer).
#
# Args:
#   apv: parsed feed table. The function reads its `qcode` and `question`
#     columns and its `h_answers` list; each `h_answers` entry is read for
#     `answer`, `order`, `variable_percent` and `results`, and each element of
#     `results` is read for `party` and `result_percent`.
#   shared_qcodes: qcodes to keep; questions whose qcode is not in this vector
#     are skipped.
#
# Returns:
#   A data frame with columns answer, orders, share, question, qcode, twoway,
#   where share = variable_percent / 100 and
#   twoway = Democrat / (Democrat + Republican) of `result_percent`.
#   Questions without any answers contribute no rows.
summarise_apv_answers <- function(apv, shared_qcodes) {
  # Column of the cast table for `party`, or NAs when that party never
  # appears, so the two-way share becomes NA instead of raising an error.
  party_pct <- function(tbl, party) {
    if (party %in% names(tbl)) tbl[[party]] else rep(NA_real_, nrow(tbl))
  }

  rbind.fill(lapply(which(apv$qcode %in% shared_qcodes), function(x) {
    message(x)  # progress: index of the question being processed
    answers <- apv$h_answers[[x]]
    shares <- data.frame(answer = answers$answer,
                         orders = answers$order,
                         share = answers$variable_percent / 100)
    if (nrow(shares) == 0) {
      return(NULL)  # rbind.fill() drops NULL entries
    }
    shares$question <- apv$question[x]
    shares$qcode <- apv$qcode[x]

    # Tag each per-answer result table with its answer label so the tables
    # can be stacked and then spread to one column per party.
    by_party <- answers$results
    for (y in seq_len(nrow(shares))) {
      by_party[[y]]$answer <- shares$answer[y]
    }
    wide <- dcast(rbind.fill(by_party), answer ~ party,
                  value.var = 'result_percent')
    dem <- party_pct(wide, 'Democrat')
    rep_pct <- party_pct(wide, 'Republican')
    wide$twoway <- dem / (dem + rep_pct)

    merge(shares, wide[, c('answer', 'twoway')])
  }))
}

# Only questions asked in both years are kept on each side.
results_24 <- summarise_apv_answers(apv_24, apv_20$qcode)
results_20 <- summarise_apv_answers(apv_20, apv_24$qcode)
# Pair each 2024 row with its 2020 counterpart. merge() keeps only rows whose
# answer, question, qcode and orders all match in both years; the remaining
# columns get the _24 / _20 suffixes.
results <- merge(results_24, results_20,
                 by = c('answer', 'question', 'qcode', 'orders'),
                 suffixes = c('_24', '_20'))
results <- sqldf("select * from results order by qcode, question, orders")

# Stamp the run time and compute the 2024-minus-2020 change for both metrics.
results$timestamp <- lubridate::now()
results$share_24_minus_20 <- results$share_24 - results$share_20
results$twoway_24_minus_20 <- results$twoway_24 - results$twoway_20

# Fix the output column order; `orders` was only needed for sorting.
results <- results[, c('timestamp', 'qcode', 'question', 'answer',
                       'share_24', 'share_20', 'share_24_minus_20',
                       'twoway_24', 'twoway_20', 'twoway_24_minus_20')]

# write.xlsx() cannot create missing parent directories, so make sure the
# output folder exists before writing.
out_file <- 'Downloads/Miscellaneous/apv_comp.xlsx'
dir.create(dirname(out_file), showWarnings = FALSE, recursive = TRUE)
write.xlsx(x = results, file = out_file)

# Remove only the two JSON files this script downloaded. The previous
# `rm Downloads/apv*` shell call needed a Unix shell and would also delete any
# unrelated file in Downloads/ whose name starts with "apv".
unlink(c('Downloads/apv_24.json', 'Downloads/apv_20.json'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment