Skip to content

Instantly share code, notes, and snippets.

@peterhurford
Created December 17, 2018 02:37
Show Gist options
  • Save peterhurford/65eb1dda60f707a8505ffe522a7e98cf to your computer and use it in GitHub Desktop.
Save peterhurford/65eb1dda60f707a8505ffe522a7e98cf to your computer and use it in GitHub Desktop.
Model predicting state-level ballot initiative outcomes
# Install and load libraries
# Each package is installed only when it is missing, then attached with
# library() so that a failed install surfaces as an error right here.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)

if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# recombinator is installed from GitHub via devtools. The package attached
# afterwards must be recombinator itself (previously readr was attached a
# second time, leaving recombinator unloaded after a fresh install).
if (!requireNamespace("recombinator", quietly = TRUE)) {
  install_github("robertzk/recombinator")
}
library(recombinator)
# Load the state-level data. Download it first from
# https://docs.google.com/spreadsheets/d/1LzUHVgbyQddvESuW_WhJwNUn52023vYerlsho39em2I/edit#gid=0
states <- read_csv("~/Downloads/AR US States.csv")

# Clean data ----

# Yes/No columns become 1/0 indicators (1 when the value is "Yes").
for (yes_no_col in c("Any ban?", "Ag gag law?", "Right to Farm?")) {
  states[[yes_no_col]] <- as.numeric(states[[yes_no_col]] == "Yes")
}

# Percentage columns: strip the "%" sign, then parse as numbers.
for (percent_col in c("% White", "Average Education")) {
  without_sign <- gsub("%", "", states[[percent_col]])
  states[[percent_col]] <- as.numeric(without_sign)
}

# Income: strip the literal "$" first, then the "," separators, then parse.
income_no_dollar <- gsub("$", "", states[["Average Income"]], fixed = TRUE)
income_digits <- gsub(",", "", income_no_dollar)
states[["Average Income"]] <- as.numeric(income_digits)

# New indicator column: 1 when the "Trifecta?" column equals "D".
states[["DTrifecta"]] <- as.numeric(states[["Trifecta?"]] == "D")
# Check correlations
# Runs a correlation test between the ban indicator and every candidate
# predictor, returning one list(test_result, variable_name) per predictor.
# cor.test() has no `use` argument (that belongs to cor()); it was silently
# swallowed by `...`, so it is no longer passed. cor.test() restricts itself
# to complete pairs of observations on its own.
vars <- c(
  "Population", "% White", "Average Income", "Average Education", "Density",
  "Cook PVI", "DTrifecta", "538 Elasticity", "Restaurants / 1M People",
  "Overall Meat Demand", "Average Senator Score",
  "Red Meat Production per person", "Hog Slaughter per person",
  "Eggs per Person", "Ag gag law?", "Right to Farm?"
)
lapply(vars, function(var) {
  list(cor.test(states[["Any ban?"]], states[[var]]), var)
})

# Make an initial model
# Ordinary least squares fit of the ban indicator on five predictors; the
# summary shows the coefficient estimates and their significance.
summary(
  lm(
    `Any ban?` ~ `Population` + `Density` + `Restaurants / 1M People` +
      `% White` + `Average Income`,
    data = states
  )
)
# Get out-of-sample data for the model
# Leave-one-out cross-validation: for every row, fit the model on all other
# rows and predict the held-out row.
#
# Fixes relative to the previous version:
#   * The logistic transform 1 / (1 + exp(-x)) was applied to predictions of
#     an ordinary least squares fit, which does not yield a probability. The
#     model is now a logistic regression (binomial glm) and predictions are
#     requested on the response scale, i.e. as probabilities.
#   * State name and probability were combined with c(), which coerced the
#     probability to character, so arrange() sorted it as text. Probability
#     is now kept numeric.
#   * seq_len() replaces seq(nrow(...)), which misbehaves on zero rows, and
#     the result is built by vapply() instead of growing a list in a loop.
ban_formula <- `Any ban?` ~ `Population` + `Density` +
  `Restaurants / 1M People` + `% White` + `Average Income`

held_out_probability <- vapply(
  seq_len(nrow(states)),
  function(i) {
    fit <- glm(ban_formula, family = binomial(), data = states[-i, ])
    # predict() returns a named length-one vector; keep just the value.
    as.numeric(predict(fit, newdata = states[i, ], type = "response"))
  },
  numeric(1)
)

# One row per state, sorted from least to most likely.
outputs <- data.frame(
  State = states[["State"]],
  Probability = held_out_probability,
  stringsAsFactors = FALSE
)
arrange(outputs, Probability)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment