Created
December 17, 2018 02:37
-
-
Save peterhurford/65eb1dda60f707a8505ffe522a7e98cf to your computer and use it in GitHub Desktop.
Model predicting state-level ballot initiative outcomes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install (if missing) and load libraries.
# requireNamespace() only checks whether a package is available; the
# subsequent library() call attaches it and fails loudly if the install
# did not succeed (require() would just return FALSE).
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
library(devtools)

if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# recombinator is installed from GitHub via devtools::install_github().
# It must be attached explicitly afterwards: the script later calls
# homogeneous_recombinator() unqualified.
if (!requireNamespace("recombinator", quietly = TRUE)) {
  install_github("robertzk/recombinator")
}
library(recombinator)
# Data source (save it as CSV under ~/Downloads before running):
# https://docs.google.com/spreadsheets/d/1LzUHVgbyQddvESuW_WhJwNUn52023vYerlsho39em2I/edit#gid=0
#
# Read the sheet and clean it in one pass:
#   * "Yes"/other flags become 1/0 numerics,
#   * percentage and currency strings are stripped of their symbols and
#     parsed as numbers,
#   * DTrifecta is a new 1/0 indicator for `Trifecta?` == "D".
states <- read_csv("~/Downloads/AR US States.csv") %>%
  mutate(
    `Any ban?` = as.numeric(`Any ban?` == "Yes"),
    `% White` = as.numeric(gsub("%", "", `% White`)),
    `Average Income` = as.numeric(
      gsub(",", "", gsub("$", "", `Average Income`, fixed = TRUE))
    ),
    `Average Education` = as.numeric(gsub("%", "", `Average Education`)),
    DTrifecta = as.numeric(`Trifecta?` == "D"),
    `Ag gag law?` = as.numeric(`Ag gag law?` == "Yes"),
    `Right to Farm?` = as.numeric(`Right to Farm?` == "Yes")
  )
# Check correlations between `Any ban?` and each candidate predictor.
vars <- c(
  "Population", "% White", "Average Income", "Average Education",
  "Density", "Cook PVI", "DTrifecta", "538 Elasticity",
  "Restaurants / 1M People", "Overall Meat Demand",
  "Average Senator Score", "Red Meat Production per person",
  "Hog Slaughter per person", "Eggs per Person", "Ag gag law?",
  "Right to Farm?"
)
# cor.test() has no `use` argument (that belongs to cor()); passing it was
# silently swallowed by `...`. cor.test() drops incomplete pairs on its own.
# The result list is named by variable so each printed test is labelled.
lapply(setNames(vars, vars), function(var) {
  cor.test(states[["Any ban?"]], states[[var]])
})
# Make an initial model.
# `Any ban?` is a 0/1 outcome, so fit a logistic regression (binomial GLM).
# The previous code fit lm() and then pushed its predictions through an
# inverse-logit, which does not yield probabilities for a linear model.
ban_formula <- `Any ban?` ~ `Population` + `Density` +
  `Restaurants / 1M People` + `% White` + `Average Income`
summary(glm(ban_formula, family = binomial, data = states))

# Get out-of-sample data for the model (leave-one-out): for each state, fit
# on all other states and predict the held-out one.
# seq_len() keeps this a no-op on an empty table (seq(0) would give c(1, 0)).
loo_probability <- vapply(
  seq_len(nrow(states)),
  function(i) {
    model <- glm(ban_formula, family = binomial, data = states[-i, ])
    # type = "response" returns the fitted probability directly.
    unname(predict(model, newdata = states[i, ], type = "response"))
  },
  numeric(1)
)

# Keep Probability numeric so arrange() sorts by value, not as text
# (combining it with the state name in c() coerced it to character).
outputs <- data.frame(
  State = states[["State"]],
  Probability = loo_probability,
  stringsAsFactors = FALSE
)
arrange(outputs, Probability)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment