peterhurford · December 17, 2018 02:37
diff --git a/ballot_initiatives_model.R b/ballot_initiatives_model.R
 # Install and load libraries
 # requireNamespace() only checks whether a package is available; library()
 # then attaches it and fails loudly if the installation did not succeed.
 if (!requireNamespace("dplyr", quietly = TRUE)) {
   install.packages("dplyr")
 }
 library(dplyr)
 if (!requireNamespace("devtools", quietly = TRUE)) {
   install.packages("devtools")
 }
 library(devtools)
 if (!requireNamespace("readr", quietly = TRUE)) {
   install.packages("readr")
 }
 library(readr)
 # recombinator is installed from GitHub through devtools.
 if (!requireNamespace("recombinator", quietly = TRUE)) {
   devtools::install_github("robertzk/recombinator")
 }
 # Attach recombinator itself: attaching readr a second time here would leave
 # homogeneous_recombinator() unavailable right after a fresh install.
 library(recombinator)


 # Download data from https://docs.google.com/spreadsheets/d/1LzUHVgbyQddvESuW_WhJwNUn52023vYerlsho39em2I/edit#gid=0
 # and export it as CSV to the path read below.
 states <- read_csv("~/Downloads/AR US States.csv")


 # Clean data
 # Recode the text columns into numeric predictors in a single pass:
 #   * "Yes" answers and the "D" trifecta marker become 1/0 indicators
 #   * "%", "$" and "," decorations are stripped before numeric conversion
 # DTrifecta is a derived column; the other columns are overwritten in place.
 states <- mutate(
   states,
   `Any ban?` = as.numeric(`Any ban?` == "Yes"),
   `% White` = as.numeric(gsub("%", "", `% White`)),
   `Average Income` = as.numeric(
     gsub(",", "", gsub("$", "", `Average Income`, fixed = TRUE))
   ),
   `Average Education` = as.numeric(gsub("%", "", `Average Education`)),
   DTrifecta = as.numeric(`Trifecta?` == "D"),
   `Ag gag law?` = as.numeric(`Ag gag law?` == "Yes"),
   `Right to Farm?` = as.numeric(`Right to Farm?` == "Yes")
 )


 # Check correlations
 # Run a correlation test of every candidate predictor against the ban
 # indicator; each result is returned paired with the predictor's name.
 vars <- c(
   "Population", "% White", "Average Income", "Average Education", "Density",
   "Cook PVI", "DTrifecta", "538 Elasticity", "Restaurants / 1M People",
   "Overall Meat Demand", "Average Senator Score",
   "Red Meat Production per person", "Hog Slaughter per person",
   "Eggs per Person", "Ag gag law?", "Right to Farm?"
 )
 # cor.test() has no `use` argument (that belongs to cor()); it always drops
 # incomplete pairs on its own, so no extra NA handling is passed here.
 lapply(vars, function(var) list(cor.test(states[["Any ban?"]], states[[var]]), var))


 # Make an initial model
 # `Any ban?` is a 0/1 outcome, so it is modelled with a logistic regression,
 # whose fitted values are genuine probabilities in [0, 1]. Pushing lm()
 # predictions through a sigmoid would not yield probabilities: lm() output is
 # not on the log-odds scale, so every state would land in roughly 0.5-0.73.
 ban_formula <- `Any ban?` ~ `Population` + `Density` +
   `Restaurants / 1M People` + `% White` + `Average Income`
 summary(glm(ban_formula, family = binomial, data = states))


 # Get out-of-sample data for the model
 # Leave-one-out: refit the model without state i, then predict the
 # probability for state i alone. seq_len() keeps this safe for zero rows.
 loo_probability <- vapply(seq_len(nrow(states)), function(i) {
   model <- glm(ban_formula, family = binomial, data = states[-i, ])
   unname(predict(model, newdata = states[i, ], type = "response"))
 }, numeric(1))
 # Assemble the table column-wise so Probability stays numeric; combining the
 # state name and the probability with c() would coerce both to character and
 # make arrange() sort the probabilities as text.
 outputs <- data.frame(
   State = states$State,
   Probability = loo_probability,
   stringsAsFactors = FALSE
 )
 arrange(outputs, Probability)
	# Install and load libraries
	# requireNamespace() only checks whether a package is available; library()
	# then attaches it and fails loudly if the installation did not succeed.
	if (!requireNamespace("dplyr", quietly = TRUE)) {
		install.packages("dplyr")
	}
	library(dplyr)
	if (!requireNamespace("devtools", quietly = TRUE)) {
		install.packages("devtools")
	}
	library(devtools)
	if (!requireNamespace("readr", quietly = TRUE)) {
		install.packages("readr")
	}
	library(readr)
	# recombinator is installed from GitHub through devtools.
	if (!requireNamespace("recombinator", quietly = TRUE)) {
		devtools::install_github("robertzk/recombinator")
	}
	# Attach recombinator itself: attaching readr a second time here would leave
	# homogeneous_recombinator() unavailable right after a fresh install.
	library(recombinator)


	# Download data from https://docs.google.com/spreadsheets/d/1LzUHVgbyQddvESuW_WhJwNUn52023vYerlsho39em2I/edit#gid=0
	# and export it as CSV to the path read below.
	states <- read_csv("~/Downloads/AR US States.csv")


	# Clean data
	# Recode the text columns into numeric predictors in a single pass:
	#   * "Yes" answers and the "D" trifecta marker become 1/0 indicators
	#   * "%", "$" and "," decorations are stripped before numeric conversion
	# DTrifecta is a derived column; the other columns are overwritten in place.
	states <- mutate(
		states,
		`Any ban?` = as.numeric(`Any ban?` == "Yes"),
		`% White` = as.numeric(gsub("%", "", `% White`)),
		`Average Income` = as.numeric(
			gsub(",", "", gsub("$", "", `Average Income`, fixed = TRUE))
		),
		`Average Education` = as.numeric(gsub("%", "", `Average Education`)),
		DTrifecta = as.numeric(`Trifecta?` == "D"),
		`Ag gag law?` = as.numeric(`Ag gag law?` == "Yes"),
		`Right to Farm?` = as.numeric(`Right to Farm?` == "Yes")
	)


	# Check correlations
	# Run a correlation test of every candidate predictor against the ban
	# indicator; each result is returned paired with the predictor's name.
	vars <- c(
		"Population", "% White", "Average Income", "Average Education", "Density",
		"Cook PVI", "DTrifecta", "538 Elasticity", "Restaurants / 1M People",
		"Overall Meat Demand", "Average Senator Score",
		"Red Meat Production per person", "Hog Slaughter per person",
		"Eggs per Person", "Ag gag law?", "Right to Farm?"
	)
	# cor.test() has no `use` argument (that belongs to cor()); it always drops
	# incomplete pairs on its own, so no extra NA handling is passed here.
	lapply(vars, function(var) list(cor.test(states[["Any ban?"]], states[[var]]), var))


	# Make an initial model
	# `Any ban?` is a 0/1 outcome, so it is modelled with a logistic regression,
	# whose fitted values are genuine probabilities in [0, 1]. Pushing lm()
	# predictions through a sigmoid would not yield probabilities: lm() output is
	# not on the log-odds scale, so every state would land in roughly 0.5-0.73.
	ban_formula <- `Any ban?` ~ `Population` + `Density` +
		`Restaurants / 1M People` + `% White` + `Average Income`
	summary(glm(ban_formula, family = binomial, data = states))


	# Get out-of-sample data for the model
	# Leave-one-out: refit the model without state i, then predict the
	# probability for state i alone. seq_len() keeps this safe for zero rows.
	loo_probability <- vapply(seq_len(nrow(states)), function(i) {
		model <- glm(ban_formula, family = binomial, data = states[-i, ])
		unname(predict(model, newdata = states[i, ], type = "response"))
	}, numeric(1))
	# Assemble the table column-wise so Probability stays numeric; combining the
	# state name and the probability with c() would coerce both to character and
	# make arrange() sort the probabilities as text.
	outputs <- data.frame(
		State = states$State,
		Probability = loo_probability,
		stringsAsFactors = FALSE
	)
	arrange(outputs, Probability)