Created
August 16, 2013 18:01
-
-
Save andybega/6252067 to your computer and use it in GitHub Desktop.
Protest models from Mike, produces count plots that look really weird.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# | |
# Original code from Mike: | |
# | |
# | |
# mdw attempts at modeling Protests & Demonstrations | |
# August 13, 2013 | |
library(wicews) | |
data(icews) | |
data(icews) | |
# DV transformation | |
icews$mdwPD<- icews$protest.tALL | |
icews$mdwPD[icews$mdwPD>30]<-30 | |
data(cutoffs) | |
data(predict.data) | |
# DV transformation | |
predict.data$mdwPD<- predict.data$protest.tALL | |
predict.data$mdwPD[predict.data$mdwPD>30]<-30 | |
#create training and test set | |
training <- icews[icews$date <= cutoffs$trainingend,] | |
test <- icews[icews$date >= cutoffs$teststart,] | |
#range(training$date); range(test$date) | |
# Function for mean squared prediction error | |
MSE <- function(model, data){ | |
y <- names(model$model)[1] | |
pred <- predict(model, newdata=data) | |
diff <- (data[, y] - pred)^2 | |
output <- sqrt(sum(diff)/length(diff)) | |
print(output) | |
} | |
########## | |
# Protest & Demonstrations count model | |
# Dependent Variable is protest.tALL | |
# too underdispersed for normal processing | |
# use exp(3) to truncate the data to number of events | |
# up to twenty, then set to 20 or more. | |
model.pd <- zeroinfl( | |
mdwPD ~ eth.rel.h.count.l1 # the nonzeros | |
| # conditional on the zeros | |
excl_groups_count.l1 + exclpop.l1 + PARCOMP.l1, | |
dist="negbin", link="logit", data=training) | |
# Cannot include (1|ccode) due to identification. | |
summary(model.pd) | |
MSE(model.pd, training) | |
MSE(model.pd, test) | |
########## | |
# Plot training and test data fit | |
# | |
########## | |
par(mfrow=c(1,2)) | |
plot.countfit(model.pd, training, title="In-Sample Protest Model") | |
plot.countfit(model.pd, test, title="Out-of-Sample Protest Model") | |
# | |
# | |
# Is is a problem with plotting? | |
# | |
# | |
cutpoint <- quantile(training[, "mdwPD"][training[, "mdwPD"] > 0])[4] | |
true <- ifelse(training[, "mdwPD"] < 1, 0, NA) | |
true <- ifelse(training[, "mdwPD"] >= 1 & training[, "mdwPD"] < 7, 1, true) | |
true <- ifelse(training[, "mdwPD"] >= 7, 2, true) | |
table(true) | |
# so...there should be 15383 zeroes for training data | |
# manually sum the first column in the training countfit plot: | |
10+12894+2479 | |
# Ok, matches. So it looks like the axis labels are switched. Or the table is flipped. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment