andybega · August 16, 2013 18:01
diff --git a/count-model-bug.R b/count-model-bug.R
 #
 #
 #     Original code from Mike:
 #
 #

 # mdw attempts at modeling Protests & Demonstrations
 # August 13, 2013

 library(wicews)
 data(icews)

 data(icews)
 # DV transformation
 icews$mdwPD<- icews$protest.tALL
 icews$mdwPD[icews$mdwPD>30]<-30
 
 data(cutoffs)

 data(predict.data)
 # DV transformation
 predict.data$mdwPD<- predict.data$protest.tALL
 predict.data$mdwPD[predict.data$mdwPD>30]<-30

 #create training and test set
 training <- icews[icews$date <= cutoffs$trainingend,]

 test <- icews[icews$date >= cutoffs$teststart,]

 #range(training$date); range(test$date)

 # Function for mean squared prediction error
 MSE <- function(model, data){
    y <- names(model$model)[1]
    pred <- predict(model, newdata=data)
    diff <- (data[, y] - pred)^2
    output <- sqrt(sum(diff)/length(diff))
    print(output)
 }

 ##########
 # Protest & Demonstrations count model
 # Dependent Variable is protest.tALL
 # too underdispersed for normal processing
 # use exp(3)  to truncate the data to number of events
 # up to twenty, then set to 20 or more.
 
 model.pd <- zeroinfl(
       mdwPD ~ eth.rel.h.count.l1 # the nonzeros
    | # conditional on the zeros
       excl_groups_count.l1 + exclpop.l1 + PARCOMP.l1, 
       dist="negbin", link="logit", data=training)

 # Cannot include (1|ccode) due to identification.
 summary(model.pd)


 MSE(model.pd, training)
 MSE(model.pd, test)
 

 ##########
 # Plot training and test data fit
 #
 ##########
 
 par(mfrow=c(1,2))
 plot.countfit(model.pd, training, title="In-Sample Protest Model")
 plot.countfit(model.pd, test, title="Out-of-Sample Protest Model")

 #
 #
 #     Is is a problem with plotting?
 #
 #

 cutpoint <- quantile(training[, "mdwPD"][training[, "mdwPD"] > 0])[4]
 true <- ifelse(training[, "mdwPD"] < 1, 0, NA)
 true <- ifelse(training[, "mdwPD"] >= 1 & training[, "mdwPD"] < 7, 1, true)
 true <- ifelse(training[, "mdwPD"] >= 7, 2, true)
 table(true)

 # so...there should be 15383 zeroes for training data
 # manually sum the first column in the training countfit plot:
 10+12894+2479

 # Ok, matches. So it looks like the axis labels are switched. Or the table is flipped.
	#
	#
	# Original code from Mike:
	#
	#

	# mdw attempts at modeling Protests & Demonstrations
	# August 13, 2013

	library(wicews)
	data(icews)

	data(icews)
	# DV transformation
	icews$mdwPD<- icews$protest.tALL
	icews$mdwPD[icews$mdwPD>30]<-30

	data(cutoffs)

	data(predict.data)
	# DV transformation
	predict.data$mdwPD<- predict.data$protest.tALL
	predict.data$mdwPD[predict.data$mdwPD>30]<-30

	#create training and test set
	training <- icews[icews$date <= cutoffs$trainingend,]

	test <- icews[icews$date >= cutoffs$teststart,]

	#range(training$date); range(test$date)

	# Function for mean squared prediction error
	MSE <- function(model, data){
	y <- names(model$model)[1]
	pred <- predict(model, newdata=data)
	diff <- (data[, y] - pred)^2
	output <- sqrt(sum(diff)/length(diff))
	print(output)
	}

	##########
	# Protest & Demonstrations count model
	# Dependent Variable is protest.tALL
	# too underdispersed for normal processing
	# use exp(3) to truncate the data to number of events
	# up to twenty, then set to 20 or more.

	model.pd <- zeroinfl(
	mdwPD ~ eth.rel.h.count.l1 # the nonzeros
	\| # conditional on the zeros
	excl_groups_count.l1 + exclpop.l1 + PARCOMP.l1,
	dist="negbin", link="logit", data=training)

	# Cannot include (1\|ccode) due to identification.
	summary(model.pd)


	MSE(model.pd, training)
	MSE(model.pd, test)


	##########
	# Plot training and test data fit
	#
	##########

	par(mfrow=c(1,2))
	plot.countfit(model.pd, training, title="In-Sample Protest Model")
	plot.countfit(model.pd, test, title="Out-of-Sample Protest Model")

	#
	#
	# Is is a problem with plotting?
	#
	#

	cutpoint <- quantile(training[, "mdwPD"][training[, "mdwPD"] > 0])[4]
	true <- ifelse(training[, "mdwPD"] < 1, 0, NA)
	true <- ifelse(training[, "mdwPD"] >= 1 & training[, "mdwPD"] < 7, 1, true)
	true <- ifelse(training[, "mdwPD"] >= 7, 2, true)
	table(true)

	# so...there should be 15383 zeroes for training data
	# manually sum the first column in the training countfit plot:
	10+12894+2479

	# Ok, matches. So it looks like the axis labels are switched. Or the table is flipped.