Created
September 4, 2012 21:24
-
-
Save jayyonamine/3626704 to your computer and use it in GitHub Desktop.
out-of-sample Bootstrapped logit
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################## | |
##r code for greed and grievance replication/extension file | |
##James E. Yonamine (worked off of VJD's original code) | |
##2/14/2012 | |
##data from http://www.apsanet.org/content_29436.cfm | |
########################## | |
#if you don't have the packages below, un-comment them out and install. | |
#install.packages("ROCR") | |
#install.packages("fields") | |
#install.packages("car") | |
#install.packages("lattice") | |
#install.packages("vioplot") | |
library(foreign) | |
library(ROCR) | |
library(fields) | |
library(car) | |
library(lattice) | |
library(vioplot) | |
library(Zelig) | |
library(Amelia) | |
library(stats) | |
rm(list=ls()) | |
data <- read.dta("collier and hoeffler 2004.dta") | |
data<-data[complete.cases(data$warsa),] #this drops all rows with NA for warsa, i.e. drops all continuations. C&H use this approach, which is reasonable | |
a.out <- amelia(data, m = 1, ts = "year", cs = "country") | |
data<-a.out$imputations[[1]] | |
##just to make sure coefficients are basically the same after imputation. they are. | |
z.out.1 <-zelig(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop , model = "logit", data=data ) | |
summary(z.out.1) | |
##bootstrapping out-of-sample## | |
## Bootstrap the split of these datasets for forecasting | |
bs.num <- 1000 | |
history <- matrix(data=0, nrow=bs.num, ncol=24) | |
colnames(history) <- c("griev_1.sens", "griev_1.spec", "griev_1.auc","griev_2.sens", "griev_2.spec", "griev_2.auc","griev_3.sens", "griev_3.spec", "griev_3.auc", "opp_1.sens", "opp_1.spec", "opp_1.auc", "opp_2.sens", "opp_2.spec", "opp_2.auc","opp_3.sens", "opp_3.spec", "opp_3.auc","opp_4.sens", "opp_4.spec", "opp_4.auc","opp_5.sens", "opp_5.spec", "opp_5.auc",) | |
for (i in 1:bs.num) { | |
set.seed(i) | |
## Subset the training and outsample randomly | |
data <- cbind(data,runif(nrow(data))) | |
train.data<- data[which(data[,ncol(data)] >= 0.5),] | |
test.data <- data[which(data[,ncol(data)] < 0.5),] | |
############################################## | |
###############Grievance Models################ | |
############################################## | |
#############Grievance 1######################## | |
grievance.1 <- glm(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop, family=binomial(link="logit"), data = train.data) | |
grievance.1.predict <- predict(grievance.1, newdata=test.data, type="response") | |
grievance.1.y.hat<-as.matrix(grievance.1.predict) | |
## Establish prediction objects from ROCR package | |
y<-as.matrix(test.data$warsa) | |
grievance.1.predict <-prediction(grievance.1.y.hat,y) | |
## Calculate the f-measure, find the cutoff, calculate sensitivity and specificty using that cutoff | |
grievance.1.f <- performance(grievance.1.predict , measure = "f") | |
grievance.1.where.F <- which.max(as.numeric(unlist(slot(grievance.1.f,"y.values")))) | |
grievance.1.what.F <- performance(grievance.1.predict, measure="sens", x.measure="spec") | |
history[i,1] <- as.numeric(unlist(slot(grievance.1.what.F,"y.values")))[grievance.1.where.F] | |
history[i,2] <- as.numeric(unlist(slot(grievance.1.what.F,"x.values")))[grievance.1.where.F] | |
## Calculate and store the AUC | |
grievance.1.auc <- performance(grievance.1.predict, measure = "auc") | |
history[i,3] <- as.numeric(unlist(slot(grievance.1.auc,"y.values"))) | |
#################Grievance 2######################## | |
grievance.2 <- glm(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop + ygini, family=binomial(link="logit"), data = train.data) | |
grievance.2.predict <- predict(grievance.2, newdata=test.data, type="response") | |
grievance.2.y.hat<-as.matrix(grievance.2.predict) | |
## Establish prediction objects from ROCR package | |
y<-as.matrix(test.data$warsa) | |
grievance.2.predict <-prediction(grievance.2.y.hat,y) | |
## Calculate the f-measure, find the cutoff, calculate sensitivity and specificty using that cutoff | |
grievance.2.with.f <- performance(grievance.2.predict , measure = "f") | |
grievance.2.where.F <- which.max(as.numeric(unlist(slot(grievance.2.with.f,"y.values")))) | |
grievance.2.what.F <- performance(grievance.2.predict, measure="sens", x.measure="spec") | |
history[i,4] <- as.numeric(unlist(slot(grievance.2.what.F,"y.values")))[grievance.2.where.F] | |
history[i,5] <- as.numeric(unlist(slot(grievance.2.what.F,"x.values")))[grievance.2.where.F] | |
## Calculate and store the AUC | |
grievance.2.auc <- performance(grievance.2.predict, measure = "auc") | |
history[i,6] <- as.numeric(unlist(slot(grievance.2.auc,"y.values"))) | |
#################Grievance 3######################## | |
grievance.3 <- glm(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop + lgini, family=binomial(link="logit"), data = train.data) | |
grievance.3.predict <- predict(grievance.3, newdata=test.data, type="response") | |
grievance.3.y.hat<-as.matrix(grievance.3.predict) | |
## Establish prediction objects from ROCR package | |
y<-as.matrix(test.data$warsa) | |
grievance.3.predict <-prediction(grievance.3.y.hat,y) | |
## Calculate the f-measure, find the cutoff, calculate sensitivity and specificty using that cutoff | |
grievance.3.with.f <- performance(grievance.3.predict , measure = "f") | |
grievance.3.where.F <- which.max(as.numeric(unlist(slot(grievance.3.with.f,"y.values")))) | |
grievance.3.what.F <- performance(grievance.3.predict, measure="sens", x.measure="spec") | |
history[i,7] <- as.numeric(unlist(slot(grievance.3.what.F,"y.values")))[grievance.3.where.F] | |
history[i,8] <- as.numeric(unlist(slot(grievance.3.what.F,"x.values")))[grievance.3.where.F] | |
## Calculate and store the AUC | |
grievance.3.auc <- performance(grievance.3.predict, measure = "auc") | |
history[i,9] <- as.numeric(unlist(slot(grievance.3.auc,"y.values"))) | |
############################################## | |
###############Opportunity Models################ | |
############################################## | |
##############Opportunity 1##################### | |
opportunity.1 <- glm(warsa ~sxp + sxp2 + coldwar + secm + gy1 + peace + prevwara + mount + geogia + frac + lnpop, family=binomial(link="logit"), data = train.data) | |
opportunity.1.predict <- predict(opportunity.1, newdata=test.data, type="response") | |
opportunity.1.y.hat<-as.matrix(opportunity.1.predict) | |
## Establish prediction objects from ROCR package | |
y<-as.matrix(test.data$warsa) | |
opp.1.predict <-prediction(opportunity.1.y.hat,y) | |
## Calculate the f-measure, find the cutoff, calculate sensitivity and specificty using that cutoff | |
opp.1.f <- performance(opp.1.predict , measure = "f") | |
opp.1.where.F <- which.max(as.numeric(unlist(slot(opp.1.f,"y.values")))) | |
opp.1.what.F <- performance(opp.1.predict, measure="sens", x.measure="spec") | |
history[i,10] <- as.numeric(unlist(slot(opp.1.what.F,"y.values")))[opp.1.where.F] | |
history[i,11] <- as.numeric(unlist(slot(opp.1.what.F,"x.values")))[opp.1.where.F] | |
## Calculate and store the AUC | |
opp.1.auc <- performance(opp.1.predict, measure = "auc") | |
history[i,12] <- as.numeric(unlist(slot(opp.1.auc,"y.values"))) | |
##############Opportunity 2##################### | |
opportunity.2 <- glm(warsa ~sxp + sxp2 + coldwar + secm + gy1 + peace + mount + geogia + frac + lnpop, family=binomial(link="logit"), data = train.data) | |
opportunity.2.predict <- predict(opportunity.2, newdata=test.data, type="response") | |
opportunity.2.y.hat<-as.matrix(opportunity.2.predict) | |
## Establish prediction objects from ROCR package | |
y<-as.matrix(test.data$warsa) | |
opp.2.predict <-prediction(opportunity.2.y.hat,y) | |
## Calculate the f-measure, find the cutoff, calculate sensitivity and specificty using that cutoff | |
opp.2.f <- performance(opp.2.predict , measure = "f") | |
opp.2.where.F <- which.max(as.numeric(unlist(slot(opp.2.f,"y.values")))) | |
opp.2.what.F <- performance(opp.2.predict, measure="sens", x.measure="spec") | |
history[i,13] <- as.numeric(unlist(slot(opp.2.what.F,"y.values")))[opp.2.where.F] | |
history[i,14] <- as.numeric(unlist(slot(opp.2.what.F,"x.values")))[opp.2.where.F] | |
## Calculate and store the AUC | |
opp.2.auc <- performance(opp.2.predict, measure = "auc") | |
history[i,15] <- as.numeric(unlist(slot(opp.2.auc,"y.values"))) | |
##############Opportunity 3##################### | |
opportunity.3 <- glm(warsa ~sxp + sxp2 + coldwar + secm + lngdp_ + gy1 + peace + mount + geogia + frac + lnpop, family=binomial(link="logit"), data = train.data) | |
opportunity.3.predict <- predict(opportunity.3, newdata=test.data, type="response") | |
opportunity.3.y.hat<-as.matrix(opportunity.3.predict) | |
## Establish prediction objects from ROCR package | |
y<-as.matrix(test.data$warsa) | |
opp.3.predict <-prediction(opportunity.3.y.hat,y) | |
## Calculate the f-measure, find the cutoff, calculate sensitivity and specificty using that cutoff | |
opp.3.f <- performance(opp.3.predict , measure = "f") | |
opp.3.where.F <- which.max(as.numeric(unlist(slot(opp.3.f,"y.values")))) | |
opp.3.what.F <- performance(opp.3.predict, measure="sens", x.measure="spec") | |
history[i,16] <- as.numeric(unlist(slot(opp.3.what.F,"y.values")))[opp.3.where.F] | |
history[i,17] <- as.numeric(unlist(slot(opp.3.what.F,"x.values")))[opp.3.where.F] | |
## Calculate and store the AUC | |
opp.3.auc <- performance(opp.3.predict, measure = "auc") | |
history[i,18] <- as.numeric(unlist(slot(opp.3.auc,"y.values"))) | |
##############Opportunity 4##################### | |
opportunity.4 <- glm(warsa ~ sxp + sxp2 + lngdp_ + peace + lnpop + diaspeaa , family=binomial(link="logit"), data = train.data) | |
opportunity.4.predict <- predict(opportunity.4, newdata=test.data, type="response") | |
opportunity.4.y.hat<-as.matrix(opportunity.4.predict) | |
## Establish prediction objects from ROCR package | |
y<-as.matrix(test.data$warsa) | |
opp.4.predict <-prediction(opportunity.4.y.hat,y) | |
## Calculate the f-measure, find the cutoff, calculate sensitivity and specificty using that cutoff | |
opp.4.f <- performance(opp.4.predict , measure = "f") | |
opp.4.where.F <- which.max(as.numeric(unlist(slot(opp.4.f,"y.values")))) | |
opp.4.what.F <- performance(opp.4.predict, measure="sens", x.measure="spec") | |
history[i,19] <- as.numeric(unlist(slot(opp.4.what.F,"y.values")))[opp.4.where.F] | |
history[i,20] <- as.numeric(unlist(slot(opp.4.what.F,"x.values")))[opp.4.where.F] | |
## Calculate and store the AUC | |
opp.4.auc <- performance(opp.4.predict, measure = "auc") | |
history[i,21] <- as.numeric(unlist(slot(opp.4.auc,"y.values"))) | |
##############Opportunity 5##################### | |
opportunity.5 <- glm(warsa ~ sxp + sxp2 + lngdp_ + peace + lnpop + diahpeaa + difdpeaa , family=binomial(link="logit"), data = train.data) | |
opportunity.5.predict <- predict(opportunity.5, newdata=test.data, type="response") | |
opportunity.5.y.hat<-as.matrix(opportunity.5.predict) | |
## Establish prediction objects from ROCR package | |
y<-as.matrix(test.data$warsa) | |
opp.5.predict <-prediction(opportunity.5.y.hat,y) | |
## Calculate the f-measure, find the cutoff, calculate sensitivity and specificty using that cutoff | |
opp.5.f <- performance(opp.5.predict , measure = "f") | |
opp.5.where.F <- which.max(as.numeric(unlist(slot(opp.5.f,"y.values")))) | |
opp.5.what.F <- performance(opp.5.predict, measure="sens", x.measure="spec") | |
history[i,22] <- as.numeric(unlist(slot(opp.5.what.F,"y.values")))[opp.5.where.F] | |
history[i,23] <- as.numeric(unlist(slot(opp.5.what.F,"x.values")))[opp.5.where.F] | |
## Calculate and store the AUC | |
opp.5.auc <- performance(opp.5.predict, measure = "auc") | |
history[i,24] <- as.numeric(unlist(slot(opp.5.auc,"y.values"))) | |
## Drop the last column | |
data <- data[,-ncol(data)] | |
} | |
# sorting each column in history and subsetting out the bottom/top 5 percent | |
for (j in 1:ncol(history)) { | |
history[,j] <- sort(history[,j]) | |
} | |
ci.hist <- history[25:975,] | |
#fix(ci.hist) | |
############################# | |
###### GRAPH IT ###### | |
############################# | |
my.xlim <- c(0,1) | |
my.ylim <- c(0,2.25) | |
par(mfcol=c(3,1),oma=c(2,10,1,2), pty="s") | |
labs.1 <- c("Grievance 4.1","Grievance 4.2","Grievance 4.3", "Opportunity 3.1","Opportunity 3.2","Opportunity 3.3","Opportunity 3.4","Opportunity 3.5") | |
# Sensitivity | |
par(mar=c(3,2,3,2)) | |
plot(x=mean(ci.hist[,1]),y=.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n",main="Mean Sensitivity w/ 90% Conf. Int.") | |
axis(labels=labs.1,at=c(.25, .5, .75, 1, 1.25, 1.5, 1.75, 2),side=2,las=1,tick=FALSE) | |
segments(x0=min(ci.hist[,1]),x1=max(ci.hist[,1]),y0=.25,y1=.25,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,4]),y=.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,4]),x1=max(ci.hist[,4]),y0=.5,y1=.5,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,7]),y=.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,7]),x1=max(ci.hist[,7]),y0=.75,y1=.75,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,10]),y=1,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,10]),x1=max(ci.hist[,10]),y0=1,y1=1,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,13]),y=1.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,13]),x1=max(ci.hist[,13]),y0=1.25,y1=1.25,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,16]),y=1.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,16]),x1=max(ci.hist[,16]),y0=1.5,y1=1.5,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,19]),y=1.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,19]),x1=max(ci.hist[,19]),y0=1.75,y1=1.75,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,22]),y=2,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,22]),x1=max(ci.hist[,22]),y0=2,y1=2,lwd=2) | |
# Specificity | |
par(mar=c(3,2,3,2)) | |
plot(x=mean(ci.hist[,2]),y=.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n",main="Mean Specificity w/ 90% Conf. Int.") | |
axis(labels=labs.1,at=c(.25, .5, .75, 1, 1.25, 1.5, 1.75, 2),side=2,las=1,tick=FALSE) | |
segments(x0=min(ci.hist[,2]),x1=max(ci.hist[,2]),y0=.25,y1=.25,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,5]),y=.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,5]),x1=max(ci.hist[,5]),y0=.5,y1=.5,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,8]),y=.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,8]),x1=max(ci.hist[,8]),y0=.75,y1=.75,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,11]),y=1,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,11]),x1=max(ci.hist[,11]),y0=1,y1=1,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,14]),y=1.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,14]),x1=max(ci.hist[,14]),y0=1.25,y1=1.25,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,17]),y=1.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,17]),x1=max(ci.hist[,17]),y0=1.5,y1=1.5,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,20]),y=1.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,20]),x1=max(ci.hist[,20]),y0=1.75,y1=1.75,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,23]),y=2,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,23]),x1=max(ci.hist[,23]),y0=2,y1=2,lwd=2) | |
# AUC | |
par(mar=c(3,2,3,2)) | |
plot(x=mean(ci.hist[,3]),y=.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n",main="Mean AUC. w/ 90% Conf. Int.") | |
axis(labels=labs.1,at=c(.25, .5, .75, 1, 1.25, 1.5, 1.75, 2),side=2,las=1,tick=FALSE) | |
segments(x0=min(ci.hist[,3]),x1=max(ci.hist[,3]),y0=.25,y1=.25,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,6]),y=.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,6]),x1=max(ci.hist[,6]),y0=.5,y1=.5,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,9]),y=.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,9]),x1=max(ci.hist[,9]),y0=.75,y1=.75,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,12]),y=1,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,12]),x1=max(ci.hist[,12]),y0=1,y1=1,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,15]),y=1.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,15]),x1=max(ci.hist[,15]),y0=1.25,y1=1.25,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,18]),y=1.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,18]),x1=max(ci.hist[,18]),y0=1.5,y1=1.5,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,21]),y=1.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,21]),x1=max(ci.hist[,21]),y0=1.75,y1=1.75,lwd=2) | |
par(new=TRUE) | |
plot(mean(ci.hist[,24]),y=2,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n") | |
segments(x0=min(ci.hist[,24]),x1=max(ci.hist[,24]),y0=2,y1=2,lwd=2) | |
##### AND DONE ###### |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment