out-of-sample Bootstrapped logit
##########################
##r code for greed and grievance replication/extension file
##James E. Yonamine (worked off of VJD's original code)
##2/14/2012
##data from http://www.apsanet.org/content_29436.cfm
##########################
#if you don't have the packages below, uncomment these lines to install them.
#install.packages("ROCR")
#install.packages("fields")
#install.packages("car")
#install.packages("lattice")
#install.packages("vioplot")
library(foreign)
library(ROCR)
library(fields)
library(car)
library(lattice)
library(vioplot)
library(Zelig)
library(Amelia)
library(stats)
rm(list=ls())
data <- read.dta("collier and hoeffler 2004.dta")
data<-data[complete.cases(data$warsa),] # drop all rows with NA for warsa (i.e. all war continuations); C&H use this approach, which is reasonable
a.out <- amelia(data, m = 1, ts = "year", cs = "country")
data<-a.out$imputations[[1]]
## sanity check: coefficients should be essentially unchanged after imputation (they are)
z.out.1 <-zelig(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop , model = "logit", data=data )
summary(z.out.1)
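## Optional check (sketch): compare complete-case vs. imputed-data coefficients directly.
## Re-reads the original .dta since `data` now holds the imputed values; glm with a
## logit link mirrors the Zelig model above. Not used in the rest of the script.
raw <- read.dta("collier and hoeffler 2004.dta")
raw <- raw[complete.cases(raw$warsa),]
f.griev <- warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop
fit.cc  <- glm(f.griev, family=binomial(link="logit"), data=raw)  # glm drops NA rows (listwise deletion)
fit.imp <- glm(f.griev, family=binomial(link="logit"), data=data) # single Amelia imputation
round(cbind(complete.case=coef(fit.cc), imputed=coef(fit.imp)), 3)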
##bootstrapping out-of-sample##
## Bootstrap the train/test split for out-of-sample forecasting
bs.num <- 1000
history <- matrix(data=0, nrow=bs.num, ncol=24)
colnames(history) <- c("griev_1.sens", "griev_1.spec", "griev_1.auc",
                       "griev_2.sens", "griev_2.spec", "griev_2.auc",
                       "griev_3.sens", "griev_3.spec", "griev_3.auc",
                       "opp_1.sens", "opp_1.spec", "opp_1.auc",
                       "opp_2.sens", "opp_2.spec", "opp_2.auc",
                       "opp_3.sens", "opp_3.spec", "opp_3.auc",
                       "opp_4.sens", "opp_4.spec", "opp_4.auc",
                       "opp_5.sens", "opp_5.spec", "opp_5.auc")
for (i in 1:bs.num) {
set.seed(i)
## Subset the training and outsample randomly
data <- cbind(data,runif(nrow(data)))
train.data<- data[which(data[,ncol(data)] >= 0.5),]
test.data <- data[which(data[,ncol(data)] < 0.5),]
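## (Equivalent split sketch that avoids appending/dropping a temporary column;
##  same expected ~50/50 split. Not used below.)
## split.idx  <- runif(nrow(data)) >= 0.5
## train.data <- data[split.idx,]
## test.data  <- data[!split.idx,]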
##############################################
###############Grievance Models################
##############################################
#############Grievance 1########################
grievance.1 <- glm(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop, family=binomial(link="logit"), data = train.data)
grievance.1.predict <- predict(grievance.1, newdata=test.data, type="response")
grievance.1.y.hat<-as.matrix(grievance.1.predict)
## Establish prediction objects from ROCR package
y<-as.matrix(test.data$warsa)
grievance.1.predict <-prediction(grievance.1.y.hat,y)
## Calculate the F-measure, find the cutoff that maximizes it, and compute sensitivity and specificity at that cutoff
grievance.1.f <- performance(grievance.1.predict , measure = "f")
grievance.1.where.F <- which.max(as.numeric(unlist(slot(grievance.1.f,"y.values"))))
grievance.1.what.F <- performance(grievance.1.predict, measure="sens", x.measure="spec")
history[i,1] <- as.numeric(unlist(slot(grievance.1.what.F,"y.values")))[grievance.1.where.F]
history[i,2] <- as.numeric(unlist(slot(grievance.1.what.F,"x.values")))[grievance.1.where.F]
## Calculate and store the AUC
grievance.1.auc <- performance(grievance.1.predict, measure = "auc")
history[i,3] <- as.numeric(unlist(slot(grievance.1.auc,"y.values")))
#################Grievance 2########################
grievance.2 <- glm(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop + ygini, family=binomial(link="logit"), data = train.data)
grievance.2.predict <- predict(grievance.2, newdata=test.data, type="response")
grievance.2.y.hat<-as.matrix(grievance.2.predict)
## Establish prediction objects from ROCR package
y<-as.matrix(test.data$warsa)
grievance.2.predict <-prediction(grievance.2.y.hat,y)
## Calculate the F-measure, find the cutoff that maximizes it, and compute sensitivity and specificity at that cutoff
grievance.2.with.f <- performance(grievance.2.predict , measure = "f")
grievance.2.where.F <- which.max(as.numeric(unlist(slot(grievance.2.with.f,"y.values"))))
grievance.2.what.F <- performance(grievance.2.predict, measure="sens", x.measure="spec")
history[i,4] <- as.numeric(unlist(slot(grievance.2.what.F,"y.values")))[grievance.2.where.F]
history[i,5] <- as.numeric(unlist(slot(grievance.2.what.F,"x.values")))[grievance.2.where.F]
## Calculate and store the AUC
grievance.2.auc <- performance(grievance.2.predict, measure = "auc")
history[i,6] <- as.numeric(unlist(slot(grievance.2.auc,"y.values")))
#################Grievance 3########################
grievance.3 <- glm(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace + mount + geogia + lnpop + lgini, family=binomial(link="logit"), data = train.data)
grievance.3.predict <- predict(grievance.3, newdata=test.data, type="response")
grievance.3.y.hat<-as.matrix(grievance.3.predict)
## Establish prediction objects from ROCR package
y<-as.matrix(test.data$warsa)
grievance.3.predict <-prediction(grievance.3.y.hat,y)
## Calculate the F-measure, find the cutoff that maximizes it, and compute sensitivity and specificity at that cutoff
grievance.3.with.f <- performance(grievance.3.predict , measure = "f")
grievance.3.where.F <- which.max(as.numeric(unlist(slot(grievance.3.with.f,"y.values"))))
grievance.3.what.F <- performance(grievance.3.predict, measure="sens", x.measure="spec")
history[i,7] <- as.numeric(unlist(slot(grievance.3.what.F,"y.values")))[grievance.3.where.F]
history[i,8] <- as.numeric(unlist(slot(grievance.3.what.F,"x.values")))[grievance.3.where.F]
## Calculate and store the AUC
grievance.3.auc <- performance(grievance.3.predict, measure = "auc")
history[i,9] <- as.numeric(unlist(slot(grievance.3.auc,"y.values")))
##############################################
###############Opportunity Models################
##############################################
##############Opportunity 1#####################
opportunity.1 <- glm(warsa ~sxp + sxp2 + coldwar + secm + gy1 + peace + prevwara + mount + geogia + frac + lnpop, family=binomial(link="logit"), data = train.data)
opportunity.1.predict <- predict(opportunity.1, newdata=test.data, type="response")
opportunity.1.y.hat<-as.matrix(opportunity.1.predict)
## Establish prediction objects from ROCR package
y<-as.matrix(test.data$warsa)
opp.1.predict <-prediction(opportunity.1.y.hat,y)
## Calculate the F-measure, find the cutoff that maximizes it, and compute sensitivity and specificity at that cutoff
opp.1.f <- performance(opp.1.predict , measure = "f")
opp.1.where.F <- which.max(as.numeric(unlist(slot(opp.1.f,"y.values"))))
opp.1.what.F <- performance(opp.1.predict, measure="sens", x.measure="spec")
history[i,10] <- as.numeric(unlist(slot(opp.1.what.F,"y.values")))[opp.1.where.F]
history[i,11] <- as.numeric(unlist(slot(opp.1.what.F,"x.values")))[opp.1.where.F]
## Calculate and store the AUC
opp.1.auc <- performance(opp.1.predict, measure = "auc")
history[i,12] <- as.numeric(unlist(slot(opp.1.auc,"y.values")))
##############Opportunity 2#####################
opportunity.2 <- glm(warsa ~sxp + sxp2 + coldwar + secm + gy1 + peace + mount + geogia + frac + lnpop, family=binomial(link="logit"), data = train.data)
opportunity.2.predict <- predict(opportunity.2, newdata=test.data, type="response")
opportunity.2.y.hat<-as.matrix(opportunity.2.predict)
## Establish prediction objects from ROCR package
y<-as.matrix(test.data$warsa)
opp.2.predict <-prediction(opportunity.2.y.hat,y)
## Calculate the F-measure, find the cutoff that maximizes it, and compute sensitivity and specificity at that cutoff
opp.2.f <- performance(opp.2.predict , measure = "f")
opp.2.where.F <- which.max(as.numeric(unlist(slot(opp.2.f,"y.values"))))
opp.2.what.F <- performance(opp.2.predict, measure="sens", x.measure="spec")
history[i,13] <- as.numeric(unlist(slot(opp.2.what.F,"y.values")))[opp.2.where.F]
history[i,14] <- as.numeric(unlist(slot(opp.2.what.F,"x.values")))[opp.2.where.F]
## Calculate and store the AUC
opp.2.auc <- performance(opp.2.predict, measure = "auc")
history[i,15] <- as.numeric(unlist(slot(opp.2.auc,"y.values")))
##############Opportunity 3#####################
opportunity.3 <- glm(warsa ~sxp + sxp2 + coldwar + secm + lngdp_ + gy1 + peace + mount + geogia + frac + lnpop, family=binomial(link="logit"), data = train.data)
opportunity.3.predict <- predict(opportunity.3, newdata=test.data, type="response")
opportunity.3.y.hat<-as.matrix(opportunity.3.predict)
## Establish prediction objects from ROCR package
y<-as.matrix(test.data$warsa)
opp.3.predict <-prediction(opportunity.3.y.hat,y)
## Calculate the F-measure, find the cutoff that maximizes it, and compute sensitivity and specificity at that cutoff
opp.3.f <- performance(opp.3.predict , measure = "f")
opp.3.where.F <- which.max(as.numeric(unlist(slot(opp.3.f,"y.values"))))
opp.3.what.F <- performance(opp.3.predict, measure="sens", x.measure="spec")
history[i,16] <- as.numeric(unlist(slot(opp.3.what.F,"y.values")))[opp.3.where.F]
history[i,17] <- as.numeric(unlist(slot(opp.3.what.F,"x.values")))[opp.3.where.F]
## Calculate and store the AUC
opp.3.auc <- performance(opp.3.predict, measure = "auc")
history[i,18] <- as.numeric(unlist(slot(opp.3.auc,"y.values")))
##############Opportunity 4#####################
opportunity.4 <- glm(warsa ~ sxp + sxp2 + lngdp_ + peace + lnpop + diaspeaa , family=binomial(link="logit"), data = train.data)
opportunity.4.predict <- predict(opportunity.4, newdata=test.data, type="response")
opportunity.4.y.hat<-as.matrix(opportunity.4.predict)
## Establish prediction objects from ROCR package
y<-as.matrix(test.data$warsa)
opp.4.predict <-prediction(opportunity.4.y.hat,y)
## Calculate the F-measure, find the cutoff that maximizes it, and compute sensitivity and specificity at that cutoff
opp.4.f <- performance(opp.4.predict , measure = "f")
opp.4.where.F <- which.max(as.numeric(unlist(slot(opp.4.f,"y.values"))))
opp.4.what.F <- performance(opp.4.predict, measure="sens", x.measure="spec")
history[i,19] <- as.numeric(unlist(slot(opp.4.what.F,"y.values")))[opp.4.where.F]
history[i,20] <- as.numeric(unlist(slot(opp.4.what.F,"x.values")))[opp.4.where.F]
## Calculate and store the AUC
opp.4.auc <- performance(opp.4.predict, measure = "auc")
history[i,21] <- as.numeric(unlist(slot(opp.4.auc,"y.values")))
##############Opportunity 5#####################
opportunity.5 <- glm(warsa ~ sxp + sxp2 + lngdp_ + peace + lnpop + diahpeaa + difdpeaa , family=binomial(link="logit"), data = train.data)
opportunity.5.predict <- predict(opportunity.5, newdata=test.data, type="response")
opportunity.5.y.hat<-as.matrix(opportunity.5.predict)
## Establish prediction objects from ROCR package
y<-as.matrix(test.data$warsa)
opp.5.predict <-prediction(opportunity.5.y.hat,y)
## Calculate the F-measure, find the cutoff that maximizes it, and compute sensitivity and specificity at that cutoff
opp.5.f <- performance(opp.5.predict , measure = "f")
opp.5.where.F <- which.max(as.numeric(unlist(slot(opp.5.f,"y.values"))))
opp.5.what.F <- performance(opp.5.predict, measure="sens", x.measure="spec")
history[i,22] <- as.numeric(unlist(slot(opp.5.what.F,"y.values")))[opp.5.where.F]
history[i,23] <- as.numeric(unlist(slot(opp.5.what.F,"x.values")))[opp.5.where.F]
## Calculate and store the AUC
opp.5.auc <- performance(opp.5.predict, measure = "auc")
history[i,24] <- as.numeric(unlist(slot(opp.5.auc,"y.values")))
## Drop the temporary uniform column appended for the train/test split
data <- data[,-ncol(data)]
}
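## The eight model blocks above repeat the same fit/predict/evaluate steps. A sketch of a
## helper that factors that pattern out (same glm and ROCR calls as above; formula, train,
## and test data passed in). Shown for reference only; not used elsewhere in the script.
eval.logit <- function(formula, train, test) {
  fit <- glm(formula, family=binomial(link="logit"), data=train)
  p.hat <- predict(fit, newdata=test, type="response")
  pred <- prediction(as.matrix(p.hat), as.matrix(test$warsa))
  f <- performance(pred, measure="f")
  cut <- which.max(as.numeric(unlist(slot(f, "y.values"))))  # cutoff that maximizes F
  ss <- performance(pred, measure="sens", x.measure="spec")
  c(sens = as.numeric(unlist(slot(ss, "y.values")))[cut],
    spec = as.numeric(unlist(slot(ss, "x.values")))[cut],
    auc  = as.numeric(unlist(slot(performance(pred, measure="auc"), "y.values"))))
}
## e.g. history[i,1:3] <- eval.logit(warsa ~ elfo + rf + pol16 + etdo4590 + dem + peace +
##                                   mount + geogia + lnpop, train.data, test.data)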
# sort each column of history and keep the middle 90% (drop the bottom/top 5 percent of the 1000 draws)
for (j in 1:ncol(history)) {
history[,j] <- sort(history[,j])
}
ci.hist <- history[51:950,]
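## Equivalent summary without sorting in place (sketch, not used below):
## ci.lo   <- apply(history, 2, quantile, probs=0.05)   # lower bound of the 90% interval
## ci.hi   <- apply(history, 2, quantile, probs=0.95)   # upper bound of the 90% interval
## ci.mean <- colMeans(history)                         # mean of each metric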
#fix(ci.hist)
#############################
###### GRAPH IT ######
#############################
my.xlim <- c(0,1)
my.ylim <- c(0,2.25)
par(mfcol=c(3,1),oma=c(2,10,1,2), pty="s")
labs.1 <- c("Grievance 4.1","Grievance 4.2","Grievance 4.3", "Opportunity 3.1","Opportunity 3.2","Opportunity 3.3","Opportunity 3.4","Opportunity 3.5")
# Sensitivity
par(mar=c(3,2,3,2))
plot(x=mean(ci.hist[,1]),y=.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n",main="Mean Sensitivity w/ 90% Conf. Int.")
axis(labels=labs.1,at=c(.25, .5, .75, 1, 1.25, 1.5, 1.75, 2),side=2,las=1,tick=FALSE)
segments(x0=min(ci.hist[,1]),x1=max(ci.hist[,1]),y0=.25,y1=.25,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,4]),y=.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,4]),x1=max(ci.hist[,4]),y0=.5,y1=.5,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,7]),y=.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,7]),x1=max(ci.hist[,7]),y0=.75,y1=.75,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,10]),y=1,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,10]),x1=max(ci.hist[,10]),y0=1,y1=1,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,13]),y=1.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,13]),x1=max(ci.hist[,13]),y0=1.25,y1=1.25,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,16]),y=1.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,16]),x1=max(ci.hist[,16]),y0=1.5,y1=1.5,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,19]),y=1.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,19]),x1=max(ci.hist[,19]),y0=1.75,y1=1.75,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,22]),y=2,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,22]),x1=max(ci.hist[,22]),y0=2,y1=2,lwd=2)
# Specificity
par(mar=c(3,2,3,2))
plot(x=mean(ci.hist[,2]),y=.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n",main="Mean Specificity w/ 90% Conf. Int.")
axis(labels=labs.1,at=c(.25, .5, .75, 1, 1.25, 1.5, 1.75, 2),side=2,las=1,tick=FALSE)
segments(x0=min(ci.hist[,2]),x1=max(ci.hist[,2]),y0=.25,y1=.25,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,5]),y=.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,5]),x1=max(ci.hist[,5]),y0=.5,y1=.5,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,8]),y=.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,8]),x1=max(ci.hist[,8]),y0=.75,y1=.75,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,11]),y=1,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,11]),x1=max(ci.hist[,11]),y0=1,y1=1,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,14]),y=1.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,14]),x1=max(ci.hist[,14]),y0=1.25,y1=1.25,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,17]),y=1.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,17]),x1=max(ci.hist[,17]),y0=1.5,y1=1.5,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,20]),y=1.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,20]),x1=max(ci.hist[,20]),y0=1.75,y1=1.75,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,23]),y=2,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,23]),x1=max(ci.hist[,23]),y0=2,y1=2,lwd=2)
# AUC
par(mar=c(3,2,3,2))
plot(x=mean(ci.hist[,3]),y=.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n",main="Mean AUC w/ 90% Conf. Int.")
axis(labels=labs.1,at=c(.25, .5, .75, 1, 1.25, 1.5, 1.75, 2),side=2,las=1,tick=FALSE)
segments(x0=min(ci.hist[,3]),x1=max(ci.hist[,3]),y0=.25,y1=.25,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,6]),y=.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,6]),x1=max(ci.hist[,6]),y0=.5,y1=.5,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,9]),y=.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,9]),x1=max(ci.hist[,9]),y0=.75,y1=.75,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,12]),y=1,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,12]),x1=max(ci.hist[,12]),y0=1,y1=1,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,15]),y=1.25,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,15]),x1=max(ci.hist[,15]),y0=1.25,y1=1.25,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,18]),y=1.5,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,18]),x1=max(ci.hist[,18]),y0=1.5,y1=1.5,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,21]),y=1.75,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,21]),x1=max(ci.hist[,21]),y0=1.75,y1=1.75,lwd=2)
par(new=TRUE)
plot(mean(ci.hist[,24]),y=2,xlim=my.xlim,ylim=my.ylim,xlab="",ylab="",yaxt="n")
segments(x0=min(ci.hist[,24]),x1=max(ci.hist[,24]),y0=2,y1=2,lwd=2)
##### AND DONE ######