Skip to content

Instantly share code, notes, and snippets.

@jayyonamine
Created September 4, 2012 21:24
Show Gist options
  • Save jayyonamine/3626711 to your computer and use it in GitHub Desktop.
Save jayyonamine/3626711 to your computer and use it in GitHub Desktop.
lasso of multiple imputations
##########################
##r code for greed and grievance replication/extension file
##James E. Yonamine
##4/13/2012
##data from http://www.apsanet.org/content_29436.cfm
##code takes 20 minutes to run on a 2.3 ghz intel core i5 with 4gb ram
##########################
library(foreign)
library(ROCR)
library(fields)
library(car)
library(lattice)
library(vioplot)
library(Zelig)
library(Amelia)
library(stats)
library(lars)
library(lattice)
library(MASS)
rm(list=ls())
set.seed(2012)
data <- read.dta("collier and hoeffler 2004.dta")
data<-data[complete.cases(data$warsa),] #this drops all rows with NA for warsa, i.e. drops all continuations. C&H use this approach, which is reasonable
m=100
a.out <- amelia(data, m = m, ts = "year", cs = "country")
history <- matrix(data=0, nrow=m, ncol=24)
colnames(history) <-c("sxp", "sxp2", "secm", "gy1", "oilsxp", "oilsxp2", "greedxb", "diahpeaa", "difdpeaa", "coldwar", "prevwar", "peace", "mount", "geogia", "lnpop", "frac", "elfo", "rf", "pol16", "etdo4590", "dem", "ygini","lgini", "grievxb")
for (j in 1:m){
data<-a.out$imputations[[j]]
data.x<-data.frame(data$sxp, data$sxp2, data$secm, data$gy1, data$oilsxp, data$oilsxp2,
data$greedxb, data$diahpeaa, data$difdpeaa, data$coldwar, data$prevwar, data$peace, data$mount, data$geogia,
data$lnpop, data$frac, data$elfo, data$rf, data$pol16, data$etdo4590, data$dem, data$ygini, data$lgini,
data$grievxb)
for (i in 1:ncol(data.x)) {
data.x[,i] <- (data.x[,i] - mean(data.x[,i])) / sd(data.x[,i])
}
data.x<-as.matrix(data.x)
data.y<-data$warsa
data.y.0 <- data.y - mean(data.y)
lasso<-lars(data.x, data.y.0,type="lasso", trace=TRUE)
#lasso$lambda
summary(lasso)
#lasso$beta
lasso.matrix<-as.matrix(summary(lasso))
count=1
for (k in 1:nrow(lasso.matrix)){
if (lasso.matrix[k,3]==min(lasso.matrix[,3])) {
trial<-count
break
}
else {
count=count+1
}
}
best.lasso.coef<-lasso$beta[trial,] #selects the trial # with the lowest Cp score
lasso.coef<-as.matrix(best.lasso.coef)
history[j,]<-lasso.coef[,1]
}
history.1<-history
for (i in 1:nrow(history.1)){
for (j in 1:ncol(history.1)){
if(history.1[i,j]!=0){
history.1[i,j]<-1
}
}
}
names<-c("sxp", "sxp2", "secm", "gy1", "oilsxp", "oilsxp2", "greedxb", "diahpeaa", "difdpeaa", "coldwar", "prevwar", "peace", "mount", "geogia", "lnpop", "frac", "elfo", "rf", "pol16", "etdo4590", "dem", "ygini","lgini", "grievxb")
colors=c("black", "black","black", "black","black", "black","black", "black","black", "grey","grey", "grey"," grey", "grey","grey", "white","white", "white","white", "white","white", "white","white", "white")
##plotting number of times each variable is selected into lasso
summary <- matrix(data=0, nrow=1, ncol=24)
colnames(summary) <-c("sxp", "sxp2", "secm", "gy1", "oilsxp", "oilsxp2", "greedxb", "diahpeaa", "difdpeaa", "coldwar", "prevwar", "peace", "mount", "geogia", "lnpop", "frac", "elfo", "rf", "pol16", "etdo4590", "dem", "ygini","lgini", "grievxb")
for (j in 1:ncol(summary)){
summary[1,j]<-sum(history.1[,j])
}
legend = c("Grievance variables","'Other' variables","Greed variables")
summary<-c(summary)
barplot(summary, las=2, ylab="# of times selected by Lasso out of 100 Imputations", col=colors, names.arg=names)
legend(16, 100, legend, fill=c("black","grey","white"))
title(main="The Importance Standardized 'Opportunity' and 'Grievance' Variables \n Indicated by Lasso Selection (Larger Values Reflect Higher Importance)")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment