Created
January 31, 2019 16:14
-
-
Save adamlauretig/ce536aefe21523e2757ee4f261242b0c to your computer and use it in GitHub Desktop.
Code to create a monthly panel from the Militarized Interstate Disputes dataset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Code to create a monthly, directed dyadic panel of
# Militarized Interstate Dispute (MID) data, with directional initiation.
# Uses cshapes to create a monthly list of all dyads, then uses data.table
# to transform the MID data to a monthly panel and merge the two.
# Code by Adam Lauretig, 2019
# website: adamlauretig.github.io
# Session setup ----
# NOTE(review): rm(list = ls()) wipes the caller's workspace when this file is
# source()d; it is kept because the script assumes it starts from a clean
# session.
rm(list = ls())
options(stringsAsFactors = FALSE)
seed_to_use <- 216
set.seed(seed_to_use)
library(data.table)
library(parallel)
library(cshapes)
# Load the dyad list; the .rdata file is expected to provide an object named
# `master_dyads` (for code to create this, see appendix at bottom).
load("master_dyads_1946_1980.rdata")
setDT(master_dyads)
# revised MID data from http://svmiller.com/gml-mid-data/
mids <- fread("gml-ddy-2.1.csv")
# gets dispute start days ----
mida <- fread("gml-mida-2.1.csv")
mida <- mida[, .(
  dispnum3, stday, stmon, styear, endday, endmon, endyear, fatality, hostlev)]
mida <- mida[styear > 1945 & styear < 1981]
# Zero-pad days/months to two characters so they can be pasted into
# "YYYY-MM-DD" strings. "%02d" is used on integers because the "0" flag on
# "%s" is platform-dependent (zero-pads on some platforms, ignored on others).
mida[, stmon := sprintf("%02d", as.integer(stmon))]
mida[, stday := sprintf("%02d", as.integer(stday))]
mida[, endmon := sprintf("%02d", as.integer(endmon))]
mida[, endday := sprintf("%02d", as.integer(endday))]
# A day coded -9 is replaced with the first of the month.
mida[, `:=`(endday = ifelse(endday == "-9", "01", endday),
            stday = ifelse(stday == "-9", "01", stday))]
# working w/mids ----
# year range of interest
mids <- mids[year > 1945 & year < 1981]
# 1 when the fatality code is non-missing and positive, 0 otherwise.
# NOTE: fatal_mid is not among the columns kept in mids_sub below.
mids[, fatal_mid := as.numeric(!is.na(fatality) & fatality > 0)]
mids_sub <- mids[, .(year, ccode1, ccode2, hostlev, sidea1, sidea2, dispnum)]
# merge dispute data with dyad data ----
# Both tables carry a `hostlev` column, so after the merge the copy from
# mids_sub is `hostlev.x` and the copy from mida is `hostlev.y`.
mid_data <- merge(
  mids_sub, mida, by.x = "dispnum", by.y = "dispnum3", all.x = TRUE)
# since we're working w/months, set all start and end dates to "YYYY-MM-01"
mid_data[, start_date := paste0(styear, "-", stmon, "-", "01")]
mid_data[, end_date := paste0(endyear, "-", endmon, "-", "01")]
# clear out some unneeded columns
mid_data[, `:=`(stday = NULL, stmon = NULL, styear = NULL, endday = NULL,
                endmon = NULL, endyear = NULL, hostlev.y = NULL)]
# here, we only want to look at valid disputes, we're removing any odd ducks:
# rows without a dispute number, and rows with no fatality value (which
# includes rows that found no match in mida, since all.x = TRUE keeps them)
disputes <- mid_data[!is.na(dispnum)]
disputes <- disputes[!is.na(fatality)]
dispute_nums <- unique(disputes$dispnum)
# Expand one dispute into a monthly panel and code onsets and initiations.
#
# Args:
#   i:           position of the dispute within `dispute_ids`.
#   dispute_dt:  data.table of dispute dyads; must contain dispnum, ccode1,
#                ccode2, hostlev.x, sidea1, sidea2, fatality, start_date and
#                end_date ("YYYY-MM-DD" strings).
#   dispute_ids: vector of dispute numbers that `i` indexes into. Defaults to
#                the script-level `dispute_nums`, as the function always used.
#
# Returns:
#   A data.table with one row per (dispnum, ccode1, ccode2, hostlev.x, sidea1,
#   sidea2, fatality) group and month between start_date and end_date, plus:
#     onset    - 1 in the earliest month of the dispute, else 0
#     onset_na - 1 in the earliest month of the dispute, else NA
#     init     - 1 when onset == 1 and sidea1 == 1, else 0
#     init_na  - 1 when onset == 1 and sidea1 == 1, else NA
make_monthly_mid <- function(i, dispute_dt = disputes,
                             dispute_ids = dispute_nums) {
  disp <- dispute_ids[i]
  dispute_rows <- dispute_dt[dispnum == disp]
  # One row per month for every dyad in the dispute; the first start/end date
  # within each group defines the span.
  monthly <- dispute_rows[
    , list(month = seq(as.Date(start_date[1]), as.Date(end_date[1]),
                       by = "month")),
    by = .(dispnum, ccode1, ccode2, hostlev.x, sidea1, sidea2, fatality)]
  # min(month) is taken over the whole dispute, not per dyad.
  monthly[, onset := ifelse(month == min(month), 1, 0)]
  monthly[, onset_na := ifelse(month == min(month), 1, NA)]
  monthly[, init := ifelse(onset == 1 & sidea1 == 1, 1, 0)]
  monthly[, init_na := ifelse(onset == 1 & sidea1 == 1, 1, NA)]
  monthly
}
# merging and cleaning mids to dyads
# mclapply relies on forking, and mc.cores > 1 is an error on Windows, so fall
# back to a single core there.
n_cores <- if (.Platform$OS.type == "windows") 1L else 8L
monthly_mid_list <- mclapply(seq_along(dispute_nums), make_monthly_mid,
                             dispute_dt = disputes, mc.cores = n_cores)
# mclapply hands back "try-error" objects for failed jobs instead of stopping;
# surface those here rather than letting rbindlist fail with an unclear message.
failed_jobs <- vapply(monthly_mid_list, inherits, logical(1),
                      what = "try-error")
if (any(failed_jobs)) {
  stop("make_monthly_mid failed for dispute(s): ",
       paste(dispute_nums[failed_jobs], collapse = ", "), call. = FALSE)
}
monthly_mids <- rbindlist(monthly_mid_list)
# master_dyads$year holds month-start Dates (see appendix), so it is matched
# against `month` in the monthly dispute panel.
master_mids <- merge(master_dyads, monthly_mids,
  by.x = c("ccode1", "ccode2", "year"), by.y = c("ccode1", "ccode2", "month"),
  all = TRUE) # since we can have multiple mids per year, set all = TRUE
# Dyad-months without a dispute get 0; the *_na variants stay NA for
# dispute rows that are not an onset/initiation.
master_mids[, `:=`(
  hostlev.x = ifelse(is.na(hostlev.x), 0, hostlev.x),
  fatality = ifelse(is.na(fatality), 0, fatality),
  onset = ifelse(is.na(onset), 0, onset),
  onset_na = ifelse(is.na(onset_na) & is.na(dispnum), 0, onset_na),
  init = ifelse(is.na(init), 0, init),
  init_na = ifelse(is.na(init_na) & is.na(dispnum), 0, init_na)
)]
# diagnostic: number of rows per directed dyad-month
test <- master_mids[, .N, by = .(ccode1, ccode2, year)]
save(master_mids, file = "master_mids.rdata")
# Appendix ----
# create all dyad months
# this took about 20 minutes with 8 cores, as a heads up
# `years` is a vector of month-start Dates, one per month from 1946 to 1980
years <- seq.Date(from = as.Date("1946-01-01"), to = as.Date("1980-12-31"),
                  by = "month")
# dyad list for the first month, with self-pairs removed
dl <- distlist(as.Date("1946-01-01"), type = "capdist", useGW = FALSE)
dl2 <- dl[dl$ccode1 != dl$ccode2, ]
# Build the list of directed dyads that exist in one month.
#
# Args:
#   i:           position of the month within `dyad_months`.
#   dyad_months: vector of Dates to index into. Defaults to the script-level
#                `years` vector, as the function always used.
#
# Returns:
#   The result of distlist() for that date with self-pairs (ccode1 == ccode2)
#   and the capdist column removed, plus a `year` column holding the Date.
make_dyads <- function(i, dyad_months = years) {
  dyad_month <- dyad_months[i]
  all_pairs <- distlist(dyad_month, type = "capdist", useGW = FALSE)
  dyads <- all_pairs[all_pairs$ccode1 != all_pairs$ccode2, ]
  dyads$capdist <- NULL
  # rep() keeps the assignment valid even when no dyads remain (zero rows)
  dyads$year <- rep(dyad_month, nrow(dyads))
  dyads
}
# Build the dyad list for every month in `years` in parallel and stack them.
# NOTE(review): when the whole file is run top to bottom, this rebuilds and
# overwrites `master_dyads` after master_mids has already been saved.
# mclapply cannot use mc.cores > 1 on Windows, so fall back to one core there.
n_cores <- if (.Platform$OS.type == "windows") 1L else 8L
master_dyads <- do.call(
  rbind, mclapply(seq_along(years), make_dyads, mc.cores = n_cores))
# save(master_dyads, file = "~/Dropbox/Dissertation_data/master_dyads_1946_1980.rdata")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment