Last active
May 31, 2016 11:59
-
-
Save ulfelder/5505468 to your computer and use it in GitHub Desktop.
R code for selecting events from GDELT files that look like they might be atrocities (i.e., lethal violence against civilians), summing them by location, and mapping them.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Housekeeping: clear workspace and load packages
# NOTE(review): rm(list = ls()) wipes every object in the caller's global
# environment when this script is sourced. It is kept here to preserve the
# original behaviour; running the script in a fresh R session is safer.
rm(list = ls(all.names = TRUE))
library(lubridate)
library(RgoogleMaps)
library(ggmap)
library(mapproj)
library(ggplot2)
library(plyr)

# Load 2011 GDELT data from http://eventdata.psu.edu/data.dir/GDELT.html
# (tab-delimited file with a header row, expected in the working directory)
global.2011 <- read.delim("2011.reduced.txt", header = TRUE)

# Give every event row a weight of 1 so that events can later be counted by
# summing this column within groups
global.2011$count <- 1
# Split Day variable into separate year, month, day vars for later summing.
#
# Args:
#   df: data frame with a `Day` column whose values read as 8 characters,
#       YYYYMMDD (numeric, character or factor are all accepted because the
#       column is passed through as.character() first).
# Returns:
#   `df` with three numeric columns appended: `year` (characters 1-4),
#   `month` (characters 5-6) and `day` (characters 7-8).
splitdate <- function(df) {
  stopifnot(is.data.frame(df), "Day" %in% names(df))

  # Convert once instead of once per component
  ymd <- as.character(df[["Day"]])

  cbind(
    df,
    year = as.numeric(substr(ymd, 1, 4)),
    month = as.numeric(substr(ymd, 5, 6)),
    day = as.numeric(substr(ymd, 7, 8))
  )
}
global.2011 <- splitdate(global.2011)

# Fix variable type for lat and lon for mapping, etc.
# as.character() comes first so that a factor column is converted through its
# labels rather than its internal integer level codes.
global.2011$Actor1Geo_Lat <-
  as.numeric(as.character(global.2011$Actor1Geo_Lat))
global.2011$Actor1Geo_Long <-
  as.numeric(as.character(global.2011$Actor1Geo_Long))
# Filtering functions for possible atrocities w/state & non-state perpetrators
# where 'df' is data frame from GDELT reduced file.
#
# A row is kept when all three conditions hold:
#   1. EventCode is one of the codes in `event.codes`;
#   2. Actor2Code (the target) has one of `target.roles` in characters 1-3
#      or in characters 4-6;
#   3. Actor1Code (the source) has one of the requested source roles in
#      characters 1-3 or in characters 4-6.
# Rows whose tested field is NA never match, which is the same outcome
# subset() gives for an NA condition.

# Shared worker behind atr.state() and atr.rebel().
#
# Args:
#   df:           data frame with EventCode, Actor1Code and Actor2Code columns.
#   source.roles: character vector of three-letter role codes accepted for
#                 Actor1Code.
# Returns:
#   The matching rows of `df`, all columns, original row names preserved.
atr.filter <- function(df, source.roles) {
  needed <- c("EventCode", "Actor1Code", "Actor2Code")
  stopifnot(is.data.frame(df), all(needed %in% names(df)))

  event.codes <- c(180, 1823, 190, 193, 194, 195,
                   200, 201, 202, 203, 204, 205)
  target.roles <- c("CVL", "OPP", "EDU", "LAB", "REL", "HLH", "REF", "MED")

  # TRUE where either three-letter slot of the actor code is in `roles`.
  # %in% yields FALSE (never NA) for missing codes.
  has.role <- function(code, roles) {
    code <- as.character(code)
    substr(code, 1, 3) %in% roles | substr(code, 4, 6) %in% roles
  }

  keep <- df[["EventCode"]] %in% event.codes &
    has.role(df[["Actor2Code"]], target.roles) &
    has.role(df[["Actor1Code"]], source.roles)

  df[keep, , drop = FALSE]
}

# Events whose source actor is coded GOV, MIL, COP or SPY (state perpetrators).
atr.state <- function(df) {
  atr.filter(df, c("GOV", "MIL", "COP", "SPY"))
}

# Events whose source actor is coded REB or SEP (non-state perpetrators).
atr.rebel <- function(df) {
  atr.filter(df, c("REB", "SEP"))
}
# Apply filtering functions to 2011 data
state.2011 <- atr.state(global.2011)
rebel.2011 <- atr.rebel(global.2011)

# Sum by source/target/event combo, sort in descending order, and inspect
# top 30
state.triad.2011 <- ddply(state.2011, .(Actor1Code, Actor2Code, EventCode),
                          summarize, count = sum(count))
rebel.triad.2011 <- ddply(rebel.2011, .(Actor1Code, Actor2Code, EventCode),
                          summarize, count = sum(count))
state.triad.2011 <- state.triad.2011[order(-state.triad.2011$count), ]
rebel.triad.2011 <- rebel.triad.2011[order(-rebel.triad.2011$count), ]

# head() instead of [1:30, ]: indexing past the last row would print filler
# rows of NA whenever a table has fewer than 30 combinations.
print(head(state.triad.2011, 30))
print(head(rebel.triad.2011, 30))
# Sum by geocoded location for mapping, locate countries, inspect top 30
state.loc.2011 <- ddply(state.2011, .(Actor1Geo_Lat, Actor1Geo_Long),
                        summarize, count = sum(count))
rebel.loc.2011 <- ddply(rebel.2011, .(Actor1Geo_Lat, Actor1Geo_Long),
                        summarize, count = sum(count))

# Add a `where` column holding the map.where() lookup of each location in the
# "world" database.
# NOTE(review): the original code skips the first row and leaves its `where`
# as NA; the reason is not visible in this script, so the skip is preserved.
# Confirm whether row 1 should be looked up as well.
# seq_len(n)[-1] replaces 2:n, which expands to c(2, 1) or c(2, 1, 0) when the
# table has one or zero rows.
add.where <- function(loc) {
  where <- rep(NA_character_, nrow(loc))
  rows <- seq_len(nrow(loc))[-1]
  if (length(rows) > 0) {
    where[rows] <- map.where(database = "world",
                             x = loc$Actor1Geo_Long[rows],
                             y = loc$Actor1Geo_Lat[rows])
  }
  loc$where <- where
  loc
}
state.loc.2011 <- add.where(state.loc.2011)
rebel.loc.2011 <- add.where(rebel.loc.2011)

# head() instead of [1:30, ] so short tables are not padded with NA rows
print(head(state.loc.2011, 30))
print(head(rebel.loc.2011, 30))
# Draw one map: a black-and-white Google terrain base map centred on
# (lon, lat) at the given zoom, overlaid with jittered points sized by event
# count. Rebel locations are filled blue2, state locations red2.
#
# Args:
#   lon, lat: map centre in decimal degrees.
#   zoom:     zoom level passed to get_map().
#   rebel, state: location tables with Actor1Geo_Long, Actor1Geo_Lat and
#                 count columns; default to the tables built earlier in
#                 this script.
# Returns:
#   The ggplot object, invisibly, after printing it.
# NOTE(review): recent ggmap releases appear to require a Google API key
# registered with register_google() before source = "google" works. Confirm
# for the installed version.
atr.map <- function(lon, lat, zoom,
                    rebel = rebel.loc.2011, state = state.loc.2011) {
  base <- get_map(location = c(lon = lon, lat = lat), source = "google",
                  maptype = "terrain", color = "bw", zoom = zoom)

  # One point layer per perpetrator type; only data and fill colour differ
  point.layer <- function(loc, fill) {
    geom_point(data = loc, position = "jitter",
               aes(x = Actor1Geo_Long, y = Actor1Geo_Lat, size = count),
               alpha = 0.5, pch = 21, colour = "black", fill = fill)
  }

  p <- ggmap(base) +
    point.layer(rebel, "blue2") +
    point.layer(state, "red2")
  print(p)
  invisible(p)
}

# Map: Africa
atr.map(lon = 18.35, lat = 4.22, zoom = 3)

# Map: Asia and Middle East
atr.map(lon = 77.12, lat = 28.36, zoom = 3)

# Map: Syria (centred on the midpoint of the bounding box below)
lat <- c(32, 38)
lon <- c(35, 43)
atr.map(lon = mean(lon), lat = mean(lat), zoom = 7)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment