# Source: GitHub gist timofei7/6124297.
# Note: the gist page warned that this file may contain bidirectional Unicode
# text that can be interpreted or compiled differently from how it is displayed.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Housekeeping: load packages.
# The workspace is deliberately NOT cleared with rm(list = ls(all = TRUE)):
# doing so silently destroys whatever the caller had in their session, and
# nothing below depends on starting from an empty workspace.
library(lubridate)
library(RgoogleMaps)
library(ggmap)
library(mapproj)
library(ggplot2)
library(plyr)

# Load 2011 GDELT data from http://eventdata.psu.edu/data.dir/GDELT.html
# The tab-delimited file is expected in the current working directory.
global.2011 <- read.delim("2011.reduced.txt", header = TRUE)

# Constant column of ones; summing it within groups later yields event counts.
global.2011$count <- 1
# Split the Day variable into separate year, month and day columns for later
# summing.
#
# Args:
#   df: data frame with a `Day` column whose values read as YYYYMMDD once
#       converted to character (integer, character or factor all work).
#
# Returns:
#   `df` with numeric columns `year`, `month` and `day` added. If those columns
#   already exist they are overwritten, so re-running on the same data frame
#   does not pile up duplicate columns.
splitdate <- function(df) {
  stopifnot(is.data.frame(df), "Day" %in% names(df))

  # Convert once; as.character() also unwraps factor levels.
  stamp <- as.character(df[["Day"]])

  df$year <- as.numeric(substr(stamp, 1, 4))
  df$month <- as.numeric(substr(stamp, 5, 6))
  df$day <- as.numeric(substr(stamp, 7, 8))
  df
}
# Add year/month/day columns derived from Day.
global.2011 <- splitdate(global.2011)

# Fix variable type for lat and lon for mapping, etc.
# Going through as.character() first means factor columns convert by label
# rather than by internal level code; unparseable values become NA.
for (geo.col in c("Actor1Geo_Lat", "Actor1Geo_Long")) {
  global.2011[[geo.col]] <- as.numeric(as.character(global.2011[[geo.col]]))
}
rm(geo.col)
# Filtering functions for possible atrocities w/state & non-state perpetrators
# where 'df' is data frame from GDELT reduced file.
#
# Both filters keep rows that satisfy all three conditions:
#   1. EventCode is one of the event codes listed below;
#   2. Actor2Code (the target) carries one of the victim role codes in
#      characters 1-3 or 4-6;
#   3. Actor1Code (the source) carries one of the perpetrator role codes in
#      characters 1-3 or 4-6.
# Rows where any tested field is NA are dropped.

# Event codes treated as possible atrocities.
.atr.event.codes <- c(180, 1823, 190, 193, 194, 195,
                      200, 201, 202, 203, 204, 205)

# Three-letter role codes accepted for the target actor.
.atr.victim.roles <- c("CVL", "OPP", "EDU", "LAB", "REL", "HLH", "REF", "MED")

# TRUE where characters 1-3 or 4-6 of an actor code match one of `roles`.
# NA codes yield FALSE because %in% never returns NA.
.atr.has.role <- function(actor.codes, roles) {
  actor.codes <- as.character(actor.codes)
  substr(actor.codes, 1, 3) %in% roles | substr(actor.codes, 4, 6) %in% roles
}

# Shared implementation: rows of `df` matching the event and victim filters
# whose Actor1Code carries one of `perpetrator.roles`.
.atr.filter <- function(df, perpetrator.roles) {
  stopifnot(
    is.data.frame(df),
    all(c("EventCode", "Actor1Code", "Actor2Code") %in% names(df))
  )
  keep <- df[["EventCode"]] %in% .atr.event.codes &
    .atr.has.role(df[["Actor2Code"]], .atr.victim.roles) &
    .atr.has.role(df[["Actor1Code"]], perpetrator.roles)
  df[keep, , drop = FALSE]
}

# Possible atrocities where the source actor has role GOV, MIL, COP or SPY.
# Returns the matching rows of `df` (all columns, original row names).
atr.state <- function(df) {
  .atr.filter(df, c("GOV", "MIL", "COP", "SPY"))
}

# Possible atrocities where the source actor has role REB or SEP.
# Returns the matching rows of `df` (all columns, original row names).
atr.rebel <- function(df) {
  .atr.filter(df, c("REB", "SEP"))
}
# Apply filtering functions to 2011 data
state.2011 <- atr.state(global.2011)
rebel.2011 <- atr.rebel(global.2011)

# Sum by source/target/event combo, sort in descending order, and inspect top 30
state.triad.2011 <- ddply(state.2011, .(Actor1Code, Actor2Code, EventCode),
                          summarize, count = sum(count))
rebel.triad.2011 <- ddply(rebel.2011, .(Actor1Code, Actor2Code, EventCode),
                          summarize, count = sum(count))
state.triad.2011 <- state.triad.2011[order(-state.triad.2011$count), ]
rebel.triad.2011 <- rebel.triad.2011[order(-rebel.triad.2011$count), ]
# head() shows at most 30 rows and, unlike [1:30, ], does not pad short
# tables with all-NA rows.
print(head(state.triad.2011, 30))
print(head(rebel.triad.2011, 30))

# Sum by geocoded location for mapping, locate countries, inspect top 30
state.loc.2011 <- ddply(state.2011, .(Actor1Geo_Lat, Actor1Geo_Long),
                        summarize, count = sum(count))
rebel.loc.2011 <- ddply(rebel.2011, .(Actor1Geo_Lat, Actor1Geo_Long),
                        summarize, count = sum(count))

# Add a `where` column holding the "world" map region for each location.
# Row 1 is left as NA, exactly as in the original script.
# NOTE(review): the reason row 1 is skipped is not visible here -- presumably
# it is a row without usable coordinates; confirm against the data.
.add.country <- function(loc) {
  loc$where <- rep(NA_character_, nrow(loc))
  # seq_len(n)[-1] is empty for n <= 1, whereas 2:n would count backwards.
  rows <- seq_len(nrow(loc))[-1]
  if (length(rows) > 0) {
    loc$where[rows] <- map.where(database = "world",
                                 loc$Actor1Geo_Long[rows],
                                 loc$Actor1Geo_Lat[rows])
  }
  loc
}
state.loc.2011 <- .add.country(state.loc.2011)
rebel.loc.2011 <- .add.country(rebel.loc.2011)

print(head(state.loc.2011, 30))
print(head(rebel.loc.2011, 30))
# Draw one map: fetch a black-and-white Google terrain basemap centred on
# `center` (named vector c(lon = , lat = )) at the given `zoom`, then overlay
# jittered points sized by event count -- blue for rebel.loc, red for
# state.loc. The plot is printed; the basemap is returned invisibly.
# NOTE(review): recent ggmap releases require a Google API key registered via
# register_google() before source = "google" works -- confirm for your setup.
.plot.atrocity.map <- function(center, zoom, rebel.loc, state.loc) {
  basemap <- get_map(location = center, source = "google",
                     maptype = "terrain", color = "bw", zoom = zoom)
  print(ggmap(basemap) +
    geom_point(data = rebel.loc, position = "jitter",
               aes(x = Actor1Geo_Long, y = Actor1Geo_Lat, size = count),
               alpha = 0.5, shape = 21, colour = "black", fill = "blue2") +
    geom_point(data = state.loc, position = "jitter",
               aes(x = Actor1Geo_Long, y = Actor1Geo_Lat, size = count),
               alpha = 0.5, shape = 21, colour = "black", fill = "red2"))
  invisible(basemap)
}

# Map: Africa
map <- .plot.atrocity.map(c(lon = 18.35, lat = 4.22), zoom = 3,
                          rebel.loc.2011, state.loc.2011)

# Map: Asia and Middle East
map <- .plot.atrocity.map(c(lon = 77.12, lat = 28.36), zoom = 3,
                          rebel.loc.2011, state.loc.2011)

# Map: Syria (centre of a lat/lon bounding box)
lat <- c(32, 38)
lon <- c(35, 43)
map <- .plot.atrocity.map(c(lon = mean(lon), lat = mean(lat)), zoom = 7,
                          rebel.loc.2011, state.loc.2011)
# End of gist. (The GitHub page footer inviting readers to sign up or sign in
# to comment is not part of the script.)