Last active
January 8, 2018 20:22
-
-
Save felixhaass/5766725 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(Cairo) | |
library(scales) | |
library(RCurl) | |
library(rjson) | |
################################# | |
# Data preparation from web API # | |
################################# | |
# get raw drone data | |
drones <- fromJSON(getURL("http://api.dronestre.am/data")) | |
# The following code prepares three different data frames: | |
# (1) drone_data => raw drone data as data frame & exported as .csv | |
# (2) drone_sum => a sum of drone strikes per month & and monthly fatalities | |
# (3) drone_sum_month => time series of all drone strikes per month, including | |
# months in which nothing happened | |
## (1) Create "drone_data" and "Drone Data.csv" | |
# create empty data frame | |
drone_data <- data.frame(t(rep(NA,length(drones$strike[[1]]))), stringsAsFactors=FALSE) | |
names(drone_data) <- names(drones$strike[[1]]) | |
for (i in 1:length(drones$strike)) { | |
# clean up empty/useless JSON entries and fill with 0 | |
drones$strike[[i]][which(drones$strike[[i]]=="" | drones$strike[[i]]=="?")] <- 0 | |
drones$strike[[i]][which(drones$strike[[i]]=="list()")] <- 0 | |
# fill data frame | |
drone_data[i, ] <- drones$strike[[i]] | |
} | |
# convert dates | |
drone_data$date <- as.Date(drone_data$date, "%Y-%m-%d") | |
write.table(drone_data, "./output/Drone_Data.csv", sep="\t", col.names=TRUE, | |
row.names=FALSE, quote=FALSE, qmethod="double", eol="\n", dec = ".") | |
## (2) create drone_sum | |
# aggregate drone strikes per month | |
drone_data$dummy <- 1 | |
# fix data issues | |
drone_data <- drone_data[-336, ] # wrong/useless data entry | |
# March 2014 dates are currently (20 March 2014) wrong, | |
# you should check if that applies in the future | |
drone_data[drone_data$date > as.Date("2014-04-01"), "date"] <- c(rep("2014-03-01", 4)) | |
drone_sum <- data.frame(CountMonth=rowsum(drone_data$dummy, format(drone_data$date,"%Y-%m")), | |
Dates=levels(factor(format(drone_data$date,"%Y-%m"))), | |
MonthFat=rowsum(as.integer(drone_data$deaths_min), format(drone_data$date,"%Y-%m"), na.rm=T), | |
MonthFatMax=rowsum(as.integer(drone_data$deaths_max), format(drone_data$date,"%Y-%m"), na.rm=T), | |
stringsAsFactors=F) | |
# convert dates in summary file for better plotting | |
drone_sum$Dates <- as.Date(paste(drone_sum$Dates, "-1", sep="")) | |
## (3) create drone_sum_month | |
# length.out argument is quite clumsy to get the correct number of months | |
months <- seq(as.Date("2002/11/1"), | |
by = "month", | |
length.out = as.numeric(round(difftime(Sys.Date(), as.Date("2002-11-1"), units=c("days"))/30.416))) | |
drone_sum_month <- data.frame(Dates=months) | |
drone_sum_month <- merge(drone_sum, drone_sum_month, by="Dates") | |
######################### | |
# Change point analysis # | |
######################### | |
library(changepoint) | |
library(fields) # for xline | |
library(Cairo) | |
library(car) | |
# calculate changepoints | |
for(i in 20:1) { | |
CairoPNG(file=paste0("./figs/changepoint", i,".png"), pointsize=11, width=1280, height=522 ) | |
mean.drone <- cpt.mean(drone_sum_month$CountMonth, method='BinSeg', Q=i) | |
par(mar=c(6, 4, 4, 2)) | |
plot(mean.drone,type='l',cpt.col='red',xlab='',ylab='Frequency of US drone strikes',cpt.width=2, main="Changepoints in US drone strike activity", xaxt="n") | |
axis(1, at=cpts(mean.drone), labels=format.Date(drone_sum_month$Dates[cpts(mean.drone)], "%b-%y "), las=3) | |
xline(which.names("2009-01-01", drone_sum_month$Dates), lty=2, col="blue3", lwd=2) # 1st Obama presidency | |
xline(which.names("2013-01-01", drone_sum_month$Dates), lty=3, col="blue3", lwd=2) # 2nd Obama presidency | |
xline(which.names("2013-06-01", drone_sum_month$Dates), lty=4, col="chartreuse4", lwd=2) # promise to reduce drone strikes | |
dev.off() | |
} | |
# Create single plots | |
CairoPNG(file=paste0("./figs/changepoint_5.png"), pointsize=11, width=1280, height=522 ) | |
mean.drone <- cpt.mean(drone_sum_month$CountMonth, method='BinSeg', Q=5) | |
par(mar=c(6, 4, 4, 2)) | |
plot(mean.drone,type='l',cpt.col='red',xlab='',ylab='Frequency of US drone strikes',cpt.width=2, main="Changepoints in US drone strike activity", xaxt="n") | |
axis(1, at=cpts(mean.drone), labels=format.Date(drone_sum_month$Dates[cpts(mean.drone)], "%b-%y "), las=3) | |
xline(which.names("2009-01-01", drone_sum_month$Dates), lty=2, col="blue3", lwd=2) # 1st Obama presidency | |
xline(which.names("2013-01-01", drone_sum_month$Dates), lty=3, col="blue3", lwd=2) # 2nd Obama presidency | |
xline(which.names("2013-06-01", drone_sum_month$Dates), lty=4, col="chartreuse4", lwd=2) # promise to reduce drone strikes | |
dev.off() | |
######### | |
# Plots # | |
######### | |
# reassign date-class for correct order in ggplot2 | |
drone_sum_month$Dates <- as.POSIXct(drone_sum_month$Dates) | |
#plot number of drone strikes per month, using Cairo device | |
CairoPNG(file="./figs/strikes_per_month.png", width=1280, height=522 ) | |
a <- ggplot(drone_sum_month, aes(Dates, CountMonth)) | |
a + geom_line(stat="identity") + | |
labs(title="Frequency of US Drone Strikes (Monthly), 2002-2013\n", y="Frequency", x="")+ | |
theme_bw() + | |
theme(axis.text.x=element_text(hjust=1.1, angle=45), legend.key=element_blank()) + | |
# the vertical line represents the start of the Obama presidency | |
geom_vline(xintercept=as.numeric(as.POSIXct("2009-01-01")), linetype ="longdash") + | |
scale_x_datetime(breaks="6 months", labels = date_format("%Y %B")) | |
dev.off() | |
#plot drones strike fatalities per month (low and high estimate) | |
CairoPNG(file="./figs/fat_per_month.png", width=1280, height=522) | |
b <- ggplot(drone_sum_month, aes(x=Dates)) | |
b + | |
geom_line(aes(y=MonthFat, colour = "Low Estimate")) + | |
geom_line(aes(y=MonthFatMax, colour ="High Estimate")) + | |
scale_colour_manual("", | |
breaks = c("Low Estimate", "High Estimate"), | |
values = c("red", "black")) + | |
labs(title="Monthly Fatalities by US Drone Strikes, 2002-2013\n", y="Monthly Fatalities", x="")+ | |
theme_bw() + | |
theme(axis.text.x=element_text(hjust=1.1, angle=45), legend.key=element_blank()) + | |
# the vertical line represents the start of the Obama presidency | |
geom_vline(xintercept=as.numeric(as.POSIXct("2009-01-01")), linetype = "longdash") + | |
scale_x_datetime(breaks="6 months", labels = date_format("%Y %B")) | |
dev.off() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment