Last active
August 29, 2015 13:59
-
-
Save jayjacobs/10610909 to your computer and use it in GitHub Desktop.
Creating a Video from Marx data at datadrivensecurity.info
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# which weeks should we look at? | |
whichweek <- c(27, 28, 29, 30) | |
# how many countries to show? | |
numcountry <- 25 # 20 at first | |
# read in marx "geo" data available from | |
# http://datadrivensecurity.info/blog/pages/dds-dataset-collection.html | |
marx <- read.csv("marx-geo.csv") | |
# convert datetime to POSIX date/time object | |
marx$datetime <- strptime(marx$datetime, format='%Y-%m-%d %H:%M:%S') | |
# drop any weird date formats | |
marx <- marx[complete.cases(marx[ ,"datetime"]),] | |
# filter out the weeks. | |
week <- factor(format(marx$datetime, "%V")) | |
mx <- marx[week %in% whichweek, ] # mx is subset of marx data | |
# divide into 5 minute chunks | |
mx$frame <- as.numeric(paste0(format(mx$datetime, "%m%d%H"), | |
sprintf("%02d", trunc(as.numeric(format(mx$datetime, "%M"))/5)))) | |
# create the timeline to tick through. | |
allframes.src <- seq(min(mx$datetime), max(mx$datetime), by=300) | |
allframes <- as.numeric(paste0(format(allframes.src, "%m%d%H"), | |
sprintf("%02d", trunc(as.numeric(format(allframes.src, "%M"))/5)))) | |
# allframes now has every possible "frame" we want to show in it. | |
# looking at the length will tell us how many frames we will write out. | |
# set up host names in "hosts" | |
host <- levels(mx$host) | |
host <- host[nchar(host)>0] # where host known | |
host <- sort(host, decreasing=T) # sort them | |
# just do country part from hostname | |
hostname <- sapply(host, function(x) { | |
paste(unlist(strsplit(x, '-'))[-1], collapse='-') | |
}, USE.NAMES=T) | |
# now just the hosts we know about | |
mx <- mx[mx$host %in% host, ] | |
# set up source country names in "cnames" | |
cname.tbl <- table(mx$country) | |
cname.tbl <- sort(cname.tbl[nchar(names(cname.tbl))>0], decreasing=T) | |
cnames <- sort(head(names(cname.tbl), numcountry), decreasing=T) | |
cnames <- head(cnames[nchar(cnames)>0], numcountry) | |
# setting a seed to repeat the colors | |
set.seed(2) | |
ccolor <- as.character(rainbow(length(cnames))) | |
names(ccolor) <- cnames | |
# now filter out where country is known. | |
mx <- mx[mx$country %in% cnames,] | |
# add in a frequency counter for later agregation | |
mx$freq <- 1 | |
# set up dimensions of image | |
ht <- 1152 | |
wt <- 1920 | |
# set up label positions, space them evenly | |
cht <- ht/length(cnames) | |
# cpos has the country positions for y value | |
cpos <- seq(cht/2, ht, by=cht) | |
names(cpos) <- cnames | |
hht <- ht/length(host) | |
# hpos has the host positions for y value | |
hpos <- seq(hht/2, ht, by=hht) | |
names(hpos) <- host | |
# need to know how to scale the bar plots on the side | |
max.country <- max(table(mx$country)) | |
max.host <- max(table(mx$host)) | |
# set x position for hosts and countries | |
hostx <- wt-200 | |
cx <- 200 | |
# gap between bars in barplots | |
gap <- 5 | |
################ | |
# okay, this is where we set things up to loop on | |
start <- Sys.time() # timing it. | |
steps <- 70 # how many frame to move a ball across the screen | |
outdf <- data.frame() # data.frame of all balls | |
# data for barplot on the host side | |
hostbar <- data.frame(name=host, count=0, | |
xleft=hostx, ybottom=hpos-(hht/2)+5, | |
xright=hostx, ytop=hpos+(hht/2)-5, row.names=NULL) | |
# data for barplot on the country side | |
countbar <- data.frame(name=cnames, count=0, | |
xleft=cx, ybottom=cpos-(cht/2)+5, | |
xright=cx, ytop=cpos+(cht/2)-5, | |
color=ccolor, row.names=NULL) | |
date.label <- NULL | |
for(image in seq(length(allframes)+steps)) { | |
# image is the frame number we are showing. | |
# set "it" to be the frame ID, or zero if we are done reading in new data | |
it <- ifelse(image <= length(allframes), allframes[image], 0) | |
# test if we have any data to read for this frame | |
if (sum(mx$frame==it)>0) { | |
# update the date label (to be shown at the top) | |
date.label <- format(min(mx$datetime[mx$frame==it]), "%A, %B %e, %l%p") | |
# prep the data | |
# aggregate, per host and country combination | |
tmx <- aggregate(freq ~ host + country, data=mx[mx$frame==it, ], FUN=sum) | |
# foreach host+country combination, create a row in data.frame | |
newdf <- do.call(rbind, lapply(seq(nrow(tmx)), function(i) { | |
country <- as.character(tmx$country[i]) | |
host <- as.character(tmx$host[i]) | |
fromy <- cpos[country] | |
toy <- hpos[host] | |
data.frame(fromx=cx+(nchar(country)*11), fromy=fromy, | |
tox=hostx-(nchar(hostname[host])*9), toy=toy+rnorm(1, mean=0, sd=hht/6), | |
curx=cx+(nchar(country)*11), cury=fromy, col=ccolor[country], | |
size=tmx$freq[i], time=1, | |
host=host, country=country, row.names=NULL) | |
})) | |
# fromx, fromy: From x,y coordiates | |
# tox, toy: To x,y coordinates, with slight "rnorm" variation on y-value | |
# curx, cury: Current x,y coordinates | |
# size: the number of packets in that 5 minute window | |
# time: the "step" the ball is in | |
# host, country: saving for barplot counters | |
} else { | |
# else we have no new data, just make empty data.frame | |
newdf <- data.frame() | |
} | |
if(nrow(outdf)>0) { # we have balls in the air | |
# update the current value based on which step the ball is in. | |
outdf$curx <- ((outdf$tox - outdf$fromx) * (outdf$time/steps)) + outdf$fromx | |
outdf$cury <- ((outdf$toy - outdf$fromy) * (outdf$time/steps)) + outdf$fromy | |
outdf$time <- outdf$time + 1 | |
# rbind the old data with new data | |
if (nrow(newdf)) { | |
outdf <- rbind(outdf, newdf) | |
} | |
} else { # fresh df | |
if (nrow(newdf)) { | |
outdf <- newdf | |
} | |
} | |
# set up plot | |
png(filename=sprintf("balls/base%04d.png", image), width=1920, height=1080) | |
# set small margin in inches | |
par(mai=c(0,0.3,0,0.3)) | |
# open up an empty plot | |
plot(c(0,0), type="n", col="white", xlim=c(-1, wt), ylim=c(-1,ht+150), | |
yaxt="n", ann=FALSE, xaxt="n", bty="n", xaxs="i", yaxs="i") | |
offset <- 50 | |
# add country labels | |
text(cx+5, cpos+offset, labels=cnames, cex=2, adj=0) | |
# add host labels | |
text(hostx-5, hpos+offset, labels=hostname, cex=2, adj=1) | |
# add the date labels | |
text(wt/2, ht+offset, labels=date.label, cex=4, adj=c(0.5, 0)) | |
# stick a little URL in the corner | |
text(hostx, 10, labels="http://datadrivensecurity.info", cex=2, col="slateblue", adj=c(0.5,0), font=3) | |
# now include all the points (balls) in the plot | |
with(outdf, points(curx, cury+offset, type="p", pch=16, col=as.character(col), cex=sqrt(size))) | |
# test to see if we should increase the country barplot (look for time==1) | |
if (sum(outdf$time==1)>0) { | |
cbase <- aggregate(size ~ country, data=outdf[outdf$time==1, ], FUN=sum) | |
for(x in seq(nrow(cbase))) { | |
thisone <- which(countbar$name==as.character(cbase$country)[x]) | |
countbar$xleft[thisone] <- countbar$xleft[thisone] - 200*(as.numeric(cbase$size[x])/max.country) | |
} | |
} | |
# test to see if we should increase any host barplots (look for "time" at "steps") | |
if (sum(outdf$time==steps)>0) { | |
hbase <- aggregate(size ~ host, data=outdf[outdf$time==steps, ], FUN=sum) | |
for(x in seq(nrow(hbase))) { | |
thisone <- which(hostbar$name==as.character(hbase$host)[x]) | |
hostbar$xright[thisone] <- hostbar$xright[thisone] + 200*(as.numeric(hbase$size[x])/max.host) | |
} | |
outdf <- outdf[outdf$time<steps, ] | |
} | |
# now add the two bar plots with a "rect" | |
with(countbar, rect(xleft, ybottom+offset, xright, ytop+offset, col=as.character(color))) | |
with(hostbar, rect(xleft, ybottom+offset, xright, ytop+offset, col="steelblue")) | |
# close off this image | |
dev.off() | |
# include something to watch while this is running... | |
if (image %% 10 == 0) { | |
#print(outdf) | |
cat(image, "of", length(allframes)+steps, "\n") | |
} | |
} | |
end <- Sys.time() | |
print(end-start) | |
# For Reference, 8134 frames took | |
# Time difference of 41.80941 mins | |
# now we create an HD movie (hopefully) with this: | |
# avconv -f image2 -i balls/base%04d.png -r 25 -b 50000000 -s 1920x1080 -an test5.mp4 | |
# though searching for "stop motion" should yield more options for you. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment