Created
June 15, 2014 17:02
-
-
Save jayjacobs/b42ac3661d38f2b83350 to your computer and use it in GitHub Desktop.
Creating a Video on the TCP/UDP ports in Marx data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the Marx honeypot data, keep the requested weeks and the busiest
# source countries, and tag every packet with the 5-minute frame it falls in.
# Produces: marx, week, mx, allframes.src, allframes, cname.tbl, cnames,
# ccolor (plus the whichweek / numcountry settings).

# which weeks should we look at? (week-of-year numbers as produced by "%V")
whichweek <- c(27, 28, 29, 30)
# how many countries to show?
numcountry <- 25 # 20 at first

# Numeric frame ID for each timestamp: month, day, hour and the index
# (00-11) of the 5-minute slot within the hour pasted together,
# e.g. 7011403 for July 1st, 14:15-14:19.
frame.id <- function(when) {
  slot <- trunc(as.numeric(format(when, "%M")) / 5)
  as.numeric(paste0(format(when, "%m%d%H"), sprintf("%02d", slot)))
}

# read in marx data
marx <- read.csv("marx-geo.csv") # get from dds.ec
# Convert datetime to a POSIXct date/time. strptime() returns POSIXlt, which
# is a list of fields; POSIXct is a plain vector and is the class meant for
# data-frame columns, so is.na() below is a direct per-row test.
marx$datetime <- as.POSIXct(strptime(marx$datetime,
                                     format = "%Y-%m-%d %H:%M:%S"))
# drop any rows whose date did not parse,
# and filter just the columns we want
marx <- marx[!is.na(marx$datetime),
             c("datetime", "proto", "dpt", "country")]

# filter out the weeks.
# "%V" is zero-padded text ("07"), so compare as integers; matching the text
# against the numbers in whichweek would silently miss weeks 1-9.
week <- factor(format(marx$datetime, "%V"))
mx <- marx[as.integer(as.character(week)) %in% whichweek, ] # subset of marx
# finally, filter for only those we have a port
mx <- mx[!is.na(mx$dpt), ]
if (nrow(mx) == 0) {
  stop("no rows with a destination port in weeks ",
       paste(whichweek, collapse = ", "), call. = FALSE)
}

# divide into 5 minute chunks
mx$frame <- frame.id(mx$datetime)
# create the timeline to tick through (300 seconds = 5 minutes).
allframes.src <- seq(min(mx$datetime), max(mx$datetime), by = 300)
# allframes has every possible "frame" we want to show in it.
allframes <- frame.id(allframes.src)

# set up source country names in "cnames": the numcountry most frequent
# non-empty country names, listed in reverse alphabetical order
cname.tbl <- table(mx$country)
cname.tbl <- sort(cname.tbl[nchar(names(cname.tbl)) > 0], decreasing = TRUE)
cnames <- sort(head(names(cname.tbl), numcountry), decreasing = TRUE)

# rainbow() is deterministic by itself; the seed is kept because it fixes the
# random numbers drawn later in the script (the rnorm() jitter on each ball).
set.seed(2)
ccolor <- as.character(rainbow(length(cnames)))
names(ccolor) <- cnames

# now filter out where country is known.
mx <- mx[mx$country %in% cnames, ]
# add in a frequency counter for later aggregation
mx$freq <- 1
# Layout geometry: canvas size, one row per country in the middle column,
# and one row per destination port in the TCP and UDP columns.

# canvas size in pixels
wt <- 1920
ht <- 1152

# country rows are spaced evenly over the canvas height;
# cpos maps each country name to the y value of its row centre
cht <- ht / length(cnames)
cpos <- setNames(seq(cht / 2, ht, by = cht), cnames)

# number of port rows to show per protocol
numport <- 200

# Tally destination ports for one protocol; maxsum caps how many entries
# summary() returns for the factor.
count.ports <- function(proto) {
  summary(factor(mx$dpt[mx$proto == proto]), maxsum = numport)
}

# Row labels for one protocol: the first numport - 1 tallied ports in
# descending numeric order, preceded by a catch-all "Other" row.
label.ports <- function(counts) {
  ports <- sort(as.numeric(head(names(counts), numport - 1)))
  rev(c(ports, "Other"))
}

tcp <- count.ports("TCP")
tcp.name <- label.ports(tcp)
udp <- count.ports("UDP")
udp.name <- label.ports(udp)

# height of a single port row on each side
ht.tcp <- ht / length(tcp.name)
ht.udp <- ht / length(udp.name)

# y value of each port row centre, named by port label
ypos.tcp <- setNames(seq(ht.tcp / 2, ht, by = ht.tcp), tcp.name)
ypos.udp <- setNames(seq(ht.udp / 2, ht, by = ht.udp), udp.name)

# largest tally on each side, used to scale the bar plots
max.tcp <- max(tcp)
max.udp <- max(udp)

# x positions of the three columns: UDP left, countries centre, TCP right
udpx <- 150
countryx <- wt / 2
tcpx <- wt - 150

# gap between bars in barplots
gap <- 1
################
# State that the frame loop reads and updates while it runs.
start <- Sys.time() # timing it.
steps <- 60 # how many frames it takes a ball to cross the screen
outdf <- data.frame() # one row per ball currently in flight

# Rectangles for a side bar plot, one per port row. Each bar starts with
# zero width (xleft == xright) and is inset by `gap` at top and bottom.
bar.boxes <- function(labels, x, centers, row.ht) {
  half <- row.ht / 2
  data.frame(name = labels, count = 0,
             xleft = x, ybottom = centers - half + gap,
             xright = x, ytop = centers + half - gap,
             row.names = NULL)
}
tcp.box <- bar.boxes(tcp.name, tcpx, ypos.tcp, ht.tcp)
udp.box <- bar.boxes(udp.name, udpx - 8, ypos.udp, ht.udp)

# text shown at the top of the frame; stays NULL until a frame has data
date.label <- NULL

# current text size (cex) of every port label, starting at the 0.5 baseline
tcp.size <- setNames(rep(0.5, length(tcp.name)), tcp.name)
udp.size <- setNames(rep(0.5, length(udp.name)), udp.name)

# for testing, can cut down to a handful of frames
# allframes <- allframes[1:40]
# Render the animation: one PNG per 5-minute frame, plus steps + 20 trailing
# frames so the balls still in flight when the data ends get to land.
# Each ball is one port/country/protocol combination seen in a frame; it
# travels from its country label to its port label in `steps` frames and,
# on arrival, widens that port's bar and enlarges the port label.

# png() writes into this directory, so make sure it exists
dir.create("ports", showWarnings = FALSE)

nimages <- length(allframes) + steps + 20
# vertical shift added to the y value of labels, balls and bars
offset <- 30

# Build the data.frame of new balls for one frame.
# tmx: aggregated rows with columns dpt, country, proto and freq.
# Anything that is not "TCP" is drawn on the UDP side (mult == -1).
new.balls <- function(tmx) {
  ctry <- as.character(tmx$country)
  is.tcp <- tmx$proto == "TCP"
  mult <- ifelse(is.tcp, 1, -1)
  port <- as.character(tmx$dpt)
  # ports without a row of their own on their side land on "Other"
  known <- ifelse(is.tcp, port %in% names(ypos.tcp),
                  port %in% names(ypos.udp))
  dport <- ifelse(known, port, "Other")
  toy <- ifelse(is.tcp, ypos.tcp[dport], ypos.udp[dport])
  # vertical jitter scaled to the row height of the side the ball lands on
  jitter <- rnorm(nrow(tmx), mean = 0,
                  sd = ifelse(is.tcp, ht.tcp, ht.udp) / 2)
  # start and end just beyond the text of the labels involved
  fromx <- countryx + nchar(ctry) * 7 * mult
  fromy <- cpos[ctry]
  data.frame(fromx = fromx, fromy = fromy,
             tox = ifelse(is.tcp, tcpx, udpx) - nchar(dport) * 5 * mult,
             toy = toy + jitter,
             curx = fromx, cury = fromy, col = ccolor[ctry],
             size = tmx$freq, time = 1, mult = mult,
             dport = dport, country = ctry, row.names = NULL)
}

for (image in seq_len(nimages)) {
  # image is the frame number we are showing.
  # "it" is the frame ID, or zero once we are done reading in new data
  it <- if (image <= length(allframes)) allframes[image] else 0

  # test if we have any data to read for this frame
  in.frame <- mx$frame == it
  if (any(in.frame)) {
    # update the date label (to be shown at the top)
    date.label <- format(min(mx$datetime[in.frame]), "%A, %B %e, %l%p")
    # aggregate, per port, country and protocol combination
    tmx <- aggregate(freq ~ dpt + country + proto, data = mx[in.frame, ],
                     FUN = sum)
    newdf <- new.balls(tmx)
  } else {
    # no new data for this frame
    newdf <- data.frame()
  }

  have.old <- nrow(outdf) > 0
  if (have.old) { # we have balls in the air
    # move each ball to the position for the step it is in.
    progress <- outdf$time / steps
    outdf$curx <- ((outdf$tox - outdf$fromx) * progress) + outdf$fromx
    outdf$cury <- ((outdf$toy - outdf$fromy) * progress) + outdf$fromy
    outdf$time <- outdf$time + 1
  }
  if (nrow(newdf) > 0) {
    # append the new balls (they start at their country label)
    outdf <- if (have.old) rbind(outdf, newdf) else newdf
  }

  # balls that arrive in this frame
  arrived <- outdf$time == steps

  # set up plot
  png(filename = sprintf("ports/base%04d.png", image), width = wt,
      height = ht)
  # set small margin in inches
  par(mai = c(0, 0.2, 0, 0.2))
  # open up an empty plot
  plot(c(0, 0), type = "n", col = "white", xlim = c(-1, wt),
       ylim = c(-1, ht + 100), yaxt = "n", ann = FALSE, xaxt = "n",
       bty = "n", xaxs = "i", yaxs = "i")

  # add country labels
  text(countryx + 5, cpos + offset, labels = cnames, cex = 2, adj = 0.5)

  # add tcp labels: a label jumps to size 2 when a ball arrives and then
  # shrinks by 10% per frame back to the 0.5 baseline
  bigport <- outdf$dport[arrived & outdf$mult == 1]
  tcp.size <- ifelse(tcp.size > 0.5, tcp.size * 0.9, 0.5)
  tcp.size[tcp.name %in% bigport] <- 2
  text(tcpx - 5, ypos.tcp + offset, labels = tcp.name, cex = tcp.size,
       adj = 1)

  # add udp labels, same sizing rule
  bigport <- outdf$dport[arrived & outdf$mult == -1]
  udp.size <- ifelse(udp.size > 0.5, udp.size * 0.9, 0.5)
  udp.size[udp.name %in% bigport] <- 2
  text(udpx - 5, ypos.udp + offset, labels = udp.name, cex = udp.size,
       adj = 0)

  # add the date label; it is still NULL until the first frame with data,
  # and text() must not be handed an empty label
  if (!is.null(date.label)) {
    text(wt / 2, ht + offset + 5, labels = date.label, cex = 3,
         adj = c(0.5, 0))
  }
  # add tcp/udp header
  text(tcpx, ht + offset + 5, labels = "TCP", cex = 2, adj = c(0.5, 0))
  text(udpx, ht + offset + 5, labels = "UDP", cex = 2, adj = c(0.5, 0))
  # stick a little URL in the corner. The original used `hostx`, which is
  # not defined anywhere in this script; the right-hand (TCP) column is used.
  text(tcpx, 8, labels = "http://datadrivensecurity.info", cex = 2,
       col = "slateblue", adj = c(0.5, 0), font = 3)

  # now include all the points (balls) in the plot; skipped while outdf is
  # still the empty, column-less data.frame it starts out as
  if (nrow(outdf) > 0) {
    with(outdf, points(curx, cury + offset, type = "p", pch = 16,
                       col = as.character(col), cex = sqrt(size)))
  }

  # grow the port bars for every ball that arrived (time == steps)
  if (any(arrived)) {
    cbase <- aggregate(size ~ dport + mult, data = outdf[arrived, ],
                       FUN = sum)
    for (x in seq_len(nrow(cbase))) {
      landed.port <- as.character(cbase$dport)[x]
      # bar width is scaled so the largest tally on a side would reach
      # 150 units; mult makes TCP bars grow right and UDP bars grow left
      if (cbase$mult[x] == 1) {
        thisone <- which(tcp.box$name == landed.port)
        tcp.box$xright[thisone] <- tcp.box$xright[thisone] +
          (150 * (as.numeric(cbase$size[x]) / max.tcp) * cbase$mult[x])
      } else {
        thisone <- which(udp.box$name == landed.port)
        udp.box$xright[thisone] <- udp.box$xright[thisone] +
          (150 * (as.numeric(cbase$size[x]) / max.udp) * cbase$mult[x])
      }
    }
    # arrived balls leave the animation
    outdf <- outdf[outdf$time < steps, ]
  }

  # now add the two bar plots with a "rect"
  with(tcp.box, rect(xleft, ybottom + offset, xright, ytop + offset,
                     col = "steelblue"))
  with(udp.box, rect(xleft, ybottom + offset, xright, ytop + offset,
                     col = "steelblue"))
  # close off this image
  dev.off()

  # include something to watch while this is running...
  if (image %% 10 == 0) {
    cat(image, "of", nimages, "\n")
  }
  # update: maybe want to modify this to use txtProgressBar()
}
# report how long the whole render took
end <- Sys.time()
print(end - start)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment