# Forked from mvarela/binVis2d.r
# Created January 20, 2020 06:08
# This is the code used in my blog post about binary data visualization. Feel free to use it as you see fit.
# binViz: Veles-like binary visualization
binViz2d <- function(filename, alpha = 1/100, maxsize = 5000000,
save = TRUE, polar = FALSE, sample = FALSE,
sample_size = 2000000, do_density = FALSE){
# setting dens_plot as NA simplifies the logic below a bit
dens_plot = NA
# we read the file as a stream of bytes, and prepare our tibble
# We'll add a column indexing the trigram position in the file
# This will come in handy later if we want to facet the plot by position
# as done in the Veles article. We'll just mutate binViz here, to save memory.
rawdata <- readBin(filename, integer(), n=maxsize, size = 1, signed = FALSE)
size <- rawdata %>% as.tibble %>% nrow
binViz <- cbind(0:(size - 1),rawdata, lead(rawdata), lead(rawdata,n=2L))
colnames(binViz) <- c('idx', 'x', 'y', 'z')
# We then remove any missing values from the dataset
toplot <- binViz %>% as.tibble %>% na.omit
# If sampling is required, we do it now. Sampling is important
# if doing the density plots, as going beyond 1M points gets SLOW
toplot <- toplot %>% sample_n(min(count(toplot), sample_size))
# The actual plotting
theplot <- binViz2d_do_plot(toplot, alpha, polar) +
ggtitle(title_spec(filename, sample, sample_size))
dens_plot <- binViz2d_do_density_plot(toplot, polar)
# Saving the plots
namespec <- name_spec(filename, sample, sample_size, polar)
binViz2d_save(namespec, theplot, dens_plot)
return(list(binViz_plot = theplot, dens_plot = dens_plot))
binViz2d_do_plot <- function(data, alpha, polar){
theplot <- data %>% ggplot(mapping = aes(x,y)) +
geom_point(mapping = aes(color=z), alpha = alpha, size = 0.75) +
scale_color_gradient(low="blue", high="orange") +
coord_fixed(ratio = 1)+
labs(x="i", y="i+1", z="i+2")
theplot <- theplot + coord_polar()
binViz2d_do_density_plot <- function(toplot, polar){
dens_plot <- toplot %>% ggplot(mapping = aes(x,y)) +
stat_density2d(aes(fill = ..density..), geom="raster", contour = FALSE) +
scale_fill_gradient(low="steelblue4", high="sienna2") +
coord_fixed(ratio = 1)+
labs(x="i", y="i+1")
title_spec <- function(name, sampled, nsamples){
title <- paste(name, "-", nsamples, "samples.")
title <- name
# We create a name separated by underscores, this simplifies later parsing
# of file names, if needed, to automate e.g., reports creation
name_spec <- function(name, sampled, nsamples, polar){
polar_str <- ""
polar_str <- "polar"
sampled_str <- ""
sampled_str <- paste("sampled", nsamples, sep="_")
basename <- chartr('/.', '::',
paste("plot", polar_str, sampled_str, name, sep = "_"))
return(paste(basename, ".png", sep=""))
binViz2d_save <- function(namespec, binViz_plot, dens_plot){
png(namespec, width = 15, height = 15, units = "cm", res = 300)
png(paste("density",namespec,sep="_"), width = 15, height = 15,
units = "cm", res = 300)
