Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active October 19, 2016 20:51
Show Gist options
  • Save ashenfad/e3e854ed71a960e2b42d5dca2ad774b8 to your computer and use it in GitHub Desktop.
Save ashenfad/e3e854ed71a960e2b42d5dca2ad774b8 to your computer and use it in GitHub Desktop.
Example Approximate Histogram Viz

A visualization of a distribution encoded by a streaming approximate histogram.

  • Brush to zoom.
  • Click to zoom out.
  • Press t to toggle trimming some of the outliers from the distribution.
  • Press r to toggle rounding of the population in each bin.
  • Press i to toggle the distribution interpolation mode (linear or trapezoidal).
{
"optype" : "numeric",
"name" : "Weights_P1-P1",
"column_number" : 73,
"order" : 73,
"datatype" : "double",
"preferred" : true,
"summary" : {
"skewness" : 1.04797,
"mean" : 1.73303,
"sum_squares" : 10802.56495,
"bins" : [ [ 1.04013, 165 ], [ 1.09639, 112 ], [ 1.14911, 140 ], [ 1.20628, 157 ], [ 1.27204, 161 ], [ 1.3458, 282 ], [ 1.42276, 200 ], [ 1.48724, 219 ], [ 1.56119, 242 ], [ 1.62585, 198 ], [ 1.69384, 205 ], [ 1.7698, 195 ], [ 1.83393, 100 ], [ 1.90247, 72 ], [ 1.97306, 59 ], [ 2.03773, 37 ], [ 2.09373, 38 ], [ 2.15973, 31 ], [ 2.21971, 37 ], [ 2.27346, 31 ], [ 2.3372, 43 ], [ 2.38917, 25 ], [ 2.4299, 17 ], [ 2.49009, 41 ], [ 2.56516, 25 ], [ 2.61241, 4 ], [ 2.65814, 29 ], [ 2.7496, 36 ], [ 2.83189, 20 ], [ 2.89787, 24 ], [ 2.94131, 7 ], [ 2.99909, 294 ] ],
"maximum" : 3,
"missing_count" : 0,
"variance" : 0.32467,
"median" : 1.58251,
"exact_histogram" : {
"start" : 1,
"width" : 0.1,
"populations" : [ 237, 243, 249, 307, 327, 320, 319, 257, 153, 89, 66, 52, 60, 64, 52, 36, 33, 36, 34, 32, 280 ]
},
"population" : 3246,
"minimum" : 1,
"standard_deviation" : 0.5698,
"kurtosis" : 0.08678,
"sum" : 5625.41125
},
"id" : "000049"
}
<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Histogram bars (the rects created with class "bar"). */
.bar {
fill: #5AC;
opacity: 0.6;
}
/* Text inside the axis groups (tick labels and axis titles). */
.axis {
font: 12px sans-serif;
}
/* Axis domain path and tick lines: thin, unfilled, pixel-aligned strokes. */
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
/* Brush selection; presumably .extent is the rect d3's brush renders
   inside the "brush" group - confirm against the d3 v3 brush markup. */
.brush .extent {
stroke: #fff;
fill-opacity: .125;
shape-rendering: crispEdges;
}
/* Axis titles. font-weight must follow the font shorthand, which would
   otherwise reset the weight to normal. */
.fname {
font: 12px sans-serif;
font-weight: bold;
}
/* Mode status lines (trimming / rounding / interpolation). */
.modetext {
font: 12px sans-serif;
}
</style>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
/* Estimate how much of the population lies at or below `value`.
 *
 * rawBins: array of bin objects with `mean`, `pop` and `exact`; non-exact
 *   bins also carry `prev` and `next`, the bounds their population is
 *   spread between.
 * value: the point up to which the population is accumulated.
 * trapInterpolateMode: when truthy, the interpolation term of a partially
 *   covered half-bin is squared (trapezoidal) instead of used as-is (linear).
 *
 * An exact bin counts in full once `value` reaches its mean. A non-exact bin
 * is treated as two halves of pop / 2, one over (prev, mean] and one over
 * (mean, next], each interpolated independently.
 *
 * Returns the estimated cumulative population as a number.
 */
function sum(rawBins, value, trapInterpolateMode) {
  // Applies the interpolation mode to a "1 - normalized distance" term.
  function weigh(area) {
    return trapInterpolateMode ? area * area : area;
  }

  var running = 0;
  for (var idx = 0; idx < rawBins.length; idx++) {
    var b = rawBins[idx];

    // The whole bin lies at or below the query point.
    if ((b.exact && value >= b.mean) || value >= b.next) {
      running += b.pop;
      continue;
    }

    // Query point falls in the lower half, (prev, mean].
    if (!b.exact && value > b.prev && value <= b.mean) {
      var covered = weigh(1 - (b.mean - value) / (b.mean - b.prev));
      running += covered * b.pop / 2;
      continue;
    }

    // Query point falls in the upper half, (mean, next]: the lower half
    // counts in full, plus the covered part of the upper half.
    if (value > b.mean && value <= b.next) {
      var uncovered = weigh(1 - (value - b.mean) / (b.next - b.mean));
      running += b.pop / 2 + (1 - uncovered) * b.pop / 2;
    }
  }
  return running;
}
/* Convert a field summary into the bin objects used by the visualization.
 *
 * data: parsed field description; `data.summary` must contain either
 *   - `bins` (with `minimum` and `maximum`): [mean, population] pairs, or
 *   - `counts`: [value, population] pairs, used when `bins` is absent.
 *
 * Returns an array of {mean, pop, exact} objects. A bin is exact (a point
 * mass) when it holds a single instance or sits on the summary minimum or
 * maximum; every `counts` entry is exact. Non-exact bins additionally get
 * `prev` and `next`: the neighbouring bin means, or the summary minimum /
 * maximum at the ends, between which their population is spread.
 */
function loadRawBins(data) {
  var summary = data.summary;

  if (!("bins" in summary)) {
    return summary.counts.map(function(pair) {
      return {mean: pair[0], pop: pair[1], exact: true};
    });
  }

  var sbins = summary.bins;
  // Declared locally: assigning these without `var` created implicit
  // globals (and throws a ReferenceError in strict / module code).
  var minimum = summary.minimum;
  var maximum = summary.maximum;
  var last = sbins.length - 1;

  return sbins.map(function(pair, i) {
    var bin = {mean: pair[0], pop: pair[1]};
    bin.exact = bin.pop === 1 || bin.mean === minimum || bin.mean === maximum;
    if (!bin.exact) {
      bin.prev = i > 0 ? sbins[i - 1][0] : minimum;
      bin.next = i < last ? sbins[i + 1][0] : maximum;
    }
    return bin;
  });
}
/* Resample the raw bins into `vizBinCount` equal-width bins covering `range`.
 *
 * rawBins: distribution bins, passed through to sum().
 * range: [low, high] extent to visualize.
 * vizBinCount: number of equal-width bins to produce.
 * roundMode: when truthy, each bin population is rounded with Math.round.
 * trapInterpolateMode: interpolation flag forwarded to sum().
 *
 * Returns {bins, maxPop, range}: `bins` is an array of
 * {range: [low, high], pop}, `maxPop` is the largest bin population (at
 * least 0), and `range` is the array that was passed in.
 */
function buildViz(rawBins, range, vizBinCount, roundMode, trapInterpolateMode) {
  // Nudging the lower edge down slightly makes the first bin behave like a
  // closed interval, so a value sitting exactly on range[0] is counted.
  var EPSILON = 0.0000001;
  var step = (range[1] - range[0]) / vizBinCount;

  var columns = [];
  var tallest = 0;
  var lower = range[0] - EPSILON;
  var lowerTotal = sum(rawBins, lower, trapInterpolateMode);

  while (columns.length < vizBinCount) {
    var upper = lower + step;
    var upperTotal = sum(rawBins, upper, trapInterpolateMode);

    // A bin's population is the difference of the cumulative estimates.
    var count = upperTotal - lowerTotal;
    if (roundMode) {
      count = Math.round(count);
    }

    tallest = Math.max(tallest, count);
    columns.push({range: [lower + EPSILON, upper], pop: count});

    lower = upper;
    lowerTotal = upperTotal;
  }

  return {bins: columns, maxPop: tallest, range: range};
}
/* Return `term` with its first character upper-cased; the remainder of the
 * string is left untouched.
 */
function capitalize(term) {
  var head = term.charAt(0).toUpperCase();
  var tail = term.slice(1);
  return [head, tail].join("");
}
/* Narrow `fullRange` so that sparsely populated tails are cut off.
 *
 * fullRange: [low, high] extent of the distribution.
 * rawBins: distribution bins; their `pop` values are totalled and the array
 *   is passed to sum() (linear interpolation).
 * buffer: fraction of the total population allowed outside each end.
 * segments: number of equal steps the range is divided into for the search.
 *
 * The low end walks upward one step at a time until the cumulative
 * population exceeds total * buffer, then backs off one step (unless it never
 * moved). The high end mirrors this, walking downward until the cumulative
 * population drops below total - total * buffer.
 *
 * Returns the trimmed [start, end].
 */
function trimRange(fullRange, rawBins, buffer, segments) {
  var lo = fullRange[0];
  var hi = fullRange[1];

  var population = rawBins.reduce(function(acc, bin) {
    return acc + bin.pop;
  }, 0);
  var step = (hi - lo) / segments;

  var lowerCut = population * buffer;
  var start = lo;
  var walked = 0;
  while (walked < segments && !(sum(rawBins, start, false) > lowerCut)) {
    start += step;
    walked++;
  }
  // Step back so the boundary that crossed the threshold stays in view.
  if (start > lo) start -= step;

  var upperCut = population - (population * buffer);
  var end = hi;
  walked = 0;
  while (walked < segments && !(sum(rawBins, end, false) < upperCut)) {
    end -= step;
    walked++;
  }
  if (end < hi) end += step;

  return [start, end];
}
/* Widen `range` on both sides by half the width of one visualization bin.
 *
 * range: [low, high] extent to pad.
 * vizBinCount: number of bins the extent is divided into.
 *
 * Returns a new [low, high] array; `range` itself is not modified.
 */
function bufferRange(range, vizBinCount) {
  var lo = range[0];
  var hi = range[1];
  var pad = 0.5 * (hi - lo) / vizBinCount;
  return [lo - pad, hi + pad];
}
// Chart geometry: `width` and `height` describe the inner plotting area,
// i.e. the 960 x 500 outer size minus the margins.
var margin = {top: 30, right: 40, bottom: 30, left: 40};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;

// `svg` refers to the inner <g>, already translated past the top-left
// margin, so everything appended to it uses plotting-area coordinates.
var svg = d3.select("body")
    .append("svg")
    .attr("width", width + margin.left + margin.right)
    .attr("height", height + margin.top + margin.bottom)
    .append("g")
    .attr("transform", ["translate(", margin.left, ",", margin.top, ")"].join(""));
/* Load the field summary from hist.json, then draw the histogram and wire up
 * the keyboard (t / r / i) and brush interactions.
 */
d3.json("hist.json", function(error, data) {
  // Fail loudly on a load or parse error; otherwise the first access to
  // `data.summary` below would throw an unrelated TypeError.
  if (error) throw error;

  var rawBins = loadRawBins(data);
  var x, y;            // current scales
  var xAxis, yAxis;    // current axis generators
  var xAxisG, yAxisG;  // <g> elements the axes are rendered into
  var vizBinCount = 32;

  var baseRange = [data.summary.minimum, data.summary.maximum];
  // Full extent of the data, padded by half a viz bin on each side.
  var initRange = bufferRange(baseRange, vizBinCount);
  // Extent with roughly 1% of the population cut from each tail, then padded.
  var trimmedInitRange = trimRange(baseRange, rawBins, 0.01, vizBinCount);
  trimmedInitRange = bufferRange(trimmedInitRange, vizBinCount);

  // By default trim the edges of the distribution
  var trimMode = true;
  // Range restored when the user clicks to zoom out.
  var resetRange = trimmedInitRange;
  var roundMode = false;
  var trapInterpolateMode = false;
  var viz = buildViz(rawBins, resetRange, vizBinCount, roundMode, trapInterpolateMode);
  init();

  /* Point the scales and axis generators at the current `viz`. */
  function rescale() {
    x = d3.scale.linear()
        .domain(viz.range)
        .range([0, width]);
    y = d3.scale.linear()
        .domain([0, viz.maxPop])
        .range([height, 0]);
    xAxis = d3.svg.axis()
        .ticks(Math.round(width / 60))
        .scale(x)
        .orient("bottom");
    yAxis = d3.svg.axis()
        .ticks(Math.round(height / 30))
        .scale(y)
        .orient("left");
  }

  /* Append one line of mode status text at vertical position `top`. */
  function addModeText(top, label) {
    return svg.append("text")
        .attr("class", "modetext")
        .attr("x", width - 150)
        .attr("y", top)
        .style("text-anchor", "start")
        .text(label);
  }

  /* Create the bars, axes, mode labels and keyboard handler (runs once). */
  function init() {
    rescale();

    svg.selectAll(".bar")
        .data(viz.bins)
      .enter().append("rect")
        .attr("class", "bar")
        // Inset each bar by 1px per side to leave a gap between neighbours.
        .attr("x", function(d) { return x(d.range[0]) + 1; })
        .attr("width", function(d) { return x(d.range[1]) - x(d.range[0]) - 2; })
        .attr("y", function(d) { return y(d.pop); })
        .attr("height", function(d) { return height - y(d.pop); });

    xAxisG = svg.append("g")
        .attr("class", "x axis")
        .attr("transform", "translate(0," + height + ")")
        .call(xAxis);
    xAxisG.append("text")
        .attr("class", "fname")
        .attr("y", -16)
        .attr("x", width)
        .attr("dy", ".71em")
        .style("text-anchor", "end")
        .text(capitalize(data.name));

    yAxisG = svg.append("g")
        .attr("class", "y axis")
        .call(yAxis);
    yAxisG.append("text")
        .attr("class", "fname")
        .attr("transform", "rotate(-90)")
        .attr("y", 6)
        .attr("dy", ".71em")
        .style("text-anchor", "end")
        .text("Population");

    // Initial label texts mirror the default mode flags set above.
    var trimText = addModeText(10, "(T)rimming: On");
    var roundText = addModeText(25, "(R)ounding: Off");
    var interpolateText = addModeText(40, "(I)nterpolation: Linear");

    d3.select("body")
        .on("keydown", function() {
          var key = d3.event.keyCode;
          if (key === 84) {
            // t = 84
            trimMode = !trimMode;
            if (trimMode) {
              resetRange = trimmedInitRange;
              trimText.text("(T)rimming: On");
            } else {
              resetRange = initRange;
              trimText.text("(T)rimming: Off");
            }
            // Toggling trimming also resets any zoom.
            update(rawBins, resetRange, vizBinCount);
          } else if (key === 82) {
            // r = 82
            roundMode = !roundMode;
            if (roundMode) {
              roundText.text("(R)ounding: On");
            } else {
              roundText.text("(R)ounding: Off");
            }
            // Keep the current zoom; only the bin populations change.
            update(rawBins, viz.range, vizBinCount);
          } else if (key === 73) {
            // i = 73
            trapInterpolateMode = !trapInterpolateMode;
            if (trapInterpolateMode) {
              interpolateText.text("(I)nterpolation: Trapezoidal");
            } else {
              interpolateText.text("(I)nterpolation: Linear");
            }
            update(rawBins, viz.range, vizBinCount);
          }
        });
  }

  /* Rebuild the viz bins for `range` and animate the bars and axes to it. */
  function update(rawBins, range, vizBinCount) {
    var animationDuration = 700;
    viz = buildViz(rawBins, range, vizBinCount, roundMode, trapInterpolateMode);
    rescale();

    // Only y / height are animated: the bins always split the x domain into
    // the same number of equal parts, so their pixel x / width stay the same.
    svg.selectAll(".bar")
        .data(viz.bins)
      .transition().duration(animationDuration)
        .attr("y", function(d) { return y(d.pop); })
        .attr("height", function(d) { return height - y(d.pop); });

    xAxisG.transition().duration(animationDuration).call(xAxis);
    yAxisG.transition().duration(animationDuration).call(yAxis);
  }

  // The brush works in pixel coordinates (identity scale); brushended maps
  // the selected pixels back to data values through the current x scale.
  var brushX = d3.scale.identity().domain([0, width]);
  var brushExtent;
  var brush = d3.svg.brush()
      .x(brushX)
      .on("brush", brushed)
      .on("brushend", brushended);
  var gBrush = svg.append("g")
      .attr("class", "brush")
      .call(brush)
      .call(brush.event);
  gBrush.selectAll("rect")
      .attr("height", height);

  /* Remember the latest brush extent (in pixels) while brushing. */
  function brushed() {
    brushExtent = brush.extent();
  }

  /* On release: zoom to the brushed span, or reset the zoom on a plain click. */
  function brushended() {
    if (!d3.event.sourceEvent) return; // only transition after input
    var start = x.invert(brushExtent[0]);
    var end = x.invert(brushExtent[1]);
    // Clear the selection rectangle; an x-only brush takes a flat [x0, x1].
    d3.select(this).call(brush.extent([0, 0]));
    if (start === end) {
      update(rawBins, resetRange, vizBinCount);
    } else {
      update(rawBins, bufferRange([start, end], vizBinCount), vizBinCount);
    }
  }
});
</script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment