Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active August 29, 2015 14:08
Show Gist options
  • Select an option

  • Save ashenfad/e455be22156aa04da197 to your computer and use it in GitHub Desktop.

Select an option

Save ashenfad/e455be22156aa04da197 to your computer and use it in GitHub Desktop.
Wine Density Histogram

A visualization of wine densities using distributions from two separate clusters (kmeans). The distributions are stored with a streaming histogram.

  • Brush to zoom.
  • Click to zoom out.
  • t to toggle trimming some of the outliers from the distribution.
  • r to toggle rounding populations for each bin.
  • i to toggle the distribution interpolation mode.
[
{
"id" : "000007",
"preferred" : true,
"summary" : {
"mean" : 0.99725,
"sum_squares" : 1621.05778,
"bins" : [ [ 0.99018, 1 ], [ 0.99184, 1 ], [ 0.9922, 1 ], [ 0.9924, 1 ], [ 0.99278, 1 ], [ 0.99308, 2 ], [ 0.99314, 1 ], [ 0.99319, 4 ], [ 0.99332, 1 ], [ 0.9934, 3 ], [ 0.99359, 2 ], [ 0.99363, 4 ], [ 0.9937, 7 ], [ 0.9938, 1 ], [ 0.99392, 6 ], [ 0.99398, 4 ], [ 0.9941, 5 ], [ 0.9942, 3 ], [ 0.99435, 4 ], [ 0.9944, 6 ], [ 0.99452, 10 ], [ 0.9946, 8 ], [ 0.99465, 2 ], [ 0.99473, 5 ], [ 0.9948, 16 ], [ 0.99486, 2 ], [ 0.9949, 11 ], [ 0.99494, 5 ], [ 0.995, 14 ], [ 0.99505, 3 ], [ 0.99512, 17 ], [ 0.99516, 1 ], [ 0.9952, 25 ], [ 0.99529, 19 ], [ 0.99536, 10 ], [ 0.99541, 33 ], [ 0.99545, 13 ], [ 0.9955, 35 ], [ 0.99555, 8 ], [ 0.9956, 35 ], [ 0.99566, 14 ], [ 0.99571, 18 ], [ 0.9958, 42 ], [ 0.99588, 36 ], [ 0.99595, 7 ], [ 0.99602, 22 ], [ 0.9961, 19 ], [ 0.99616, 1 ], [ 0.99621, 28 ], [ 0.99629, 23 ], [ 0.99636, 2 ], [ 0.99641, 27 ], [ 0.9965, 15 ], [ 0.99655, 8 ], [ 0.9966, 37 ], [ 0.99668, 24 ], [ 0.99679, 35 ], [ 0.99685, 8 ], [ 0.99691, 31 ], [ 0.997, 24 ], [ 0.99705, 13 ], [ 0.9971, 17 ], [ 0.99714, 4 ], [ 0.9972, 35 ], [ 0.99724, 1 ], [ 0.99729, 20 ], [ 0.9974, 40 ], [ 0.99745, 2 ], [ 0.9975, 23 ], [ 0.99755, 11 ], [ 0.9976, 35 ], [ 0.99767, 2 ], [ 0.99771, 28 ], [ 0.9978, 33 ], [ 0.99785, 13 ], [ 0.99791, 40 ], [ 0.998, 41 ], [ 0.99808, 34 ], [ 0.99815, 6 ], [ 0.9982, 31 ], [ 0.99825, 8 ], [ 0.99829, 27 ], [ 0.99835, 13 ], [ 0.9984, 33 ], [ 0.99845, 1 ], [ 0.9985, 9 ], [ 0.99855, 9 ], [ 0.99861, 53 ], [ 0.9987, 14 ], [ 0.99882, 19 ], [ 0.99889, 9 ], [ 0.99899, 20 ], [ 0.99908, 26 ], [ 0.99916, 2 ], [ 0.99922, 15 ], [ 0.9993, 5 ], [ 0.99936, 2 ], [ 0.99941, 15 ], [ 0.99947, 15 ], [ 0.99955, 6 ], [ 0.9996, 8 ], [ 0.99966, 2 ], [ 0.9997, 8 ], [ 0.99976, 9 ], [ 0.9998, 17 ], [ 0.99985, 1 ], [ 0.9999, 9 ], [ 1, 19 ], [ 1.00011, 16 ], [ 1.0002, 8 ], [ 1.0003, 3 ], [ 1.00039, 13 ], [ 1.00044, 2 ], [ 1.0005, 4 ], [ 1.00055, 1 ], [ 1.0006, 4 ], [ 1.0007, 1 ], [ 1.0008, 3 ], [ 1.001, 6 ], [ 1.0011, 2 ], [ 1.00119, 2 ], [ 1.0017, 2 ], [ 1.00182, 1 ], [ 1.00196, 1 ], [ 
1.0024, 2 ], [ 1.00295, 2 ], [ 1.0103, 2 ], [ 1.03898, 1 ] ],
"maximum" : 1.03898,
"missing_count" : 0,
"variance" : 0,
"median" : 0.99722,
"population" : 1630,
"minimum" : 0.99018,
"standard_deviation" : 0.00202,
"sum" : 1625.51941
},
"datatype" : "double",
"order" : 7,
"optype" : "numeric",
"name" : "density",
"column_number" : 7
}, {
"id" : "000007",
"preferred" : true,
"summary" : {
"mean" : 0.99322,
"sum_squares" : 1792.45765,
"bins" : [ [ 0.98914, 2 ], [ 0.98928, 2 ], [ 0.98944, 2 ], [ 0.9895, 2 ], [ 0.98959, 1 ], [ 0.98969, 2 ], [ 0.98976, 2 ], [ 0.98985, 3 ], [ 0.98992, 2 ], [ 0.99, 9 ], [ 0.9901, 4 ], [ 0.99016, 1 ], [ 0.9902, 8 ], [ 0.99028, 4 ], [ 0.99035, 1 ], [ 0.99039, 6 ], [ 0.99045, 15 ], [ 0.9905, 8 ], [ 0.99059, 16 ], [ 0.99069, 10 ], [ 0.99076, 4 ], [ 0.99082, 14 ], [ 0.99089, 6 ], [ 0.99094, 12 ], [ 0.991, 17 ], [ 0.99105, 6 ], [ 0.99111, 21 ], [ 0.99119, 14 ], [ 0.99125, 10 ], [ 0.9913, 16 ], [ 0.99135, 7 ], [ 0.9914, 28 ], [ 0.99146, 2 ], [ 0.99152, 20 ], [ 0.9916, 17 ], [ 0.99165, 14 ], [ 0.9917, 21 ], [ 0.99175, 15 ], [ 0.9918, 28 ], [ 0.99185, 10 ], [ 0.9919, 10 ], [ 0.99195, 10 ], [ 0.992, 50 ], [ 0.99208, 21 ], [ 0.99215, 13 ], [ 0.9922, 14 ], [ 0.99225, 10 ], [ 0.99231, 21 ], [ 0.99239, 39 ], [ 0.99244, 17 ], [ 0.9925, 19 ], [ 0.99255, 12 ], [ 0.9926, 21 ], [ 0.99266, 4 ], [ 0.9927, 45 ], [ 0.9928, 63 ], [ 0.99284, 9 ], [ 0.9929, 23 ], [ 0.99299, 55 ], [ 0.99305, 18 ], [ 0.9931, 24 ], [ 0.99315, 12 ], [ 0.9932, 41 ], [ 0.99325, 10 ], [ 0.9933, 17 ], [ 0.99335, 10 ], [ 0.9934, 43 ], [ 0.99346, 13 ], [ 0.99351, 22 ], [ 0.9936, 39 ], [ 0.99369, 32 ], [ 0.99374, 8 ], [ 0.9938, 54 ], [ 0.9939, 35 ], [ 0.99394, 1 ], [ 0.994, 44 ], [ 0.99406, 13 ], [ 0.99411, 27 ], [ 0.9942, 37 ], [ 0.99425, 7 ], [ 0.9943, 21 ], [ 0.99434, 5 ], [ 0.9944, 47 ], [ 0.99445, 8 ], [ 0.9945, 19 ], [ 0.99455, 10 ], [ 0.9946, 28 ], [ 0.9947, 16 ], [ 0.99474, 9 ], [ 0.9948, 34 ], [ 0.9949, 14 ], [ 0.99494, 1 ], [ 0.99499, 21 ], [ 0.9951, 23 ], [ 0.99519, 23 ], [ 0.99526, 2 ], [ 0.9953, 21 ], [ 0.99535, 8 ], [ 0.9954, 21 ], [ 0.99545, 4 ], [ 0.9955, 5 ], [ 0.99556, 3 ], [ 0.9956, 16 ], [ 0.99565, 4 ], [ 0.9957, 7 ], [ 0.99576, 6 ], [ 0.9958, 15 ], [ 0.9959, 8 ], [ 0.99596, 2 ], [ 0.996, 11 ], [ 0.99606, 1 ], [ 0.99611, 3 ], [ 0.9962, 11 ], [ 0.99629, 10 ], [ 0.99634, 1 ], [ 0.99645, 1 ], [ 0.99651, 7 ], [ 0.99659, 10 ], [ 0.99665, 2 ], [ 0.9967, 2 ], [ 0.9968, 1 ], [ 0.997, 4 ], [ 0.9972, 2 ], [ 
0.99724, 1 ], [ 0.9974, 1 ], [ 0.99754, 1 ], [ 0.9976, 1 ], [ 0.998, 1 ] ],
"maximum" : 0.998,
"missing_count" : 0,
"variance" : 0,
"median" : 0.99319,
"population" : 1817,
"minimum" : 0.98914,
"standard_deviation" : 0.00155,
"sum" : 1804.68491
},
"datatype" : "double",
"order" : 7,
"optype" : "numeric",
"name" : "density",
"column_number" : 7
}
]
<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Bars of the first series (class "bar0"); translucent so overlapping bars of the other series stay visible */
.bar0 {
fill: #5AC;
opacity: 0.6;
}
/* Bars of the second series (class "bar1"); same translucency, different color */
.bar1 {
fill: #C5A;
opacity: 0.6;
}
/* Text inside the axis groups */
.axis {
font: 12px sans-serif;
}
/* Axis domain path and tick lines: unfilled, black, crisp edges */
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
/* Selection rectangle inside the brush group */
.brush .extent {
stroke: #fff;
fill-opacity: .125;
}
/* Axis titles (field name and "Population") */
.fname {
font: 12px sans-serif;
font-weight: bold;
}
/* Status labels for the trimming / rounding / interpolation modes */
.modetext {
font: 12px sans-serif;
}
</style>
<body>
<!-- D3 v3, loaded over HTTPS so the script is not blocked as mixed content when this page is served over HTTPS -->
<script src="https://d3js.org/d3.v3.min.js"></script>
<script>
/* Estimate the cumulative population at or below `value`.
 *
 * rawBins: array of bins in the format built by loadRawBins. Every bin has
 *   `mean` and `pop`; non-exact bins also have `prev` and `next`.
 * value: the point up to which the population is accumulated.
 * trapInterpolateMode: when truthy, the interpolated fraction is squared
 *   (trapezoidal interpolation); otherwise it is used as is (linear).
 *
 * A bin counts in full once `value` reaches its mean (exact bins) or its
 * `next` point. Otherwise half of its population is interpolated between
 * `prev` and `mean`, and the other half between `mean` and `next`.
 */
function sum(rawBins, value, trapInterpolateMode) {
  // Share of a single bin's population that lies at or below `value`
  function contribution(bin) {
    if ((bin.exact && value >= bin.mean) || value >= bin.next) {
      return bin.pop;
    }
    var fraction;
    if (!bin.exact && value > bin.prev && value <= bin.mean) {
      // Rising half: between the previous point and the bin mean
      fraction = 1 - (bin.mean - value) / (bin.mean - bin.prev);
      if (trapInterpolateMode) fraction = fraction * fraction;
      return fraction * bin.pop / 2;
    }
    if (value > bin.mean && value <= bin.next) {
      // Falling half: the first half is complete, add part of the second
      fraction = 1 - (value - bin.mean) / (bin.next - bin.mean);
      if (trapInterpolateMode) fraction = fraction * fraction;
      return bin.pop / 2 + (1 - fraction) * bin.pop / 2;
    }
    return 0;
  }

  var total = 0;
  for (var idx = 0; idx < rawBins.length; idx++) {
    total += contribution(rawBins[idx]);
  }
  return total;
}
/* Convert one field summary into the bin objects consumed by sum().
 *
 * data: a field description whose `summary` holds either
 *   - `bins`: [mean, population] pairs together with `minimum` and
 *     `maximum`, or
 *   - `counts`: [value, population] pairs, used when `bins` is absent.
 *
 * Returns an array of {mean, pop, exact} objects. A bin is exact when its
 * population is 1 or its mean equals the summary minimum or maximum. Every
 * other bin also gets `prev` and `next`: the means of the neighboring bins,
 * falling back to the summary minimum/maximum at either end.
 */
function loadRawBins(data) {
  var summary = data.summary;

  if (!("bins" in summary)) {
    // No histogram bins: every entry of `counts` is treated as exact
    return summary.counts.map(function(pair) {
      return {mean: pair[0], pop: pair[1], exact: true};
    });
  }

  var sbins = summary.bins;
  // Declared locally so nothing leaks into the global scope
  var minimum = summary.minimum;
  var maximum = summary.maximum;
  var lastIndex = sbins.length - 1;

  return sbins.map(function(pair, i) {
    var bin = {mean: pair[0], pop: pair[1]};
    bin.exact = bin.pop === 1 || bin.mean === minimum || bin.mean === maximum;
    if (!bin.exact) {
      bin.prev = i > 0 ? sbins[i - 1][0] : minimum;
      bin.next = i < lastIndex ? sbins[i + 1][0] : maximum;
    }
    return bin;
  });
}
/* Split `range` into `vizBinCount` equal-width display bins and estimate
 * the population of each one from the raw bins.
 *
 * rawBins: bins in the format built by loadRawBins.
 * range: [low, high] span to visualize.
 * vizBinCount: number of display bins.
 * roundMode: when truthy, each bin population is rounded with Math.round.
 * trapInterpolateMode: forwarded unchanged to sum().
 *
 * Returns {bins, maxPop, range}: `bins` is an array of {range, pop},
 * `maxPop` the largest bin population (never below 0) and `range` the
 * array that was passed in.
 */
function buildViz(rawBins, range, vizBinCount, roundMode, trapInterpolateMode) {
  // Using a slight shift to act like a closed interval
  var shift = 0.0000001;
  var inc = (range[1] - range[0]) / vizBinCount;
  var vizBins = [];
  var maxPop = 0;

  // Cumulative population at the lower edge of the bin being built
  var lower = range[0] - shift;
  var lowerPop = sum(rawBins, lower, trapInterpolateMode);

  while (vizBins.length < vizBinCount) {
    var upper = lower + inc;
    var upperPop = sum(rawBins, upper, trapInterpolateMode);
    // A bin's population is the difference of two cumulative sums
    var pop = roundMode ? Math.round(upperPop - lowerPop) : upperPop - lowerPop;
    maxPop = Math.max(maxPop, pop);
    vizBins.push({range: [lower + shift, upper], pop: pop});
    lower = upper;
    lowerPop = upperPop;
  }

  return {bins: vizBins, maxPop: maxPop, range: range};
}
/* Return `term` with its first character upper-cased; the rest is left as is. */
function capitalize(term) {
  var head = term.slice(0, 1);
  var tail = term.slice(1);
  return head.toUpperCase() + tail;
}
/* Narrow `fullRange` by stepping inward from both ends until the cumulative
 * population crosses a threshold.
 *
 * fullRange: [low, high] span to start from.
 * rawBins: bins in the format built by loadRawBins.
 * buffer: fraction of the total population used as the threshold on each side.
 * segments: number of equal steps `fullRange` is divided into.
 *
 * Returns [start, end]. Each side is moved back out by one step after the
 * threshold is crossed, so the step containing the crossing is kept.
 */
function trimRange(fullRange, rawBins, buffer, segments) {
  var low = fullRange[0];
  var high = fullRange[1];
  var total = rawBins.reduce(function(acc, bin) { return acc + bin.pop; }, 0);
  var step = (high - low) / segments;
  var lowCut = total * buffer;
  var highCut = total - (total * buffer);

  // Walk up from the bottom while no more than `lowCut` lies at or below `start`
  var start = low;
  var taken = 0;
  while (taken < segments && !(sum(rawBins, start, false) > lowCut)) {
    start += step;
    taken++;
  }
  if (start > low) start -= step;

  // Walk down from the top while at least `highCut` lies at or below `end`
  var end = high;
  taken = 0;
  while (taken < segments && !(sum(rawBins, end, false) < highCut)) {
    end -= step;
    taken++;
  }
  if (end < high) end += step;

  return [start, end];
}
/* Widen `range` on both sides by half the width of one visualization bin.
 *
 * range: [low, high] span.
 * vizBinCount: number of bins the span is divided into.
 *
 * Returns a new [low, high] array; `range` itself is not modified.
 */
function bufferRange(range, vizBinCount) {
  var low = range[0];
  var high = range[1];
  var pad = 0.5 * (high - low) / vizBinCount;
  return [low - pad, high + pad];
}
// Chart geometry: a 960x500 outer frame with the plot area inset by the margins
var margin = {top: 30, right: 40, bottom: 30, left: 40};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;

// Root <svg> sized to the outer frame; `svg` itself is the inner group,
// translated so that (0, 0) is the top-left corner of the plot area
var svg = d3.select("body")
  .append("svg")
    .attr("width", margin.left + width + margin.right)
    .attr("height", margin.top + height + margin.bottom)
  .append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
d3.json("density.json", function(error, data) {
// Fail with a clear message when the file could not be loaded or does not
// hold the two field summaries read below (data[0] and data[1])
if (error || !Array.isArray(data) || data.length < 2) {
  throw new Error("Could not load two density summaries from density.json");
}
var rawBins = [loadRawBins(data[0]), loadRawBins(data[1])];
// Scales, axes and axis groups: assigned in init() and reused afterwards
var x, y;
var xAxis, yAxis;
var xAxisG, yAxisG;
var vizBinCount = 32;
// Fraction of each distribution's population used as the trimming threshold
// on either side (0.01 = 1%)
var trimFraction = 0.01;
var trimRange0 = trimRange([data[0].summary.minimum, data[0].summary.maximum], rawBins[0], trimFraction, vizBinCount);
var trimRange1 = trimRange([data[1].summary.minimum, data[1].summary.maximum], rawBins[1], trimFraction, vizBinCount);
// Union of the two trimmed ranges, padded by half a bar on each side
var trimmedInitRange = bufferRange(
  [Math.min(trimRange0[0], trimRange1[0]), Math.max(trimRange0[1], trimRange1[1])],
  vizBinCount);
// Union of the two full ranges, padded the same way
var initRange = bufferRange(
  [Math.min(data[0].summary.minimum, data[1].summary.minimum), Math.max(data[0].summary.maximum, data[1].summary.maximum)],
  vizBinCount);
// By default trim the edges of the distribution; set trimMode to false and
// resetRange to initRange to start with the entire range instead
var trimMode = true;
var resetRange = trimmedInitRange;
var roundMode = false;
var trapInterpolateMode = false;
var viz0 = buildViz(rawBins[0], resetRange, vizBinCount, roundMode, trapInterpolateMode);
var viz1 = buildViz(rawBins[1], resetRange, vizBinCount, roundMode, trapInterpolateMode);
init();
/* Draw the chart for the first time and wire up the keyboard toggles.
 *
 * Creates the x/y scales from viz0/viz1, one rect per visualization bin for
 * each of the two series, both axes with their titles, and the three mode
 * labels. The scales, axes and axis groups are stored in the enclosing
 * x, y, xAxis, yAxis, xAxisG and yAxisG variables so later redraws can
 * reuse them.
 */
function init() {
  // Both series share the same geometry; only the CSS class differs
  function drawBars(className, bins) {
    svg.selectAll("." + className)
        .data(bins)
      .enter().append("rect")
        .attr("class", className)
        .attr("x", function(d) { return x(d.range[0]) + 1; })
        .attr("width", function(d) { return x(d.range[1]) - x(d.range[0]) - 2; })
        .attr("y", function(d) { return y(d.pop); })
        .attr("height", function(d) { return height - y(d.pop); });
  }

  // One line of status text near the top-right corner of the plot
  function addModeText(offsetY, label) {
    return svg.append("text")
        .attr("class", "modetext")
        .attr("x", width - 150)
        .attr("y", offsetY)
        .style("text-anchor", "start")
        .text(label);
  }

  // Labels are derived from the current mode flags so they cannot drift
  // from the actual state
  function trimLabel() {
    return "(T)rimming: " + (trimMode ? "On" : "Off");
  }
  function roundLabel() {
    return "(R)ounding: " + (roundMode ? "On" : "Off");
  }
  function interpolateLabel() {
    return "(I)nterpolation: " + (trapInterpolateMode ? "Trapezoidal" : "Linear");
  }

  // linear.range() takes a single [start, stop] array
  x = d3.scale.linear()
      .domain(viz0.range)
      .range([0, width]);
  y = d3.scale.linear()
      .domain([0, Math.max(viz0.maxPop, viz1.maxPop)])
      .range([height, 0]);

  drawBars("bar0", viz0.bins);
  drawBars("bar1", viz1.bins);

  xAxis = d3.svg.axis()
      .ticks(Math.round(width / 60))
      .scale(x)
      .orient("bottom");
  yAxis = d3.svg.axis()
      .ticks(Math.round(height / 30))
      .scale(y)
      .orient("left");

  xAxisG = svg.append("g")
      .attr("class", "x axis")
      .attr("transform", "translate(0," + height + ")")
      .call(xAxis);
  // Field name, right-aligned just above the x axis
  xAxisG.append("text")
      .attr("class", "fname")
      .attr("y", -16)
      .attr("x", width)
      .attr("dy", ".71em")
      .style("text-anchor", "end")
      .text(capitalize(data[0].name));

  yAxisG = svg.append("g")
      .attr("class", "y axis")
      .call(yAxis);
  yAxisG.append("text")
      .attr("class", "fname")
      .attr("transform", "rotate(-90)")
      .attr("y", 6)
      .attr("dy", ".71em")
      .style("text-anchor", "end")
      .text("Population");

  var trimText = addModeText(10, trimLabel());
  var roundText = addModeText(25, roundLabel());
  var interpolateText = addModeText(40, interpolateLabel());

  d3.select("body").on("keydown", function() {
    switch (d3.event.keyCode) {
      case 84: // t: toggle trimming and jump to the matching reset range
        trimMode = !trimMode;
        resetRange = trimMode ? trimmedInitRange : initRange;
        trimText.text(trimLabel());
        update(rawBins, resetRange, vizBinCount);
        break;
      case 82: // r: toggle rounding, keeping the current zoom
        roundMode = !roundMode;
        roundText.text(roundLabel());
        update(rawBins, viz0.range, vizBinCount);
        break;
      case 73: // i: toggle the interpolation mode, keeping the current zoom
        trapInterpolateMode = !trapInterpolateMode;
        interpolateText.text(interpolateLabel());
        update(rawBins, viz0.range, vizBinCount);
        break;
    }
  });
}
/* Recompute both series for `range` and animate the chart to match.
 *
 * rawBins: array holding the two raw-bin arrays (index 0 and 1).
 * range: [low, high] x-domain to display.
 * vizBinCount: number of bars per series.
 *
 * Only the bar heights and the axes are animated. Bar x/width are left
 * untouched: as long as vizBinCount stays the same, the bars split the
 * visible domain into the same equal slices, so their pixel positions do
 * not change.
 */
function update(rawBins, range, vizBinCount) {
  var animationDuration = 700;

  // Animate one series' bars to their new heights
  function resizeBars(className, bins) {
    svg.selectAll("." + className)
        .data(bins)
      .transition().duration(animationDuration)
        .attr("y", function(d) { return y(d.pop); })
        .attr("height", function(d) { return height - y(d.pop); });
  }

  viz0 = buildViz(rawBins[0], range, vizBinCount, roundMode, trapInterpolateMode);
  viz1 = buildViz(rawBins[1], range, vizBinCount, roundMode, trapInterpolateMode);

  // The pixel ranges never change, so only the domains of the existing
  // scales are updated; the axes already reference these scales
  x.domain(viz0.range);
  y.domain([0, Math.max(viz0.maxPop, viz1.maxPop)]);

  resizeBars("bar0", viz0.bins);
  resizeBars("bar1", viz1.bins);

  xAxisG.transition().duration(animationDuration).call(xAxis);
  yAxisG.transition().duration(animationDuration).call(yAxis);
}
// Latest brush selection, written by the "brush" handler
var brushExtent;
// The brush works in pixel space: an identity scale over [0, width]
var brush = d3.svg.brush()
    .x(d3.scale.identity().domain([0, width]))
    .on("brush", brushed)
    .on("brushend", brushended);
// Attach the brush to its own group, dispatch its events once, and stretch
// every rect it created over the full plot height
svg.append("g")
    .attr("class", "brush")
    .call(brush)
    .call(brush.event)
  .selectAll("rect")
    .attr("height", height);
/* "brush" handler: remember the latest selection (in pixels). */
function brushed() {
  brushExtent = brush.extent();
}

/* "brushend" handler: zoom into the brushed span, or back out to
 * `resetRange` when the selection is empty (a plain click).
 */
function brushended() {
  if (!d3.event.sourceEvent) return; // only transition after input
  // Read the selection from the brush itself rather than from the value
  // cached by brushed(): the cache is only refreshed by "brush" events and
  // may be stale if a click does not move the brush
  var extent = brush.extent();
  var isEmpty = brush.empty();
  // The brush is in pixels; convert to data values with the chart's x scale
  var start = x.invert(extent[0]);
  var end = x.invert(extent[1]);
  // Remove the selection rectangle before redrawing
  d3.select(this).call(brush.clear());
  if (isEmpty) {
    update(rawBins, resetRange, vizBinCount);
  } else {
    update(rawBins, bufferRange([start, end], vizBinCount), vizBinCount);
  }
}
});
</script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment