Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active August 29, 2015 14:08
Show Gist options
  • Select an option

  • Save ashenfad/55f283cd628b6775a331 to your computer and use it in GitHub Desktop.

Select an option

Save ashenfad/55f283cd628b6775a331 to your computer and use it in GitHub Desktop.
Wine Sulphate Histogram

A visualization of sulphates in wine using distributions from two separate clusters (kmeans). The distributions are stored with a streaming histogram.

  • Brush to zoom.
  • Click to zoom out.
  • t to toggle trimming some of the outliers from the distribution.
  • r to toggle rounding populations for each bin.
  • i to toggle the distribution interpolation mode.
<!DOCTYPE html>
<meta charset="utf-8">
<style>
.bar0 {
fill: #5AC;
opacity: 0.6;
}
.bar1 {
fill: #C5A;
opacity: 0.6;
}
.axis {
font: 12px sans-serif;
}
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
.brush .extent {
stroke: #fff;
fill-opacity: .125;
}
.fname {
font: 12px sans-serif;
font-weight: bold;
}
.modetext {
font: 12px sans-serif;
}
</style>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
/* Estimates the cumulative population of a distribution up to a point.
 *
 * rawBins: bins shaped like the output of loadRawBins
 *          ({mean, pop, exact[, prev, next]}).
 * value: the point up to which the population is accumulated.
 * trapInterpolateMode: when truthy the partial coverage of a bin is
 *          squared (trapezoidal interpolation) instead of used as is.
 *
 * An exact bin adds its whole population as soon as value reaches its
 * mean. Any other bin spreads half of its population between prev and
 * mean and the other half between mean and next.
 */
function sum(rawBins, value, trapInterpolateMode) {
  // Turns the fraction of a half-bin that is NOT covered into the
  // (optionally squared) remaining area
  function remainingArea(fraction) {
    var area = 1 - fraction;
    return trapInterpolateMode ? area * area : area;
  }

  var total = 0;
  for (var idx = 0, count = rawBins.length; idx < count; idx++) {
    var bin = rawBins[idx];

    // The bin lies completely at or below the requested point
    var reachedExactPoint = bin.exact && value >= bin.mean;
    if (reachedExactPoint || value >= bin.next) {
      total += bin.pop;
      continue;
    }

    // The point falls on the lower half of an interpolated bin
    var onLowerHalf = !bin.exact && value > bin.prev && value <= bin.mean;
    if (onLowerHalf) {
      total += remainingArea((bin.mean - value) / (bin.mean - bin.prev)) * bin.pop / 2;
      continue;
    }

    // The point falls on the upper half: the lower half counts fully
    if (value > bin.mean && value <= bin.next) {
      total += bin.pop / 2 + (1 - remainingArea((value - bin.mean) / (bin.next - bin.mean))) * bin.pop / 2;
    }
  }
  return total;
}
/* Loads the distribution into a convenient format.
 *
 * data: a field description whose summary holds either
 *   - "bins" ([mean, population] pairs) plus "minimum" and "maximum", or
 *   - "counts" ([value, population] pairs).
 *
 * Returns an array of {mean, pop, exact} objects. A bin is exact when
 * it holds a single point or sits on the minimum or maximum; every
 * entry built from "counts" is exact. Bins that are not exact also
 * carry prev and next: the means of the neighbouring bins, falling
 * back to the minimum / maximum at either end of the list.
 */
function loadRawBins(data) {
  var summary = data.summary;
  if (!("bins" in summary)) {
    return summary.counts.map(function(entry) {
      return {mean: entry[0], pop: entry[1], exact: true};
    });
  }
  var sbins = summary.bins;
  // Declared locally so they no longer leak out as implicit globals
  var minimum = summary.minimum;
  var maximum = summary.maximum;
  return sbins.map(function(entry, i) {
    var bin = {mean: entry[0], pop: entry[1]};
    bin.exact = bin.pop === 1 || bin.mean === minimum || bin.mean === maximum;
    if (!bin.exact) {
      bin.prev = i > 0 ? sbins[i - 1][0] : minimum;
      bin.next = i < sbins.length - 1 ? sbins[i + 1][0] : maximum;
    }
    return bin;
  });
}
/* Splits a range into equally wide bins for drawing.
 *
 * rawBins: the distribution, as produced by loadRawBins.
 * range: [low, high] interval to cover.
 * vizBinCount: how many bins to produce.
 * roundMode: when truthy every bin population is rounded.
 * trapInterpolateMode: handed through to sum().
 *
 * Returns {bins, maxPop, range} where bins is a list of
 * {range: [low, high], pop} and maxPop is the largest pop (at least 0).
 */
function buildViz(rawBins, range, vizBinCount, roundMode, trapInterpolateMode) {
  // Nudging the lower edge down makes the first bin behave like a
  // closed interval
  var EDGE_SHIFT = 0.0000001;
  var step = (range[1] - range[0]) / vizBinCount;
  var columns = [];
  var tallest = 0;

  var lower = range[0] - EDGE_SHIFT;
  var lowerTotal = sum(rawBins, lower, trapInterpolateMode);
  while (columns.length < vizBinCount) {
    var upper = lower + step;
    var upperTotal = sum(rawBins, upper, trapInterpolateMode);
    // A bin's population is the difference of two cumulative totals
    var delta = upperTotal - lowerTotal;
    var pop = roundMode ? Math.round(delta) : delta;
    tallest = Math.max(tallest, pop);
    columns.push({range: [lower + EDGE_SHIFT, upper], pop: pop});
    lower = upper;
    lowerTotal = upperTotal;
  }
  return {bins: columns, maxPop: tallest, range: range};
}
/* Returns the term with its first character upper-cased. */
function capitalize(term) {
  var head = term.substring(0, 1);
  var tail = term.substring(1);
  return head.toUpperCase() + tail;
}
/* Narrows a range by cutting sparsely populated edges off of it.
 *
 * fullRange: [low, high] interval to start from.
 * rawBins: the distribution, as produced by loadRawBins.
 * buffer: share of the total population that may be cut from each side.
 * segments: number of equal steps the range is divided into while
 *           searching for the new edges.
 *
 * Returns the trimmed [start, end].
 */
function trimRange(fullRange, rawBins, buffer, segments) {
  var total = rawBins.reduce(function(acc, bin) {
    return acc + bin.pop;
  }, 0);
  var lowest = fullRange[0];
  var highest = fullRange[1];
  var interval = (highest - lowest) / segments;
  var step;

  // Walk up from the low edge until more than the allowed share of the
  // population lies below, then back off by one step
  var startThreshold = total * buffer;
  var start = lowest;
  for (step = 0; step < segments && !(sum(rawBins, start, false) > startThreshold); step++) {
    start += interval;
  }
  if (start > lowest) {
    start -= interval;
  }

  // Mirror image of the above, walking down from the high edge
  var endThreshold = total - (total * buffer);
  var end = highest;
  for (step = 0; step < segments && !(sum(rawBins, end, false) < endThreshold); step++) {
    end -= interval;
  }
  if (end < highest) {
    end += interval;
  }

  return [start, end];
}
/* Widens a range by half the width of one visualization bin on each side.
 *
 * range: [low, high] interval.
 * vizBinCount: number of bins the range will be split into.
 *
 * Returns a new [low, high] pair; the input is not modified.
 */
function bufferRange(range, vizBinCount) {
  var low = range[0];
  var high = range[1];
  var padding = 0.5 * (high - low) / vizBinCount;
  return [low - padding, high + padding];
}
// Chart geometry: a 960x500 canvas with the plotting area inset by the margins
var margin = {top: 30, right: 40, bottom: 30, left: 40};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;
// Root <svg> sized to the whole canvas; svg itself refers to the inner
// group that is shifted to the top-left corner of the plotting area
var svg = d3.select("body")
  .append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + [margin.left, margin.top].join(",") + ")");
d3.json("sulphates.json", function(error, data) {
// Stop with a readable message when the data could not be loaded, rather
// than failing later with a TypeError on the missing data
if (error) {
  throw new Error("Unable to load sulphates.json: " + (error.statusText || error.message || error));
}
// One list of raw bins per entry of the data file
var rawBins = [loadRawBins(data[0]), loadRawBins(data[1])];
// Scales, axes and axis groups are created by init() and replaced by update()
var x, y;
var xAxis, yAxis;
var xAxisG, yAxisG;
var vizBinCount = 32;
// Share of the population that trimming may cut from each edge (1%)
var trimBuffer = 0.01;
var fullRange0 = [data[0].summary.minimum, data[0].summary.maximum];
var fullRange1 = [data[1].summary.minimum, data[1].summary.maximum];
var trimRange0 = trimRange(fullRange0, rawBins[0], trimBuffer, vizBinCount);
var trimRange1 = trimRange(fullRange1, rawBins[1], trimBuffer, vizBinCount);
// Both distributions share one x axis, so each range is the union of the
// two per-distribution ranges plus a small buffer on either side
var trimmedInitRange = bufferRange(
  [Math.min(trimRange0[0], trimRange1[0]), Math.max(trimRange0[1], trimRange1[1])],
  vizBinCount);
var initRange = bufferRange(
  [Math.min(fullRange0[0], fullRange1[0]), Math.max(fullRange0[1], fullRange1[1])],
  vizBinCount);
// By default trim the edges of the distribution
var trimMode = true;
// Range shown after zooming out; follows the trimming mode
var resetRange = trimmedInitRange;
var roundMode = false;
var trapInterpolateMode = false;
var viz0 = buildViz(rawBins[0], resetRange, vizBinCount, roundMode, trapInterpolateMode);
var viz1 = buildViz(rawBins[1], resetRange, vizBinCount, roundMode, trapInterpolateMode);
init();
/* Draws the chart for the first time: scales, the bars of both
 * distributions, the axes with their titles, the three mode labels,
 * and the keyboard shortcuts (t, r, i) that toggle those modes.
 */
function init() {
  // Adds one rect per visualization bin, tagged with the given CSS class
  function drawBars(className, bins) {
    svg.selectAll("." + className)
      .data(bins)
      .enter().append("rect")
      .attr("class", className)
      // 1px inset on both sides leaves a gap between neighbouring bars
      .attr("x", function(d) { return x(d.range[0]) + 1; })
      .attr("width", function(d) { return x(d.range[1]) - x(d.range[0]) - 2; })
      .attr("y", function(d) { return y(d.pop); })
      .attr("height", function(d) { return height - y(d.pop); });
  }

  // Adds a mode label 150px left of the right edge and returns its selection
  function addModeText(yOffset, label) {
    return svg.append("text")
      .attr("class", "modetext")
      .attr("x", width - 150)
      .attr("y", yOffset)
      .style("text-anchor", "start")
      .text(label);
  }

  // range() of a linear scale takes a single array argument
  x = d3.scale.linear()
    .domain(viz0.range)
    .range([0, width]);
  y = d3.scale.linear()
    .domain([0, Math.max(viz0.maxPop, viz1.maxPop)])
    .range([height, 0]);

  drawBars("bar0", viz0.bins);
  drawBars("bar1", viz1.bins);

  xAxis = d3.svg.axis()
    .ticks(Math.round(width / 60))
    .scale(x)
    .orient("bottom");
  yAxis = d3.svg.axis()
    .ticks(Math.round(height / 30))
    .scale(y)
    .orient("left");

  xAxisG = svg.append("g")
    .attr("class", "x axis")
    .attr("transform", "translate(0," + height + ")")
    .call(xAxis);
  xAxisG.append("text")
    .attr("class", "fname")
    .attr("y", -16)
    .attr("x", width)
    .attr("dy", ".71em")
    .style("text-anchor", "end")
    .text(capitalize(data[0].name));
  yAxisG = svg.append("g")
    .attr("class", "y axis")
    .call(yAxis);
  yAxisG.append("text")
    .attr("class", "fname")
    .attr("transform", "rotate(-90)")
    .attr("y", 6)
    .attr("dy", ".71em")
    .style("text-anchor", "end")
    .text("Population");

  var trimText = addModeText(10, "(T)rimming: On");
  var roundText = addModeText(25, "(R)ounding: Off");
  var interpolateText = addModeText(40, "(I)nterpolation: Linear");

  d3.select("body")
    .on("keydown", function() {
      var keyCode = d3.event.keyCode;
      if (keyCode === 84) {
        // t = 84: switch the zoomed-out range and jump to it
        trimMode = !trimMode;
        if (trimMode) {
          resetRange = trimmedInitRange;
          trimText.text("(T)rimming: On");
        } else {
          resetRange = initRange;
          trimText.text("(T)rimming: Off");
        }
        update(rawBins, resetRange, vizBinCount);
      } else if (keyCode === 82) {
        // r = 82: redraw the current range with or without rounding
        roundMode = !roundMode;
        if (roundMode) {
          roundText.text("(R)ounding: On");
        } else {
          roundText.text("(R)ounding: Off");
        }
        update(rawBins, viz0.range, vizBinCount);
      } else if (keyCode === 73) {
        // i = 73: redraw the current range with the other interpolation
        trapInterpolateMode = !trapInterpolateMode;
        if (trapInterpolateMode) {
          interpolateText.text("(I)nterpolation: Trapezoidal");
        } else {
          interpolateText.text("(I)nterpolation: Linear");
        }
        update(rawBins, viz0.range, vizBinCount);
      }
    });
}
/* Rebuilds both visualizations for the given range and animates the
 * bars and axes to the new values.
 *
 * rawBins: the two raw bin lists (one per distribution).
 * range: [low, high] interval to display.
 * vizBinCount: number of bars per distribution.
 */
function update(rawBins, range, vizBinCount) {
  var animationDuration = 700;

  // Animates the bars matched by the selector to their new heights. Only
  // y and height are touched; x and width stay as init() set them.
  function resizeBars(selector, bins) {
    svg.selectAll(selector)
      .data(bins)
      .transition().duration(animationDuration)
      .attr("y", function(d) { return y(d.pop); })
      .attr("height", function(d) { return height - y(d.pop); });
  }

  viz0 = buildViz(rawBins[0], range, vizBinCount, roundMode, trapInterpolateMode);
  viz1 = buildViz(rawBins[1], range, vizBinCount, roundMode, trapInterpolateMode);

  // range() of a linear scale takes a single array argument
  x = d3.scale.linear()
    .domain(viz0.range)
    .range([0, width]);
  y = d3.scale.linear()
    .domain([0, Math.max(viz0.maxPop, viz1.maxPop)])
    .range([height, 0]);

  resizeBars(".bar0", viz0.bins);
  resizeBars(".bar1", viz1.bins);

  xAxis = d3.svg.axis()
    .ticks(Math.round(width / 60))
    .scale(x)
    .orient("bottom");
  yAxis = d3.svg.axis()
    .ticks(Math.round(height / 30))
    .scale(y)
    .orient("left");
  xAxisG.transition().duration(animationDuration).call(xAxis);
  yAxisG.transition().duration(animationDuration).call(yAxis);
}
// The brush works in pixel space (identity scale); pixels are converted
// to data values with the current x scale once a gesture ends
var brushX = d3.scale.identity().domain([0, width]);
// Latest brush selection in pixels, recorded by brushed()
var brushExtent;
var brush = d3.svg.brush()
  .x(brushX)
  .on("brush", brushed)
  .on("brushend", brushended);
// NOTE(review): brush.event presumably dispatches the brush events once
// up front so brushExtent is set before the first gesture; confirm
// against the d3 v3 brush documentation
var gBrush = svg.append("g")
  .attr("class", "brush")
  .call(brush)
  .call(brush.event);
gBrush.selectAll("rect")
  .attr("height", height);

/* Records the current brush selection. */
function brushed() {
  brushExtent = brush.extent();
}

/* Zooms to the brushed interval, or back out to resetRange when the
 * gesture selected nothing (a plain click).
 */
function brushended() {
  if (!d3.event.sourceEvent) return; // only transition after input
  if (!brushExtent) return; // nothing has been recorded yet
  var start = x.invert(brushExtent[0]);
  var end = x.invert(brushExtent[1]);
  // Clear the selection; a brush with only an x scale takes [x0, x1]
  d3.select(this).call(brush.extent([0, 0]));
  if (start === end) {
    update(rawBins, resetRange, vizBinCount);
  } else {
    update(rawBins, bufferRange([start, end], vizBinCount), vizBinCount);
  }
}
});
</script>
[{"optype": "numeric", "name": "Sulphates", "datatype": "double", "preferred": true, "summary": {"sum_squares": 671.13999, "missing_count": 0, "sum": 990.22, "median": 0.61654, "maximum": 1.62, "minimum": 0.3, "standard_deviation": 0.14253, "variance": 0.02032, "population": 1532, "bins": [[0.3, 1], [0.335, 2], [0.36667, 3], [0.395, 8], [0.43357, 28], [0.46173, 52], [0.492, 95], [0.52955, 178], [0.57043, 276], [0.6135, 206], [0.64396, 91], [0.66982, 113], [0.70025, 81], [0.73, 78], [0.76123, 65], [0.7893, 57], [0.8291, 78], [0.86733, 30], [0.90045, 22], [0.92941, 17], [0.96364, 11], [0.99, 7], [1.022, 5], [1.05444, 9], [1.08, 1], [1.105, 2], [1.13, 4], [1.17333, 6], [1.21, 2], [1.36, 2], [1.56, 1], [1.62, 1]], "mean": 0.64636}, "column_number": 10, "order": 10},{"optype": "numeric", "name": "Sulphates", "datatype": "double", "preferred": true, "summary": {"sum_squares": 464.29979, "missing_count": 0, "sum": 907.14, "median": 0.48263, "maximum": 0.88, "minimum": 0.27, "standard_deviation": 0.09957, "variance": 0.00991, "population": 1845, "bins": [[0.27, 1], [0.28, 1], [0.29667, 18], [0.31, 3], [0.32684, 19], [0.346, 55], [0.36513, 78], [0.3835, 123], [0.40495, 107], [0.42535, 127], [0.44462, 158], [0.46407, 172], [0.48519, 160], [0.50325, 200], [0.52481, 131], [0.54337, 101], [0.56405, 74], [0.585, 72], [0.60385, 52], [0.62432, 37], [0.64292, 24], [0.6664, 25], [0.6848, 25], [0.70231, 13], [0.72091, 11], [0.74857, 14], [0.765, 12], [0.78412, 17], [0.8, 1], [0.819, 10], [0.85, 1], [0.88, 3]], "mean": 0.49167}, "column_number": 10, "order": 10}]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment