Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active August 29, 2015 14:08
Show Gist options
  • Select an option

  • Save ashenfad/2a7bf0be2dd28ef49296 to your computer and use it in GitHub Desktop.

Select an option

Save ashenfad/2a7bf0be2dd28ef49296 to your computer and use it in GitHub Desktop.
Blood Plasma Histogram

A visualization of the plasma glucose field from the UCI diabetes dataset. The distribution is stored as a streaming histogram.

  • Brush to zoom.
  • Click to zoom out.
  • Press t to toggle trimming some of the outliers from the distribution.
  • Press r to toggle rounding of the population in each bin.
  • Press i to toggle the distribution interpolation mode (linear or trapezoidal).
<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Histogram bars: translucent blue rectangles. */
.bar {
  opacity: 0.6;
  fill: #55aacc;
}

/* Text inside either axis group. */
.axis {
  font: 12px sans-serif;
}

/* Axis domain path and tick lines: thin, unfilled, pixel-aligned strokes. */
.axis line,
.axis path {
  shape-rendering: crispEdges;
  stroke: #000000;
  fill: none;
}

/* The selection rectangle drawn while brushing. */
.brush .extent {
  shape-rendering: crispEdges;
  fill-opacity: 0.125;
  stroke: #ffffff;
}

/* Bold captions attached to the axes. */
.fname {
  font: bold 12px sans-serif;
}

/* Mode indicator lines (trimming / rounding / interpolation). */
.modetext {
  font: 12px sans-serif;
}
</style>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
/* Estimate the cumulative population of the distribution up to `value`.
 *
 * Each raw bin contributes one of:
 *   - its whole population, when `value` is at or past the bin's mean
 *     (exact bins) or at or past bin.next;
 *   - part of the lower half of its population, when `value` lies
 *     between bin.prev and bin.mean (non-exact bins only);
 *   - the lower half plus part of the upper half, when `value` lies
 *     between bin.mean and bin.next;
 *   - nothing otherwise.
 *
 * When trapInterpolateMode is truthy the partial fraction is squared
 * (trapezoidal interpolation); otherwise it is used as-is (linear).
 */
function sum(rawBins, value, trapInterpolateMode) {
  var shape = function (fraction) {
    return trapInterpolateMode ? fraction * fraction : fraction;
  };

  return rawBins.reduce(function (running, bin) {
    var fullyCovered = (bin.exact && value >= bin.mean) || value >= bin.next;
    if (fullyCovered) {
      return running + bin.pop;
    }
    if (!bin.exact && value > bin.prev && value <= bin.mean) {
      // Somewhere in the lower half of the bin.
      var lowerArea = shape(1 - (bin.mean - value) / (bin.mean - bin.prev));
      return running + lowerArea * bin.pop / 2;
    }
    if (value > bin.mean && value <= bin.next) {
      // Whole lower half, plus the covered part of the upper half.
      var upperArea = shape(1 - (value - bin.mean) / (bin.next - bin.mean));
      return running + (bin.pop / 2 + (1 - upperArea) * bin.pop / 2);
    }
    return running;
  }, 0);
}
/* Loads the distribution into a convenient format.
 *
 * Reads data.summary.bins (pairs of [mean, population]) or, when that key
 * is absent, data.summary.counts (pairs that are all treated as exact).
 *
 * Returns an array of {mean, pop, exact} objects. A bin is flagged exact
 * when its population is 1 or its mean equals the summary minimum or
 * maximum. Every other bin also carries `prev` and `next`: the means of
 * the neighbouring bins, falling back to the summary minimum / maximum at
 * either end of the list.
 */
function loadRawBins(data) {
  var summary = data.summary;
  var bins = [];
  var sbins;
  var i;
  if ("bins" in summary) {
    sbins = summary.bins;
    // Declared locally: these used to be assigned without a declaration,
    // which leaked two globals and throws in strict mode / ES modules.
    var minimum = summary.minimum;
    var maximum = summary.maximum;
    for (i = 0; i < sbins.length; i++) {
      var bin = {mean: sbins[i][0], pop: sbins[i][1]};
      if (bin.pop == 1 || bin.mean == minimum || bin.mean == maximum) {
        bin.exact = true;
      } else {
        bin.exact = false;
        bin.prev = i > 0 ? sbins[i - 1][0] : minimum;
        bin.next = i < sbins.length - 1 ? sbins[i + 1][0] : maximum;
      }
      bins[i] = bin;
    }
  } else {
    sbins = summary.counts;
    for (i = 0; i < sbins.length; i++) {
      bins[i] = {mean: sbins[i][0], pop: sbins[i][1], exact: true};
    }
  }
  return bins;
}
/* Re-bin the raw distribution into `vizBinCount` equal-width bins that
 * span `range`, ready for drawing.
 *
 * Each visual bin's population is the difference between the cumulative
 * totals (see sum) at its two edges, optionally rounded when roundMode is
 * truthy. Returns {bins, maxPop, range}: the visual bins (each with a
 * two-element `range` and a `pop`), the largest bin population (never
 * below 0), and the `range` argument itself.
 */
function buildViz(rawBins, range, vizBinCount, roundMode, trapInterpolateMode) {
  // The first edge is nudged down by a tiny amount so the requested range
  // acts like a closed interval.
  var epsilon = 0.0000001;
  var step = (range[1] - range[0]) / vizBinCount;
  var lower = range[0] - epsilon;
  var lowerTotal = sum(rawBins, lower, trapInterpolateMode);
  var tallest = 0;
  var drawn = [];

  while (drawn.length < vizBinCount) {
    var upper = lower + step;
    var upperTotal = sum(rawBins, upper, trapInterpolateMode);
    var count = upperTotal - lowerTotal;
    if (roundMode) {
      count = Math.round(count);
    }
    tallest = Math.max(tallest, count);
    drawn.push({range: [lower + epsilon, upper], pop: count});
    // The upper edge of this bin is the lower edge of the next one.
    lower = upper;
    lowerTotal = upperTotal;
  }

  return {bins: drawn, maxPop: tallest, range: range};
}
/* Return `term` with its first character upper-cased and the rest unchanged. */
function capitalize(term) {
  var initial = term.substring(0, 1);
  var remainder = term.substring(1);
  return initial.toUpperCase() + remainder;
}
/* Narrow `fullRange` so that roughly a `buffer` fraction of the total
 * population is cut from each tail.
 *
 * The range is divided into `segments` equal steps. The lower edge walks
 * upward until the cumulative population (see sum, linear mode) exceeds
 * total * buffer; the upper edge walks downward until it drops below
 * total - total * buffer. Each edge then backs off by one step, unless it
 * never moved. Returns [start, end].
 */
function trimRange(fullRange, rawBins, buffer, segments) {
  var population = rawBins.reduce(function (acc, bin) {
    return acc + bin.pop;
  }, 0);
  var step = (fullRange[1] - fullRange[0]) / segments;
  var lowCut = population * buffer;
  var highCut = population - (population * buffer);
  var n;

  var lo = fullRange[0];
  for (n = 0; n < segments && !(sum(rawBins, lo, false) > lowCut); n++) {
    lo += step;
  }
  if (lo > fullRange[0]) {
    lo -= step;
  }

  var hi = fullRange[1];
  for (n = 0; n < segments && !(sum(rawBins, hi, false) < highCut); n++) {
    hi -= step;
  }
  if (hi < fullRange[1]) {
    hi += step;
  }

  return [lo, hi];
}
/* Widen `range` on both sides by half the width of one visualization bin
 * (the range width divided by vizBinCount). Returns a new [low, high] pair.
 */
function bufferRange(range, vizBinCount) {
  var low = range[0];
  var high = range[1];
  var pad = 0.5 * (high - low) / vizBinCount;
  return [low - pad, high + pad];
}
// Plot geometry: a 960x500 drawing surface with margins kept free around
// the plotting area; width and height describe the inner plotting area.
var margin = {top: 30, right: 40, bottom: 30, left: 40};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;

// `svg` is the inner <g>, translated by the left/top margins, so that
// everything appended to it is positioned in plot coordinates.
var svg = (function () {
  var surface = d3.select("body").append("svg");
  surface.attr("width", width + margin.left + margin.right);
  surface.attr("height", height + margin.top + margin.bottom);
  return surface.append("g")
    .attr("transform", "translate(" + margin.left + "," + margin.top + ")");
})();
// Load the histogram summary, draw the initial chart, then wire up the
// keyboard toggles (t / r / i) and the brush-to-zoom interaction.
d3.json("plasma.json", function(error, data) {
  // A failed request or unparsable file used to fall through to
  // loadRawBins with no data and die with an unrelated TypeError.
  if (error) {
    console.error("Unable to load plasma.json", error);
    return;
  }

  var rawBins = loadRawBins(data);

  // Scales, axis generators and axis <g> elements; created in init() and
  // replaced / redrawn in update().
  var x, y;
  var xAxis, yAxis;
  var xAxisG, yAxisG;

  var vizBinCount = 32;
  var baseRange = [data.summary.minimum, data.summary.maximum];
  // Full range and outlier-trimmed range, each padded by half a bin.
  var initRange = bufferRange(baseRange, vizBinCount);
  var trimmedInitRange = trimRange(baseRange, rawBins, 0.01, vizBinCount);
  trimmedInitRange = bufferRange(trimmedInitRange, vizBinCount);

  // By default trim the edges of the distribution
  var trimMode = true;
  // The range shown after a "zoom out" click; follows trimMode.
  var resetRange = trimmedInitRange;
  var roundMode = false;
  var trapInterpolateMode = false;
  var viz = buildViz(rawBins, resetRange, vizBinCount, roundMode, trapInterpolateMode);

  init();

  /* Create the bars, both axes, the mode labels and the key handler. */
  function init() {
    x = d3.scale.linear()
      .domain(viz.range)
      .range([0, width]);
    y = d3.scale.linear()
      .domain([0, viz.maxPop])
      .range([height, 0]);

    // One rect per visual bin, inset by 1px on each side to leave a gap.
    svg.selectAll(".bar")
      .data(viz.bins)
      .enter().append("rect")
      .attr("class", "bar")
      .attr("x", function(d) { return x(d.range[0]) + 1; })
      .attr("width", function(d) { return x(d.range[1]) - x(d.range[0]) - 2; })
      .attr("y", function(d) { return y(d.pop);})
      .attr("height", function(d) { return height - y(d.pop); });

    xAxis = d3.svg.axis()
      .ticks(Math.round(width / 60))
      .scale(x)
      .orient("bottom");
    yAxis = d3.svg.axis()
      .ticks(Math.round(height / 30))
      .scale(y)
      .orient("left");

    xAxisG = svg.append("g")
      .attr("class", "x axis")
      .attr("transform", "translate(0," + height + ")")
      .call(xAxis);
    // Field name, right-aligned just above the x axis.
    xAxisG.append("text")
      .attr("class", "fname")
      .attr("y", -16)
      .attr("x", width)
      .attr("dy", ".71em")
      .style("text-anchor", "end")
      .text(capitalize(data.name));

    yAxisG = svg.append("g")
      .attr("class", "y axis")
      .call(yAxis);
    yAxisG.append("text")
      .attr("class", "fname")
      .attr("transform", "rotate(-90)")
      .attr("y", 6)
      .attr("dy", ".71em")
      .style("text-anchor", "end")
      .text("Population");

    // Mode indicators, stacked near the top-right corner; their initial
    // text matches the initial values of the three mode flags above.
    var trimText = svg.append("text")
      .attr("class", "modetext")
      .attr("x", width - 150)
      .attr("y", 10)
      .style("text-anchor", "start")
      .text("(T)rimming: On");
    var roundText = svg.append("text")
      .attr("class", "modetext")
      .attr("x", width - 150)
      .attr("y", 25)
      .style("text-anchor", "start")
      .text("(R)ounding: Off");
    var interpolateText = svg.append("text")
      .attr("class", "modetext")
      .attr("x", width - 150)
      .attr("y", 40)
      .style("text-anchor", "start")
      .text("(I)nterpolation: Linear");

    d3.select("body")
      .on("keydown", function(d) {
        if (d3.event.keyCode === 84) {
          // t = 84
          // Toggling trimming also resets the view to the matching range.
          trimMode = !trimMode;
          if (trimMode) {
            resetRange = trimmedInitRange;
            trimText.text("(T)rimming: On");
          } else {
            resetRange = initRange;
            trimText.text("(T)rimming: Off");
          }
          update(rawBins, resetRange, vizBinCount);
        } else if (d3.event.keyCode === 82) {
          // r = 82
          // Rounding keeps the current view and only re-bins it.
          roundMode = !roundMode;
          if (roundMode) {
            roundText.text("(R)ounding: On");
          } else {
            roundText.text("(R)ounding: Off");
          }
          update(rawBins, viz.range, vizBinCount);
        } else if (d3.event.keyCode === 73) {
          // i = 73
          // Interpolation keeps the current view and only re-bins it.
          trapInterpolateMode = !trapInterpolateMode;
          if (trapInterpolateMode) {
            interpolateText.text("(I)nterpolation: Trapezoidal");
          } else {
            interpolateText.text("(I)nterpolation: Linear");
          }
          update(rawBins, viz.range, vizBinCount);
        }
      });
  }

  /* Re-bin the distribution for `range` using the current mode flags, then
   * animate the bar heights and both axes to the new scales. Only y and
   * height are transitioned; the bars' x and width are left as set by init().
   */
  function update(rawBins, range, vizBinCount) {
    var animationDuration = 700;
    viz = buildViz(rawBins, range, vizBinCount, roundMode, trapInterpolateMode);

    x = d3.scale.linear()
      .domain(viz.range)
      .range([0, width]);
    y = d3.scale.linear()
      .domain([0, viz.maxPop])
      .range([height, 0]);

    svg.selectAll(".bar")
      .data(viz.bins)
      .transition().duration(animationDuration)
      .attr("y", function(d) { return y(d.pop);})
      .attr("height", function(d) { return height - y(d.pop); });

    xAxis = d3.svg.axis()
      .ticks(Math.round(width / 60))
      .scale(x)
      .orient("bottom");
    yAxis = d3.svg.axis()
      .ticks(Math.round(height / 30))
      .scale(y)
      .orient("left");

    xAxisG.transition().duration(animationDuration).call(xAxis);
    yAxisG.transition().duration(animationDuration).call(yAxis);
  }

  // The brush works in pixel coordinates (identity scale); brushended()
  // converts the extent to data values with the current x scale.
  var brushX = d3.scale.identity().domain([0, width]);
  // Last extent reported by the brush, in pixels.
  var brushExtent;
  var brush = d3.svg.brush()
    .x(brushX)
    .on("brush", brushed)
    .on("brushend", brushended);
  // NOTE(review): .call(brush.event) presumably dispatches an initial round
  // of brush events so brushExtent is populated before the first click -
  // confirm against the d3 v3 brush documentation.
  var gBrush = svg.append("g")
    .attr("class", "brush")
    .call(brush)
    .call(brush.event);
  gBrush.selectAll("rect")
    .attr("height", height);

  /* Remember the current brush extent while the user drags. */
  function brushed() {
    brushExtent = brush.extent();
  }

  /* On release: zoom to the brushed span, or reset the view when the span
   * is empty (a plain click).
   */
  function brushended() {
    if (!d3.event.sourceEvent) return; // only transition after input
    var start = x.invert(brushExtent[0]);
    var end = x.invert(brushExtent[1]);
    // Reset the brush's extent and redraw it so no selection stays on screen.
    d3.select(this).call(brush.extent([[0], [0]]));
    if (start === end) {
      update(rawBins, resetRange, vizBinCount);
    } else {
      update(rawBins, bufferRange([start, end], vizBinCount), vizBinCount);
    }
  }
});
</script>
{
"id" : "000001",
"preferred" : true,
"summary" : {
"mean" : 120.89453,
"sum_squares" : 12008759,
"bins" : [ [ 0, 5 ], [ 44, 1 ], [ 56.66667, 3 ], [ 61.5, 2 ], [ 67.2, 5 ], [ 73.3125, 16 ], [ 79.79167, 24 ], [ 84.26923, 26 ], [ 89.64706, 51 ], [ 95.29787, 47 ], [ 100.97183, 71 ], [ 107.21739, 69 ], [ 111.48148, 27 ], [ 114.57576, 33 ], [ 118.5641, 39 ], [ 123.68852, 61 ], [ 128.62162, 37 ], [ 132.57143, 21 ], [ 137.52632, 38 ], [ 142.65217, 23 ], [ 146.4, 25 ], [ 150.92857, 14 ], [ 154.5625, 16 ], [ 158.15385, 13 ], [ 162.4, 15 ], [ 166.5, 14 ], [ 172, 15 ], [ 176.16667, 6 ], [ 180.125, 16 ], [ 183.5, 6 ], [ 188.23077, 13 ], [ 195.6875, 16 ] ],
"maximum" : 199,
"missing_count" : 0,
"variance" : 1022.24831,
"median" : 116.86008,
"population" : 768,
"minimum" : 0,
"standard_deviation" : 31.97262,
"sum" : 92847
},
"datatype" : "int16",
"order" : 1,
"optype" : "numeric",
"name" : "plasma glucose",
"column_number" : 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment