Created
September 23, 2012 01:36
-
-
Save akrusz/3768478 to your computer and use it in GitHub Desktop.
D3 graph incorporating uncertainty.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html>
<head>
<title>Uncertainty: Improving the dot plot</title>
<meta http-equiv="content-type" content="application/xhtml+xml; charset=utf-8" />
<script type="text/javascript" src="d3.js"></script>
<script type="text/javascript" src="jquery-1.8.2.min.js"></script>
<style type="text/css">
.axis path,
.axis line {
    fill: none;
    stroke: black;
    shape-rendering: crispEdges;
}
.axis line.tick {
    stroke: #DDD;
}
.axis text {
    font-family: sans-serif;
    font-size: 12px;
    fill: #666666;
}
</style>
</head>
<body>
<script type="text/javascript">
// Base constructor for the one-dimensional distributions drawn by the graph.
// It only establishes the shared property names and neutral defaults; each
// concrete distribution overwrites the fields it needs and installs an
// instance of this constructor as its prototype.
//
// params: accepted for interface compatibility; it is not read anywhere in
//         this constructor.
function univariate(params){
    // Expected value / mean of the distribution.
    this.mean = undefined;
    // Width of this distribution's standard deviation.
    this.standardDeviation = undefined;
    // Where the significant region begins on the x-axis.
    this.startX = undefined;
    // Where the significant region ends on the x-axis.
    this.endX = undefined;
    // Width of the significant region, expressed in standard deviations.
    this.widthInSDs = function(){
        return (this.endX - this.startX) / this.standardDeviation;
    };
    // Probability density function / measure. Must be overridden.
    this.value = function(x){
        return undefined;
    };
    // The function's integral, which must be set to a new univariate.
    this.antiderivative = undefined;
    // Ideally we generate a new probability distribution with each data
    // point, but for now let's just use distros that either only translate
    // or scale with the mean. This will let us use a single gradient for
    // all data points.
    // Does the distribution translate in step with the mean?
    this.translatesWithMean = false;
    // Does the distribution start at 0 and expand with the mean?
    this.scalesWithMean = false;
}
// Each distro must have either mean = 1 (if scalesWithMean)
// or mean = 0 (if translatesWithMean).

// Standard normal distribution (mean 0, standard deviation 1). It moves with
// the data point's mean, and its significant region is taken to be
// [-3.33, 3.33].
function normalDistribution(){
    this.mean = 0;
    this.standardDeviation = 1;
    this.translatesWithMean = true;
    this.startX = -3.33;
    this.endX = 3.33;
    // Probability density function / measure.
    this.value = function(x){
        return Math.exp(-x * x / 2) / Math.sqrt(2 * Math.PI);
    };
    // Cumulative counterpart of this density.
    this.antiderivative = new normalDistroIntegral();
}
// Inherit the defaults (including widthInSDs) from univariate.
normalDistribution.prototype = new univariate();
// Integral of the standard normal density, evaluated through erf(). It moves
// with the data point's mean; its significant region is taken to be
// [-2.66, 2.66].
function normalDistroIntegral(){
    this.translatesWithMean = true;
    // Width in SDs of the region we consider to be significant. This function
    // does not have a mean or SD of its own, so the raw width is returned.
    this.widthInSDs = function(){
        return (this.endX - this.startX);
    };
    this.startX = -2.66;
    this.endX = 2.66;
    this.value = function(x){
        return 0.5 * (1 + erf(x / Math.sqrt(2)));
    };
}
// Inherit the remaining defaults from univariate.
normalDistroIntegral.prototype = new univariate();
// Triangular distribution
// Standard symmetric triangular distribution on [-1, 1], peaking at 0, with
// standard deviation 1 / sqrt(6). It moves with the data point's mean.
function triangularDistribution(){
    this.mean = 0;
    this.standardDeviation = 1 / Math.sqrt(6);
    this.translatesWithMean = true;
    this.startX = -1;
    this.endX = 1;
    // Probability density function / measure. Clamped at zero so that an
    // argument outside [-1, 1] (for example one that overshoots the end of
    // the region by floating-point rounding) never yields a negative density.
    this.value = function(x){
        return Math.max(0, 1 - Math.abs(x));
    };
    // Cumulative counterpart of this density.
    this.antiderivative = new triangularDistroIntegral();
}
// Inherit the defaults (including widthInSDs) from univariate.
triangularDistribution.prototype = new univariate();
// Integral (cumulative distribution) of the standard triangular density on
// [-1, 1]. It moves with the data point's mean.
function triangularDistroIntegral(){
    this.translatesWithMean = true;
    // Width in SDs of the region we consider to be significant. This function
    // does not have a mean or SD of its own, so the triangular distribution's
    // 1 / sqrt(6) standard deviation is folded in here as a factor of sqrt(6).
    this.widthInSDs = function(){
        return (this.endX - this.startX) * Math.sqrt(6);
    };
    this.startX = -1;
    this.endX = 1;
    this.value = function(x){
        // The quadratic pieces are only valid on [-1, 1]; outside of it the
        // cumulative value stays pinned at 0 or 1.
        if(x <= -1){
            return 0;
        }
        if(x >= 1){
            return 1;
        }
        return (x < 0)
            ? 0.5 * (x + 1) * (x + 1)
            : 1 - 0.5 * (x - 1) * (x - 1);
    };
}
// Inherit the remaining defaults from univariate.
triangularDistroIntegral.prototype = new univariate();
// Unit exponential distribution (mean 1, standard deviation 1). It starts at
// 0 and is stretched by the data point's mean; its significant region is
// taken to be [0, 5.5].
function exponentialDistribution(){
    // We're passing in the standard deviation / mean.
    // It'd be nicer theoretically to pass in lambda, but this is more consistent.
    this.mean = 1;
    this.standardDeviation = 1;
    this.scalesWithMean = true;
    this.startX = 0;
    this.endX = 5.5;
    // Probability density function / measure. The exponential distribution
    // has no mass below zero, so negative arguments yield 0.
    this.value = function(x){
        return (x < 0) ? 0 : Math.exp(-x);
    };
    // Cumulative counterpart of this density.
    this.antiderivative = new exponentialDistroIntegral();
}
// Inherit the defaults (including widthInSDs) from univariate.
exponentialDistribution.prototype = new univariate();
// Integral (cumulative distribution) of the unit exponential density. It
// starts at 0 and is stretched by the data point's mean; its significant
// region is taken to be [0, 5.5].
function exponentialDistroIntegral(){
    this.scalesWithMean = true;
    // Width in SDs of the region we consider to be significant. This function
    // has no SD of its own, so the raw width is returned.
    this.widthInSDs = function(){
        return (this.endX - this.startX);
    };
    this.startX = 0;
    this.endX = 5.5;
    // Cumulative distribution function. Nothing has accumulated below zero,
    // so negative arguments yield 0 rather than a negative value.
    this.value = function(x){
        return (x < 0) ? 0 : 1 - Math.exp(-x);
    };
}
// Inherit the remaining defaults from univariate.
exponentialDistroIntegral.prototype = new univariate();
</script>
<form name="graphOptions" onsubmit="generateGraph(); return false">
<p>
<input type="radio" name="dataSet" value="data1" checked="true"/>Dataset 1 (larger variance)
<input type="radio" name="dataSet" value="data2" />Dataset 2 (smaller variance)
<input type="radio" name="stdDevs" value="stdDevs1" checked="true" />Standard deviation set 1 (larger)
<input type="radio" name="stdDevs" value="stdDevs2" />Standard deviation set 2 (smaller)
</p>
<p>
<input type="radio" name="type" value="dot" checked="true"/>Discrete "dot" plot
<input type="radio" name="type" value="bar" />Discrete bar graph
</p>
<p>
<input type="radio" name="distro" value="normal" checked="true"/>Normal distribution
<input type="radio" name="distro" value="triangular" />Triangular distribution
<input type="radio" name="distro" value="exponential" />Exponential distribution
</p>
<p>
<input type="checkbox" name="scale" />Lock vertical scale?
<input type="checkbox" name="startFromZero" />Force vertical scale to include zero?
</p>
</form>
<script type="text/javascript">
// Overall size of the SVG drawing, in pixels.
var w = 800;
var h = 550;
// Horizontal gap between neighbouring data columns, in pixels.
var barPadding = 1;
// Space reserved below the plot for the x-axis labels, and to its left for
// the y-axis.
var bottomMargin = 42;
var leftMargin = 40;
// The x-axis label of the first data point and the step between labels.
var startingX = 1990;
var xStep = 1;
// Height of the plotting area proper.
var verticalDataPixels = h - bottomMargin;
// Current vertical display window in data units. These live outside
// generateGraph() so that the window can be kept from one redraw to the next
// when "Lock vertical scale?" is ticked; the infinities mean "not computed yet".
var minDisplayValue = Infinity;
var maxDisplayValue = -Infinity;
// Sample means.
var data1 = [ 8, 10, 14, 19, 21, 27, 23, 19, 15, 12,
              10, 8, 9, 11, 14, 17, 16, 18, 23, 25 ];
var data2 = [ 19, 18, 16, 15.5, 15, 17, 18.5, 17, 16.2, 14,
              13.5, 13, 13, 13.5, 15, 18, 19, 17.5, 16.5, 16 ];
// Sample standard deviations, one per data point.
var stdDevs1 = [ 2, 2, 2.5, 3, 3.5, 4.5, 2.5, 2.2, 2, 2.5,
                 3, 3.5, 4, 4.5, 5.5, 4, 3, 2.5, 2.3, 2 ];
var stdDevs2 = [ .5, .5, .6, .75, 1, 1.2, .75, .73, .5, .6,
                 .75, .8, 1, 1.2, 1.4, 1, .75, .70, .65, .5 ];
// Redraw whenever any form control is clicked.
$("form input").click(function () {
    generateGraph();
});
// Initial rendering with the form's default selections.
generateGraph();
// Rebuilds the whole chart from the current state of the "graphOptions" form.
//
// Reads the selected distribution, data set, standard-deviation set and graph
// type, works out the vertical display window (unless it is locked), removes
// any previously drawn "svg.graph" element and appends a fresh one to <body>.
// Every data point is drawn as a rectangle filled with one shared vertical
// gradient whose stop opacities follow the selected distribution.
//
// Reads the file-level layout variables and sample arrays, and reads and
// writes the file-level minDisplayValue / maxDisplayValue. Returns nothing.
function generateGraph(){
    // Value of the checked radio button in a same-named radio group (the
    // last checked one, should several be flagged).
    function checkedValue(radioGroup){
        var chosen;
        for(var r = 0; r < radioGroup.length; r++){
            if(radioGroup[r].checked){
                chosen = radioGroup[r].value;
            }
        }
        return chosen;
    }

    // Vertical pixel position of a data value inside the current display window.
    function pixelY(value){
        return verticalPositionOfDatum(value, minDisplayValue, maxDisplayValue, verticalDataPixels);
    }

    var options = document.graphOptions;
    var i;

    // Throw away the previous rendering, if there is one.
    d3.select("svg.graph").remove();

    // Select the distribution.
    var distribution;
    switch(checkedValue(options.distro)){
        case "normal":
            distribution = new normalDistribution();
            break;
        case "triangular":
            distribution = new triangularDistribution();
            break;
        case "exponential":
            distribution = new exponentialDistribution();
            break;
    }

    // Select the data set.
    var data;
    switch(checkedValue(options.dataSet)){
        case "data1":
            data = data1;
            break;
        case "data2":
            data = data2;
            break;
    }

    // Select the standard deviation data set.
    var stdDevs;
    switch(checkedValue(options.stdDevs)){
        case "stdDevs1":
            stdDevs = stdDevs1;
            break;
        case "stdDevs2":
            stdDevs = stdDevs2;
            break;
    }

    // We'll choose between "dot", "bar", and "line".
    // Line will take some more SVG/CSS wizardry to implement properly.
    var graphType = checkedValue(options.type);
    var displayDistribution;
    if(graphType == "dot"){
        // Dot plots shade each column with the density itself.
        displayDistribution = distribution;
    }
    else if(graphType == "bar"){
        // Bar graphs fade out along the integral of the density.
        displayDistribution = distribution.antiderivative;
    }
    else if(graphType == "line"){
        // Not implemented yet.
    }

    var keepScale = options.scale.checked;
    var startFromZero = options.startFromZero.checked;

    // Distance from a data point's mean to the top / bottom edge of the
    // significant region, per unit of that data point's standard deviation.
    var upperSpread = (displayDistribution.endX - distribution.mean) / distribution.standardDeviation;
    var lowerSpread = (displayDistribution.startX - distribution.mean) / distribution.standardDeviation;

    // Recompute the display window unless it is locked and already known.
    if(maxDisplayValue === -Infinity || !keepScale){
        maxDisplayValue = -Infinity;
        minDisplayValue = Infinity;
        if(displayDistribution.translatesWithMean){
            for(i = 0; i < data.length; i++){
                maxDisplayValue = Math.max(maxDisplayValue, data[i] + upperSpread * stdDevs[i]);
                if(graphType != "bar"){
                    minDisplayValue = Math.min(minDisplayValue, data[i] + lowerSpread * stdDevs[i]);
                }
            }
            if(graphType == "bar"){
                // Bars always grow from zero.
                minDisplayValue = 0;
            }
        }
        else if(displayDistribution.scalesWithMean){
            // Note that stdDevs are never used because there's only one parameter for these distros.
            // TODO: Support negative values.
            for(i = 0; i < data.length; i++){
                maxDisplayValue = Math.max(maxDisplayValue, data[i] * displayDistribution.endX);
            }
            minDisplayValue = 0;
        }
        // If startFromZero and the graph doesn't span the x-axis,
        // we'll make either the top or bottom of the graph 0.
        if(startFromZero){
            if(minDisplayValue > 0){
                minDisplayValue = 0;
            }
            else if(maxDisplayValue < 0){
                maxDisplayValue = 0;
            }
        }
    }

    var minValue = Math.min.apply(Math, data);

    // Set up scales
    var yScale = d3.scale.linear()
        .domain([minDisplayValue, maxDisplayValue])
        .range([verticalDataPixels, 0]);

    // Define the Y axis
    // TODO: make the X-axis this way
    var yAxis = d3.svg.axis()
        .scale(yScale)
        .tickSize(-w + leftMargin, 0)
        .orient("left")
        .ticks(8);

    // This is so we can scale the opacity down for data points with large
    // standard deviations, so total color mass is the same for each data point.
    // In the case that the difference in magnitude between the lowest and
    // highest stddevs is large, this will cause the large bands to be too pale.
    // So, we calculate a minimum stddev for the purposes of normalizing opacity.
    var maxOpacityDifference = 3;
    var largestStdDev = Math.max.apply(Math, stdDevs);
    var smallestStdDev = Math.min.apply(Math, stdDevs);
    var minStdDev = largestStdDev / smallestStdDev > maxOpacityDifference
        ? largestStdDev / maxOpacityDifference
        : smallestStdDev;

    //Create SVG element
    var svg = d3.select("body")
        .append("svg")
        .attr("class", "graph")
        .attr("width", w)
        .attr("height", h);

    // One bottom-to-top gradient shared by every data column.
    var distributionGradient = svg.append("svg:defs")
        .append("svg:linearGradient")
        .attr("id", "distribution")
        .attr("x1", "0%")
        .attr("y1", "100%")
        .attr("x2", "0%")
        .attr("y2", "0%")
        .attr("spreadMethod", "pad");

    // Sample the displayed function at evenly spaced points across its
    // significant region.
    var subGradients = 20;
    var regionSpan = displayDistribution.widthInSDs() * distribution.standardDeviation;
    var densityValues = [];
    for(i = 0; i <= subGradients; i++){
        densityValues[i] = displayDistribution.value(
            displayDistribution.startX + regionSpan * i / subGradients);
    }
    var densityMax = Math.max.apply(Math, densityValues);

    var barColor = "#049";
    for(i = 0; i <= subGradients; i++){
        // TODO: Implement Ramer–Douglas–Peucker algorithm for more efficient interpolation
        // of density function when implementing separate gradients for each data point
        var relativeDensity = densityValues[i] / densityMax;
        distributionGradient.append("svg:stop")
            .attr("offset", (100 / subGradients) * i + "%")
            .attr("stop-color", barColor)
            // Bars fade out as the integral approaches its maximum; dots are
            // most opaque where the density peaks.
            .attr("stop-opacity", (graphType == "bar") ? 1 - relativeDensity : relativeDensity);
    }

    //Create Y axis
    svg.append("g")
        .attr("class", "axis")
        .attr("transform", "translate(" + leftMargin + ",0)")
        .call(yAxis);

    // Horizontal layout shared by the bars and the x-axis labels.
    var slotWidth = (w - leftMargin) / data.length;
    var barWidth = slotWidth - barPadding;
    var barLeft = function(d, i){
        return leftMargin + i * slotWidth;
    };
    var labelCenter = function(d, i){
        return leftMargin + i * slotWidth + barWidth / 2;
    };

    // Make the data bars
    svg.selectAll("rect.graph_gradient")
        .data(data)
        .enter()
        .append("rect")
        .attr("class", "graph_gradient")
        .attr("x", barLeft)
        .attr("y", function(d, i){
            // two basic behaviors: scaling or translating
            if(displayDistribution.translatesWithMean){
                return pixelY(d + upperSpread * stdDevs[i]);
            }
            else if(displayDistribution.scalesWithMean){
                return pixelY(d * displayDistribution.widthInSDs());
            }
        })
        .attr("width", barWidth)
        .attr("height", function(d, i){
            var top;
            var bottom;
            if(displayDistribution.translatesWithMean){
                // Only the difference matters here, so measuring the region's
                // extent upwards from the mean gives the right pixel height.
                top = pixelY(d + displayDistribution.widthInSDs() * stdDevs[i]);
                bottom = pixelY(d);
            }
            else if(displayDistribution.scalesWithMean){
                top = pixelY(d * displayDistribution.widthInSDs());
                bottom = pixelY(0);
            }
            return bottom - top;
        })
        .style("fill", "url(#distribution)")
        .style("opacity", function(d, i){
            if(graphType == "bar" && displayDistribution.translatesWithMean){
                return 1;
            }
            else if(displayDistribution.translatesWithMean){
                return Math.min(1, minStdDev / stdDevs[i]);
            }
            else if(displayDistribution.scalesWithMean){
                return Math.min(1, minValue / d);
            }
        });

    // Make the solid bottom part of the data bars.
    // An ugly consequence of reusing the gradient.
    if(graphType == "bar" && displayDistribution.translatesWithMean){
        svg.selectAll("rect.graph_solid_bar")
            .data(data)
            .enter()
            .append("rect")
            .attr("class", "graph_solid_bar")
            .attr("x", barLeft)
            .attr("y", function(d, i){
                // -1 to ensure no gap due to antialiasing
                return -1 + pixelY(d + lowerSpread * stdDevs[i]);
            })
            .attr("width", barWidth)
            .attr("height", function(d, i){
                // +1 compensates for the one-pixel overlap introduced above.
                return 1 + pixelY(0) - pixelY(d + lowerSpread * stdDevs[i]);
            })
            .style("fill", barColor);
    }

    // Make a background white box under the text.
    // The colour needs its leading '#': a bare "FFFFFF" is not a valid paint value.
    svg.append("rect")
        .attr("class", "background")
        .attr("x", leftMargin)
        .attr("y", verticalDataPixels)
        .attr("width", w - leftMargin)
        .attr("height", h - verticalDataPixels)
        .style("fill", "#FFFFFF");

    // Put in the text for the x-axis labels.
    svg.selectAll("text.x-scale")
        .data(data)
        .enter()
        .append("text")
        .attr("class", "x-scale")
        .text(function(d, i){
            return startingX + i * xStep;
        })
        .attr("text-anchor", "middle")
        .attr("x", labelCenter)
        .attr("y", h - 30)
        .attr("font-family", "sans-serif")
        .attr("font-size", "11px")
        .attr("fill", "black");

    // Below each label, list the data point's sigma and mu. The mu tspan is
    // appended inside the sigma tspan, so it inherits the grey fill.
    svg.selectAll("text.x-scale")
        .append('tspan')
        .attr("class", "stats")
        .attr("x", labelCenter)
        .attr("y", h - 10)
        .attr("fill", "#888888")
        .text(function(d, i){
            return "σ: " + stdDevs[i];
        })
        .append('tspan')
        .attr("x", labelCenter)
        .attr("y", h - 20)
        .text(function(d){
            return "μ: " + d;
        });
}
// Maps a data value to a vertical pixel offset measured from the top of the
// plotting area: windowMax maps to 0 and windowMin maps to verticalPixels.
//
// dataValue:      the value to place.
// windowMin/Max:  data values shown at the bottom / top edge of the area.
// verticalPixels: height of the plotting area in pixels.
//
// A degenerate window (windowMax equal to windowMin) has no meaningful
// scale; the vertical centre is returned instead of dividing by zero.
function verticalPositionOfDatum(dataValue, windowMin, windowMax, verticalPixels){
    var windowSpan = windowMax - windowMin;
    if(windowSpan === 0){
        return verticalPixels / 2;
    }
    return verticalPixels * (windowMax - dataValue) / windowSpan;
}
// Numerical approximation of the error function erf(x).
//
// Uses the rational approximation published as formula 7.1.26 in Abramowitz
// and Stegun's "Handbook of Mathematical Functions": a fifth-degree
// polynomial in t = 1 / (1 + p*|x|), multiplied by exp(-x^2). The polynomial
// is evaluated for |x| and the sign is restored afterwards, since erf is odd.
function erf(x){
    var sign = (x < 0) ? -1 : 1;
    var magnitude = Math.abs(x);
    // Polynomial coefficients a5 .. a1, highest power first.
    var coefficients = [1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592];
    var p = 0.3275911;
    var t = 1.0 / (1.0 + p * magnitude);
    // Horner evaluation of a5*t^4 + a4*t^3 + a3*t^2 + a2*t + a1.
    var polynomial = 0;
    for(var k = 0; k < coefficients.length; k++){
        polynomial = polynomial * t + coefficients[k];
    }
    var y = 1.0 - polynomial * t * Math.exp(-magnitude * magnitude);
    return sign * y;
}
</script>
</body>
</html>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment