An exploratory data analysis using d3
Last active
December 24, 2015 20:16
-
-
Save ericgj/2989542b98125cce7d1d to your computer and use it in GitHub Desktop.
Costs of task switching analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!DOCTYPE html>
<!--
  Costs of task switching analysis: exploratory page built on d3 v3.
  Loads d3 and the box-plot plugin (whiskerbox.js); the headings below mark
  the sections of the analysis.
-->
<html lang="en">
<head>
<meta charset="utf-8">
<title>Costs of task switching analysis</title>
<style>

body {
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
}

.box {
  font: 10px sans-serif;
}

.box line,
.box rect,
.box circle {
  fill: #fff;
  stroke: #000;
  stroke-width: 1.5px;
}

.box .center {
  stroke-dasharray: 3,3;
}

.box .outlier {
  fill: none;
  stroke: #ccc;
}

</style>
</head>
<body>
<!-- Explicit https: a protocol-relative URL resolves to file:// when the page is opened from disk. -->
<script src="https://d3js.org/d3.v3.min.js"></script>
<script src="whiskerbox.js"></script>

<h1>Q1: Is there a correlation between tasks done concurrently and hours spent on task</h1>

<h2>Overall box plot</h2>

<h2>Box plots by task type</h2>

<h2>Identifying outliers</h2>

<h2>Linear regression plot excluding outliers</h2>

<h1>Q2: Is there a correlation between tasks done concurrently and number of errors</h1>

</body>
</html>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// main |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function (){ | |
// adapted from http://stackoverflow.com/questions/20507536/d3-js-linear-regression?lq=1 | |
/**
 * Ordinary least-squares fit of `y` against `x`.
 *
 * Observations are paired by index up to the length of the shorter series,
 * so a short `x` no longer turns every statistic into NaN (a longer `x` was
 * already ignored beyond `y.length`).
 *
 * Degenerate input is not special-cased: with no pairs, or when all `x`
 * (or all `y`) values are equal, the divisions below yield NaN/Infinity.
 *
 * @param {number[]} y - Dependent values.
 * @param {number[]} x - Independent values.
 * @returns {{slope: number, intercept: number, r2: number}} Fitted line
 *   `y = slope * x + intercept` and the squared correlation coefficient.
 */
function linear(y, x) {
  var n = Math.min(y.length, x.length);
  var sumX = 0;
  var sumY = 0;
  var sumXY = 0;
  var sumXX = 0;
  var sumYY = 0;

  for (var i = 0; i < n; i++) {
    sumX += x[i];
    sumY += y[i];
    sumXY += (x[i] * y[i]);
    sumXX += (x[i] * x[i]);
    sumYY += (y[i] * y[i]);
  }

  // n^2-scaled covariance and variances; the scale cancels in every ratio.
  var sxy = n * sumXY - sumX * sumY;
  var sxx = n * sumXX - sumX * sumX;
  var syy = n * sumYY - sumY * sumY;

  var lr = {};
  lr['slope'] = sxy / sxx;
  lr['intercept'] = (sumY - lr.slope * sumX) / n;
  lr['r2'] = Math.pow(sxy / Math.sqrt(sxx * syy), 2);
  return lr;
}
// Publish the fit as d3.regression.linear. An existing d3.regression
// namespace is reused rather than replaced, so other helpers registered
// on it survive.
d3.regression = d3.regression || {};
d3.regression.linear = linear;
})(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function (){ | |
// adapted from http://jsfiddle.net/7WQjr/ | |
/**
 * Append an HTML table to `selection`: one header cell per entry of
 * `columns`, one body row per entry of `data`, one cell per column in each row.
 *
 * @param {string[]} columns - Property names; used both as header text and
 *   as the key read from each row object.
 * @param {Object[]} data - Row objects.
 * @param {Object} selection - Selection-like object the table is appended
 *   to (must support `append`, as a d3 selection does).
 * @returns {Object} The selection returned by `selection.append("table")`.
 */
function tabulate(columns, data, selection) {
  var table = selection.append("table"),
      thead = table.append("thead"),
      tbody = table.append("tbody");

  // append the header row
  thead.append("tr")
    .selectAll("th")
    .data(columns)
    .enter()
    .append("th")
    .text(function(column) { return column; });

  // create a row for each object in the data
  var rows = tbody.selectAll("tr")
    .data(data)
    .enter()
    .append("tr");

  // create a cell in each row for each column
  rows.selectAll("td")
    .data(function(row) {
      return columns.map(function(column) {
        return {column: column, value: row[column]};
      });
    })
    .enter()
    .append("td")
    .text(function(d) { return d.value; });

  return table;
}
d3.tabulate = curryN(3,tabulate); | |
})(); | |
// curry from Ramda | |
// https://github.com/ramda/ramda/blob/master/src/internal/_arity.js | |
/**
 * Wrap `fn` in a function whose reported `length` is exactly `n`.
 * The wrapper forwards its `this` and every argument it receives
 * (including any beyond `n`) to `fn`.
 *
 * @param {number} n - Desired arity, an integer from 0 to 10.
 * @param {Function} fn - Function to wrap.
 * @returns {Function} Wrapper with `length === n`.
 * @throws {Error} If `n` is not one of the integers 0..10.
 */
function _arity(n, fn) {
  // The named parameters are never read; they exist only to fix `.length`.
  switch (n) {
    case 0: return function() { return fn.apply(this, arguments); };
    case 1: return function(a0) { return fn.apply(this, arguments); };
    case 2: return function(a0, a1) { return fn.apply(this, arguments); };
    case 3: return function(a0, a1, a2) { return fn.apply(this, arguments); };
    case 4: return function(a0, a1, a2, a3) { return fn.apply(this, arguments); };
    case 5: return function(a0, a1, a2, a3, a4) { return fn.apply(this, arguments); };
    case 6: return function(a0, a1, a2, a3, a4, a5) { return fn.apply(this, arguments); };
    case 7: return function(a0, a1, a2, a3, a4, a5, a6) { return fn.apply(this, arguments); };
    case 8: return function(a0, a1, a2, a3, a4, a5, a6, a7) { return fn.apply(this, arguments); };
    case 9: return function(a0, a1, a2, a3, a4, a5, a6, a7, a8) { return fn.apply(this, arguments); };
    case 10: return function(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9) { return fn.apply(this, arguments); };
    default: throw new Error('First argument to _arity must be a non-negative integer no greater than ten');
  }
}
// https://github.com/ramda/ramda/blob/master/src/internal/_curryN.js | |
/**
 * Build the accumulating step of a curried function (no placeholder support).
 *
 * Each call appends its arguments after the ones already `received`. Once at
 * least `length` arguments have been collected, `fn` is invoked with all of
 * them (and the caller's `this`); otherwise a new function expecting the
 * remaining count is returned.
 *
 * The previous loop let newly supplied arguments overwrite the received
 * ones, so partial application such as f(1)(2)(3) never reached `fn`.
 *
 * @param {number} length - Total number of arguments `fn` needs.
 * @param {Array} received - Arguments collected by earlier calls.
 * @param {Function} fn - Function to invoke when enough arguments exist.
 * @returns {Function} Step function collecting further arguments.
 */
function _curryN(length, received, fn) {
  return function() {
    // Earlier arguments first, then this call's, preserving call order.
    var combined = received.concat(Array.prototype.slice.call(arguments));
    var left = length - combined.length;
    return left <= 0 ? fn.apply(this, combined)
                     : _arity(left, _curryN(length, combined, fn));
  };
}
// https://github.com/ramda/ramda/blob/master/src/curryN.js | |
/**
 * Curry `fn` to a fixed number of arguments.
 *
 * @param {number} length - Number of arguments to collect before calling
 *   `fn`; passed to `_arity`, which accepts 0..10.
 * @param {Function} fn - Function to curry.
 * @returns {Function} Curried function reporting `length` parameters.
 */
function curryN(length, fn) {
  return _arity(length, _curryN(length, [], fn));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function() { | |
// Copied from http://bl.ocks.org/mbostock/4061502 | |
// Inspired by http://informationandvisualization.de/blog/box-plot | |
/**
 * Box-and-whisker chart factory (d3 v3).
 *
 * Returns a `box(selection)` function that, for each element of the
 * selection, maps the bound array through the `value` accessor, sorts it
 * ascending and draws/updates: a dashed center line spanning the whiskers,
 * the quartile box, the median line, whisker lines, outlier circles and
 * text ticks. The scale used for a render is stored on the element as
 * `__chart__` so the next render can transition from it.
 *
 * Configuration is through chainable accessors (`width`, `height`,
 * `tickFormat`, `duration`, `domain`, `value`, `whiskers`, `quartiles`):
 * called without arguments they return the current setting, otherwise they
 * store the argument and return `box`.
 */
d3.box = function() {
  var width = 1,
      height = 1,
      duration = 0,
      domain = null,
      value = Number,
      whiskers = boxWhiskers,
      quartiles = boxQuartiles,
      tickFormat = null;

  // For each small multiple…
  function box(g) {
    g.each(function(d, i) {
      d = d.map(value).sort(d3.ascending);
      // Named `group` (not `g`) so it does not shadow the selection parameter.
      var group = d3.select(this),
          n = d.length,
          min = d[0],
          max = d[n - 1];

      // Compute quartiles. Must return exactly 3 elements.
      var quartileData = d.quartiles = quartiles(d);

      // Compute whiskers. Must return exactly 2 elements, or null.
      var whiskerIndices = whiskers && whiskers.call(this, d, i),
          whiskerData = whiskerIndices && whiskerIndices.map(function(i) { return d[i]; });

      // Compute outliers. If no whiskers are specified, all data are "outliers".
      // We compute the outliers as indices, so that we can join across transitions!
      var outlierIndices = whiskerIndices
          ? d3.range(0, whiskerIndices[0]).concat(d3.range(whiskerIndices[1] + 1, n))
          : d3.range(n);

      // Compute the new x-scale.
      var x1 = d3.scale.linear()
          .domain(domain && domain.call(this, d, i) || [min, max])
          .range([height, 0]);

      // Retrieve the old x-scale, if this is an update.
      var x0 = this.__chart__ || d3.scale.linear()
          .domain([0, Infinity])
          .range(x1.range());

      // Stash the new scale.
      this.__chart__ = x1;

      // Note: the box, median, and box tick elements are fixed in number,
      // so we only have to handle enter and update. In contrast, the outliers
      // and other elements are variable, so we need to exit them! Variable
      // elements also fade in and out.

      // Update center line: the vertical line spanning the whiskers.
      var center = group.selectAll("line.center")
          .data(whiskerData ? [whiskerData] : []);

      center.enter().insert("line", "rect")
          .attr("class", "center")
          .attr("x1", width / 2)
          .attr("y1", function(d) { return x0(d[0]); })
          .attr("x2", width / 2)
          .attr("y2", function(d) { return x0(d[1]); })
          .style("opacity", 1e-6)
        .transition()
          .duration(duration)
          .style("opacity", 1)
          .attr("y1", function(d) { return x1(d[0]); })
          .attr("y2", function(d) { return x1(d[1]); });

      center.transition()
          .duration(duration)
          .style("opacity", 1)
          .attr("y1", function(d) { return x1(d[0]); })
          .attr("y2", function(d) { return x1(d[1]); });

      center.exit().transition()
          .duration(duration)
          .style("opacity", 1e-6)
          .attr("y1", function(d) { return x1(d[0]); })
          .attr("y2", function(d) { return x1(d[1]); })
          .remove();

      // Update interquartile box.
      // Named `boxRect` so it does not shadow the enclosing `box` function.
      var boxRect = group.selectAll("rect.box")
          .data([quartileData]);

      boxRect.enter().append("rect")
          .attr("class", "box")
          .attr("x", 0)
          .attr("y", function(d) { return x0(d[2]); })
          .attr("width", width)
          .attr("height", function(d) { return x0(d[0]) - x0(d[2]); })
        .transition()
          .duration(duration)
          .attr("y", function(d) { return x1(d[2]); })
          .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });

      boxRect.transition()
          .duration(duration)
          .attr("y", function(d) { return x1(d[2]); })
          .attr("height", function(d) { return x1(d[0]) - x1(d[2]); });

      // Update median line.
      var medianLine = group.selectAll("line.median")
          .data([quartileData[1]]);

      medianLine.enter().append("line")
          .attr("class", "median")
          .attr("x1", 0)
          .attr("y1", x0)
          .attr("x2", width)
          .attr("y2", x0)
        .transition()
          .duration(duration)
          .attr("y1", x1)
          .attr("y2", x1);

      medianLine.transition()
          .duration(duration)
          .attr("y1", x1)
          .attr("y2", x1);

      // Update whiskers.
      var whisker = group.selectAll("line.whisker")
          .data(whiskerData || []);

      whisker.enter().insert("line", "circle, text")
          .attr("class", "whisker")
          .attr("x1", 0)
          .attr("y1", x0)
          .attr("x2", width)
          .attr("y2", x0)
          .style("opacity", 1e-6)
        .transition()
          .duration(duration)
          .attr("y1", x1)
          .attr("y2", x1)
          .style("opacity", 1);

      whisker.transition()
          .duration(duration)
          .attr("y1", x1)
          .attr("y2", x1)
          .style("opacity", 1);

      whisker.exit().transition()
          .duration(duration)
          .attr("y1", x1)
          .attr("y2", x1)
          .style("opacity", 1e-6)
          .remove();

      // Update outliers.
      var outlier = group.selectAll("circle.outlier")
          .data(outlierIndices, Number);

      outlier.enter().insert("circle", "text")
          .attr("class", "outlier")
          .attr("r", 5)
          .attr("cx", width / 2)
          .attr("cy", function(i) { return x0(d[i]); })
          .style("opacity", 1e-6)
        .transition()
          .duration(duration)
          .attr("cy", function(i) { return x1(d[i]); })
          .style("opacity", 1);

      outlier.transition()
          .duration(duration)
          .attr("cy", function(i) { return x1(d[i]); })
          .style("opacity", 1);

      outlier.exit().transition()
          .duration(duration)
          .attr("cy", function(i) { return x1(d[i]); })
          .style("opacity", 1e-6)
          .remove();

      // Compute the tick format.
      var format = tickFormat || x1.tickFormat(8);

      // Update box ticks.
      var boxTick = group.selectAll("text.box")
          .data(quartileData);

      boxTick.enter().append("text")
          .attr("class", "box")
          .attr("dy", ".3em")
          .attr("dx", function(d, i) { return i & 1 ? 6 : -6; })
          .attr("x", function(d, i) { return i & 1 ? width : 0; })
          .attr("y", x0)
          .attr("text-anchor", function(d, i) { return i & 1 ? "start" : "end"; })
          .text(format)
        .transition()
          .duration(duration)
          .attr("y", x1);

      boxTick.transition()
          .duration(duration)
          .text(format)
          .attr("y", x1);

      // Update whisker ticks. These are handled separately from the box
      // ticks because they may or may not exist, and we don't want
      // to join box ticks pre-transition with whisker ticks post-.
      var whiskerTick = group.selectAll("text.whisker")
          .data(whiskerData || []);

      whiskerTick.enter().append("text")
          .attr("class", "whisker")
          .attr("dy", ".3em")
          .attr("dx", 6)
          .attr("x", width)
          .attr("y", x0)
          .text(format)
          .style("opacity", 1e-6)
        .transition()
          .duration(duration)
          .attr("y", x1)
          .style("opacity", 1);

      whiskerTick.transition()
          .duration(duration)
          .text(format)
          .attr("y", x1)
          .style("opacity", 1);

      whiskerTick.exit().transition()
          .duration(duration)
          .attr("y", x1)
          .style("opacity", 1e-6)
          .remove();
    });
    d3.timer.flush();
  }

  // Getter/setter: value used for horizontal extents (x2, rect width, center x).
  box.width = function(x) {
    if (!arguments.length) return width;
    width = x;
    return box;
  };

  // Getter/setter: value used as the start of the scale's output range.
  box.height = function(x) {
    if (!arguments.length) return height;
    height = x;
    return box;
  };

  // Getter/setter: tick label formatter; when unset, x1.tickFormat(8) is used.
  box.tickFormat = function(x) {
    if (!arguments.length) return tickFormat;
    tickFormat = x;
    return box;
  };

  // Getter/setter: duration passed to every transition.
  box.duration = function(x) {
    if (!arguments.length) return duration;
    duration = x;
    return box;
  };

  // Getter/setter: scale domain. Non-null values are wrapped with d3.functor;
  // when unset (or the call returns a falsy value) [min, max] of the data is used.
  box.domain = function(x) {
    if (!arguments.length) return domain;
    domain = x == null ? x : d3.functor(x);
    return box;
  };

  // Getter/setter: accessor mapped over the bound array before sorting.
  box.value = function(x) {
    if (!arguments.length) return value;
    value = x;
    return box;
  };

  // Getter/setter: function returning the two whisker indices (or a falsy
  // value to draw no whiskers and treat every datum as an outlier).
  box.whiskers = function(x) {
    if (!arguments.length) return whiskers;
    whiskers = x;
    return box;
  };

  // Getter/setter: function returning the three quartile values.
  box.quartiles = function(x) {
    if (!arguments.length) return quartiles;
    quartiles = x;
    return box;
  };

  return box;
};
/**
 * Default whisker rule: whiskers sit at the first and last elements, so the
 * index ranges outside them are empty and no datum is drawn as an outlier.
 *
 * @param {Array} d - Data array (sorted by the caller).
 * @returns {number[]} `[0, d.length - 1]`, the two whisker indices.
 */
function boxWhiskers(d) {
  return [0, d.length - 1];
}
/**
 * Default quartile rule: the 0.25, 0.5 and 0.75 quantiles of `d`, computed
 * with `d3.quantile`.
 *
 * @param {number[]} d - Data array passed straight to `d3.quantile`.
 * @returns {number[]} Three values: lower quartile, median, upper quartile.
 */
function boxQuartiles(d) {
  return [
    d3.quantile(d, .25),
    d3.quantile(d, .5),
    d3.quantile(d, .75)
  ];
}
})(); |
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
errors data |
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
worktime data |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment