Skip to content

Instantly share code, notes, and snippets.

@mkscrg
Last active August 29, 2015 13:57
Show Gist options
  • Save mkscrg/9544426 to your computer and use it in GitHub Desktop.
Save mkscrg/9544426 to your computer and use it in GitHub Desktop.
Reservoir Sampling

Choose a random sample of 10 values from an unbounded stream. Red marks denote values currently in the sample.

From Wikipedia:

The algorithm creates a "reservoir" array of size k and populates it with the first k items of S. It then iterates through the remaining elements of S until S is exhausted. At the ith element of S, the algorithm generates a random number j between 1 and i. If j is less than k, the jth element of the reservoir array is replaced with the ith element of S. In effect, for all i, the ith element of S is chosen to be included in the reservoir with probability k/i.

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<style>
.sampler rect {
fill: black;
fill-opacity: 0.2;
}
.sampler line {
stroke: red;
stroke-width: 1px;
}
.points circle {
fill: steelblue
}
.lines line {
stroke: red;
stroke-width: 1px;
}
</style>
</head>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
(function() {
// configuration
var k = 10; // sample size
var hz = 10; // new points per second
// svg setup
var width = 960;
var height = 500;
var svg = d3.select("body").append("svg")
.attr("width", width)
.attr("height", height);
// svg groups
var lines = svg.append("g").attr("class", "lines");
var points = svg.append("g").attr("class", "points");;
var sampler = svg.append("g").attr("class", "sampler");
// scales
var y = d3.scale.linear()
.domain([0, 1])
.range([height / 2 - 100, height / 2 + 100]);
var x = (function() {
var _x = d3.scale.linear().range([25, width * 3 / 4 - 50]);
return function(d) {
return _x.domain([0, n - 1 || 1])(d);
};
})();
// data
var n = 0;
var data = [];
var sample = [];
// sampler
var sampleValue = (function() {
sampler.append("rect")
.attr("width", 50)
.attr("height", 240)
.attr("x", width * 3 / 4 - 25)
.attr("y", height / 2 - 120);
var ticks = sampler.append("g");
var updateTicks = function() {
var ticksData = ticks.selectAll("line")
.data(sample, function(d) { return d.v; });
ticksData.enter()
.append("line")
.attr("x1", width * 3 / 4 - 25)
.attr("x2", width * 3 / 4 + 25)
.attr("y1", function(d) { return y(d.v); })
.attr("y2", function(d) { return y(d.v); });
ticksData.exit().remove();
};
return function(d) {
++n;
if (sample.length < k) {
sample.push(d);
updateTicks();
return true;
} else if (Math.random() < k / n) {
sample[Math.floor(Math.random() * k)] = d;
updateTicks();
return true;
} else {
return false;
}
};
})();
// data stream
(function() {
var ms = 1000 / hz;
var sampleAndUpdateLines = function(d, i) {
sampleValue({v: d, i: i});
this.classList.add("post");
points.selectAll(".post")
.data(data).transition()
.duration(ms)
.attr("cx", function(d, i) { return x(i); });
var linesData = lines.selectAll("line")
.data(sample, function(d) { return d.i; });
linesData.enter()
.append("line")
.attr("x1", width * 3 / 4 + 25)
.attr("x2", width * 3 / 4 + 25)
.attr("y1", height / 2 - 120)
.attr("y2", height / 2 + 120);
linesData.transition()
.duration(ms)
.attr("x1", function(d) { return x(d.i); })
.attr("x2", function(d) { return x(d.i); });
linesData.exit().remove();
};
window.setInterval(function() {
data.push(Math.random());
points.selectAll("circle")
.data(data).enter()
.append("circle")
.attr("r", 4)
.attr("cx", width)
.attr("cy", y)
.transition()
.duration(1000)
.ease("linear")
.attr("cx", width * 3 / 4 + 25)
.each("end", sampleAndUpdateLines);
}, ms);
})();
})();
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment