Last active
October 9, 2018 22:40
-
-
Save seejohnrun/5291246 to your computer and use it in GitHub Desktop.
Select (n) random elements from a weighted set randomly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ported from: | |
http://stackoverflow.com/questions/2140787/select-random-k-elements-from-a-list-whose-elements-have-weights | |
// each node in the heap has a value, weight, and totalWeight | |
// the totalWeight is the weight of the node plus any children | |
var Node = {}; | |
var newNode = function (value, weight, totalWeight) { | |
var node = Object.create(Node); | |
node.value = value; | |
node.weight = weight; | |
node.totalWeight = totalWeight; | |
return node; | |
}; | |
// h is the heap, it's like a binary tree that lives in an array | |
// it has a node for each pair in #items. h[1] is the root. Each | |
// other node h[i] has a parent at h[i >> 1]. To get this nice simple | |
// arithmetic, we have to leave h[0] vacant. | |
var rwsHeap = function (items) { | |
// Leave h[0] vacant | |
var h = [undefined], weight; | |
for (var value in items) { | |
weight = items[value]; | |
h.push(newNode(value, weight, weight)); | |
} | |
// Total up the total weights (add h[i]'s total to the parent) | |
for (var i = h.length - 1; i > 1; i--) { // TODO check over equality | |
h[i >> 1].totalWeight += h[i].totalWeight; | |
} | |
return h; | |
}; | |
var rwsHeapPop = function (h) { | |
// Start with a random amount of gas | |
var gas = h[1].totalWeight * Math.random(); | |
var i = 1; | |
// Start driving at the root | |
// While we have enough gas to go past node i | |
while (gas > h[i].weight) { | |
gas -= h[i].weight; | |
i = i << 1; | |
// Move to the first child | |
// If we have enough gas, drive past first child and descendents | |
// And to the next child | |
if (gas > h[i].totalWeight) { | |
gas -= h[i].totalWeight; | |
i += 1; | |
} | |
} | |
// h[1] is the selected node | |
var w = h[i].weight; | |
var v = h[i].value; | |
// Make sure h[i] is not chosen again | |
h[i].weight = 0; | |
// And clean up the total weights to re-distribute | |
while (i !== 0) { | |
h[i].totalWeight -= w; | |
i = i >> 1; | |
} | |
// Return the selected element | |
return v; | |
}; | |
// Create a heap and select #n elements from it | |
// @return array of selected elements | |
var randomWeightedSampleNoReplacement = function (items, n) { | |
var h = rwsHeap(items); | |
var sel = []; | |
var totalItems = Object.keys(items).length; | |
for (var i = 0; i < n && i < totalItems; i++) { | |
sel.push(rwsHeapPop(h)); | |
} | |
return sel; | |
}; | |
// | |
// Usage | |
// | |
var results = randomWeightedSampleNoReplacement({ | |
'a': 24, | |
'b': 1 | |
}, 2); | |
console.log(results); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There is a rather egregious bug with this implementation, which I have fixed:
https://gist.github.com/Mumbleskates/75aa2a799eb536f7227d
http://stackoverflow.com/revisions/2149533/10
The comparisons (
gas > value
) are currently written for random generators that return in the interval [0, 1), orceil()
integer values. As this is definitely not the case, the head of the heap is disproportionately likely to be chosen; with integer weights, this algorithm will even return the heap head if it has 0 weight. Usinggas >= value
fixes this issue.