Last active
August 29, 2015 14:13
-
-
Save anandology/b85e010903d1853a1544 to your computer and use it in GitHub Desktop.
Python script to select winners at random using donation amount as weight
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Script to pick N random entries from a file using donation amount as weight. | |
USAGE: python winners.py [--seed SEED] filename num-winners
       python winners.py --test
""" | |
import random | |
import sys | |
import argparse | |
import time | |
def weighted_choice(elements, weight_func):
    """Pick one element at random, with probability proportional to its weight.

    elements is any iterable. weight_func takes an element as argument and
    returns its weight as an integer. Elements whose weight is zero or
    negative are never picked.

    The draw is equivalent to choosing uniformly from a list in which every
    element is repeated weight times, but that list is never built, so the
    memory used does not grow with the size of the weights.

    Raises IndexError when there is nothing to choose from, i.e. elements is
    empty or no element has a positive weight.
    """
    from bisect import bisect_right

    candidates = []
    cumulative = []  # cumulative[i] is the total weight of candidates[:i + 1]
    total = 0
    for e in elements:
        # Clamp at zero: repeating an element a negative number of times
        # yields nothing, so such elements contribute no weight.
        total += max(weight_func(e), 0)
        candidates.append(e)
        cumulative.append(total)

    if total <= 0:
        raise IndexError("cannot choose from elements with no positive weight")

    # Pick a slot in [0, total) and return the element that owns it.
    # bisect_right skips over zero-weight elements, which own no slot.
    slot = random.randrange(total)
    return candidates[bisect_right(cumulative, slot)]
def weighted_shuffle(elements, weight_func):
    """Yield the given elements one at a time in weighted random order.

    Each element is drawn with weighted_choice from the ones that have not
    been yielded yet, so heavier elements tend to come out earlier.

    elements may be any iterable; it is copied up front and never modified.
    The weight_func argument should be a function that takes an element as
    argument and returns its weight as integer.

    If only elements of weight zero remain, the IndexError raised by
    weighted_choice propagates to the consumer of the iterator.
    """
    # list() rather than a slice: a slice of a tuple or range is not a list
    # and has no remove().
    remaining = list(elements)
    while remaining:
        picked = weighted_choice(remaining, weight_func)
        yield picked
        # Drop the picked element so that it cannot be drawn again.
        remaining.remove(picked)
def take(n, seq):
    """Return a list with the first n items of seq.

    seq may be any iterable. If it yields fewer than n items, all of them are
    returned. No more than n items are consumed from seq.
    """
    from itertools import islice

    # islice rejects negative counts, whereas a negative n simply means
    # "take nothing" here.
    return list(islice(seq, max(n, 0)))
def compute_weight(record):
    """Return the selection weight of a record as an integer.

    The amount is read from the last column of the record and converted with
    int(). Every record gets a base weight of 1, plus 1 for each full 500 of
    the amount.
    """
    amount = int(record[-1])
    # Floor division keeps the weight an integer on every Python version.
    return 1 + amount // 500
def parse_tsv(filename):
    """Read a tab-separated file and return its rows as lists of strings.

    Lines that are empty or contain only whitespace are skipped. Carriage
    return and newline characters are stripped from the ends of each line
    before it is split on tabs; other whitespace is kept.
    """
    # The with-block closes the file even if reading fails part-way.
    with open(filename) as f:
        return [line.strip("\r\n").split("\t") for line in f if line.strip()]
def average(numbers):
    """Return the arithmetic mean of numbers as a float.

    numbers must support len(). Raises ZeroDivisionError when it is empty.
    """
    # Divide by a float so that integer inputs are not truncated on Python 2.
    return sum(numbers) / float(len(numbers))
def runtest(N, n, weightfunc, label):
    """Run one sampling experiment and print a tab-separated result row.

    Draws n numbers from 1..N with weighted_shuffle using weightfunc, then
    prints the average of all the numbers, the average of the drawn numbers
    and label.
    """
    # Materialise the numbers as a list; range() only returns one on Python 2.
    numbers = list(range(1, N + 1))
    winners = take(n, weighted_shuffle(numbers, weightfunc))
    # A single parenthesised argument prints the same on Python 2 and 3.
    print("{0}\t{1}\t{2}".format(average(numbers), average(winners), label))
def test():
    """Print a table showing how the weight function shifts the selection.

    Each row comes from runtest with 50 numbers drawn out of 1..1000 under a
    different weight function; a legend for the columns is printed last.
    """
    print("A\tB\tC")
    runtest(1000, 50, lambda n: 1, "weight 1")
    # Floor division: the weight has to stay an integer on every Python version.
    runtest(1000, 50, lambda n: n // 10, "weight n/10")
    runtest(1000, 50, lambda n: n, "weight n")
    runtest(1000, 50, lambda n: 1000 - n, "weight 1000-n")
    print("A - average of all numbers")
    print("B - average of numbers selected in random using weight function")
    print("C - weight function used")
def usage():
    """Write the command-line usage message to standard error."""
    message = "USAGE: python %s filename num-winners" % sys.argv[0]
    # sys.stderr.write works on Python 2 and 3, unlike the print chevron form.
    sys.stderr.write(message + "\n")
def parse_arguments():
    """Parse the command line and return the resulting argparse namespace.

    The namespace carries filename (path of the records file), n (number of
    records to select, as int) and seed (int, 0 when --seed is not given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--seed",
        default=0,
        type=int,
        help="seed for random number generator",
    )
    parser.add_argument("filename", help="path to the file with records")
    parser.add_argument("n", help="number of records to select", type=int)
    return parser.parse_args()
def main():
    """Select the winners and print them, one tab-separated record per line.

    Reads the records from the file named on the command line, seeds the
    random number generator, and prints the first n records of a weighted
    shuffle (weighted by compute_weight) to standard output. The seed in use
    is reported on standard error.
    """
    args = parse_arguments()
    # A seed of 0 (also the default when --seed is omitted) counts as "not
    # given"; fall back to the current time in that case.
    if not args.seed:
        args.seed = int(time.time())

    random.seed(args.seed)
    sys.stderr.write("using seed %s\n" % args.seed)

    data = parse_tsv(args.filename)
    winners = take(args.n, weighted_shuffle(data, compute_weight))
    for w in winners:
        # A single parenthesised argument prints the same on Python 2 and 3.
        print("\t".join(w))
if __name__ == '__main__':
    # "--test" anywhere on the command line runs the self-test instead of the
    # winner selection.
    run = test if "--test" in sys.argv else main
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment