Last active
August 29, 2015 14:01
-
-
Save Radcliffe/f9c50f393af863ac8427 to your computer and use it in GitHub Desktop.
Nit-picking the birthday paradox
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# The birthday paradox predicts that in a group of 23 people, the probability | |
# is about 51% that two people share the same birthday. | |
# p = 1 - (1 - 1/365) * (1 - 2/365) * ... * (1 - 22/365) = 0.507297 | |
# This assumes that birthdays are distributed uniformly and independently | |
# among the 365 days of the year, excluding leap years. | |
# This program removes the assumption that birthdays are distributed | |
# uniformly. Instead, it uses empirical data collected by Roy Murphy | |
# (http://www.panix.com/~murphy/bday.html) | |
# Usage: birthday.py x y | |
# where x is the number of people (default = 23) | |
# and y is the number of iterations (default = 100000) | |
# | |
# Running this program with 23 people and one billion iterations | |
# yielded an empirical probaility of 0.507843500, which is quite close | |
# to the theoretical value. This confirms that the model assumption of | |
# equal distribution of birthdays is reasonable. | |
import sys | |
from random import randint | |
from bisect import bisect_left | |
from time import time | |
start = time() | |
ITERS = 10**5 | |
PEOPLE = 23 | |
nargs = len(sys.argv) | |
if nargs >= 2: | |
PEOPLE = int(sys.argv[1]) | |
if nargs >= 3: | |
ITERS = int(sys.argv[2]) | |
cumulative = [] | |
total = 0 | |
with open("bdata.txt", "r") as data: | |
data.readline() | |
for line in data.readlines(): | |
total += int(line.strip().split(' ')[1]) | |
cumulative.append(total) | |
total = cumulative.pop() / 2 | |
N = len(cumulative) | |
def birthday(cumulative, N, total, people): | |
count = [0] * N | |
for n in xrange(people): | |
bday = bisect_left(cumulative, randint(1, total)) | |
if count[bday] == 1: | |
return 1 | |
count[bday] = 1 | |
return 0 | |
shared = sum(birthday(cumulative, N, total, PEOPLE) for k in xrange(ITERS)) | |
print "In a group of %d people, the probability that two share" % PEOPLE | |
print "the same birthday is approximately" | |
print "%s out of %s." % (shared, ITERS) | |
print "Time: %.2f seconds" % (time() - start) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment