Created
May 17, 2012 19:43
-
-
Save trjordan/2721175 to your computer and use it in GitHub Desktop.
Confidence intervals
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cProfile | |
import sys | |
import math | |
import numpy | |
import random | |
def mean(values):
    """Return the arithmetic mean of `values`, or None if it is empty.

    Args:
        values: A sized collection of numbers (it is passed to both `sum()`
            and `len()`).

    Returns:
        The mean computed with true (floating-point) division, or None when
        `values` is empty/falsy.
    """
    if not values:
        return None
    # Divide by a float so that integer input is not floor-divided under
    # Python 2 (mean([1, 2]) must be 1.5, not 1).
    return sum(values) / float(len(values))
def adaptive_confidence_interval(values, iterations=1000, alpha=0.05):
    """Get the confidence interval in as few bootstrap iterations as possible.

    Starting from 10 iterations, several independent confidence intervals are
    computed per round. If their widths disagree by more than a fixed
    fraction of the average width, the iteration count is doubled (capped at
    `iterations`) and the round is repeated.

    Args:
        values: Sample passed through to `confidence_interval`.
        iterations: Upper bound for the doubling of the bootstrap iteration
            count. The first round always uses 10 iterations.
        alpha: Significance level passed through to `confidence_interval`.

    Returns:
        The first confidence-interval dict computed in the final round, as
        returned by `confidence_interval`.
    """
    NUM_TRIES = 4      # Independent intervals computed per round
    THRESHOLD = 0.1    # Largest tolerated (max - min) / mean of the widths

    cur_iterations = 10
    while True:
        cis = [confidence_interval(values, iterations=cur_iterations, alpha=alpha)
               for _ in range(NUM_TRIES)]
        widths = [ci['upper'] - ci['lower'] for ci in cis]
        spread = max(widths) - min(widths)
        # Average with a float divisor so integer widths are not floor-divided
        # under Python 2.
        mean_width = sum(widths) / float(len(widths))

        if mean_width:
            unstable = spread / mean_width > THRESHOLD
        else:
            # A zero average width (e.g. constant data) would divide by zero;
            # the tries only disagree if the widths themselves differ.
            unstable = spread > 0

        if unstable and cur_iterations < iterations:
            # Double the effort, but never beyond the caller's budget.
            cur_iterations = min(cur_iterations * 2, iterations)
        else:
            # Just pick one and return it
            return cis[0]
def confidence_interval(values, iterations=1000, alpha=0.05):
    """Return a bootstrap confidence interval of the mean of `values`.

    The sample is resampled with replacement `iterations` times. The sorted
    resample means supply the quantiles for a pivotal interval, i.e. each
    bound is `2 * sample_mean - bootstrap_quantile`.

    Args:
        values: Non-empty, indexable sequence of numbers.
        iterations: Number of bootstrap resamples to draw.
        alpha: Significance level; alpha / 2 is cut from each tail.

    Returns:
        A dict with keys 'lower' and 'upper' (the interval bounds), 'value'
        (the sample mean) and 'num' (the sample size).

    Raises:
        ValueError: If `values` is empty.
    """
    n = len(values)  # Sample size to resample with
    if n == 0:
        # Fail early with a clear message instead of a TypeError from
        # arithmetic on a None mean further down.
        raise ValueError('confidence_interval() requires at least one value')
    value = mean(values)  # Estimator

    # Bootstrap: draw n values with replacement, record the mean of each
    # synthetic sample, then sort so quantiles can be read off by index.
    synthetic_values = []
    for _ in range(iterations):
        resample = [random.choice(values) for _draw in range(n)]
        synthetic_values.append(mean(resample))
    synthetic_values.sort()

    # Compute the confidence interval using pivotal intervals. Dividing by
    # 2.0 keeps an integer alpha from being floor-divided under Python 2, and
    # the clamp keeps alpha == 0 from indexing one past the end of the list.
    last_index = iterations - 1
    lower_index = min(int(math.floor(iterations * (1 - alpha / 2.0))),
                      last_index)
    upper_index = min(int(math.floor(iterations * alpha / 2.0)), last_index)
    lower = 2 * value - synthetic_values[lower_index]
    upper = 2 * value - synthetic_values[upper_index]

    return {'lower': lower,
            'upper': upper,
            'value': value,
            'num': n}
def main(num_iterations):
    """Run the adaptive confidence interval on a freshly generated sample.

    Draws `num_iterations` integers with random.randint(0, 1000) and returns
    whatever `adaptive_confidence_interval` returns for that sample.
    """
    sample = []
    for _ in xrange(num_iterations):
        sample.append(random.randint(0, 1000))
    return adaptive_confidence_interval(sample)
if __name__ == '__main__':
    # Command-line entry point: run main() under the profiler named by
    # argv[1], using argv[2] as the argument to main().
    if len(sys.argv) < 3:
        print("Usage: python confidence.py <profiler> <num_iterations>")
        # sys.exit() rather than the site-provided exit(), which is meant for
        # interactive sessions and is missing when run with `python -S`.
        sys.exit(1)

    profiler = sys.argv[1]
    num_iterations = int(sys.argv[2])

    if profiler == 'cProfile':
        # Profile data is written to a file rather than printed.
        cProfile.runctx('main(num_iterations)', globals(), locals(),
                        filename='confidence.py.cProfile')
    elif profiler == 'statprof':
        # Not in standard library -- pip install statprof
        import statprof
        statprof.start()
        try:
            main(num_iterations)
        finally:
            # Always stop sampling, even if main() raises.
            statprof.stop()
            # No persistent output -- just display the results.
            statprof.display()
    elif profiler == 'line_profiler':
        # Not in standard library -- pip install line_profiler
        #
        # CLI only. Add the @profile decorator to functions above, and run from
        # the command line like so (arguments after the script name are passed
        # to the script; results are written to confidence.py.lprof):
        #   kernprof.py -l confidence.py line_profiler <num_iterations>
        # And view the results:
        #   python -m line_profiler confidence.py.lprof
        main(num_iterations)
        print('good')
    else:
        print('unknown profiler type')
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment