Skip to content

Instantly share code, notes, and snippets.

@dkuebric
Created April 3, 2013 13:33
Show Gist options
  • Save dkuebric/5301243 to your computer and use it in GitHub Desktop.
Save dkuebric/5301243 to your computer and use it in GitHub Desktop.
sampling stats
import math
import matplotlib
import matplotlib.pyplot as plt
def confidence_for_sampling(pop_size=1000, con_level=0.99, sample_rate=0.1):
    """Return the relative confidence-interval half-width for a sampled count.

    The number of sampled items is modelled with mean
    ``pop_size * sample_rate`` and variance
    ``pop_size * sample_rate * (1 - sample_rate)``.  The result is
    ``zval * stdev / mean``, i.e. the half-width expressed as a fraction of
    the mean (0.05 means +/-5%), not as a percentage.

    Args:
        pop_size: Size of the population being sampled; must be positive.
        con_level: Confidence level; only 0.95 and 0.99 are supported.
        sample_rate: Fraction of the population sampled, in (0, 1].

    Returns:
        The half-width of the confidence interval relative to the mean.

    Raises:
        ValueError: If ``con_level`` is not 0.95 or 0.99, if ``pop_size`` is
            not positive, or if ``sample_rate`` is outside (0, 1].
    """
    # Hard-coded two-sided z-values; no other levels are known here.
    if con_level == 0.95:
        zval = 1.96
    elif con_level == 0.99:
        zval = 2.58
    else:
        raise ValueError("don't know those zvals")

    # Reject inputs that would otherwise surface as ZeroDivisionError
    # (mean == 0) or "math domain error" (negative variance).
    if not pop_size > 0:
        raise ValueError("pop_size must be positive, got %r" % (pop_size,))
    if not 0 < sample_rate <= 1:
        raise ValueError(
            "sample_rate must be in (0, 1], got %r" % (sample_rate,))

    mean = pop_size * sample_rate
    variance = mean * (1 - sample_rate)
    stdev = math.sqrt(variance)
    return (stdev * zval) / mean
def confidence_by_sampling(pop_size, con_level):
    """Compute the confidence interval for every whole-percent sample rate.

    Args:
        pop_size: Population size passed through to
            ``confidence_for_sampling``.
        con_level: Confidence level passed through to
            ``confidence_for_sampling``.

    Returns:
        A tuple ``(x, y)`` of two lists: ``x`` holds the sample rates in
        percent (1 through 99) and ``y`` holds the matching value returned
        by ``confidence_for_sampling`` for ``sample_rate = x / 100.0``.
    """
    # ``range`` rather than ``xrange`` so this runs on Python 2 and 3 alike.
    x = list(range(1, 100))
    y = [
        confidence_for_sampling(pop_size, con_level, sample_rate=s / 100.0)
        for s in x
    ]
    return (x, y)
def main():
    """Plot the confidence-interval error against sample rate and show it.

    Uses a fixed population of 20,000 and a 95% confidence level, computes
    the interval for sample rates 1%..99% via ``confidence_by_sampling``,
    and displays the resulting curve in a matplotlib window.
    """
    con_level = 0.95
    pop_size = 20*1000

    (x, y) = confidence_by_sampling(pop_size, con_level)
    # The computed interval is a fraction of the mean; the axis is labelled
    # in percent, so scale the values to match the label.
    y_percent = [100.0 * value for value in y]

    # Show plain tick labels on the x axis (no offset notation).
    x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False)
    axes = plt.gca()
    axes.xaxis.set_major_formatter(x_formatter)
    axes.ticklabel_format(style='plain', axis='x')

    plt.title('Effect of sample rate on %% error at pop_size=%d' % (pop_size,))
    plt.xlabel('Sample rate (%)')
    plt.ylabel('%% error (confidence interval) at %d%% confidence level' % (con_level*100,))
    plt.plot(x, y_percent)
    plt.show()
# Run the plot only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment