deanmalmgren · August 29, 2015 14:03
diff --git a/.gitignore b/.gitignore
 sample_a.dat
 sample_b.dat
diff --git a/README.md b/README.md
diff --git a/compare_sample_means.py b/compare_sample_means.py
 #!/usr/bin/env python

 """Given two files that contain a list of data values, compute the
 differences in the sample means with a bootstrapping approach
 """

 import random

 def read_data(filename):
     """Read whitespace-separated numeric values from ``filename``.

     The whole file is read at once and split on any whitespace, so values
     may be separated by spaces or newlines.

     Returns a list of floats. A real list is built (instead of relying on
     ``map``, which is a lazy iterator on Python 3) because the values are
     later passed to ``random.choice``, which needs a sequence.

     Raises ValueError if a token cannot be converted with ``float``.
     """
     with open(filename) as stream:
         return [float(token) for token in stream.read().split()]

 def calculate_sample_mean(n, data):
     """Return the mean of ``n`` values drawn from ``data`` with replacement.

     n: number of random draws; ``n == 0`` raises ZeroDivisionError.
     data: non-empty sequence of numbers (``random.choice`` raises
         IndexError on an empty sequence).
     """
     # ``range`` exists on both Python 2 and Python 3, unlike ``xrange``.
     sample = [random.choice(data) for _ in range(n)]
     # Divide by a float so integer data is not floor-divided on Python 2.
     return sum(sample) / float(n)

 m = 1000  # number of repeated comparisons
 n = 100   # number of draws used for each sample mean

 a_data = read_data('sample_a.dat')
 b_data = read_data('sample_b.dat')


 # Repeatedly draw one sample mean from each data set and count how often
 # the mean from group a exceeds the mean from group b.
 count = 0
 for j in range(m):  # ``range`` works on Python 2 and 3; ``xrange`` does not
     a_mean = calculate_sample_mean(n, a_data)
     b_mean = calculate_sample_mean(n, b_data)
     if a_mean > b_mean:
         count += 1

 # Pass the two values explicitly instead of relying on ``locals()``.
 print("a_mean > b_mean %(count)d out of %(m)d times" % {
     'count': count,
     'm': m,
 })
diff --git a/create_sample.py b/create_sample.py
 #!/usr/bin/env python

 """I don't currently have access to the movie or horse racing data, so
 use this script to create the group a and group b data sets that are
 used in subsequent steps.
 """

 import sys
 import random

 def write_sample(stream, n, lam):
     """Write ``n`` exponentially distributed random values to ``stream``.

     stream: writable text file object; its ``name`` attribute is used in
         the progress message.
     n: number of values to write, one per line.
     lam: rate parameter passed to ``random.expovariate``.

     A one-line progress message is written to stderr before the values.
     """
     # The first literal ends with a space: the two adjacent literals are
     # concatenated, and without it the message read "distributionwith".
     message = (
         '%(filename)s with %(n)d values from exponential distribution '
         'with lam=%(lam)s\n'
     ) % {'filename': stream.name, 'n': n, 'lam': lam}
     sys.stderr.write(message)
     # ``range`` works on both Python 2 and 3, unlike ``xrange``.
     for _ in range(n):
         stream.write(str(random.expovariate(lam)) + '\n')

 # Generate both data files: (output path, number of values, rate).
 for path, size, rate in (
     ('sample_a.dat', 100000, 1.5),
     ('sample_b.dat', 20000, 1.1),
 ):
     with open(path, 'w') as stream:
         write_sample(stream, size, rate)
	#!/usr/bin/env python

	"""Given two files that contain a list of data values, compute the
	differences in the sample means with a bootstrapping approach
	"""

	import random

	def read_data(filename):
	    """Read whitespace-separated numeric values from ``filename``.

	    The whole file is read at once and split on any whitespace.

	    Returns a list of floats. A real list is built (instead of relying on
	    ``map``, which is a lazy iterator on Python 3) because the values are
	    later passed to ``random.choice``, which needs a sequence.

	    Raises ValueError if a token cannot be converted with ``float``.
	    """
	    with open(filename) as stream:
	        return [float(token) for token in stream.read().split()]

	def calculate_sample_mean(n, data):
	    """Return the mean of ``n`` values drawn from ``data`` with replacement.

	    n: number of random draws; ``n == 0`` raises ZeroDivisionError.
	    data: non-empty sequence of numbers (``random.choice`` raises
	        IndexError on an empty sequence).
	    """
	    # ``range`` exists on both Python 2 and Python 3, unlike ``xrange``.
	    sample = [random.choice(data) for _ in range(n)]
	    # Divide by a float so integer data is not floor-divided on Python 2.
	    return sum(sample) / float(n)

	m = 1000  # number of repeated comparisons
	n = 100   # number of draws used for each sample mean

	a_data = read_data('sample_a.dat')
	b_data = read_data('sample_b.dat')


	# Repeatedly draw one sample mean from each data set and count how often
	# the mean from group a exceeds the mean from group b.
	count = 0
	for j in range(m):  # ``range`` works on Python 2 and 3; ``xrange`` does not
	    a_mean = calculate_sample_mean(n, a_data)
	    b_mean = calculate_sample_mean(n, b_data)
	    if a_mean > b_mean:
	        count += 1

	# Pass the two values explicitly instead of relying on ``locals()``.
	print("a_mean > b_mean %(count)d out of %(m)d times" % {
	    'count': count,
	    'm': m,
	})
	#!/usr/bin/env python

	"""I don't currently have access to the movie or horse racing data, so
	use this script to create the group a and group b data sets that are
	used in subsequent steps.
	"""

	import sys
	import random

	def write_sample(stream, n, lam):
	    """Write ``n`` exponentially distributed random values to ``stream``.

	    stream: writable text file object; its ``name`` attribute is used in
	        the progress message.
	    n: number of values to write, one per line.
	    lam: rate parameter passed to ``random.expovariate``.

	    A one-line progress message is written to stderr before the values.
	    """
	    # The first literal ends with a space: the two adjacent literals are
	    # concatenated, and without it the message read "distributionwith".
	    message = (
	        '%(filename)s with %(n)d values from exponential distribution '
	        'with lam=%(lam)s\n'
	    ) % {'filename': stream.name, 'n': n, 'lam': lam}
	    sys.stderr.write(message)
	    # ``range`` works on both Python 2 and 3, unlike ``xrange``.
	    for _ in range(n):
	        stream.write(str(random.expovariate(lam)) + '\n')

	# Write the group a data set.
	with open('sample_a.dat', 'w') as stream:
	    write_sample(stream, 100000, 1.5)

	# Write the group b data set.
	with open('sample_b.dat', 'w') as stream:
	    write_sample(stream, 20000, 1.1)