jrjames83 · January 24, 2018 03:53
diff --git a/python_sample_size_estimator_simulation.py b/python_sample_size_estimator_simulation.py
 # How to determine sample size through simulation
 import random
 from collections import Counter

 # Categories that make up the simulated population.
 things = [
     'camel', 'horse', 'donkey', 'mule', 'bulldozer',
     'chain', 'machete', 'tool', 'border collie', 'widget',
 ]
 # One random weight per category. Building the list from `things` (instead of
 # a hard-coded range(10)) keeps the two lists the same length by construction,
 # so no separate length assertion is needed.
 weights = [random.random() * 1000 for _ in things]

 # Number of weighted draws that form the simulated population.
 UNIVERSE_SIZE = 100000

 # Simulated population: weighted draws (with replacement) from `things`.
 universe = random.choices(things, weights=weights, k=UNIVERSE_SIZE)

 # Share of the population held by each category, most frequent first.
 uni_counts = Counter(universe).most_common(10)
 for x, counts in uni_counts:
     # Divide by the actual population size rather than a repeated literal.
     print(x, counts / len(universe))
    
    
 border collie 0.21839
 tool 0.16326
 horse 0.14853
 mule 0.11849
 machete 0.10348
 bulldozer 0.09127
 chain 0.06403
 donkey 0.05163
 camel 0.02267
 widget 0.01825


 # Draw a random sample (without replacement) from the population and print
 # each category's share of that sample, most frequent first, for comparison
 # with the population shares printed earlier.
 SAMPLE_SIZE = 400
 sample = random.sample(universe, k=SAMPLE_SIZE)
 for item, frequency in Counter(sample).most_common(10):
     # Divisor comes from the sample itself so it cannot drift from `k`.
     print(item, frequency / len(sample))
    
 border collie 0.2125
 tool 0.1625
 horse 0.1525
 machete 0.1175
 mule 0.1125
 bulldozer 0.0825
 chain 0.065
 donkey 0.0425
 camel 0.0425
 widget 0.01
	# How to determine sample size through simulation
	import random
	from collections import Counter

	# Categories that make up the simulated population.
	things = [
		'camel', 'horse', 'donkey', 'mule', 'bulldozer',
		'chain', 'machete', 'tool', 'border collie', 'widget',
	]
	# One random weight per category. Building the list from `things` (instead of
	# a hard-coded range(10)) keeps the two lists the same length by construction,
	# so no separate length assertion is needed.
	weights = [random.random() * 1000 for _ in things]

	# Number of weighted draws that form the simulated population.
	UNIVERSE_SIZE = 100000

	# Simulated population: weighted draws (with replacement) from `things`.
	universe = random.choices(things, weights=weights, k=UNIVERSE_SIZE)

	# Share of the population held by each category, most frequent first.
	uni_counts = Counter(universe).most_common(10)
	for x, counts in uni_counts:
		# The loop body must be indented under the `for`; divide by the actual
		# population size rather than a repeated literal.
		print(x, counts / len(universe))


	border collie 0.21839
	tool 0.16326
	horse 0.14853
	mule 0.11849
	machete 0.10348
	bulldozer 0.09127
	chain 0.06403
	donkey 0.05163
	camel 0.02267
	widget 0.01825


	# Draw a random sample (without replacement) from the population and print
	# each category's share of that sample, most frequent first, for comparison
	# with the population shares printed earlier.
	SAMPLE_SIZE = 400
	sample = random.sample(universe, k=SAMPLE_SIZE)
	for item, frequency in Counter(sample).most_common(10):
		# The loop body must be indented under the `for`; the divisor comes from
		# the sample itself so it cannot drift from `k`.
		print(item, frequency / len(sample))

	border collie 0.2125
	tool 0.1625
	horse 0.1525
	machete 0.1175
	mule 0.1125
	bulldozer 0.0825
	chain 0.065
	donkey 0.0425
	camel 0.0425
	widget 0.01