stringertheory · April 19, 2019 21:49
diff --git a/skittles.py b/skittles.py
 import sys
 import collections
 import random

 # How many simulated trials the script runs.
 N_TRIALS = 100000

 # Column names that are tallied as flavors; these are also the categories
 # the simulation draws from.
 FLAVORS = [
     'Strawberry',
     'Orange',
     'Lemon',
     'Apple',
     'Grape',
 ]


 def chisquared(values):
     """Return a chi-squared statistic of ``values`` against a uniform expectation.

     The expected count for every entry is the mean of ``values``; the
     result is the sum of ``(value - mean)**2 / mean`` over all entries.

     ``values`` may be any iterable of numbers, including a one-shot
     iterator or generator: it is copied into a list first because it has
     to be measured and then traversed twice.

     Raises ZeroDivisionError if ``values`` is empty or its mean is zero.
     """
     counts = list(values)
     mean = sum(counts) / float(len(counts))
     return sum((count - mean) ** 2 / mean for count in counts)


 def read_file(filename='skittles.txt'):
     """Yield one dict per data row of a whitespace-delimited table.

     The first line of the file supplies the column names. Each later
     non-blank line is split on whitespace, every field is converted with
     ``int``, and the fields are paired with the column names in order
     (``zip`` stops at the shorter of the two).

     An empty file yields nothing, and blank lines are skipped instead of
     being reported as empty rows.

     Raises ValueError if a data field cannot be converted by ``int``.
     """
     with open(filename) as infile:
         # The default keeps an empty file from raising StopIteration inside
         # this generator, which Python 3.7+ turns into a RuntimeError.
         header = next(infile, '').split()
         for line in infile:
             fields = line.split()
             if not fields:
                 continue
             yield dict(zip(header, [int(field) for field in fields]))


 # Tally the per-flavor totals over every row of the data file; columns whose
 # name is not in FLAVORS (like "Uncounted") are left out of the tally.
 observed = collections.Counter()
 for row in read_file():
     for flavor, count in row.items():
         if flavor not in FLAVORS:
             continue
         observed[flavor] += count

 # The observed totals and their chi-squared statistic go to stderr.
 print(observed, file=sys.stderr)
 print(chisquared(observed.values()), file=sys.stderr)

 # Every trial draws as many skittles as were observed, so the total is
 # computed once here instead of once per trial.
 n_skittles = sum(observed.values())

 for trial_number in range(N_TRIALS):

     # simulate uniform skittle flavors; every flavor starts at zero so that
     # a flavor which is never drawn still counts as a category
     dist = collections.Counter(dict.fromkeys(FLAVORS, 0))
     for i in range(n_skittles):
         dist[random.choice(FLAVORS)] += 1

     # one statistic per trial on stdout, flushed as soon as it is printed
     print(chisquared(dist.values()), flush=True)
	import sys
	import collections
	import random

	# How many simulated trials the script runs.
	N_TRIALS = 100000

	# Column names that are tallied as flavors; these are also the categories
	# the simulation draws from.
	FLAVORS = [
		'Strawberry',
		'Orange',
		'Lemon',
		'Apple',
		'Grape',
	]


	def chisquared(values):
		"""Return a chi-squared statistic of ``values`` against a uniform expectation.

		The expected count for every entry is the mean of ``values``; the
		result is the sum of ``(value - mean)**2 / mean`` over all entries.

		``values`` may be any iterable of numbers, including a one-shot
		iterator or generator: it is copied into a list first because it has
		to be measured and then traversed twice.

		Raises ZeroDivisionError if ``values`` is empty or its mean is zero.
		"""
		counts = list(values)
		mean = sum(counts) / float(len(counts))
		return sum((count - mean) ** 2 / mean for count in counts)


	def read_file(filename='skittles.txt'):
		"""Yield one dict per data row of a whitespace-delimited table.

		The first line of the file supplies the column names. Each later
		non-blank line is split on whitespace, every field is converted with
		``int``, and the fields are paired with the column names in order
		(``zip`` stops at the shorter of the two).

		An empty file yields nothing, and blank lines are skipped instead of
		being reported as empty rows.

		Raises ValueError if a data field cannot be converted by ``int``.
		"""
		with open(filename) as infile:
			# The default keeps an empty file from raising StopIteration inside
			# this generator, which Python 3.7+ turns into a RuntimeError.
			header = next(infile, '').split()
			for line in infile:
				fields = line.split()
				if not fields:
					continue
				yield dict(zip(header, [int(field) for field in fields]))


	# Tally the per-flavor totals over every row of the data file; columns whose
	# name is not in FLAVORS (like "Uncounted") are ignored.
	observed = collections.Counter()
	for row in read_file():
		for key, value in row.items():
			if key in FLAVORS:
				observed[key] += value

	# The observed totals and their chi-squared statistic go to stderr.
	print(observed, file=sys.stderr)
	print(chisquared(observed.values()), file=sys.stderr)

	# Every trial draws as many skittles as were observed, so the total is
	# computed once here instead of once per trial.
	n_skittles = sum(observed.values())

	for trial_number in range(N_TRIALS):

		# simulate uniform skittle flavors; every flavor starts at zero so that
		# a flavor which is never drawn still counts as a category
		dist = collections.Counter(dict.fromkeys(FLAVORS, 0))
		for i in range(n_skittles):
			dist[random.choice(FLAVORS)] += 1

		# one statistic per trial on stdout, flushed as soon as it is printed
		print(chisquared(dist.values()), flush=True)