tokestermw · August 31, 2016 17:46
diff --git a/wilson_lower_bound.py b/wilson_lower_bound.py
 from __future__ import division


 def average(x):
     """Return the arithmetic mean of the items in ``x``.

     ``x`` must work with both ``sum()`` and ``len()``. An empty ``x``
     raises ``ZeroDivisionError`` because the length is used as divisor.
     """
     total = sum(x)
     size = len(x)
     return total / size


 def wilson_lower_bound(count, total, z_score=1.96):
     """Compute the lower end of the Wilson score interval.

     Ref: http://www.evanmiller.org/how-not-to-sort-by-average-rating.html

     Args:
         count: Number of positive observations; must satisfy
             ``0 <= count <= total``.
         total: Number of observations overall.
         z_score: Normal quantile that sets the width of the interval
             (1.96 is the conventional value for a 95% confidence level).

     Returns:
         The lower bound of the interval, or ``0`` when ``total`` is zero.

     Raises:
         AssertionError: If ``count`` lies outside ``[0, total]`` (only
             while assertions are enabled).
     """
     assert 0 <= count <= total

     # Without any observations there is nothing to estimate.
     if total == 0:
         return 0

     z_squared = z_score ** 2
     observed = 1.0 * count / total

     # Centre of the interval, pulled away from the raw proportion.
     centre = observed + z_squared / (2 * total)
     # Half-width of the interval before normalisation.
     spread = (observed * (1 - observed) + z_squared / (4 * total)) / total
     margin = z_score * spread ** .5
     scale = 1 + z_squared / total
     return (centre - margin) / scale


 if __name__ == '__main__':
     # Self-check: samples a, b and c all have a mean of 0.5 but differ in
     # size (8, 2 and 16 items); sample d has a higher mean (7 of 8).
     base = [0, 0, 0, 1, 1, 0, 1, 1]
     samples = {
         'a': base,
         'b': [0, 1],
         'c': base * 2,
         'd': [0, 1, 1, 1, 1, 1, 1, 1],
     }

     avg = {}
     wlb = {}
     for name, values in samples.items():
         avg[name] = average(values)
         wlb[name] = wilson_lower_bound(sum(values), len(values))

     assert avg['a'] == avg['b'] == avg['c'], "averages should be equal"
     assert wlb['c'] > wlb['a'] > wlb['b'], "but wilson lower bound should be higher when there is more data (i.e. lower variance)"
     assert wlb['d'] > wlb['c'] and avg['d'] > avg['c'], "d should be highest"
	from __future__ import division


	def average(x):
	    """Return the arithmetic mean of the items in ``x``.

	    ``x`` must work with both ``sum()`` and ``len()``. An empty ``x``
	    raises ``ZeroDivisionError`` because the length is used as divisor.
	    """
	    return sum(x) / len(x)


	def wilson_lower_bound(count, total, z_score=1.96):
	    """Implementation of Wilson scores (lower bound of the interval).

	    Ref: http://www.evanmiller.org/how-not-to-sort-by-average-rating.html

	    Args:
	        count: Number of positive observations; must satisfy
	            ``0 <= count <= total``.
	        total: Number of observations overall.
	        z_score: Normal quantile that sets the width of the interval
	            (1.96 is the conventional value for a 95% confidence level).

	    Returns:
	        The lower bound of the interval, or ``0`` when ``total`` is zero.

	    Raises:
	        AssertionError: If ``count`` lies outside ``[0, total]`` (only
	            while assertions are enabled).
	    """
	    assert 0 <= count <= total

	    # Without any observations there is nothing to estimate.
	    if total == 0:
	        return 0

	    # Observed proportion of positives; 1.0 forces float division.
	    phat = 1.0 * count / total

	    # Adjusted centre, half-width and normaliser of the interval.
	    base = phat + z_score ** 2 / (2 * total)
	    diff = z_score * ((phat * (1 - phat) + z_score ** 2 / (4 * total)) / total) ** .5
	    norm = 1 + z_score ** 2 / total
	    return (base - diff) / norm


	if __name__ == '__main__':
	    # Self-check: a, b and c all have a mean of 0.5 but differ in size
	    # (8, 2 and 16 items); d has a higher mean (7 of 8).
	    a = [0, 0, 0, 1, 1, 0, 1, 1]
	    b = [0, 1]
	    c = a * 2
	    d = [0, 1, 1, 1, 1, 1, 1, 1]

	    avg_a = average(a)
	    wlb_a = wilson_lower_bound(sum(a), len(a))

	    avg_b = average(b)
	    wlb_b = wilson_lower_bound(sum(b), len(b))

	    avg_c = average(c)
	    wlb_c = wilson_lower_bound(sum(c), len(c))

	    avg_d = average(d)
	    wlb_d = wilson_lower_bound(sum(d), len(d))

	    # Equal means, yet the bound rises with the amount of data.
	    assert avg_a == avg_b == avg_c, "averages should be equal"
	    assert wlb_c > wlb_a > wlb_b, "but wilson lower bound should be higher when there is more data (i.e. lower variance)"
	    assert wlb_d > wlb_c and avg_d > avg_c, "d should be highest"