simon-engledew · August 8, 2019 08:06
diff --git a/main.py b/main.py
 import itertools
 import functools
 import collections


 class StatisticsError(ValueError):
    pass


 def _key(pair):
    k, _ = pair
    return k


 def _inc(acc, item):
    acc[item] += 1
    return acc


 def _count(items):
    """
    reduce the items into (item, frequency) pairs
    """
    return functools.reduce(_inc, items, collections.defaultdict(int)).items()


 def _odd(pairs, mid):
    # [1, 2, 3, 4, 5]
    #       ---
    n = mid
    for key, count in pairs:
        n -= count
        if n < 0:
            return key
    raise StopIteration


 def _even(pairs, mid):
    # [1, 2, 3, 4, 5, 6]
    #        ----
    n = mid - 1
    ipairs = iter(pairs)

    for key, count in ipairs:
        n -= count
        if n < 0:
            if n < -1 and count > 1:
                # this bucket contains both values
                return key / 1
            # lookahead to the next bucket for second value
            return (key + _key(next(ipairs))) / 2
    raise StopIteration


 def _reduce(pairs):
    if not len(pairs):
        raise StatisticsError("no median for empty data")

    size = sum(value for key, value in pairs)
    fn = _even if size % 2 == 0 else _odd

    return fn(pairs, size // 2)


 def median(items):
    return _reduce(
        sorted(_count(items), key=_key)
    )
diff --git a/main_test.py b/main_test.py
 from main import median, StatisticsError
 import pytest
 import statistics
 import hypothesis.strategies


 @pytest.mark.parametrize('values', (
    [0, 0, 1, 1],
    [1, 2, 2, 20, 100, 5, 2],
    [1, 2, 2, 20, 100, 5, 2, 1],
    [1, 2],
    [1, 1],
    [1],
    [1, 2, 2, 20, 100, 1000, 500, 500, 500, 10000],
 ))
 def test_median(values):
    assert median(iter(values)) == statistics.median(iter(values))


 def test_median_raises():
    with pytest.raises(StatisticsError):
        median([])


 @hypothesis.given(values=hypothesis.strategies.lists(hypothesis.strategies.integers(), min_size=1))
 def test_fuzz_median(values):
    assert median(iter(values)) == statistics.median(iter(values))
	import itertools
	import functools
	import collections


	class StatisticsError(ValueError):
	pass


	def _key(pair):
	k, _ = pair
	return k


	def _inc(acc, item):
	acc[item] += 1
	return acc


	def _count(items):
	"""
	reduce the items into (item, frequency) pairs
	"""
	return functools.reduce(_inc, items, collections.defaultdict(int)).items()


	def _odd(pairs, mid):
	# [1, 2, 3, 4, 5]
	# ---
	n = mid
	for key, count in pairs:
	n -= count
	if n < 0:
	return key
	raise StopIteration


	def _even(pairs, mid):
	# [1, 2, 3, 4, 5, 6]
	# ----
	n = mid - 1
	ipairs = iter(pairs)

	for key, count in ipairs:
	n -= count
	if n < 0:
	if n < -1 and count > 1:
	# this bucket contains both values
	return key / 1
	# lookahead to the next bucket for second value
	return (key + _key(next(ipairs))) / 2
	raise StopIteration


	def _reduce(pairs):
	if not len(pairs):
	raise StatisticsError("no median for empty data")

	size = sum(value for key, value in pairs)
	fn = _even if size % 2 == 0 else _odd

	return fn(pairs, size // 2)


	def median(items):
	return _reduce(
	sorted(_count(items), key=_key)
	)
	from main import median, StatisticsError
	import pytest
	import statistics
	import hypothesis.strategies


	@pytest.mark.parametrize('values', (
	[0, 0, 1, 1],
	[1, 2, 2, 20, 100, 5, 2],
	[1, 2, 2, 20, 100, 5, 2, 1],
	[1, 2],
	[1, 1],
	[1],
	[1, 2, 2, 20, 100, 1000, 500, 500, 500, 10000],
	))
	def test_median(values):
	assert median(iter(values)) == statistics.median(iter(values))


	def test_median_raises():
	with pytest.raises(StatisticsError):
	median([])


	@hypothesis.given(values=hypothesis.strategies.lists(hypothesis.strategies.integers(), min_size=1))
	def test_fuzz_median(values):
	assert median(iter(values)) == statistics.median(iter(values))