Skip to content

Instantly share code, notes, and snippets.

@simon-engledew
Last active August 8, 2019 08:06
Show Gist options
  • Save simon-engledew/dfa9fdafd82cf126428853c1df5abe05 to your computer and use it in GitHub Desktop.
Save simon-engledew/dfa9fdafd82cf126428853c1df5abe05 to your computer and use it in GitHub Desktop.
Rolling Median
import itertools
import functools
import collections
class StatisticsError(ValueError):
pass
def _key(pair):
k, _ = pair
return k
def _inc(acc, item):
acc[item] += 1
return acc
def _count(items):
"""
reduce the items into (item, frequency) pairs
"""
return functools.reduce(_inc, items, collections.defaultdict(int)).items()
def _odd(pairs, mid):
# [1, 2, 3, 4, 5]
# ---
n = mid
for key, count in pairs:
n -= count
if n < 0:
return key
raise StopIteration
def _even(pairs, mid):
# [1, 2, 3, 4, 5, 6]
# ----
n = mid - 1
ipairs = iter(pairs)
for key, count in ipairs:
n -= count
if n < 0:
if n < -1 and count > 1:
# this bucket contains both values
return key / 1
# lookahead to the next bucket for second value
return (key + _key(next(ipairs))) / 2
raise StopIteration
def _reduce(pairs):
if not len(pairs):
raise StatisticsError("no median for empty data")
size = sum(value for key, value in pairs)
fn = _even if size % 2 == 0 else _odd
return fn(pairs, size // 2)
def median(items):
return _reduce(
sorted(_count(items), key=_key)
)
from main import median, StatisticsError
import pytest
import statistics
import hypothesis.strategies
@pytest.mark.parametrize('values', (
[0, 0, 1, 1],
[1, 2, 2, 20, 100, 5, 2],
[1, 2, 2, 20, 100, 5, 2, 1],
[1, 2],
[1, 1],
[1],
[1, 2, 2, 20, 100, 1000, 500, 500, 500, 10000],
))
def test_median(values):
assert median(iter(values)) == statistics.median(iter(values))
def test_median_raises():
with pytest.raises(StatisticsError):
median([])
@hypothesis.given(values=hypothesis.strategies.lists(hypothesis.strategies.integers(), min_size=1))
def test_fuzz_median(values):
assert median(iter(values)) == statistics.median(iter(values))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment