Created
December 28, 2015 19:04
-
-
Save astrojuanlu/562d1ec55be970403442 to your computer and use it in GitHub Desktop.
Python implementation of Spark reduceByKey()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import reduce
from itertools import groupby
from operator import itemgetter
def reduceByKey(func, iterable):
    """Reduce the values of ``(key, value)`` pairs that share a key.

    Pure-Python equivalent of the Spark ``reduceByKey()`` counterpart.
    Inspired by http://stackoverflow.com/q/33648581/554319

    1. Sort by key (``groupby`` only merges *adjacent* items).
    2. Group by key, yielding ``(key, grouper)``.
    3. For each pair yield ``(key, reduce(func, values of that grouper))``.

    Args:
        func: Binary callable folding two values of the same key into one.
        iterable: Iterable of indexable pairs; item ``[0]`` is the key and
            item ``[1]`` is the value.

    Returns:
        A one-shot iterator of ``(key, reduced_value)`` tuples in ascending
        key order. A key with a single value yields that value unchanged,
        since ``reduce`` never calls ``func`` on a one-element sequence.

    Raises:
        TypeError: If the keys cannot be compared with each other, because
            they are sorted before grouping.
    """
    get_key = itemgetter(0)
    get_value = itemgetter(1)

    # The sort runs eagerly at call time and is stable, so values of one key
    # are folded in their original input order.
    ordered = sorted(iterable, key=get_key)

    # Scala: iterable.groupBy(_._1).map(l => (l._1, l._2.map(_._2).reduce(func)))
    # Each group is fully consumed by reduce() before groupby advances, which
    # is required because groupers share the underlying iterator.
    return (
        (key, reduce(func, map(get_value, group)))
        for key, group in groupby(ordered, key=get_key)
    )
if __name__ == '__main__':
    # Self-check using the example from
    # https://www.safaribooksonline.com/library/view/learning-spark/9781449359034/ch04.html#idp7549488
    data = [(1, 2), (3, 4), (3, 6)]
    expected_result = [(1, 2), (3, 10)]
    # Materialize the returned iterator so it can be compared to a list and
    # shown in the failure message.
    result = list(reduceByKey(lambda x, y: x + y, data))
    assert result == expected_result, result
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice hack dude!