Last active
January 30, 2023 15:11
-
-
Save Xonxt/e9258afd264b7b618f99488dd3a4a0e0 to your computer and use it in GitHub Desktop.
Python map-reduce
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def flat_map(iterable: list, map_function=None): | |
"""Applies a `map_function` to every element of the `iterable` input list | |
and then flattens all the results into a new list | |
Args: | |
iterable (list): An input list | |
map_function (callable, optional): A function that is applied to every element of the input list. Defaults to None. | |
Returns: | |
list: A new list | |
Example: | |
Given a list of strings, split every string into a list of word tokens: | |
texts = ['hello hoW are yOu', 'heLLo hello HOW Hello', 'aRE HELLO YOu Are'] | |
flat_map(texts, lambda t: t.split(" ")) | |
>>> ['hello', 'hoW', 'are', 'yOu', 'heLLo', 'hello', 'HOW', 'Hello', 'aRE', 'HELLO', 'YOu', 'Are'] | |
""" | |
assert callable(map_function), "Expecting a Map function" | |
ys = [] | |
for x in iterable: | |
ys.extend(map_function(x)) | |
return ys | |
def flat_map_recursive(iterable: list, map_function=None, flat_list=None): | |
"""Recursively applies a `map_function` to every element of the input list, that can have nested lists | |
Args: | |
iterable (list): An input list | |
map_function (callable, optional): A function that is applied to every element of the input list. Defaults to None. | |
Returns: | |
list: A new list | |
Example: | |
texts = ["111 2222 33333 4444", ["555 5 555", "666 66 66666"], ["777 77777777"], ["888 88888 8888888", ["999 9999999", "00000"]]] | |
flat_map_recursive(texts, lambda t: t.split(" ")) | |
>>> ['111', '2222', '33333', '4444', '555', '5', '555', '666', '66', '66666', '777', '77777777', '888', '88888', '8888888', '999', '9999999', '00000'] | |
""" | |
assert callable(map_function), "Expecting a Map function" | |
if flat_list is None: | |
flat_list = [] | |
if not isinstance(iterable, list): | |
flat_list.extend(map_function(iterable)) | |
else: | |
for x in iterable: | |
flat_map_recursive(x, map_function, flat_list) | |
return flat_list |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def map_reduce(iterable: list, map_function, reduce_function=None): | |
""" | |
Perform a Map-Reduce operation on an iterable (List) | |
Args: | |
iterable (List): An input list | |
map_function (callable): A lambda function to map the input list into pairs of <Key, Value> | |
reduce_function (callable, optional): Recuction lambda function to reduce the lists of Values for same Keys. Defaults to None. | |
Example: | |
Given a list of word, count how many times each word (in lower case) appears in the list | |
input_list = ['hello', 'hoW', 'are', 'heLLo', 'yOu', 'hello', 'HOW', 'Hello', 'aRE', 'HELLO', 'YOu', 'Are'] | |
result = map_reduce(input_list, | |
map_function=lambda x: (x.lower(), 1), | |
reduce_function=lambda a, b: a + b) | |
result | |
>>> {'hello': 5, 'how': 2, 'are': 3, 'you': 2} | |
map_reduce(input_list, lambda x: (x.lower(), 1), None) | |
>>> {'hello': [1, 1, 1, 1, 1], 'how': [1, 1], 'are': [1, 1, 1], 'you': [1, 1]} | |
Returns: | |
Dict: A dictionary of <Key, ReducedValue> or <Key, [V1, V2, ..., Vi]> if no `reduce_function` given | |
""" | |
assert callable(map_function), "Expecting a Map function" | |
if reduce_function is not None: | |
assert callable(reduce_function), "Expecting a proper Reduce function" | |
mapped = list(map(map_function, iterable)) | |
shuffled = {x[0]: list([y[1] for y in mapped if y[0] == x[0]]) for x in mapped} | |
if callable(reduce_function): | |
from functools import reduce | |
reduced = {k: reduce(reduce_function, vlist) for k,vlist in shuffled.items()} | |
return reduced | |
else: | |
return shuffled |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment