Created
February 7, 2016 10:38
-
-
Save uolter/6b00a47d2f08bd95d670 to your computer and use it in GitHub Desktop.
Map reduce example to classify cluster and values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'uolter' | |
import map_reduce | |
def mapper(input_key, input_value):
    """Turn one record into a list of ``(cluster, bucket)`` pairs.

    :param input_key: key of the record; not used by this function.
    :param input_value: string of ``<cluster>:<value>`` entries separated
        by ``;``. Blank entries (e.g. produced by a trailing ``;`` or an
        empty string) are skipped.
    :return: list of ``(cluster, bucket)`` tuples, one per non-blank entry,
        in input order; ``bucket`` is an int between 0 and 4.
    :raises IndexError: if a non-blank entry contains no ``:``.
    :raises ValueError: if a value is neither ``NaN`` nor parseable as a
        float.
    """

    def cut_and_clean_value(cluster):
        """
        :param cluster: string in the format <cluster>:<value>
        :return: tuple of cluster and bucket. If value is NaN the bucket is 0
        """
        ret = cluster.split(":")
        # Strip so that a trailing newline on the last entry of a line does
        # not defeat the explicit 'NaN' check.
        raw = ret[1].strip()
        val = 0.0 if raw == 'NaN' else round(float(raw), 2)
        # Thresholds are checked from highest to lowest; the first one the
        # rounded value reaches decides the bucket. A float NaN fails every
        # comparison and therefore ends up in bucket 0 as well.
        for threshold, bucket in ((0.80, 4), (0.60, 3), (0.40, 2), (0.20, 1)):
            if val >= threshold:
                return ret[0], bucket
        return ret[0], 0

    # Skip blank entries instead of failing on them with an IndexError.
    return [cut_and_clean_value(c) for c in input_value.split(";") if c.strip()]
def reducer(intermediate_key, intermediate_value_list):
    """Collapse all values collected for one key into their sum.

    :param intermediate_key: key the values were grouped under
    :param intermediate_value_list: iterable of numeric values for that key
    :return: tuple of the key and the sum of its values
    """
    total = sum(intermediate_value_list)
    return (intermediate_key, total)
def main():
    """Read ``text/input.txt``, run the map/reduce job and print its result.

    Every non-blank line is sliced by fixed position: the first 9
    characters are the user id, the character at index 9 is skipped, and
    the remainder of the line is the value string passed to the mapper.
    """
    input_file = 'text/input.txt'
    i = {}
    with open(input_file) as handle:
        # Iterate the file directly instead of loading it with readlines().
        for d in handle:
            # Drop the line terminator so it does not leak into the value.
            d = d.rstrip("\r\n")
            if not d.strip():
                # A blank line carries no user id or values; skip it rather
                # than creating a bogus record.
                continue
            user_id = d[:9]
            i[user_id] = d[10:]
    # print() with a single argument behaves the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print(map_reduce.map_reduce(i, mapper, reducer))
# Run the job only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment