Skip to content

Instantly share code, notes, and snippets.

@uolter
Created February 7, 2016 10:38
Show Gist options
  • Save uolter/6b00a47d2f08bd95d670 to your computer and use it in GitHub Desktop.
Save uolter/6b00a47d2f08bd95d670 to your computer and use it in GitHub Desktop.
MapReduce example: bucket each cluster's value into a 0-4 score and sum the scores per cluster
__author__ = 'uolter'
import map_reduce
def mapper(input_key, input_value):
    """Turn one record into a list of ``(cluster, score)`` pairs.

    :param input_key: key of the record; the mapper does not use it.
    :param input_value: string of ``<cluster>:<value>`` entries separated
        by ``;``. Empty or whitespace-only entries (e.g. produced by a
        trailing ``;`` or a trailing newline) are skipped.
    :return: list of ``(cluster, score)`` tuples, one per non-empty entry.
    :raises IndexError: if a non-empty entry contains no ``:``.
    :raises ValueError: if a value is neither a number nor NaN.
    """
    # (lower bound, score) pairs, checked from the highest bound down.
    thresholds = ((0.80, 4), (0.60, 3), (0.40, 2), (0.20, 1))

    def cut_and_clean_value(cluster):
        """
        :param cluster: string in the format <cluster>:<value>
        :return: tuple of cluster name and score (0-4). A NaN value scores 0.
        """
        parts = cluster.split(":")
        # Strip so a value followed by a newline ("NaN\n") is still
        # recognised; the check is case-insensitive for the same reason.
        raw = parts[1].strip()
        if raw.lower() == 'nan':
            return parts[0], 0
        # Round to two decimals before bucketing, as the thresholds are
        # expressed with two decimals.
        val = round(float(raw), 2)
        for lower_bound, score in thresholds:
            if val >= lower_bound:
                return parts[0], score
        return parts[0], 0

    return [
        cut_and_clean_value(c)
        for c in input_value.split(";")
        if c.strip()  # skip blank entries instead of failing on them
    ]
def reducer(intermediate_key, intermediate_value_list):
    """Collapse all values collected for one key into their sum.

    :param intermediate_key: key the values were grouped under.
    :param intermediate_value_list: iterable of numeric values for that key.
    :return: tuple ``(intermediate_key, total)``.
    """
    total = sum(intermediate_value_list)
    return (intermediate_key, total)
def main():
    """Read ``text/input.txt`` and print the map/reduce result for it.

    Every non-blank line is split by fixed position: the first 9
    characters become the user id and everything from index 10 onwards is
    the value handed to the mapper. The character at index 9 is dropped
    (presumably a separator -- confirm against the input format). If the
    same user id appears on several lines, the last line wins.
    """
    input_file = 'text/input.txt'
    records = {}
    with open(input_file) as handle:
        # Iterate the file directly rather than loading every line up front.
        for line in handle:
            line = line.rstrip('\r\n')  # keep the newline out of the value
            if not line.strip():
                # A blank line would create a record with an empty value.
                continue
            user_id = line[:9]
            records[user_id] = line[10:]
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(map_reduce.map_reduce(records, mapper, reducer))
# Run the example only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment