Skip to content

Instantly share code, notes, and snippets.

@anandology
Created May 18, 2013 09:55
Show Gist options
  • Save anandology/5603909 to your computer and use it in GitHub Desktop.
Save anandology/5603909 to your computer and use it in GitHub Desktop.
"""Example to compute word frequency using simple map/reduce utility from openlibrary.
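https://github.com/internetarchive/openlibrary/tree/master/openlibrary/data/mapreduce.py

Usage: pass one or more text file names as command-line arguments; each word
is printed followed by its count.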
"""
import sys
import logging
from openlibrary.data import mapreduce
class WordFrequecy(mapreduce.Task):
    """Map/reduce task that counts occurrences of whitespace-separated words.

    NOTE: the class name spelling is kept as-is because ``main()`` refers to
    it by this name.
    """

    def map(self, key, value):
        """Yield a ``(word, "")`` pair for every word in ``value``.

        ``key`` is not used; ``value`` is split on whitespace.
        """
        for word in value.split():
            # The emitted value is a placeholder; only the number of
            # emissions per word is used by reduce().
            yield word, ""

    def reduce(self, key, values):
        """Return ``(key, n)`` where ``n`` is the number of items in ``values``."""
        occurrences = 0
        for _ in values:
            occurrences += 1
        return key, occurrences
def read_files(filenames):
    """Yield every line of every file in ``filenames``, in order.

    Files are opened one at a time and read lazily, so nothing is opened
    until the generator is iterated. Lines are yielded exactly as file
    iteration produces them (trailing newline included when present).

    :param filenames: iterable of paths of the files to read.
    """
    for filename in filenames:
        # Use a context manager so each file is closed as soon as its lines
        # are exhausted instead of waiting for garbage collection.
        with open(filename) as f:
            for line in f:
                yield line
def main():
    """Count the words in the files named on the command line and print them.

    Configures INFO-level logging, feeds the lines of all files given in
    ``sys.argv[1:]`` to a ``WordFrequecy`` task, and prints one
    ``word count`` pair per line.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
    lines = read_files(sys.argv[1:])
    # Records are (index, line) pairs; WordFrequecy.map ignores the key.
    records = enumerate(lines)
    task = WordFrequecy()
    for w, count in task.process(records):
        # A formatted print() call produces the same "word count" output on
        # Python 2 and Python 3; the statement form `print w, count` is a
        # SyntaxError on Python 3.
        print("%s %s" % (w, count))
# Run the word count only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment