Created
January 27, 2017 16:16
-
-
Save vlad-bezden/957f8275da18297afee5b305191687fa to your computer and use it in GitHub Desktop.
Calculate number of words in sentences using map/reduce and sorting dictionary by value in descending order
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''Calculate how many times each word occurs across a list of sentences.
Example of how to use map, reduce, and sorting a dictionary by value.
Available functions:
- count_words: counts how many times each word occurred in a sentence
- combine_counts: combines two dictionaries; in other words, sums up the
  number of times each word occurred in each sentence
'''
from functools import reduce | |
def count_words(sentence):
    '''Tally how often each word appears in a sentence.

    Every non-alphabetic character is treated as a word separator, and
    the remaining letters are lower-cased before counting.

    Args:
        sentence: text to be analysed

    Returns:
        Dictionary mapping each lower-cased word to the number of times
        it occurred in the sentence
    '''
    # Replace anything that is not a letter with a space so that
    # punctuation and digits act as word boundaries.
    cleaned = []
    for char in sentence:
        cleaned.append(char.lower() if char.isalpha() else ' ')

    tally = {}
    for token in ''.join(cleaned).split():
        if token in tally:
            tally[token] += 1
        else:
            tally[token] = 1
    return tally
def combine_counts(d1, d2):
    '''Merge two word-count dictionaries into a new one.

    Each dictionary holds the word counts of already processed text.
    Words present in both inputs get the sum of their counts; words
    present in only one input keep their count. Neither input is
    modified.

    Args:
        d1: first dictionary of word counts
        d2: second dictionary of word counts

    Result:
        New dictionary containing every word from both inputs with
        its total number of occurrences
    '''
    # Work on a copy so the caller's first dictionary stays untouched.
    merged = d1.copy()
    for key in d2:
        merged[key] = merged.get(key, 0) + d2[key]
    return merged
# Sample input: the sentences of the Python manifesto, one per list item.
manifesto = [
    "Beautiful is better than ugly.",
    "Explicit is better than implicit.",
    "Simple is better than complex.",
    "Complex is better than complicated.",
    "Flat is better than nested.",
    "Sparse is better than dense.",
    "Readability counts.",
    "Special cases aren't special enough to break the rules.",
    "Although practicality beats purity.",
    "Errors should never pass silently.",
    "Unless explicitly silenced.",
    "In the face of ambiguity, refuse the temptation to guess.",
    "There should be one-- and preferably only one --obvious way to do it.",
    "Although that way may not be obvious at first unless you're Dutch.",
    "Now is better than never.",
    "Although never is often better than *right* now.",
    "If the implementation is hard to explain, it's a bad idea.",
    "If the implementation is easy to explain, it may be a good idea.",
    "Namespaces are one honking great idea -- let's do more of those!",
]
def main():
    '''Print every word of the manifesto with its total number of occurrences.

    Counts the words of each sentence in ``manifesto``, merges the
    per-sentence counts into a single dictionary, and prints one word
    per line, most frequent first. Each line is the word left-aligned
    in a 15-character column followed by its count.
    '''
    # One dictionary of word counts per sentence.
    per_sentence = map(count_words, manifesto)

    # Fold the per-sentence dictionaries into a single total. Starting
    # from an empty dictionary means an empty ``manifesto`` prints
    # nothing instead of making reduce() raise TypeError.
    total_counts = reduce(combine_counts, per_sentence, {})

    # Highest count first; words with equal counts keep the order in
    # which the dictionary yields them.
    ranked = sorted(total_counts.items(),
                    key=lambda item: item[1],
                    reverse=True)
    for word, count in ranked:
        print(f'{word:15}{count}')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment