Created
November 23, 2017 14:30
-
-
Save xfenix/d73b8d0808f28df306c9d4f7945fb53f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import logging | |
logger = logging.getLogger('wordsfrequency') | |
class WrongInputError(Exception):
    """Raised when the script is invoked without a file path argument."""
def count_words_frequency(text):
    """Count how often each word occurs in ``text``.

    Words are the whitespace-separated tokens of ``text`` (``str.split()``)
    and are compared case-insensitively by lower-casing them. Punctuation is
    not stripped, so ``"word"`` and ``"word,"`` are counted separately.

    Arguments:
    text -- string

    Returns a tuple of ``(word, frequency)`` pairs, for example
    ``(("a", 3), ("b", 1))``, ordered by frequency in descending order.
    Words with equal frequency are ordered alphabetically. Empty or
    whitespace-only text yields an empty tuple.
    """
    # Imported locally so this function carries its own dependency.
    from collections import Counter

    stats = Counter(word.lower() for word in text.split())
    # A single sort with a compound key: the negated count gives descending
    # frequency, and the word itself breaks ties alphabetically.
    return tuple(sorted(stats.items(), key=lambda item: (-item[1], item[0])))
if __name__ == "__main__":
    # Command-line entry point: read the file named by the first argument and
    # print one "word:frequency" line per distinct word.
    file_data = None
    try:
        if len(sys.argv) <= 1:
            raise WrongInputError("Cant find file path in script arguments")
        # The context manager closes the file handle even if read() fails.
        with open(sys.argv[1]) as source_file:
            file_data = source_file.read()
    except IOError:
        logger.error("Error: cant read the file.\n\nAborting.")
        # Exit with a non-zero status so callers can detect the failure.
        sys.exit(1)
    except WrongInputError:
        logger.error(
            "Error: Please, provide real path to the file as first argument.\n\n"
            "For example: python words_frequency_count.py ./source.txt\n\n"
            "Aborting."
        )
        sys.exit(1)
    # An empty file produces no output at all.
    if file_data:
        print("\n".join(
            "{}:{}".format(*row) for row in count_words_frequency(file_data)
        ))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment