Skip to content

Instantly share code, notes, and snippets.

@xfenix
Created November 23, 2017 14:30
Show Gist options
  • Save xfenix/d73b8d0808f28df306c9d4f7945fb53f to your computer and use it in GitHub Desktop.
Save xfenix/d73b8d0808f28df306c9d4f7945fb53f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import sys
import logging
# Module-level logger; the script entry point reports its errors through it.
logger = logging.getLogger('wordsfrequency')
class WrongInputError(Exception):
    """Raised when the script is started without a file path argument."""
def count_words_frequency(text):
    """Count how often each word occurs in ``text``.

    Words are the whitespace-separated chunks of ``text``, compared
    case-insensitively (each chunk is lowercased before counting).
    Punctuation is not stripped, so ``"word,"`` and ``"word"`` are
    counted as different words.

    Arguments:
    text -- string

    Returns a tuple of ``(word, frequency)`` pairs ordered by frequency,
    highest first. Words with equal frequency are ordered alphabetically.
    An empty or whitespace-only ``text`` yields an empty tuple.
    """
    # Local import keeps this function self-contained.
    from collections import Counter

    frequencies = Counter(word.lower() for word in text.split())
    # A single composite key gives the required ordering in one pass:
    # negated count -> descending frequency, then the word -> ascending
    # alphabetical order among ties.
    ordered = sorted(frequencies.items(), key=lambda item: (-item[1], item[0]))
    return tuple(ordered)
if __name__ == "__main__":
    # Script entry point: read the file named by the first command-line
    # argument and print one "word:frequency" line per distinct word.

    # Guard clause: without a path argument there is nothing to read.
    if len(sys.argv) < 2:
        logger.error(
            "Error: Please, provide real path to the file as first argument.\n\n"
            "For example: python words_frequency_count.py ./source.txt\n\n"
            "Aborting."
        )
        # Non-zero status so callers (shells, CI) can detect the failure.
        sys.exit(1)

    try:
        # The context manager guarantees the file handle is closed.
        with open(sys.argv[1]) as source_file:
            file_data = source_file.read()
    except IOError:
        logger.error("""Error: cant read the file.\n\nAborting.""")
        sys.exit(1)

    # An empty file produces no output at all.
    if file_data:
        stats = count_words_frequency(file_data)
        print("\n".join('{}:{}'.format(*row) for row in stats))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment