Skip to content

Instantly share code, notes, and snippets.

@sakethramanujam
Created April 23, 2020 09:06
Show Gist options
  • Save sakethramanujam/1c370fb73b416c7ef306771bd2ad72e8 to your computer and use it in GitHub Desktop.
Save sakethramanujam/1c370fb73b416c7ef306771bd2ad72e8 to your computer and use it in GitHub Desktop.
Get Top Words!
import argparse
import re
from typing import Dict
def args():
    """Parse the command-line options of the word counter.

    Returns:
        argparse.Namespace with two attributes:
        ``file`` -- path of the text file to count words from (mandatory);
        ``N`` -- number of top entries to print (defaults to 10).
    """
    parser = argparse.ArgumentParser()
    # Without a file there is nothing to count: let argparse report the
    # omission up front instead of failing later inside open(None).
    parser.add_argument(
        '-f', '--file', required=True,
        help='Name of file to count words from')
    # An explicit default keeps N an int even when -n is omitted; otherwise
    # None would be handed on to code that expects a number.
    parser.add_argument(
        '-n', '--N', type=int, default=10,
        help='Number of top items (default: 10)')
    return parser.parse_args()
def _get_words(lines) -> Dict[str, int]:
words = {}
for l in lines:
ws = l.lower().split()
ws = [re.sub('[()“”‘#.,?!]', '', w) for w in ws]
for w in ws:
if not w in words.keys():
words[w] = 1
elif w in words.keys():
words[w] = words.get(w)+1
return words
def _get_top(words: dict, howmany: int = 10):
sorted_words = sorted(words, key=words.get, reverse=True)
for i in range(howmany):
key = sorted_words[i]
print(key, words.get(key))
def main():
    """Count the words of the file named on the command line and print the top ones."""
    argu = args()
    # The context manager closes the file even if counting raises. UTF-8 is
    # requested explicitly because the punctuation filter targets curly
    # quotes, which a platform-default codec may decode incorrectly.
    with open(argu.file, 'r', encoding='utf-8') as f:
        # The file object yields its lines lazily, so the whole file never
        # has to be held in memory as a list.
        words = _get_words(lines=f)
    _get_top(words=words, howmany=argu.N)
# Run the counter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment