mamonu · April 9, 2020 17:32
diff --git a/browncorpuswordcount.py b/browncorpuswordcount.py

 import nltk
 import string

 # nltk.download('brown')      
 # If nltk has not been used before, uncomment the line above to download the Brown corpus.

 from nltk.corpus import brown
 from collections import Counter
 import pandas as pd 

 # Load every token of the Brown corpus (the corpus must already be downloaded).
 words = brown.words()

 # Lowercase each token and delete ASCII punctuation characters in one pass.
 _strip_punctuation = str.maketrans('', '', string.punctuation)
 lcwords = [str(word).lower().translate(_strip_punctuation) for word in words]

 # A token made up entirely of punctuation (e.g. a lone comma) is now an empty
 # string; drop those so '' is not counted as a word.
 lcwords = [word for word in lcwords if word]

 # Tally how often each remaining word occurs.
 wordcount = Counter(lcwords)

 # One row per distinct word, sorted by descending count; print the top 40.
 df = pd.DataFrame.from_dict(wordcount, orient='index').reset_index()
 df = df.rename(columns={'index': 'word', 0: 'count'})
 df = df.sort_values(by=['count'], ascending=False)
 print(df.head(40))

	import nltk
	import string

	# nltk.download('brown')
	# If nltk has not been used before, uncomment the line above to download the Brown corpus.

	from nltk.corpus import brown
	from collections import Counter
	import pandas as pd

	# Load every token of the Brown corpus (the corpus must already be downloaded).
	words = brown.words()

	# Lowercase each token and delete ASCII punctuation characters in one pass.
	_strip_punctuation = str.maketrans('', '', string.punctuation)
	lcwords = [str(word).lower().translate(_strip_punctuation) for word in words]

	# A token made up entirely of punctuation (e.g. a lone comma) is now an empty
	# string; drop those so '' is not counted as a word.
	lcwords = [word for word in lcwords if word]

	# Tally how often each remaining word occurs.
	wordcount = Counter(lcwords)

	# One row per distinct word, sorted by descending count; print the top 40.
	df = pd.DataFrame.from_dict(wordcount, orient='index').reset_index()
	df = df.rename(columns={'index': 'word', 0: 'count'})
	df = df.sort_values(by=['count'], ascending=False)
	print(df.head(40))
No results found