kshepp · July 6, 2018 07:35
diff --git a/nltk_parts_of_speech.py b/nltk_parts_of_speech.py
 from __future__ import division
 import nltk, re
 from nltk import FreqDist
 from nltk import word_tokenize 
 from nltk.tokenize import RegexpTokenizer
 from nltk.corpus import brown 

 import io  # io.open decodes text the same way on Python 2 and Python 3

 # Regexp tokenizer that keeps only runs of word characters (drops punctuation).
 # NOTE: it is defined but not used below -- the text is split with
 # word_tokenize instead, so punctuation tokens remain in `text` and are
 # discarded later by the part-of-speech filter.
 tokenizer = RegexpTokenizer(r'\w+')

 # Read the whole corpus as UTF-8 text; the context manager closes the file
 # handle even if reading or decoding fails.
 with io.open('Conservative.txt', encoding='utf-8') as corpus_file:
     f = corpus_file.read()
 text = word_tokenize(f)

 # nltk.pos_tag emits Penn Treebank tags by default: 'NN' is a singular noun
 # and 'JJ' is an adjective ('ADJ' belongs to the universal tagset and never
 # appears in the default output). Edit this tuple to select other tags.
 wanted_tags = ('NN', 'JJ')

 # Keep every occurrence (a list, not a set) so the frequency distribution
 # below reflects how often each word actually appears in the text.
 nouns = [word for word, pos in nltk.pos_tag(text) if pos in wanted_tags]

 # Fold case so that "Tax" and "tax" are counted as the same word.
 freqs = nltk.FreqDist(w.lower() for w in nouns)

 # Single-argument print(...) behaves identically on Python 2 and Python 3.
 print(freqs.most_common(50))
	from __future__ import division
	import nltk, re
	from nltk import FreqDist
	from nltk import word_tokenize
	from nltk.tokenize import RegexpTokenizer
	from nltk.corpus import brown

	import io  # io.open decodes text the same way on Python 2 and Python 3

	# Regexp tokenizer that keeps only runs of word characters (drops punctuation).
	# NOTE: it is defined but not used below -- the text is split with
	# word_tokenize instead, so punctuation tokens remain in `text` and are
	# discarded later by the part-of-speech filter.
	tokenizer = RegexpTokenizer(r'\w+')

	# Read the whole corpus as UTF-8 text; the context manager closes the file
	# handle even if reading or decoding fails.
	with io.open('Conservative.txt', encoding='utf-8') as corpus_file:
	    f = corpus_file.read()
	text = word_tokenize(f)

	# nltk.pos_tag emits Penn Treebank tags by default: 'NN' is a singular noun
	# and 'JJ' is an adjective ('ADJ' belongs to the universal tagset and never
	# appears in the default output). Edit this tuple to select other tags.
	wanted_tags = ('NN', 'JJ')

	# Keep every occurrence (a list, not a set) so the frequency distribution
	# below reflects how often each word actually appears in the text.
	# (Written as a comprehension: the original loop body had lost its
	# indentation, leaving the `for` statement without a body.)
	nouns = [word for word, pos in nltk.pos_tag(text) if pos in wanted_tags]

	# Fold case so that "Tax" and "tax" are counted as the same word.
	freqs = nltk.FreqDist(w.lower() for w in nouns)

	# Single-argument print(...) behaves identically on Python 2 and Python 3.
	print(freqs.most_common(50))
No results found