Skip to content

Instantly share code, notes, and snippets.

@sangheestyle
Last active August 29, 2015 14:07
Show Gist options
  • Select an option

  • Save sangheestyle/dc096c43640c2fae4842 to your computer and use it in GitHub Desktop.

Select an option

Save sangheestyle/dc096c43640c2fae4842 to your computer and use it in GitHub Desktop.
nlp hw5
from collections import defaultdict
from nltk.corpus import brown
# NLP homework 5, answers 1.2.1 and 1.2.2, computed from the Brown corpus:
#   1.2.1  how many distinct words occur with each number of distinct tags
#   1.2.2  the word with the most distinct tags, plus one example sentence
#          from the corpus for each of those tags
# The report is written to ``file_name``.  Only constructs that behave the
# same on Python 2 and Python 3 are used (no print statement, no iteritems).
print(">>> Procssing")

# Map every word to the set of tags it carries anywhere in the corpus.
word_tag = defaultdict(set)
for word, tag in brown.tagged_words():
    word_tag[word].add(tag)

# num_words_by_num_tags[k] = number of distinct words having exactly k tags.
num_words_by_num_tags = defaultdict(int)
# [word, tag count, tag set] of the most ambiguous word seen so far.  Seeded
# with an empty tag set so an empty corpus cannot raise IndexError below.
words_with_max_tags = [None, 0, set()]
for word, tags in word_tag.items():
    num_words_by_num_tags[len(tags)] += 1
    # Strict ">" keeps the first word encountered among equally ambiguous ones.
    if len(tags) > words_with_max_tags[1]:
        words_with_max_tags = [word, len(tags), tags]

max_word, max_tags = words_with_max_tags[0], words_with_max_tags[2]
# Sets have no stable order; sort so the report is reproducible between runs.
ordered_tags = sorted(max_tags)

# Find the first sentence containing (max_word, tag) for every tag in a single
# pass, rather than re-reading the whole corpus once per tag.
pending_tags = set(ordered_tags)
example_by_tag = {}
for sent in brown.tagged_sents():
    if not pending_tags:
        break
    for token, token_tag in sent:
        if token == max_word and token_tag in pending_tags:
            example_by_tag[token_tag] = sent
            pending_tags.discard(token_tag)

file_name = "ans_hw5_1_2.txt"
h1 = "# distinct tags"
h2 = "# distinct words"

# Text mode so the same code works on Python 3; "with" guarantees the file is
# closed even if writing fails part-way through.
with open(file_name, "w") as f1:
    f1.write(">>> Ans 1.2.1\n")
    f1.write("%s %s\n" % (h1, h2))
    # Each column is padded to the width of its header so rows line up.
    for key, val in sorted(num_words_by_num_tags.items()):
        f1.write("%-*d %-*d\n" % (len(h1), key, len(h2), val))

    f1.write("\n>>> Ans 1.2.2\n")
    f1.write("%s has %d distinct tags.\n" % (max_word, len(max_tags)))
    # Two spaces after every tag reproduces the original report layout.
    f1.write("tags: " + "".join("%s  " % t for t in ordered_tags) + "\n")
    f1.write("\nSentences from the corpus containing the word, "
             "one for each possible tag.\n\n")

    for index, tag in enumerate(ordered_tags):
        sent = example_by_tag.get(tag)
        if sent is None:
            # No sentence contains this (word, tag) pair; nothing to show.
            continue
        f1.write("# %d : %s as %s\n" % (index + 1, max_word, tag))
        # Drop the tags and print only the words of the example sentence.
        f1.write(" ".join(token for token, _ in sent) + "\n")
        f1.write("\n")

print("<<< Done, check %s" % file_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment