Skip to content

Instantly share code, notes, and snippets.

@erochest
Created August 12, 2015 14:49
Show Gist options
  • Select an option

  • Save erochest/12beee208e497c461757 to your computer and use it in GitHub Desktop.

Select an option

Save erochest/12beee208e497c461757 to your computer and use it in GitHub Desktop.
# Tagged sentences; each sentence is iterated below as (word, tag) pairs.
# Empty placeholder here.
brown_tagged_sents = []

# Build a parallel corpus in which every tag is cut down to its first character.
simplified_tagged_sents = []
for sent in brown_tagged_sents:
    sent_to_append = []
    for (word, tag) in sent:
        sent_to_append.append((word, tag[0]))
    # Append once per sentence, after all of its words have been processed.
    simplified_tagged_sents.append(sent_to_append)
# Abstract that into:
def modify_tags(corpus, f):
    """Modify the tags in a corpus with f.

    Args:
        corpus: Iterable of sentences, each an iterable of (word, tag) pairs.
        f: Callable invoked as ``f(word, tag)``; its result becomes the new tag.

    Returns:
        A new list of sentences, each a list of ``(word, f(word, tag))``
        pairs. The input corpus is left unmodified.
    """
    return [[(w, f(w, t)) for (w, t) in sent] for sent in corpus]
# Keep only the first character of every tag; the word argument is ignored.
modify_tags(
    brown_tagged_sents,
    lambda _word, tag: tag[0],
)
##
# Words whose tags ``unk`` leaves untouched; only membership is checked.
# Empty placeholder here.
most_common_100 = {}


def unk(word, tag):
    """Return the tag for known words and 'UNK' for everything else.

    Args:
        word: The word to look up in ``most_common_100``.
        tag: The tag currently attached to ``word``.

    Returns:
        ``tag`` unchanged when ``word`` is in ``most_common_100``,
        otherwise the string ``'UNK'``.
    """
    return tag if word in most_common_100 else 'UNK'
# Re-tag the corpus with unk: words outside most_common_100 get the tag 'UNK'.
modify_tags(
    brown_tagged_sents,
    unk,
)
#
# A comprehension with one ``for`` clause ...
[word for word in sentence]
# =>
# ... is equivalent to a single loop that appends each item.
words = []
for word in sentence:
    words.append(word)
# Two ``for`` clauses read left to right, outermost loop first ...
[word for sentence in corpus for word in sentence]
# =>
# ... exactly like nested loops written top to bottom.
words = []
for sentence in corpus:
    for word in sentence:
        words.append(word)
# A trailing ``if`` clause filters the items produced by the innermost loop ...
[word for sentence in corpus for word in sentence if word[0] in vowels]
# =>
# ... so it becomes an ``if`` nested inside both loops.
words = []
for sentence in corpus:
    for word in sentence:
        if word[0] in vowels:
            words.append(word)
# One clause per line: the clauses are evaluated in the order written, so the
# length check applies to each sentence before its words are visited.
[
    word
    for sentence in corpus
    if len(sentence) > 10
    for word in sentence
    if word[0] in vowels
]
# Multi-line call layout: one argument per line, with a trailing comma.
func(
    two,
    three,
)
# Link to "Evaluation of text classification" (Introduction to Information Retrieval):
# http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-text-classification-1.html
#
# It may be helpful for evaluating the results of training.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment