Created
August 12, 2015 14:49
-
-
Save erochest/12beee208e497c461757 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Input corpus: a list of sentences, each a list of (word, tag) pairs.
# It is empty in this snippet (presumably a placeholder for real data).
brown_tagged_sents = []

# Same sentences, with every tag cut down to its first character.
simplified_tagged_sents = [
    [(token, label[0]) for (token, label) in tagged_sentence]
    for tagged_sentence in brown_tagged_sents
]
| # Abstract that into: | |
def modify_tags(corpus, f):
    """Return a new corpus whose tags have been rewritten by f.

    corpus is iterated as a sequence of sentences, and each sentence as a
    sequence of (word, tag) pairs.  f is called as f(word, tag) and its
    return value replaces the tag; the word is carried over unchanged.
    The result is a new list of lists of (word, new_tag) tuples.
    """
    return [
        [(word, f(word, tag)) for (word, tag) in sentence]
        for sentence in corpus
    ]
| modify_tags(brown_tagged_sents, lambda _, tag: tag[0]) | |
| ## The call above keeps only the first character of each tag (the lambda ignores the word); the returned list is not stored. | |
# Words whose tags are kept as-is by unk(); starts out empty in this snippet.
most_common_100 = {}


def unk(word, tag):
    """Return tag when word is in most_common_100, otherwise 'UNK'."""
    return tag if word in most_common_100 else 'UNK'
| modify_tags(brown_tagged_sents, unk) | |
| # The call above retags the corpus with unk: any word not in most_common_100 gets the tag 'UNK'; the returned list is not stored. | |
| [word for word in sentence] | |
| # => the list comprehension above builds the same list as this explicit loop: | |
| words = [] | |
| for word in sentence: | |
| words.append(word) | |
| [word for sentence in corpus for word in sentence] | |
| # => several for-clauses nest left to right, so the first clause is the outer loop: | |
| words = [] | |
| for sentence in corpus: | |
| for word in sentence: | |
| words.append(word) | |
| [word for sentence in corpus for word in sentence if word[0] in vowels] | |
| # => a trailing if-clause becomes the innermost test guarding the append: | |
| words = [] | |
| for sentence in corpus: | |
| for word in sentence: | |
| if word[0] in vowels: | |
| words.append(word) | |
| [word | |
| for sentence in corpus | |
| if len(sentence) > 10 | |
| for word in sentence | |
| if word[0] in vowels] | |
| func( | |
| two, | |
| three, | |
| ) | |
| # Link to "Evaluation of text classification" (a section of the Stanford "Introduction to Information Retrieval" book): | |
| # http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-text-classification-1.html | |
| # | |
| # It may be helpful for evaluating the training. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment