erochest · August 12, 2015 14:49
diff --git a/scratch.py b/scratch.py

 # Hand-written version: build simplified_tagged_sents from brown_tagged_sents,
 # keeping each word and cutting its tag down to tag[0].
 # brown_tagged_sents is an empty placeholder here, so the loop body never runs.
 brown_tagged_sents = []
 simplified_tagged_sents = []
 for sent in brown_tagged_sents:
    # Rebuild one sentence as a list of (word, tag[0]) pairs.
    sent_to_append = []
    for (word, tag) in sent:
        sent_to_append.append((word, tag[0]))
    simplified_tagged_sents.append(sent_to_append)


 # Abstract that into:

 def modify_tags(corpus, f):
    """Return a new corpus in which each tag is replaced by f(word, tag).

    corpus is iterated as a sequence of sentences, and each sentence as a
    sequence of (word, tag) pairs. Words are carried over unchanged; the
    result is a fresh list of lists of (word, new_tag) tuples.
    """
    def retag(sentence):
        # One output pair per input pair, in the same order.
        return [(token, f(token, label)) for (token, label) in sentence]

    return [retag(sentence) for sentence in corpus]


 # Demo call: the same simplification as the hand-written loop, expressed through
 # modify_tags. The lambda ignores the word and returns tag[0]; the returned
 # corpus is not assigned to anything here.
 modify_tags(brown_tagged_sents, lambda _, tag: tag[0])

 ##


 # Words whose tags unk() leaves untouched. Empty placeholder in this scratch;
 # presumably meant to hold the 100 most frequent words - TODO confirm.
 most_common_100 = {}


 def unk(word, tag):
    """Return tag when word is in most_common_100, otherwise the string 'UNK'."""
    return tag if word in most_common_100 else 'UNK'

 # Demo call: pass unk as the tag function, so each pair's tag is kept or
 # replaced by 'UNK' depending on the word. The result is discarded here.
 modify_tags(brown_tagged_sents, unk)


 #

 # Comprehension cheat sheet: each comprehension is followed ("# =>") by an
 # explicit loop that builds the same list in `words`.
 # NOTE: sentence, corpus and vowels are not defined in this scratch as shown;
 # these lines are illustrations rather than runnable code.

 # One for-clause: one loop.
 [word for word in sentence]
 # =>
 words = []
 for word in sentence:
    words.append(word)

 # Two for-clauses: nested loops, leftmost clause outermost.
 [word for sentence in corpus for word in sentence]
 # =>
 words = []
 for sentence in corpus:
    for word in sentence:
        words.append(word)

 # A trailing if-clause becomes a test inside the innermost loop.
 [word for sentence in corpus for word in sentence if word[0] in vowels]
 # =>
 words = []
 for sentence in corpus:
    for word in sentence:
        if word[0] in vowels:
            words.append(word)

 # The same comprehension laid out one clause per line, with an extra filter
 # that keeps only sentences whose len() is greater than 10.
 [word
 for sentence in corpus
 if len(sentence) > 10
 for word in sentence
 if word[0] in vowels]

 # Layout example: a call with one argument per line and a trailing comma.
 # func, two and three are presumably placeholders; none is defined here.
 func(
    two,
    three,
    )

 # Link to "Evaluation of text classification" (Stanford IR book):
 # http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-text-classification-1.html
 #
 # It may be helpful for evaluating the training.

	# Hand-written version: build simplified_tagged_sents from brown_tagged_sents,
	# keeping each word and cutting its tag down to tag[0].
	# brown_tagged_sents is an empty placeholder here, so the loop body never runs.
	brown_tagged_sents = []
	simplified_tagged_sents = []
	for sent in brown_tagged_sents:
	    # Rebuild one sentence as a list of (word, tag[0]) pairs.
	    sent_to_append = []
	    for (word, tag) in sent:
	        sent_to_append.append((word, tag[0]))
	    simplified_tagged_sents.append(sent_to_append)


	# Abstract that into:

	def modify_tags(corpus, f):
	    """Return a new corpus in which each tag is replaced by f(word, tag).

	    corpus is iterated as a sequence of sentences, and each sentence as a
	    sequence of (word, tag) pairs. Words are carried over unchanged; the
	    result is a fresh list of lists of (word, new_tag) tuples.
	    """
	    new_corpus = []
	    for sent in corpus:
	        new_corpus.append([(w, f(w, t)) for (w, t) in sent])
	    return new_corpus


	# Demo call: the lambda ignores the word and returns tag[0]; the corpus
	# returned by modify_tags is not assigned to anything here.
	modify_tags(brown_tagged_sents, lambda _, tag: tag[0])

	##


	# Words whose tags unk() leaves untouched. Empty placeholder in this scratch;
	# presumably meant to hold the 100 most frequent words - TODO confirm.
	most_common_100 = {}


	def unk(word, tag):
	    """Return tag when word is in most_common_100, otherwise the string 'UNK'."""
	    if word in most_common_100:
	        return tag
	    else:
	        return 'UNK'

	# Demo call: pass unk as the tag function, so each pair's tag is kept or
	# replaced by 'UNK' depending on the word. The result is discarded here.
	modify_tags(brown_tagged_sents, unk)


	#

	# Comprehension cheat sheet: each comprehension is followed ("# =>") by an
	# explicit loop that builds the same list in `words`.
	# NOTE: sentence, corpus and vowels are not defined in this scratch as shown;
	# these lines are illustrations rather than runnable code.

	# One for-clause: one loop.
	[word for word in sentence]
	# =>
	words = []
	for word in sentence:
	    words.append(word)

	# Two for-clauses: nested loops, leftmost clause outermost.
	[word for sentence in corpus for word in sentence]
	# =>
	words = []
	for sentence in corpus:
	    for word in sentence:
	        words.append(word)

	# A trailing if-clause becomes a test inside the innermost loop.
	[word for sentence in corpus for word in sentence if word[0] in vowels]
	# =>
	words = []
	for sentence in corpus:
	    for word in sentence:
	        if word[0] in vowels:
	            words.append(word)

	# The same comprehension laid out one clause per line, with an extra filter
	# that keeps only sentences whose len() is greater than 10.
	[word
	for sentence in corpus
	if len(sentence) > 10
	for word in sentence
	if word[0] in vowels]

	# Layout example: a call with one argument per line and a trailing comma.
	# func, two and three are presumably placeholders; none is defined here.
	func(
	    two,
	    three,
	    )

	# Link to "Evaluation of text classification" (Stanford IR book):
	# http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-text-classification-1.html
	#
	# It may be helpful for evaluating the training.
No results found