language-engineering · October 24, 2012 11:41
diff --git a/gistfile1.py b/gistfile1.py
 from sussex_nltk import untag_sequence, extract_by_pos

 # Regular expressions intended to be applied to part-of-speech tags.
 # NOTE(review): how extract_by_pos applies them (prefix match vs. search vs.
 # full match) is not visible here -- confirm against sussex_nltk.
 all_tags = r".+"        # one or more of any character
 all_nouns = r"N+"       # one or more "N" (the example's noun tags are 'NN', 'NNS')
 all_verbs = r"V+"       # one or more "V" (the example's verb tag is 'VBP')
 all_adjectives = r"J+"  # one or more "J" (the example's adjective tag is 'JJ')

 # A small hand-tagged sentence as a list of (word, tag) pairs.
 example_tagged_words = [('The', 'DT'), ('little', 'JJ'), ('badgers', 'NNS'), ('ate', 'VBP'), ('some', 'DT'), ('jam', 'NN')]

 # Decide on some patterns to match: here, nouns and adjectives.
 # all_tags and all_verbs are left defined as alternatives to pick from.
 unigram_regex = [all_nouns, all_adjectives]

 # Extract only those words that match the given patterns, and untag them.
 # Presumably this leaves just the noun/adjective words of the example
 # sentence -- verify against the sussex_nltk helpers.
 features = untag_sequence(extract_by_pos(example_tagged_words, unigram_regex))

 # Parenthesised so the line is valid on both Python 2 and Python 3; with a
 # single argument the printed output is the same on either version.
 print(features)
	from sussex_nltk import untag_sequence, extract_by_pos

	# Regular expressions intended to be applied to part-of-speech tags.
	# NOTE(review): how extract_by_pos applies them (prefix match vs. search vs.
	# full match) is not visible here -- confirm against sussex_nltk.
	all_tags = r".+"        # one or more of any character
	all_nouns = r"N+"       # one or more "N" (the example's noun tags are 'NN', 'NNS')
	all_verbs = r"V+"       # one or more "V" (the example's verb tag is 'VBP')
	all_adjectives = r"J+"  # one or more "J" (the example's adjective tag is 'JJ')

	# A small hand-tagged sentence as a list of (word, tag) pairs.
	example_tagged_words = [('The', 'DT'), ('little', 'JJ'), ('badgers', 'NNS'), ('ate', 'VBP'), ('some', 'DT'), ('jam', 'NN')]

	# Decide on some patterns to match: here, nouns and adjectives.
	# all_tags and all_verbs are left defined as alternatives to pick from.
	unigram_regex = [all_nouns, all_adjectives]

	# Extract only those words that match the given patterns, and untag them.
	# Presumably this leaves just the noun/adjective words of the example
	# sentence -- verify against the sussex_nltk helpers.
	features = untag_sequence(extract_by_pos(example_tagged_words, unigram_regex))

	# Parenthesised so the line is valid on both Python 2 and Python 3; with a
	# single argument the printed output is the same on either version.
	print(features)