language-engineering · October 12, 2015 05:47
diff --git a/gistfile1.py b/gistfile1.py
 from sussex_nltk.corpus_readers import TwitterCorpusReader
 from sussex_nltk.parse import dep_parse_sentences_arceager
 from nltk.tokenize import word_tokenize
 from nltk import pos_tag

 tcr = TwitterCorpusReader()

 # Get some (here 30) un-tokenised sentences from tweets.
 sents = tcr.sample_raw_sents(30)

 # Tokenise and PoS-tag the sentences.
 # Note the round brackets rather than square brackets: this is a generator
 # expression, not a list comprehension. Each sentence is tokenised and tagged
 # only when the consumer asks for the next item, so the tagged sentences are
 # never all held in memory at once. "tagged_sents" is therefore a generator,
 # not a list, and it can be iterated over only once.
 tagged_sents = (pos_tag(word_tokenize(sentence)) for sentence in sents)

 # Dependency parse the tagged sentences.
 parsed_sents = dep_parse_sentences_arceager(tagged_sents)

 # Now you can inspect the results by printing the sentences as in the
 # previous section. The parenthesised single-argument print gives the same
 # output under the Python 2 print statement and the Python 3 print function.
 for sentence in parsed_sents:
     print("-----")
     print(sentence)
	from sussex_nltk.corpus_readers import TwitterCorpusReader
	from sussex_nltk.parse import dep_parse_sentences_arceager
	from nltk.tokenize import word_tokenize
	from nltk import pos_tag

	tcr = TwitterCorpusReader()

	# Get some (here 30) un-tokenised sentences from tweets.
	sents = tcr.sample_raw_sents(30)

	# Tokenise and PoS-tag the sentences.
	# Note the round brackets rather than square brackets: this is a generator
	# expression, not a list comprehension. Each sentence is tokenised and tagged
	# only when the consumer asks for the next item, so the tagged sentences are
	# never all held in memory at once. "tagged_sents" is therefore a generator,
	# not a list, and it can be iterated over only once.
	tagged_sents = (pos_tag(word_tokenize(sentence)) for sentence in sents)

	# Dependency parse the tagged sentences.
	parsed_sents = dep_parse_sentences_arceager(tagged_sents)

	# Now you can inspect the results by printing the sentences as in the
	# previous section. The loop body must be indented one level deeper than the
	# "for" line; the parenthesised single-argument print gives the same output
	# under the Python 2 print statement and the Python 3 print function.
	for sentence in parsed_sents:
		print("-----")
		print(sentence)