Last active
October 12, 2015 05:48
-
-
Save language-engineering/3979720 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example script: sample raw sentences from the Twitter corpus, PoS tag them,
# dependency parse the tagged sentences, and print each parsed sentence.
from sussex_nltk.tag import twitter_tag_batch
from sussex_nltk.corpus_readers import TwitterCorpusReader
from sussex_nltk.parse import dep_parse_sentences_arceager

tcr = TwitterCorpusReader()

# Get some (here 30) un-tokenised sentences from tweets.
sents = tcr.sample_raw_sents(30)

# PoS tag the sentences (remember the twitter tagger also tokenises for you).
tagged_sents = twitter_tag_batch(sents)

# Dependency parse the tagged sentences.
# NOTE(review): the "arceager" suffix presumably refers to an arc-eager
# transition-based parser -- confirm against the sussex_nltk documentation.
parsed_sents = dep_parse_sentences_arceager(tagged_sents)

# Again, you have parsed sentences: print each one after a separator line.
# The single-argument, parenthesised print form is used deliberately: it
# produces the same output under Python 2 (where it is a print statement
# applied to a parenthesised expression) and under Python 3.
for sentence in parsed_sents:
    print("------")
    print(sentence)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment