Skip to content

Instantly share code, notes, and snippets.

View language-engineering's full-sized avatar

language-engineering

View GitHub Profile
class Tester(object):
    """Tiny stateful example class: holds a running total ``n``.

    ``n`` starts at 5, ``inc`` adds to it, and ``do_math`` prints a value
    derived from it.
    """

    def __init__(self):
        # Initial value of the running total.
        self.n = 5

    def inc(self, x):
        """Add ``x`` to the running total ``n`` in place."""
        self.n += x

    def do_math(self):
        """Print ``n * 3 + 5`` to standard output."""
        # A parenthesised single-argument print produces the same output on
        # Python 2 and Python 3, unlike the bare ``print expr`` statement.
        print(self.n * 3 + 5)


test = Tester()  # n is 5 after construction
test.inc(2)  # n is now 7
class Tester(object):
    """Tiny stateful example class: holds a running total ``n``.

    ``n`` starts at 5, ``inc`` adds to it, and ``do_math`` prints a value
    derived from it.
    """

    def __init__(self):
        # Initial value of the running total.
        self.n = 5

    def inc(self, x):
        """Add ``x`` to the running total ``n`` in place."""
        self.n += x

    def do_math(self):
        """Print ``n * 3 + 5`` to standard output."""
        # A parenthesised single-argument print produces the same output on
        # Python 2 and Python 3, unlike the bare ``print expr`` statement.
        print(self.n * 3 + 5)


test = Tester()  # n is 5 after construction
test.inc(2)  # n is now 7
from corpus_readers import AmazonReviewCorpusReader  # import reader

arcr = AmazonReviewCorpusReader()  # create new reader
positive_reviews = arcr.positive()  # store a reader pointing at positive reviews
negative_reviews = arcr.negative()  # store a reader pointing at negative reviews
dvd_reviews = arcr.category("dvd")  # reader narrowed to the "dvd" category
positive_dvd_reviews = dvd_reviews.positive()  # positive reviews within "dvd"

# Print review.raw() for every review yielded by the positive-DVD reader.
for review in positive_dvd_reviews.reviews():
    # The parenthesised single-argument form prints identically on
    # Python 2 and Python 3.
    print(review.raw())
from corpus_readers import AmazonReviewCorpusReader  # import reader

arcr = AmazonReviewCorpusReader()  # create new reader
positive_reviews = arcr.positive()  # store a reader pointing at positive reviews
negative_reviews = arcr.negative()  # store a reader pointing at negative reviews
dvd_reviews = arcr.category("dvd")  # reader narrowed to the "dvd" category
positive_dvd_reviews = dvd_reviews.positive()  # positive reviews within "dvd"

# Print review.raw() for every review yielded by the positive-DVD reader.
for review in positive_dvd_reviews.reviews():
    # The parenthesised single-argument form prints identically on
    # Python 2 and Python 3.
    print(review.raw())
import os
import sys

# Extend the import search path with the shared LanguageEngineering
# directory (presumably where corpus_readers lives — confirm locally).
sys.path.append(
    os.path.join("t:\\", "Departments", "Informatics", "LanguageEngineering")
)

from corpus_readers import AmazonReviewCorpusReader

# One reader over the corpus, plus narrowed readers obtained from it.
arcr = AmazonReviewCorpusReader()
positive_reviews = arcr.positive()
negative_reviews = arcr.negative()
dvd_reviews = arcr.category("dvd")
# Build an Amazon review reader and derive narrowed readers from it.
from sussex_nltk.corpus_readers import AmazonReviewCorpusReader #import the reader class
arcr = AmazonReviewCorpusReader() #create a new reader
positive_reviews = arcr.positive() #store a reader pointing at all positive reviews
negative_reviews = arcr.negative() #store a reader pointing at all negative reviews
dvd_reviews = arcr.category("dvd") #store a reader pointing at all dvd reviews
positive_dvd_reviews = dvd_reviews.positive() #store a reader pointing at all positive dvd reviews
#get a sample of the tokens in positive dvd reviews using your 5-digit candidate number
# NOTE(review): the sampling call that the comment above introduces is not present in this snippet.
from nltk import sent_tokenize
# NOTE(review): `document` is not defined in this snippet; it must already be bound to the text as a single string.
sentences = sent_tokenize(document) #segment a single string (our document) into a list of sentences and store it in "sentences"
from sussex_nltk.corpus_readers import WSJCorpusReader  # import the corpus reader

wsjcr = WSJCorpusReader()  # create a new WSJ corpus reader

# Get a sample of tokens in the corpus using your 5-digit candidate number
# (12345 here is presumably a placeholder for your own number).
tokens = wsjcr.sample_words(12345)

for token in tokens:  # iterate over the tokens
    # The parenthesised single-argument form prints identically on
    # Python 2 and Python 3.
    print(token)  # print each token
from sussex_nltk.corpus_readers import RCV1CorpusReader  # import the corpus reader

rcv1cr = RCV1CorpusReader()  # create a new Reuters corpus reader

# Use the reader created above; the earlier spelling "rc1cr" named a
# variable that was never defined and raised NameError.
tokens = rcv1cr.words()  # get all tokens in the corpus

for token in tokens:
    # The parenthesised single-argument form prints identically on
    # Python 2 and Python 3.
    print(token)  # print each token in the corpus