This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import twitter

# Initialize the API client. The four placeholder strings must be replaced
# with real credentials before this script is run.
twitter_api = twitter.Api(
    consumer_key='YOUR_CONSUMER_KEY',
    consumer_secret='YOUR_CONSUMER_SECRET',
    access_token_key='YOUR_ACCESS_TOKEN_KEY',
    access_token_secret='YOUR_ACCESS_TOKEN_SECRET',
)

# Test authentication by printing whatever VerifyCredentials() returns.
print(twitter_api.VerifyCredentials())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def buildTestSet(search_keyword, count=100, api=None):
    """Search for tweets matching a keyword and wrap them as unlabelled samples.

    Args:
        search_keyword: Term passed to the client's ``GetSearch`` call.
        count: Value forwarded as ``GetSearch``'s ``count`` argument
            (defaults to 100, the value that used to be hard-coded).
        api: Object exposing ``GetSearch``. When ``None`` the module-level
            ``twitter_api`` client is used.

    Returns:
        A list of ``{"text": <status text>, "label": None}`` dicts, or
        ``None`` if anything went wrong while fetching or converting.
    """
    try:
        # Resolve the client inside the try block so that a missing global
        # client is reported like any other failure instead of escaping.
        client = twitter_api if api is None else api
        tweets_fetched = client.GetSearch(search_keyword, count=count)
        print("Fetched " + str(len(tweets_fetched)) + " tweets for the term " + search_keyword)
        return [{"text": status.text, "label": None} for status in tweets_fetched]
    except Exception as error:
        # Catch Exception rather than using a bare except so Ctrl-C and
        # SystemExit still propagate; also surface the reason for the failure.
        print("Unfortunately, something went wrong..")
        print("Reason: " + repr(error))
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ask the user for a keyword and fetch the matching tweets as the test set.
search_term = input("Enter a search keyword:")
testDataSet = buildTestSet(search_term)

# buildTestSet returns None when the search fails, and None cannot be sliced,
# so only preview the first four entries when a list actually came back.
if testDataSet is not None:
    print(testDataSet[0:4])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def buildTrainingSet(corpusFile, tweetDataFile):
    """Load the labelled corpus CSV into a list of tweet descriptors.

    Every non-empty row of ``corpusFile`` is read as
    ``(topic, label, tweet_id)`` and stored as
    ``{"tweet_id": ..., "label": ..., "topic": ...}``.

    NOTE(review): only the corpus-loading step is present here. The
    ``tweetDataFile`` argument and the ``time`` import are not used and
    nothing is returned, although the caller assigns the result; the rest
    of the function appears to be missing and must be restored from the
    original source.

    Args:
        corpusFile: Path of the comma-separated corpus file.
        tweetDataFile: Not used by the visible code (see note above).
    """
    import csv
    import time  # unused in the visible portion; kept for the missing remainder

    corpus = []
    # csv.reader requires a text-mode file on Python 3 (binary mode raises
    # "iterator should return strings, not bytes"); newline="" lets the csv
    # module handle line endings itself.
    with open(corpusFile, newline="") as csvfile:
        lineReader = csv.reader(csvfile, delimiter=',', quotechar="\"")
        for row in lineReader:
            if not row:
                # Blank lines come back as empty rows; there is nothing to index.
                continue
            corpus.append({"tweet_id": row[2], "label": row[1], "topic": row[0]})


# The function was originally defined under this misspelled name while the
# caller uses "buildTrainingSet"; keep the old spelling as an alias.
buidTrainingSet = buildTrainingSet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Placeholder locations: replace YOUR_FILE_PATH with the directory that holds
# the corpus file and where the tweet data file should live.
corpusFile = "YOUR_FILE_PATH/corpus.csv"
tweetDataFile = "YOUR_FILE_PATH/tweetDataFile.csv"

trainingData = buildTrainingSet(corpusFile, tweetDataFile)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
from nltk.tokenize import word_tokenize
from string import punctuation
from nltk.corpus import stopwords


class PreProcessTweets:
    """Holds the set of tokens to be treated as stop words for tweets."""

    def __init__(self):
        # Stop words are the NLTK English stop-word list, every ASCII
        # punctuation character, and the two literal tokens 'AT_USER' and 'URL'.
        self._stopwords = set(stopwords.words('english') + list(punctuation) + ['AT_USER','URL'])

def processTweets(self, list_of_tweets): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run both data sets through the same processor instance so they share one
# stop-word set.
tweetProcessor = PreProcessTweets()
preprocessedTrainingSet = tweetProcessor.processTweets(trainingData)
preprocessedTestSet = tweetProcessor.processTweets(testDataSet)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import nltk


def buildVocabulary(preprocessedTrainingData):
    """Collect the words that occur in the preprocessed training data.

    Args:
        preprocessedTrainingData: Iterable of ``(words, sentiment)`` pairs,
            where ``words`` is an iterable of tokens.

    Returns:
        The keys of an ``nltk.FreqDist`` built over every token, i.e. one
        entry per distinct word.
    """
    all_words = []
    for (words, _sentiment) in preprocessedTrainingData:
        all_words.extend(words)

    wordlist = nltk.FreqDist(all_words)
    word_features = wordlist.keys()
    # The caller assigns this function's result, so the vocabulary has to be
    # returned explicitly; without it the function would yield None.
    return word_features
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_features(tweet, vocabulary=None):
    """Map a tokenised tweet to boolean "contains(word)" features.

    Args:
        tweet: Iterable of tokens making up one tweet.
        vocabulary: Iterable of words to test for. When ``None`` the
            module-level ``word_features`` is used, which keeps the
            single-argument call form working.

    Returns:
        A dict with one ``'contains(<word>)'`` key per vocabulary word,
        whose value is ``True`` if that word occurs in ``tweet``.
    """
    if vocabulary is None:
        vocabulary = word_features

    # Build the set once so each membership test is a constant-time lookup.
    tweet_words = set(tweet)
    return {'contains(%s)' % word: (word in tweet_words) for word in vocabulary}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The preprocessed training data is stored in preprocessedTrainingSet; the
# name preprocessedTrainingData only exists as buildVocabulary's parameter,
# so using it here would raise NameError.
word_features = buildVocabulary(preprocessedTrainingSet)
trainingFeatures = nltk.classify.apply_features(extract_features, preprocessedTrainingSet)
OlderNewer