Skip to content

Instantly share code, notes, and snippets.

import twitter

# Authenticate against the Twitter REST API.
# Replace the placeholder strings with your application's credentials
# from https://developer.twitter.com before running.
twitter_api = twitter.Api(
    consumer_key='YOUR_CONSUMER_KEY',
    consumer_secret='YOUR_CONSUMER_SECRET',
    access_token_key='YOUR_ACCESS_TOKEN_KEY',
    access_token_secret='YOUR_ACCESS_TOKEN_SECRET',
)

# Sanity check: prints the authenticated account's profile on success.
print(twitter_api.VerifyCredentials())
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Last active February 13, 2019 02:10
Function to build Test data set based on a search term
def buildTestSet(search_keyword):
    """Fetch up to 100 recent tweets matching *search_keyword*.

    Returns a list of ``{"text": ..., "label": None}`` dicts (the label is
    filled in later by the classifier), or ``None`` when the API call fails.
    Relies on the module-level ``twitter_api`` client being authenticated.
    """
    try:
        tweets_fetched = twitter_api.GetSearch(search_keyword, count=100)
    except Exception:  # was a bare except: — keep best-effort, but stop masking SystemExit/KeyboardInterrupt
        print("Unfortunately, something went wrong..")
        return None
    print("Fetched " + str(len(tweets_fetched)) + " tweets for the term " + search_keyword)
    return [{"text": status.text, "label": None} for status in tweets_fetched]
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Last active August 9, 2020 02:56
Test out buildTestSet() function
# Try out buildTestSet() interactively.
search_term = input("Enter a search keyword:")
testDataSet = buildTestSet(search_term)
# buildTestSet() returns None on API failure; the original unconditionally
# sliced the result, raising TypeError in that case.
if testDataSet is not None:
    print(testDataSet[0:4])
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Last active February 13, 2019 02:18
Function to retrieve labelled tweets from the corpus and save them into a CSV file
def buidTrainingSet(corpusFile, tweetDataFile):
    """Load the labelled-tweet corpus from *corpusFile*.

    Each corpus row is ``topic,label,tweet_id``; returns a list of
    ``{"tweet_id", "label", "topic"}`` dicts.

    NOTE(review): *tweetDataFile* is unused in the visible code — presumably
    a later step downloads tweet texts and saves them there; confirm against
    the full script.
    """
    import csv
    corpus = []
    # Python 3 csv.reader needs a text-mode file opened with newline=''
    # (the original 'rb' mode raises TypeError under Python 3).
    with open(corpusFile, 'r', newline='', encoding='utf-8') as csvfile:
        lineReader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in lineReader:
            corpus.append({"tweet_id": row[2], "label": row[1], "topic": row[0]})
    # The original built corpus but never returned it, so the caller's
    # trainingData was always None.
    return corpus


# Backward/forward-compatible alias: the call site uses the intended
# (correctly spelled) name buildTrainingSet, which would otherwise NameError.
buildTrainingSet = buidTrainingSet
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Created February 12, 2019 12:37
testing buildTrainingSet() function
# Build the training set from the downloaded corpus.
# Replace both placeholder paths with real locations before running.
corpusFile = "YOUR_FILE_PATH/corpus.csv"
tweetDataFile = "YOUR_FILE_PATH/tweetDataFile.csv"
# NOTE(review): the function above is defined as buidTrainingSet (typo);
# as written this call raises NameError unless an alias exists.
trainingData = buildTrainingSet(corpusFile, tweetDataFile)
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Created February 12, 2019 13:07
tweet preprocessor class
import re
from nltk.tokenize import word_tokenize
from string import punctuation
from nltk.corpus import stopwords
class PreProcessTweets:
def __init__(self):
self._stopwords = set(stopwords.words('english') + list(punctuation) + ['AT_USER','URL'])
def processTweets(self, list_of_tweets):
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Last active August 14, 2019 12:45
Preprocessing all tweets
# Normalise both data sets with the PreProcessTweets helper defined above.
# The resulting names are consumed by the vocabulary/feature snippets below,
# so they must not be renamed.
tweetProcessor = PreProcessTweets()
preprocessedTrainingSet = tweetProcessor.processTweets(trainingData)
preprocessedTestSet = tweetProcessor.processTweets(testDataSet)
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Created February 13, 2019 01:15
building the vocabulary
import nltk
def buildVocabulary(preprocessedTrainingData):
    """Collect every distinct word across the training tweets.

    *preprocessedTrainingData* is an iterable of ``(words, sentiment)``
    pairs. Returns the vocabulary as a list of words in first-seen order.
    """
    # nltk.FreqDist is a collections.Counter subclass; the stdlib Counter
    # gives the identical key ordering (first insertion) without the
    # external dependency.
    from collections import Counter
    all_words = []
    for words, _sentiment in preprocessedTrainingData:
        all_words.extend(words)
    word_counts = Counter(all_words)
    # The original computed word_features but never returned it, so the
    # caller's word_features was always None.
    return list(word_counts.keys())
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Created February 13, 2019 01:28
extract features
def extract_features(tweet, vocabulary=None):
    """Build the NLTK feature dict for one tokenised *tweet*.

    For every vocabulary word ``w`` the dict maps ``'contains(w)'`` to
    whether the tweet contains it. *vocabulary* defaults to the
    module-level ``word_features`` (the original hidden-global behaviour);
    passing it explicitly makes the function self-contained and testable.
    """
    if vocabulary is None:
        vocabulary = word_features  # module-level global built by buildVocabulary()
    tweet_words = set(tweet)
    features = {}
    for word in vocabulary:
        features['contains(%s)' % word] = (word in tweet_words)
    return features
@AnasAlmasri
AnasAlmasri / SentimentAnalysis.py
Created February 13, 2019 01:37
building feature vector
# Build the vocabulary and the labelled feature vectors for training.
# Fix: the preprocessed training data was bound to preprocessedTrainingSet
# earlier in the script — preprocessedTrainingData was never defined at
# module level, so the original lines raised NameError.
word_features = buildVocabulary(preprocessedTrainingSet)
trainingFeatures = nltk.classify.apply_features(extract_features, preprocessedTrainingSet)