Anas Al-Masri AnasAlmasri

Tech Lead | Algo Trader

AnasAlmasri / SentimentAnalysis.py

Last active June 1, 2022 06:08

	import twitter

	# Create the twitter.Api client from the four credential placeholders below.
	twitter_api = twitter.Api(
	    consumer_key='YOUR_CONSUMER_KEY',
	    consumer_secret='YOUR_CONSUMER_SECRET',
	    access_token_key='YOUR_ACCESS_TOKEN_KEY',
	    access_token_secret='YOUR_ACCESS_TOKEN_SECRET',
	)

	# Check the credentials by printing whatever VerifyCredentials() returns.
	print(twitter_api.VerifyCredentials())

AnasAlmasri / SentimentAnalysis.py

Last active February 13, 2019 02:10

Function to build Test data set based on a search term

	def buildTestSet(search_keyword):
	    """Fetch tweets matching ``search_keyword`` as unlabeled test records.

	    Calls ``twitter_api.GetSearch`` on the module-level client with
	    ``count=100`` and prints how many tweets came back.

	    Args:
	        search_keyword: Search term passed to the search call.

	    Returns:
	        A list of ``{"text": ..., "label": None}`` dicts, one per fetched
	        status, or ``None`` if fetching or building the list failed.
	    """
	    try:
	        tweets_fetched = twitter_api.GetSearch(search_keyword, count=100)
	        print("Fetched " + str(len(tweets_fetched)) + " tweets for the term " + search_keyword)
	        return [{"text": status.text, "label": None} for status in tweets_fetched]
	    except Exception:
	        # Still a deliberately broad best-effort handler, but no longer a
	        # bare ``except:`` -- KeyboardInterrupt and SystemExit now propagate
	        # instead of being reported as a failed search.
	        print("Unfortunately, something went wrong..")
	        return None

AnasAlmasri / SentimentAnalysis.py

Last active August 9, 2020 02:56

Test out buildTestSet() function

	search_term = input("Enter a search keyword:")
	testDataSet = buildTestSet(search_term)

	# buildTestSet() returns None when the fetch fails; slicing None would raise
	# TypeError, so only preview the first records when a list came back.
	if testDataSet is not None:
	    print(testDataSet[0:4])

AnasAlmasri / SentimentAnalysis.py

Last active February 13, 2019 02:18

function to retrieve tweets from corpus and save them into a csv file

	def buildTrainingSet(corpusFile, tweetDataFile):
	    """Read the labeled corpus CSV into a list of tweet descriptors.

	    Each non-empty CSV row is read as ``topic, label, tweet_id``
	    (columns 0, 1 and 2) and stored as a dict in the local ``corpus`` list.

	    Args:
	        corpusFile: Path of the corpus CSV file to read.
	        tweetDataFile: Not used by the code visible here.
	            NOTE(review): this snippet looks truncated; presumably the
	            fetched tweet data is written to this path further down -- confirm.
	    """
	    import csv
	    import time  # NOTE(review): unused in the visible part of the function

	    # csv.reader needs a text-mode file in Python 3 (a binary-mode file makes
	    # it raise "iterator should return strings, not bytes"); newline='' is
	    # what the csv module documentation recommends for files given to it.
	    with open(corpusFile, 'r', newline='') as csvfile:
	        lineReader = csv.reader(csvfile, delimiter=',', quotechar="\"")
	        # csv.reader yields an empty list for a blank line; skip those
	        # instead of failing on row[2].
	        corpus = [
	            {"tweet_id": row[2], "label": row[1], "topic": row[0]}
	            for row in lineReader
	            if row
	        ]


	# Backward-compatible alias: the function was originally defined under this
	# misspelled name, while the caller in this file uses ``buildTrainingSet``.
	buidTrainingSet = buildTrainingSet

AnasAlmasri / SentimentAnalysis.py

Created February 12, 2019 12:37

testing buildTrainingSet() function

	# Corpus CSV path and tweet-data CSV path (placeholders to be edited).
	corpusFile, tweetDataFile = (
	    "YOUR_FILE_PATH/corpus.csv",
	    "YOUR_FILE_PATH/tweetDataFile.csv",
	)

	# Build the training data from the two paths above.
	trainingData = buildTrainingSet(corpusFile, tweetDataFile)

AnasAlmasri / SentimentAnalysis.py

Created February 12, 2019 13:07

tweet preprocessor class

	import re
	from nltk.tokenize import word_tokenize
	from string import punctuation
	from nltk.corpus import stopwords

	# Tweet preprocessor: stores a set of tokens in self._stopwords and exposes
	# processTweets() for a list of tweets.
	class PreProcessTweets:
	# Build the token set once: the NLTK English stopword list, every character
	# of string.punctuation, and the literal tokens 'AT_USER' and 'URL'.
	# NOTE(review): presumably these are the tokens to drop from tweets -- the
	# code that consumes self._stopwords is not visible here; confirm.
	def __init__(self):
	self._stopwords = set(stopwords.words('english') + list(punctuation) + ['AT_USER','URL'])

	# NOTE(review): the body of processTweets is not visible in this excerpt.
	# Callers in this file pass it the training and test data and keep the
	# returned value.
	def processTweets(self, list_of_tweets):

AnasAlmasri / SentimentAnalysis.py

Last active August 14, 2019 12:45

preprocessing all tweets

	# One preprocessor instance handles both data sets, training data first.
	tweetProcessor = PreProcessTweets()
	_process_tweets = tweetProcessor.processTweets

	preprocessedTrainingSet = _process_tweets(trainingData)
	preprocessedTestSet = _process_tweets(testDataSet)

AnasAlmasri / SentimentAnalysis.py

Created February 13, 2019 01:15

building the vocabulary

	import nltk

	def buildVocabulary(preprocessedTrainingData):
	    """Return the distinct words found in the preprocessed training data.

	    Args:
	        preprocessedTrainingData: Iterable of ``(words, sentiment)`` pairs,
	            where ``words`` is an iterable of hashable tokens. The sentiment
	            is ignored.

	    Returns:
	        A list of the unique words in first-seen order; empty for empty input.
	    """
	    # Only the distinct words are needed, so an insertion-ordered dict keyed
	    # by word replaces the frequency distribution that was previously built
	    # just to read its keys.
	    vocabulary = {}
	    for words, _sentiment in preprocessedTrainingData:
	        vocabulary.update(dict.fromkeys(words))

	    # The word list used to be computed but never returned, so callers
	    # received None.
	    return list(vocabulary)

AnasAlmasri / SentimentAnalysis.py

Created February 13, 2019 01:28

extract features

	def extract_features(tweet):
	    """Map every vocabulary word to whether it occurs in ``tweet``.

	    Reads the module-level ``word_features`` iterable. The result has one
	    ``'contains(<word>)'`` key per vocabulary word, with a boolean value
	    telling whether that word is among the tweet's tokens.
	    """
	    present = frozenset(tweet)
	    return {
	        'contains(%s)' % candidate: candidate in present
	        for candidate in word_features
	    }

AnasAlmasri / SentimentAnalysis.py

Created February 13, 2019 01:37

building feature vector

	# The preprocessed training data is bound to `preprocessedTrainingSet` where
	# it is created in this file; the name `preprocessedTrainingData` used here
	# before is never assigned at module level and raised NameError.
	word_features = buildVocabulary(preprocessedTrainingSet)
	trainingFeatures = nltk.classify.apply_features(extract_features, preprocessedTrainingSet)