AnasAlmasri · February 12, 2019 13:07
diff --git a/SentimentAnalysis.py b/SentimentAnalysis.py
 import re
 from nltk.tokenize import word_tokenize
 from string import punctuation 
 from nltk.corpus import stopwords 

 class PreProcessTweets:
     """Normalize and tokenize labelled tweets.

     Each tweet's text is lower-cased, URLs and @-mentions are replaced by
     the placeholders ``URL`` and ``AT_USER``, the leading ``#`` of hashtags
     is dropped, and the result is split into tokens with NLTK's
     ``word_tokenize``.  Tokens found in the stop-word set (NLTK English
     stop words, the characters of ``string.punctuation`` and the two
     placeholders) are discarded.
     """

     # Compiled once at class creation instead of being re-specified on every
     # call.  Raw strings keep ``\.`` and ``\S`` as regex escapes rather than
     # (invalid) string-literal escapes.
     _URL_RE = re.compile(r'(?:www\.\S+|https?://\S+)')
     _MENTION_RE = re.compile(r'@\S+')
     _HASHTAG_RE = re.compile(r'#(\S+)')

     def __init__(self):
         """Build the set of tokens that are dropped from every tweet."""
         self._stopwords = set(
             stopwords.words('english') + list(punctuation) + ['AT_USER', 'URL']
         )

     def processTweets(self, list_of_tweets):
         """Pre-process a collection of labelled tweets.

         Args:
             list_of_tweets: iterable of mappings, each providing a
                 ``"text"`` and a ``"label"`` entry.

         Returns:
             A list of ``(tokens, label)`` tuples in input order, where
             ``tokens`` is the filtered token list for the tweet's text.
         """
         return [
             (self._processTweet(tweet["text"]), tweet["label"])
             for tweet in list_of_tweets
         ]

     def _processTweet(self, tweet):
         """Return the filtered token list for a single tweet string."""
         tweet = tweet.lower()  # convert text to lower-case
         # The placeholders are inserted after lower-casing, so they stay
         # upper-case and match the entries in the stop-word set.
         tweet = self._URL_RE.sub('URL', tweet)  # replace URLs
         tweet = self._MENTION_RE.sub('AT_USER', tweet)  # replace usernames
         tweet = self._HASHTAG_RE.sub(r'\1', tweet)  # remove the # in #hashtag
         tokens = word_tokenize(tweet)  # split the text into word tokens
         return [word for word in tokens if word not in self._stopwords]
	import re
	from nltk.tokenize import word_tokenize
	from string import punctuation
	from nltk.corpus import stopwords

	class PreProcessTweets:
	    """Normalize and tokenize labelled tweets.

	    Each tweet's text is lower-cased, URLs and @-mentions are replaced by
	    the placeholders ``URL`` and ``AT_USER``, the leading ``#`` of hashtags
	    is dropped, and the result is split into tokens with NLTK's
	    ``word_tokenize``.  Tokens found in the stop-word set (NLTK English
	    stop words, the characters of ``string.punctuation`` and the two
	    placeholders) are discarded.
	    """

	    # Compiled once at class creation instead of being re-specified on every
	    # call.  Raw strings keep ``\.`` and ``\S`` as regex escapes, and the
	    # ``|`` is a real alternation (an escaped ``\|`` would only match a
	    # literal pipe character, so no URL would ever be replaced).
	    _URL_RE = re.compile(r'(?:www\.\S+|https?://\S+)')
	    _MENTION_RE = re.compile(r'@\S+')
	    _HASHTAG_RE = re.compile(r'#(\S+)')

	    def __init__(self):
	        """Build the set of tokens that are dropped from every tweet."""
	        self._stopwords = set(
	            stopwords.words('english') + list(punctuation) + ['AT_USER', 'URL']
	        )

	    def processTweets(self, list_of_tweets):
	        """Pre-process a collection of labelled tweets.

	        Args:
	            list_of_tweets: iterable of mappings, each providing a
	                ``"text"`` and a ``"label"`` entry.

	        Returns:
	            A list of ``(tokens, label)`` tuples in input order, where
	            ``tokens`` is the filtered token list for the tweet's text.
	        """
	        return [
	            (self._processTweet(tweet["text"]), tweet["label"])
	            for tweet in list_of_tweets
	        ]

	    def _processTweet(self, tweet):
	        """Return the filtered token list for a single tweet string."""
	        tweet = tweet.lower()  # convert text to lower-case
	        # The placeholders are inserted after lower-casing, so they stay
	        # upper-case and match the entries in the stop-word set.
	        tweet = self._URL_RE.sub('URL', tweet)  # replace URLs
	        tweet = self._MENTION_RE.sub('AT_USER', tweet)  # replace usernames
	        tweet = self._HASHTAG_RE.sub(r'\1', tweet)  # remove the # in #hashtag
	        tokens = word_tokenize(tweet)  # split the text into word tokens
	        return [word for word in tokens if word not in self._stopwords]