Last active
February 13, 2019 02:18
-
-
Save AnasAlmasri/33caec97bca7f33f8c1f8ab457a0b056 to your computer and use it in GitHub Desktop.
Function to retrieve tweets from a corpus and save them into a CSV file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def buidTrainingSet(corpusFile, tweetDataFile, api=None, rate_limit=180, rate_window=900):
    """Fetch the text of every tweet listed in a corpus CSV and save the results.

    Each corpus row is read as ``topic, label, tweet_id`` (columns 0, 1, 2).
    For every row the tweet is looked up through the API client; tweets that
    are fetched successfully get a ``"text"`` entry and are written to
    ``tweetDataFile`` as ``tweet_id, text, label, topic``.

    Targets Python 3: both files are opened in text mode with ``newline=''``
    as the ``csv`` module requires (the previous ``'rb'``/``'wb'`` modes only
    worked on Python 2).

    Args:
        corpusFile: Path of the input CSV (topic, label, tweet_id per row).
        tweetDataFile: Path of the output CSV; it is created or overwritten.
        api: Optional client object exposing ``GetStatus(tweet_id)`` whose
            result has a ``.text`` attribute. When omitted, the module-level
            ``twitter_api`` is used, as before.
        rate_limit: Number of requests allowed per ``rate_window``.
        rate_window: Length of the rate-limit window in seconds. The defaults
            (180 requests per 900 seconds) reproduce the original 5-second
            pause between requests.

    Returns:
        A list of dicts with keys ``tweet_id``, ``label``, ``topic`` and
        ``text``, one per tweet that was fetched successfully, in corpus order.
    """
    import csv
    import time

    # Fall back to the module-level client the original code relied on.
    client = twitter_api if api is None else api

    corpus = []
    with open(corpusFile, newline='', encoding='utf-8') as csvfile:
        lineReader = csv.reader(csvfile, delimiter=',', quotechar="\"")
        for row in lineReader:
            # Blank or truncated rows cannot name a tweet; skip them instead
            # of crashing with IndexError.
            if len(row) < 3:
                continue
            corpus.append({"tweet_id": row[2], "label": row[1], "topic": row[0]})

    # Seconds to wait between requests so that at most ``rate_limit`` calls
    # are made per ``rate_window``.
    sleep_time = rate_window / rate_limit if rate_limit > 0 else 0

    trainingDataSet = []
    for tweet in corpus:
        try:
            status = client.GetStatus(tweet["tweet_id"])
        except Exception as e:
            # Best effort: a tweet that cannot be fetched is skipped, but the
            # reason is reported. Catching Exception (not a bare except)
            # keeps Ctrl-C able to interrupt a long download.
            print(e)
        else:
            print("Tweet fetched" + status.text)
            tweet["text"] = status.text
            trainingDataSet.append(tweet)
        # Pace every attempt, not only the successful ones.
        # NOTE(review): assumes failed lookups also count against the quota.
        if sleep_time > 0:
            time.sleep(sleep_time)

    # now we write them to the empty CSV file
    with open(tweetDataFile, 'w', newline='', encoding='utf-8') as csvfile:
        linewriter = csv.writer(csvfile, delimiter=',', quotechar="\"")
        for tweet in trainingDataSet:
            try:
                linewriter.writerow([tweet["tweet_id"], tweet["text"], tweet["label"], tweet["topic"]])
            except Exception as e:
                # Keep writing the remaining rows if one row cannot be written.
                print(e)
    return trainingDataSet
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment