Skip to content

Instantly share code, notes, and snippets.

@AnasAlmasri
Last active February 13, 2019 02:18
Show Gist options
  • Save AnasAlmasri/33caec97bca7f33f8c1f8ab457a0b056 to your computer and use it in GitHub Desktop.
Save AnasAlmasri/33caec97bca7f33f8c1f8ab457a0b056 to your computer and use it in GitHub Desktop.
Function to retrieve tweets from a corpus and save them to a CSV file
def buidTrainingSet(corpusFile, tweetDataFile):
    """Fetch the text of every tweet listed in a corpus CSV and save the result.

    The corpus file is read as comma-separated rows of
    ``topic, label, tweet_id``. Each tweet is looked up through the
    module-level ``twitter_api`` object (it must expose
    ``GetStatus(tweet_id)`` returning an object with a ``.text`` attribute;
    presumably a python-twitter client -- confirm against the caller).
    Tweets that are fetched successfully are written to ``tweetDataFile`` as
    rows of ``tweet_id, text, label, topic``.

    Args:
        corpusFile: Path of the CSV file describing the corpus.
        tweetDataFile: Path of the CSV file to (over)write with the fetched
            tweets.

    Returns:
        A list of dicts with the keys ``tweet_id``, ``label``, ``topic`` and
        ``text``, one per tweet that could be fetched.
    """
    import csv
    import time

    corpus = []
    # The csv module needs text-mode files opened with newline='' on
    # Python 3; binary mode makes the reader/writer fail on bytes.
    with open(corpusFile, 'r', newline='', encoding='utf-8') as csvfile:
        lineReader = csv.reader(csvfile, delimiter=',', quotechar="\"")
        for row in lineReader:
            # Blank or truncated rows carry no tweet id; skip them instead
            # of crashing with IndexError.
            if len(row) < 3:
                continue
            corpus.append({"tweet_id": row[2], "label": row[1], "topic": row[0]})

    # Presumably 180 requests are allowed per 900-second window (TODO confirm
    # against the API's current limits); spread the requests evenly over it.
    rate_limit = 180
    sleep_time = 900 / rate_limit

    trainingDataSet = []
    for tweet in corpus:
        try:
            status = twitter_api.GetStatus(tweet["tweet_id"])
            print("Tweet fetched" + status.text)
            tweet["text"] = status.text
            trainingDataSet.append(tweet)
        except Exception as e:
            # Best effort: an unavailable tweet is reported and skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print("Failed to fetch tweet " + str(tweet["tweet_id"]) + ": " + str(e))
        # Pace every request, including failed ones, so a run of errors does
        # not turn into a burst of back-to-back calls.
        time.sleep(sleep_time)

    # now we write them to the empty CSV file
    with open(tweetDataFile, 'w', newline='', encoding='utf-8') as csvfile:
        linewriter = csv.writer(csvfile, delimiter=',', quotechar="\"")
        for tweet in trainingDataSet:
            try:
                linewriter.writerow([tweet["tweet_id"], tweet["text"], tweet["label"], tweet["topic"]])
            except Exception as e:
                # Keep going so one unwritable row does not lose the rest.
                print(e)
    return trainingDataSet
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment