-
-
Save onmyeoin/b6d99be0216d621788fcbe6bcb34a370 to your computer and use it in GitHub Desktop.
A Python 3 script to download all of a user's tweets into a CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
import tweepy | |
import csv | |
def get_all_tweets(screen_name):
    """Download a user's original tweets and save them to a CSV file.

    Pages backwards through the timeline of ``screen_name`` 200 tweets at a
    time until the API returns an empty page, drops retweets, and writes the
    remaining tweets to ``<screen_name>_tweets.csv`` in the current working
    directory with the columns ``id``, ``created_at`` and ``text``.

    Args:
        screen_name: Screen name of the account whose timeline is downloaded.
    """
    # Twitter API credentials; these must be filled in before running.
    consumer_key = ""
    consumer_secret = ""
    access_key = ""
    access_secret = ""

    # Authorize with Twitter and initialize tweepy.
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True)

    all_tweets = []
    oldest = None  # id just below the oldest tweet fetched so far

    # Stop on the first empty page rather than on a fixed tweet count: an
    # account with fewer tweets than the cap would otherwise never reach the
    # threshold and the same request would be repeated forever.
    while True:
        # Extended mode is needed to receive the untruncated tweet text.
        request = {
            "screen_name": screen_name,
            "tweet_mode": "extended",
            "count": 200,
        }
        if oldest is not None:
            print("getting tweets before {}".format(oldest))
            # max_id makes each page start strictly below the previous one,
            # which prevents duplicates.
            request["max_id"] = oldest

        new_tweets = api.user_timeline(**request)
        if not new_tweets:
            break

        all_tweets.extend(new_tweets)
        oldest = new_tweets[-1].id - 1
        print("...{} tweets downloaded so far".format(len(all_tweets)))

    # Drop retweets. A retweet carries a ``retweeted_status`` attribute; the
    # "RT @" prefix check is a fallback. A plain substring test for "RT"
    # would also discard ordinary tweets that merely contain those letters.
    rows = [
        [tweet.id_str, tweet.created_at, tweet.full_text]
        for tweet in all_tweets
        if not (
            hasattr(tweet, "retweeted_status")
            or tweet.full_text.startswith("RT @")
        )
    ]

    # newline='' lets the csv module control line endings (no blank rows on
    # Windows); UTF-8 keeps emoji and other non-ASCII text writable.
    with open('{}_tweets.csv'.format(screen_name), 'w',
              newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["id", "created_at", "text"])
        writer.writerows(rows)

    print('{}_tweets.csv was successfully created.'.format(screen_name))
if __name__ == '__main__':
    # Screen name of the account whose tweets should be downloaded.
    target_account = "realDonaldTrump"
    get_all_tweets(target_account)
Does the account you are trying to download from only have 759 tweets?
This script will keep running until it reaches the maximum number of tweets allowed by the Twitter API (which is 3240; this script looks for 3200).
If the account you're trying to download from has fewer than 3200 tweets, change the line
while len(alltweets) <= 3200:
To
while len(alltweets) < 759:
Thanks!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The terminal keeps repeating itself on a loop:
...759 tweets downloaded so far
getting tweets before 102157671012106240
...759 tweets downloaded so far
getting tweets before 102157671012106240