-
-
Save manuchandel/bc8a6ca4b1527b7594945e5091013905 to your computer and use it in GitHub Desktop.
A script to download all of a user's tweets into JSON
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
import tweepy #https://github.com/tweepy/tweepy | |
import json | |
import sys | |
# Twitter API credentials.
# These are empty placeholders: fill in the four values issued for your
# Twitter application before running the script, and avoid committing real
# secrets to version control.
consumer_key = ""
consumer_secret = ""
access_key = ""
access_secret = ""
def get_all_tweets(screen_name):
    """Download every tweet the user-timeline endpoint returns for a user.

    Pages backwards through the timeline with ``max_id`` until a request
    comes back empty. Twitter limits how far back this endpoint reaches
    (about 3,200 of the most recent tweets per Twitter's documentation), so
    older tweets are not retrievable this way.

    Args:
        screen_name: Twitter handle of the account to download.

    Returns:
        A list of tweepy status objects, newest first. The list is empty
        when the account has no retrievable tweets.
    """
    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth)

    # number of tweets requested per page (the API caps count at 200)
    page_size = 199

    # list holding all the tweepy tweets collected so far
    alltweets = []

    # initial request for the most recent tweets
    new_tweets = api.user_timeline(screen_name=screen_name, count=page_size)

    # Keep grabbing pages until one comes back empty. Testing the page
    # before indexing into alltweets means an account with no tweets yields
    # an empty list instead of an IndexError.
    while new_tweets:
        alltweets.extend(new_tweets)
        # id of the oldest tweet seen so far, less one; passing it as max_id
        # makes all subsequent requests skip tweets already collected
        oldest = alltweets[-1].id - 1
        new_tweets = api.user_timeline(
            screen_name=screen_name, count=page_size, max_id=oldest
        )

    # Report the total fetched. The parenthesised form runs on both
    # Python 2 and Python 3.
    print("Total tweets downloaded from %s are %s" % (screen_name, len(alltweets)))
    return alltweets
def fetch_tweets(screen_names):
    """Collect the tweets of several users into one flat list.

    Args:
        screen_names: iterable of Twitter handles to download.

    Returns:
        A single list with the tweets of every handle, in the order the
        handles were given.
    """
    # download each timeline in turn and flatten the results
    return [
        status
        for handle in screen_names
        for status in get_all_tweets(handle)
    ]
def store_tweets(alltweets,file='tweets.json'):
    """Write a summary of each tweet to a JSON file.

    Args:
        alltweets: iterable of tweet objects exposing ``text``,
            ``created_at``, ``id_str``, ``retweet_count``,
            ``favorite_count``, ``in_reply_to_screen_name`` and a ``_json``
            dict that contains a ``'user'`` entry.
        file: path of the output file; it is overwritten if it exists.

    The file receives a JSON array with one object per tweet, indented by
    four spaces and with keys sorted.
    """
    # a list of all formatted tweets
    tweet_list = []
    for tweet in alltweets:
        # user information attached to the raw tweet payload
        user_info = tweet._json['user']
        tweet_list.append({
            # Text of the tweet. It is kept as text rather than encoded to
            # bytes, because json cannot serialise bytes on Python 3.
            'text': tweet.text,
            # date and time at which the tweet was created
            'created_at': tweet.created_at.strftime("%Y-%m-%d %H:%M:%S"),
            # id of this tweet
            'id_str': tweet.id_str,
            # retweet count
            'retweet_count': tweet.retweet_count,
            # favourites count
            'favorite_count': tweet.favorite_count,
            # screen name of the user being replied to (may be None)
            'in_reply_to_screen_name': tweet.in_reply_to_screen_name,
            # number of followers of the tweeting user
            'followers_count': user_info['followers_count'],
            # screen name of the person who tweeted this
            'screen_name': user_info['screen_name'],
        })

    # Text mode, because json.dump writes str. With the default
    # ensure_ascii the output is pure ASCII, so the platform's default
    # encoding does not matter. The with-block closes the file even if the
    # dump raises.
    with open(file, 'w') as file_des:
        json.dump(tweet_list, file_des, indent=4, sort_keys=True)
if __name__ == '__main__':
    # Usage: python all_tweets.py SCREEN_NAME [STORAGE_DESTINATION]
    # Without a screen name there is nothing to download, so stop with a
    # usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: python %s SCREEN_NAME [STORAGE_DESTINATION]" % sys.argv[0])

    # pass in the username of the account you want to download
    alltweets = get_all_tweets(sys.argv[1])

    # Store the data in a JSON file. Check the argument count before reading
    # sys.argv[2], so that omitting the destination (or passing an empty
    # string) falls back to the default filename.
    if len(sys.argv) > 2 and sys.argv[2]:
        store_tweets(alltweets, sys.argv[2])
    else:
        store_tweets(alltweets)
Proper invocation:
python all_tweets.py SCREEN_NAME STORAGE_DESTINATION
line 48
SyntaxError: invalid syntax
A tutorial/help file would be nice. You alive?
I'm getting an error when running this code. Is this code correct?
- If you got an error at line 48, that's because Python 3 requires parentheses around the arguments to print -> print("Total ...")
- If you got an error at line 120 or 124 (most likely "index out of range"), that's because you need to pass arguments: the first argument is the screen_name of the user and the second is the name of the output file (run this from a terminal).
- If you got an error at line 111, that's because the file is opened in binary mode: remove the 'b' from the mode, and also remove the ".encode" call from line 74, since
that will cause an error as well.
after these edits the code should work :)
amazing it works! thank you. I had to change the following:
line 48
print ("Total tweets downloaded from %s are %s" % (screen_name,len(alltweets)))
line 74
tweet_information['text']=tweet.text
line 108
file_des=open(file,'w', encoding='utf-8')
run the script with the two arguments:
$ python tweet_dumper.py theTwitterUsername tweets.json
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have run your code and this error came out. Why?
Traceback (most recent call last):
File "C:\Users\User\Desktop\gettweet_json.py", line 120, in
alltweets=get_all_tweets(sys.argv[1])
IndexError: list index out of range
alltweets=get_all_tweets(sys.argv[1])
if len(sys.argv[2])>0: store_tweets(alltweets,sys.argv[2]) else : store_tweets(alltweets)