Last active
December 15, 2015 12:01
-
-
Save revox/15455afb530211b1f37f to your computer and use it in GitHub Desktop.
Get a Twitter user's timeline up to 3,200 tweets written into a CSV; uses the sixohsix Twitter module's retry facility (30-second sleep) if you're rate-limited
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# dont keep this script in public_html!!!! | |
import twitter, sys,json,csv, time | |
def twitter_user_timeline(twitter_api, q):
    '''Fetch up to the API ceiling (~3,200) of a user's most recent tweets.

    Seeds pagination with the user's single newest tweet, then pages
    backwards 200 tweets at a time via max_id.

    twitter_api -- authenticated twitter.Twitter instance
    q           -- screen name of the user whose timeline to fetch
    Returns a list of tweet dicts, newest first.
    '''
    user_timeline = twitter_api.statuses.user_timeline(screen_name=q, count=1)
    print(user_timeline[0]['id'])
    ids = [user_timeline[0]['id']]
    statuses = list(user_timeline)  # keep the seed tweet in the results
    for i in range(0, 16):  # 16 pages x 200 tweets = the 3,200-tweet API limit
        # max_id is INCLUSIVE in the Twitter API, so subtract 1 to avoid
        # re-fetching the last tweet of the previous page (the original
        # produced one duplicate per page).
        user_timeline = twitter_api.statuses.user_timeline(
            screen_name=q, count=200, include_retweets=False,
            max_id=ids[-1] - 1)
        if not user_timeline:
            break  # user has fewer tweets than the cap; stop paging early
        statuses += user_timeline
        # time.sleep(300)  # 5 minute rest between api calls, uncomment this if your being limited
        for tweet in user_timeline:
            ids.append(tweet['id'])  # remember ids so the next page starts below them
            print(tweet['id'])  # I like to watch
    return statuses
''' helper functions, clean data, unpack dictionaries ''' | |
def getVal(val):
    '''Normalise one tweet field for CSV output.

    Booleans and ints pass through untouched; a truthy string is
    UTF-8 encoded; anything falsy (None, empty string) becomes "".
    '''
    if isinstance(val, (bool, int)):
        return val
    if not val:
        return ""
    return val.encode('utf-8')
def getLng(val):
    '''Longitude (first GeoJSON coordinate) from a coordinates dict, else None.'''
    return val['coordinates'][0] if isinstance(val, dict) else None
def getLat(val):
    '''Latitude (second GeoJSON coordinate) from a coordinates dict, else None.'''
    return val['coordinates'][1] if isinstance(val, dict) else None
def getPlace(val):
    '''UTF-8 encoded full_name from a Twitter place dict, else None.'''
    if not isinstance(val, dict):
        return None
    return val['full_name'].encode('utf-8')
# == OAuth Authentication ==
# The consumer keys can be found on your application's Details page;
# fill these in before running (and keep this script out of public_html!).
consumer_key=""
consumer_secret=""
# Create an access token under the "Your access token" section
access_token=""
access_token_secret=""
auth = twitter.oauth.OAuth(access_token,
                           access_token_secret,
                           consumer_key,
                           consumer_secret)
twitter_api = twitter.Twitter(auth=auth)
# retry=True makes the library sleep ~30s and retry on rate-limit errors,
# so an explicit sleep is usually unnecessary
twitter_api.retry = True
# Sample usage
q = "David_Cameron"
results = twitter_user_timeline(twitter_api, q)
print(len(results))
# Show one sample search result by slicing the list...
# print(json.dumps(results[0], indent=1))
# The with-block guarantees the CSV file is closed and flushed even if a
# row raises (the original opened it and never closed it).
with open(q + '_timeline.csv', 'w') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(['created_at',
                        'user-screen_name',
                        'text',
                        'coordinates lng',
                        'coordinates lat',
                        'place',
                        'user-location',
                        'user-geo_enabled',
                        'user-lang',
                        'user-time_zone',
                        'user-statuses_count',
                        'user-followers_count',
                        'user-created_at'])
    for tweet in results:
        csvwriter.writerow([tweet['created_at'],
                            getVal(tweet['user']['screen_name']),
                            getVal(tweet['text']),
                            getLng(tweet['coordinates']),
                            getLat(tweet['coordinates']),
                            getPlace(tweet['place']),
                            getVal(tweet['user']['location']),
                            getVal(tweet['user']['geo_enabled']),
                            getVal(tweet['user']['lang']),
                            getVal(tweet['user']['time_zone']),
                            getVal(tweet['user']['statuses_count']),
                            getVal(tweet['user']['followers_count']),
                            getVal(tweet['user']['created_at'])
                            ])
print("done")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment