Created
December 22, 2017 09:05
-
-
Save scubbo/70c4de3ad3107c6007194325efffc61b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# encoding: utf-8
# Based on https://gist.github.com/yanofsky/5436496
# Made by @jacksquaredson
import argparse | |
import csv | |
import time | |
import tweepy #https://github.com/tweepy/tweepy | |
def main(args):
    """Look up every tweet id in ``tweets.csv`` and save the tweet texts.

    The first row of ``tweets.csv`` is treated as a header and skipped; the
    first column of every following row is used as a tweet id. Each non-empty
    text returned by ``get_tweet_text`` is written as a single-column row to
    the file ``tweet_texts``. A progress line is printed every ten rows.

    Args:
        args: Parsed command-line namespace providing ``consumer_key``,
            ``consumer_secret``, ``access_key``, ``access_secret`` and
            ``no_retweets``.
    """
    # authorize twitter, initialize tweepy
    auth = tweepy.OAuthHandler(args.consumer_key, args.consumer_secret)
    auth.set_access_token(args.access_key, args.access_secret)
    api = tweepy.API(auth)

    no_retweets = args.no_retweets

    # The csv module requires newline='' so that it controls line endings
    # itself (otherwise embedded newlines in tweets are mangled and extra
    # blank rows appear on Windows). An explicit UTF-8 encoding keeps emoji
    # and other non-ASCII tweet text from failing on non-UTF-8 locales.
    with open('tweets.csv', 'r', newline='', encoding='utf-8') as f_in, \
            open('tweet_texts', 'w', newline='', encoding='utf-8') as f_out:
        reader = csv.reader(f_in)
        writer = csv.writer(f_out)
        next(reader, None)  # Skip the header; an empty file is not an error
        for idx, tweet in enumerate(reader):
            # csv.reader yields [] for blank lines: there is no id to fetch.
            if tweet:
                tweet_text = get_tweet_text(no_retweets, api, tweet[0])
                if tweet_text:
                    writer.writerow([tweet_text])
            if idx > 0 and idx % 10 == 0:
                print('Handled {idx} tweets'.format(idx=idx))
def get_tweet_text(no_retweets, api, id):
    """Return the full text for tweet ``id``, or ``None`` for a skipped retweet.

    Args:
        no_retweets: When true, retweets produce ``None`` instead of text.
        api: API client handed through to ``get_tweet_by_id``.
        id: Id of the tweet to fetch.

    Returns:
        The ``full_text`` of the tweet. For a retweet, the ``full_text`` of
        the retweeted status is returned instead, or ``None`` when
        ``no_retweets`` is set.
    """
    tweet = get_tweet_by_id(api, id)
    # A retweet is identified by carrying a ``retweeted_status`` payload.
    # The ``retweeted`` flag only reports whether the *authenticating user*
    # has retweeted this status, so it misclassifies retweets fetched with
    # another account's credentials and can be true on a status that has no
    # ``retweeted_status`` at all.
    original_status = getattr(tweet, 'retweeted_status', None)
    if original_status is None:
        return tweet.full_text
    if no_retweets:
        return None
    return original_status.full_text
def get_tweet_by_id(api, id):
    """Fetch one status in extended tweet mode, retrying while rate-limited.

    Every time the lookup raises ``tweepy.error.RateLimitError`` the wait is
    doubled (2s, 4s, 8s, ...) before the next attempt. Any other exception
    propagates to the caller.

    Args:
        api: Object exposing ``get_status(id, tweet_mode=...)``.
        id: Id of the status to fetch.

    Returns:
        Whatever ``api.get_status`` returns for ``id``.
    """
    wait_seconds = 1
    while True:
        try:
            status = api.get_status(id, tweet_mode='extended')
        except tweepy.error.RateLimitError:
            # Back off exponentially: double the pause on each rejection.
            wait_seconds *= 2
            time.sleep(wait_seconds)
        else:
            return status
if __name__ == '__main__':
    # Command-line entry point: one optional switch to drop retweets plus the
    # four required Twitter credentials, then hand the namespace to main().
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--no-retweets', '-n',
        action='store_true',
        default=False,
        help='Only store "original" tweets - no RTs',
    )

    # Each credential option is mandatory and differs only in flag and help.
    credential_options = (
        ('--consumer-key', 'Twitter API Consumer Key'),
        ('--consumer-secret', 'Twitter API Consumer Secret'),
        ('--access-key', 'Twitter API Access Key'),
        ('--access-secret', 'Twitter API Access Secret'),
    )
    for flag, description in credential_options:
        parser.add_argument(flag, required=True, help=description)

    args = parser.parse_args()
    main(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment