Skip to content

Instantly share code, notes, and snippets.

@prakhar1989
Last active August 29, 2015 14:21
Show Gist options
  • Save prakhar1989/c986a123d48b5fb46571 to your computer and use it in GitHub Desktop.
Save prakhar1989/c986a123d48b5fb46571 to your computer and use it in GitHub Desktop.
tweets.py
import twitter
import logging
import time
import csv
### Install the dependency by running:
###     pip install python-twitter

# Redirect warnings issued through the `warnings` module into the logging
# system; the intent is to keep SSL warnings off the console.
logging.captureWarnings(True)

# Module-level API client used for the searches below. The four strings are
# placeholders -- substitute the credentials of your own Twitter application.
api = twitter.Api(
    consumer_key="key",
    consumer_secret="secret",
    access_token_key="key",
    access_token_secret="secret",
)
def map_tweet_to_dict(t):
    """Flatten a tweet object into a plain dict of CSV-friendly fields.

    Args:
        t: A status object exposing ``id``, ``text``, ``retweet_count``,
            ``favorite_count``, ``in_reply_to_screen_name`` and a ``user``
            that in turn exposes ``id``, ``name``, ``friends_count``,
            ``followers_count`` and ``listed_count``.

    Returns:
        dict: One entry per exported column. ``text`` and ``screen_name``
        are UTF-8 encoded; ``inreplyto`` is the string ``"none"`` whenever
        ``in_reply_to_screen_name`` is empty or ``None``.
    """
    author = t.user
    return {
        "user_id": author.id,
        "retweet_count": t.retweet_count,
        "favorite_count": t.favorite_count,
        "inreplyto": t.in_reply_to_screen_name or "none",
        "friends": author.friends_count,
        # NOTE(review): the column is called "screen_name" but is filled from
        # `user.name`; confirm whether `user.screen_name` was intended.
        "screen_name": author.name.encode('utf-8'),
        "followers": author.followers_count,
        "listed": author.listed_count,
        "text": t.text.encode('utf-8'),
        "tweetid": t.id,
    }
def dump_to_csv(tweets, filename="tweets.csv", lineterminator="\r\n"):
    """Write a list of dicts to a CSV file, one row per dict.

    The column names (and the header row) are taken from the keys of the
    first dict, so every dict is expected to share those keys.

    Args:
        tweets: Sequence of dicts to export. May be empty.
        filename: Path of the CSV file to create or overwrite.
        lineterminator: Row terminator handed to ``csv.DictWriter``. The
            default is the csv module's own default. On Windows, text-mode
            newline translation turns that default into "\\r\\r\\n" (blank
            rows between records); pass "\\n" there instead.

    Returns:
        None
    """
    with open(filename, 'w') as csvfile:
        if not tweets:
            # Nothing to export: leave an empty file behind instead of
            # failing on tweets[0] after the file has been truncated.
            return
        fieldnames = list(tweets[0].keys())
        writer = csv.DictWriter(
            csvfile,
            fieldnames=fieldnames,
            lineterminator=lineterminator,
        )
        writer.writeheader()
        writer.writerows(tweets)
def tweets_to_csv(term, count, cached_tweets=None, max_id=None):
    """Search for tweets matching ``term`` and dump them to a CSV file.

    The search is requested in pages of at most 100 tweets until ``count``
    tweets have been asked for or a page comes back empty. The collected
    rows are then written with ``dump_to_csv`` using its default filename.

    Args:
        term: Search query passed to ``api.GetSearch``.
        count: Total number of tweets to request.
        cached_tweets: Optional list of already-mapped rows. New rows are
            appended to it in place. A fresh list is used when omitted, so
            separate calls no longer share (and accumulate into) one list.
        max_id: Optional upper bound on tweet ids for the first page.

    Returns:
        None
    """
    page_size = 100  # largest page requested from the search API per call

    if cached_tweets is None:
        cached_tweets = []

    remaining = count
    while True:
        batch = api.GetSearch(
            term=term,
            max_id=max_id,
            count=min(remaining, page_size),
        )
        # Build a real list: it is indexed/inspected below, which a lazy
        # `map` object (Python 3) would not allow.
        rows = [map_tweet_to_dict(tweet) for tweet in batch]
        cached_tweets.extend(rows)

        remaining -= page_size
        if remaining <= 0 or not rows:
            # Either everything was requested, or the search ran dry.
            break

        # max_id is inclusive, so step just below the oldest tweet seen to
        # avoid fetching it again on the next page.
        max_id = min(row["tweetid"] for row in rows) - 1
        # Formatted so the output is identical on Python 2 and Python 3.
        print("Fetching the next %s" % (remaining,))
        time.sleep(1)  # be a good netizen

    dump_to_csv(cached_tweets)
if __name__ == "__main__":
    # Sample usage: request 130 tweets matching the query and write them
    # to the default CSV file.
    tweets_to_csv(term="MS Dhoni IPL", count=130)
@prakhar1989
Copy link
Author

If you also want the location, add the following entry to the dictionary returned by map_tweet_to_dict (around line 24 of the gist):

"location": t.location

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment