prakhar1989 · August 29, 2015 14:21 · prakhar1989 · May 14, 2015
diff --git a/tweets.py b/tweets.py
 import twitter
 import logging
 import time
 import csv

 ### Install dependencies by running
 ### pip install python-twitter

 # Squelch the SSL warnings: messages issued through the `warnings` module
 # are redirected into the logging system instead of being printed.
 logging.captureWarnings(True)

 # Placeholder OAuth values for the Twitter client.
 _API_CREDENTIALS = {
    "consumer_key": "key",
    "consumer_secret": "secret",
    "access_token_key": "key",
    "access_token_secret": "secret",
 }
 api = twitter.Api(**_API_CREDENTIALS)

 def map_tweet_to_dict(t):
    """Flatten a tweet object into a plain dict, one entry per CSV column.

    Ids and counters are copied as-is from ``t`` and ``t.user``; the two
    text fields are UTF-8 encoded, and a falsy reply target is replaced by
    the string "none".
    """
    author = t.user
    # Keep this as a single literal in this key order: the CSV header is
    # derived from the keys of the first row.
    # NOTE(review): "screen_name" is filled from ``user.name`` -- confirm
    # whether ``user.screen_name`` was intended.
    row = {
        "user_id": author.id,
        "retweet_count": t.retweet_count,
        "favorite_count": t.favorite_count,
        "inreplyto": t.in_reply_to_screen_name or "none",
        "friends": author.friends_count,
        "screen_name": author.name.encode('utf-8'),
        "followers": author.followers_count,
        "listed": author.listed_count,
        "text": t.text.encode('utf-8'),
        "tweetid": t.id,
    }
    return row



 def dump_to_csv(tweets, filename="tweets.csv"):
    """Write a list of dicts to ``filename`` as CSV.

    The header row is taken from the keys of the first dict.

    Args:
        tweets: list of dicts, one per output row. May be empty, in which
            case the file is created (or truncated) and left empty because
            no header can be derived.
        filename: path of the CSV file to create or overwrite.
    """
    with open(filename, 'w') as csvfile:
        if not tweets:
            # Nothing to derive the header from; avoid IndexError on tweets[0].
            return
        fieldnames = list(tweets[0])
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        # On Windows, use the writer below (explicit line terminator)
        # instead of the one above.
        # writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n', fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(tweets)


 def tweets_to_csv(term, count, cached_tweets=None, max_id=None):
    """Search for ``term`` and dump up to ``count`` tweets to CSV.

    The search is requested in pages of at most 100 tweets. After each page
    the next request is capped just below the lowest tweet id seen on that
    page, so consecutive pages do not overlap. The collected rows are handed
    to ``dump_to_csv`` once paging is finished.

    Args:
        term: search query passed to ``api.GetSearch``.
        count: total number of tweets wanted.
        cached_tweets: optional list that mapped tweets are appended to
            (mutated in place). A fresh list is used when omitted, so
            separate calls no longer share rows.
        max_id: optional upper bound on tweet ids for the first request.
    """
    if cached_tweets is None:
        cached_tweets = []

    remaining = count
    while True:
        page_size = min(remaining, 100)
        tweets = api.GetSearch(term=term, max_id=max_id, count=page_size)

        # A real list (not a lazy map object) so it can be inspected below.
        mapped_tweets = [map_tweet_to_dict(tweet) for tweet in tweets]
        cached_tweets.extend(mapped_tweets)

        remaining -= 100
        if remaining <= 0 or not mapped_tweets:
            # Either enough was requested or the search ran dry.
            break

        # max_id is inclusive in the search API: step one below the lowest
        # id of this page so that tweet is not fetched a second time.
        max_id = min(row["tweetid"] for row in mapped_tweets) - 1
        print("Fetching the next %s" % remaining)
        time.sleep(1)  # be a good netizen

    dump_to_csv(cached_tweets)


 if __name__ == "__main__":
    # Example run: fetch 130 tweets matching the query and write them to CSV.
    tweets_to_csv("MS Dhoni IPL", 130)
	import twitter
	import logging
	import time
	import csv

	### Install dependencies by running
	### pip install python-twitter

	# Squelch the SSL warnings: messages issued through the `warnings` module
	# are redirected into the logging system instead of being printed.
	logging.captureWarnings(True)

	# Placeholder OAuth values for the Twitter client.
	_API_CREDENTIALS = {
	    "consumer_key": "key",
	    "consumer_secret": "secret",
	    "access_token_key": "key",
	    "access_token_secret": "secret",
	}
	api = twitter.Api(**_API_CREDENTIALS)

	def map_tweet_to_dict(t):
	    """Flatten a tweet object into a plain dict, one entry per CSV column.

	    Ids and counters are copied as-is from ``t`` and ``t.user``; the two
	    text fields are UTF-8 encoded, and a falsy reply target is replaced
	    by the string "none".
	    """
	    # NOTE(review): "screen_name" is filled from ``user.name`` -- confirm
	    # whether ``user.screen_name`` was intended.
	    return {
	        "user_id": t.user.id,
	        "retweet_count": t.retweet_count,
	        "favorite_count": t.favorite_count,
	        "inreplyto": t.in_reply_to_screen_name or "none",
	        "friends": t.user.friends_count,
	        "screen_name": t.user.name.encode('utf-8'),
	        "followers": t.user.followers_count,
	        "listed": t.user.listed_count,
	        "text": t.text.encode('utf-8'),
	        "tweetid": t.id,
	    }



	def dump_to_csv(tweets, filename="tweets.csv"):
	    """Write a list of dicts to ``filename`` as CSV.

	    The header row is taken from the keys of the first dict.

	    Args:
	        tweets: list of dicts, one per output row. May be empty, in
	            which case the file is created (or truncated) and left
	            empty because no header can be derived.
	        filename: path of the CSV file to create or overwrite.
	    """
	    with open(filename, 'w') as csvfile:
	        if not tweets:
	            # Nothing to derive the header from; avoid IndexError on tweets[0].
	            return
	        fieldnames = list(tweets[0])
	        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
	        # On Windows, use the writer below (explicit line terminator)
	        # instead of the one above.
	        # writer = csv.DictWriter(csvfile, delimiter=',', lineterminator='\n', fieldnames=fieldnames)
	        writer.writeheader()
	        writer.writerows(tweets)


	def tweets_to_csv(term, count, cached_tweets=None, max_id=None):
	    """Search for ``term`` and dump up to ``count`` tweets to CSV.

	    The search is requested in pages of at most 100 tweets. After each
	    page the next request is capped just below the lowest tweet id seen
	    on that page, so consecutive pages do not overlap. The collected rows
	    are handed to ``dump_to_csv`` once paging is finished.

	    Args:
	        term: search query passed to ``api.GetSearch``.
	        count: total number of tweets wanted.
	        cached_tweets: optional list that mapped tweets are appended to
	            (mutated in place). A fresh list is used when omitted, so
	            separate calls no longer share rows.
	        max_id: optional upper bound on tweet ids for the first request.
	    """
	    if cached_tweets is None:
	        cached_tweets = []

	    remaining = count
	    while True:
	        page_size = min(remaining, 100)
	        tweets = api.GetSearch(term=term, max_id=max_id, count=page_size)

	        # A real list (not a lazy map object) so it can be inspected below.
	        mapped_tweets = [map_tweet_to_dict(tweet) for tweet in tweets]
	        cached_tweets.extend(mapped_tweets)

	        remaining -= 100
	        if remaining <= 0 or not mapped_tweets:
	            # Either enough was requested or the search ran dry.
	            break

	        # max_id is inclusive in the search API: step one below the lowest
	        # id of this page so that tweet is not fetched a second time.
	        max_id = min(row["tweetid"] for row in mapped_tweets) - 1
	        print("Fetching the next %s" % remaining)
	        time.sleep(1)  # be a good netizen

	    dump_to_csv(cached_tweets)


	if __name__ == "__main__":
	    # Sample usage: fetch 130 tweets matching the query and write them to CSV.
	    tweets_to_csv(term="MS Dhoni IPL", count=130)