Created
February 12, 2010 19:20
-
-
Save paulsmith/302885 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Tallies up the geocoded tweets and geo-enabled users in your Twitter social graph / friend | |
timeline, at most for the last 3,200 friend tweets (as rate-limited by the Twitter API) | |
Prints some stats to stderr, the geocoded tweets in JSON format to stdout | |
I would execute it like so: | |
$ ./geo_tweet_stats.py username password | tee geotweets.json | |
""" | |
import sys | |
import json | |
import subprocess | |
MAX_COUNT = 200  # tweets requested per page
MAX_PAGES = 16   # 16 pages * 200 tweets = the 3,200-tweet ceiling from the module docstring
TIMELINE_URL = 'http://twitter.com/statuses/friends_timeline.json?page=%s&count=%s'


def percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``.

    Returns 0.0 when ``whole`` is zero so that an empty timeline produces a
    report instead of a ZeroDivisionError.
    """
    if not whole:
        return 0.0
    return part / float(whole) * 100


def tally_page(timeline, geo_tweets, all_users, geo_enabled_users):
    """Fold one page of tweets into the running tallies (mutated in place).

    ``timeline`` is a list of tweet dicts; each must carry a ``geo`` key and a
    ``user`` dict with ``name`` and ``geo_enabled`` keys.  Tweets with a truthy
    ``geo`` are appended to ``geo_tweets``; every author name goes into
    ``all_users`` and geo-enabled authors also into ``geo_enabled_users``.
    """
    for tweet in timeline:
        user = tweet['user']
        if tweet['geo']:
            geo_tweets.append(tweet)
        if user['geo_enabled']:
            geo_enabled_users.add(user['name'])
        all_users.add(user['name'])


def fetch_page(username, password, page, count):
    """Fetch one page of the friends timeline via curl and return the decoded JSON.

    Raises ValueError when the response body is not valid JSON (for example
    when curl produced no output).
    """
    # NOTE(review): the password is passed on curl's command line, so it is
    # visible to other local users in the process list -- consider a netrc file.
    command = [
        'curl', '-s',
        '-u', '%s:%s' % (username, password),
        TIMELINE_URL % (page, count),
    ]
    raw = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()[0]
    # Decode explicitly so json.loads receives text on both Python 2 and 3.
    return json.loads(raw.decode('utf-8'))


def main(argv):
    """Tally geocoded tweets and geo-enabled users; stats to stderr, JSON to stdout.

    ``argv`` is the full argument vector (program name, username, password).
    Raises SystemExit when the username and password are not both supplied.
    """
    if len(argv) != 3:
        raise SystemExit('enter your Twitter username and password as arguments')
    username, password = argv[1:]

    geo_tweets = []
    all_users = set()
    geo_enabled_users = set()
    num_tweets = 0
    page = 1
    count = MAX_COUNT

    while num_tweets < MAX_COUNT * MAX_PAGES:
        try:
            timeline = fetch_page(username, password, page, count)
        except ValueError:
            # Empty or non-JSON body: report it and keep what was gathered so far.
            sys.stderr.write('Could not parse the response for page %s; stopping\n' % page)
            break
        if not isinstance(timeline, list):
            # A JSON object instead of a list of tweets -- presumably an API
            # error payload; show it rather than failing while indexing it.
            sys.stderr.write('Unexpected response for page %s: %r\n' % (page, timeline))
            break
        if not timeline:
            break

        tally_page(timeline, geo_tweets, all_users, geo_enabled_users)
        page += 1
        # NOTE(review): shrinking the page size after a short page is kept from
        # the original; with page/count pagination this may shift page
        # boundaries and re-fetch some tweets -- confirm against the API.
        count = min(MAX_COUNT, len(timeline))
        num_tweets += len(timeline)
        sys.stderr.write('Fetched %s tweets\n' % num_tweets)

    sys.stderr.write('Found %s (%.2f%%) tweets were geocoded.\n' % (
        len(geo_tweets), percentage(len(geo_tweets), num_tweets)))
    sys.stderr.write('Found %s (%.2f%%) users were geo-enabled.\n' % (
        len(geo_enabled_users), percentage(len(geo_enabled_users), len(all_users))))
    json.dump(geo_tweets, sys.stdout, sort_keys=True, indent=4)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment