Skip to content

Instantly share code, notes, and snippets.

@paulsmith
Created February 12, 2010 19:20
Show Gist options
  • Save paulsmith/302885 to your computer and use it in GitHub Desktop.
Save paulsmith/302885 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Tallies up the geocoded tweets and geo-enabled users in your Twitter social graph / friend
timeline, for at most the last 3,200 friend tweets (as rate-limited by the Twitter API).
Prints some stats to stderr and the geocoded tweets in JSON format to stdout.
I would execute it like so:
$ ./geo_tweet_stats.py username password | tee geotweets.json
"""
import sys
import json
import subprocess
# Largest page size requested per call, and the number of pages fetched at most;
# together they cap the run at MAX_COUNT * MAX_PAGES tweets.
MAX_COUNT = 200
MAX_PAGES = 16
TIMELINE_URL = 'http://twitter.com/statuses/friends_timeline.json?page=%s&count=%s'


def percentage(part, whole):
    """Return ``part`` as a percentage of ``whole``.

    Returns 0.0 when ``whole`` is zero, so that an empty timeline yields
    "0.00%" in the stats instead of raising ZeroDivisionError.
    """
    if not whole:
        return 0.0
    return part / float(whole) * 100


def tally_timeline(timeline, geo_tweets, all_users, geo_enabled_users):
    """Fold one page of tweets into the running tallies (mutated in place).

    timeline          -- list of tweet dicts with 'geo' and 'user' keys
    geo_tweets        -- list; receives every tweet whose 'geo' is truthy
    all_users         -- set; receives the user name of every tweet
    geo_enabled_users -- set; receives the names of users whose
                         'geo_enabled' flag is truthy
    """
    for tweet in timeline:
        if tweet['geo']:
            geo_tweets.append(tweet)
        user = tweet['user']
        if user['geo_enabled']:
            geo_enabled_users.add(user['name'])
        all_users.add(user['name'])


def fetch_timeline(username, password, page, count):
    """Fetch one page of the friends timeline with curl and return it as a list.

    Raises SystemExit with a readable message when the response is not valid
    JSON (e.g. curl produced no output) or is not a JSON list (e.g. an error
    object), rather than failing later with an unrelated TypeError.
    """
    # NOTE(review): the password is placed on curl's command line, where other
    # local users may be able to see it in the process list.
    command = ['curl', '-s', '-u', '%s:%s' % (username, password),
               TIMELINE_URL % (page, count)]
    process = subprocess.Popen(command, stdout=subprocess.PIPE)
    timeline_raw = process.communicate()[0]
    try:
        # Decode once at the I/O boundary; works on both Python 2 and 3.
        timeline = json.loads(timeline_raw.decode('utf-8'))
    except ValueError:
        raise SystemExit('could not parse the timeline response as JSON '
                         '(curl exit status %s)' % process.returncode)
    if not isinstance(timeline, list):
        raise SystemExit('unexpected response from Twitter: %r' % (timeline,))
    return timeline


def main(argv):
    """Fetch the friends timeline, print stats to stderr and geo tweets to stdout.

    argv -- full argument vector: program name, Twitter username, password.
    Raises SystemExit when the username and password are not both supplied.
    """
    if len(argv) != 3:
        raise SystemExit('enter your Twitter username and password as arguments')
    username, password = argv[1:]

    geo_tweets = []
    all_users = set()
    geo_enabled_users = set()
    num_tweets = 0
    page = 1
    count = MAX_COUNT

    while num_tweets < MAX_COUNT * MAX_PAGES:
        timeline = fetch_timeline(username, password, page, count)
        if not timeline:
            # An empty page means there is nothing further to fetch.
            break
        tally_timeline(timeline, geo_tweets, all_users, geo_enabled_users)
        page += 1
        # NOTE(review): count shrinks to the size of the last page, exactly as
        # before; if the API derives page offsets from count this could
        # re-fetch tweets -- confirm against the API's paging rules.
        count = min(MAX_COUNT, len(timeline))
        num_tweets += len(timeline)
        sys.stderr.write('Fetched %s tweets\n' % num_tweets)

    sys.stderr.write('Found %s (%.2f%%) tweets were geocoded.\n' % (
        len(geo_tweets), percentage(len(geo_tweets), num_tweets)))
    sys.stderr.write('Found %s (%.2f%%) users were geo-enabled.\n' % (
        len(geo_enabled_users),
        percentage(len(geo_enabled_users), len(all_users))))
    json.dump(geo_tweets, sys.stdout, sort_keys=True, indent=4)


if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment