Created
June 6, 2015 09:53
-
-
Save dgouldin/3ef28e9b16f7d143274f to your computer and use it in GitHub Desktop.
Twitter friend similarity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import json | |
import os | |
import urlparse | |
from collections import defaultdict | |
from datetime import datetime | |
from dateutil.relativedelta import relativedelta | |
from requests import HTTPError | |
from requests_oauthlib import OAuth1Session | |
# OAuth1 credentials for the session below.  All four environment variables
# are mandatory: a missing one raises KeyError as soon as the module loads.
_oauth_credentials = dict(
    client_key=os.environ['CLIENT_KEY'],
    client_secret=os.environ['CLIENT_SECRET'],
    resource_owner_key=os.environ['RESOURCE_OWNER_KEY'],
    resource_owner_secret=os.environ['RESOURCE_OWNER_SECRET'],
)

# Shared signed session; get() sends its requests through it.
twitter = OAuth1Session(**_oauth_credentials)
def get(path, *args, **kwargs):
    """Send a GET request to a Twitter REST API v1.1 endpoint.

    ``path`` is resolved against ``https://api.twitter.com/1.1/`` with
    ``urljoin``; any further positional and keyword arguments are passed
    straight through to ``twitter.get``.  Returns whatever ``twitter.get``
    returns.
    """
    api_root = 'https://api.twitter.com/1.1/'
    endpoint = urlparse.urljoin(api_root, path)
    response = twitter.get(endpoint, *args, **kwargs)
    return response
# Optional on-disk cache.  When the CACHE environment variable is set it names
# the directory in which friends() stores its results; the directory is
# created here if it does not exist yet.  When unset, caching is disabled.
cache = os.environ.get('CACHE')
if cache:
    if not os.path.exists(cache):
        os.makedirs(cache)
def friends(id=None, screen_name=None):
    """Return the full list of user ids followed by a Twitter user.

    The user is selected by ``id`` or by ``screen_name``.  If the module-level
    ``cache`` directory is configured, a previously stored result for the same
    id / screen name is returned without touching the API, and a freshly
    fetched result is written back as JSON.

    The API returns at most 5000 ids per request, so the result is collected
    page by page by following ``next_cursor`` until the API reports no
    further page.

    Raises:
        requests.HTTPError: when any page request returns an error status
            (for example 429 when rate limited).  Nothing is cached in that
            case, so a later call starts over from the first page.
    """
    cache_filename = None
    if cache:
        cache_filename = os.path.join(cache, '{}'.format(id or screen_name))
        if os.path.exists(cache_filename):
            with open(cache_filename, 'r') as f:
                return json.load(f)

    params = {
        'count': 5000,
        'id': id,
        'screen_name': screen_name,
        # -1 asks for the first page of a cursored collection.
        'cursor': -1,
    }
    ids = []
    while True:
        r = get('friends/ids.json', params=params)
        r.raise_for_status()
        page = r.json()
        ids.extend(page['ids'])
        # A next_cursor of 0 (or a missing one) marks the last page.
        next_cursor = page.get('next_cursor', 0)
        if not next_cursor:
            break
        params['cursor'] = next_cursor

    # Only complete results reach this point, so the cache never holds a
    # truncated friend list.
    if cache_filename:
        with open(cache_filename, 'w') as f:
            json.dump(ids, f)
    return ids
# graph maps an account (screen name for the root user, numeric id for
# everyone else) to a set of user ids.  For the root user that set is
# everyone they follow; for each friend it is the subset of the root user's
# friends that the friend follows too.
graph = defaultdict(set)
graph['dgouldin'] = set(friends(screen_name='dgouldin'))
own_friends = graph['dgouldin']

for friend_id in sorted(own_friends):
    try:
        their_friends = friends(id=friend_id)
    except HTTPError as err:
        if err.response.status_code != 429:
            # Any other HTTP error: skip this account and carry on.
            continue
        # Rate limited: report how long until the window resets, then stop.
        reset_at = datetime.fromtimestamp(
            int(err.response.headers['x-rate-limit-reset']))
        wait = relativedelta(reset_at, datetime.now())
        # Single-argument print(...) emits the same text under the Python 2
        # print statement.
        print('rate limit hit, wait {}m:{}s'.format(wait.minutes,
                                                    wait.seconds))
        break

    # Similarity = share of the root user's friends that this friend also
    # follows (true division comes from the file's __future__ import).
    graph[friend_id] |= set(their_friends) & own_friends
    similarity = len(graph[friend_id]) / len(own_friends)
    print('{}: {:.2f}% similar'.format(friend_id, similarity * 100))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment