|
#!/usr/bin/env python |
|
"""Print out list of authors common in two subreddits.""" |
|
|
|
from json import loads |
|
# from hashlib import sha1 |
|
import sys |
|
from time import sleep |
|
from urllib2 import urlopen |
|
|
|
URLBASE = 'http://www.reddit.com/r/%s/.json' |
|
|
|
|
|
def usage(msg=None):
    """Report an optional error and the usage line on stderr, then exit.

    When ``msg`` is truthy it is written first as ``error: <msg>`` followed
    by a blank line. The usage line names the program by the last
    '/'-separated component of ``sys.argv[0]``. Always exits with status 1.
    """
    emit = sys.stderr.write

    if msg:
        emit('error: %s\n\n' % msg)

    # show just the script name, not the path it was invoked with
    program = sys.argv[0].split('/')[-1]
    emit('usage: %s subreddit-1 subreddit-2\n' % program)

    sys.exit(1)
|
|
|
|
|
def fetchdata(suburl, delay=2):
    """Fetch one listing page; return its authors and pagination token.

    Args:
        suburl: URL of a listing in JSON form.
        delay: seconds to sleep before the request, for rate limiting.
            A falsy value skips the pause. Defaults to 2.

    Returns:
        A ``(users, after)`` tuple. ``users`` is the set of ``author``
        values found under ``data.children`` (empty when there are none)
        and ``after`` is the ``data.after`` token, or None when it is
        missing or empty. Returns None instead of a tuple when fetching or
        decoding the page raised; the error is reported on stderr.
    """
    after = None
    users = set()

    # they want us to rate-limit queries
    if delay:
        sleep(delay)

    try:
        response = urlopen(suburl)
        try:
            data = response.read()
        finally:
            # release the connection even when the read fails
            response.close()
        jdata = loads(data)
    except Exception as exc:
        # best effort: report the failure and let the caller decide;
        # the trailing newline keeps successive reports on separate lines
        sys.stderr.write('exception raised for %s: %s\n' % (suburl, str(exc)))
        return None

    if 'data' not in jdata:
        print('no data in %s; quitting' % suburl)
        return (users, after)

    listing = jdata['data']

    if 'after' in listing and listing['after']:
        after = listing['after']
    else:
        print('reached the end of %s' % suburl)

    if 'children' in listing and len(listing['children']) > 0:
        users = set(story['data']['author'] for story in listing['children'])
    else:
        print('no data in %s; quitting' % suburl)

    return (users, after)
|
|
|
|
|
def loadloop(subreddit1, subreddit2):
    """Page through both listings in lockstep, printing shared authors.

    Each pass fetches one page from ``subreddit1`` and then one from
    ``subreddit2`` (both are base listing URLs), accumulating the authors
    seen on each side. Whenever the set of authors common to both is
    non-empty and differs from the last one printed, it is printed in
    sorted order. Looping ends after the first pass in which either fetch
    fails or yields no ``after`` token.
    """
    bases = (subreddit1, subreddit2)

    # per-subreddit state, indexed in step with ``bases``
    authors = [set(), set()]
    cursors = [None, None]

    # last intersection that was printed
    reported = set()

    finished = False
    passes = 0

    while not finished:
        for idx, base in enumerate(bases):
            # handle pagination: only later pages carry an 'after' token
            if cursors[idx]:
                url = '%s?after=%s' % (base, cursors[idx])
            else:
                url = base

            result = fetchdata(url)
            if result:
                found, cursors[idx] = result
                authors[idx].update(found)

            # a failed fetch or a missing token ends the run after this pass
            if not result or not cursors[idx]:
                finished = True

        passes += 1
        common = authors[0] & authors[1]
        if common and common != reported:
            reported = common
            print('loop: %d; common so far:' % passes)
            for name in sorted(common):
                print(' http://www.reddit.com/user/%s' % name)
|
|
|
|
|
def main(args):
    """Validate the argument count, then compare the two subreddits.

    ``args`` is the list of command-line arguments without the program
    name. Anything other than exactly two entries triggers ``usage()``;
    otherwise each name is expanded with ``URLBASE`` and passed to
    ``loadloop()``.
    """
    if not len(args) == 2:
        usage()

    urls = [URLBASE % name for name in args[:2]]
    loadloop(urls[0], urls[1])
|
|
|
|
|
# Script entry point: pass everything after the program name to main().
if __name__ == "__main__":
    main(sys.argv[1:])
|
|
|
# eof |