Skip to content

Instantly share code, notes, and snippets.

@faried
Created June 24, 2011 07:37
Show Gist options
  • Save faried/1044393 to your computer and use it in GitHub Desktop.
Save faried/1044393 to your computer and use it in GitHub Desktop.
Print out list of authors common in two subreddits.
#!/usr/bin/env python
"""Print out list of authors common in two subreddits."""
from json import loads
# from hashlib import sha1
import sys
from time import sleep
from urllib2 import urlopen
# Template for a subreddit's JSON listing URL; main() fills the '%s'
# placeholder with each subreddit name given on the command line.
URLBASE = 'http://www.reddit.com/r/%s/.json'
def usage(msg=None):
    """Write the usage line to stderr and exit with status 1.

    When `msg` is given (and non-empty) it is reported first as an
    'error: ...' line followed by a blank line.  This function never
    returns: it always ends with sys.exit(1).
    """
    if msg:
        sys.stderr.write('error: %s\n\n' % msg)
    # show only the last path component of the script name
    progname = sys.argv[0].split('/')[-1]
    sys.stderr.write('usage: %s subreddit-1 subreddit-2\n' % progname)
    sys.exit(1)
def fetchdata(suburl, delay=2):
    """Fetch one listing page and return its authors and 'after' token.

    Arguments:
      suburl -- URL of a JSON listing page.
      delay  -- seconds to sleep before the request (default 2, the
                rate limit the original code used); 0 disables the wait.

    Returns a tuple (users, after):
      users -- set of the 'author' values of the page's children; empty
               when the page has no 'data' or no children.
      after -- the page's 'after' value, or None when it is missing or
               empty (i.e. there is no further page to request).

    Returns None instead of a tuple when the download or the JSON decoding
    raises; the error is reported on stderr.
    """
    from contextlib import closing

    # they want us to rate-limit queries
    if delay and delay > 0:
        sleep(delay)

    try:
        # closing() guarantees the response is released even if read() or
        # the JSON decoding fails.
        with closing(urlopen(suburl)) as response:
            jdata = loads(response.read())
    except Exception as exc:
        sys.stderr.write('exception raised for %s: %s\n' % (suburl, exc))
        return None

    # Valid JSON need not be an object, and 'data' need not be one either;
    # treat anything else as a page without data rather than crashing.
    listing = jdata.get('data') if isinstance(jdata, dict) else None
    if not isinstance(listing, dict):
        print('no data in %s; quitting' % suburl)
        return (set(), None)

    # A missing, null or empty 'after' all mean this was the last page.
    after = listing.get('after') or None
    if not after:
        print('reached the end of %s' % suburl)

    children = listing.get('children')
    if not children:
        print('no data in %s; quitting' % suburl)
        return (set(), after)

    users = set(story['data']['author'] for story in children)
    return (users, after)
def loadloop(subreddit1, subreddit2):
    """Page through two listings, printing the authors they have in common.

    Both arguments are listing URLs.  Each pass fetches the next page of
    every listing that still has one, merges that page's authors into the
    listing's running set, and prints the sorted intersection of the two
    sets whenever it differs from the intersection printed last.

    A listing is finished once fetchdata() fails for it or reports no
    'after' token.  Paging goes on until both listings are finished, so
    authors on the later pages of the longer listing are still compared
    against everything collected from the shorter one.  The loop also ends
    early when a finished listing has yielded no authors at all, because
    nothing could ever match it.
    """
    bases = (subreddit1, subreddit2)
    # per-listing state, indexed 0 / 1
    users = [set(), set()]
    afters = [None, None]      # pagination tokens
    done = [False, False]
    # intersection already printed
    seen = set()
    loopcount = 0

    while not all(done):
        for idx in (0, 1):
            if done[idx]:
                continue
            suburl = bases[idx]
            if afters[idx]:
                suburl = '%s?after=%s' % (bases[idx], afters[idx])
            data = fetchdata(suburl)
            if data:
                users[idx].update(data[0])
                afters[idx] = data[1]
            # a failed fetch or a missing token both end this listing
            if not (data and afters[idx]):
                done[idx] = True

        loopcount += 1

        # Don't keep paging one listing when the other is finished and empty.
        if (done[0] and not users[0]) or (done[1] and not users[1]):
            break

        common = users[0] & users[1]
        if common and common != seen:
            seen = common
            print('loop: %d; common so far:' % loopcount)
            for user in sorted(common):
                print(' http://www.reddit.com/user/%s' % user)
def main(args):
    """Validate the command-line arguments and start the comparison.

    `args` must hold exactly two subreddit names; otherwise usage() is
    called, which prints the usage line and exits.  Each name is expanded
    into its listing URL through URLBASE before being passed to loadloop().
    """
    if len(args) != 2:
        usage()
    first_url = URLBASE % args[0]
    second_url = URLBASE % args[1]
    loadloop(first_url, second_url)
# Run only when executed as a script; pass the arguments without the
# program name, which is the form main() expects.
if __name__ == '__main__':
    main(sys.argv[1:])
# eof
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment