Created
March 1, 2013 14:28
-
-
Save Plutor/5065001 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import sys | |
| import time | |
| import urllib2 | |
# Breadth-first crawl of tweets that link to one another, starting from a
# seed tweet ID.  For every node visited the script
#   1. fetches the tweet and follows the first URL entity it contains, and
#   2. searches for tweets mentioning the node's ID (up to max_branches).
# Each discovered edge is printed to stdout as "<from_id> -> <to_id>";
# progress and error messages go to stderr.
#
# NOTE(review): the original indentation was lost in this copy of the file;
# the nesting below is reconstructed from the statement order -- confirm
# against the upstream script.

nodes = ["307035796257525760"]  # FIFO queue of tweet IDs still to visit
max_branches = 2                # results requested per search (rpp=)
sleep = 1                       # seconds to pause between nodes
nodes_done = []                 # IDs already visited, in visit order
nodes_seen = set(nodes)         # every ID ever queued; prevents re-queuing

while nodes:
    node = nodes.pop(0)

    try:
        # First get this node
        j = json.loads(urllib2.urlopen('https://api.twitter.com/1/statuses/show.json?id=%s&include_entities=true' % node).read())

        # Add the node this one points to: the last path segment of the
        # tweet's first expanded URL is used as the next ID to visit.
        points_to = j["entities"]["urls"][0]["expanded_url"]
        points_to = points_to.split('/')[-1]
        print('%s -> %s' % (node, points_to))
        if points_to not in nodes_seen:
            nodes_seen.add(points_to)
            nodes.append(points_to)

        # Then search for this URL
        j = json.loads(urllib2.urlopen('http://search.twitter.com/search.json?q=%s&rpp=%d&include_entities=true&result_type=mixed' % (node, max_branches)).read())

        # Add everything that links to it
        for result in j["results"]:
            points_from = result["id_str"]
            print('%s -> %s' % (points_from, node))
            if points_from not in nodes_seen:
                nodes_seen.add(points_from)
                nodes.append(points_from)
    except Exception:
        # Best effort: any fetch/parse failure (HTTP error, missing URL
        # entity, malformed JSON) abandons this node and moves on.
        # Catching Exception rather than everything lets Ctrl-C and
        # SystemExit stop the crawl instead of being logged as an error.
        sys.stderr.write('caught error with %s\n' % node)

    nodes_done.append(node)
    sys.stderr.write('%d done, %d in queue\n' % (len(nodes_done), len(nodes)))

    # Hard cap so a large link graph cannot keep the crawl running forever.
    if len(nodes_done) > 1000:
        break

    # Pause between nodes to stay gentle on the remote API.
    time.sleep(sleep)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment