Skip to content

Instantly share code, notes, and snippets.

@Plutor
Created March 1, 2013 14:28
Show Gist options
  • Select an option

  • Save Plutor/5065001 to your computer and use it in GitHub Desktop.

Select an option

Save Plutor/5065001 to your computer and use it in GitHub Desktop.
import json
import sys
import time
import urllib2
# Breadth-first crawl of a graph of tweets that link to one another.
#
# Starting from the seed ID in `nodes`, each tweet is fetched to find the
# tweet its first URL points at, and the search endpoint is queried for
# tweets that mention it.  Edges are printed to stdout as "from -> to";
# progress and errors go to stderr.
#
# NOTE(review): both endpoints below are legacy, unauthenticated Twitter
# URLs -- confirm they are still reachable before relying on this script.

nodes = ["307035796257525760"]  # FIFO queue of IDs still to visit (seed)
max_branches = 2                # sent as `rpp`: search results per node
sleep = 1                       # seconds to pause between nodes
nodes_done = []                 # IDs already processed, in visit order
# Every ID that has ever been queued.  Checking only `nodes_done` would let
# an ID that is still waiting in the queue be queued (and fetched) again.
nodes_seen = set(nodes)


def _fetch_json(url):
    """Fetch `url` and return its decoded JSON body, closing the response."""
    response = urllib2.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        response.close()


def _enqueue(node_id):
    """Append `node_id` to the queue unless it has been queued before."""
    if node_id not in nodes_seen:
        nodes_seen.add(node_id)
        nodes.append(node_id)


while nodes:
    node = nodes.pop(0)
    try:
        # First get this node
        j = _fetch_json('https://api.twitter.com/1/statuses/show.json?id=%s&include_entities=true' % node)

        # Add the node this one points to.  Only the first URL entity is
        # followed; the last path segment of its expanded URL is used as the
        # next ID (presumably a status ID -- verify against real data).
        points_to = j["entities"]["urls"][0]["expanded_url"]
        points_to = points_to.split('/')[-1]
        # A parenthesised single-argument print emits the same text on
        # Python 2 and Python 3.
        print('%s -> %s' % (node, points_to))
        _enqueue(points_to)

        # Then search for this URL
        j = _fetch_json('http://search.twitter.com/search.json?q=%s&rpp=%d&include_entities=true&result_type=mixed' % (node, max_branches))

        # Add everything that links to it
        for result in j["results"]:
            points_from = result["id_str"]
            print('%s -> %s' % (points_from, node))
            _enqueue(points_from)
    except Exception:
        # Best effort: a failed fetch or an unexpected response shape skips
        # the rest of this node.  Catching Exception (not a bare except)
        # leaves KeyboardInterrupt/SystemExit free to stop the crawl.
        sys.stderr.write('caught error with %s\n' % node)

    nodes_done.append(node)
    sys.stderr.write('%d done, %d in queue\n' % (len(nodes_done), len(nodes)))

    # Hard cap so the crawl cannot run unbounded.
    if len(nodes_done) > 1000:
        break

    time.sleep(sleep)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment