aparrish · April 26, 2012 16:54
diff --git a/gistfile1.py b/gistfile1.py
 import urllib2
 import simplejson
 import re
 import requests
 from bs4 import BeautifulSoup
 from random import choice


 # Query the web-search endpoint at ajax.googleapis.com for a term, follow
 # the "moreResultsUrl" it returns (rewritten to start at result 100), and
 # collect the first query-parameter value of every "/url?" link found on
 # that page.
 #
 # The request also includes the userip parameter which provides the end
 # user's IP address. Doing so will help distinguish this legitimate
 # server-side traffic from traffic which doesn't come from an end-user.

 # NOTE: the term is interpolated into the URL unescaped; URL-encode it
 # first if it can ever contain spaces, "&" or other reserved characters.
 query = "hotdog"  # dynamic id from site
 search_url = ('https://ajax.googleapis.com/ajax/services/search/web'
               '?v=1.0&q=' + query + '&userip=USERS-IP-ADDRESS')

 request = urllib2.Request(search_url, None, {'Referer': "www.google.com"})
 response = urllib2.urlopen(request)  # opening url

 # Process the JSON string: drill down to the "more results" page URL.
 payload = simplejson.load(response)
 results = payload["responseData"]["cursor"]["moreResultsUrl"]
 print(results)

 # Jump to a later results page by rewriting the first "start=0" to
 # "start=100". str.replace leaves the URL untouched when the marker is
 # absent, whereas indexing the pieces of a split raised IndexError.
 newurl = results.replace("start=0", "start=100", 1)
 print("found!!!" + newurl)

 page = requests.get(newurl)
 soup = BeautifulSoup(page.text)

 # Anchors without an href attribute yield None from .get(); drop them so
 # the string tests below only ever see real strings.
 hrefs = [anchor.get("href") for anchor in soup.find_all("a")]
 hrefs = [href for href in hrefs if href]

 urls = []
 for href in hrefs:
     # Literal substring test: used as a regex, "/url?" made the "l"
     # optional and therefore also matched any href containing "/ur".
     if "/url?" not in href:
         continue
     # The wanted value is that of the first query parameter. Split on the
     # first "=" only, so values that themselves contain "=" stay whole.
     first_param = href.split("&", 1)[0]
     _, sep, target = first_param.partition("=")
     if not sep:
         continue  # no "=" before the first "&": nothing to extract
     print("item: " + target)
     urls.append(target)

 for found in urls:
     print(found)
	import urllib2
	import simplejson
	import re
	import requests
	from bs4 import BeautifulSoup
	from random import choice


	# Query the web-search endpoint at ajax.googleapis.com for a term, follow
	# the "moreResultsUrl" it returns (rewritten to start at result 100), and
	# collect the first query-parameter value of every "/url?" link found on
	# that page.
	#
	# The request also includes the userip parameter which provides the end
	# user's IP address. Doing so will help distinguish this legitimate
	# server-side traffic from traffic which doesn't come from an end-user.

	# NOTE: the term is interpolated into the URL unescaped; URL-encode it
	# first if it can ever contain spaces, "&" or other reserved characters.
	query = "hotdog"  # dynamic id from site
	search_url = ('https://ajax.googleapis.com/ajax/services/search/web'
		'?v=1.0&q=' + query + '&userip=USERS-IP-ADDRESS')

	request = urllib2.Request(search_url, None, {'Referer': "www.google.com"})
	response = urllib2.urlopen(request)  # opening url

	# Process the JSON string: drill down to the "more results" page URL.
	payload = simplejson.load(response)
	results = payload["responseData"]["cursor"]["moreResultsUrl"]
	print(results)

	# Jump to a later results page by rewriting the first "start=0" to
	# "start=100". str.replace leaves the URL untouched when the marker is
	# absent, whereas indexing the pieces of a split raised IndexError.
	newurl = results.replace("start=0", "start=100", 1)
	print("found!!!" + newurl)

	page = requests.get(newurl)
	soup = BeautifulSoup(page.text)

	# Anchors without an href attribute yield None from .get(); drop them so
	# the string tests below only ever see real strings.
	hrefs = [anchor.get("href") for anchor in soup.find_all("a")]
	hrefs = [href for href in hrefs if href]

	urls = []
	for href in hrefs:
		# Literal substring test: used as a regex, "/url?" made the "l"
		# optional and therefore also matched any href containing "/ur".
		if "/url?" not in href:
			continue
		# The wanted value is that of the first query parameter. Split on the
		# first "=" only, so values that themselves contain "=" stay whole.
		first_param = href.split("&", 1)[0]
		_, sep, target = first_param.partition("=")
		if not sep:
			continue  # no "=" before the first "&": nothing to extract
		print("item: " + target)
		urls.append(target)

	for found in urls:
		print(found)