Created
February 6, 2015 02:38
-
-
Save antiboredom/e292e807cffe6dece694 to your computer and use it in GitHub Desktop.
Scrapes Google autocomplete suggestions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import contextlib
import sys
import time
import urllib
import xml.etree.ElementTree as ET
# Suffix alphabet: the driver loop further down appends each letter, and each
# ordered pair of letters, to the base URL.
chars = 'abcdefghijklmnopqrstuvwxyz'

# The seed query is the first command-line argument. Exit with a usage
# message rather than an IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s QUERY' % sys.argv[0])
q = sys.argv[1]

# Base request URL. The query is stripped and then followed by exactly one
# space (percent-encoded by urllib.quote), so any suffix appended later starts
# a new word after the query.
url = 'http://suggestqueries.google.com/complete/search?output=toolbar&hl=en&q=' + urllib.quote(q.strip() + ' ')
def suggest(url):
    """Fetch the suggestions served at *url* and print each one to stdout.

    The response body is parsed as XML. For every child of the root element,
    the ``data`` attribute of that child's first sub-element is printed on its
    own line, encoded as UTF-8. Children without a sub-element, or whose first
    sub-element has no ``data`` attribute, are skipped.

    Args:
        url: Fully formed request URL (query already percent-encoded).

    Returns:
        ``None`` after a response was fetched and parsed; ``False`` when the
        request failed or the body could not be parsed as XML.
    """
    try:
        # The object returned by urllib.urlopen is not a context manager on
        # Python 2, so closing() is what guarantees the connection is released.
        with contextlib.closing(urllib.urlopen(url)) as response:
            result = response.read()
        root = ET.fromstring(result)
    except Exception as err:
        # Best effort: one failed request must not abort the whole scrape.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate, so Ctrl-C still stops the program.
        sys.stderr.write('suggest: request failed for %s: %s\n' % (url, err))
        return False

    for child in root:
        # Skip malformed entries individually instead of discarding the
        # remaining suggestions in this response.
        if len(child) == 0:
            continue
        data = child[0].get('data')
        if data is None:
            continue
        print(data.encode('utf8'))
def _query_and_pause(target, delay=.1):
    """Print the suggestions for *target*, then sleep *delay* seconds."""
    suggest(target)
    time.sleep(delay)


# First the bare query, then the query followed by every single letter, and
# after each single letter every two-letter suffix that starts with it.
suggest(url)
for c1 in chars:
    _query_and_pause(url + c1)
    for c2 in chars:
        _query_and_pause(url + c1 + c2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment