Created
April 23, 2016 20:35
-
-
Save NikolasTzimoulis/1d589828e610a6d889398be87a7daf3e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
"""Collect web-search hits for one search term restricted to several sites.

Python 2 script.  For every entry in ``websites`` it queries the
ajax.googleapis.com web-search endpoint for ``searchTerm`` plus a
``site:`` filter, prints each hit, appends it as a link to an HTML log
file, and finally opens that log file.
"""
import json
import urllib
import urlparse
import time
import os

searchTerm = "software engineer"
websites = ["indeed.com", "monster.com"]
maxResults = 8  # how many results to grab per website at most
waitSeconds = 30  # how many seconds to wait between requests
logFileName = "log.html"  # file name where the results will be written

pageSize = 8  # results asked for per request (sent as the rsz= parameter)
apiUrl = 'http://ajax.googleapis.com/ajax/services/search/web'


def _attr_escape(text):
    """Escape `text` so it is safe inside a double-quoted HTML attribute."""
    return (text.replace('&', '&amp;').replace('"', '&quot;')
            .replace('<', '&lt;').replace('>', '&gt;'))


def _fetch_page(site, offset):
    """Return the result dicts for `site`, starting at result index `offset`.

    Raises IOError on network failure, ValueError on malformed JSON, and
    KeyError/TypeError when the reply lacks the expected
    responseData/results structure.
    """
    query = urllib.urlencode({'q': searchTerm + " site:" + site})
    # `start` is the index of the first result wanted, not a page number,
    # so the caller passes page * pageSize.
    address = (apiUrl + '?rsz=' + str(pageSize) + '&v=1.0&' + query +
               '&start=' + str(offset))
    response = urllib.urlopen(address).read()
    return json.loads(response)['responseData']['results']


# Round up so a maxResults that is not a multiple of pageSize still gets
# its last (partial) page; the surplus is trimmed per page below.
pageCount = (int(maxResults) + pageSize - 1) // pageSize

first = True
with open(logFileName, 'w') as logFile:
    for w in websites:
        logFile.write("<h1>" + w + "</h1>")
        remaining = int(maxResults)
        for request in range(pageCount):
            # Pause between consecutive requests, but not before the first.
            if not first:
                time.sleep(waitSeconds)
            first = False
            print("Results from " + w + ", page " + str(request + 1) + ":")
            try:
                results = _fetch_page(w, request * pageSize)[:remaining]
            except (IOError, ValueError, KeyError, TypeError) as error:
                # Best effort: report the failed page and carry on.
                print("  request failed: " + repr(error))
                continue
            remaining -= len(results)
            for result in results:
                try:
                    title = result['title']
                    # Unquote on bytes and decode afterwards; calling
                    # .decode() on a unicode string would first encode it
                    # as ASCII and fail on non-ASCII URLs.
                    url = urllib.unquote(
                        result['url'].encode('utf-8')
                    ).decode('utf-8', 'replace')
                    # NOTE(review): title is written unescaped, as in the
                    # original; presumably the API returns it with HTML
                    # markup already -- confirm before escaping it.
                    line = (u'<p><a href="' + _attr_escape(url) + u'">' +
                            title + u'</a></p>')
                    logFile.write(line.encode('utf-8'))
                except (KeyError, TypeError, AttributeError,
                        ValueError) as error:
                    # One malformed result must not drop the rest of the page.
                    print("  skipped a result: " + repr(error))
                    continue
                try:
                    print(title + u'; ' + url)
                except UnicodeError:
                    # The console cannot show this text; the log entry is
                    # already written, so fall back to an ASCII-safe form.
                    print(repr(title + u'; ' + url))

# os.startfile only exists on Windows; elsewhere just report the path.
if hasattr(os, 'startfile'):
    os.startfile(logFileName)
else:
    print("Results written to " + logFileName)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment