import google  # pygoogle wrapper around the old Google SOAP Search API
import time, csv, urllib, sys

google.LICENSE_KEY = 'YOURKEY'  # your Google SOAP API license key
# Sample usage: python search.py wp.conf
# wp.conf should look like this (site on the first line, then "--",
# then one keyword phrase per line):
# en.wikipedia.org
# --
# foo
# word
# encyclopedia
# larry sanger
# jimbo wales
# your mom
class SearchAnalyzer:
    """Tracks where a site ranks in Google results for a set of queries."""

    def __init__(self, site, searches):
        self.site = site          # domain to track, e.g. 'en.wikipedia.org'
        self.searches = searches  # list of keyword phrases to query

    def siteVariants(self):
        # URL prefixes that all count as hits for this site.
        return ('http://' + self.site, 'https://' + self.site,
                'http://www.' + self.site, 'https://www.' + self.site)

    def matchSite(self, url):
        for s in self.siteVariants():
            if url.startswith(s):
                return True
        return False

    def csvFile(self):
        return self.site + ".csv"

    def htmlFile(self):
        return self.site + ".html"
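    # As an illustration (hypothetical URLs), with site = 'en.wikipedia.org'
    # the prefix match above behaves like:
    #   matchSite('http://en.wikipedia.org/wiki/Word')  -> True
    #   matchSite('http://fr.wikipedia.org/wiki/Word')  -> False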
    def analyze(self):
        # Pull the first 100 results (10 pages of 10) for each query.
        start_range = range(0, 100, 10)
        results = []
        for search in self.searches:
            page = []
            for s in start_range:
                try:
                    page.append(google.doGoogleSearch(search, start = s))
                except Exception, e:
                    print 'google.py exception at:', search, s, e
                time.sleep(0.2)  # throttle requests a little
            # Record the rank of every result that matches our site;
            # -1 means the site never appeared in the top 100.
            found = False
            rank = 0
            for p in page:
                for item in p.results:
                    rank += 1
                    if self.matchSite(item.URL):
                        results.append([time.time(), item.URL, search, rank])
                        found = True
            if not found:
                results.append([time.time(), self.site, search, -1])
        # Append this run's rows to the site's CSV history.
        f = open(self.csvFile(), "ab")
        writer = csv.writer(f)
        writer.writerows(results)
        f.close()
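        # Each appended row has the shape [timestamp, url, query, rank],
        # e.g. (hypothetical values):
        #   1216672800.0,http://en.wikipedia.org/wiki/Word,word,5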
        # Re-read the full history and group inverted ranks by (query, url).
        f = open(self.csvFile(), "rb")
        reader = csv.reader(f)
        searches = {}
        for line in reader:
            ident = '%s,%s' % (line[2], line[1])
            if ident not in searches:
                searches[ident] = []
            # Invert the rank so that better rankings plot higher.
            searches[ident].append(str(100 - int(line[3])))
        f.close()
        sorted_searches = []
        for k in searches:
            # Split on the first comma only, so commas in the URL survive
            # (queries themselves must not contain commas).
            keywords, url = k.split(',', 1)
            lst = ','.join(map(lambda x: str(int(x) + 1), searches[k]))
            sorted_searches.append({
                'query': keywords,
                'query_encoded': urllib.urlencode({'q': keywords}),
                'url': url,
                'lst': lst,
                'last_rank': (100 - int(searches[k][-1])),
            })
        sorted_searches.sort(key = lambda x: x['query'])
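        # Worked example of the arithmetic above: a recorded rank of 1 is
        # stored as 100 - 1 = 99 and plotted as 99 + 1 = 100 (the top of
        # the chart), while "not found" (-1) becomes 102, which falls
        # outside the chart's 0-100 text-encoding range.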
        # Bucket queries into primary / secondary / everything else and
        # write an HTML report with one Google Chart per (query, url) pair.
        html = open(self.htmlFile(), "w")
        html.write('<html>')
        akeys = ['',]  # list of primary keywords goes here
        bkeys = ['',]  # list of secondary keywords goes here
        alst = []
        blst = []
        clst = []
        for d in sorted_searches:
            if d['query'] in akeys:
                alst.append(d)
            elif d['query'] in bkeys:
                blst.append(d)
            else:
                clst.append(d)
        for k, l in [['Primary', alst], ['Secondary', blst], ['All', clst]]:
            html.write('<h1 style="clear: both; font-family: georgia; font-weight: normal; border-bottom: 1px solid #ccc;">%s</h1>' % (k))
            for d in l:
                html.write('''<div style="float: left; text-align: center; margin: 20px 10px; padding: 10px; overflow: hidden; width: 300px;"><div style="margin-bottom: 20px;"><a title="%(query)s" href="http://www.google.com/search?%(query_encoded)s">%(query)s</a><br /><small><a href="%(url)s" title="%(url)s">%(url)s</a><br />Last rank: %(last_rank)s</small></div>
<a href="http://chart.apis.google.com/chart?chs=500x500&cht=ls&chco=cc0000&chls=1,0,0&chf=bg,s,efefef&chd=t:%(lst)s&chxt=r&chxl=0:|100|90|80|70|60|50|40|30|20|10|1&chm=r,ccdff9,0,0.90,1.00|r,E5ECF9,0,0.80,0.90"><img border="0" src="http://chart.apis.google.com/chart?chs=300x300&cht=ls&chco=cc0000&chls=1,0,0&chf=bg,s,efefef&chd=t:%(lst)s&chxt=r&chxl=0:|100|90|80|70|60|50|40|30|20|10|1&chm=r,ccdff9,0,0.90,1.00|r,E5ECF9,0,0.80,0.90" title="Last rank: %(last_rank)s" /></a>
</div>
''' % d)
        html.write('</html>')
        html.close()
if __name__ == '__main__':
    argv = sys.argv
    if len(argv) == 2:
        # First line of the conf file is the site; line two is the "--"
        # separator; everything after it is one keyword phrase per line.
        conf = open(argv[1]).read().split("\n")
        site = conf[0]
        keywords = conf[2:-1]
        print "Analyzing", site, "with", len(keywords), "keywords"
        SearchAnalyzer(site, keywords).analyze()
    else:
        # No conf file given: fall back to a one-keyword demo run.
        SearchAnalyzer('en.wikipedia.org', ['word']).analyze()
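# A minimal sketch of driving the class directly (assuming the pygoogle
# 'google' module is installed and LICENSE_KEY is set above); the conf-file
# invocation shown at the top is the usual entry point:
#
#   analyzer = SearchAnalyzer('example.org', ['foo bar', 'baz'])
#   analyzer.analyze()
#   # appends [timestamp, url, query, rank] rows to example.org.csv and
#   # rewrites example.org.html with one chart per (query, url) pair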