import google  # pygoogle wrapper around the old Google SOAP Search API
import time, csv, urllib, sys

google.LICENSE_KEY = 'YOURKEY'  # your Google SOAP API license key
# Sample usage: python search.py wp.conf
# wp.conf should look like this (site on the first line, then "--",
# then one keyword phrase per line):
# en.wikipedia.org
# --
# foo
# word
# encyclopedia
# larry sanger
# jimbo wales
# your mom
class SearchAnalyzer:
    """Tracks where a site ranks in Google results for a set of queries."""

    def __init__(self, site, searches):
        self.site = site          # domain to track, e.g. 'en.wikipedia.org'
        self.searches = searches  # list of keyword phrases to query

    def siteVariants(self):
        # URL prefixes that all count as hits for this site.
        return ('http://' + self.site, 'https://' + self.site,
                'http://www.' + self.site, 'https://www.' + self.site)

    def matchSite(self, url):
        for s in self.siteVariants():
            if url.startswith(s):
                return True
        return False

    def csvFile(self):
        return self.site + ".csv"

    def htmlFile(self):
        return self.site + ".html"
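    # As an illustration (hypothetical URLs), with site = 'en.wikipedia.org'
    # the prefix match above behaves like:
    #   matchSite('http://en.wikipedia.org/wiki/Word')  -> True
    #   matchSite('http://fr.wikipedia.org/wiki/Word')  -> False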
    def analyze(self):
        # Pull the first 100 results (10 pages of 10) for each query.
        start_range = range(0, 100, 10)
        results = []
        for search in self.searches:
            page = []
            for s in start_range:
                try:
                    page.append(google.doGoogleSearch(search, start = s))
                except Exception, e:
                    print 'google.py exception at:', search, s, e
                time.sleep(0.2)  # throttle requests a little
            # Record the rank of every result that matches our site;
            # -1 means the site never appeared in the top 100.
            found = False
            rank = 0
            for p in page:
                for item in p.results:
                    rank += 1
                    if self.matchSite(item.URL):
                        results.append([time.time(), item.URL, search, rank])
                        found = True
            if not found:
                results.append([time.time(), self.site, search, -1])
        # Append this run's rows to the site's CSV history.
        f = open(self.csvFile(), "ab")
        writer = csv.writer(f)
        writer.writerows(results)
        f.close()
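        # Each appended row has the shape [timestamp, url, query, rank],
        # e.g. (hypothetical values):
        #   1216672800.0,http://en.wikipedia.org/wiki/Word,word,5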
        # Re-read the full history and group inverted ranks by (query, url).
        f = open(self.csvFile(), "rb")
        reader = csv.reader(f)
        searches = {}
        for line in reader:
            ident = '%s,%s' % (line[2], line[1])
            if ident not in searches:
                searches[ident] = []
            # Invert the rank so that better rankings plot higher.
            searches[ident].append(str(100 - int(line[3])))
        f.close()
        sorted_searches = []
        for k in searches:
            # Split on the first comma only, so commas in the URL survive
            # (queries themselves must not contain commas).
            keywords, url = k.split(',', 1)
            lst = ','.join(map(lambda x: str(int(x) + 1), searches[k]))
            sorted_searches.append({
                'query': keywords,
                'query_encoded': urllib.urlencode({'q': keywords}),
                'url': url,
                'lst': lst,
                'last_rank': (100 - int(searches[k][-1])),
            })
        sorted_searches.sort(key = lambda x: x['query'])
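        # Worked example of the arithmetic above: a recorded rank of 1 is
        # stored as 100 - 1 = 99 and plotted as 99 + 1 = 100 (the top of
        # the chart), while "not found" (-1) becomes 102, which falls
        # outside the chart's 0-100 text-encoding range.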
        # Bucket queries into primary / secondary / everything else and
        # write an HTML report with one Google Chart per (query, url) pair.
        html = open(self.htmlFile(), "w")
        html.write('<html>')
        akeys = ['',]  # list of primary keywords goes here
        bkeys = ['',]  # list of secondary keywords goes here
        alst = []
        blst = []
        clst = []
        for d in sorted_searches:
            if d['query'] in akeys:
                alst.append(d)
            elif d['query'] in bkeys:
                blst.append(d)
            else:
                clst.append(d)
        for k, l in [['Primary', alst], ['Secondary', blst], ['All', clst]]:
            html.write('<h1 style="clear: both; font-family: georgia; font-weight: normal; border-bottom: 1px solid #ccc;">%s</h1>' % (k))
            for d in l:
                html.write('''<div style="float: left; text-align: center; margin: 20px 10px; padding: 10px; overflow: hidden; width: 300px;"><div style="margin-bottom: 20px;"><a title="%(query)s" href="http://www.google.com/search?%(query_encoded)s">%(query)s</a><br /><small><a href="%(url)s" title="%(url)s">%(url)s</a><br />Last rank: %(last_rank)s</small></div>
<a href="http://chart.apis.google.com/chart?chs=500x500&cht=ls&chco=cc0000&chls=1,0,0&chf=bg,s,efefef&chd=t:%(lst)s&chxt=r&chxl=0:|100|90|80|70|60|50|40|30|20|10|1&chm=r,ccdff9,0,0.90,1.00|r,E5ECF9,0,0.80,0.90"><img border="0" src="http://chart.apis.google.com/chart?chs=300x300&cht=ls&chco=cc0000&chls=1,0,0&chf=bg,s,efefef&chd=t:%(lst)s&chxt=r&chxl=0:|100|90|80|70|60|50|40|30|20|10|1&chm=r,ccdff9,0,0.90,1.00|r,E5ECF9,0,0.80,0.90" title="Last rank: %(last_rank)s" /></a>
</div>
''' % d)
        html.write('</html>')
        html.close()
if __name__ == '__main__':
    argv = sys.argv
    if len(argv) == 2:
        # First line of the conf file is the site; line two is the "--"
        # separator; everything after it is one keyword phrase per line.
        conf = open(argv[1]).read().split("\n")
        site = conf[0]
        keywords = conf[2:-1]
        print "Analyzing", site, "with", len(keywords), "keywords"
        SearchAnalyzer(site, keywords).analyze()
    else:
        # No conf file given: fall back to a one-keyword demo run.
        SearchAnalyzer('en.wikipedia.org', ['word']).analyze()
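# A minimal sketch of driving the class directly (assuming the pygoogle
# 'google' module is installed and LICENSE_KEY is set above); the conf-file
# invocation shown at the top is the usual entry point:
#
#   analyzer = SearchAnalyzer('example.org', ['foo bar', 'baz'])
#   analyzer.analyze()
#   # appends [timestamp, url, query, rank] rows to example.org.csv and
#   # rewrites example.org.html with one chart per (query, url) pair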