Skip to content

Instantly share code, notes, and snippets.

@yupadhyay
Created February 21, 2017 17:36
Show Gist options
  • Save yupadhyay/644f3ee172239c53ccc32c67d57f81e9 to your computer and use it in GitHub Desktop.
Save yupadhyay/644f3ee172239c53ccc32c67d57f81e9 to your computer and use it in GitHub Desktop.
Find the Google URL relevancy for an endpoint and save it in an Excel sheet
import json
import requests
import urllib2
import urllib
import datetime
from openpyxl import Workbook
from watson_developer_cloud import AlchemyLanguageV1
# Script: ask the user for a search phrase, fetch the top 10 Google Custom
# Search results for it, ask the Watson AlchemyLanguage keywords endpoint for
# the keywords of every result URL, and write one row per (url, keyword) into
# a timestamped .xlsx workbook in the current directory.

# Google API key; see
# https://developers.google.com/custom-search/json-api/v1/introduction#identify_your_application_to_google_with_api_key
google_api_key = '*****'
# Google CX key; see http://www.google.com/cse/manage/all
google_cx_key = '*****'
# Watson key; see
# https://www.ibm.com/watson/developercloud/alchemy-language/api/v1/#keywords
watson_alchemy_api_key = '******'

# Base URL requesting the first 10 results; the encoded query is appended to it.
google_search_url = "https://www.googleapis.com/customsearch/v1?num=10&start=1&cx=" + google_cx_key + "&key=" + google_api_key + "&"
query_string = raw_input("What do you want to search for ? >> ")

# Search Google for the top 10 results.
query = urllib.urlencode({'q': query_string})
_search_response = urllib2.urlopen(google_search_url + query)
try:
    response = _search_response.read()
finally:
    # Release the HTTP connection even when reading the body fails.
    _search_response.close()
data = json.loads(response)
# 'items' may be missing when the search yields no hits; fall back to an
# empty list so the script still produces a (header-only) workbook.
results = data.get('items', [])

alchemy_language = AlchemyLanguageV1(api_key=watson_alchemy_api_key)

# Workbook that becomes the Excel file; results go into its active sheet.
wb = Workbook()
ws = wb.active

# Timestamp down to the second, so that several runs on the same day get
# distinct file names.
_uniqueTime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# The name is used both as a sheet title and as a file name. Drop spaces plus
# the characters that openpyxl rejects in sheet titles (\ / * ? : [ ]) or that
# are unsafe in file names (< > | "), then keep at most 6 characters of the
# query as a readable hint.
_unsafeChars = ' \\/*?:[]<>|"'
_queryHint = ''.join(ch for ch in query_string if ch not in _unsafeChars)[:6]
fileName = _uniqueTime + "_" + _queryHint
ws.title = fileName

# Header row.
ws.append(['url', 'title', 'text', 'relevency'])

# Walk through every Google result.
for result in results:
    title = result['title']
    url = result['link']
    # Ask the Watson API for the keywords of the page behind this URL.
    jsonOutput = alchemy_language.keywords(url=url)
    all_keywords = jsonOutput['keywords']
    # One spreadsheet row per keyword returned for this URL.
    for keyword in all_keywords:
        ws.append([url, title, keyword['text'], keyword['relevance']])

# Save first, then report success, so the message is only shown once the
# file really exists.
wb.save(fileName + ".xlsx")
print('Done !!!!! Your file '+fileName+'.xlsx is created now')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment