Created
February 21, 2017 17:36
-
-
Save yupadhyay/644f3ee172239c53ccc32c67d57f81e9 to your computer and use it in GitHub Desktop.
Find Google URL relevancy for an end point and save it in an Excel sheet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Export Watson keyword relevance for the top Google hits of a query.

The script prompts for a search phrase, fetches the first ten results from
the Google Custom Search JSON API, asks the Watson AlchemyLanguage keyword
service for the keywords of every result URL, and writes one row per keyword
(url, title, keyword text, relevance) into a time-stamped .xlsx workbook in
the current directory.

The code targets Python 2 (it relies on ``raw_input`` and ``urllib2``).
"""
import datetime
import json
import urllib
import urllib2

import requests
from openpyxl import Workbook
from watson_developer_cloud import AlchemyLanguageV1

# Get google API key from https://developers.google.com/custom-search/json-api/v1/introduction#identify_your_application_to_google_with_api_key
google_api_key = '*****'
# Get google CX key from http://www.google.com/cse/manage/all
google_cx_key = '*****'
# Get Watson key from https://www.ibm.com/watson/developercloud/alchemy-language/api/v1/#keywords
watson_alchemy_api_key = '******'

# Construct the URL that returns the top 10 results from Google; the
# url-encoded "q=..." parameter is appended after the trailing "&".
google_search_url = "https://www.googleapis.com/customsearch/v1?num=10&start=1&cx=" + google_cx_key + "&key=" + google_api_key + "&"

query_string = raw_input("What do you want to search for ? >> ")

# Search Google for the top 10 results.
query = urllib.urlencode({'q': query_string})
search_response = urllib2.urlopen(google_search_url + query)
try:
    response = search_response.read()
finally:
    # Release the HTTP connection even if reading the body fails.
    search_response.close()
data = json.loads(response)

# A search without hits has no 'items' key at all, so fall back to an empty
# list instead of failing with a KeyError.
results = data.get('items', [])
if not results:
    print('No results found for this query')

alchemy_language = AlchemyLanguageV1(api_key=watson_alchemy_api_key)

# Initialize the workbook that becomes the Excel sheet.
wb = Workbook()
ws = wb.active

# Time stamp with second resolution, so the program can be run several times
# a day without overwriting an earlier export.
_uniqueTime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# The name is used both as the file name and as the worksheet title, so keep
# only characters that are safe in both places: openpyxl rejects titles that
# contain \ * ? : / [ or ], and several of those are also illegal in file
# names. Spaces are dropped as before; at most six characters of the query
# are kept, which keeps the title within Excel's 31-character limit.
safe_query = "".join(ch for ch in query_string if ch.isalnum() or ch in "-_")
fileName = _uniqueTime + "_" + safe_query[:6]
ws.title = fileName

# Header row.
ws.append(['url', 'title', 'text', 'relevency'])

# Go through all results from Google first.
for result in results:
    title = result['title']
    url = result['link']
    # Call the Watson API to get the keyword analysis for this URL.
    jsonOutput = alchemy_language.keywords(url=url)
    # Keep only the keyword entries of the response.
    all_keywords = jsonOutput['keywords']
    # One spreadsheet row per keyword returned by the Watson API.
    for keyword in all_keywords:
        ws.append([url, title, keyword['text'], keyword['relevance']])

# Save first, then report success, so the message is only shown once the
# file really exists.
wb.save(fileName + ".xlsx")
print('Done !!!!! Your file ' + fileName + '.xlsx is created now')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment