Google Search API Working Example - Python 3 + MSSQL
#!/usr/local/bin/python3
'''
This program gets all search terms from Google Search Console and saves them to an MSSQL database.

Please note: this .py file alone is not enough. You need to create a Google service account in the
Google API Console and add that service account as a user of the Google Search Console property.

Points to note:
===============
- By default (and this cannot be changed), as of this writing, Google Search Console does not
  provide any search data for the most recent 3 days.
- For example, if today is 24 May 2019, you CANNOT get search term results for 23, 22 or 21 May.
  The latest available data is for 20 May. (And of course 24 May itself, today, is excluded too.)
- Read the command-line argument descriptions. They are important.
'''
import argparse
import os, time
import sys
from google.oauth2 import service_account
import googleapiclient.discovery
import datetime
import pyodbc
import urllib.parse

# https://stackoverflow.com/a/1301528/1802483
os.environ['TZ'] = "Asia/Hong_Kong"
time.tzset()

# How to do server-to-server connection using OAuth:
# https://developers.google.com/api-client-library/python/auth/service-accounts
# API reference:
# https://developers.google.com/apis-explorer/?hl=zh-TW#p/webmasters/v3/webmasters.searchanalytics.query
# Required:
# pip3 install google-auth google-auth-httplib2 google-api-python-client
# https://github.com/mkleehammer/pyodbc/wiki/Connecting-to-SQL-Server-from-Windows#using-an-odbc-driver
# https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server?view=sql-server-2017

# Declare command-line flags.
argparser = argparse.ArgumentParser(add_help=False)
argparser.add_argument('property_uri', type=str, help='Site or app URI to query data for (including trailing slash).')
argparser.add_argument('--days', default=1, type=int, help='Number of days of data to get, counting back from today. 1 means today only (default); 2 means yesterday and today.')

def main(argv):
    args = argparser.parse_args()
    property_uri = args.property_uri
    days = args.days

    # https://stackoverflow.com/a/993367/1802483
    today = datetime.datetime.today()
    date_list = [today - datetime.timedelta(days=i) for i in reversed(range(0, days))]

    SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']

    # Why define a table name? Because this script may be used by different websites,
    # each of which stores its data in a different table.
    if property_uri == "https://website1.com":
        tablename = "[ba_analysis].[dbo].[website1]"
        SERVICE_ACCOUNT_FILE = '/root/googlesearch/website1_service_account_secret_download_from_google_api_console.json'
    elif property_uri == "https://website2.com":
        tablename = "[ba_analysis].[dbo].[website2]"
        SERVICE_ACCOUNT_FILE = '/root/googlesearch/website2_service_account_secret_download_from_google_api_console.json'
    else:
        sys.exit("Unknown property URI: " + property_uri)

    credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
    webmaster_service = googleapiclient.discovery.build("webmasters", "v3", credentials=credentials)

    # site_list = webmaster_service.sites().list().execute()
    # print(site_list); return

    for current_date in date_list:
        data_date = start_date = end_date = current_date.strftime("%Y-%m-%d")

        request = {
            'startDate': start_date,
            'endDate': end_date,
            "dimensions": ["page", "query", "device"],
            "searchType": "web"
        }

        result = webmaster_service.searchanalytics().query(siteUrl=property_uri, body=request).execute()

        if not result.get('rows'):
            continue

        # https://github.com/mkleehammer/pyodbc/wiki/Getting-started
        cnxn = pyodbc.connect(driver='{ODBC Driver 17 for SQL Server}', server='mssql_server_ip', database='db_name', uid='user', pwd='password')
        cnxn.setencoding(encoding='utf-8')

        # Create a cursor from the connection
        cursor = cnxn.cursor()

        for row in result['rows']:
            # "keys" holds the requested dimensions in order: page, query, device.
            search_query = row["keys"][1]
            landing_page = urllib.parse.unquote(row["keys"][0], encoding='utf-8', errors='replace')
            device = row["keys"][2]
            impressions = str(row["impressions"])
            clicks = str(row["clicks"])
            average_position = str(row["position"])
            click_through_rate = str(row["ctr"])

            try:
                # Use a parameterized query so quotes in search terms need no manual escaping.
                # The table name cannot be a parameter, so it is interpolated separately.
                sql = ("INSERT INTO {:s} (search_query, landing_page, device, impressions, clicks, "
                       "average_position, click_through_rate, data_date) "
                       "VALUES(?, ?, ?, ?, ?, ?, ?, ?)").format(tablename)
                cursor.execute(sql, search_query, landing_page, device, impressions, clicks,
                               average_position, click_through_rate, data_date)
            except pyodbc.ProgrammingError as ex:
                print("Exception: ")
                print(ex)
                print("Error in SQL: " + sql)
                return

        cnxn.commit()
        cnxn.close()

if __name__ == '__main__':
    main(sys.argv)
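The gist does not include the MSSQL table schema, but the INSERT statement implies the columns, and every value is inserted as a string. A minimal sketch of a matching table, created once through the same pyodbc connection (the NVARCHAR types and sizes are assumptions; adjust to your data):

import pyodbc

# One-off setup sketch; server, database and credentials are the same placeholders used above.
cnxn = pyodbc.connect(driver='{ODBC Driver 17 for SQL Server}',
                      server='mssql_server_ip', database='db_name',
                      uid='user', pwd='password')
cursor = cnxn.cursor()
# Column names match the INSERT in the script; types and sizes are assumptions.
cursor.execute("""
    CREATE TABLE [ba_analysis].[dbo].[website1] (
        search_query        NVARCHAR(1000),
        landing_page        NVARCHAR(2000),
        device              NVARCHAR(50),
        impressions         NVARCHAR(50),
        clicks              NVARCHAR(50),
        average_position    NVARCHAR(50),
        click_through_rate  NVARCHAR(50),
        data_date           NVARCHAR(10)
    )
""")
cnxn.commit()
cnxn.close()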
At the time of this writing, Google has increased its data delay again: first you would not get any data for the past 4 days, and now the delay is 5 days, including today. So if you want to use this script, set the --days parameter to at least 6 in order to get any data back.
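For example (the script filename below is illustrative; the gist does not name the file):

python3 gsc_to_mssql.py https://website1.com --days 6

This requests today plus the 5 preceding days; given the delay described above, only the oldest day in that window can actually return any rows.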