Skip to content

Instantly share code, notes, and snippets.

@alucard001
Last active June 24, 2019 04:09
Show Gist options
  • Save alucard001/45bbb9b9b334a6ddc260412a6289396f to your computer and use it in GitHub Desktop.
Save alucard001/45bbb9b9b334a6ddc260412a6289396f to your computer and use it in GitHub Desktop.
Google Search API Working Example - Python 3 + MSSQL
#!/usr/local/bin/python3
'''
This program is used to get all search terms in Google search console and save it in MSSQL DB.
Please note: Just using this .py is not enough. You need to create a Google service account in google API console and
connect that service account to Google search console property.
Points to note:
===============
- By default (and cannot be changed), as of this writing, Google Search Console would not provide any
searchdata in the last 3 days.
- For example, today is 24 May 2019, you CANNOT get any search term result in 23 May, 22 May and 21 May. The latest
data is 20 May. In case if I make the above text misunderstood, of course it did not include 24 May (Today).
- Read the command line arguments description. It is important.
'''
import argparse
import os, time
import sys
import json
from google.oauth2 import service_account
import googleapiclient.discovery
import textwrap
import datetime
import pyodbc
import urllib.parse
# https://stackoverflow.com/a/1301528/1802483
os.environ['TZ'] = "Asia/Hong_Kong"
time.tzset()
# How to do server to server connection using OAuth
# https://developers.google.com/api-client-library/python/auth/service-accounts
# API Reference
# https://developers.google.com/apis-explorer/?hl=zh-TW#p/webmasters/v3/webmasters.searchanalytics.query
# Required:
# pip3 install google-auth google-auth-httplib2 google-api-python-client
# https://github.com/mkleehammer/pyodbc/wiki/Connecting-to-SQL-Server-from-Windows#using-an-odbc-driver
# https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server?view=sql-server-2017
# Declare command-line flags.
argparser = argparse.ArgumentParser(add_help=False)
argparser.add_argument('property_uri', type=str, help=('Site or app URI to query data for (including trailing slash).'))
argparser.add_argument('--days', default=1, type=int, help=('Number of days to get data prior to today. 1 means today(Default), 2 means yesterday and today'))
def main(argv):
args = argparser.parse_args()
property_uri = args.property_uri
days = args.days
# https://stackoverflow.com/a/993367/1802483
today = datetime.datetime.today()
date_list = [today - datetime.timedelta(days=i) for i in reversed(range(0, days))]
SCOPES = ['https://www.googleapis.com/auth/webmasters.readonly']
# Why define a table name? Because this script may be used by different websites,
# which means it will have different tables
tablename = ""
if(property_uri == "https://website1.com"):
tablename = "[ba_analysis].[dbo].[website1]"
SERVICE_ACCOUNT_FILE = '/root/googlesearch/website1_service_account_secret_download_from_google_api_console.json'
elif(property_uri == "https://website2.com"):
tablename = "[ba_analysis].[dbo].[website2]"
SERVICE_ACCOUNT_FILE = '/root/googlesearch/website2_service_account_secret_download_from_google_api_console.json'
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT_FILE, scopes=SCOPES)
webmaster_service = googleapiclient.discovery.build("webmasters", "v3", credentials=credentials)
# site_list = webmaster_service.sites().list().execute()
# print(site_list); return
for current_date in date_list:
data_date = start_date = end_date = current_date.strftime("%Y-%m-%d")
request = {
'startDate': start_date,
'endDate': end_date,
"dimensions": ["page", "query", "device"],
"searchType": "web"
}
result = webmaster_service.searchanalytics().query(siteUrl=property_uri, body=request).execute()
if(len(result.get('rows', [])) == 0):
continue
# https://github.com/mkleehammer/pyodbc/wiki/Getting-started
cnxn = pyodbc.connect(driver='{ODBC Driver 17 for SQL Server}', server='mssql_server_ip', database='db_name', uid='user', pwd='password')
cnxn.setencoding(encoding='utf-8')
# Create a cursor from the connection
cursor = cnxn.cursor()
for row in result['rows']:
## Debug
# print("SEARCH_QUERY: " + row["keys"][1])
# print("LANDING_PAGE: " + row["keys"][0])
# print("DEVICE: " + row["keys"][2])
# print("IMPRESSIONS: " + str(row["impressions"]))
# print("CLICKS: " + str(row["clicks"]))
# print("AVERAGE_POSITION: " + str(row["position"]))
# print("CLICK_THROUGH_RATE: " + str(row["ctr"]))
# print("\n")
search_query = row["keys"][1]
landing_page = row["keys"][0]
device = row["keys"][2]
impressions = str(row["impressions"])
clicks = str(row["clicks"])
average_position = str(row["position"])
click_through_rate = str(row["ctr"])
if(tablename != ""):
try:
search_query = search_query.replace("'", r"''")
search_query = search_query.replace('"', r'\"')
sql = "INSERT INTO {:s} (search_query, landing_page, device, impressions, clicks, average_position, click_through_rate, data_date) VALUES(N'{:s}', N'{:s}', N'{:s}', N'{:s}', N'{:s}', N'{:s}', N'{:s}', N'{:s}')".format(
tablename, search_query, urllib.parse.unquote(landing_page, encoding='utf-8', errors='replace'), device, impressions, clicks, average_position, click_through_rate, data_date
)
#print(sql)
cursor.execute(sql)
except pyodbc.ProgrammingError as ex:
print("Exception: ")
print(ex)
print("Error in SQL: " + sql)
return
cnxn.commit()
if __name__ == '__main__':
main(sys.argv)
@alucard001
Copy link
Author

At the time of this writing, Google has been updated their "time interval", which means you will NOT get any data in the past 4 days.

Now the time interval is increased to 5 days, which means you will not get any data in the past 5 days, including today.

So if you want to use this script, the --days parameter is 6 in order to get data.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment