Created
October 8, 2019 07:36
-
-
Save natzir/574865617247141c0e15cfafb1ccdcb3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"<h3>Bubbles: Google Search Console Opportunity Matrix</h3><p>@author: Natzir Turrado: Technical SEO / Data Scientist. <a href=\"https://twitter.com/natzir9\">Twitter > @natzir9</a></p>\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "VvfcpByztqkf" | |
}, | |
"source": [ | |
"Importing libraries" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "DljFdN6rtcyu" | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[nltk_data] Downloading package stopwords to\n", | |
"[nltk_data] C:\\Users\\nturr\\AppData\\Roaming\\nltk_data...\n", | |
"[nltk_data] Package stopwords is already up-to-date!\n" | |
] | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import re\n", | |
"import datetime\n", | |
"import nltk\n", | |
"from IPython.display import display\n", | |
"from collections import defaultdict\n", | |
"from dateutil import relativedelta\n", | |
"from bokeh.io import show, output_file, curdoc, output_notebook\n", | |
"from bokeh.plotting import figure\n", | |
"from bokeh.models import ColumnDataSource, LinearColorMapper, ColorBar, BasicTicker, PrintfTickFormatter, HoverTool, BoxAnnotation, TapTool, OpenURL, Label, LabelSet, Slider, RangeSlider, CustomJS, Button\n", | |
"from bokeh.models.widgets import DataTable, DateFormatter, TableColumn, Tabs, Panel, Div\n", | |
"from bokeh.layouts import gridplot, row, column, layout, widgetbox\n", | |
"from bokeh.embed import file_html\n", | |
"from bokeh.models.annotations import Title, TextAnnotation\n", | |
"from bokeh.palettes import Viridis256\n", | |
"from bokeh.transform import transform\n", | |
"from ipywidgets import interact\n", | |
"from nltk.corpus import stopwords\n", | |
"nltk.download('stopwords')\n", | |
"from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer\n", | |
"import httplib2\n", | |
"from apiclient import errors\n", | |
"from apiclient.discovery import build\n", | |
"from oauth2client.client import OAuth2WebServerFlow" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "JAVsB-I2t_AZ" | |
}, | |
"source": [ | |
"Insert here your Google **CLIENT_ID**, **CLIENT_SECRET** & your Search Console **SITE PROPERTY**. \n", | |
"<ul><li><a href=\"https://console.developers.google.com/flows/enableapi?apiid=webmasters&credential=client_key\">Create your API credentintials</a></li></ul>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "OCLNELb0tvrb" | |
}, | |
"outputs": [], | |
"source": [ | |
"CLIENT_ID = ''\n", | |
"CLIENT_SECRET = ''\n", | |
"site = ''\n", | |
"\n", | |
"site_no_slash = re.sub('/$','',site)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "YEI-GX8Iu00E" | |
}, | |
"source": [ | |
"Insert here the **date range** (last 3 month of SC data by default)\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "inb3cm4Bu1N4" | |
}, | |
"outputs": [], | |
"source": [ | |
"end_date = datetime.date.today()\n", | |
"start_date = end_date - relativedelta.relativedelta(months=3)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "SwSYGqNcvPez" | |
}, | |
"source": [ | |
"Inserte here your **RegEx** and **name secions** (example: '^/$':'home', '^/p/.*':'product',... )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "2-O0kPj8vPmd" | |
}, | |
"outputs": [], | |
"source": [ | |
"sections_name = {'^/$':'home', '^/p/.*':'M > articulo','^/f/.*':'P > proveedor', '^/cat/.*':'M > categorias', '^\\/[^\\/]*\\/ciudad\\/[^\\/]+$':'P > ciudad','^\\/[^\\/]*\\/pro\\/[^\\/]+$':'P > tags','^/v/.*':'M > videos','^/g/.*':'M > galerias','^\\/[^\\/]+$':'P > Categoría','^\\/[^\\/]*\\/[^\\/]+$':'P > Provincia / CCAA'}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "8H3oHaYgv6FC" | |
}, | |
"source": [ | |
"Google Search Console API call. \n", | |
"<ul><li><a href=\"https://developers.google.com/webmaster-tools/search-console-api-original/v3/quickstart/quickstart-python\">Quickstart: Run a Search Console App in Python</a></li>\n", | |
"<li><a href=\"https://developers.google.com/apis-explorer/#p/webmasters/v3/\">Search Console API Explorer</a></li>\n", | |
"</ul>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Go to the following link in your browser: https://accounts.google.com/o/oauth2/v2/auth?client_id=34080392064-3n7skj9umb29hjhftm6ft1u0lmhh59qq.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fwebmasters.readonly&access_type=offline&response_type=code\n", | |
"Enter verification code: 4/rwGWKAUzxWflZIFIEZObhqCn8Gxj5JF8lNw9pyhH_e0hksm9q8GNx04\n" | |
] | |
} | |
], | |
"source": [ | |
"OAUTH_SCOPE = 'https://www.googleapis.com/auth/webmasters.readonly'\n", | |
"REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'\n", | |
"\n", | |
"# Run through the OAuth flow and retrieve credentials\n", | |
"flow = OAuth2WebServerFlow(CLIENT_ID, CLIENT_SECRET, OAUTH_SCOPE, redirect_uri=REDIRECT_URI)\n", | |
"authorize_url = flow.step1_get_authorize_url()\n", | |
"print ('Go to the following link in your browser: ' + authorize_url)\n", | |
"code = input('Enter verification code: ').strip()\n", | |
"credentials = flow.step2_exchange(code)\n", | |
"\n", | |
"# Create an httplib2.Http object and authorize it with our credentials\n", | |
"http = httplib2.Http()\n", | |
"http = credentials.authorize(http)\n", | |
"\n", | |
"webmasters_service = build('webmasters', 'v3', http=http)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 73 | |
}, | |
"colab_type": "code", | |
"id": "OlSOcxRsvqFZ", | |
"outputId": "9dff2389-3d69-499b-f4b2-abcfb70a82b8" | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Enter device category: MOBILE, DESKTOP or TABLET (leave it blank for all devices): \n" | |
] | |
} | |
], | |
"source": [ | |
"def execute_request(service, property_uri, request):\n", | |
" return service.searchanalytics().query(siteUrl=property_uri, body=request).execute()\n", | |
"\n", | |
"request_page = {\n", | |
" 'startDate': datetime.datetime.strftime(start_date,\"%Y-%m-%d\"),\n", | |
" 'endDate': datetime.datetime.strftime(end_date,'%Y-%m-%d'),\n", | |
" 'dimensions': ['page'],\n", | |
" 'rowLimit': 25000\n", | |
"}\n", | |
"\n", | |
"request_query = {\n", | |
" 'startDate': datetime.datetime.strftime(start_date,\"%Y-%m-%d\"),\n", | |
" 'endDate': datetime.datetime.strftime(end_date,'%Y-%m-%d'),\n", | |
" 'dimensions': ['query'],\n", | |
" 'rowLimit': 25000\n", | |
"}\n", | |
"\n", | |
"request_date = {\n", | |
" 'startDate': datetime.datetime.strftime(start_date,\"%Y-%m-%d\"),\n", | |
" 'endDate': datetime.datetime.strftime(end_date,'%Y-%m-%d'),\n", | |
" 'dimensions': ['date'],\n", | |
" 'rowLimit': 25000\n", | |
"}\n", | |
"\n", | |
"#Adding a device filter to request\n", | |
"device_category = input('Enter device category: MOBILE, DESKTOP or TABLET (leave it blank for all devices): ').strip()\n", | |
"if device_category:\n", | |
" request_page['dimensionFilterGroups'] = [{'filters':[{'dimension':'device','expression':device_category}]}]\n", | |
" request_query['dimensionFilterGroups'] = [{'filters': [{'dimension': 'device', 'expression': device_category}]}]\n", | |
" request_date['dimensionFilterGroups'] = [{'filters': [{'dimension': 'device', 'expression': device_category}]}]\n", | |
"else:\n", | |
" device_category = 'ALL Devices'\n", | |
"\n", | |
"#Request to SC API\n", | |
"response_page = execute_request(webmasters_service, site, request_page)\n", | |
"response_query = execute_request(webmasters_service, site, request_query)\n", | |
"response_date = execute_request(webmasters_service, site, request_date)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "nWGKp7SIwTIN" | |
}, | |
"source": [ | |
"Transforming the JSON returned in a dict of list" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "SE-VkmsfwXMA" | |
}, | |
"outputs": [], | |
"source": [ | |
"scDict_page = defaultdict(list)\n", | |
"scDict_query = defaultdict(list)\n", | |
"scDict_date = defaultdict(list)\n", | |
"\n", | |
"def find_replace_sections(string, dictionary):\n", | |
" for item in dictionary.keys():\n", | |
" string = re.sub(item,dictionary[item],string)\n", | |
" return string\n", | |
"\n", | |
"for row in response_page['rows']:\n", | |
" scDict_page['keys'].append(row['keys'][0].replace(site_no_slash,'') or 0)\n", | |
" scDict_page['clicks'].append(row['clicks'] or 0)\n", | |
" scDict_page['ctr'].append(row['ctr'] or 0)\n", | |
" scDict_page['impressions'].append(row['impressions'] or 0)\n", | |
" scDict_page['position'].append(row['position'] or 0)\n", | |
" scDict_page['sections'].append(find_replace_sections(row['keys'][0].replace(site_no_slash,''),sections_name) or 0)\n", | |
" if scDict_page['sections'][-1] not in sections_name.values():\n", | |
" scDict_page['sections'][-1] = 'other'\n", | |
"\n", | |
"for row in response_query['rows']:\n", | |
" scDict_query['keys'].append(row['keys'][0] or 0)\n", | |
" scDict_query['clicks'].append(row['clicks'] or 0)\n", | |
" scDict_query['ctr'].append(row['ctr'] or 0)\n", | |
" scDict_query['impressions'].append(row['impressions'] or 0)\n", | |
" scDict_query['position'].append(row['position'] or 0)\n", | |
"\n", | |
"for row in response_date['rows']:\n", | |
" scDict_date['keys'].append(row['keys'][0] or 0)\n", | |
" scDict_date['clicks'].append(row['clicks'] or 0)\n", | |
" scDict_date['ctr'].append(row['ctr'] or 0)\n", | |
" scDict_date['impressions'].append(row['impressions'] or 0)\n", | |
" scDict_date['position'].append(row['position'] or 0)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "i1iD74RjwhMS" | |
}, | |
"source": [ | |
"DataFrame of the first plot: **opportunity matrix by URL**\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 71 | |
}, | |
"colab_type": "code", | |
"id": "q8JlB1bWwhRG", | |
"outputId": "2cc1cc78-41fa-47c9-b4a8-01e339cf7354" | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['M > articulo' 'M > galerias' 'home' 'P > Provincia / CCAA'\n", | |
" 'P > proveedor' 'P > ciudad' 'P > Categoría' 'M > videos' 'P > tags'\n", | |
" 'other' 'M > categorias']\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>keys</th>\n", | |
" <th>clicks</th>\n", | |
" <th>ctr</th>\n", | |
" <th>impressions</th>\n", | |
" <th>position</th>\n", | |
" <th>sections</th>\n", | |
" <th>norm_impressions</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>/p/te-contamos-cuantos-padrinos-debe-haber-en-...</td>\n", | |
" <td>16706</td>\n", | |
" <td>11.029829</td>\n", | |
" <td>151462</td>\n", | |
" <td>6.45</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>30.282964</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>/p/por-que-es-importante-el-anillo-de-promesa-...</td>\n", | |
" <td>14651</td>\n", | |
" <td>8.082820</td>\n", | |
" <td>181261</td>\n", | |
" <td>5.25</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>36.240947</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>/p/40-frases-de-amor-increibles-para-invitacio...</td>\n", | |
" <td>14217</td>\n", | |
" <td>5.391947</td>\n", | |
" <td>263671</td>\n", | |
" <td>9.74</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>52.717921</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>/p/20-cosas-que-debes-hacer-con-tu-pareja-al-m...</td>\n", | |
" <td>14134</td>\n", | |
" <td>4.672489</td>\n", | |
" <td>302494</td>\n", | |
" <td>8.29</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>60.480154</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>/p/los-5-lugares-mas-romanticos-de-la-ciudad-d...</td>\n", | |
" <td>12458</td>\n", | |
" <td>8.541594</td>\n", | |
" <td>145851</td>\n", | |
" <td>12.45</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>29.161106</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>/p/las-10-mejores-tiendas-de-vestidos-de-novia...</td>\n", | |
" <td>10536</td>\n", | |
" <td>2.633190</td>\n", | |
" <td>400123</td>\n", | |
" <td>5.60</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>80.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>/p/como-deben-vestir-los-invitados-hombres-en-...</td>\n", | |
" <td>8523</td>\n", | |
" <td>2.882430</td>\n", | |
" <td>295688</td>\n", | |
" <td>10.24</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>59.119369</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>/p/cancionesvals-bodas</td>\n", | |
" <td>7151</td>\n", | |
" <td>4.962733</td>\n", | |
" <td>144094</td>\n", | |
" <td>13.12</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>28.809813</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>/p/10-cosas-que-toda-novia-debe-tener-el-dia-d...</td>\n", | |
" <td>6977</td>\n", | |
" <td>6.147516</td>\n", | |
" <td>113493</td>\n", | |
" <td>6.88</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>22.691479</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>/g/40-regalos-perfectos-para-la-mama-de-la-nov...</td>\n", | |
" <td>6419</td>\n", | |
" <td>10.805124</td>\n", | |
" <td>59407</td>\n", | |
" <td>14.43</td>\n", | |
" <td>M > galerias</td>\n", | |
" <td>11.877577</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" keys clicks ctr \\\n", | |
"0 /p/te-contamos-cuantos-padrinos-debe-haber-en-... 16706 11.029829 \n", | |
"1 /p/por-que-es-importante-el-anillo-de-promesa-... 14651 8.082820 \n", | |
"2 /p/40-frases-de-amor-increibles-para-invitacio... 14217 5.391947 \n", | |
"3 /p/20-cosas-que-debes-hacer-con-tu-pareja-al-m... 14134 4.672489 \n", | |
"4 /p/los-5-lugares-mas-romanticos-de-la-ciudad-d... 12458 8.541594 \n", | |
"5 /p/las-10-mejores-tiendas-de-vestidos-de-novia... 10536 2.633190 \n", | |
"6 /p/como-deben-vestir-los-invitados-hombres-en-... 8523 2.882430 \n", | |
"7 /p/cancionesvals-bodas 7151 4.962733 \n", | |
"8 /p/10-cosas-que-toda-novia-debe-tener-el-dia-d... 6977 6.147516 \n", | |
"9 /g/40-regalos-perfectos-para-la-mama-de-la-nov... 6419 10.805124 \n", | |
"\n", | |
" impressions position sections norm_impressions \n", | |
"0 151462 6.45 M > articulo 30.282964 \n", | |
"1 181261 5.25 M > articulo 36.240947 \n", | |
"2 263671 9.74 M > articulo 52.717921 \n", | |
"3 302494 8.29 M > articulo 60.480154 \n", | |
"4 145851 12.45 M > articulo 29.161106 \n", | |
"5 400123 5.60 M > articulo 80.000000 \n", | |
"6 295688 10.24 M > articulo 59.119369 \n", | |
"7 144094 13.12 M > articulo 28.809813 \n", | |
"8 113493 6.88 M > articulo 22.691479 \n", | |
"9 59407 14.43 M > galerias 11.877577 " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"df = pd.DataFrame(data = scDict_page)\n", | |
"print(df.sections.unique())\n", | |
"df['norm_impressions']=((df['impressions']-df['impressions'].min())/(df['impressions'].max()-df['impressions'].min()))*80\n", | |
"df['ctr'] = df['ctr']*100\n", | |
"df['position'] = df['position'].round(2)\n", | |
"df['sections'] = df['sections'].astype('category')\n", | |
"df['keys'] = df['keys'].astype('category')\n", | |
"df['impressions'] = df['impressions'].astype('int')\n", | |
"df['clicks'] = df['clicks'].astype('int')\n", | |
"df.sort_values('clicks',inplace=True,ascending=False)\n", | |
"display(df.head(10))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "H2KVOnp_xQpC" | |
}, | |
"source": [ | |
"DataFrame of the second plot: **opportunity matrix by Section**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "YqvUlQKkxQyt" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>sections</th>\n", | |
" <th>ctr</th>\n", | |
" <th>clicks</th>\n", | |
" <th>impressions</th>\n", | |
" <th>position</th>\n", | |
" <th>norm_impressions</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>23.398474</td>\n", | |
" <td>266038</td>\n", | |
" <td>8674075</td>\n", | |
" <td>11.598852</td>\n", | |
" <td>150.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>P > proveedor</td>\n", | |
" <td>21.071625</td>\n", | |
" <td>114283</td>\n", | |
" <td>6987444</td>\n", | |
" <td>11.588729</td>\n", | |
" <td>120.822824</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>P > ciudad</td>\n", | |
" <td>30.956604</td>\n", | |
" <td>23273</td>\n", | |
" <td>735164</td>\n", | |
" <td>9.024539</td>\n", | |
" <td>12.664090</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>M > galerias</td>\n", | |
" <td>22.633598</td>\n", | |
" <td>22480</td>\n", | |
" <td>1262099</td>\n", | |
" <td>15.505875</td>\n", | |
" <td>21.779585</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>P > Provincia / CCAA</td>\n", | |
" <td>28.264477</td>\n", | |
" <td>18204</td>\n", | |
" <td>621751</td>\n", | |
" <td>9.473921</td>\n", | |
" <td>10.702149</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>home</td>\n", | |
" <td>10.631713</td>\n", | |
" <td>5771</td>\n", | |
" <td>54281</td>\n", | |
" <td>8.890000</td>\n", | |
" <td>0.885436</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>P > Categoría</td>\n", | |
" <td>27.078691</td>\n", | |
" <td>5394</td>\n", | |
" <td>270951</td>\n", | |
" <td>12.790941</td>\n", | |
" <td>4.633630</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>M > videos</td>\n", | |
" <td>45.614818</td>\n", | |
" <td>586</td>\n", | |
" <td>9619</td>\n", | |
" <td>14.890556</td>\n", | |
" <td>0.112825</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>10</td>\n", | |
" <td>other</td>\n", | |
" <td>58.869415</td>\n", | |
" <td>295</td>\n", | |
" <td>3174</td>\n", | |
" <td>6.870800</td>\n", | |
" <td>0.001332</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>P > tags</td>\n", | |
" <td>61.197136</td>\n", | |
" <td>237</td>\n", | |
" <td>14203</td>\n", | |
" <td>8.041379</td>\n", | |
" <td>0.192124</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" sections ctr clicks impressions position \\\n", | |
"0 M > articulo 23.398474 266038 8674075 11.598852 \n", | |
"7 P > proveedor 21.071625 114283 6987444 11.588729 \n", | |
"6 P > ciudad 30.956604 23273 735164 9.024539 \n", | |
"2 M > galerias 22.633598 22480 1262099 15.505875 \n", | |
"5 P > Provincia / CCAA 28.264477 18204 621751 9.473921 \n", | |
"9 home 10.631713 5771 54281 8.890000 \n", | |
"4 P > Categoría 27.078691 5394 270951 12.790941 \n", | |
"3 M > videos 45.614818 586 9619 14.890556 \n", | |
"10 other 58.869415 295 3174 6.870800 \n", | |
"8 P > tags 61.197136 237 14203 8.041379 \n", | |
"\n", | |
" norm_impressions \n", | |
"0 150.000000 \n", | |
"7 120.822824 \n", | |
"6 12.664090 \n", | |
"2 21.779585 \n", | |
"5 10.702149 \n", | |
"9 0.885436 \n", | |
"4 4.633630 \n", | |
"3 0.112825 \n", | |
"10 0.001332 \n", | |
"8 0.192124 " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"df2 = pd.DataFrame(df)\n", | |
"df2 = (df.groupby('sections').agg({'ctr':'mean', 'clicks':'sum','impressions':'sum','position':'mean'})).reset_index()\n", | |
"df2['norm_impressions']=((df2[\"impressions\"]-df2[\"impressions\"].min())/(df2[\"impressions\"].max()-df2[\"impressions\"].min()))*150\n", | |
"df2.sort_values('clicks',inplace=True,ascending=False)\n", | |
"display(df2.head(10))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "zs0iEj4bxREm" | |
}, | |
"source": [ | |
"DataFrame of the the third plot: **opportunity matrix by Topics (IDF)**" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 108 | |
}, | |
"colab_type": "code", | |
"id": "NvRLmhLIxRKn", | |
"outputId": "fe698247-8e06-4956-9686-3a071bab02c8" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>topic</th>\n", | |
" <th>ctr</th>\n", | |
" <th>clicks</th>\n", | |
" <th>impressions</th>\n", | |
" <th>position</th>\n", | |
" <th>idf_weight</th>\n", | |
" <th>norm_impressions</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>201</td>\n", | |
" <td>boda</td>\n", | |
" <td>42.127311</td>\n", | |
" <td>40598.0</td>\n", | |
" <td>685716.0</td>\n", | |
" <td>6.555689</td>\n", | |
" <td>2.974185</td>\n", | |
" <td>150.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1664</td>\n", | |
" <td>vestidos</td>\n", | |
" <td>28.740292</td>\n", | |
" <td>16694.0</td>\n", | |
" <td>578266.0</td>\n", | |
" <td>5.956941</td>\n", | |
" <td>3.713304</td>\n", | |
" <td>126.495337</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1173</td>\n", | |
" <td>novia</td>\n", | |
" <td>33.212926</td>\n", | |
" <td>15994.0</td>\n", | |
" <td>608567.0</td>\n", | |
" <td>6.096488</td>\n", | |
" <td>3.806144</td>\n", | |
" <td>133.123674</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1223</td>\n", | |
" <td>padrinos</td>\n", | |
" <td>35.163569</td>\n", | |
" <td>14143.0</td>\n", | |
" <td>66302.0</td>\n", | |
" <td>5.492451</td>\n", | |
" <td>5.756033</td>\n", | |
" <td>14.503329</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>202</td>\n", | |
" <td>bodas</td>\n", | |
" <td>41.994085</td>\n", | |
" <td>13690.0</td>\n", | |
" <td>218305.0</td>\n", | |
" <td>6.412018</td>\n", | |
" <td>3.593512</td>\n", | |
" <td>47.753950</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1354</td>\n", | |
" <td>promesa</td>\n", | |
" <td>38.325381</td>\n", | |
" <td>12275.0</td>\n", | |
" <td>117238.0</td>\n", | |
" <td>4.330189</td>\n", | |
" <td>5.813465</td>\n", | |
" <td>25.645567</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>87</td>\n", | |
" <td>anillo</td>\n", | |
" <td>40.018298</td>\n", | |
" <td>9711.0</td>\n", | |
" <td>131475.0</td>\n", | |
" <td>6.258347</td>\n", | |
" <td>5.194426</td>\n", | |
" <td>28.759908</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>714</td>\n", | |
" <td>frases</td>\n", | |
" <td>40.720588</td>\n", | |
" <td>9527.0</td>\n", | |
" <td>127237.0</td>\n", | |
" <td>6.491316</td>\n", | |
" <td>4.908071</td>\n", | |
" <td>27.832846</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>990</td>\n", | |
" <td>lugares</td>\n", | |
" <td>50.340271</td>\n", | |
" <td>8217.0</td>\n", | |
" <td>56588.0</td>\n", | |
" <td>6.828467</td>\n", | |
" <td>4.594337</td>\n", | |
" <td>12.378393</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1248</td>\n", | |
" <td>pareja</td>\n", | |
" <td>47.908171</td>\n", | |
" <td>7594.0</td>\n", | |
" <td>88217.0</td>\n", | |
" <td>6.680730</td>\n", | |
" <td>4.660526</td>\n", | |
" <td>19.297230</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" topic ctr clicks impressions position idf_weight \\\n", | |
"201 boda 42.127311 40598.0 685716.0 6.555689 2.974185 \n", | |
"1664 vestidos 28.740292 16694.0 578266.0 5.956941 3.713304 \n", | |
"1173 novia 33.212926 15994.0 608567.0 6.096488 3.806144 \n", | |
"1223 padrinos 35.163569 14143.0 66302.0 5.492451 5.756033 \n", | |
"202 bodas 41.994085 13690.0 218305.0 6.412018 3.593512 \n", | |
"1354 promesa 38.325381 12275.0 117238.0 4.330189 5.813465 \n", | |
"87 anillo 40.018298 9711.0 131475.0 6.258347 5.194426 \n", | |
"714 frases 40.720588 9527.0 127237.0 6.491316 4.908071 \n", | |
"990 lugares 50.340271 8217.0 56588.0 6.828467 4.594337 \n", | |
"1248 pareja 47.908171 7594.0 88217.0 6.680730 4.660526 \n", | |
"\n", | |
" norm_impressions \n", | |
"201 150.000000 \n", | |
"1664 126.495337 \n", | |
"1173 133.123674 \n", | |
"1223 14.503329 \n", | |
"202 47.753950 \n", | |
"1354 25.645567 \n", | |
"87 28.759908 \n", | |
"714 27.832846 \n", | |
"990 12.378393 \n", | |
"1248 19.297230 " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"df_query = pd.DataFrame(data = scDict_query)\n", | |
"df_query['ctr'] = df_query['ctr']*100\n", | |
"\n", | |
"cv = CountVectorizer(stop_words = stopwords.words('spanish'),max_df = 0.8, min_df = 5, token_pattern=r'(?u)\\b[A-Za-z]+\\b')\n", | |
"word_count_vector=cv.fit_transform(df_query['keys'])\n", | |
"tfidf_transformer=TfidfTransformer()\n", | |
"tfidf_transformer.fit(word_count_vector)\n", | |
"\n", | |
"df_idf = pd.DataFrame(tfidf_transformer.idf_, index=cv.get_feature_names(), columns=['idf_weight']).reset_index()\n", | |
"df_idf.rename(columns={'index':'topic'},inplace=True)\n", | |
"\n", | |
"df_query['keys'] = df_query['keys'].str.split()\n", | |
"df_query['topic_list'] = df_query['keys'].apply(lambda x: [item for item in x if item in df_idf['topic'].tolist()])\n", | |
"df_query['keys'] = df_query['keys'].str.join(\" \")\n", | |
"\n", | |
"def unnesting(data_frame, explode):\n", | |
" idx=data_frame.index.repeat(data_frame[explode[0]].str.len())\n", | |
" df_1=pd.concat([pd.DataFrame({x:np.concatenate(data_frame[x].values)} )for x in explode],axis=1)\n", | |
" df_1.index=idx\n", | |
" return df_1.join(data_frame.drop(explode,1),how='left')\n", | |
"\n", | |
"df_query_unnested = unnesting(df_query,['topic_list'])\n", | |
"df_query_merged = df_query_unnested.merge(df_idf,left_on='topic_list',right_on='topic',how='inner').drop('topic_list',1).merge(df_query)\n", | |
"df_query_grouped = df_query_merged.groupby('topic').agg({'ctr':'mean', 'clicks':'sum','impressions':'sum','position':'mean','idf_weight':'first'}).reset_index()\n", | |
"df_query_grouped['norm_impressions']=((df_query_grouped[\"impressions\"]-df_query_grouped[\"impressions\"].min())/(df_query_grouped[\"impressions\"].max()-df_query_grouped[\"impressions\"].min()))*150\n", | |
"df_query_grouped.sort_values('clicks',inplace=True,ascending=False)\n", | |
"display(df_query_grouped.head(10))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "Gn_n5Pe8yDIH" | |
}, | |
"source": [ | |
"DataFrame of the fourth plot: **Quick Wins** table" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "VqYaajteyDa_" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>keys</th>\n", | |
" <th>clicks</th>\n", | |
" <th>ctr</th>\n", | |
" <th>impressions</th>\n", | |
" <th>position</th>\n", | |
" <th>sections</th>\n", | |
" <th>norm_impressions</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>/p/te-contamos-cuantos-padrinos-debe-haber-en-...</td>\n", | |
" <td>16706</td>\n", | |
" <td>11.029829</td>\n", | |
" <td>151462</td>\n", | |
" <td>6.45</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>30.282964</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>/p/por-que-es-importante-el-anillo-de-promesa-...</td>\n", | |
" <td>14651</td>\n", | |
" <td>8.082820</td>\n", | |
" <td>181261</td>\n", | |
" <td>5.25</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>36.240947</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>/p/40-frases-de-amor-increibles-para-invitacio...</td>\n", | |
" <td>14217</td>\n", | |
" <td>5.391947</td>\n", | |
" <td>263671</td>\n", | |
" <td>9.74</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>52.717921</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>/p/20-cosas-que-debes-hacer-con-tu-pareja-al-m...</td>\n", | |
" <td>14134</td>\n", | |
" <td>4.672489</td>\n", | |
" <td>302494</td>\n", | |
" <td>8.29</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>60.480154</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>/p/los-5-lugares-mas-romanticos-de-la-ciudad-d...</td>\n", | |
" <td>12458</td>\n", | |
" <td>8.541594</td>\n", | |
" <td>145851</td>\n", | |
" <td>12.45</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>29.161106</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>5</td>\n", | |
" <td>/p/las-10-mejores-tiendas-de-vestidos-de-novia...</td>\n", | |
" <td>10536</td>\n", | |
" <td>2.633190</td>\n", | |
" <td>400123</td>\n", | |
" <td>5.60</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>80.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>6</td>\n", | |
" <td>/p/como-deben-vestir-los-invitados-hombres-en-...</td>\n", | |
" <td>8523</td>\n", | |
" <td>2.882430</td>\n", | |
" <td>295688</td>\n", | |
" <td>10.24</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>59.119369</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>7</td>\n", | |
" <td>/p/cancionesvals-bodas</td>\n", | |
" <td>7151</td>\n", | |
" <td>4.962733</td>\n", | |
" <td>144094</td>\n", | |
" <td>13.12</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>28.809813</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>8</td>\n", | |
" <td>/p/10-cosas-que-toda-novia-debe-tener-el-dia-d...</td>\n", | |
" <td>6977</td>\n", | |
" <td>6.147516</td>\n", | |
" <td>113493</td>\n", | |
" <td>6.88</td>\n", | |
" <td>M > articulo</td>\n", | |
" <td>22.691479</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>9</td>\n", | |
" <td>/g/40-regalos-perfectos-para-la-mama-de-la-nov...</td>\n", | |
" <td>6419</td>\n", | |
" <td>10.805124</td>\n", | |
" <td>59407</td>\n", | |
" <td>14.43</td>\n", | |
" <td>M > galerias</td>\n", | |
" <td>11.877577</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" keys clicks ctr \\\n", | |
"0 /p/te-contamos-cuantos-padrinos-debe-haber-en-... 16706 11.029829 \n", | |
"1 /p/por-que-es-importante-el-anillo-de-promesa-... 14651 8.082820 \n", | |
"2 /p/40-frases-de-amor-increibles-para-invitacio... 14217 5.391947 \n", | |
"3 /p/20-cosas-que-debes-hacer-con-tu-pareja-al-m... 14134 4.672489 \n", | |
"4 /p/los-5-lugares-mas-romanticos-de-la-ciudad-d... 12458 8.541594 \n", | |
"5 /p/las-10-mejores-tiendas-de-vestidos-de-novia... 10536 2.633190 \n", | |
"6 /p/como-deben-vestir-los-invitados-hombres-en-... 8523 2.882430 \n", | |
"7 /p/cancionesvals-bodas 7151 4.962733 \n", | |
"8 /p/10-cosas-que-toda-novia-debe-tener-el-dia-d... 6977 6.147516 \n", | |
"9 /g/40-regalos-perfectos-para-la-mama-de-la-nov... 6419 10.805124 \n", | |
"\n", | |
" impressions position sections norm_impressions \n", | |
"0 151462 6.45 M > articulo 30.282964 \n", | |
"1 181261 5.25 M > articulo 36.240947 \n", | |
"2 263671 9.74 M > articulo 52.717921 \n", | |
"3 302494 8.29 M > articulo 60.480154 \n", | |
"4 145851 12.45 M > articulo 29.161106 \n", | |
"5 400123 5.60 M > articulo 80.000000 \n", | |
"6 295688 10.24 M > articulo 59.119369 \n", | |
"7 144094 13.12 M > articulo 28.809813 \n", | |
"8 113493 6.88 M > articulo 22.691479 \n", | |
"9 59407 14.43 M > galerias 11.877577 " | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"df4 = pd.DataFrame(df)\n", | |
"\n", | |
"#data frame for average ctr\n", | |
"df5 = pd.DataFrame(data = scDict_date)\n", | |
"mean_ctr =df5['ctr'].mean()*100\n", | |
"display(df4.head(10))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "RBttrsfMymZM" | |
}, | |
"source": [ | |
"Plots default configs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "uBacDpguyoXk", | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"\n", | |
" <div class=\"bk-root\">\n", | |
" <a href=\"https://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n", | |
" <span id=\"1001\">Loading BokehJS ...</span>\n", | |
" </div>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"application/javascript": [ | |
"\n", | |
"(function(root) {\n", | |
" function now() {\n", | |
" return new Date();\n", | |
" }\n", | |
"\n", | |
" var force = true;\n", | |
"\n", | |
" if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", | |
" root._bokeh_onload_callbacks = [];\n", | |
" root._bokeh_is_loading = undefined;\n", | |
" }\n", | |
"\n", | |
" var JS_MIME_TYPE = 'application/javascript';\n", | |
" var HTML_MIME_TYPE = 'text/html';\n", | |
" var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", | |
" var CLASS_NAME = 'output_bokeh rendered_html';\n", | |
"\n", | |
" /**\n", | |
" * Render data to the DOM node\n", | |
" */\n", | |
" function render(props, node) {\n", | |
" var script = document.createElement(\"script\");\n", | |
" node.appendChild(script);\n", | |
" }\n", | |
"\n", | |
" /**\n", | |
" * Handle when an output is cleared or removed\n", | |
" */\n", | |
" function handleClearOutput(event, handle) {\n", | |
" var cell = handle.cell;\n", | |
"\n", | |
" var id = cell.output_area._bokeh_element_id;\n", | |
" var server_id = cell.output_area._bokeh_server_id;\n", | |
" // Clean up Bokeh references\n", | |
" if (id != null && id in Bokeh.index) {\n", | |
" Bokeh.index[id].model.document.clear();\n", | |
" delete Bokeh.index[id];\n", | |
" }\n", | |
"\n", | |
" if (server_id !== undefined) {\n", | |
" // Clean up Bokeh references\n", | |
" var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", | |
" cell.notebook.kernel.execute(cmd, {\n", | |
" iopub: {\n", | |
" output: function(msg) {\n", | |
" var id = msg.content.text.trim();\n", | |
" if (id in Bokeh.index) {\n", | |
" Bokeh.index[id].model.document.clear();\n", | |
" delete Bokeh.index[id];\n", | |
" }\n", | |
" }\n", | |
" }\n", | |
" });\n", | |
" // Destroy server and session\n", | |
" var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", | |
" cell.notebook.kernel.execute(cmd);\n", | |
" }\n", | |
" }\n", | |
"\n", | |
" /**\n", | |
" * Handle when a new output is added\n", | |
" */\n", | |
" function handleAddOutput(event, handle) {\n", | |
" var output_area = handle.output_area;\n", | |
" var output = handle.output;\n", | |
"\n", | |
" // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", | |
" if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", | |
" return\n", | |
" }\n", | |
"\n", | |
" var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", | |
"\n", | |
" if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", | |
" toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", | |
" // store reference to embed id on output_area\n", | |
" output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", | |
" }\n", | |
" if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", | |
" var bk_div = document.createElement(\"div\");\n", | |
" bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", | |
" var script_attrs = bk_div.children[0].attributes;\n", | |
" for (var i = 0; i < script_attrs.length; i++) {\n", | |
" toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", | |
" }\n", | |
" // store reference to server id on output_area\n", | |
" output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", | |
" }\n", | |
" }\n", | |
"\n", | |
" function register_renderer(events, OutputArea) {\n", | |
"\n", | |
" function append_mime(data, metadata, element) {\n", | |
" // create a DOM node to render to\n", | |
" var toinsert = this.create_output_subarea(\n", | |
" metadata,\n", | |
" CLASS_NAME,\n", | |
" EXEC_MIME_TYPE\n", | |
" );\n", | |
" this.keyboard_manager.register_events(toinsert);\n", | |
" // Render to node\n", | |
" var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", | |
" render(props, toinsert[toinsert.length - 1]);\n", | |
" element.append(toinsert);\n", | |
" return toinsert\n", | |
" }\n", | |
"\n", | |
" /* Handle when an output is cleared or removed */\n", | |
" events.on('clear_output.CodeCell', handleClearOutput);\n", | |
" events.on('delete.Cell', handleClearOutput);\n", | |
"\n", | |
" /* Handle when a new output is added */\n", | |
" events.on('output_added.OutputArea', handleAddOutput);\n", | |
"\n", | |
" /**\n", | |
" * Register the mime type and append_mime function with output_area\n", | |
" */\n", | |
" OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", | |
" /* Is output safe? */\n", | |
" safe: true,\n", | |
" /* Index of renderer in `output_area.display_order` */\n", | |
" index: 0\n", | |
" });\n", | |
" }\n", | |
"\n", | |
" // register the mime type if in Jupyter Notebook environment and previously unregistered\n", | |
" if (root.Jupyter !== undefined) {\n", | |
" var events = require('base/js/events');\n", | |
" var OutputArea = require('notebook/js/outputarea').OutputArea;\n", | |
"\n", | |
" if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", | |
" register_renderer(events, OutputArea);\n", | |
" }\n", | |
" }\n", | |
"\n", | |
" \n", | |
" if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", | |
" root._bokeh_timeout = Date.now() + 5000;\n", | |
" root._bokeh_failed_load = false;\n", | |
" }\n", | |
"\n", | |
" var NB_LOAD_WARNING = {'data': {'text/html':\n", | |
" \"<div style='background-color: #fdd'>\\n\"+\n", | |
" \"<p>\\n\"+\n", | |
" \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", | |
" \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", | |
" \"</p>\\n\"+\n", | |
" \"<ul>\\n\"+\n", | |
" \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n", | |
" \"<li>use INLINE resources instead, as so:</li>\\n\"+\n", | |
" \"</ul>\\n\"+\n", | |
" \"<code>\\n\"+\n", | |
" \"from bokeh.resources import INLINE\\n\"+\n", | |
" \"output_notebook(resources=INLINE)\\n\"+\n", | |
" \"</code>\\n\"+\n", | |
" \"</div>\"}};\n", | |
"\n", | |
" function display_loaded() {\n", | |
" var el = document.getElementById(\"1001\");\n", | |
" if (el != null) {\n", | |
" el.textContent = \"BokehJS is loading...\";\n", | |
" }\n", | |
" if (root.Bokeh !== undefined) {\n", | |
" if (el != null) {\n", | |
" el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", | |
" }\n", | |
" } else if (Date.now() < root._bokeh_timeout) {\n", | |
" setTimeout(display_loaded, 100)\n", | |
" }\n", | |
" }\n", | |
"\n", | |
"\n", | |
" function run_callbacks() {\n", | |
" try {\n", | |
" root._bokeh_onload_callbacks.forEach(function(callback) {\n", | |
" if (callback != null)\n", | |
" callback();\n", | |
" });\n", | |
" } finally {\n", | |
" delete root._bokeh_onload_callbacks\n", | |
" }\n", | |
" console.debug(\"Bokeh: all callbacks have finished\");\n", | |
" }\n", | |
"\n", | |
" function load_libs(css_urls, js_urls, callback) {\n", | |
" if (css_urls == null) css_urls = [];\n", | |
" if (js_urls == null) js_urls = [];\n", | |
"\n", | |
" root._bokeh_onload_callbacks.push(callback);\n", | |
" if (root._bokeh_is_loading > 0) {\n", | |
" console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", | |
" return null;\n", | |
" }\n", | |
" if (js_urls == null || js_urls.length === 0) {\n", | |
" run_callbacks();\n", | |
" return null;\n", | |
" }\n", | |
" console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", | |
" root._bokeh_is_loading = css_urls.length + js_urls.length;\n", | |
"\n", | |
" function on_load() {\n", | |
" root._bokeh_is_loading--;\n", | |
" if (root._bokeh_is_loading === 0) {\n", | |
" console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", | |
" run_callbacks()\n", | |
" }\n", | |
" }\n", | |
"\n", | |
" function on_error() {\n", | |
" console.error(\"failed to load \" + url);\n", | |
" }\n", | |
"\n", | |
" for (var i = 0; i < css_urls.length; i++) {\n", | |
" var url = css_urls[i];\n", | |
" const element = document.createElement(\"link\");\n", | |
" element.onload = on_load;\n", | |
" element.onerror = on_error;\n", | |
" element.rel = \"stylesheet\";\n", | |
" element.type = \"text/css\";\n", | |
" element.href = url;\n", | |
" console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", | |
" document.body.appendChild(element);\n", | |
" }\n", | |
"\n", | |
" for (var i = 0; i < js_urls.length; i++) {\n", | |
" var url = js_urls[i];\n", | |
" var element = document.createElement('script');\n", | |
" element.onload = on_load;\n", | |
" element.onerror = on_error;\n", | |
" element.async = false;\n", | |
" element.src = url;\n", | |
" console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", | |
" document.head.appendChild(element);\n", | |
" }\n", | |
" };var element = document.getElementById(\"1001\");\n", | |
" if (element == null) {\n", | |
" console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n", | |
" return false;\n", | |
" }\n", | |
"\n", | |
" function inject_raw_css(css) {\n", | |
" const element = document.createElement(\"style\");\n", | |
" element.appendChild(document.createTextNode(css));\n", | |
" document.body.appendChild(element);\n", | |
" }\n", | |
"\n", | |
" var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.2.0.min.js\"];\n", | |
" var css_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.css\"];\n", | |
"\n", | |
" var inline_js = [\n", | |
" function(Bokeh) {\n", | |
" Bokeh.set_log_level(\"info\");\n", | |
" },\n", | |
" \n", | |
" function(Bokeh) {\n", | |
" \n", | |
" },\n", | |
" function(Bokeh) {} // ensure no trailing comma for IE\n", | |
" ];\n", | |
"\n", | |
" function run_inline_js() {\n", | |
" \n", | |
" if ((root.Bokeh !== undefined) || (force === true)) {\n", | |
" for (var i = 0; i < inline_js.length; i++) {\n", | |
" inline_js[i].call(root, root.Bokeh);\n", | |
" }if (force === true) {\n", | |
" display_loaded();\n", | |
" }} else if (Date.now() < root._bokeh_timeout) {\n", | |
" setTimeout(run_inline_js, 100);\n", | |
" } else if (!root._bokeh_failed_load) {\n", | |
" console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", | |
" root._bokeh_failed_load = true;\n", | |
" } else if (force !== true) {\n", | |
" var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n", | |
" cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", | |
" }\n", | |
"\n", | |
" }\n", | |
"\n", | |
" if (root._bokeh_is_loading === 0) {\n", | |
" console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", | |
" run_inline_js();\n", | |
" } else {\n", | |
" load_libs(css_urls, js_urls, function() {\n", | |
" console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", | |
" run_inline_js();\n", | |
" });\n", | |
" }\n", | |
"}(window));" | |
], | |
"application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"<div style='background-color: #fdd'>\\n\"+\n \"<p>\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"</p>\\n\"+\n \"<ul>\\n\"+\n \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n \"<li>use INLINE resources instead, as so:</li>\\n\"+\n \"</ul>\\n\"+\n \"<code>\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"</code>\\n\"+\n \"</div>\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"1001\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };var element = document.getElementById(\"1001\");\n if (element == null) {\n console.error(\"Bokeh: ERROR: autoload.js configured with elementid '1001' but no matching script tag was found. \")\n return false;\n }\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.2.0.min.js\"];\n var css_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.2.0.min.css\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.2.0.min.css\"];\n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n \n function(Bokeh) {\n \n },\n function(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n \n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"1001\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"plot_height = 600\n", | |
"plot_width = 600\n", | |
"#output_file('opportunity_matrix.html',title=device_category+' Plot')\n", | |
"#html = '''<h3>Opportunity Matrix</h3><p>@author: Natzir Turrado: Technical SEO / Data Scientist. <a href=\"https://twitter.com/natzir9\">Twitter > @natzir9</a></p>'''\n", | |
"#sup_title = Div(text=html)\n", | |
"output_notebook()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "YQnk4A1xyogV" | |
}, | |
"source": [ | |
"First plot: : opportunity matrix by URL" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 223 | |
}, | |
"colab_type": "code", | |
"id": "ZU9qKeq2yolr", | |
"outputId": "ebc6eef1-c2c8-4bf8-d3dc-19143c7919ab" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.bokehjs_exec.v0+json": "", | |
"text/html": [ | |
"\n", | |
"<script src=\"http://localhost:59438/autoload.js?bokeh-autoload-element=1002&bokeh-absolute-url=http://localhost:59438&resources=none\" id=\"1002\"></script>" | |
] | |
}, | |
"metadata": { | |
"application/vnd.bokehjs_exec.v0+json": { | |
"server_id": "bd5fd88a73c340629e0ae596dda2ce01" | |
} | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"def modify_doc_p1(doc):\n", | |
" \n", | |
" source = ColumnDataSource(df)\n", | |
"\n", | |
" p1 = figure(title=device_category+' URI Data',x_range = (0,df['ctr'].median()*2), y_range = (0,df['position'].median()*2),toolbar_location=\"above\",width=plot_width, height=plot_height, tools=['pan','tap','box_zoom','wheel_zoom','save','reset'])\n", | |
"\n", | |
" p1.xaxis.axis_label = \"CTR\"\n", | |
" p1.yaxis.axis_label = \"AVG Position\"\n", | |
"\n", | |
" color_mapper = LinearColorMapper(palette = Viridis256, low = df['clicks'].min(), high = df['clicks'].max())\n", | |
" color_bar = ColorBar(color_mapper = color_mapper,location = (0, 0),ticker = BasicTicker())\n", | |
" p1.add_layout(color_bar, 'right')\n", | |
"\n", | |
" low_box = BoxAnnotation(left=0, right=mean_ctr, fill_alpha=0.1, fill_color='red')\n", | |
" high_box = BoxAnnotation(left=mean_ctr, right=100, fill_alpha=0.1, fill_color='green')\n", | |
" p1.add_layout(low_box)\n", | |
" p1.add_layout(high_box)\n", | |
"\n", | |
" label_citation = Label(x=25, y=10, x_units='screen', y_units='screen', text='Natzir Turrado', render_mode='css', border_line_color=None, background_fill_color=None)\n", | |
" label_copyright = Label(x=10, y=10, x_units='screen', y_units='screen', text='©', angle=3.2, render_mode='css', border_line_color=None, background_fill_color=None)\n", | |
" label_color_bar = Label(x=530, y=260, x_units='screen', y_units='screen', text='Clicks', angle=1.6, render_mode='css', border_line_color=None, background_fill_color=None, text_font_size = '10pt', text_font_style='italic')\n", | |
" label_size = Label(x=300, y=10, x_units='screen', y_units='screen', text='Bubble size = Impressions', render_mode='css', border_line_color=None, background_fill_color=None,text_font_size = '10pt', text_font_style='italic')\n", | |
" label_source = Label(x=260, y=500, x_units='screen', y_units='screen', text='Data: Last 3 Mo. of SC data / 25.000 URL', render_mode='css', border_line_color=None, background_fill_color=None, text_font_size = '8pt', text_font_style='italic')\n", | |
"\n", | |
" p1.scatter(x = 'ctr', y = 'position', size = 'norm_impressions', legend = None, fill_color = transform('clicks', color_mapper), source = source)\n", | |
"\n", | |
" p1.add_tools(HoverTool(tooltips = [('uri', '@keys'),('clicks', '@clicks'),('impressions', '@impressions'),('ctr', '@ctr'),('avg position', '@position'),('section', '@sections')]))\n", | |
"\n", | |
" taptool = p1.select(type=TapTool)\n", | |
" search_console_url = 'https://search.google.com/search-console/performance/search-analytics?resource_id='+site_no_slash+'%2F&breakdown=query&page=!'+site_no_slash+'@keys'\n", | |
" taptool.callback = OpenURL(url=search_console_url)\n", | |
"\n", | |
" p1.add_layout(label_citation)\n", | |
" p1.add_layout(label_copyright)\n", | |
" p1.add_layout(label_color_bar)\n", | |
" p1.add_layout(label_size)\n", | |
" p1.add_layout(label_source)\n", | |
"\n", | |
" p1.toolbar.logo = None\n", | |
" \n", | |
" #Slider\n", | |
" slider_p1 = Slider(title='Select the number of URIs', start=0, end=len(df.index), step=1, value=100)\n", | |
"\n", | |
" def callback_1(attr,old,new):\n", | |
" source.data = ColumnDataSource.from_df(df[:slider_p1.value])\n", | |
"\n", | |
" slider_p1.on_change('value',callback_1)\n", | |
" \n", | |
" doc.add_root(column(slider_p1, p1))\n", | |
" \n", | |
" callback_1(None,None,None) \n", | |
"\n", | |
"show(modify_doc_p1)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "WD_LohZ2yos6" | |
}, | |
"source": [ | |
"Second plot: opportunity matrix by Section" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "bx5pDg2byox0" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.bokehjs_exec.v0+json": "", | |
"text/html": [ | |
"\n", | |
"<script src=\"http://localhost:59440/autoload.js?bokeh-autoload-element=1003&bokeh-absolute-url=http://localhost:59440&resources=none\" id=\"1003\"></script>" | |
] | |
}, | |
"metadata": { | |
"application/vnd.bokehjs_exec.v0+json": { | |
"server_id": "332e6357624d4aee80a77070e34d0b09" | |
} | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"def modify_doc_p2(doc):\n", | |
"\n", | |
" source2 = ColumnDataSource(df2)\n", | |
"\n", | |
" p2 = figure(title=device_category+' Section Data', x_range = (0,df2['ctr'].mean()*2), y_range = (0,df2['position'].mean()*2),toolbar_location=\"above\",width=plot_width, height=plot_height, tools=['pan','tap','box_zoom','wheel_zoom','save','reset'])\n", | |
"\n", | |
" p2.xaxis.axis_label = \"CTR\"\n", | |
" p2.yaxis.axis_label = \"AVG Position\"\n", | |
"\n", | |
" color_mapper = LinearColorMapper(palette = Viridis256, low = df2['clicks'].min(), high = df2['clicks'].max())\n", | |
" color_bar = ColorBar(color_mapper = color_mapper,location = (0, 0),ticker = BasicTicker())\n", | |
" p2.add_layout(color_bar, 'right')\n", | |
"\n", | |
" label_citation2 = Label(x=25, y=10, x_units='screen', y_units='screen', text='Natzir Turrado', render_mode='css', border_line_color=None, background_fill_color=None)\n", | |
" label_copyright2 = Label(x=10, y=10, x_units='screen', y_units='screen', text='©', angle=3.2, render_mode='css', border_line_color=None, background_fill_color=None)\n", | |
" label_color_bar2 = Label(x=530, y=260, x_units='screen', y_units='screen', text='Clicks', angle=1.6, render_mode='css', border_line_color=None, background_fill_color=None, text_font_size = '10pt', text_font_style='italic')\n", | |
" label_size2 = Label(x=300, y=10, x_units='screen', y_units='screen', text='Bubble size = Impressions', render_mode='css', border_line_color=None, background_fill_color=None,text_font_size = '10pt', text_font_style='italic')\n", | |
" label_source2 = Label(x=260, y=500, x_units='screen', y_units='screen', text='Data: Last 3 Mo. of SC data / 25.000 URL', render_mode='css', border_line_color=None, background_fill_color=None, text_font_size = '8pt', text_font_style='italic')\n", | |
"\n", | |
" p2.scatter(x = 'ctr', y = 'position', size = 'norm_impressions', legend = None, fill_color = transform('clicks', color_mapper), source = source2)\n", | |
"\n", | |
" p2.add_tools(HoverTool(tooltips = [('section', '@sections'),('clicks', '@clicks'),('impressions', '@impressions'),('ctr', '@ctr'),('avg position', '@position')]))\n", | |
"\n", | |
" p2.add_layout(label_citation2)\n", | |
" p2.add_layout(label_copyright2)\n", | |
" p2.add_layout(label_color_bar2)\n", | |
" p2.add_layout(label_size2)\n", | |
" p2.add_layout(label_source2)\n", | |
"\n", | |
" p2.toolbar.logo = None\n", | |
"\n", | |
" #Slider\n", | |
" slider_p2 = Slider(title='Select the number of Sections', start=0, end=len(df2.index), step=1, value=5)\n", | |
"\n", | |
" def callback_p2(attr,old,new):\n", | |
" source2.data = ColumnDataSource.from_df(df2[:slider_p2.value])\n", | |
"\n", | |
" slider_p2.on_change('value',callback_p2)\n", | |
"\n", | |
" doc.add_root(column(slider_p2, p2))\n", | |
" \n", | |
" callback_p2(None,None,None) \n", | |
"\n", | |
"show(modify_doc_p2)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "8s8mv-3LypFK" | |
}, | |
"source": [ | |
"Third plot: Topics (IDF)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "RBlmSmBxypJv" | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.bokehjs_exec.v0+json": "", | |
"text/html": [ | |
"\n", | |
"<script src=\"http://localhost:59442/autoload.js?bokeh-autoload-element=1004&bokeh-absolute-url=http://localhost:59442&resources=none\" id=\"1004\"></script>" | |
] | |
}, | |
"metadata": { | |
"application/vnd.bokehjs_exec.v0+json": { | |
"server_id": "57eb8a3c16d1400aabd97af86e7b8c44" | |
} | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"def modify_doc_p3(doc):\n", | |
" \n", | |
" source3 = ColumnDataSource(df_query_grouped)\n", | |
"\n", | |
" p3 = figure(title=device_category+' Topic Data', x_range = (0,df_query_grouped['ctr'].mean()*2), y_range = (0,df_query_grouped['position'].mean()*2),toolbar_location=\"above\",width=plot_width, height=plot_height, tools=['pan','tap','box_zoom','wheel_zoom','save','reset'])\n", | |
"\n", | |
" p3.xaxis.axis_label = \"CTR\"\n", | |
" p3.yaxis.axis_label = \"AVG Position\"\n", | |
"\n", | |
" color_mapper = LinearColorMapper(palette = Viridis256, low = df_query_grouped['clicks'].min(), high = df_query_grouped['clicks'].max())\n", | |
" color_bar = ColorBar(color_mapper = color_mapper,location = (0, 0),ticker = BasicTicker())\n", | |
" p3.add_layout(color_bar, 'right')\n", | |
"\n", | |
" label_citation3 = Label(x=25, y=10, x_units='screen', y_units='screen', text='Natzir Turrado', render_mode='css', border_line_color=None, background_fill_color=None)\n", | |
" label_copyright3 = Label(x=10, y=10, x_units='screen', y_units='screen', text='©', angle=3.2, render_mode='css', border_line_color=None, background_fill_color=None)\n", | |
" label_color_bar3 = Label(x=530, y=260, x_units='screen', y_units='screen', text='Clicks', angle=1.6, render_mode='css', border_line_color=None, background_fill_color=None, text_font_size = '10pt', text_font_style='italic')\n", | |
" label_size3 = Label(x=300, y=10, x_units='screen', y_units='screen', text='Bubble size = Impressions', render_mode='css', border_line_color=None, background_fill_color=None,text_font_size = '10pt', text_font_style='italic')\n", | |
" label_source3 = Label(x=260, y=500, x_units='screen', y_units='screen', text='Data: Last 3 Mo. of SC data / 25.000 URL', render_mode='css', border_line_color=None, background_fill_color=None, text_font_size = '8pt', text_font_style='italic')\n", | |
"\n", | |
" p3.scatter(x = 'ctr', y = 'position', size = 'norm_impressions', legend = None, fill_color = transform('clicks', color_mapper), source = source3)\n", | |
"\n", | |
" p3.add_tools(HoverTool(tooltips = [('topic', '@topic'),('clicks', '@clicks'),('impressions', '@impressions'),('ctr', '@ctr'),('avg position', '@position'),('idf weight', '@idf_weight')]))\n", | |
"\n", | |
" taptool = p3.select(type=TapTool)\n", | |
" search_console_url = 'https://search.google.com/search-console/performance/search-analytics?resource_id='+site_no_slash+'%2F&breakdown=query&query=*@topic'\n", | |
" taptool.callback = OpenURL(url=search_console_url)\n", | |
"\n", | |
" p3.add_layout(label_citation3)\n", | |
" p3.add_layout(label_copyright3)\n", | |
" p3.add_layout(label_color_bar3)\n", | |
" p3.add_layout(label_size3)\n", | |
" p3.add_layout(label_source3)\n", | |
"\n", | |
" p3.toolbar.logo = None\n", | |
" \n", | |
" #Slider\n", | |
" slider_p3 = Slider(title='Select the number of Topics', start=0, end=len(df_query_grouped.index), step=1, value=100) \n", | |
"\n", | |
" def callback_p3(attr,old,new):\n", | |
" source3.data = ColumnDataSource.from_df(df_query_grouped[:slider_p3.value])\n", | |
"\n", | |
" slider_p3.on_change('value',callback_p3)\n", | |
" \n", | |
" doc.add_root(column(slider_p3, p3))\n", | |
" \n", | |
" callback_p3(None,None,None) \n", | |
"\n", | |
"show(modify_doc_p3)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"colab_type": "text", | |
"id": "1JNdh_VEzECx" | |
}, | |
"source": [ | |
"Fourth plot: table plot" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"colab": {}, | |
"colab_type": "code", | |
"id": "0Z8NIDyszEHx", | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.bokehjs_exec.v0+json": "", | |
"text/html": [ | |
"\n", | |
"<script src=\"http://localhost:59444/autoload.js?bokeh-autoload-element=1005&bokeh-absolute-url=http://localhost:59444&resources=none\" id=\"1005\"></script>" | |
] | |
}, | |
"metadata": { | |
"application/vnd.bokehjs_exec.v0+json": { | |
"server_id": "2315697636ef408baea5b80596a59a8c" | |
} | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"def modify_doc_p4(doc):\n", | |
" \n", | |
" source4 = ColumnDataSource(data=dict(keys=[], clicks=[], ctr=[], impressions=[], position=[], norm_impressions=[], sections=[]))\n", | |
"\n", | |
" columns = [TableColumn(field=\"keys\", title=\"Keys\",width=160),TableColumn(field=\"impressions\", title=\"Impressions\",width=35),TableColumn(field=\"clicks\", title=\"Clicks\",width=35),TableColumn(field=\"ctr\", title=\"CTR\",width=15),TableColumn(field=\"position\", title=\"AVG Pos.\",width=15),TableColumn(field=\"sections\", title=\"Section\",width=50)]\n", | |
"\n", | |
" p4 = DataTable(source=source4, columns=columns, width=plot_width, height=plot_height)\n", | |
" \n", | |
" #Sliders\n", | |
" slider_p4_ctr = RangeSlider(title='CTR',start=min(df4.ctr), end=max(df4.ctr), value=(min(df4.ctr), df4.ctr.mean()), step=0.1)\n", | |
" slider_p4_position = RangeSlider(title='AVG Position',start=min(df4.position), end=max(df4.position), value=(3,6), step=1)\n", | |
" slider_p4_impressions = RangeSlider(title='impressions',start=min(df4.impressions), end=max(df4.impressions), value=(df4.impressions.quantile(0.33),max(df4.impressions)), step=50)\n", | |
" slider_p4_clicks = RangeSlider(title='clicks',start=min(df4.clicks), end=max(df4.clicks), value=(min(df4.clicks),max(df4.clicks)), step=50)\n", | |
"\n", | |
" def callback_p4(attr, old, new):\n", | |
" source4.data = ColumnDataSource.from_df(df4[df4.ctr.between(slider_p4_ctr.value[0],slider_p4_ctr.value[1]) & df4.position.between(slider_p4_position.value[0],slider_p4_position.value[1]) & df4.impressions.between(slider_p4_impressions.value[0],slider_p4_impressions.value[1]) & df4.clicks.between(slider_p4_clicks.value[0],slider_p4_clicks.value[1])])\n", | |
"\n", | |
" slider_p4_ctr.on_change('value',callback_p4)\n", | |
" slider_p4_position.on_change('value',callback_p4)\n", | |
" slider_p4_impressions.on_change('value',callback_p4)\n", | |
" slider_p4_clicks.on_change('value',callback_p4)\n", | |
"\n", | |
" #Export button\n", | |
" download_button = Button(label='Save Table to CSV', button_type=\"success\")\n", | |
" download_button.callback = CustomJS(args=dict(source=source4), code=\"\"\"\n", | |
"\n", | |
" var data = source.data;\n", | |
" var filetext = 'Keys,Impressions,Clicks,CTR,Position,Section\\\\n';\n", | |
" if (data.hasOwnProperty('keys')) {\n", | |
" for (i=0; i < data['keys'].length; i++) {\n", | |
" var currRow = [data['keys'][i].toString(),\n", | |
" data['impressions'][i].toString(),\n", | |
" data['clicks'][i].toString(),\n", | |
" data['ctr'][i].toString(),\n", | |
" data['position'][i].toString(),\n", | |
" data['sections'][i].toString().concat('\\\\n')];\n", | |
"\n", | |
" var joined = currRow.join();\n", | |
" filetext = filetext.concat(joined);\n", | |
" }\n", | |
" }\n", | |
" else { filetext += 'no data';}\n", | |
"\n", | |
" var filename = 'opportunities.csv';\n", | |
" var blob = new Blob([filetext], { type: 'text/csv;charset=utf-8;' });\n", | |
" //addresses IE\n", | |
" if (navigator.msSaveBlob) {\n", | |
" navigator.msSaveBlob(blob, filename);\n", | |
" }\n", | |
" else {\n", | |
" var link = document.createElement('a');\n", | |
" link = document.createElement('a')\n", | |
" link.href = URL.createObjectURL(blob);\n", | |
" link.download = filename\n", | |
" link.target = '_blank';\n", | |
" link.style.visibility = 'hidden';\n", | |
" link.dispatchEvent(new MouseEvent('click'))\n", | |
" }\n", | |
" \"\"\")\n", | |
"\n", | |
" doc.add_root(column(slider_p4_ctr,slider_p4_position,slider_p4_impressions,slider_p4_clicks,download_button, p4))\n", | |
" \n", | |
" callback_p4(None,None,None)\n", | |
" \n", | |
"show(modify_doc_p4)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"#Layout\n", | |
"#l1 = layout(widgetbox(slider_p1),p1)\n", | |
"#l2 = layout(widgetbox(slider_p2),p2)\n", | |
"#l3 = layout(widgetbox(slider_p3),p3)\n", | |
"#l4 = layout(widgetbox(slider_p4_ctr,slider_p4_position,slider_p4_impressions,slider_p4_clicks,download_button,p4))\n", | |
"#uri_panel = Panel(child=l1, title='URI')\n", | |
"#sections_panel = Panel(child=l2, title='Sections')\n", | |
"#topics_panel = Panel(child=l3, title='Topics/IDF')\n", | |
"#data_table_panel = Panel(child=l4, title='Quick Wins')\n", | |
"\n", | |
"#tabs = Tabs(tabs=[uri_panel, sections_panel,topics_panel, data_table_panel])\n", | |
"\n", | |
"#plot_all = column(sup_title, tabs)\n", | |
"#show(plot_all)\n", | |
"#callbackp1(None, None, None)\n", | |
"#callbackp2(None, None, None)\n", | |
"#callbackp3(None, None, None)\n", | |
"#callbackp4(None, None, None)\n", | |
"#curdoc().title = device_category+' Plot'\n", | |
"#curdoc().add_root(layout)" | |
] | |
} | |
], | |
"metadata": { | |
"colab": { | |
"name": "bubbles.ipynb", | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment