Last active
September 13, 2024 15:46
-
-
Save adamsilverstein/3e61dae002161f25b34d1ea85278522b to your computer and use it in GitHub Desktop.
wordpress-sites-using-react.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"history_visible": true, | |
"authorship_tag": "ABX9TyNMm/k3t6TkYb2rz3WLSNnB", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/adamsilverstein/3e61dae002161f25b34d1ea85278522b/wordpress-sites-using-react.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Setup" | |
], | |
"metadata": { | |
"id": "4G2WkwMPzxbT" | |
} | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "qTmLBxDxBAZL" | |
}, | |
"source": [ | |
"### Provide your credentials to the runtime" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"id": "SeTJb51SKs_W", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "ee94171d-9851-4d31-a331-a234aa6d3f71" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Authenticated\n" | |
] | |
} | |
], | |
"source": [ | |
"from google.colab import auth\n", | |
"# Authenticate the current Colab user so later cells can call Google Cloud APIs.\n", | |
"auth.authenticate_user()\n", | |
"print('Authenticated')\n", | |
"# Google Cloud project passed to bigquery.Client() in the query cell.\n", | |
"project_id = 'wpp-research'" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "goQQ96EDKs_7" | |
}, | |
"source": [ | |
"### Declare the Cloud project ID which will be used throughout this notebook\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from google.cloud.bigquery import magics\n", | |
"# Reuse the project_id declared in the credentials cell so the project name is set in one place.\n", | |
"# To use your own Google Cloud Platform project, change project_id there.\n", | |
"magics.context.project = project_id" | |
], | |
"metadata": { | |
"id": "YdTgQYtSoOoE" | |
}, | |
"execution_count": 18, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Add a helper to get the latest dataset" | |
], | |
"metadata": { | |
"id": "yV85Ec6A9FED" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from datetime import datetime, timedelta\n", | |
"\n", | |
"def get_first_of_previous_month():\n", | |
"    \"\"\"Return the first day of the previous calendar month, formatted as 'YYYY_MM_DD'.\"\"\"\n", | |
"    now = datetime.now()\n", | |
"    if now.month == 1:\n", | |
"        # January wraps around to December of the previous year.\n", | |
"        year, month = now.year - 1, 12\n", | |
"    else:\n", | |
"        year, month = now.year, now.month - 1\n", | |
"    return datetime(year, month, 1).strftime('%Y_%m_%d')\n", | |
"\n", | |
"# Datasets are updated monthly; this selects the latest one, e.g. \"2023_06_01\".\n", | |
"dataset_date = get_first_of_previous_month()" | |
], | |
"metadata": { | |
"id": "stNLljYnR355" | |
}, | |
"execution_count": 25, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "UMKGkkZEPVRu" | |
}, | |
"source": [ | |
"### Optional: Enable data table display\n", | |
"\n", | |
"Colab includes the ``google.colab.data_table`` package that can be used to display large pandas dataframes as an interactive data table.\n", | |
"It can be enabled with:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"id": "LMNA-vBHPyHz", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "43b2eb23-d197-4601-bae5-cd601bcca009" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"The google.colab.data_table extension is already loaded. To reload it, use:\n", | |
" %reload_ext google.colab.data_table\n" | |
] | |
} | |
], | |
"source": [ | |
"# Optional: load the google.colab.data_table IPython extension.\n", | |
"%load_ext google.colab.data_table" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from google.colab import data_table\n", | |
"# Optional: display pandas DataFrames with Colab's interactive data table formatter.\n", | |
"data_table.enable_dataframe_formatter()" | |
], | |
"metadata": { | |
"id": "JlBfb2k3JpRS" | |
}, | |
"execution_count": 14, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Queries" | |
], | |
"metadata": { | |
"id": "B8m1-MgQyOa6" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 143 | |
}, | |
"id": "WdhqTRxkxxJU", | |
"outputId": "a153795a-1ccd-4fbe-bd9e-903a2688d527" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" react_plus_wordpress_site_count wordpress_site_count react_site_count\n", | |
"0 475601 13192035 3559550" | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-37011f28-54c0-48d7-8f9a-4ef5c07a7df1\" class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>react_plus_wordpress_site_count</th>\n", | |
" <th>wordpress_site_count</th>\n", | |
" <th>react_site_count</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>475601</td>\n", | |
" <td>13192035</td>\n", | |
" <td>3559550</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <div class=\"colab-df-buttons\">\n", | |
"\n", | |
" <div class=\"colab-df-container\">\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-37011f28-54c0-48d7-8f9a-4ef5c07a7df1')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
"\n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" .colab-df-buttons div {\n", | |
" margin-bottom: 4px;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-37011f28-54c0-48d7-8f9a-4ef5c07a7df1 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-37011f28-54c0-48d7-8f9a-4ef5c07a7df1');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
"\n", | |
"\n", | |
" </div>\n", | |
" </div>\n" | |
], | |
"application/vnd.google.colaboratory.intrinsic+json": { | |
"type": "dataframe", | |
"variable_name": "site_counts_data", | |
"summary": "{\n \"name\": \"site_counts_data\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"react_plus_wordpress_site_count\",\n \"properties\": {\n \"dtype\": \"Int64\",\n \"num_unique_values\": 1,\n \"samples\": [\n 475601\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"wordpress_site_count\",\n \"properties\": {\n \"dtype\": \"Int64\",\n \"num_unique_values\": 1,\n \"samples\": [\n 13192035\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"react_site_count\",\n \"properties\": {\n \"dtype\": \"Int64\",\n \"num_unique_values\": 1,\n \"samples\": [\n 3559550\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" | |
}, | |
"application/vnd.google.colaboratory.module+javascript": "\n import \"https://ssl.gstatic.com/colaboratory/data_table/81954c9606dcf997/data_table.js\";\n\n const table = window.createDataTable({\n data: [[{\n 'v': 0,\n 'f': \"0\",\n },\n{\n 'v': 475601,\n 'f': \"475601\",\n },\n{\n 'v': 13192035,\n 'f': \"13192035\",\n },\n{\n 'v': 3559550,\n 'f': \"3559550\",\n }]],\n columns: [[\"number\", \"index\"], [\"number\", \"react_plus_wordpress_site_count\"], [\"number\", \"wordpress_site_count\"], [\"number\", \"react_site_count\"]],\n columnOptions: [{\"width\": \"1px\", \"className\": \"index_column\"}],\n rowsPerPage: 25,\n helpUrl: \"https://colab.research.google.com/notebooks/data_table.ipynb\",\n suppressOutputScrolling: true,\n minimumWidth: undefined,\n });\n\n function appendQuickchartButton(parentElement) {\n let quickchartButtonContainerElement = document.createElement('div');\n quickchartButtonContainerElement.innerHTML = `\n<div id=\"df-19ff3770-cee1-49d7-a95c-179a664423ad\">\n <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-19ff3770-cee1-49d7-a95c-179a664423ad')\"\n title=\"Suggest charts\"\n style=\"display:none;\">\n \n<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n width=\"24px\">\n <g>\n <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n </g>\n</svg>\n </button>\n \n<style>\n .colab-df-quickchart {\n --bg-color: #E8F0FE;\n --fill-color: #1967D2;\n --hover-bg-color: #E2EBFA;\n --hover-fill-color: #174EA6;\n --disabled-fill-color: #AAA;\n --disabled-bg-color: #DDD;\n }\n\n [theme=dark] .colab-df-quickchart {\n --bg-color: #3B4455;\n --fill-color: #D2E3FC;\n --hover-bg-color: #434B5C;\n --hover-fill-color: #FFFFFF;\n --disabled-bg-color: #3B4455;\n --disabled-fill-color: #666;\n }\n\n .colab-df-quickchart {\n background-color: var(--bg-color);\n border: none;\n border-radius: 50%;\n cursor: pointer;\n display: none;\n 
fill: var(--fill-color);\n height: 32px;\n padding: 0;\n width: 32px;\n }\n\n .colab-df-quickchart:hover {\n background-color: var(--hover-bg-color);\n box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n fill: var(--button-hover-fill-color);\n }\n\n .colab-df-quickchart-complete:disabled,\n .colab-df-quickchart-complete:disabled:hover {\n background-color: var(--disabled-bg-color);\n fill: var(--disabled-fill-color);\n box-shadow: none;\n }\n\n .colab-df-spinner {\n border: 2px solid var(--fill-color);\n border-color: transparent;\n border-bottom-color: var(--fill-color);\n animation:\n spin 1s steps(1) infinite;\n }\n\n @keyframes spin {\n 0% {\n border-color: transparent;\n border-bottom-color: var(--fill-color);\n border-left-color: var(--fill-color);\n }\n 20% {\n border-color: transparent;\n border-left-color: var(--fill-color);\n border-top-color: var(--fill-color);\n }\n 30% {\n border-color: transparent;\n border-left-color: var(--fill-color);\n border-top-color: var(--fill-color);\n border-right-color: var(--fill-color);\n }\n 40% {\n border-color: transparent;\n border-right-color: var(--fill-color);\n border-top-color: var(--fill-color);\n }\n 60% {\n border-color: transparent;\n border-right-color: var(--fill-color);\n }\n 80% {\n border-color: transparent;\n border-right-color: var(--fill-color);\n border-bottom-color: var(--fill-color);\n }\n 90% {\n border-color: transparent;\n border-bottom-color: var(--fill-color);\n }\n }\n</style>\n\n <script>\n async function quickchart(key) {\n const quickchartButtonEl =\n document.querySelector('#' + key + ' button');\n quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n quickchartButtonEl.classList.add('colab-df-spinner');\n try {\n const charts = await google.colab.kernel.invokeFunction(\n 'suggestCharts', [key], {});\n } catch (error) {\n console.error('Error during call to suggestCharts:', error);\n }\n 
quickchartButtonEl.classList.remove('colab-df-spinner');\n quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n }\n (() => {\n let quickchartButtonEl =\n document.querySelector('#df-19ff3770-cee1-49d7-a95c-179a664423ad button');\n quickchartButtonEl.style.display =\n google.colab.kernel.accessAllowed ? 'block' : 'none';\n })();\n </script>\n</div>`;\n parentElement.appendChild(quickchartButtonContainerElement);\n }\n\n appendQuickchartButton(table);\n " | |
}, | |
"metadata": {}, | |
"execution_count": 24 | |
} | |
], | |
"source": [ | |
"from google.cloud import bigquery\n", | |
"\n", | |
"\n", | |
"# BigQuery client bound to the project declared in the credentials cell.\n", | |
"client = bigquery.Client(project_id)\n", | |
"\n", | |
"# Count distinct page URLs detected as WordPress, as React, and as both, for one monthly dataset.\n", | |
"# NOTE(review): 'sites' are counted as distinct `page` URLs from `httparchive.all.pages`; if only\n", | |
"# home pages should count, an additional filter may be needed - confirm against the table schema.\n", | |
"query = f\"\"\"\n", | |
"    WITH\n", | |
"      wordpress_sites AS (\n", | |
"        SELECT DISTINCT page AS url\n", | |
"        FROM\n", | |
"          `httparchive.all.pages`,\n", | |
"          UNNEST(technologies) AS technologies\n", | |
"        WHERE\n", | |
"          -- dataset_date is 'YYYY_MM_DD', which is not a DATE literal format, so parse it explicitly.\n", | |
"          date = PARSE_DATE('%Y_%m_%d', '{dataset_date}')\n", | |
"          AND technologies.technology = 'WordPress'\n", | |
"      ),\n", | |
"      react_sites AS (\n", | |
"        SELECT DISTINCT page AS url\n", | |
"        FROM\n", | |
"          `httparchive.all.pages`,\n", | |
"          UNNEST(technologies) AS technologies\n", | |
"        WHERE\n", | |
"          date = PARSE_DATE('%Y_%m_%d', '{dataset_date}')\n", | |
"          AND technologies.technology = 'React'\n", | |
"      )\n", | |
"\n", | |
"    -- The join keeps only URLs present in both lists; the scalar subqueries give each list's total.\n", | |
"    SELECT\n", | |
"      COUNT(DISTINCT url) AS react_plus_wordpress_site_count,\n", | |
"      (SELECT COUNT(DISTINCT url) FROM wordpress_sites) AS wordpress_site_count,\n", | |
"      (SELECT COUNT(DISTINCT url) FROM react_sites) AS react_site_count\n", | |
"    FROM\n", | |
"      wordpress_sites\n", | |
"    JOIN\n", | |
"      react_sites\n", | |
"    USING (url)\n", | |
"\"\"\"\n", | |
"\n", | |
"query_job = client.query(query)\n", | |
"\n", | |
"# Convert the single-row result to a DataFrame and display it.\n", | |
"site_counts_data = query_job.to_dataframe()\n", | |
"site_counts_data.head(1000)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# Overlap between the two technologies, relative to each technology's own total.\n", | |
"both_count = site_counts_data['react_plus_wordpress_site_count']\n", | |
"\n", | |
"# Despite the names, these hold ratios; they are scaled to percentages only when printed.\n", | |
"percent_of_wordpress_sites_also_using_react = both_count / site_counts_data['wordpress_site_count']\n", | |
"percent_of_react_sites_also_using_wordpress = both_count / site_counts_data['react_site_count']\n", | |
"\n", | |
"# Index 0 selects the value from the single result row.\n", | |
"print(f\"{percent_of_wordpress_sites_also_using_react[0]*100:.2f}% of WordPress sites also use React\")\n", | |
"print(f\"{percent_of_react_sites_also_using_wordpress[0]*100:.2f}% of React sites also use WordPress\")\n", | |
"\n", | |
"\n" | |
], | |
"metadata": { | |
"id": "EoBG3oodDagQ", | |
"outputId": "7cf779dd-7576-4864-e011-3699d2d0c8e2", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
} | |
}, | |
"execution_count": 26, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"3.61% of WordPress sites also use React\n", | |
"13.36% of React sites also use WordPress\n" | |
] | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment