Created
November 27, 2023 19:48
-
-
Save adamsilverstein/0c52afe02e61bc37b04dac54842407cf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%bigquery bytype
-- Embed-block usage by embed type, computed from the HTTP Archive
-- 2023-10-01 desktop crawl for pages whose payload reports
-- `_cms.wordpress.has_embed_block` as true.
--
-- Output columns (one row per embed type, most widely used first):
--   embed_type              key found in `embed_block_count.total_by_type`
--   site_count              distinct URLs that report that type
--   percent_of_sites        site_count as a percentage of all URLs with embeds
--   average_embeds_per_page mean reported count among URLs that have the type
WITH
-- Pages that report at least one embed block, with the raw per-type JSON.
embed_data AS (
  SELECT
    url,
    JSON_EXTRACT(payload, '$._cms.wordpress.has_embed_block') AS has_embed_block,
    CAST(JSON_EXTRACT(payload, '$._cms.wordpress.embed_block_count.total') AS FLOAT64)
      AS embed_block_count_total,
    -- Presumably a flat JSON object of the form {"<type>": <count>, ...};
    -- verify against the custom metric that produces it.
    JSON_EXTRACT(payload, '$._cms.wordpress.embed_block_count.total_by_type') AS embeds
  FROM `httparchive.pages.2023_10_01_desktop`
  WHERE JSON_EXTRACT(payload, '$._cms.wordpress.has_embed_block') = 'true'
),
-- One row per (url, `type":count` fragment).
-- `[^"]+` keeps a match inside a single key, and `\d+` requires at least one
-- digit so entries with non-numeric values are skipped instead of yielding an
-- empty count that cannot be cast.
keys AS (
  SELECT
    url,
    key_value
  FROM embed_data
  CROSS JOIN UNNEST(REGEXP_EXTRACT_ALL(embeds, r'"([^"]+"\s*:\s*\d+)')) AS key_value
),
-- Split each fragment into the embed type and its integer count.
site_block_counts AS (
  SELECT
    url,
    REGEXP_EXTRACT(key_value, r'^([^"]+)"') AS key,
    -- Capture every digit of the count (a single-digit capture would turn
    -- 12 into 1); SAFE_CAST yields NULL rather than failing the query on a
    -- value too large for INT64.
    SAFE_CAST(REGEXP_EXTRACT(key_value, r':\s*(\d+)$') AS INT64) AS value
  FROM keys
),
-- Denominator for the percentage; embed_data is already restricted to pages
-- with embeds, so no further filter is needed here.
total_sites AS (
  SELECT COUNT(DISTINCT url) AS total_sites_with_embeds
  FROM embed_data
),
-- Per-type aggregates.
site_rollup AS (
  SELECT
    key,
    AVG(value) AS average_count,
    COUNT(DISTINCT url) AS site_count
  FROM site_block_counts
  GROUP BY key
)
SELECT
  r.key AS embed_type,
  r.site_count,
  r.site_count / t.total_sites_with_embeds * 100 AS percent_of_sites,
  r.average_count AS average_embeds_per_page
FROM site_rollup AS r
CROSS JOIN total_sites AS t
-- Sorting belongs on the outermost query: an ORDER BY inside a CTE does not
-- guarantee the order of the final result. The second key makes ties stable.
ORDER BY r.site_count DESC, embed_type
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Results