Skip to content

Instantly share code, notes, and snippets.

@tjmonsi
Created June 27, 2019 05:00
Show Gist options
  • Save tjmonsi/9199ec2f192040d00952da31fd179a61 to your computer and use it in GitHub Desktop.
Save tjmonsi/9199ec2f192040d00952da31fd179a61 to your computer and use it in GitHub Desktop.
#standardSQL
-- counts top fonts that are locally hosted
WITH
response_bodies AS (
SELECT * FROM `httparchive.sample_data.response_bodies_desktop_1k`
-- UNION ALL
-- SELECT * FROM `httparchive.sample_data.response_bodies_mobile_1k`
),
t1 AS (
-- this is for local
SELECT * FROM response_bodies WHERE NET.HOST(page) = NET.HOST(url)
),
temp AS (
SELECT REGEXP_EXTRACT_ALL(body, r"\@font-face\s*\{[^}]+\}") AS font_display_arr, url, page
FROM t1 WHERE STRPOS(body, "@font-face") > 0
),
temp2 AS (
SELECT REGEXP_EXTRACT_ALL(font_face, r"font-family\s*:\s*['\"][\w\s]+['\"]") AS font_family
FROM temp, UNNEST(font_display_arr) AS font_face
),
processedTable AS (
SELECT REPLACE(REPLACE(TRIM(SPLIT(font_family, ":")[OFFSET(1)]), '"', ''), "'", '') AS font_family
FROM temp2, UNNEST(font_family) AS font_family
)
SELECT value AS font_family, count FROM (
SELECT
APPROX_TOP_COUNT(font_family, 200) AS f0
FROM processedTable
), UNNEST(f0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment