Skip to content

Instantly share code, notes, and snippets.

@tdsmith
Created December 6, 2019 22:19
Show Gist options
  • Save tdsmith/4ba247305886521d16082947494eb2e8 to your computer and use it in GitHub Desktop.
Save tdsmith/4ba247305886521d16082947494eb2e8 to your computer and use it in GitHub Desktop.
CREATE TEMP FUNCTION json2array(json STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
return Object.keys(JSON.parse(json || "{}")).map(x => x.replace("CSSProperty", ""));
""";
WITH sample AS (
SELECT json2array(JSON_EXTRACT(payload, "$._blinkFeatureFirstUsed.CSSFeatures")) AS css
FROM httparchive.pages.2019_11_01_desktop
)
, distinct_properties AS (
SELECT
property,
COUNT(*)/(SELECT COUNT(*) FROM sample) AS probability
FROM
sample
CROSS JOIN UNNEST(css) AS property
GROUP BY property
)
, property_pairs AS (
SELECT
x.property AS property_a,
x.probability AS probability_a,
y.property AS property_b,
y.probability AS probability_b
FROM
distinct_properties x
CROSS JOIN distinct_properties y
WHERE x.property < y.property
)
, pairs_exist AS (
SELECT
property_a,
property_b,
COUNTIF(property_a IN UNNEST(css) AND property_b IN UNNEST(css))/(SELECT COUNT(*) FROM sample) AS probability_ab
FROM
sample
CROSS JOIN property_pairs
GROUP BY 1, 2
)
SELECT
property_a,
probability_a,
property_b,
probability_b,
probability_ab,
probability_ab/probability_b AS probability_a_given_b,
probability_ab/probability_a AS probability_b_given_a,
FROM
pairs_exist
LEFT JOIN property_pairs
USING (property_a, property_b)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment