Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save boydnorwood/b2668ca95a58966b0e67f9f16f81deac to your computer and use it in GitHub Desktop.
Save boydnorwood/b2668ca95a58966b0e67f9f16f81deac to your computer and use it in GitHub Desktop.
This query lets you analyze which merchants are showing up the most in the product packs across all of your keywords in Nozzle.
-- Merchant Analysis in Product Listings In SERPs
-- Change the workspace slug and project slug (workspaceslug_projectslug) on lines 9, 22, and 116 to your own slugs
WITH
-- filter keywords early to reduce query execution time
filtered_keyword_ids AS (
SELECT keyword_id
FROM nozzledata.workspaceslug_projectslug.latest_keywords_by_keyword_id
JOIN UNNEST(keyword_groups) AS kg
-- WHERE kg IN ('Jewelry')
-- JOIN UNNEST(keyword_sources) as kw_source
-- WHERE kw_source.keyword_source_id IN (123)
GROUP BY keyword_id
),
-- grabbing the latest version of each serp in case of reparse
latest_rankings AS (
SELECT AS VALUE
ARRAY_AGG(t ORDER BY inserted_at DESC LIMIT 1)[OFFSET(0)]
FROM nozzledata.workspaceslug_projectslug.rankings t
JOIN filtered_keyword_ids USING (keyword_id)
WHERE requested >= TIMESTAMP_ADD(CURRENT_TIMESTAMP(), INTERVAL - 90 DAY)
GROUP BY ranking_id
),
all_keywords_by_requested AS (
SELECT
keyword_id,
requested,
FROM (SELECT DISTINCT keyword_id FROM filtered_keyword_ids)
CROSS JOIN (SELECT DISTINCT requested FROM latest_rankings)
),
-- first fill forwards, then backfill as necessary
all_rankings_fill_null AS (
SELECT
a.keyword_id,
a.requested,
IFNULL(d.requested, IFNULL(
LAST_VALUE(d.requested IGNORE NULLS) OVER (PARTITION BY keyword_id ORDER BY requested ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
FIRST_VALUE(d.requested IGNORE NULLS) OVER (PARTITION BY keyword_id ORDER BY requested ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
)) AS data_from_requested,
FROM all_keywords_by_requested a
LEFT JOIN (SELECT DISTINCT requested, keyword_id FROM latest_rankings) d USING (keyword_id, requested)
),
fill_null_data AS (
SELECT
a.keyword_id,
a.requested,
a.data_from_requested,
r.* EXCEPT (keyword_id, requested)
FROM all_rankings_fill_null a
LEFT JOIN latest_rankings r ON a.keyword_id=r.keyword_id AND a.data_from_requested=r.requested
),
filtered_results AS (
SELECT
requested,
keyword_id,
COALESCE(result.merchant.merchant_name, result.product.merchant) AS merchant_name,
result.url.domain_id,
result.pack_rank,
result.rank,
DENSE_RANK() OVER (PARTITION BY keyword_id, requested, result.pack_rank ORDER BY result.item_rank) AS item_rank,
result.layout.is_pack,
keyword_metrics.country_adwords_search_volume,
result.nozzle_metrics.click_through_rate,
result.measurements.pixels_from_top,
result.measurements.percentage_of_viewport,
result.measurements.percentage_of_dom,
result.measurements.is_visible,
FROM fill_null_data n
JOIN UNNEST(results) AS result
WHERE requested IS NOT NULL
AND result.paid.is_paid IS NOT TRUE
AND result.product.is_product IS TRUE
),
per_serp_data AS (
SELECT
requested,
keyword_id,
merchant_name,
MIN(rank) AS rank,
MIN(item_rank) AS item_rank,
AVG(item_rank) AS item_rank_avg,
CAST(SUM(country_adwords_search_volume / 30 * click_through_rate) AS INT64) AS estimated_traffic,
MIN(pixels_from_top) AS pixels_from_top,
SUM(percentage_of_viewport) AS above_the_fold_percentage, -- 100% = 1 | 5% = 0.05
SUM(percentage_of_dom) AS serp_percentage, -- 100% = 1 | 5% = 0.05
-- how many product packs are on the SERP, and a breakout if they are in the top 3
COUNT(DISTINCT IF(is_pack IS TRUE, pack_rank, NULL)) AS product_pack_count,
COUNT(DISTINCT IF(is_pack IS TRUE AND rank BETWEEN 1 AND 3, pack_rank, NULL)) AS product_pack_top_3_count,
-- how many visible products are on the SERP and how many total products (typically non-visible products are in a carousel)
COUNTIF(is_visible IS TRUE) AS visible_product_count,
COUNT(*) AS product_count,
COUNTIF(is_visible IS TRUE AND domain_id IS NOT NULL) AS visible_product_count_with_domain,
COUNTIF(domain_id IS NOT NULL) AS product_count_with_domain,
FROM filtered_results
JOIN nozzledata.workspaceslug_projectslug.latest_keywords_by_keyword_id k USING (keyword_id)
GROUP BY keyword_id, requested, merchant_name
),
aggregate_by_requested AS (
SELECT
requested,
merchant_name,
ROUND(AVG(rank), 2) AS rank,
ROUND(AVG(item_rank), 2) AS item_rank,
ROUND(AVG(item_rank_avg), 2) AS item_rank_avg_avg,
SUM(estimated_traffic) AS estimated_traffic,
CAST(AVG(pixels_from_top) AS INT64) AS pixels_from_top,
ROUND(AVG(above_the_fold_percentage), 4) AS above_the_fold_percentage,
ROUND(AVG(serp_percentage), 4) AS serp_percentage,
SUM(product_pack_count) AS product_pack_count,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(0)] AS product_pack_count_min,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(1)] AS product_pack_count_p25,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(2)] AS product_pack_count_p50,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(3)] AS product_pack_count_p75,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(4)] AS product_pack_count_max,
SUM(product_pack_top_3_count) AS product_pack_top_3_count,
COUNT(DISTINCT CONCAT(keyword_id, requested)) AS serps_with_at_least_1_product,
COUNT(DISTINCT keyword_id) AS keywords_with_at_least_1_product,
SUM(visible_product_count) AS visible_product_count,
SUM(product_count) AS product_count,
SUM(visible_product_count_with_domain) AS visible_product_count_with_domain,
SUM(product_count_with_domain) AS product_count_with_domain,
FROM per_serp_data
GROUP BY requested, merchant_name
),
aggregate_by_requested_by_keyword AS (
SELECT
keyword_id,
requested,
merchant_name,
ROUND(AVG(rank), 2) AS rank,
ROUND(AVG(item_rank), 2) AS item_rank,
ROUND(AVG(item_rank_avg), 2) AS item_rank_avg_avg,
SUM(estimated_traffic) AS estimated_traffic,
CAST(AVG(pixels_from_top) AS INT64) AS pixels_from_top,
ROUND(AVG(above_the_fold_percentage), 4) AS above_the_fold_percentage,
ROUND(AVG(serp_percentage), 4) AS serp_percentage,
SUM(product_pack_count) AS product_pack_count,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(0)] AS product_pack_count_min,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(1)] AS product_pack_count_p25,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(2)] AS product_pack_count_p50,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(3)] AS product_pack_count_p75,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(4)] AS product_pack_count_max,
SUM(product_pack_top_3_count) AS product_pack_top_3_count,
COUNT(DISTINCT CONCAT(keyword_id, requested)) AS serps_with_at_least_1_product,
COUNT(DISTINCT keyword_id) AS keywords_with_at_least_1_product,
SUM(visible_product_count) AS visible_product_count,
SUM(product_count) AS product_count,
SUM(visible_product_count_with_domain) AS visible_product_count_with_domain,
SUM(product_count_with_domain) AS product_count_with_domain,
FROM per_serp_data
GROUP BY keyword_id, requested, merchant_name
),
aggregate_by_keyword AS (
SELECT
keyword_id,
merchant_name,
ROUND(AVG(rank), 2) AS rank,
ROUND(AVG(item_rank), 2) AS item_rank,
ROUND(AVG(item_rank_avg), 2) AS item_rank_avg_avg,
SUM(estimated_traffic) AS estimated_traffic,
CAST(AVG(pixels_from_top) AS INT64) AS pixels_from_top,
ROUND(AVG(above_the_fold_percentage), 4) AS above_the_fold_percentage,
ROUND(AVG(serp_percentage), 4) AS serp_percentage,
SUM(product_pack_count) AS product_pack_count,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(0)] AS product_pack_count_min,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(1)] AS product_pack_count_p25,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(2)] AS product_pack_count_p50,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(3)] AS product_pack_count_p75,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(4)] AS product_pack_count_max,
SUM(product_pack_top_3_count) AS product_pack_top_3_count,
COUNT(DISTINCT CONCAT(keyword_id, requested)) AS serps_with_at_least_1_product,
COUNT(DISTINCT keyword_id) AS keywords_with_at_least_1_product,
SUM(visible_product_count) AS visible_product_count,
SUM(product_count) AS product_count,
SUM(visible_product_count_with_domain) AS visible_product_count_with_domain,
SUM(product_count_with_domain) AS product_count_with_domain,
FROM per_serp_data
GROUP BY keyword_id, merchant_name
),
aggregate_total AS (
SELECT
merchant_name,
ROUND(AVG(rank), 2) AS rank,
ROUND(AVG(item_rank), 2) AS item_rank,
ROUND(AVG(item_rank_avg), 2) AS item_rank_avg_avg,
SUM(estimated_traffic) AS estimated_traffic,
CAST(AVG(pixels_from_top) AS INT64) AS pixels_from_top,
ROUND(AVG(above_the_fold_percentage), 4) AS above_the_fold_percentage,
ROUND(AVG(serp_percentage), 4) AS serp_percentage,
SUM(product_pack_count) AS product_pack_count,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(0)] AS product_pack_count_min,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(1)] AS product_pack_count_p25,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(2)] AS product_pack_count_p50,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(3)] AS product_pack_count_p75,
APPROX_QUANTILES(product_pack_count, 4)[OFFSET(4)] AS product_pack_count_max,
SUM(product_pack_top_3_count) AS product_pack_top_3_count,
COUNT(DISTINCT CONCAT(keyword_id, requested)) AS serps_with_at_least_1_product,
COUNT(DISTINCT keyword_id) AS keywords_with_at_least_1_product,
SUM(visible_product_count) AS visible_product_count,
SUM(product_count) AS product_count,
SUM(visible_product_count_with_domain) AS visible_product_count_with_domain,
SUM(product_count_with_domain) AS product_count_with_domain,
FROM per_serp_data
GROUP BY merchant_name
)
SELECT * FROM aggregate_by_requested_by_keyword
-- SELECT * FROM aggregate_total ORDER BY product_count DESC
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment