Skip to content

Instantly share code, notes, and snippets.

@derekperkins
Created September 21, 2021 20:07
Show Gist options
  • Save derekperkins/e2f912b352ec4863e5a6ff9b17d70d09 to your computer and use it in GitHub Desktop.
ClarityPR Nozzle Query
-- BigQuery Standard SQL. Builds a per-keyword SERP metrics series for one
-- brand / segment / keyword group, gap-fills days where a keyword was not
-- crawled, and emits value + change columns comparing the latest crawl in
-- the report range against the earliest. @DS_START_DATE / @DS_END_DATE are
-- Data Studio date parameters supplied as 'YYYYMMDD' strings.
WITH
-- Base rows: one row per (requested, keyword, result) for the selected
-- brand, segment, and keyword group, limited to the report date range.
raw_data AS (
SELECT * EXCEPT (brand_properties, segments_by_keyword_groups, keyword_count)
FROM nozzledata.claritypr_safemoon.results_by_brand__beta
-- correlated UNNEST joins flatten the repeated brand/segment fields
JOIN UNNEST(brand_properties)
JOIN UNNEST(segments_by_keyword_groups)
WHERE
-- NOTE(review): upper bound is inclusive; assumes @DS_END_DATE parses to
-- the intended final crawl timestamp (midnight) -- confirm against data
requested >= PARSE_TIMESTAMP('%Y%m%d', @DS_START_DATE) AND requested <= PARSE_TIMESTAMP('%Y%m%d', @DS_END_DATE)
AND segment_id = 1
-- keyword groups are keyed by the farm fingerprint of the group name
AND keyword_group_hash = FARM_FINGERPRINT('- All Keywords -')
AND brand_id = 375923618976250
),
-- (requested, keyword) pairs actually observed; this doesn't fill null on
-- days where there was no keyword data -- that gap is closed later by
-- all_rankings_fill_null
distinct_keywords_by_requested AS (
SELECT DISTINCT requested, keyword_id FROM raw_data
),
-- every keyword seen anywhere in the date range
distinct_keywords AS (
SELECT DISTINCT keyword_id FROM raw_data
),
-- every crawl timestamp seen anywhere in the date range
distinct_requested AS (
SELECT DISTINCT requested FROM raw_data
),
-- used for pagination
latest_requested AS (
SELECT
MAX(requested) AS requested
FROM raw_data
),
earliest_requested AS (
SELECT
MIN(requested) AS requested
FROM raw_data
),
-- the two endpoint crawls of the range; UNION DISTINCT collapses them to a
-- single row when the range contains only one crawl
requested_samples AS (
SELECT * FROM earliest_requested
UNION DISTINCT
SELECT * FROM latest_requested
),
-- dense grid: every keyword paired with every crawl timestamp, including
-- (keyword, timestamp) slots that were never actually crawled
all_keywords_by_requested AS (
SELECT
keyword_id,
requested,
FROM distinct_keywords
CROSS JOIN distinct_requested
),
-- first fill forwards, then backfill as necessary:
-- data_from_requested is the crawl whose data stands in for this slot --
-- the slot's own timestamp if it was crawled, else the nearest earlier
-- crawl (LAST_VALUE over preceding rows), else the nearest later crawl
-- (FIRST_VALUE over following rows)
all_rankings_fill_null AS (
select
a.keyword_id,
a.requested,
IFNULL(d.requested, IFNULL(
LAST_VALUE(d.requested IGNORE NULLS) OVER (PARTITION BY keyword_id ORDER BY requested ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
FIRST_VALUE(d.requested IGNORE NULLS) OVER (PARTITION BY keyword_id ORDER BY requested ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
)) as data_from_requested,
from all_keywords_by_requested a
left join distinct_keywords_by_requested d using (keyword_id, requested)
),
-- attach the stand-in crawl's raw result rows to each (keyword, slot);
-- this has to run before expanding brands/properties
fill_null_data AS (
SELECT
a.keyword_id,
a.requested,
a.data_from_requested,
1 AS keyword_count,
r.* EXCEPT (keyword_id, requested)
FROM all_rankings_fill_null a
LEFT JOIN raw_data r ON a.keyword_id=r.keyword_id AND a.data_from_requested=r.requested
),
-- collapse result-level rows to one row per (keyword, requested) SERP,
-- keeping only the two endpoint crawls selected in requested_samples
per_serp_metrics AS (
SELECT
keyword_id AS keyword_id,
requested AS requested,
-- keyword counts
ANY_VALUE(data_from_requested) AS data_from_requested,
ANY_VALUE(keyword_count) AS keyword_count,
ANY_VALUE(summary_metrics__estimated_traffic__sum__total) as summary_metrics__estimated_traffic__sum__total,
-- keywords with no ranked result get sentinel penalties:
-- rank 101 (off page 10) and 30000px from top
IFNULL(MIN(result__rank), 101) AS top_rank,
IFNULL(MIN(result__measurements__pixels_from_top), 30000) AS top_pixels_from_top,
IFNULL(SUM(result__nozzle_metrics__click_through_rate), 0) AS click_through_rate,
SUM(result__nozzle_metrics__estimated_traffic) AS estimated_traffic__sum__total,
SUM(result__nozzle_metrics__ppc_value) AS ppc_value__sum__total,
-- HLL sketches (precision 15) so unique counts can be merged downstream
HLL_COUNT.INIT(keyword_id, 15) AS unique_keywords__count__total,
HLL_COUNT.INIT(result__url__url_id, 15) AS unique_urls__count__total,
COUNTIF(result__rank IS NOT NULL) AS unique_results__count__total,
ANY_VALUE(keyword_metrics__adwords_search_volume) AS adwords_search_volume__sum__total,
-- only for the latest crawl, keep the single best-ranked result as a
-- struct (ARRAY_AGG sorted by rank, take element 0); NULL otherwise
IF(requested=(SELECT requested FROM latest_requested), ARRAY_AGG(STRUCT(
-- ranking data
result__page,
result__rank,
result__paid_adjusted_rank,
result__item_rank,
-- visibility
result__measurements__is_visible,
-- result pixel measurement information
result__measurements__pixels_from_top,
result__measurements__pixels_from_left,
result__measurements__pixel_height,
result__measurements__pixel_width,
result__measurements__pixel_height_viewport,
result__measurements__pixel_width_viewport,
-- calculated metrics
result__nozzle_metrics__click_through_rate,
result__nozzle_metrics__estimated_traffic,
result__nozzle_metrics__ppc_value,
-- common metrics
result__price__price,
result__rating__normalized,
result__review__review_count,
-- result data
result__url__domain,
result__url__domain_id,
result__url__url,
result__url__url_id,
result__title__text
) ORDER BY result__rank, result__item_rank)[SAFE_OFFSET(0)], NULL) AS top_ranking_result,
FROM fill_null_data
JOIN requested_samples USING (requested)
GROUP BY keyword_id, requested
),
-- get the average of all the minimum values: averages divide by the fixed
-- keyword_count and charge any missing keywords the sentinel values
-- (101 rank / 30000 px). NOTE(review): input already has one row per
-- (keyword_id, requested), so COUNT(DISTINCT keyword_id) is 1 here --
-- looks kept from a multi-keyword grouping variant; verify intent.
per_serp_metrics_aggregation AS (
SELECT
keyword_id,
requested,
-- keyword counts
COUNTIF(requested = data_from_requested) AS actual_keyword_count,
ANY_VALUE(keyword_count) AS keyword_count,
APPROX_QUANTILES(top_rank, 4) AS top_rank__quantiles,
(SUM(top_rank) + (ANY_VALUE(keyword_count) - COUNT(DISTINCT keyword_id)) * 101) / ANY_VALUE(keyword_count) AS top_rank__avg__total,
APPROX_QUANTILES(top_pixels_from_top, 4) AS top_pixels_from_top__quantiles,
(SUM(top_pixels_from_top) + (ANY_VALUE(keyword_count) - COUNT(DISTINCT keyword_id)) * 30000) / ANY_VALUE(keyword_count) AS top_pixels_from_top__avg__total,
APPROX_QUANTILES(click_through_rate, 4) AS click_through_rate__quantiles,
SUM(click_through_rate)/ANY_VALUE(keyword_count) AS click_through_rate__avg__total,
SUM(estimated_traffic__sum__total) AS estimated_traffic__sum__total,
SUM(ppc_value__sum__total) AS ppc_value__sum__total,
-- HLL_COUNT.MERGE resolves the sketches to INT64 cardinalities
HLL_COUNT.MERGE(unique_keywords__count__total) AS unique_keywords__count__total,
HLL_COUNT.MERGE(unique_urls__count__total) AS unique_urls__count__total,
SUM(unique_results__count__total) AS unique_results__count__total,
SUM(adwords_search_volume__sum__total) AS adwords_search_volume__sum__total,
ANY_VALUE(top_ranking_result) AS top_ranking_result,
ANY_VALUE(summary_metrics__estimated_traffic__sum__total) as summary_metrics__estimated_traffic__sum__total,
FROM per_serp_metrics
GROUP BY keyword_id, requested
),
-- final shape: each metric as a __value column plus a __change column.
-- The named `change` window partitions by keyword and orders by requested;
-- FIRST_VALUE over its default frame yields the earliest crawl's value, so
-- __change = current - earliest (on the earliest row itself it is 0).
group_by_totals AS (
SELECT
requested,
EXTRACT(WEEK FROM requested) as requested_week,
keyword_id,
keyword_count,
top_rank__avg__total AS top_rank__avg__total__value,
top_rank__avg__total - FIRST_VALUE(top_rank__avg__total) OVER (change) AS top_rank__avg__total__change,
top_pixels_from_top__avg__total AS top_pixels_from_top__avg__total__value,
top_pixels_from_top__avg__total - FIRST_VALUE(top_pixels_from_top__avg__total) OVER (change) AS top_pixels_from_top__avg__total__change,
click_through_rate__avg__total AS click_through_rate__avg__total__value,
click_through_rate__avg__total - FIRST_VALUE(click_through_rate__avg__total) OVER (change) AS click_through_rate__avg__total__change,
estimated_traffic__sum__total AS estimated_traffic__sum__total__value,
estimated_traffic__sum__total - FIRST_VALUE(estimated_traffic__sum__total) OVER (change) AS estimated_traffic__sum__total__change,
ppc_value__sum__total AS ppc_value__sum__total__value,
ppc_value__sum__total - FIRST_VALUE(ppc_value__sum__total) OVER (change) AS ppc_value__sum__total__change,
unique_keywords__count__total AS unique_keywords__count__total__value,
unique_keywords__count__total - FIRST_VALUE(unique_keywords__count__total) OVER (change) AS unique_keywords__count__total__change,
unique_urls__count__total AS unique_urls__count__total__value,
unique_urls__count__total - FIRST_VALUE(unique_urls__count__total) OVER (change) AS unique_urls__count__total__change,
unique_results__count__total AS unique_results__count__total__value,
unique_results__count__total - FIRST_VALUE(unique_results__count__total) OVER (change) AS unique_results__count__total__change,
adwords_search_volume__sum__total AS adwords_search_volume__sum__total__value,
adwords_search_volume__sum__total - FIRST_VALUE(adwords_search_volume__sum__total) OVER (change) AS adwords_search_volume__sum__total__change,
summary_metrics__estimated_traffic__sum__total AS summary_metrics__estimated_traffic__sum__total__value,
summary_metrics__estimated_traffic__sum__total - FIRST_VALUE(summary_metrics__estimated_traffic__sum__total) OVER (change) AS summary_metrics__estimated_traffic__sum__total__change,
top_ranking_result,
FROM per_serp_metrics_aggregation
WINDOW change AS (PARTITION BY keyword_id ORDER BY requested)
)
SELECT * FROM group_by_totals
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment