Created
September 21, 2021 20:07
-
-
Save derekperkins/e2f912b352ec4863e5a6ff9b17d70d09 to your computer and use it in GitHub Desktop.
ClarityPR Nozzle Query
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
raw_data AS ( | |
SELECT * EXCEPT (brand_properties, segments_by_keyword_groups, keyword_count) | |
FROM nozzledata.claritypr_safemoon.results_by_brand__beta | |
JOIN UNNEST(brand_properties) | |
JOIN UNNEST(segments_by_keyword_groups) | |
WHERE | |
requested >= PARSE_TIMESTAMP('%Y%m%d', @DS_START_DATE) AND requested <= PARSE_TIMESTAMP('%Y%m%d', @DS_END_DATE) | |
AND segment_id = 1 | |
AND keyword_group_hash = FARM_FINGERPRINT('- All Keywords -') | |
AND brand_id = 375923618976250 | |
), | |
-- this doesn't fill null on days where there was no keyword data | |
distinct_keywords_by_requested AS ( | |
SELECT DISTINCT requested, keyword_id FROM raw_data | |
), | |
distinct_keywords AS ( | |
SELECT DISTINCT keyword_id FROM raw_data | |
), | |
distinct_requested AS ( | |
SELECT DISTINCT requested FROM raw_data | |
), | |
-- used for pagination | |
latest_requested AS ( | |
SELECT | |
MAX(requested) AS requested | |
FROM raw_data | |
), | |
earliest_requested AS ( | |
SELECT | |
MIN(requested) AS requested | |
FROM raw_data | |
), | |
requested_samples AS ( | |
SELECT * FROM earliest_requested | |
UNION DISTINCT | |
SELECT * FROM latest_requested | |
), | |
all_keywords_by_requested AS ( | |
SELECT | |
keyword_id, | |
requested, | |
FROM distinct_keywords | |
CROSS JOIN distinct_requested | |
), | |
-- first fill forwards, then backfill as necessary | |
all_rankings_fill_null AS ( | |
select | |
a.keyword_id, | |
a.requested, | |
IFNULL(d.requested, IFNULL( | |
LAST_VALUE(d.requested IGNORE NULLS) OVER (PARTITION BY keyword_id ORDER BY requested ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), | |
FIRST_VALUE(d.requested IGNORE NULLS) OVER (PARTITION BY keyword_id ORDER BY requested ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) | |
)) as data_from_requested, | |
from all_keywords_by_requested a | |
left join distinct_keywords_by_requested d using (keyword_id, requested) | |
), | |
-- this has to run before expanding brands/properties | |
fill_null_data AS ( | |
SELECT | |
a.keyword_id, | |
a.requested, | |
a.data_from_requested, | |
1 AS keyword_count, | |
r.* EXCEPT (keyword_id, requested) | |
FROM all_rankings_fill_null a | |
LEFT JOIN raw_data r ON a.keyword_id=r.keyword_id AND a.data_from_requested=r.requested | |
), | |
per_serp_metrics AS ( | |
SELECT | |
keyword_id AS keyword_id, | |
requested AS requested, | |
-- keyword counts | |
ANY_VALUE(data_from_requested) AS data_from_requested, | |
ANY_VALUE(keyword_count) AS keyword_count, | |
ANY_VALUE(summary_metrics__estimated_traffic__sum__total) as summary_metrics__estimated_traffic__sum__total, | |
IFNULL(MIN(result__rank), 101) AS top_rank, | |
IFNULL(MIN(result__measurements__pixels_from_top), 30000) AS top_pixels_from_top, | |
IFNULL(SUM(result__nozzle_metrics__click_through_rate), 0) AS click_through_rate, | |
SUM(result__nozzle_metrics__estimated_traffic) AS estimated_traffic__sum__total, | |
SUM(result__nozzle_metrics__ppc_value) AS ppc_value__sum__total, | |
HLL_COUNT.INIT(keyword_id, 15) AS unique_keywords__count__total, | |
HLL_COUNT.INIT(result__url__url_id, 15) AS unique_urls__count__total, | |
COUNTIF(result__rank IS NOT NULL) AS unique_results__count__total, | |
ANY_VALUE(keyword_metrics__adwords_search_volume) AS adwords_search_volume__sum__total, | |
IF(requested=(SELECT requested FROM latest_requested), ARRAY_AGG(STRUCT( | |
-- ranking data | |
result__page, | |
result__rank, | |
result__paid_adjusted_rank, | |
result__item_rank, | |
-- visibility | |
result__measurements__is_visible, | |
-- result pixel measurement information | |
result__measurements__pixels_from_top, | |
result__measurements__pixels_from_left, | |
result__measurements__pixel_height, | |
result__measurements__pixel_width, | |
result__measurements__pixel_height_viewport, | |
result__measurements__pixel_width_viewport, | |
-- calculated metrics | |
result__nozzle_metrics__click_through_rate, | |
result__nozzle_metrics__estimated_traffic, | |
result__nozzle_metrics__ppc_value, | |
-- common metrics | |
result__price__price, | |
result__rating__normalized, | |
result__review__review_count, | |
-- result data | |
result__url__domain, | |
result__url__domain_id, | |
result__url__url, | |
result__url__url_id, | |
result__title__text | |
) ORDER BY result__rank, result__item_rank)[SAFE_OFFSET(0)], NULL) AS top_ranking_result, | |
FROM fill_null_data | |
JOIN requested_samples USING (requested) | |
GROUP BY keyword_id, requested | |
), | |
-- get the average of all the minimum values | |
per_serp_metrics_aggregation AS ( | |
SELECT | |
keyword_id, | |
requested, | |
-- keyword counts | |
COUNTIF(requested = data_from_requested) AS actual_keyword_count, | |
ANY_VALUE(keyword_count) AS keyword_count, | |
APPROX_QUANTILES(top_rank, 4) AS top_rank__quantiles, | |
(SUM(top_rank) + (ANY_VALUE(keyword_count) - COUNT(DISTINCT keyword_id)) * 101) / ANY_VALUE(keyword_count) AS top_rank__avg__total, | |
APPROX_QUANTILES(top_pixels_from_top, 4) AS top_pixels_from_top__quantiles, | |
(SUM(top_pixels_from_top) + (ANY_VALUE(keyword_count) - COUNT(DISTINCT keyword_id)) * 30000) / ANY_VALUE(keyword_count) AS top_pixels_from_top__avg__total, | |
APPROX_QUANTILES(click_through_rate, 4) AS click_through_rate__quantiles, | |
SUM(click_through_rate)/ANY_VALUE(keyword_count) AS click_through_rate__avg__total, | |
SUM(estimated_traffic__sum__total) AS estimated_traffic__sum__total, | |
SUM(ppc_value__sum__total) AS ppc_value__sum__total, | |
HLL_COUNT.MERGE(unique_keywords__count__total) AS unique_keywords__count__total, | |
HLL_COUNT.MERGE(unique_urls__count__total) AS unique_urls__count__total, | |
SUM(unique_results__count__total) AS unique_results__count__total, | |
SUM(adwords_search_volume__sum__total) AS adwords_search_volume__sum__total, | |
ANY_VALUE(top_ranking_result) AS top_ranking_result, | |
ANY_VALUE(summary_metrics__estimated_traffic__sum__total) as summary_metrics__estimated_traffic__sum__total, | |
FROM per_serp_metrics | |
GROUP BY keyword_id, requested | |
), | |
-- calculate min metrics per keyword_id, group_by, and requested | |
group_by_totals AS ( | |
SELECT | |
requested, | |
EXTRACT(WEEK FROM requested) as requested_week, | |
keyword_id, | |
keyword_count, | |
top_rank__avg__total AS top_rank__avg__total__value, | |
top_rank__avg__total - FIRST_VALUE(top_rank__avg__total) OVER (change) AS top_rank__avg__total__change, | |
top_pixels_from_top__avg__total AS top_pixels_from_top__avg__total__value, | |
top_pixels_from_top__avg__total - FIRST_VALUE(top_pixels_from_top__avg__total) OVER (change) AS top_pixels_from_top__avg__total__change, | |
click_through_rate__avg__total AS click_through_rate__avg__total__value, | |
click_through_rate__avg__total - FIRST_VALUE(click_through_rate__avg__total) OVER (change) AS click_through_rate__avg__total__change, | |
estimated_traffic__sum__total AS estimated_traffic__sum__total__value, | |
estimated_traffic__sum__total - FIRST_VALUE(estimated_traffic__sum__total) OVER (change) AS estimated_traffic__sum__total__change, | |
ppc_value__sum__total AS ppc_value__sum__total__value, | |
ppc_value__sum__total - FIRST_VALUE(ppc_value__sum__total) OVER (change) AS ppc_value__sum__total__change, | |
unique_keywords__count__total AS unique_keywords__count__total__value, | |
unique_keywords__count__total - FIRST_VALUE(unique_keywords__count__total) OVER (change) AS unique_keywords__count__total__change, | |
unique_urls__count__total AS unique_urls__count__total__value, | |
unique_urls__count__total - FIRST_VALUE(unique_urls__count__total) OVER (change) AS unique_urls__count__total__change, | |
unique_results__count__total AS unique_results__count__total__value, | |
unique_results__count__total - FIRST_VALUE(unique_results__count__total) OVER (change) AS unique_results__count__total__change, | |
adwords_search_volume__sum__total AS adwords_search_volume__sum__total__value, | |
adwords_search_volume__sum__total - FIRST_VALUE(adwords_search_volume__sum__total) OVER (change) AS adwords_search_volume__sum__total__change, | |
summary_metrics__estimated_traffic__sum__total AS summary_metrics__estimated_traffic__sum__total__value, | |
summary_metrics__estimated_traffic__sum__total - FIRST_VALUE(summary_metrics__estimated_traffic__sum__total) OVER (change) AS summary_metrics__estimated_traffic__sum__total__change, | |
top_ranking_result, | |
FROM per_serp_metrics_aggregation | |
WINDOW change AS (PARTITION BY keyword_id ORDER BY requested) | |
) | |
SELECT * FROM group_by_totals |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment