Skip to content

Instantly share code, notes, and snippets.

@tdsmith
Created December 2, 2020 18:50
Show Gist options
  • Save tdsmith/97038b14bb36851086a28b01ecd52a87 to your computer and use it in GitHub Desktop.
Save tdsmith/97038b14bb36851086a28b01ecd52a87 to your computer and use it in GitHub Desktop.
CREATE TEMPORARY TABLE enrollments AS (
WITH analysis_windows AS (
(SELECT 0 AS analysis_window_start, 6 AS analysis_window_end)
),
raw_enrollments AS (
SELECT
e.client_id,
`mozfun.map.get_key`(e.event_map_values, 'branch')
AS branch,
min(e.submission_date) AS enrollment_date,
count(e.submission_date) AS num_enrollment_events
FROM
`moz-fx-data-shared-prod.telemetry.events` e
WHERE
e.event_category = 'normandy'
AND e.event_method = 'enroll'
AND e.submission_date
BETWEEN '2020-10-27' AND '2020-11-10'
AND e.event_string_value = 'bug-1671484-pref-validation-of-relpreload-performance-impact-release-82-83'
GROUP BY e.client_id, branch
),
segmented_enrollments AS (
SELECT
raw_enrollments.*,
ds_0.regular_users_v3,
ds_0.new_or_resurrected_v3,
ds_1.tcp_conn_high_latency,
ds_1.tcp_conn_med_latency,
ds_1.tcp_conn_low_latency,
ds_1.other_tcp_conn_latency
FROM raw_enrollments
LEFT JOIN (
SELECT
e.client_id,
COALESCE(LOGICAL_OR(is_regular_user_v3), FALSE) AS regular_users_v3,
LOGICAL_OR(COALESCE(is_new_or_resurrected_v3, TRUE)) AS new_or_resurrected_v3
FROM raw_enrollments e
LEFT JOIN `moz-fx-data-shared-prod.telemetry.clients_last_seen` ds
ON ds.client_id = e.client_id
AND ds.submission_date BETWEEN
DATE_ADD('2020-10-27', interval 0 day)
AND DATE_ADD('2020-11-10', interval 0 day)
AND ds.submission_date BETWEEN
DATE_ADD(e.enrollment_date, interval 0 day)
AND DATE_ADD(e.enrollment_date, interval 0 day)
GROUP BY e.client_id
) ds_0 USING (client_id)
LEFT JOIN (
SELECT
e.client_id,
mozfun.hist.percentiles(mozfun.hist.merge(ARRAY_AGG(tcp_conn_ms)), [0.5])[SAFE_OFFSET(0)].value >= 255 AS tcp_conn_high_latency,
mozfun.hist.percentiles(mozfun.hist.merge(ARRAY_AGG(tcp_conn_ms)), [0.5])[SAFE_OFFSET(0)].value BETWEEN 63 AND 245 AS tcp_conn_med_latency,
mozfun.hist.percentiles(mozfun.hist.merge(ARRAY_AGG(tcp_conn_ms)), [0.5])[SAFE_OFFSET(0)].value BETWEEN 1 AND 62 AS tcp_conn_low_latency,
mozfun.hist.merge(ARRAY_AGG(tcp_conn_ms)) IS NULL OR mozfun.hist.percentiles(mozfun.hist.merge(ARRAY_AGG(tcp_conn_ms)), [0.5])[SAFE_OFFSET(0)].value <= 0 AS other_tcp_conn_latency
FROM raw_enrollments e
LEFT JOIN (
SELECT
DATE(submission_timestamp) AS submission_date,
client_id,
mozfun.hist.merge(ARRAY_AGG(mozfun.hist.extract(payload.histograms.HTTP_PAGE_TCP_CONNECTION_2))) AS tcp_conn_ms,
FROM `moz-fx-data-shared-prod.telemetry.main`
WHERE
DATE(submission_timestamp) BETWEEN DATE_SUB("2020-10-27", INTERVAL 8 DAY) AND "2020-11-10"
AND normalized_channel = 'release'
GROUP BY submission_date, client_id
) ds
ON ds.client_id = e.client_id
AND ds.submission_date BETWEEN
DATE_ADD('2020-10-27', interval 0 day)
AND DATE_ADD('2020-11-10', interval 0 day)
AND ds.submission_date BETWEEN
DATE_ADD(e.enrollment_date, interval 0 day)
AND DATE_ADD(e.enrollment_date, interval 0 day)
GROUP BY e.client_id
) ds_1 USING (client_id)
)
SELECT
e.*,
aw.*
FROM segmented_enrollments e
CROSS JOIN analysis_windows aw
);
CREATE OR REPLACE TABLE bug_1671484_pref_validation_of_relpreload_performance_impact_release_82_83_overall_1 AS (
SELECT
enrollments.*,
ds_0.tagged_follow_on_search_count,
ds_0.separate_search_engine,
ds_0.searches_with_ads,
ds_0.tagged_search_count,
ds_0.search_count,
ds_0.organic_search_count,
ds_0.ad_clicks,
ds_1.load_input_event_response_no_preload_ms,
ds_1.time_to_load_event_start_preload_ms,
ds_1.time_to_load_event_end_no_preload_ms,
ds_1.time_to_load_event_start_no_preload_ms,
ds_1.time_to_load_event_end_preload_ms,
ds_1.load_input_event_response_preload_ms,
ds_1.main_has_contradictory_branch,
ds_1.main_has_non_enrolled_data,
ds_2.uri_count,
ds_2.active_hours,
ds_2.clients_daily_has_contradictory_branch,
ds_2.clients_daily_has_non_enrolled_data
FROM enrollments
LEFT JOIN (
SELECT
e.client_id,
e.analysis_window_start,
e.analysis_window_end,
COALESCE(SUM(tagged_follow_on), 0) AS tagged_follow_on_search_count,
COALESCE(ANY_VALUE(default_search_engine != default_private_search_engine), false) AS separate_search_engine,
COALESCE(SUM(search_with_ads), 0) AS searches_with_ads,
COALESCE(SUM(tagged_sap), 0) AS tagged_search_count,
COALESCE(SUM(sap), 0) AS search_count,
COALESCE(SUM(organic), 0) AS organic_search_count,
COALESCE(SUM(ad_click), 0) AS ad_clicks
FROM enrollments e
LEFT JOIN `moz-fx-data-shared-prod.search.search_clients_daily` ds
ON ds.client_id = e.client_id
AND ds.submission_date BETWEEN '2020-10-27' AND '2020-11-16'
AND ds.submission_date BETWEEN
DATE_ADD(e.enrollment_date, interval e.analysis_window_start day)
AND DATE_ADD(e.enrollment_date, interval e.analysis_window_end day)
GROUP BY e.client_id, e.analysis_window_start, e.analysis_window_end
) ds_0 USING (client_id, analysis_window_start, analysis_window_end)
LEFT JOIN (
SELECT
e.client_id,
e.analysis_window_start,
e.analysis_window_end,
SAFE_DIVIDE(
SUM(CAST(JSON_EXTRACT_SCALAR(payload.histograms.load_input_event_response_no_preload_ms, "$.sum") AS int64)),
SUM((SELECT SUM(value) FROM UNNEST(`moz-fx-data-shared-prod`.udf.json_extract_histogram(payload.histograms.load_input_event_response_no_preload_ms).values)))
) AS load_input_event_response_no_preload_ms,
SAFE_DIVIDE(
SUM(CAST(JSON_EXTRACT_SCALAR(payload.histograms.time_to_load_event_start_preload_ms, "$.sum") AS int64)),
SUM((SELECT SUM(value) FROM UNNEST(`moz-fx-data-shared-prod`.udf.json_extract_histogram(payload.histograms.time_to_load_event_start_preload_ms).values)))
) AS time_to_load_event_start_preload_ms,
SAFE_DIVIDE(
SUM(CAST(JSON_EXTRACT_SCALAR(payload.histograms.time_to_load_event_end_no_preload_ms, "$.sum") AS int64)),
SUM((SELECT SUM(value) FROM UNNEST(`moz-fx-data-shared-prod`.udf.json_extract_histogram(payload.histograms.time_to_load_event_end_no_preload_ms).values)))
) AS time_to_load_event_end_no_preload_ms,
SAFE_DIVIDE(
SUM(CAST(JSON_EXTRACT_SCALAR(payload.histograms.time_to_load_event_start_no_preload_ms, "$.sum") AS int64)),
SUM((SELECT SUM(value) FROM UNNEST(`moz-fx-data-shared-prod`.udf.json_extract_histogram(payload.histograms.time_to_load_event_start_no_preload_ms).values)))
) AS time_to_load_event_start_no_preload_ms,
SAFE_DIVIDE(
SUM(CAST(JSON_EXTRACT_SCALAR(payload.histograms.time_to_load_event_end_preload_ms, "$.sum") AS int64)),
SUM((SELECT SUM(value) FROM UNNEST(`moz-fx-data-shared-prod`.udf.json_extract_histogram(payload.histograms.time_to_load_event_end_preload_ms).values)))
) AS time_to_load_event_end_preload_ms,
SAFE_DIVIDE(
SUM(CAST(JSON_EXTRACT_SCALAR(payload.histograms.load_input_event_response_preload_ms, "$.sum") AS int64)),
SUM((SELECT SUM(value) FROM UNNEST(`moz-fx-data-shared-prod`.udf.json_extract_histogram(payload.histograms.load_input_event_response_preload_ms).values)))
) AS load_input_event_response_preload_ms,
COALESCE(LOGICAL_OR(`mozfun.map.get_key`(
ds.experiments, 'bug-1671484-pref-validation-of-relpreload-performance-impact-release-82-83'
).branch != e.branch), FALSE) AS main_has_contradictory_branch,
COALESCE(LOGICAL_OR(`mozfun.map.get_key`(
ds.experiments, 'bug-1671484-pref-validation-of-relpreload-performance-impact-release-82-83'
).branch IS NULL), FALSE) AS main_has_non_enrolled_data
FROM enrollments e
LEFT JOIN (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
environment.experiments
FROM `moz-fx-data-shared-prod`.telemetry.main
) ds
ON ds.client_id = e.client_id
AND ds.submission_date BETWEEN '2020-10-27' AND '2020-11-16'
AND ds.submission_date BETWEEN
DATE_ADD(e.enrollment_date, interval e.analysis_window_start day)
AND DATE_ADD(e.enrollment_date, interval e.analysis_window_end day)
AND (
ds.submission_date != e.enrollment_date
OR `mozfun.map.get_key`(
ds.experiments, 'bug-1671484-pref-validation-of-relpreload-performance-impact-release-82-83'
).branch IS NOT NULL
)
GROUP BY e.client_id, e.analysis_window_start, e.analysis_window_end
) ds_1 USING (client_id, analysis_window_start, analysis_window_end)
LEFT JOIN (
SELECT
e.client_id,
e.analysis_window_start,
e.analysis_window_end,
COALESCE(SUM(scalar_parent_browser_engagement_total_uri_count_sum), 0) AS uri_count,
COALESCE(SUM(active_hours_sum), 0) AS active_hours,
COALESCE(LOGICAL_OR(`mozfun.map.get_key`(
ds.experiments, 'bug-1671484-pref-validation-of-relpreload-performance-impact-release-82-83'
) != e.branch), FALSE) AS clients_daily_has_contradictory_branch,
COALESCE(LOGICAL_OR(`mozfun.map.get_key`(
ds.experiments, 'bug-1671484-pref-validation-of-relpreload-performance-impact-release-82-83'
) IS NULL), FALSE) AS clients_daily_has_non_enrolled_data
FROM enrollments e
LEFT JOIN `moz-fx-data-shared-prod.telemetry.clients_daily` ds
ON ds.client_id = e.client_id
AND ds.submission_date BETWEEN '2020-10-27' AND '2020-11-16'
AND ds.submission_date BETWEEN
DATE_ADD(e.enrollment_date, interval e.analysis_window_start day)
AND DATE_ADD(e.enrollment_date, interval e.analysis_window_end day)
AND (
ds.submission_date != e.enrollment_date
OR `mozfun.map.get_key`(
ds.experiments, 'bug-1671484-pref-validation-of-relpreload-performance-impact-release-82-83'
) IS NOT NULL
)
GROUP BY e.client_id, e.analysis_window_start, e.analysis_window_end
) ds_2 USING (client_id, analysis_window_start, analysis_window_end)
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment