Created
July 30, 2022 10:29
-
-
Save khunreus/d20e150f13dc7264d6abe06aeaa8707c to your computer and use it in GitHub Desktop.
The query pulls page_view event data from the Google BigQuery public dataset for ecommerce (GA4). Using standard UNNEST practice.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- pulling user page views from GA4 events | |
WITH base_table AS ( | |
SELECT | |
event_name, | |
event_date, | |
event_timestamp, | |
user_pseudo_id, | |
user_id, | |
device, | |
geo, | |
traffic_source, | |
event_params, | |
user_properties | |
FROM | |
`bigquery-public-data.ga4_obfuscated_sample_ecommerce.events_*` | |
WHERE | |
_table_suffix >= '20210101' | |
AND event_name IN ('page_view') | |
) | |
, unnested_events AS ( | |
SELECT | |
event_date AS date, | |
event_timestamp AS event_timestamp_microseconds, | |
user_pseudo_id, | |
MAX(CASE WHEN c.key = 'ga_session_id' THEN c.value.int_value END) AS visitID, | |
MAX(CASE WHEN c.key = 'ga_session_number' THEN c.value.int_value END) AS visitNumber, | |
MAX(CASE WHEN c.key = 'page_title' THEN c.value.string_value END) AS page_title | |
FROM | |
base_table, | |
UNNEST (event_params) c | |
GROUP BY 1,2,3 | |
) | |
, ranked_screens AS ( | |
SELECT | |
*, | |
DENSE_RANK() OVER (PARTITION BY user_pseudo_id, visitID ORDER BY event_timestamp_microseconds ASC) page_rank, | |
LAG(page_title,1) OVER (PARTITION BY user_pseudo_id, visitID ORDER BY event_timestamp_microseconds ASC) previous_page, | |
LEAD(page_title,1) OVER (PARTITION BY user_pseudo_id, visitID ORDER BY event_timestamp_microseconds ASC) next_page | |
FROM | |
unnested_events | |
) | |
, screen_summary AS ( | |
SELECT | |
*, | |
MAX(page_rank) OVER (PARTITION BY user_pseudo_id, visitID) last_page_rank, | |
ARRAY_AGG(page_title) OVER (PARTITION BY user_pseudo_id, visitID) pages_on_a_visit | |
FROM | |
ranked_screens | |
) | |
, screen_summary_agg AS ( | |
SELECT * EXCEPT(pages_on_a_visit), | |
ARRAY_TO_STRING(ARRAY(SELECT DISTINCT * FROM UNNEST(pages_on_a_visit) ORDER BY 1 ASC), '>>') AS screens_on_a_visit | |
FROM | |
screen_summary | |
) | |
SELECT | |
*, | |
MIN(page_rank) OVER (PARTITION BY user_pseudo_id, visitID, page_title) AS page_rank_session_first_seen | |
FROM | |
screen_summary_agg | |
--WHERE user_pseudo_id = '10273164.5274553015' | |
-- AND date = '20210106' | |
--ORDER BY 2 ASC |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment