Last active
June 1, 2024 10:03
-
-
Save nicobrx/49bd948aa093e776ee89df563c790c49 to your computer and use it in GitHub Desktop.
Gets a page_value metric from GA4 data in BigQuery that is similar to the Universal Analytics Page Value metric: https://support.google.com/analytics/answer/2695658 To use the query, replace the table references in lines 15 and 26 with your GA4 events table. Make sure to replace the table's date suffix with '*' so the query runs across date part…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- set dates here so we don't have to update them in multiple places | |
with dates as ( | |
select | |
'20230812' as start_date, | |
-- the next line gets yesterday | |
format_date('%Y%m%d',date_sub(current_date(), interval 1 day)) as end_date | |
), | |
-- get all events with event values | |
p as ( | |
select | |
concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) as session_id, | |
event_timestamp, | |
sum(event_value_in_usd) as event_value | |
from `your-gcp-project.your-ga4-dataset_123456.events_*` | |
where _table_suffix between (select start_date from dates) and (select end_date from dates) | |
and event_value_in_usd is not null | |
group by session_id, event_timestamp), | |
-- get all page_view events | |
q as ( | |
select | |
concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) as session_id, | |
event_timestamp, | |
(select value.string_value from unnest(event_params) where key = 'page_location') as page_location | |
from `your-gcp-project.your-ga4-dataset_123456.events_*` | |
where _table_suffix between (select start_date from dates) and (select end_date from dates) | |
and event_name = 'page_view' | |
), | |
-- join event values to page views - this inflates revenue but we'll come back later and | |
-- divide the values by the number of page views in a given session that occurred before | |
-- the revenue event | |
-- the pseudo_session_id is there to handle cases where multiple revenue events happen in | |
-- the same session - it effectively turns the session into multiple sessions so revenue is | |
-- attributed to page views appropriately | |
r as ( | |
select | |
concat(q.session_id,p.event_timestamp) as pseudo_session_id, | |
p.event_value, | |
q.page_location, | |
(case when (q.event_timestamp < p.event_timestamp) then true else false end) as is_before_revenue_event | |
from q | |
full outer join p on q.session_id = p.session_id | |
order by q.session_id), | |
-- calculate page value, but only for page views that happen before revenue event | |
-- need to do it this way so we only attribute revenue to those pages | |
s as ( | |
select | |
pseudo_session_id, | |
page_location, | |
event_value / (count(page_location) over (partition by pseudo_session_id)) as page_revenue | |
from r | |
where is_before_revenue_event), | |
-- sum page revenue by page | |
t as ( | |
select | |
page_location, | |
sum(page_revenue) as page_revenue | |
from s | |
group by page_location), | |
-- get all page views with counts | |
u as (select | |
page_location, | |
count(page_location) as views | |
from q | |
group by page_location) | |
-- join the previous two queries to get page value - boom! | |
select | |
u.page_location, | |
u.views, | |
t.page_revenue/u.views as page_value | |
from u | |
left join t on u.page_location = t.page_location | |
order by views desc |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment