Skip to content

Instantly share code, notes, and snippets.

@nicobrx
Last active June 1, 2024 10:03
Show Gist options
  • Save nicobrx/49bd948aa093e776ee89df563c790c49 to your computer and use it in GitHub Desktop.
Save nicobrx/49bd948aa093e776ee89df563c790c49 to your computer and use it in GitHub Desktop.
Gets a page_value metric from GA4 data in BigQuery that is similar to the Universal Analytics Page Value metric: https://support.google.com/analytics/answer/2695658 To use the query, replace the table references in lines 15 and 26 with your GA4 events table. Make sure to replace the table's date suffix with '*' so the query runs across date part…
-- set dates here so we don't have to update them in multiple places
with dates as (
select
'20230812' as start_date,
-- the next line gets yesterday
format_date('%Y%m%d',date_sub(current_date(), interval 1 day)) as end_date
),
-- get all events with event values
p as (
select
concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) as session_id,
event_timestamp,
sum(event_value_in_usd) as event_value
from `your-gcp-project.your-ga4-dataset_123456.events_*`
where _table_suffix between (select start_date from dates) and (select end_date from dates)
and event_value_in_usd is not null
group by session_id, event_timestamp),
-- get all page_view events
q as (
select
concat(user_pseudo_id,(select value.int_value from unnest(event_params) where key = 'ga_session_id')) as session_id,
event_timestamp,
(select value.string_value from unnest(event_params) where key = 'page_location') as page_location
from `your-gcp-project.your-ga4-dataset_123456.events_*`
where _table_suffix between (select start_date from dates) and (select end_date from dates)
and event_name = 'page_view'
),
-- join event values to page views - this inflates revenue but we'll come back later and
-- divide the values by the number of page views in a given session that occurred before
-- the revenue event
-- the pseudo_session_id is there to handle cases where multiple revenue events happen in
-- the same session - it effectively turns the session into multiple sessions so revenue is
-- attributed to page views appropriately
r as (
select
concat(q.session_id,p.event_timestamp) as pseudo_session_id,
p.event_value,
q.page_location,
(case when (q.event_timestamp < p.event_timestamp) then true else false end) as is_before_revenue_event
from q
full outer join p on q.session_id = p.session_id
order by q.session_id),
-- calculate page value, but only for page views that happen before revenue event
-- need to do it this way so we only attribute revenue to those pages
s as (
select
pseudo_session_id,
page_location,
event_value / (count(page_location) over (partition by pseudo_session_id)) as page_revenue
from r
where is_before_revenue_event),
-- sum page revenue by page
t as (
select
page_location,
sum(page_revenue) as page_revenue
from s
group by page_location),
-- get all page views with counts
u as (select
page_location,
count(page_location) as views
from q
group by page_location)
-- join the previous two queries to get page value - boom!
select
u.page_location,
u.views,
t.page_revenue/u.views as page_value
from u
left join t on u.page_location = t.page_location
order by views desc
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment