Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save krisjan-oldekamp/bad52a06062b09729434ab65091b5393 to your computer and use it in GitHub Desktop.
Save krisjan-oldekamp/bad52a06062b09729434ab65091b5393 to your computer and use it in GitHub Desktop.
Define custom Channel Groupings in a reusable "User Defined Function"(UDF) to make your life easier when working with Google Analytics 4 data in BigQuery. Full article on stacktonic.com
-- Author: Krisjan Oldekamp
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function
create or replace function `<your-project>.<your-dataset>.channel_grouping`(tsource string, medium string, campaign string) as (
case
when (tsource = 'direct' or tsource is null)
and (regexp_contains(medium, r'^(\(not set\)|\(none\))$') or medium is null)
then 'direct'
when regexp_contains(campaign, r'^(.*shop.*)$')
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$')
then 'shopping_paid'
when regexp_contains(tsource, r'^(google|bing)$')
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$')
then 'search_paid'
when regexp_contains(tsource, r'^(twitter|facebook|fb|instagram|ig|linkedin|pinterest)$')
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*|social_paid)$')
then 'social_paid'
when regexp_contains(tsource, r'^(youtube)$')
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$')
then 'video_paid'
when regexp_contains(medium, r'^(display|banner|expandable|interstitial|cpm)$')
then 'display'
when regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$')
then 'other_paid'
when regexp_contains(medium, r'^(.*shop.*)$')
then 'shopping_organic'
when regexp_contains(tsource, r'^.*(twitter|t\.co|facebook|instagram|linkedin|lnkd\.in|pinterest).*')
or regexp_contains(medium, r'^(social|social_advertising|social-advertising|social_network|social-network|social_media|social-media|sm|social-unpaid|social_unpaid)$')
then 'social_organic'
when regexp_contains(medium, r'^(.*video.*)$')
then 'video_organic'
when regexp_contains(tsource, r'^(google|bing|yahoo|baidu|duckduckgo|yandex|ask)$')
or medium = 'organic'
then 'search_organic'
when regexp_contains(tsource, r'^(email|mail|e-mail|e_mail|e mail|mail\.google\.com)$')
or regexp_contains(medium, r'^(email|mail|e-mail|e_mail|e mail)$')
then 'email'
when regexp_contains(medium, r'^(affiliate|affiliates)$')
then 'affiliate'
when medium = 'referral'
then 'referral'
when medium = 'audio'
then 'audio'
when medium = 'sms'
then 'sms'
when ends_with(medium, 'push')
or regexp_contains(medium, r'.*(mobile|notification).*')
then 'mobile_push'
else '(other)'
end
);
-- Author: Krisjan Oldekamp
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function
create or replace function `<your-project>.<your-dataset>.channel_grouping`(tsource string, medium string, campaign string) as (
case
when (tsource = '(direct)' or tsource is null)
and (regexp_contains(medium, r'^(\(not set\)|\(none\))$') or medium is null)
then 'direct'
when regexp_contains(medium, r'^(social|social_advertising|social-advertising|social_network|social-network|social_media|social-media)$')
then 'social'
when regexp_contains(medium, r'^(email|mail)$')
then 'email'
when regexp_contains(medium, r'^(affiliate|affiliates)$')
then 'affiliate'
when regexp_contains(medium, r'^(cpc|ppc|paidsearch)$')
then 'search_paid'
when regexp_contains(medium, r'^(display|cpm|banner)$')
then 'display'
when medium = 'organic'
then 'search_organic'
when medium = 'referral'
then 'referral'
else '(other)'
end
);
#standardSQL
-- Author: Krisjan Oldekamp
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function
with
-- select session in last 30 days
sessions as (
select
user_pseudo_id as ga_client_id,
concat(user_pseudo_id,'.',(select cast(value.int_value as string) from unnest(event_params) where key = 'ga_session_id')) as session_id, -- combine user_pseudo_id and session_id for a unique session-id
timestamp_micros(min(event_timestamp)) as session_start,
array_agg(
if(event_name in('page_view','user_engagement','scroll'), struct(
event_timestamp,
lower((select value.string_value from unnest(event_params) where key = 'source')) as source,
lower((select value.string_value from unnest(event_params) where key = 'medium')) as medium,
lower((select value.string_value from unnest(event_params) where key = 'campaign')) as campaign,
(select value.int_value from unnest(event_params) where key = 'entrances') as is_entrance,
(select value.int_value from unnest(event_params) where key = 'ignore_referrer') as ignore_referrer
), null)
ignore nulls) as channels_in_session,
countif(event_name = 'purchase') as conversions,
sum(ecommerce.purchase_revenue) as conversion_value
from
`<your-project>.analytics_<your-dataset>.events_*`
where
_table_suffix between
format_date('%Y%m%d', date_sub(current_date(), interval 30 day))
and format_date('%Y%m%d', date_sub(current_date(), interval 1 day))
group by
user_pseudo_id,
session_id
),
-- get first campaign parameters from session and aggegrated metrics
traffic_acquisition as (
select
(select t.source from unnest(channels_in_session) as t where t.ignore_referrer is null order by t.event_timestamp asc limit 1) as source,
(select t.medium from unnest(channels_in_session) as t where t.ignore_referrer is null order by t.event_timestamp asc limit 1) as medium,
count(distinct session_id) as sessions,
sum(conversions) as conversions,
ifnull(sum(conversion_value), 0) as conversion_value
from
sessions
group by
1, 2
)
-- map source / medium to channel grouping using a user defined function (ignore campaign)
select
*,
<your-dataset>.channel_grouping(source, medium, null) as channel_grouping
from
traffic_acquisition
@mayrop
Copy link

mayrop commented Aug 10, 2022

Hello, Can the updated default channel grouping definition be the reason we're getting only 'direct' in the paths : image

Even though when we test the function there are plenty of different channel grouping ? image (The conversion metrics is ajusted in out main query)

how do you do that journey id?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment