Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save admariner/d53916804bc24fd5ead2985d57934450 to your computer and use it in GitHub Desktop.
Save admariner/d53916804bc24fd5ead2985d57934450 to your computer and use it in GitHub Desktop.
Define custom Channel Groupings in a reusable "User Defined Function"(UDF) to make your life easier when working with Google Analytics 4 data in BigQuery. Full article on stacktonic.com
-- Author: Krisjan Oldekamp
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function
create or replace function `<your-project>.<your-dataset>.channel_grouping`(tsource string, medium string, campaign string) as (
case
when (tsource = 'direct' or tsource is null)
and (regexp_contains(medium, r'^(\(not set\)|\(none\))$') or medium is null)
then 'direct'
when regexp_contains(campaign, r'^(.*shop.*)$')
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$')
then 'shopping_paid'
when regexp_contains(tsource, r'^(google|bing)$')
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$')
then 'search_paid'
when regexp_contains(tsource, r'^(twitter|facebook|fb|instagram|ig|linkedin|pinterest)$')
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*|social_paid)$')
then 'social_paid'
when regexp_contains(tsource, r'^(youtube)$')
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$')
then 'video_paid'
when regexp_contains(medium, r'^(display|banner|expandable|interstitial|cpm)$')
then 'display'
when regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$')
then 'other_paid'
when regexp_contains(medium, r'^(.*shop.*)$')
then 'shopping_organic'
when regexp_contains(tsource, r'^.*(twitter|t\.co|facebook|instagram|linkedin|lnkd\.in|pinterest).*')
or regexp_contains(medium, r'^(social|social_advertising|social-advertising|social_network|social-network|social_media|social-media|sm|social-unpaid|social_unpaid)$')
then 'social_organic'
when regexp_contains(medium, r'^(.*video.*)$')
then 'video_organic'
when regexp_contains(tsource, r'^(google|bing|yahoo|baidu|duckduckgo|yandex|ask)$')
or medium = 'organic'
then 'search_organic'
when regexp_contains(tsource, r'^(email|mail|e-mail|e_mail|e mail|mail\.google\.com)$')
or regexp_contains(medium, r'^(email|mail|e-mail|e_mail|e mail)$')
then 'email'
when regexp_contains(medium, r'^(affiliate|affiliates)$')
then 'affiliate'
when medium = 'referral'
then 'referral'
when medium = 'audio'
then 'audio'
when medium = 'sms'
then 'sms'
when ends_with(medium, 'push')
or regexp_contains(medium, r'.*(mobile|notification).*')
then 'mobile_push'
else '(other)'
end
);
-- Author: Krisjan Oldekamp
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function
create or replace function `<your-project>.<your-dataset>.channel_grouping`(tsource string, medium string, campaign string) as (
case
when (tsource = '(direct)' or tsource is null)
and (regexp_contains(medium, r'^(\(not set\)|\(none\))$') or medium is null)
then 'direct'
when regexp_contains(medium, r'^(social|social_advertising|social-advertising|social_network|social-network|social_media|social-media)$')
then 'social'
when regexp_contains(medium, r'^(email|mail)$')
then 'email'
when regexp_contains(medium, r'^(affiliate|affiliates)$')
then 'affiliate'
when regexp_contains(medium, r'^(cpc|ppc|paidsearch)$')
then 'search_paid'
when regexp_contains(medium, r'^(display|cpm|banner)$')
then 'display'
when medium = 'organic'
then 'search_organic'
when medium = 'referral'
then 'referral'
else '(other)'
end
);
#standardSQL
-- Author: Krisjan Oldekamp
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function
with
-- select session in last 30 days
sessions as (
select
user_pseudo_id as ga_client_id,
concat(user_pseudo_id,'.',(select cast(value.int_value as string) from unnest(event_params) where key = 'ga_session_id')) as session_id, -- combine user_pseudo_id and session_id for a unique session-id
timestamp_micros(min(event_timestamp)) as session_start,
array_agg(
if(event_name in('page_view','user_engagement','scroll'), struct(
event_timestamp,
lower((select value.string_value from unnest(event_params) where key = 'source')) as source,
lower((select value.string_value from unnest(event_params) where key = 'medium')) as medium,
lower((select value.string_value from unnest(event_params) where key = 'campaign')) as campaign,
(select value.int_value from unnest(event_params) where key = 'entrances') as is_entrance,
(select value.int_value from unnest(event_params) where key = 'ignore_referrer') as ignore_referrer
), null)
ignore nulls) as channels_in_session,
countif(event_name = 'purchase') as conversions,
sum(ecommerce.purchase_revenue) as conversion_value
from
`<your-project>.analytics_<your-dataset>.events_*`
where
_table_suffix between
format_date('%Y%m%d', date_sub(current_date(), interval 30 day))
and format_date('%Y%m%d', date_sub(current_date(), interval 1 day))
group by
user_pseudo_id,
session_id
),
-- get first campaign parameters from session and aggegrated metrics
traffic_acquisition as (
select
(select t.source from unnest(channels_in_session) as t where t.ignore_referrer is null order by t.event_timestamp asc limit 1) as source,
(select t.medium from unnest(channels_in_session) as t where t.ignore_referrer is null order by t.event_timestamp asc limit 1) as medium,
count(distinct session_id) as sessions,
sum(conversions) as conversions,
ifnull(sum(conversion_value), 0) as conversion_value
from
sessions
group by
1, 2
)
-- map source / medium to channel grouping using a user defined function (ignore campaign)
select
*,
<your-dataset>.channel_grouping(source, medium, null) as channel_grouping
from
traffic_acquisition
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment