-
-
Save krisjan-oldekamp/bad52a06062b09729434ab65091b5393 to your computer and use it in GitHub Desktop.
-- Author: Krisjan Oldekamp | |
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function | |
create or replace function `<your-project>.<your-dataset>.channel_grouping`(tsource string, medium string, campaign string) as ( | |
case | |
when (tsource = 'direct' or tsource is null) | |
and (regexp_contains(medium, r'^(\(not set\)|\(none\))$') or medium is null) | |
then 'direct' | |
when regexp_contains(campaign, r'^(.*shop.*)$') | |
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$') | |
then 'shopping_paid' | |
when regexp_contains(tsource, r'^(google|bing)$') | |
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$') | |
then 'search_paid' | |
when regexp_contains(tsource, r'^(twitter|facebook|fb|instagram|ig|linkedin|pinterest)$') | |
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*|social_paid)$') | |
then 'social_paid' | |
when regexp_contains(tsource, r'^(youtube)$') | |
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$') | |
then 'video_paid' | |
when regexp_contains(medium, r'^(display|banner|expandable|interstitial|cpm)$') | |
then 'display' | |
when regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$') | |
then 'other_paid' | |
when regexp_contains(medium, r'^(.*shop.*)$') | |
then 'shopping_organic' | |
when regexp_contains(tsource, r'^.*(twitter|t\.co|facebook|instagram|linkedin|lnkd\.in|pinterest).*') | |
or regexp_contains(medium, r'^(social|social_advertising|social-advertising|social_network|social-network|social_media|social-media|sm|social-unpaid|social_unpaid)$') | |
then 'social_organic' | |
when regexp_contains(medium, r'^(.*video.*)$') | |
then 'video_organic' | |
when regexp_contains(tsource, r'^(google|bing|yahoo|baidu|duckduckgo|yandex|ask)$') | |
or medium = 'organic' | |
then 'search_organic' | |
when regexp_contains(tsource, r'^(email|mail|e-mail|e_mail|e mail|mail\.google\.com)$') | |
or regexp_contains(medium, r'^(email|mail|e-mail|e_mail|e mail)$') | |
then 'email' | |
when regexp_contains(medium, r'^(affiliate|affiliates)$') | |
then 'affiliate' | |
when medium = 'referral' | |
then 'referral' | |
when medium = 'audio' | |
then 'audio' | |
when medium = 'sms' | |
then 'sms' | |
when ends_with(medium, 'push') | |
or regexp_contains(medium, r'.*(mobile|notification).*') | |
then 'mobile_push' | |
else '(other)' | |
end | |
); |
-- Author: Krisjan Oldekamp | |
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function | |
create or replace function `<your-project>.<your-dataset>.channel_grouping`(tsource string, medium string, campaign string) as ( | |
case | |
when (tsource = '(direct)' or tsource is null) | |
and (regexp_contains(medium, r'^(\(not set\)|\(none\))$') or medium is null) | |
then 'direct' | |
when regexp_contains(medium, r'^(social|social_advertising|social-advertising|social_network|social-network|social_media|social-media)$') | |
then 'social' | |
when regexp_contains(medium, r'^(email|mail)$') | |
then 'email' | |
when regexp_contains(medium, r'^(affiliate|affiliates)$') | |
then 'affiliate' | |
when regexp_contains(medium, r'^(cpc|ppc|paidsearch)$') | |
then 'search_paid' | |
when regexp_contains(medium, r'^(display|cpm|banner)$') | |
then 'display' | |
when medium = 'organic' | |
then 'search_organic' | |
when medium = 'referral' | |
then 'referral' | |
else '(other)' | |
end | |
); |
#standardSQL | |
-- Author: Krisjan Oldekamp | |
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function | |
with | |
-- select session in last 30 days | |
sessions as ( | |
select | |
user_pseudo_id as ga_client_id, | |
concat(user_pseudo_id,'.',(select cast(value.int_value as string) from unnest(event_params) where key = 'ga_session_id')) as session_id, -- combine user_pseudo_id and session_id for a unique session-id | |
timestamp_micros(min(event_timestamp)) as session_start, | |
array_agg( | |
if(event_name in('page_view','user_engagement','scroll'), struct( | |
event_timestamp, | |
lower((select value.string_value from unnest(event_params) where key = 'source')) as source, | |
lower((select value.string_value from unnest(event_params) where key = 'medium')) as medium, | |
lower((select value.string_value from unnest(event_params) where key = 'campaign')) as campaign, | |
(select value.int_value from unnest(event_params) where key = 'entrances') as is_entrance, | |
(select value.int_value from unnest(event_params) where key = 'ignore_referrer') as ignore_referrer | |
), null) | |
ignore nulls) as channels_in_session, | |
countif(event_name = 'purchase') as conversions, | |
sum(ecommerce.purchase_revenue) as conversion_value | |
from | |
`<your-project>.analytics_<your-dataset>.events_*` | |
where | |
_table_suffix between | |
format_date('%Y%m%d', date_sub(current_date(), interval 30 day)) | |
and format_date('%Y%m%d', date_sub(current_date(), interval 1 day)) | |
group by | |
user_pseudo_id, | |
session_id | |
), | |
-- get first campaign parameters from session and aggegrated metrics | |
traffic_acquisition as ( | |
select | |
(select t.source from unnest(channels_in_session) as t where t.ignore_referrer is null order by t.event_timestamp asc limit 1) as source, | |
(select t.medium from unnest(channels_in_session) as t where t.ignore_referrer is null order by t.event_timestamp asc limit 1) as medium, | |
count(distinct session_id) as sessions, | |
sum(conversions) as conversions, | |
ifnull(sum(conversion_value), 0) as conversion_value | |
from | |
sessions | |
group by | |
1, 2 | |
) | |
-- map source / medium to channel grouping using a user defined function (ignore campaign) | |
select | |
*, | |
<your-dataset>.channel_grouping(source, medium, null) as channel_grouping | |
from | |
traffic_acquisition | |
Sometimes BigQuery seems to groups the results, are all the 3379 rows in the first table set to "direct" or only just the first results? + maybe a good idea to double check if the rules in the default channel groupings should be adjusted to your specific situation (I see a lot of "other_paid" sessions for example, probably social_paid or search_paid something like that is in there.
Sadly even at the last page it's all "direct". But it must indeed have something to do with our dataset because running the query on the ga4 sample data works perfectly fine. I'll check what adjustments can be made for the channel grouping in our situation.
Thanks for the code share. Have you also looked at saying if user comes to Homepage via Organic then it's Brand Organic Search or Non Brand Organic Search?. Is that just an if statement so if true or false
Hello,
Can the updated default channel grouping definition be the reason we're getting only 'direct' in the paths :
Even though when we test the function there are plenty of different channel grouping ?
(The conversion metrics is ajusted in out main query)