Forked from krisjan-oldekamp/google_analytics_bigquery_channel_grouping_function_advanced_ga4.sql
Created
November 11, 2023 00:27
-
-
Save admariner/d53916804bc24fd5ead2985d57934450 to your computer and use it in GitHub Desktop.
Define custom Channel Groupings in a reusable "User Defined Function"(UDF) to make your life easier when working with Google Analytics 4 data in BigQuery. Full article on stacktonic.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Author: Krisjan Oldekamp | |
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function | |
create or replace function `<your-project>.<your-dataset>.channel_grouping`(tsource string, medium string, campaign string) as ( | |
case | |
when (tsource = 'direct' or tsource is null) | |
and (regexp_contains(medium, r'^(\(not set\)|\(none\))$') or medium is null) | |
then 'direct' | |
when regexp_contains(campaign, r'^(.*shop.*)$') | |
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$') | |
then 'shopping_paid' | |
when regexp_contains(tsource, r'^(google|bing)$') | |
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$') | |
then 'search_paid' | |
when regexp_contains(tsource, r'^(twitter|facebook|fb|instagram|ig|linkedin|pinterest)$') | |
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*|social_paid)$') | |
then 'social_paid' | |
when regexp_contains(tsource, r'^(youtube)$') | |
and regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$') | |
then 'video_paid' | |
when regexp_contains(medium, r'^(display|banner|expandable|interstitial|cpm)$') | |
then 'display' | |
when regexp_contains(medium, r'^(.*cp.*|ppc|paid.*)$') | |
then 'other_paid' | |
when regexp_contains(medium, r'^(.*shop.*)$') | |
then 'shopping_organic' | |
when regexp_contains(tsource, r'^.*(twitter|t\.co|facebook|instagram|linkedin|lnkd\.in|pinterest).*') | |
or regexp_contains(medium, r'^(social|social_advertising|social-advertising|social_network|social-network|social_media|social-media|sm|social-unpaid|social_unpaid)$') | |
then 'social_organic' | |
when regexp_contains(medium, r'^(.*video.*)$') | |
then 'video_organic' | |
when regexp_contains(tsource, r'^(google|bing|yahoo|baidu|duckduckgo|yandex|ask)$') | |
or medium = 'organic' | |
then 'search_organic' | |
when regexp_contains(tsource, r'^(email|mail|e-mail|e_mail|e mail|mail\.google\.com)$') | |
or regexp_contains(medium, r'^(email|mail|e-mail|e_mail|e mail)$') | |
then 'email' | |
when regexp_contains(medium, r'^(affiliate|affiliates)$') | |
then 'affiliate' | |
when medium = 'referral' | |
then 'referral' | |
when medium = 'audio' | |
then 'audio' | |
when medium = 'sms' | |
then 'sms' | |
when ends_with(medium, 'push') | |
or regexp_contains(medium, r'.*(mobile|notification).*') | |
then 'mobile_push' | |
else '(other)' | |
end | |
); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Author: Krisjan Oldekamp | |
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function | |
create or replace function `<your-project>.<your-dataset>.channel_grouping`(tsource string, medium string, campaign string) as ( | |
case | |
when (tsource = '(direct)' or tsource is null) | |
and (regexp_contains(medium, r'^(\(not set\)|\(none\))$') or medium is null) | |
then 'direct' | |
when regexp_contains(medium, r'^(social|social_advertising|social-advertising|social_network|social-network|social_media|social-media)$') | |
then 'social' | |
when regexp_contains(medium, r'^(email|mail)$') | |
then 'email' | |
when regexp_contains(medium, r'^(affiliate|affiliates)$') | |
then 'affiliate' | |
when regexp_contains(medium, r'^(cpc|ppc|paidsearch)$') | |
then 'search_paid' | |
when regexp_contains(medium, r'^(display|cpm|banner)$') | |
then 'display' | |
when medium = 'organic' | |
then 'search_organic' | |
when medium = 'referral' | |
then 'referral' | |
else '(other)' | |
end | |
); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#standardSQL | |
-- Author: Krisjan Oldekamp | |
-- https://stacktonic.com/article/google-analytics-4-and-big-query-create-custom-channel-groupings-in-a-reusable-sql-function | |
with | |
-- select session in last 30 days | |
sessions as ( | |
select | |
user_pseudo_id as ga_client_id, | |
concat(user_pseudo_id,'.',(select cast(value.int_value as string) from unnest(event_params) where key = 'ga_session_id')) as session_id, -- combine user_pseudo_id and session_id for a unique session-id | |
timestamp_micros(min(event_timestamp)) as session_start, | |
array_agg( | |
if(event_name in('page_view','user_engagement','scroll'), struct( | |
event_timestamp, | |
lower((select value.string_value from unnest(event_params) where key = 'source')) as source, | |
lower((select value.string_value from unnest(event_params) where key = 'medium')) as medium, | |
lower((select value.string_value from unnest(event_params) where key = 'campaign')) as campaign, | |
(select value.int_value from unnest(event_params) where key = 'entrances') as is_entrance, | |
(select value.int_value from unnest(event_params) where key = 'ignore_referrer') as ignore_referrer | |
), null) | |
ignore nulls) as channels_in_session, | |
countif(event_name = 'purchase') as conversions, | |
sum(ecommerce.purchase_revenue) as conversion_value | |
from | |
`<your-project>.analytics_<your-dataset>.events_*` | |
where | |
_table_suffix between | |
format_date('%Y%m%d', date_sub(current_date(), interval 30 day)) | |
and format_date('%Y%m%d', date_sub(current_date(), interval 1 day)) | |
group by | |
user_pseudo_id, | |
session_id | |
), | |
-- get first campaign parameters from session and aggegrated metrics | |
traffic_acquisition as ( | |
select | |
(select t.source from unnest(channels_in_session) as t where t.ignore_referrer is null order by t.event_timestamp asc limit 1) as source, | |
(select t.medium from unnest(channels_in_session) as t where t.ignore_referrer is null order by t.event_timestamp asc limit 1) as medium, | |
count(distinct session_id) as sessions, | |
sum(conversions) as conversions, | |
ifnull(sum(conversion_value), 0) as conversion_value | |
from | |
sessions | |
group by | |
1, 2 | |
) | |
-- map source / medium to channel grouping using a user defined function (ignore campaign) | |
select | |
*, | |
<your-dataset>.channel_grouping(source, medium, null) as channel_grouping | |
from | |
traffic_acquisition | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment