Last active
April 25, 2024 19:41
-
-
Save lewish/a37ecb423a45ccf861373d1c942a0ea5 to your computer and use it in GitHub Desktop.
Sessionized segment tracks & pages (BigQuery)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This query creates a combined view of tracks and pages from segment data. | |
Sessions are computed by finding the any session start timestamp denoted by an activity gap of 30 minutes. | |
Session IDs are assigned to each track or page record using a combination of the session index, user ID, and date. | |
*/ | |
WITH | |
with_session_starts AS ( | |
SELECT | |
*, | |
COALESCE( (UNIX_MILLIS(timestamp) - UNIX_MILLIS(LAG(timestamp) OVER (PARTITION BY user_id ORDER BY timestamp ASC)))/(1000*60) >= 30, | |
TRUE ) AS session_start_event | |
FROM (( | |
SELECT | |
timestamp, | |
user_id, | |
context_ip, | |
context_page_url, | |
context_page_path, | |
STRUCT(id, | |
event) AS tracks_info, | |
STRUCT(NULL AS id, | |
NULL AS url, | |
NULL AS referrer, | |
NULL AS url_hash, | |
NULL AS title, | |
NULL as name, | |
NULL as search, | |
NULL as path) AS pages_info | |
FROM | |
<your_segment_schema>.tracks) | |
UNION ALL ( | |
SELECT | |
timestamp, | |
user_id, | |
context_ip, | |
context_page_url, | |
context_page_path, | |
STRUCT(NULL AS id, | |
NULL AS event) AS tracks_info, | |
STRUCT(id, | |
url, | |
referrer, | |
url_hash, | |
title, | |
name, | |
search, | |
path) AS pages_info | |
FROM | |
<your_segment_schema>.pages)) | |
WHERE NOT user_id is NULL ), | |
with_session_index AS ( | |
SELECT | |
*, | |
SUM( | |
IF | |
( session_start_event, | |
1, | |
0 ) ) OVER (PARTITION BY user_id ORDER BY timestamp ASC) AS session_index | |
FROM | |
with_session_starts ), | |
with_session_id AS ( | |
SELECT | |
*, | |
farm_fingerprint(CONCAT(CAST(session_index AS STRING), "|", CAST(user_id AS STRING), "|", CAST(DATE(timestamp) AS STRING))) AS session_id | |
FROM | |
with_session_index) | |
SELECT | |
* | |
FROM | |
with_session_id |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment