Forked from bstancil/Finding User Sessions with SQL
Last active
March 11, 2022 14:03
-
-
Save cloudify/47aac4faee7a1298df4d to your computer and use it in GitHub Desktop.
These queries let you define find user sessions against event data logged to Segment SQL, Snowplow, or Google BigQuery.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- These queries let you define find user sessions against event data | |
-- logged to Segment SQL, Snowplow, or Google BigQuery. | |
-- For more details, see the full post: | |
-- LINK | |
--- SEGMENT SQL | |
-- Finding the start of every session | |
SELECT * | |
FROM ( | |
SELECT * | |
LAG(sent_at,1) OVER (PARTITION BY user_id ORDER BY sent_at) AS last_event | |
FROM "your_project".tracks | |
) last | |
WHERE EXTRACT('EPOCH' FROM sent_at) - EXTRACT('EPOCH' FROM last_event) >= (60 * 10) | |
OR last_event IS NULL | |
-- Mapping every event to its session | |
SELECT *, | |
SUM(is_new_session) OVER (ORDER BY user_id, sent_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS global_session_id, | |
SUM(is_new_session) OVER (PARTITION BY user_id ORDER BY sent_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS user_session_id | |
FROM ( | |
SELECT *, | |
CASE WHEN EXTRACT('EPOCH' FROM sent_at) - EXTRACT('EPOCH' FROM last_event) >= (60 * 10) | |
OR last_event IS NULL | |
THEN 1 ELSE 0 END AS is_new_session | |
FROM ( | |
SELECT *, | |
LAG(sent_at,1) OVER (PARTITION BY user_id ORDER BY sent_at) AS last_event | |
FROM "your_project".tracks | |
) last | |
) final | |
--- SNOWPLOW | |
-- Finding the start of every session | |
SELECT * | |
FROM ( | |
SELECT * | |
LAG(collector_tstamp,1) OVER (PARTITION BY domain_userid ORDER BY collector_tstamp) AS last_event | |
FROM "your_project".events | |
) last | |
WHERE EXTRACT('EPOCH' FROM collector_tstamp) - EXTRACT('EPOCH' FROM last_event) >= (60 * 10) | |
OR last_event IS NULL | |
-- Mapping every event to its session | |
SELECT *, | |
SUM(is_new_session) OVER (ORDER BY domain_userid, collector_tstamp ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS global_session_id, | |
SUM(is_new_session) OVER (PARTITION BY domain_userid ORDER BY collector_tstamp ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS user_session_id | |
FROM ( | |
SELECT *, | |
CASE WHEN EXTRACT('EPOCH' FROM collector_tstamp) - EXTRACT('EPOCH' FROM last_event) >= (60 * 10) | |
OR last_event IS NULL | |
THEN 1 ELSE 0 END AS is_new_session | |
FROM ( | |
SELECT *, | |
LAG(collector_tstamp,1) OVER (PARTITION BY domain_userid ORDER BY collector_tstamp) AS last_event | |
FROM "your_project".events | |
) last | |
) final | |
--- GOOGLE BIGQUERY | |
-- Finding the start of every session | |
SELECT * | |
FROM ( | |
SELECT *, | |
LAG(occurred_at,1) OVER (PARTITION BY fullVisitorId ORDER BY occurred_at) AS last_event | |
FROM ( | |
SELECT fullVisitorId, | |
visitStartTime + hits.time/1000 AS occurred_at | |
FROM "your_tables" | |
) pre | |
) last | |
WHERE occurred_at - last_event >= (60 * 10) | |
OR last_event IS NULL | |
-- Mapping every event to its session | |
SELECT *, | |
SUM(is_new_session) OVER (ORDER BY fullVisitorId, occurred_at) AS global_session_id | |
FROM ( | |
SELECT *, | |
CASE WHEN occurred_at - last_event >= (60 * 10) OR last_event IS NULL THEN 1 ELSE 0 END AS is_new_session | |
FROM ( | |
SELECT *, | |
LAG(occurred_at,1) OVER (PARTITION BY fullVisitorId ORDER BY occurred_at) AS last_event | |
FROM ( | |
SELECT fullVisitorId, | |
visitStartTime + hits.time/1000 AS occurred_at | |
FROM "your_tables" | |
) pre | |
) last | |
) final | |
ORDER BY 1,2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment