Last active
April 12, 2024 18:15
-
-
Save bassemawhoob/91df0d2dc12239cb0f860662d0a615b9 to your computer and use it in GitHub Desktop.
Revenue Retention Cohort Analysis - PostgreSQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Based on: https://medium.com/quick-code/how-to-write-sql-to-calculate-user-cohort-retention-a1b3b57c7a2f | |
-- Uses DATE_PART instead of DATE_DIFF as it is not supported by PostgreSQL | |
-- (branch_id, scheduled_for, total_cents) | |
WITH activities AS ( | |
SELECT branch_id, scheduled_for, total_cents | |
FROM orders | |
WHERE orders.status = 'complete' | |
), | |
-- (branch_id, cohort_month): cohort month is the first order date | |
cohort_items AS ( | |
SELECT branch_id, date_trunc('MONTH', MIN(scheduled_for))::DATE AS cohort_month | |
FROM activities | |
GROUP BY branch_id | |
ORDER BY 1, 2 | |
), | |
-- (branch_id, month_number, revenue): branch X has activity in month number X | |
branch_orders AS ( | |
SELECT A.branch_id, (DATE_PART('year', C.cohort_month) - DATE_PART('year', A.scheduled_for)) * 12 + | |
(DATE_PART('month', C.cohort_month) - DATE_PART('month', A.scheduled_for)) AS month_number, SUM(A.total_cents) as revenue | |
FROM activities A | |
LEFT JOIN cohort_items C ON A.branch_id = C.branch_id | |
GROUP BY 1, 2 | |
ORDER BY 1, 2 | |
), | |
-- (cohort_month, count, revenue) | |
cohort_size AS ( | |
SELECT cohort_month, COUNT(DISTINCT A.branch_id) AS num_branches, SUM(A.total_cents) as revenue | |
FROM cohort_items C | |
LEFT JOIN activities A ON A.branch_id = C.branch_id | |
GROUP BY 1 | |
ORDER BY 1 | |
), | |
-- (cohort_month, month_number, count, revenue) | |
retention_table AS ( | |
SELECT C.cohort_month, A.month_number, COUNT(DISTINCT A.branch_id) AS num_branches, SUM(A.revenue) as revenue | |
FROM branch_orders A | |
LEFT JOIN cohort_items C ON A.branch_id = C.branch_id | |
GROUP BY 1, 2 | |
) | |
-- final value: (cohort_month, size, month_number, total_revenue, repeators, percentage) | |
SELECT B.cohort_month, S.num_branches AS total_branches, B.month_number, B.revenue / 100 as total_revenue, B.num_branches AS total_repeators, B.revenue / S.revenue as percentage | |
FROM retention_table B | |
LEFT JOIN cohort_size S ON B.cohort_month = S.cohort_month | |
WHERE B.cohort_month IS NOT NULL | |
ORDER BY 1, 3 desc |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
could you please provide the example dataset so that people can understand easily.
Thanks