Created
August 25, 2022 19:50
-
-
Save nwizugbesamson/cf0f4bc46bc77cac83948e23415e450b to your computer and use it in GitHub Desktop.
Cohort retention table script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- CREATE CUSTOMER ACTIVITY TABLE | |
DROP TABLE customer_details_table; | |
CREATE TEMPORARY TABLE customer_details_table | |
WITH cte AS ( | |
SELECT | |
user_id, | |
transaction_date, | |
DATE_ADD(DATE_ADD(MAKEDATE(YEAR(transaction_date), 1), INTERVAL (MONTH(transaction_date))-1 MONTH), INTERVAL (1)-1 DAY) customer_cohort_date | |
-- ROW_NUMBER() OVER(PARTITION BY user_id, transaction_date ORDER BY transaction_date) as duplicate_flag | |
FROM retention.customer_record | |
WHERE user_id IS NOT NULL AND transaction_date IS NOT NULL | |
) | |
SELECT | |
* | |
FROM cte; | |
-- CREATE COHORT ACTIVATION TABLE AS DISTINCT USER IS AND MIN TRANSACTION DATE | |
DROP TABLE cohort_activation; | |
CREATE TEMPORARY TABLE cohort_activation | |
WITH cte AS ( | |
SELECT | |
* | |
FROM retention.customer_record | |
WHERE user_id IS NOT NULL AND transaction_date IS NOT NULL | |
), | |
grouped_data AS | |
( | |
SELECT distinct user_id, | |
MIN(transaction_date) activation_date | |
FROM cte | |
GROUP BY user_id | |
) | |
SELECT *, | |
DATE_ADD(DATE_ADD(MAKEDATE(YEAR(activation_date), 1), INTERVAL (MONTH(activation_date))-1 MONTH), INTERVAL (1)-1 DAY) cohort_date | |
from grouped_data; | |
-- CREATE COHORT INDEX | |
DROP TABLE cohort_index_table; | |
CREATE TEMPORARY TABLE cohort_index_table | |
WITH data as( | |
SELECT | |
cd.*, | |
c.cohort_date, | |
YEAR(cd.transaction_date) transaction_year, | |
MONTH(cd.transaction_date) transaction_month, | |
YEAR(c.activation_date) activation_year, | |
MONTH(c.activation_date) activation_month | |
FROM customer_details_table cd | |
LEFT JOIN cohort_activation c | |
ON cd.user_id = c.user_id | |
), | |
cohort_data AS ( | |
SELECT | |
data.*, | |
transaction_year - activation_year AS year_difference, | |
transaction_month - activation_month AS month_difference | |
FROM data) | |
SELECT | |
*, | |
year_difference * 12 + month_difference + 1 AS cohort_index | |
FROM cohort_data; | |
select | |
cohort_date | |
-- sum(case when cohort_index = 1 THEN 1 else 0 END) as group_1 | |
from cohort_index_table | |
group by cohort_date | |
order by cohort_date; | |
CREATE TEMPORARY TABLE cohort_pct | |
with grp_data as( | |
select distinct | |
user_id, cohort_date, cohort_index | |
from cohort_index_table | |
) | |
select cohort_date, | |
sum(case when cohort_index = 1 then 1 else 0 end) as grp_1, | |
sum(case when cohort_index = 2 then 1 else 0 end) as grp_2, | |
sum(case when cohort_index = 3 then 1 else 0 end) as grp_3, | |
sum(case when cohort_index = 4 then 1 else 0 end) as grp_4, | |
sum(case when cohort_index = 5 then 1 else 0 end) as grp_5, | |
sum(case when cohort_index = 6 then 1 else 0 end) as grp_6, | |
sum(case when cohort_index = 7 then 1 else 0 end) as grp_7, | |
sum(case when cohort_index = 8 then 1 else 0 end) as grp_8, | |
sum(case when cohort_index = 9 then 1 else 0 end) as grp_9, | |
sum(case when cohort_index = 10 then 1 else 0 end) as grp_10, | |
sum(case when cohort_index = 11 then 1 else 0 end) as grp_11, | |
sum(case when cohort_index = 12 then 1 else 0 end) as grp_12, | |
sum(case when cohort_index = 13 then 1 else 0 end) as grp_13, | |
sum(case when cohort_index = 14 then 1 else 0 end) as grp_14, | |
sum(case when cohort_index = 15 then 1 else 0 end) as grp_15, | |
sum(case when cohort_index = 16 then 1 else 0 end) as grp_16, | |
sum(case when cohort_index = 17 then 1 else 0 end) as grp_17, | |
sum(case when cohort_index = 18 then 1 else 0 end) as grp_18, | |
sum(case when cohort_index = 19 then 1 else 0 end) as grp_19 | |
from grp_data | |
group by cohort_date | |
order by cohort_date; | |
select | |
cohort_date, | |
1.0 * grp_1/grp_1 * 100, | |
1.0 * grp_2/grp_1 * 100, | |
1.0 * grp_3/grp_1 * 100, | |
1.0 * grp_4/grp_1 * 100, | |
1.0 * grp_5/grp_1 * 100, | |
1.0 * grp_6/grp_1 * 100, | |
1.0 * grp_7/grp_1 * 100, | |
1.0 * grp_8/grp_1 * 100, | |
1.0 * grp_9/grp_1 * 100, | |
1.0 * grp_10/grp_1 * 100, | |
1.0 * grp_11/grp_1 * 100, | |
1.0 * grp_12/grp_1 * 100, | |
1.0 * grp_13/grp_1 * 100, | |
1.0 * grp_14/grp_1 * 100, | |
1.0 * grp_15/grp_1 * 100, | |
1.0 * grp_16/grp_1 * 100, | |
1.0 * grp_17/grp_1 * 100, | |
1.0 * grp_18/grp_1 * 100, | |
1.0 * grp_19/grp_1 * 100 | |
from cohort_pct; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment