Last active
May 13, 2021 12:38
-
-
Save doryokujin/c13a6111076612d6bc10ce0664e38ed0 to your computer and use it in GitHub Desktop.
『Treasure Data でアクセスログ分析の限界に挑む』その② 〜アクセスに基づいたユーザーセグメントの作成 月次編(アクティビティ)〜 ref: https://qiita.com/doryokujin/items/605158dc61e378685265
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */ | |
/* | |
SELECT target_month, segment, COUNT(1) AS cnt | |
FROM | |
( | |
*/ | |
SELECT | |
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month, | |
IF(this.td_client_id IS NOT NULL, this.td_client_id, all_past.td_client_id) AS td_client_id, | |
CASE | |
WHEN this.td_client_id IS NOT NULL AND all_past.td_client_id IS NOT NULL THEN 'active' | |
WHEN this.td_client_id IS NULL AND all_past.td_client_id IS NOT NULL THEN 'non_active' | |
WHEN this.td_client_id IS NOT NULL AND all_past.td_client_id IS NULL THEN 'new' | |
END AS segment, | |
TD_TIME_FORMAT(min_time, 'yyyy-MM-dd', 'JST') AS min_time, | |
TD_TIME_FORMAT(max_time, 'yyyy-MM-dd', 'JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去,厳密には以下を用いる */ | |
/* WHERE TD_TIME_RANGE( time, NULL, TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST') ) */ | |
GROUP BY td_client_id | |
) all_past | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') | |
GROUP BY td_client_id | |
) this | |
ON all_past.td_client_id = this.td_client_id | |
/* | |
) | |
GROUP BY target_month, segment | |
ORDER BY cnt DESC | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
target_month, | |
kv['active'] AS active, | |
kv['non_active'] AS non_active, | |
kv['new'] AS new, | |
kv['active']+kv['non_active']+kv['new'] AS total | |
FROM ( | |
SELECT target_month, map_agg(segment, cnt) kv | |
FROM segment_monthly_acctivity | |
GROUP BY target_month | |
) | |
ORDER BY target_month |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */ | |
/* | |
SELECT target_month, activity_past_1, activity_past_2, COUNT(1) AS cnt, | |
MIN(min_time_past_1) AS min_time_past_1, MAX(max_time_past_1) AS max_time_past_1, | |
MIN(min_time_past_2) AS min_time_past_2, MAX(max_time_past_2) AS max_time_past_2 | |
FROM | |
( | |
*/ | |
SELECT | |
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month, | |
past_1month.td_client_id, | |
past_1month.segment AS activity_past_1, | |
past_2month.segment AS activity_past_2, | |
past_1month.min_time AS min_time_past_1, past_1month.max_time AS max_time_past_1, | |
past_2month.min_time AS min_time_past_2, past_2month.max_time AS max_time_past_2 | |
FROM | |
( | |
SELECT | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id) AS td_client_id, | |
CASE | |
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'active' | |
WHEN past_month.td_client_id IS NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'non_active' | |
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NULL THEN 'welcome' | |
END AS segment, | |
TD_TIME_FORMAT(past_month.min_time, 'yyyy-MM-dd', 'JST') AS min_time, | |
TD_TIME_FORMAT(past_month.max_time, 'yyyy-MM-dd', 'JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */ | |
/* WHERE TD_TIME_RANGE( time, NULL, TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST') ) */ | |
GROUP BY td_client_id | |
) past_month_ago | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') /* 前月1ヶ月間 */ | |
GROUP BY td_client_id | |
) past_month | |
ON past_month_ago.td_client_id = past_month.td_client_id | |
) past_1month | |
LEFT OUTER JOIN /* 集合 past_1month は past_2month を内包する */ | |
( | |
SELECT | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id) AS td_client_id, | |
CASE | |
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'active' | |
WHEN past_month.td_client_id IS NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'non_active' | |
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NULL THEN 'welcome' | |
END AS segment, | |
TD_TIME_FORMAT(past_month.min_time, 'yyyy-MM-dd', 'JST') AS min_time, | |
TD_TIME_FORMAT(past_month.max_time, 'yyyy-MM-dd', 'JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-2M', 'JST') /* 1ヶ月前より過去 */ | |
/* WHERE TD_TIME_RANGE( time, NULL, TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST')-1, 'JST' )) */ | |
GROUP BY td_client_id | |
) past_month_ago | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M/-1M', 'JST') /* 前々月1ヶ月間 */ | |
GROUP BY td_client_id | |
) past_month | |
ON past_month_ago.td_client_id = past_month.td_client_id | |
) past_2month | |
ON past_1month.td_client_id = past_2month.td_client_id | |
/* | |
) | |
GROUP BY target_month, activity_past_1, activity_past_2 | |
*/ | |
ORDER BY activity_past_1, activity_past_2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT target_month, segment_name, cnt | |
FROM segment_monthly_activity s | |
JOIN | |
( SELECT activity_past_1, activity_past_2, segment_name FROM segment_monthly_activity_master ) m | |
ON s.activity_past_1 = m.activity_past_1 AND s.activity_past_2 = m.activity_past_2 | |
ORDER BY target_month |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
LEFT OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') /* 前月1ヶ月間 */ | |
AND td_title = 'お問い合わせ - Treasure Data' | |
GROUP BY td_client_id | |
) past_month_cv | |
ON past_month.td_client_id = past_month_cv.td_client_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */ | |
/**/ | |
SELECT target_month, activity_past_1, activity_past_2, COUNT(1) AS cnt, | |
MIN(min_time_past_1) AS min_time_past_1, MAX(max_time_past_1) AS max_time_past_1, | |
MIN(min_time_past_2) AS min_time_past_2, MAX(max_time_past_2) AS max_time_past_2 | |
FROM | |
( | |
/**/ | |
SELECT | |
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month, | |
CASE | |
WHEN past_1month.segment = 'active' AND past_1month.with_cv = 1 THEN 'active_cv' | |
WHEN past_1month.segment = 'active' AND past_1month.with_cv = 0 THEN 'active' | |
WHEN past_1month.segment = 'welcome' AND past_1month.with_cv = 1 THEN 'welcome_cv' | |
WHEN past_1month.segment = 'welcome' AND past_1month.with_cv = 0 THEN 'welcome' | |
ELSE past_1month.segment | |
END AS activity_past_1, | |
CASE | |
WHEN past_2month.segment = 'active' AND past_2month.with_cv = 1 THEN 'active_cv' | |
WHEN past_2month.segment = 'active' AND past_2month.with_cv = 0 THEN 'active' | |
WHEN past_2month.segment = 'welcome' AND past_2month.with_cv = 1 THEN 'welcome_cv' | |
WHEN past_2month.segment = 'welcome' AND past_2month.with_cv = 0 THEN 'welcome' | |
ELSE past_2month.segment | |
END AS activity_past_2, | |
past_1month.min_time AS min_time_past_1, past_1month.max_time AS max_time_past_1, | |
past_2month.min_time AS min_time_past_2, past_2month.max_time AS max_time_past_2 | |
FROM | |
( | |
SELECT | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id) AS td_client_id, | |
CASE | |
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'active' | |
WHEN past_month.td_client_id IS NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'non_active' | |
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NULL THEN 'welcome' | |
END AS segment, | |
IF(past_month_cv.td_client_id IS NOT NULL,1,0) AS with_cv, | |
TD_TIME_FORMAT(past_month.min_time, 'yyyy-MM-dd', 'JST') AS min_time, | |
TD_TIME_FORMAT(past_month.max_time, 'yyyy-MM-dd', 'JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */ | |
GROUP BY td_client_id | |
) past_month_ago | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') /* 前月1ヶ月間 */ | |
GROUP BY td_client_id | |
) past_month | |
ON past_month_ago.td_client_id = past_month.td_client_id | |
LEFT OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') /* 前月1ヶ月間 */ | |
AND td_title = 'お問い合わせ - Treasure Data' | |
GROUP BY td_client_id | |
) past_month_cv | |
ON past_month.td_client_id = past_month_cv.td_client_id | |
) past_1month | |
LEFT OUTER JOIN /* 集合 past_1month は past_2month を内包する */ | |
( | |
SELECT | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id) AS td_client_id, | |
CASE | |
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'active' | |
WHEN past_month.td_client_id IS NULL AND past_month_ago.td_client_id IS NOT NULL THEN 'non_active' | |
WHEN past_month.td_client_id IS NOT NULL AND past_month_ago.td_client_id IS NULL THEN 'welcome' | |
END AS segment, | |
IF(past_month_cv.td_client_id IS NOT NULL,1,0) AS with_cv, | |
TD_TIME_FORMAT(past_month.min_time, 'yyyy-MM-dd', 'JST') AS min_time, | |
TD_TIME_FORMAT(past_month.max_time, 'yyyy-MM-dd', 'JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-2M', 'JST') /* 1ヶ月前より過去 */ | |
/* WHERE TD_TIME_RANGE( time, NULL, TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST')-1, 'JST' )) */ | |
GROUP BY td_client_id | |
) past_month_ago | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M/-1M', 'JST') /* 前々月1ヶ月間 */ | |
GROUP BY td_client_id | |
) past_month | |
ON past_month_ago.td_client_id = past_month.td_client_id | |
LEFT OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M/-1M', 'JST') /* 前々月1ヶ月間 */ | |
AND td_title = 'お問い合わせ - Treasure Data' | |
GROUP BY td_client_id | |
) past_month_cv | |
ON past_month.td_client_id = past_month_cv.td_client_id | |
) past_2month | |
ON past_1month.td_client_id = past_2month.td_client_id | |
/**/ | |
) | |
GROUP BY target_month, activity_past_1, activity_past_2 | |
/**/ | |
ORDER BY activity_past_1, activity_past_2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment