Created
December 6, 2018 01:32
-
-
Save doryokujin/b06e4390156402eff4e07fb1498a4b57 to your computer and use it in GitHub Desktop.
『Treasure Data でアクセスログ分析の限界に挑む』その④ 〜アクセスに基づいたユーザーセグメントの作成 月次編(時間帯)〜 ref: https://qiita.com/doryokujin/items/750e3f322c2bc00371ac
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */ | |
/* | |
SELECT target_month, segment, COUNT(1) AS cnt | |
FROM | |
( | |
*/ | |
SELECT target_month, | |
CASE | |
WHEN 50<=ratio_freq_sleeping OR 50<=ratio_pv_sleeping THEN 'sleeping' | |
WHEN 50<=ratio_freq_morning OR 50<=ratio_pv_morning THEN 'morning' | |
WHEN 50<=ratio_freq_working OR 50<=ratio_pv_working THEN 'working' | |
WHEN 50<=ratio_freq_night OR 50<=ratio_pv_night THEN 'night' | |
WHEN | |
(30<=ratio_freq_sleeping OR 30<=ratio_pv_sleeping) AND (30<=ratio_freq_morning OR 30<=ratio_pv_morning) | |
THEN 'sleeping,morning' | |
WHEN | |
(30<=ratio_freq_sleeping OR 30<=ratio_pv_sleeping) AND (30<=ratio_freq_working OR 30<=ratio_pv_working) | |
THEN 'sleeping,working' | |
WHEN | |
(30<=ratio_freq_sleeping OR 30<=ratio_pv_sleeping) AND (30<=ratio_freq_night OR 30<=ratio_pv_night) | |
THEN 'sleeping,night' | |
WHEN | |
(30<=ratio_freq_morning OR 30<=ratio_pv_morning) AND (30<=ratio_freq_working OR 30<=ratio_pv_working) | |
THEN 'morning,working' | |
WHEN | |
(30<=ratio_freq_morning OR 30<=ratio_pv_morning) AND (30<=ratio_freq_night OR 30<=ratio_pv_night) | |
THEN 'morning,night' | |
WHEN | |
(30<=ratio_freq_working OR 30<=ratio_pv_working) AND (30<=ratio_freq_night OR 30<=ratio_pv_night) | |
THEN 'working,night' | |
WHEN freq IS NULL THEN 'non_active' | |
ELSE 'alltime' | |
END AS segment, | |
freq, freq_sleeping, freq_morning, freq_working, freq_night, | |
ratio_freq_sleeping, ratio_freq_morning, ratio_freq_working, ratio_freq_night, | |
pv, pv_sleeping, pv_morning, pv_working, pv_night, | |
ratio_pv_sleeping, ratio_pv_morning, ratio_pv_working, ratio_pv_night | |
FROM | |
( | |
SELECT | |
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month, | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id, | |
freq, freq_sleeping, freq_morning, freq_working, freq_night, | |
ROUND(1.0*freq_sleeping/freq,2)*100 AS ratio_freq_sleeping, | |
ROUND(1.0*freq_morning /freq,2)*100 AS ratio_freq_morning, | |
ROUND(1.0*freq_working /freq,2)*100 AS ratio_freq_working, | |
ROUND(1.0*freq_night /freq,2)*100 AS ratio_freq_night, | |
pv, pv_sleeping, pv_morning, pv_working, pv_night, | |
ROUND(1.0*pv_sleeping/pv,2)*100 AS ratio_pv_sleeping, | |
ROUND(1.0*pv_morning /pv,2)*100 AS ratio_pv_morning, | |
ROUND(1.0*pv_working /pv,2)*100 AS ratio_pv_working, | |
ROUND(1.0*pv_night /pv,2)*100 AS ratio_pv_night, | |
min_time, max_time | |
FROM | |
( | |
SELECT | |
td_client_id, | |
SUM(IF(hour IN ('00','01','02','03','04','05'), 1, 0)) AS freq_sleeping, | |
SUM(IF(hour IN ('06','07','08','09'), 1, 0)) AS freq_morning, | |
SUM(IF(hour IN ('10','11','12','13','14','15','16','17'), 1, 0)) AS freq_working, | |
SUM(IF(hour IN ('18','19','20','21','22','23'), 1, 0)) AS freq_night, | |
COUNT(1) AS freq, | |
SUM(IF(hour IN ('00','01','02','03','04','05'), pv, 0)) AS pv_sleeping, | |
SUM(IF(hour IN ('06','07','08','09'), pv, 0)) AS pv_morning, | |
SUM(IF(hour IN ('10','11','12','13','14','15','16','17'), pv, 0)) AS pv_working, | |
SUM(IF(hour IN ('18','19','20','21','22','23'), pv, 0)) AS pv_night, | |
SUM(pv) AS pv, | |
TD_TIME_FORMAT(MIN(min_time),'yyyy-MM-dd','JST') AS min_time, TD_TIME_FORMAT(MAX(max_time),'yyyy-MM-dd','JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'HH','JST') AS hour, COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') | |
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'HH','JST') | |
) | |
GROUP BY td_client_id | |
) past_month | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */ | |
GROUP BY td_client_id | |
) past_month_ago | |
ON past_month.td_client_id = past_month_ago.td_client_id | |
) | |
/* 前月分のみのユーザーのみ取得で良いのならば past_month_ago サブクエリは不要 *//* | |
) | |
GROUP BY target_month, segment HAVING segment != 'non_active' | |
ORDER BY cnt DESC | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'HH','JST') AS hour, | |
COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') | |
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'HH','JST') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SUM(IF(hour IN ('00','01','02','03','04','05'), 1, 0)) AS freq_sleeping, | |
SUM(IF(hour IN ('06','07','08','09'), 1, 0)) AS freq_morning, | |
SUM(IF(hour IN ('10','11','12','13','14','15','16','17'), 1, 0)) AS freq_working, | |
SUM(IF(hour IN ('18','19','20','21','22','23'), 1, 0)) AS freq_night, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ROUND(1.0*freq_sleeping/freq,2)*100 AS ratio_freq_sleeping, | |
ROUND(1.0*freq_morning /freq,2)*100 AS ratio_freq_morning, | |
ROUND(1.0*freq_working /freq,2)*100 AS ratio_freq_working, | |
ROUND(1.0*freq_night /freq,2)*100 AS ratio_freq_night, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
FULL OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */ | |
GROUP BY td_client_id | |
) past_month_ago | |
ON past_month.td_client_id = past_month_ago.td_client_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id, |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment