Last active
December 5, 2018 08:36
-
-
Save doryokujin/35b654a5181c11c36e021feb64476c02 to your computer and use it in GitHub Desktop.
『Treasure Data でアクセスログ分析の限界に挑む』その③ 〜アクセスに基づいたユーザーセグメントの作成 月次編(平日/土日)〜 ref: https://qiita.com/doryokujin/items/89392c4cc0bc00c71629
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
TD_TIME_FORMAT(time,'u','JST') # 1(月曜日) 〜 7(日曜日) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
SUM(IF(week_num IN ('6','7'), 1,0)) AS freq_weekend_total, | |
SUM(IF(week_num NOT IN ('6','7'), 1,0)) AS freq_weekday_total | |
FROM | |
( | |
SELECT TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'u','JST') AS week_num | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') | |
GROUP BY TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'u','JST') | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */ | |
/**/ | |
SELECT target_month, | |
CASE | |
WHEN 70 <= ratio_freq_weekend OR 70 <= ratio_pv_weekend THEN 'weekend' | |
WHEN ratio_freq_weekend <= 30 OR ratio_pv_weekend <= 30 THEN 'weekday' | |
WHEN ratio_freq_weekend IS NULL THEN 'non_active' | |
ELSE 'allday' | |
END AS segment, | |
COUNT(1) AS cnt | |
FROM | |
( | |
/**/ | |
SELECT | |
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month, | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id, | |
freq_weekend, freq_weekend_total, | |
freq_weekday, freq_weekday_total, | |
ROUND(1.0*freq_weekend/freq_weekend_total,2)*100 AS freq_weekend_ratio, | |
ROUND(1.0*freq_weekday/freq_weekday_total,2)*100 AS freq_weekday_ratio, | |
ROUND( | |
( 1.0*freq_weekend/freq_weekend_total ) / ( 1.0*freq_weekend/freq_weekend_total + 1.0*freq_weekday/freq_weekday_total ) | |
,2)*100 AS ratio_freq_weekend, | |
pv_weekend, pv_weekday, | |
ROUND(1.0*pv_weekend/freq_weekend_total,2)*100 AS pv_weekend_avg, | |
ROUND(1.0*pv_weekday/freq_weekday_total,2)*100 AS pv_weekday_avg, | |
ROUND( | |
( 1.0*pv_weekend/freq_weekend_total ) / ( 1.0*pv_weekend/freq_weekend_total + 1.0*pv_weekday/freq_weekday_total ) | |
,2)*100 AS ratio_pv_weekend, | |
freq_weekend_total, freq_weekday_total, | |
min_time, max_time | |
FROM | |
( | |
SELECT td_client_id, freq_weekend_total, freq_weekday_total, freq_weekend, freq_weekday, pv_weekend, pv_weekday, min_time, max_time | |
FROM | |
( | |
SELECT | |
td_client_id, | |
SUM(IF(week_num IN ('6','7'), 1,0)) AS freq_weekend, | |
SUM(IF(week_num NOT IN ('6','7'), 1,0)) AS freq_weekday, | |
SUM(IF(week_num IN ('6','7'), pv,0)) AS pv_weekend, | |
SUM(IF(week_num NOT IN ('6','7'), pv,0)) AS pv_weekday, | |
TD_TIME_FORMAT(MIN(min_time),'yyyy-MM-dd','JST') AS min_time, TD_TIME_FORMAT(MAX(max_time),'yyyy-MM-dd','JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'u','JST') AS week_num, COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') | |
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'u','JST') | |
) | |
GROUP BY td_client_id | |
), | |
( | |
SELECT | |
SUM(IF(week_num IN ('6','7'), 1,0)) AS freq_weekend_total, | |
SUM(IF(week_num NOT IN ('6','7'), 1,0)) AS freq_weekday_total | |
FROM | |
( | |
SELECT TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'u','JST') AS week_num | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') | |
GROUP BY TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'u','JST') | |
) | |
) | |
) past_month | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */ | |
GROUP BY td_client_id | |
) past_month_ago | |
ON past_month.td_client_id = past_month_ago.td_client_id | |
/* 前月分のみのユーザーのみ取得で良いのならば past_month_ago サブクエリは不要 */ | |
/**/ | |
) | |
GROUP BY target_month, | |
CASE | |
WHEN 70 <= ratio_freq_weekend OR 70 <= ratio_pv_weekend THEN 'weekend' | |
WHEN ratio_freq_weekend <= 30 OR ratio_pv_weekend <= 30 THEN 'weekday' | |
WHEN ratio_freq_weekend IS NULL THEN 'non_active' | |
ELSE 'allday' | |
END | |
/**/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment