Last active
December 7, 2018 05:36
-
-
Save doryokujin/ec04127763152c050ad66eaef37e74f2 to your computer and use it in GitHub Desktop.
『Treasure Data でアクセスログ分析の限界に挑む』その② 〜アクセスに基づいたユーザーセグメントの作成 月次編(頻度)〜 ref: https://qiita.com/doryokujin/items/d4899269a0c0aa7db6ea
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
TD_DATE_TRUNC('day',time,'JST') AS access_day |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */ | |
/* | |
SELECT target_month, freq/*_per_week*/ ,COUNT(1) AS cnt, AVG(pv_daily_avg) AS pv_daily_avg, STDDEV(pv_daily_avg) pv_stdev | |
FROM | |
( | |
*/ | |
SELECT | |
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month, | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id, | |
IF(freq IS NOT NULL, freq, 0) AS freq, | |
IF(freq IS NOT NULL, freq/4,-1) AS freq_per_week, | |
IF(pv IS NOT NULL, pv, 0) AS pv, | |
IF(pv_daily_avg IS NOT NULL,pv_daily_avg, 0) AS pv_daily_avg, | |
IF(pv_stdev IS NOT NULL, pv_stdev, 0) AS pv_stdev, | |
min_time, max_time | |
FROM | |
( | |
SELECT | |
td_client_id, | |
COUNT(1) AS freq, SUM(pv) AS pv, AVG(pv) AS pv_daily_avg, STDDEV(pv) AS pv_stdev, | |
TD_TIME_FORMAT(MIN(min_time),'yyyy-MM-dd','JST') AS min_time, TD_TIME_FORMAT(MAX(max_time),'yyyy-MM-dd','JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') | |
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST') | |
) | |
GROUP BY td_client_id | |
) past_month | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */ | |
GROUP BY td_client_id | |
) past_month_ago | |
ON past_month.td_client_id = past_month_ago.td_client_id | |
/* | |
) | |
GROUP BY target_month, freq/*_per_week*/ | |
ORDER BY freq/*_per_week*/ DESC | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */ | |
/* | |
SELECT target_month, frequency_past_1, frequency_past_2, frequency_past_1-IF(frequency_past_2 IS NOT NULL,frequency_past_2,0) AS diff, COUNT(1) AS cnt | |
FROM | |
( | |
*/ | |
SELECT | |
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month, | |
past_1month.td_client_id, | |
past_1month.freq_per_week AS frequency_past_1, | |
past_2month.freq_per_week AS frequency_past_2, | |
past_1month.min_time AS min_time_past_1, past_1month.max_time AS max_time_past_1, | |
past_2month.min_time AS min_time_past_2, past_2month.max_time AS max_time_past_2 | |
FROM | |
( | |
SELECT | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id, | |
IF(freq IS NOT NULL, freq, 0) AS freq, | |
IF(freq IS NOT NULL, freq/4,-1) AS freq_per_week, | |
IF(pv IS NOT NULL, pv, 0) AS pv, | |
IF(pv_daily_avg IS NOT NULL,pv_daily_avg, 0) AS pv_daily_avg, | |
IF(pv_stdev IS NOT NULL, pv_stdev, 0) AS pv_stdev, | |
min_time, max_time | |
FROM | |
( | |
SELECT | |
td_client_id, | |
COUNT(1) AS freq, SUM(pv) AS pv, AVG(pv) AS pv_daily_avg, STDDEV(pv) AS pv_stdev, | |
TD_TIME_FORMAT(MIN(min_time),'yyyy-MM-dd','JST') AS min_time, TD_TIME_FORMAT(MAX(max_time),'yyyy-MM-dd','JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M', 'JST') | |
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST') | |
) | |
GROUP BY td_client_id | |
) past_month | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */ | |
GROUP BY td_client_id | |
) past_month_ago | |
ON past_month.td_client_id = past_month_ago.td_client_id | |
) past_1month | |
LEFT OUTER JOIN /* 集合 past_1month は past_2month を内包する */ | |
( | |
SELECT | |
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id, | |
IF(freq IS NOT NULL, freq, 0) AS freq, | |
IF(freq IS NOT NULL, freq/4,-1) AS freq_per_week, | |
IF(pv IS NOT NULL, pv, 0) AS pv, | |
IF(pv_daily_avg IS NOT NULL,pv_daily_avg, 0) AS pv_daily_avg, | |
IF(pv_stdev IS NOT NULL, pv_stdev, 0) AS pv_stdev, | |
min_time, max_time | |
FROM | |
( | |
SELECT | |
td_client_id, | |
COUNT(1) AS freq, SUM(pv) AS pv, AVG(pv) AS pv_daily_avg, STDDEV(pv) AS pv_stdev, | |
TD_TIME_FORMAT(MIN(min_time),'yyyy-MM-dd','JST') AS min_time, TD_TIME_FORMAT(MAX(max_time),'yyyy-MM-dd','JST') AS max_time | |
FROM | |
( | |
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-1M/-1M', 'JST') | |
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST') | |
) | |
GROUP BY td_client_id | |
) past_month | |
FULL OUTER JOIN | |
( | |
SELECT td_client_id | |
FROM pageviews | |
WHERE TD_INTERVAL(time, '-10y/-2M', 'JST') /* 1ヶ月前より過去 */ | |
GROUP BY td_client_id | |
) past_month_ago | |
ON past_month.td_client_id = past_month_ago.td_client_id | |
) past_2month | |
ON past_1month.td_client_id = past_2month.td_client_id | |
/* | |
) | |
GROUP BY target_month, frequency_past_1, frequency_past_2 | |
*/ | |
ORDER BY frequency_past_1 DESC, frequency_past_2 DESC |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment