Skip to content

Instantly share code, notes, and snippets.

@doryokujin
Last active December 5, 2018 08:36
Show Gist options
  • Save doryokujin/35b654a5181c11c36e021feb64476c02 to your computer and use it in GitHub Desktop.
Save doryokujin/35b654a5181c11c36e021feb64476c02 to your computer and use it in GitHub Desktop.
『Treasure Data でアクセスログ分析の限界に挑む』その③ 〜アクセスに基づいたユーザーセグメントの作成 月次編(平日/土日)〜 ref: https://qiita.com/doryokujin/items/89392c4cc0bc00c71629
TD_TIME_FORMAT(time,'u','JST') # 1(月曜日) 〜 7(日曜日)
SELECT
SUM(IF(week_num IN ('6','7'), 1,0)) AS freq_weekend_total,
SUM(IF(week_num NOT IN ('6','7'), 1,0)) AS freq_weekday_total
FROM
(
SELECT TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'u','JST') AS week_num
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST')
GROUP BY TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'u','JST')
)
/* TD_SCHEDULED_TIME() = '2018-11-23 11:11:00' */
/**/
SELECT target_month,
CASE
WHEN 70 <= ratio_freq_weekend OR 70 <= ratio_pv_weekend THEN 'weekend'
WHEN ratio_freq_weekend <= 30 OR ratio_pv_weekend <= 30 THEN 'weekday'
WHEN ratio_freq_weekend IS NULL THEN 'non_active'
ELSE 'allday'
END AS segment,
COUNT(1) AS cnt
FROM
(
/**/
SELECT
TD_TIME_FORMAT(TD_DATE_TRUNC('month', TD_DATE_TRUNC('month', TD_SCHEDULED_TIME(),'JST')-1, 'JST'),'yyyy-MM-dd','JST') AS target_month,
IF(past_month.td_client_id IS NOT NULL, past_month.td_client_id, past_month_ago.td_client_id ) AS td_client_id,
freq_weekend, freq_weekend_total,
freq_weekday, freq_weekday_total,
ROUND(1.0*freq_weekend/freq_weekend_total,2)*100 AS freq_weekend_ratio,
ROUND(1.0*freq_weekday/freq_weekday_total,2)*100 AS freq_weekday_ratio,
ROUND(
( 1.0*freq_weekend/freq_weekend_total ) / ( 1.0*freq_weekend/freq_weekend_total + 1.0*freq_weekday/freq_weekday_total )
,2)*100 AS ratio_freq_weekend,
pv_weekend, pv_weekday,
ROUND(1.0*pv_weekend/freq_weekend_total,2)*100 AS pv_weekend_avg,
ROUND(1.0*pv_weekday/freq_weekday_total,2)*100 AS pv_weekday_avg,
ROUND(
( 1.0*pv_weekend/freq_weekend_total ) / ( 1.0*pv_weekend/freq_weekend_total + 1.0*pv_weekday/freq_weekday_total )
,2)*100 AS ratio_pv_weekend,
freq_weekend_total, freq_weekday_total,
min_time, max_time
FROM
(
SELECT td_client_id, freq_weekend_total, freq_weekday_total, freq_weekend, freq_weekday, pv_weekend, pv_weekday, min_time, max_time
FROM
(
SELECT
td_client_id,
SUM(IF(week_num IN ('6','7'), 1,0)) AS freq_weekend,
SUM(IF(week_num NOT IN ('6','7'), 1,0)) AS freq_weekday,
SUM(IF(week_num IN ('6','7'), pv,0)) AS pv_weekend,
SUM(IF(week_num NOT IN ('6','7'), pv,0)) AS pv_weekday,
TD_TIME_FORMAT(MIN(min_time),'yyyy-MM-dd','JST') AS min_time, TD_TIME_FORMAT(MAX(max_time),'yyyy-MM-dd','JST') AS max_time
FROM
(
SELECT td_client_id, TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'u','JST') AS week_num, COUNT(1) AS pv, MIN(time) AS min_time, MAX(time) AS max_time
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST')
GROUP BY td_client_id, TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'u','JST')
)
GROUP BY td_client_id
),
(
SELECT
SUM(IF(week_num IN ('6','7'), 1,0)) AS freq_weekend_total,
SUM(IF(week_num NOT IN ('6','7'), 1,0)) AS freq_weekday_total
FROM
(
SELECT TD_DATE_TRUNC('day',time,'JST') AS access_day, TD_TIME_FORMAT(time,'u','JST') AS week_num
FROM pageviews
WHERE TD_INTERVAL(time, '-1M', 'JST')
GROUP BY TD_DATE_TRUNC('day',time,'JST'), TD_TIME_FORMAT(time,'u','JST')
)
)
) past_month
FULL OUTER JOIN
(
SELECT td_client_id
FROM pageviews
WHERE TD_INTERVAL(time, '-10y/-1M', 'JST') /* 1ヶ月前より過去 */
GROUP BY td_client_id
) past_month_ago
ON past_month.td_client_id = past_month_ago.td_client_id
/* 前月分のみのユーザーのみ取得で良いのならば past_month_ago サブクエリは不要 */
/**/
)
GROUP BY target_month,
CASE
WHEN 70 <= ratio_freq_weekend OR 70 <= ratio_pv_weekend THEN 'weekend'
WHEN ratio_freq_weekend <= 30 OR ratio_pv_weekend <= 30 THEN 'weekday'
WHEN ratio_freq_weekend IS NULL THEN 'non_active'
ELSE 'allday'
END
/**/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment