Last active
November 10, 2015 05:32
-
-
Save itochu0523/8d38b1fc139f9a39de81 to your computer and use it in GitHub Desktop.
簡単なセッションテーブルからパス解析のテーブルを作成する手順 ref: http://qiita.com/itochu0523/items/a918a899152b5adacdd3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') as date | |
,count(distinct session_id) as session_cnt | |
FROM | |
session_summary | |
GROUP BY | |
TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') | |
ORDER BY | |
TD_TIME_FORMAT(TD_TIME_PARSE(session_start_time),'yyyy-MM-dd') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- hive or presto | |
SELECT | |
a.td_url as td_url | |
,ROW_NUMBER() OVER ( ORDER BY a.cnt DESC ) AS rownum | |
FROM | |
( | |
SELECT | |
regexp_replace(td_url, '(http://|https://)', '') as td_url | |
,count(1) as cnt | |
FROM access | |
-- 2ヶ月間のマスタを作成 | |
WHERE TD_TIME_RANGE(time, '2015-04-01', '2015-05-31', 'JST') | |
GROUP BY regexp_replace(td_url, '(http://|https://)', '') | |
) a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
a.td_url as td_url | |
,ROW_NUMBER() OVER ( ORDER BY a.cnt DESC ) AS rownum | |
FROM | |
( | |
SELECT | |
regexp_replace(td_url, '(http://|https://)', '') as td_url | |
,count(1) as cnt | |
FROM access | |
WHERE TD_TIME_RANGE(time, '2015-06-01', TD_TIME_ADD('2015-06-01','1d'), 'JST') | |
GROUP BY regexp_replace(td_url, '(http://|https://)', '') | |
) a |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- hive or presto | |
SELECT | |
c.td_url as td_url | |
,(d.max_rownum + c.rownum) as rownum | |
FROM | |
( | |
select | |
a.td_url as td_url | |
,ROW_NUMBER() OVER ( ORDER BY b.rownum DESC ) AS rownum | |
from | |
tmp_page_master a | |
LEFT OUTER JOIN | |
page_master b | |
ON | |
a.td_url = b.td_url | |
WHERE b.rownum is null | |
) c | |
, | |
( select max(rownum) as max_rownum from page_master ) d |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
td_url | |
, rownum as page_id | |
,(CASE | |
-- 以下の行に足していく... | |
WHEN td_url like '%thank%' THEN 1 | |
WHEN td_url LIKE '%THANK%' THEN 1 | |
WHEN td_url LIKE '%Thank%' THEN 1 | |
ELSE 0 | |
END) as conversion_flag | |
FROM page_master |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
b.session_id as session_id | |
, b.time as time | |
, b.td_ip as td_ip | |
, b.td_path as td_path | |
, b.td_client_id as td_client_id | |
, b.td_title as td_title | |
, b.mobile_flag as mobile_flag | |
, b.td_browser as td_browser | |
, b.td_color as td_color | |
, b.td_os_version as td_os_version | |
, b.td_browser_version as td_browser_version | |
, b.td_referrer as td_referrer | |
, b.search_word as search_word | |
, b.td_screen as td_screen | |
, b.td_os as td_os | |
, b.td_host as td_host | |
, b.td_url as td_url | |
, (CASE WHEN c.conversion_flag = 1 THEN 1 ELSE 0 END) as conversion_flag | |
, c.page_id as page_id | |
, b.td_language as td_language | |
FROM | |
( | |
SELECT | |
TD_SESSIONIZE(time, 86400, td_ip) as session_id | |
, time | |
, td_ip | |
, td_path | |
, td_client_id | |
, td_title | |
, td_browser | |
, td_color | |
, td_os_version | |
, td_browser_version | |
, td_referrer | |
, td_screen | |
, td_os | |
, td_host | |
, td_url | |
, regexp_replace(td_url, '(http://|https://)', '') as url | |
, td_language | |
, (CASE | |
WHEN td_referrer like '%google%' THEN parse_url(TD_URL_DECODE(td_referrer),'QUERY','q') | |
WHEN td_referrer like '%bing.com%' THEN parse_url(TD_URL_DECODE(td_referrer),'QUERY','q') | |
WHEN td_referrer like '%yahoo%' THEN parse_url(TD_URL_DECODE(td_referrer),'QUERY','p') | |
ELSE parse_url(TD_URL_DECODE(td_referrer),'QUERY','p') | |
END) as search_word | |
, (CASE WHEN td_os IN ('iOS','BlackBerry OS','Android','Windows Phone') THEN 1 ELSE 0 END) as mobile_flag | |
FROM | |
( | |
SELECT time, td_ip, td_path, td_client_id, td_title | |
,td_browser, td_color, td_os_version | |
,td_browser_version, td_referrer, td_screen | |
,td_os, td_host, td_url, td_language | |
FROM access | |
distribute by td_ip | |
sort by td_ip,time | |
) a | |
) b | |
,page_master_detail c | |
WHERE b.url = c.td_url |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
session_id | |
, td_client_id | |
, collect_list(CAST(page_id as STRING)) as path | |
, count(1) as page_cnt | |
FROM session_detail | |
GROUP BY | |
session_id, td_client_id |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
session_id, | |
pid | |
FROM | |
session_path | |
LATERAL VIEW | |
explode(PATH) pageTable AS pid |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment