Last active
October 26, 2016 08:23
-
-
Save itochu0523/e40ecc8b988dbdd2a0bc48a2e2bd9bb7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- INSERT INTO id_mapping -- Presto | |
With ssuid_table as | |
( | |
select | |
session_id | |
,max(ssuid) as ssuid | |
from tmp_id_mapping | |
group by session_id | |
), | |
cluid_table as | |
( | |
select | |
session_id | |
,max(cluid) as cluid | |
from tmp_id_mapping | |
group by session_id | |
), | |
tmp_id_mapping as | |
( | |
SELECT | |
(CASE WHEN b.ssuid is NULL and b.referrer_ssuid is not NULL THEN b.referrer_ssuid ELSE b.ssuid END) as ssuid | |
,b.fp_id as fp_id | |
,b.cluid as cluid | |
,b.td_global_id as td_global_id | |
,b.referrer_ssuid as referrer_ssuid | |
,b.session_id as session_id | |
FROM | |
( | |
SELECT | |
a.ssuid as ssuid | |
,a.fp_id as fp_id | |
,a.cluid as cluid | |
,a.td_global_id as td_global_id | |
,a.referrer_ssuid as referrer_ssuid | |
,a.session_id as session_id | |
FROM | |
( | |
SELECT | |
t.ssuid as ssuid | |
,t.fp_id as fp_id | |
,t.cluid as cluid | |
,t.td_global_id as td_global_id | |
--,replace(regexp_extract(url_extract_query(td_referrer),'ssuid=\d+'),'ssuid=','') as referrer_ssuid -- presto | |
,parse_url(t.td_referrer, 'QUERY', 'ssuid') as referrer_ssuid -- Hive | |
,TD_SESSIONIZE(time, 3600, t.tag) as session_id | |
,count(1) | |
FROM | |
( | |
SELECT | |
time | |
,td_ip | |
,concat(concat(concat(td_ip,td_os),td_browser),td_screen) as tag | |
,td_referrer | |
,ssuid | |
,fp_id | |
,cluid | |
,td_global_id | |
FROM catalina_glico | |
distribute by td_ip | |
sort by td_ip,time | |
) t | |
GROUP BY | |
1,2,3,4,5,6 | |
) as a | |
) as b | |
) | |
INSERT INTO TABLE id_mapping -- Hive | |
SELECT | |
c.session_id | |
,d.cluid | |
,c.ssuid | |
FROM | |
ssuid_table c | |
JOIN | |
cluid_table d | |
ON c.session_id = d.session_id | |
ORDER BY 1,2,3 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment