Created
June 19, 2019 08:52
-
-
Save myui/273c08c756de08d82d6cfa3a575d8221 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Builds a lookup from hashed feature index to the feature name(s) that
-- produced it, using the `features` array column of table `test`.
--
-- Steps:
--   1. tmp    : explode `features` into one row per element and apply
--               extract_feature() to each element.
--   2. mapped : de-duplicate the extracted features and compute
--               feature_hashing() once per distinct feature.
--   3. final  : group by hashed index and collect every feature that landed
--               on that index.
--
-- NOTE(review): extract_feature / feature_hashing are presumably the Hivemall
-- UDFs (feature name without its value, and its hashed index) -- confirm
-- against the Hivemall version deployed on the cluster.
with tmp as (
  select
    -- De-duplication is done by the GROUP BY in `mapped` instead of DISTINCT
    -- here, because group by is sometimes faster than distinct.
    -- distinct extract_feature(feature) as feature
    extract_feature(feature) as feature
  from
    test l
    lateral view explode(features) r as feature
),
mapped as (
  select
    feature,
    feature_hashing(feature) as index
  from
    tmp
  group by
    feature
)
-- INSERT OVERWRITE TABLE mapping
select
  index,
  -- Hash collisions can happen, so one index may map to several features.
  collect_set(feature) as features
from
  mapped
group by
  index
-- order by index asc
-- limit 100
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
https://console.treasuredata.com/app/jobs/494047056/results