Created
July 16, 2015 12:05
-
-
Save myui/88f086ab9d2c249111c5 to your computer and use it in GitHub Desktop.
parallel_similarity_join.sql
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create table similarities | |
as | |
WITH test_rnd as ( | |
select | |
rand(31) as rnd, | |
id, | |
features | |
from | |
test_hivemall | |
), | |
t01 as ( | |
select | |
id, | |
features | |
from | |
test_rnd | |
where | |
rnd < 0.2 | |
), | |
t02 as ( | |
select | |
id, | |
features | |
from | |
test_rnd | |
where | |
rnd >= 0.2 and rnd < 0.4 | |
), | |
t03 as ( | |
select | |
id, | |
features | |
from | |
test_rnd | |
where | |
rnd >= 0.4 and rnd < 0.6 | |
), | |
t04 as ( | |
select | |
id, | |
features | |
from | |
test_rnd | |
where | |
rnd >= 0.6 and rnd < 0.8 | |
), | |
t05 as ( | |
select | |
id, | |
features | |
from | |
test_rnd | |
where | |
rnd >= 0.8 | |
), | |
s01 as ( | |
SELECT | |
each_top_k( | |
10, t2.id, angular_similarity(t2.features, t1.features), | |
t2.id, | |
t1.id, | |
t1.y | |
) as (rank, similarity, base_id, neighbor_id, y) | |
FROM | |
t01 t2 | |
LEFT OUTER JOIN train_hivemall t1 | |
), | |
s02 as ( | |
SELECT | |
each_top_k( | |
10, t2.id, angular_similarity(t2.features, t1.features), | |
t2.id, | |
t1.id, | |
t1.y | |
) as (rank, similarity, base_id, neighbor_id, y) | |
FROM | |
t02 t2 | |
LEFT OUTER JOIN train_hivemall t1 | |
), | |
s03 as ( | |
SELECT | |
each_top_k( | |
10, t2.id, angular_similarity(t2.features, t1.features), | |
t2.id, | |
t1.id, | |
t1.y | |
) as (rank, similarity, base_id, neighbor_id, y) | |
FROM | |
t03 t2 | |
LEFT OUTER JOIN train_hivemall t1 | |
), | |
s04 as ( | |
SELECT | |
each_top_k( | |
10, t2.id, angular_similarity(t2.features, t1.features), | |
t2.id, | |
t1.id, | |
t1.y | |
) as (rank, similarity, base_id, neighbor_id, y) | |
FROM | |
t04 t2 | |
LEFT OUTER JOIN train_hivemall t1 | |
), | |
s05 as ( | |
SELECT | |
each_top_k( | |
10, t2.id, angular_similarity(t2.features, t1.features), | |
t2.id, | |
t1.id, | |
t1.y | |
) as (rank, similarity, base_id, neighbor_id, y) | |
FROM | |
t05 t2 | |
LEFT OUTER JOIN train_hivemall t1 | |
) | |
select * from s01 | |
union all | |
select * from s02 | |
union all | |
select * from s03 | |
union all | |
select * from s04 | |
union all | |
select * from s05 | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment