Skip to content

Instantly share code, notes, and snippets.

@myui
Created July 16, 2015 12:05
Show Gist options
  • Save myui/88f086ab9d2c249111c5 to your computer and use it in GitHub Desktop.
Save myui/88f086ab9d2c249111c5 to your computer and use it in GitHub Desktop.
parallel_similarity_join.sql
create table similarities
as
WITH test_rnd as (
select
rand(31) as rnd,
id,
features
from
test_hivemall
),
t01 as (
select
id,
features
from
test_rnd
where
rnd < 0.2
),
t02 as (
select
id,
features
from
test_rnd
where
rnd >= 0.2 and rnd < 0.4
),
t03 as (
select
id,
features
from
test_rnd
where
rnd >= 0.4 and rnd < 0.6
),
t04 as (
select
id,
features
from
test_rnd
where
rnd >= 0.6 and rnd < 0.8
),
t05 as (
select
id,
features
from
test_rnd
where
rnd >= 0.8
),
s01 as (
SELECT
each_top_k(
10, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t01 t2
LEFT OUTER JOIN train_hivemall t1
),
s02 as (
SELECT
each_top_k(
10, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t02 t2
LEFT OUTER JOIN train_hivemall t1
),
s03 as (
SELECT
each_top_k(
10, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t03 t2
LEFT OUTER JOIN train_hivemall t1
),
s04 as (
SELECT
each_top_k(
10, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t04 t2
LEFT OUTER JOIN train_hivemall t1
),
s05 as (
SELECT
each_top_k(
10, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t05 t2
LEFT OUTER JOIN train_hivemall t1
)
select * from s01
union all
select * from s02
union all
select * from s03
union all
select * from s04
union all
select * from s05
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment