Skip to content

Instantly share code, notes, and snippets.

@myui
Created July 17, 2015 09:30
Show Gist options
  • Save myui/46a5a018f294947a8a5b to your computer and use it in GitHub Desktop.
Save myui/46a5a018f294947a8a5b to your computer and use it in GitHub Desktop.
similarity10.sql
set hivevar:k=11;
create table similarities
as
WITH test_rnd as (
select
rand(31) as rnd,
id,
features
from
test_hivemall
),
t01 as (
select id, features from test_rnd
where rnd < 0.1
),
t02 as (
select id, features from test_rnd
where rnd >= 0.1 and rnd < 0.2
),
t03 as (
select id, features from test_rnd
where rnd >= 0.2 and rnd < 0.3
),
t04 as (
select id, features from test_rnd
where rnd >= 0.3 and rnd < 0.4
),
t05 as (
select id, features from test_rnd
where rnd >= 0.4 and rnd < 0.5
),
t06 as (
select id, features from test_rnd
where rnd >= 0.5 and rnd < 0.6
),
t07 as (
select id, features from test_rnd
where rnd >= 0.6 and rnd < 0.7
),
t08 as (
select id, features from test_rnd
where rnd >= 0.7 and rnd < 0.8
),
t09 as (
select id, features from test_rnd
where rnd >= 0.8 and rnd < 0.9
),
t10 as (
select id, features from test_rnd
where rnd >= 0.9
),
s01 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t01 t2
LEFT OUTER JOIN train_hivemall t1
),
s02 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t02 t2
LEFT OUTER JOIN train_hivemall t1
),
s03 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t03 t2
LEFT OUTER JOIN train_hivemall t1
),
s04 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t04 t2
LEFT OUTER JOIN train_hivemall t1
),
s05 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t05 t2
LEFT OUTER JOIN train_hivemall t1
),
s06 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t06 t2
LEFT OUTER JOIN train_hivemall t1
),
s07 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t07 t2
LEFT OUTER JOIN train_hivemall t1
),
s08 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t08 t2
LEFT OUTER JOIN train_hivemall t1
),
s09 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t09 t2
LEFT OUTER JOIN train_hivemall t1
),
s10 as (
SELECT
each_top_k(
${k}, t2.id, angular_similarity(t2.features, t1.features),
t2.id,
t1.id,
t1.y
) as (rank, similarity, base_id, neighbor_id, y)
FROM
t10 t2
LEFT OUTER JOIN train_hivemall t1
)
select * from s01
union all
select * from s02
union all
select * from s03
union all
select * from s04
union all
select * from s05
union all
select * from s06
union all
select * from s07
union all
select * from s08
union all
select * from s09
union all
select * from s10
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment