myui · July 17, 2015 09:30
diff --git a/similarity10.sql b/similarity10.sql
 set hivevar:k=11;

 create table similarities
 as
 WITH test_rnd as (
 select
  rand(31) as rnd,
  id,
  features
 from
  test_hivemall
 ),
 t01 as (
  select id, features from test_rnd
  where rnd < 0.1
 ),
 t02 as (
  select id, features from test_rnd
  where rnd >= 0.1 and rnd < 0.2
 ),
 t03 as (
  select id, features from test_rnd
  where rnd >= 0.2 and rnd < 0.3
 ),
 t04 as (
  select id, features from test_rnd
  where rnd >= 0.3 and rnd < 0.4
 ),
 t05 as (
  select id, features from test_rnd
  where rnd >= 0.4 and rnd < 0.5
 ),
 t06 as (
  select id, features from test_rnd
  where rnd >= 0.5 and rnd < 0.6
 ),
 t07 as (
  select id, features from test_rnd
  where rnd >= 0.6 and rnd < 0.7
 ),
 t08 as (
  select id, features from test_rnd
  where rnd >= 0.7 and rnd < 0.8
 ),
 t09 as (
  select id, features from test_rnd
  where rnd >= 0.8 and rnd < 0.9
 ),
 t10 as (
  select id, features from test_rnd
  where rnd >= 0.9
 ),
 s01 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t01 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s02 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t02 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s03 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t03 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s04 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t04 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s05 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t05 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s06 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t06 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s07 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t07 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s08 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t08 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s09 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t09 t2 
  LEFT OUTER JOIN train_hivemall t1
 ),
 s10 as (
 SELECT
  each_top_k(
    ${k}, t2.id, angular_similarity(t2.features, t1.features), 
    t2.id, 
    t1.id,  
    t1.y
  ) as (rank, similarity, base_id, neighbor_id, y)
 FROM
  t10 t2 
  LEFT OUTER JOIN train_hivemall t1
 )
 select * from s01
 union all
 select * from s02
 union all
 select * from s03
 union all
 select * from s04
 union all
 select * from s05
 union all
 select * from s06
 union all
 select * from s07
 union all
 select * from s08
 union all
 select * from s09
 union all
 select * from s10
 ;
	set hivevar:k=11;

	create table similarities
	as
	WITH test_rnd as (
	select
	rand(31) as rnd,
	id,
	features
	from
	test_hivemall
	),
	t01 as (
	select id, features from test_rnd
	where rnd < 0.1
	),
	t02 as (
	select id, features from test_rnd
	where rnd >= 0.1 and rnd < 0.2
	),
	t03 as (
	select id, features from test_rnd
	where rnd >= 0.2 and rnd < 0.3
	),
	t04 as (
	select id, features from test_rnd
	where rnd >= 0.3 and rnd < 0.4
	),
	t05 as (
	select id, features from test_rnd
	where rnd >= 0.4 and rnd < 0.5
	),
	t06 as (
	select id, features from test_rnd
	where rnd >= 0.5 and rnd < 0.6
	),
	t07 as (
	select id, features from test_rnd
	where rnd >= 0.6 and rnd < 0.7
	),
	t08 as (
	select id, features from test_rnd
	where rnd >= 0.7 and rnd < 0.8
	),
	t09 as (
	select id, features from test_rnd
	where rnd >= 0.8 and rnd < 0.9
	),
	t10 as (
	select id, features from test_rnd
	where rnd >= 0.9
	),
	s01 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t01 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s02 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t02 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s03 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t03 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s04 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t04 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s05 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t05 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s06 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t06 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s07 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t07 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s08 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t08 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s09 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t09 t2
	LEFT OUTER JOIN train_hivemall t1
	),
	s10 as (
	SELECT
	each_top_k(
	${k}, t2.id, angular_similarity(t2.features, t1.features),
	t2.id,
	t1.id,
	t1.y
	) as (rank, similarity, base_id, neighbor_id, y)
	FROM
	t10 t2
	LEFT OUTER JOIN train_hivemall t1
	)
	select * from s01
	union all
	select * from s02
	union all
	select * from s03
	union all
	select * from s04
	union all
	select * from s05
	union all
	select * from s06
	union all
	select * from s07
	union all
	select * from s08
	union all
	select * from s09
	union all
	select * from s10
	;