Skip to content

Instantly share code, notes, and snippets.

View myui's full-sized avatar

Makoto YUI myui

View GitHub Profile
-- Demonstrates feature_hashing on an array of "name:value" / "name" features.
-- Per the sample output shown below this statement, each feature name is
-- replaced by a numeric index while any ":value" suffix is kept as-is.
SELECT
  feature_hashing(
    array(
      "userid#4505:3.3",
      "movieid#2331:4.999",
      "movieid#2331"
    )
  );

["1828616:3.3","6238429:4.999","6238429"]

-- Hash the concatenated feature arrays. The UDF is named feature_hashing
-- (singular), matching the standalone example earlier in this file.
SELECT
  feature_hashing(
 array_concat(
@myui
myui / lda.sql
Last active April 29, 2017 20:36
-- fitting
-- The inner query calls train_lda on the `feature` column of `data` with the
-- option string "-topic 2 -iter 20" and names its output columns
-- (label, word, lambda). The outer query selects label, word and avg(lambda).
-- NOTE(review): the options presumably mean 2 topics and 20 iterations —
-- confirm against the Hivemall LDA documentation.
-- NOTE(review): no GROUP BY is visible in this excerpt; the statement appears
-- to continue beyond the last line shown — confirm against the full script.
select
label, word, avg(lambda) as lambda
from (
select
train_lda(feature, "-topic 2 -iter 20")
as (label, word, lambda)
from
data
) t1
@myui
myui / latlon.md
Last active April 29, 2017 20:08
-- Inline sample data: two rows with the same lat/lon pair, differing only in
-- zoom (10 and 15).
WITH data as (
  select 25.7724247 as lat, -80.1854473 as lon, 10 as zoom
  union all
  select 25.7724247 as lat, -80.1854473 as lon, 15 as zoom
)
-- Projects three values per row:
--   osm_url     : map_url(lat,lon,zoom) with no options
--                 (presumably an OpenStreetMap URL — TODO confirm)
--   gmap_url    : map_url with the '-type googlemaps' option
--   tile_number : tile(lat,lon,zoom)
-- NOTE(review): the FROM clause is not visible in this excerpt.
select 
   map_url(lat,lon,zoom) as osm_url,
   map_url(lat,lon,zoom,'-type googlemaps') as gmap_url,
 tile(lat,lon,zoom) as tile_number
-- Trains a random forest classifier on the `train` table of the news20
-- database and stores the trainer's output rows in rf_model.
use news20;
-- Hivemall session setting.
-- NOTE(review): presumably the number of parallel workers used during
-- training — confirm against the Hivemall documentation.
set hivemall.smile.nprocs=4;
-- IF EXISTS keeps the script re-runnable when rf_model has not been created
-- yet; a bare DROP TABLE raises an error in that case when
-- hive.exec.drop.ignorenonexistent is set to false.
drop table if exists rf_model;
-- The label column is passed through convert_label() before training.
-- NOTE(review): '-trees 50 -seed 71' presumably requests 50 trees and a fixed
-- random seed — confirm.
create table rf_model
as
select train_randomforest_classifier(features,convert_label(label),'-trees 50 -seed 71')
from train;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
--------------------
Hivemall
Hivemall is a library for machine learning implemented as Hive
UDFs/UDAFs/UDTFs.
Hivemall has been incubating since 2016-09-13.
Three most important issues to address in the move towards graduation:
-- Table `page`: an integer docid plus a string `contents` column per row.
CREATE TABLE page (
  docid    INT,
  contents STRING
);
INSERT OVERWRITE TABLE page_exploded
select
d.docid,
normalize_unicode(t.word) as word
from
WITH term_frequency as (
select
docid,
word,
freq
from (
select
docid,
tf(word) as word2freq
from