Last active
June 29, 2016 13:30
-
-
Save myui/8123042 to your computer and use it in GitHub Desktop.
Hive/Hivemallを利用した広告クリックスルー率(CTR)の推定 ref: http://qiita.com/myui/items/f726ca3dcc48410abe45
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- View training2: reshapes the raw ad-log rows into click / no-click counts
-- plus hashed categorical features.
--   * noclick is derived as impression - clicks.
--   * Each categorical column is prefixed with a per-column tag ("1_", "2_", ...)
--     before being passed to mhash(); presumably this keeps equal raw values in
--     different columns from hashing to the same feature -- TODO confirm.
--   * A constant -1 is emitted as the bias column.
-- NOTE: the "..." lines are elisions in the original snippet (further feature
-- columns and the source subquery are not shown).
create or replace view training2 as | |
select | |
rowid, | |
clicks, | |
(impression - clicks) as noclick, | |
mhash(concat("1_", displayurl)) as displayurl, | |
mhash(concat("2_", adid)) as adid, | |
... | |
-1 as bias | |
from ( | |
... |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Overwrites training_rcfile with the output of streaming every column of
-- training2 through the external script `gawk -f kddconv.awk`.
-- The script's output is read back as (rowid BIGINT, label FLOAT, features ARRAY<INT>).
-- NOTE: both "ROW FORMAT DELIMITED .." clauses are elided in the original
-- snippet; the actual field / collection delimiters are not shown here.
-- CLUSTER BY uses three seeded rand() expressions as keys; presumably this
-- randomly shuffles the training rows across reducers -- TODO confirm intent.
INSERT OVERWRITE TABLE training_rcfile | |
select transform(*) | |
ROW FORMAT DELIMITED .. | |
using 'gawk -f kddconv.awk' | |
as (rowid BIGINT, label FLOAT, features ARRAY<INT>) | |
ROW FORMAT DELIMITED .. | |
from training2 | |
CLUSTER BY CAST(rand(47) * 100 as INT), CAST(rand(49) * 100 as INT), CAST(rand(50) * 100 as INT); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# kddconv.awk -- expand one aggregated record into individual labeled rows.
#
# Input fields (split on awk's default field separator):
#   $1       row id
#   $2       number of positive rows to emit (presumably the clicks column)
#   $3       number of negative rows to emit (presumably the noclick column)
#   $4..$NF  feature values
#
# Output, tab separated: <rowid> <label> <f4,f5,...,fNF>
#   label 1.0 is printed $2 times and label 0.0 is printed $3 times.
#
# The statements live inside a pattern-less action so that the file is a valid
# program for `gawk -f`; bare `for` statements at the top level are a syntax
# error in awk.
{
    rowid = $1;
    positive = $2;
    negative = $3;

    # Join all feature columns into a single comma-separated field.
    features = $4;
    for (i = 5; i <= NF; i++) {
        features = features "," $i;
    }

    # One positive row per counted positive example.
    for (i = 0; i < positive; i++) {
        print rowid "\t1.0\t" features
    }

    # One negative row per counted negative example.
    for (i = 0; i < negative; i++) {
        print rowid "\t0.0\t" features
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Value passed to logress() through its -total_steps option.
set hivevar:total_steps=5000000;

-- Build lr_model: run logress() over training_rcfile and average the emitted
-- weights per feature.
CREATE TABLE lr_model AS
SELECT
    feature,
    AVG(weight) AS weight  -- bagging: average the weights of the weak learners
FROM (
    -- multiple map-only weak learners are executed
    SELECT
        logress(features, label, "-total_steps ${total_steps}") AS (feature, weight)
    FROM training_rcfile
) learner_output
GROUP BY feature  -- rows are shuffled to reducers by feature value
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Flatten testing2 into one (rowid, feature) row per element of features.
CREATE TABLE testing_exploded AS
SELECT
    rowid,
    feature
FROM testing2
LATERAL VIEW explode(features) exploded AS feature
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Predict a click probability per test row: sum the model weights of the
-- row's features and squash the total with sigmoid().
-- The weights come from lr_model, the table created by the training step
-- (`create table lr_model`); no table named `model` is created anywhere.
-- LEFT OUTER JOIN keeps test features that have no entry in lr_model; their
-- NULL weights are ignored by sum().
SELECT
    t.rowid,
    sigmoid(sum(m.weight)) AS prob
FROM testing_exploded t
LEFT OUTER JOIN lr_model m
    ON (t.feature = m.feature)
GROUP BY
    t.rowid
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment