Created
October 9, 2015 04:03
-
-
Save actsasflinn/7da7e8cada227fa6f960 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create table cars_rowid as | |
select | |
regexp_replace(reflect('java.util.UUID','randomUUID'), '-', '') as rowid, | |
* | |
from flinn.cars; | |
create or replace view cars_train as | |
select | |
rowid, | |
mhash(concat("1:", buying)) as buying, | |
mhash(concat("2:", maintenance)) as maintenance, | |
mhash(concat("3:", doors)) as doors, | |
mhash(concat("4:", persons)) as persons, | |
mhash(concat("5:", lug_boot)) as lug_boot, | |
mhash(concat("6:", safety)) as safety, | |
rating, | |
-1 as bias | |
from cars_rowid; | |
create or replace view cars_testing as | |
select | |
t.rowid, | |
t2.label, | |
array(buying, maintenance, doors, persons, lug_boot, safety, bias) as features | |
from ( | |
select * from cars_train | |
) t | |
left join ( | |
select | |
rowid, | |
CAST(IF(rating != "unacc", 1.0, 0.0) AS float) AS label | |
from cars_rowid | |
) t2 on t.rowid = t2.rowid; | |
SELECT | |
CAST(feature as int) as feature, | |
CAST(AVG(weight) as float) as weight | |
FROM ( | |
SELECT | |
logress(features, label,'-total_steps 5') as (feature, weight) | |
FROM | |
cars_testing | |
) t | |
GROUP BY feature | |
SELECT | |
logress(features, label,'-total_steps 5') as (feature, weight) | |
FROM | |
cars_testing | |
drop table testing_exploded; | |
create table testing_exploded as | |
select | |
rowid, | |
feature | |
from | |
cars_testing | |
LATERAL VIEW explode(features) t AS feature; | |
set hivevar:total_steps=5000000; | |
drop table lr_model; | |
create table lr_model | |
as | |
select | |
feature, | |
cast(avg(weight) as float) as weight | |
from | |
(select | |
logress(features, label, "-total_steps ${total_steps}") as (feature,weight) | |
from | |
cars_testing | |
) t | |
group by feature; | |
drop table lr_predict; | |
create table lr_predict | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY "\t" | |
LINES TERMINATED BY "\n" | |
STORED AS TEXTFILE | |
as | |
select | |
t.rowid, | |
sigmoid(sum(m.weight)) as prob | |
from | |
testing_exploded t LEFT OUTER JOIN | |
lr_model m ON (t.feature = m.feature) | |
group by | |
t.rowid | |
order by | |
rowid ASC; | |
drop table results; | |
create table results as select cars_rowid.*, lr_predict.prob from lr_predict left join cars_rowid on lr_predict.rowid = cars_rowid.rowid; | |
set hivevar:xtimes=3; | |
create or replace view training_x3 | |
as | |
select | |
* | |
from ( | |
select | |
amplify(${xtimes}, *) as (rowid, label, features) | |
from | |
cars_testing | |
) t | |
CLUSTER BY rand(); | |
create table lr_model_x3 | |
as | |
select | |
feature, | |
cast(avg(weight) as float) as weight | |
from | |
(select | |
logress(features,label) as (feature,weight) | |
from | |
training_x3 | |
) t | |
group by feature; | |
drop table lr_predict_x3; | |
create table lr_predict_x3 | |
ROW FORMAT DELIMITED | |
FIELDS TERMINATED BY "\t" | |
LINES TERMINATED BY "\n" | |
STORED AS TEXTFILE | |
as | |
select | |
t.rowid, | |
sigmoid(sum(m.weight)) as prob | |
from | |
testing_exploded t LEFT OUTER JOIN | |
lr_model_x3 m ON (t.feature = m.feature) | |
group by | |
t.rowid | |
order by | |
rowid ASC; | |
drop table results_x3; | |
create table results_x3 as select cars_rowid.*, lr_predict_x3.prob from lr_predict_x3 left join cars_rowid on lr_predict_x3.rowid = cars_rowid.rowid; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment