Last active
March 15, 2016 04:37
-
-
Save myui/0dd150c42eb9466e19c1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH train_ordered as ( | |
select * from training2 | |
order by rowid asc | |
), | |
train_quantified as ( | |
select | |
t0.rowid, | |
t0.sales, | |
t2.*, | |
t0.competitiondistance | |
from | |
train_ordered t0 | |
-- indexing non-number columns | |
LATERAL VIEW quantify(true, | |
stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, promointerval, promo2, storetype, competitionopensincemonth, year, month, day) t2 | |
as stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, promointerval, promo2, storetype, competitionopensincemonth, year, month, day | |
) | |
INSERT OVERWRITE TABLE training3 | |
SELECT | |
t1.rowid, | |
ARRAY( -- padding zero for missing values | |
t1.stateholiday, t1.store, t1.promo, t1.dayofweek, t1.schoolholiday, | |
IF(t1.promo2sinceweek IS NULL, 0, t1.promo2sinceweek), | |
IF(t1.competitionopensinceyear IS NULL, 0, t1.competitionopensinceyear), | |
t1.assortment, | |
IF(t1.promo2sinceyear IS NULL, 0, t1.promo2sinceyear), | |
IF(t1.promointerval IS NULL, 0, t1.promointerval), | |
t1.promo2, t1.storetype, | |
IF(t1.competitionopensincemonth IS NULL, 0, t1.competitionopensincemonth), | |
t1.year, t1.month, t1.day, | |
IF(t1.competitiondistance IS NULL, 0, t1.competitiondistance) | |
) AS features, | |
LN(1 + t1.sales) AS label, -- log scale conversion | |
t1.sales | |
FROM | |
train_quantified t1 | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment