Skip to content

Instantly share code, notes, and snippets.

@myui
Last active March 15, 2016 04:37
Show Gist options
  • Save myui/0dd150c42eb9466e19c1 to your computer and use it in GitHub Desktop.
Save myui/0dd150c42eb9466e19c1 to your computer and use it in GitHub Desktop.
WITH train_ordered as (
select * from training2
order by rowid asc
),
train_quantified as (
select
t0.rowid,
t0.sales,
t2.*,
t0.competitiondistance
from
train_ordered t0
-- indexing non-number columns
LATERAL VIEW quantify(true,
stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, promointerval, promo2, storetype, competitionopensincemonth, year, month, day) t2
as stateholiday, store, promo, dayofweek, schoolholiday, promo2sinceweek, competitionopensinceyear, assortment, promo2sinceyear, promointerval, promo2, storetype, competitionopensincemonth, year, month, day
)
INSERT OVERWRITE TABLE training3
SELECT
t1.rowid,
ARRAY( -- padding zero for missing values
t1.stateholiday, t1.store, t1.promo, t1.dayofweek, t1.schoolholiday,
IF(t1.promo2sinceweek IS NULL, 0, t1.promo2sinceweek),
IF(t1.competitionopensinceyear IS NULL, 0, t1.competitionopensinceyear),
t1.assortment,
IF(t1.promo2sinceyear IS NULL, 0, t1.promo2sinceyear),
IF(t1.promointerval IS NULL, 0, t1.promointerval),
t1.promo2, t1.storetype,
IF(t1.competitionopensincemonth IS NULL, 0, t1.competitionopensincemonth),
t1.year, t1.month, t1.day,
IF(t1.competitiondistance IS NULL, 0, t1.competitiondistance)
) AS features,
LN(1 + t1.sales) AS label, -- log scale conversion
t1.sales
FROM
train_quantified t1
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment