Skip to content

Instantly share code, notes, and snippets.

@davideanastasia
Last active October 11, 2018 21:01
Show Gist options
  • Save davideanastasia/fb3617793511aead30f6b5650d29911a to your computer and use it in GitHub Desktop.
Save davideanastasia/fb3617793511aead30f6b5650d29911a to your computer and use it in GitHub Desktop.
-- Train a BigQuery ML logistic-regression model whose label is `is_attributed`,
-- using rows from `kaggle_talkingdata_adtracking.dataset_train`.
--
-- Train/eval split: sequential (`data_split_method = 'seq'`) keyed on
-- `click_time`, which is why `click_time` is carried through the SELECT list.
-- NOTE(review): per the BigQuery ML docs the split column is not used as a
-- feature -- confirm against the current CREATE MODEL reference.
--
-- Features:
--   * ip, app, device, os, channel -- each cast to STRING
--     (presumably numeric IDs in the source table that should be treated as
--     categorical values rather than magnitudes -- TODO confirm schema).
--   * hod          -- hour component of click_time, as a STRING.
--   * app_x_device -- string cross of app and device.
--   * os_x_channel -- string cross of os and channel.
CREATE MODEL `kaggle_talkingdata_adtracking.talkingdata_logreg_0001`
OPTIONS (
    model_type = 'logistic_reg',
    input_label_cols = ['is_attributed'],
    data_split_method = 'seq',
    data_split_col = 'click_time'
) AS
SELECT
    -- Raw identifiers, re-typed as strings.
    CAST(ip AS STRING) AS ip,
    CAST(app AS STRING) AS app,
    CAST(device AS STRING) AS device,
    CAST(os AS STRING) AS os,
    CAST(channel AS STRING) AS channel,
    -- Hour of day extracted from the click timestamp.
    CAST(EXTRACT(HOUR FROM click_time) AS STRING) AS hod,
    -- Pairwise crosses; the literal prefixes keep the two parts unambiguous.
    CONCAT(
        'app_', CAST(app AS STRING),
        '_device_', CAST(device AS STRING)
    ) AS app_x_device,
    CONCAT(
        'os_', CAST(os AS STRING),
        '_channel_', CAST(channel AS STRING)
    ) AS os_x_channel,
    -- Split key (see OPTIONS) and label.
    click_time,
    is_attributed
FROM `kaggle_talkingdata_adtracking.dataset_train`;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment