library(xgboost)
library(dplyr)
# Booster settings shared by every scenario in this script.
# min_child_weight is set close to zero and lambda (the L2 penalty) to zero,
# presumably so that a split is still possible on a three-row data set.
params <- list(
  min_child_weight = 1e-5,
  lambda = 0
)
# Number of boosting rounds: each scenario fits a single tree.
nrounds <- 1
# sparse ----
# Scenario: sparse model matrix, missing value encoded as NA_real_.
# tibble() replaces the deprecated data_frame() constructor.
dat <- tibble(
  y = c(0, 1, 1),
  x = c(NA_real_, 0, 1)
)
# Keep the NA row when the model frame is built (na.pass returns its input
# unchanged). NOTE: this modifies a session-wide option and is not restored.
options(na.action = na.pass)
model_matrix <- Matrix::sparse.model.matrix(y ~ x, data = dat)
dat_xgb <- xgb.DMatrix(model_matrix, label = dat$y, missing = NA_real_)
set.seed(12345)
# The objective is a booster parameter, so it is supplied through `params`
# instead of being passed as a loose argument to xgb.train().
xgb_mdl <- xgb.train(
  params = c(params, list(objective = "binary:logistic")),
  data = dat_xgb,
  nrounds = nrounds
)
# predleaf = TRUE asks for the leaf index each row lands in, not a probability.
pred_leaf <- predict(xgb_mdl, newdata = dat_xgb, predleaf = TRUE)
print(pred_leaf)
# Output recorded by the original author:
#> [,1]
#> [1,] 1
#> [2,] 1
#> [3,] 2
# Scenario: sparse model matrix, missing value encoded as 0.
# tibble() replaces the deprecated data_frame() constructor.
dat <- tibble(
  y = c(0, 1, 1),
  x = c(0, 0, 1)
)
# Same NA handling option as used for the NA-encoded data (there are no NAs
# here). NOTE: this modifies a session-wide option and is not restored.
options(na.action = na.pass)
model_matrix <- Matrix::sparse.model.matrix(y ~ x, data = dat)
dat_xgb <- xgb.DMatrix(model_matrix, label = dat$y, missing = NA_real_)
set.seed(12345)
# The objective is a booster parameter, so it is supplied through `params`
# instead of being passed as a loose argument to xgb.train().
xgb_mdl <- xgb.train(
  params = c(params, list(objective = "binary:logistic")),
  data = dat_xgb,
  nrounds = nrounds
)
# predleaf = TRUE asks for the leaf index each row lands in, not a probability.
pred_leaf <- predict(xgb_mdl, newdata = dat_xgb, predleaf = TRUE)
print(pred_leaf)
# Output recorded by the original author:
#> [,1]
#> [1,] 1
#> [2,] 1
#> [3,] 2
# dense ----
# Scenario: dense model matrix, missing value encoded as NA_real_.
# tibble() replaces the deprecated data_frame() constructor.
dat <- tibble(
  y = c(0, 1, 1),
  x = c(NA_real_, 0, 1)
)
# Keep the NA row when the model frame is built (na.pass returns its input
# unchanged). NOTE: this modifies a session-wide option and is not restored.
options(na.action = na.pass)
model_matrix <- model.matrix(y ~ x, data = dat)
dat_xgb <- xgb.DMatrix(model_matrix, label = dat$y, missing = NA_real_)
set.seed(12345)
# The objective is a booster parameter, so it is supplied through `params`
# instead of being passed as a loose argument to xgb.train().
xgb_mdl <- xgb.train(
  params = c(params, list(objective = "binary:logistic")),
  data = dat_xgb,
  nrounds = nrounds
)
# predleaf = TRUE asks for the leaf index each row lands in, not a probability.
pred_leaf <- predict(xgb_mdl, newdata = dat_xgb, predleaf = TRUE)
print(pred_leaf)
# Output recorded by the original author:
#> [,1]
#> [1,] 2
#> [2,] 1
#> [3,] 1
# Scenario: dense model matrix, missing value encoded as 0.
# tibble() replaces the deprecated data_frame() constructor.
dat <- tibble(
  y = c(0, 1, 1),
  x = c(0, 0, 1)
)
# Same NA handling option as used for the NA-encoded data (there are no NAs
# here). NOTE: this modifies a session-wide option and is not restored.
options(na.action = na.pass)
model_matrix <- model.matrix(y ~ x, data = dat)
dat_xgb <- xgb.DMatrix(model_matrix, label = dat$y, missing = NA_real_)
set.seed(12345)
# The objective is a booster parameter, so it is supplied through `params`
# instead of being passed as a loose argument to xgb.train().
xgb_mdl <- xgb.train(
  params = c(params, list(objective = "binary:logistic")),
  data = dat_xgb,
  nrounds = nrounds
)
# predleaf = TRUE asks for the leaf index each row lands in, not a probability.
pred_leaf <- predict(xgb_mdl, newdata = dat_xgb, predleaf = TRUE)
print(pred_leaf)
# Output recorded by the original author:
#> [,1]
#> [1,] 1
#> [2,] 1
#> [3,] 2
Last active
October 11, 2018 13:42
-
-
Save jtilly/9568a2a01863caea7d9f6cf67acf087b to your computer and use it in GitHub Desktop.
Treatment of missing values with and without sparse matrices
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment