Skip to content

Instantly share code, notes, and snippets.

@WalkerHarrison
Created December 15, 2017 03:07
Show Gist options
  • Save WalkerHarrison/8ad3585d1e45dcd7e472c13d0974b548 to your computer and use it in GitHub Desktop.
Save WalkerHarrison/8ad3585d1e45dcd7e472c13d0974b548 to your computer and use it in GitHub Desktop.
library(xgboost)
library(BayesTree)
library(mice)
clean_data_impute = function(df) {
preds_remove <- c("sale", "author", "price", "authorstyle",
"count", "Surface_Rect", "Surface_Rnd",
"diff_origin", "singlefig", "lot")
preds_num <- c("position", "year", "logprice", "Height_in",
"Width_in", "Diam_in", "Surface", "nfigures")
preds_keep <- setdiff(names(df), preds_remove)
preds_factors <- setdiff(preds_keep, preds_num)
cleaned_df <- df %>%
mutate(nfigures = nfigures + singlefig,
Shape = dplyr::recode(Shape, "ovale" = "oval",
"ronde" = "round"),
type_intermed = replace(type_intermed,
Interm == 0, "N"),
material = replace(material, material %in%
c("huile", "huile sur papier", "pastel", "rond"), "other"),
material = replace(material, material %in%
c("octogone", "tableau", "tableaux pendants"), "canvas"),
Surface = replace(Surface, Surface == 0, NA),
logSurface = log(Surface),
Width_in = pmax(Width_in, Diam_in, na.rm = T),
Height_in = pmax(Height_in, Diam_in, na.rm = T)) %>%
mutate_all(funs(replace(., . == "n/a", NA))) %>%
mutate_all(funs(replace(., . == "", NA))) %>%
mutate_at(preds_factors, funs(replace(., is.na(.), "NA"))) %>%
mutate_at(preds_factors, factor) %>%
select(preds_keep, logSurface)
impute_cols = c("Height_in", "Width_in", "Surface")
imputed = mice(cleaned_df[,impute_cols], method = "pmm", maxit = 50, seed = 1)
imputed = complete(imputed)
cleaned_df[,impute_cols] = imputed
cleaned_df$logSurface = log(cleaned_df$Surface)
return(cleaned_df)
}
#### xgboost ######
train = paint_train %>% select(-logprice) %>% data.matrix()
train.y = paint_train %>% select(logprice) %>% pull()
data.new <- paint_test %>% select(-logprice) %>% data.matrix()
xgb.fit <- xgboost(data = dtrain,
label = train.y,
objective = "reg:linear",
eval_metric = "rmse",
max.depth = 10,
eta = 0.3,
nround = 100,
verbose =0)
xgb.pred <- exp(predict(xgb.fit, newdata = data.new))
###### BART ####
paint_train <- clean_data_impute(paintings_train) %>% distinct()
paint_test <- clean_data_impute(paintings_test)
train <- paint_train %>% select(dealer, year, Interm, origin_cat, endbuyer,
engraved, prevcoll, finished, lrgfont, discauth, logSurface,
winningbiddertype, position)
train.y <- paint_train$logprice
test <- paint_test %>% select(dealer, year, Interm, origin_cat, endbuyer,
engraved, prevcoll, finished, lrgfont, discauth, logSurface,
winningbiddertype, position)
lmbart <- bart(x.train=train,
y.train=train.y,
x.test=test)
bart.pred <- colMeans(lmbart$yhat.test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment