Last active
June 19, 2024 19:16
-
-
Save edgararuiz/7392387c9ba65bf8341d2beb67f28ac0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Connect to Spark and create table pointer | |
library(sparklyr) | |
library(dplyr) | |
sc <- spark_connect(method = "databricks_connect") | |
lendingclub_dat <- tbl(sc, dbplyr::in_catalog("hive_metastore", "default", "lendingclub")) | |
# Prepare the data, replicating what was done in the first article | |
lendingclub_prep <- lendingclub_dat |> | |
select(term, bc_util, bc_open_to_buy, all_util) |> | |
mutate(term = trimws(substr(term, 1,4))) |> | |
mutate(across(everything(), as.numeric)) |> | |
filter(!if_any(everything(), is.na)) | |
# Show how Spark splits the row count into multiple discrite jobs | |
lendingclub_prep |> | |
spark_apply(nrow) |> | |
collect() | |
# Create the function that pulls the vetiver model and runs predictions | |
predict_vetiver <- function(x) { | |
library(workflows) | |
board <- pins::board_connect( | |
auth = "manual", | |
server = "https://pub.demo.posit.team/", | |
key = "[YOUR CONNECT KEY]" | |
) | |
model <- vetiver::vetiver_pin_read(board, "[email protected]/lending_club_model") | |
preds <- predict(model, x) | |
x$pred <- preds[,1][[1]] | |
x[x$pred >= 20, ] | |
} | |
# Test locally to make sure the function returns what's expected | |
# In this case, knowing that all_util drives a lot of the prediction | |
# we filtered for values over 130 | |
lendingclub_local <- lendingclub_prep |> | |
filter(all_util >= 130) |> | |
head(50) |> | |
collect() | |
predict_vetiver(lendingclub_local) | |
# Executes the prediction function over the entire dataset | |
lendingclub_prep |> | |
spark_apply( | |
f = predict_vetiver, | |
columns = "term double, bc_util double, bc_open_to_buy double, all_util double, pred double" | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment