Created
November 30, 2022 10:03
-
-
Save jonlachmann/47bcfc4b1a072debd7278547992cd1a6 to your computer and use it in GitHub Desktop.
Explain a VAR model with shapr, using the branch output_size from my fork.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generate mock data (two variable which go from 1-100 and 101-200 with some random noise). | |
data <- matrix(rnorm(200, 1:200, 0.5), 100, 2) | |
K <- ncol(data) # Number of variables in model | |
horizon <- 12 # Forecast horizon | |
p <- 2 # Lag order of model | |
# Create the lagged matrix of data X | |
X <- embed(data, p+1)[,-seq_len(K)] | |
X <- cbind(X, 1) | |
colnames(X) <- c("x1_1", "x1_2", "x2_1", "x2_2", "intercept") | |
# Create the matrix for the dependent variable Y | |
Y <- tail(data, -p) | |
colnames(Y) <- c("x1", "x2") | |
# Estimate a basic VAR model | |
coefs <- qr.solve(X, Y) | |
#' Basic predict function for a VAR model. | |
#' @param model The model to use, should contain coefficients. | |
#' @param X The vector of data to use to start the prediction from, i.e. the last observation of the lagged data. | |
#' @param h The forecast horizon. | |
#' @return A matrix where each row contains subsequent steps ahead and the columns are the different variables. | |
pred_var <- function (model, X, h=1) { | |
res <- matrix(NA, h, model$k) | |
for (i in seq_len(h)) { | |
pred <- as.numeric(X) %*% model$coefs | |
res[i, ] <- pred | |
X <- c(pred, X[c(1, 2, 5)]) | |
} | |
return(res) | |
} | |
#' Predict a specific variable from a VAR model using multiple X vectors. | |
#' @param model The model to use for predictions. | |
#' Should have a property "step" to select forecast horizon and a property "variable" to select which variable to forecast. | |
#' @param newdata A data.table where each row contains an X vector to use for prediction. | |
#' @return A vector where each scalar is a prediction produced from a specific X vector. | |
pred_specific_var <- function (model, newdata) { | |
step <- model$step | |
variable <- model$variable | |
res <- as.data.frame(matrix(NA, nrow(newdata), step)) | |
for (i in seq_len(nrow(newdata))) { | |
res[i, ] <- pred_var(model, as.numeric(newdata[i, ]), step)[, variable] | |
} | |
return(res) | |
} | |
# Assemble the "model" object | |
model <- list(coefs=coefs, k=K, step=horizon, variable=1) | |
class(model) <- "var" | |
# Create a matrix of the last observation seen before the prediction as the X we want to explain. | |
x_explain <- matrix(c(Y[nrow(Y), ], X[nrow(X), c(1, 2, 5)]), nrow=1) | |
colnames(x_explain) <- c("x1_1", "x1_2", "x2_1", "x2_2", "intercept") | |
# Group X by variable. | |
groups <- list(x1=c("x1_1", "x1_2"), x2=c("x2_1", "x2_2"), intercept="intercept") | |
# Load the shapr package on the branch output_size (you should be inside the shapr folder when running this). | |
devtools::load_all() | |
# Create the explanation, note that prediction zero is of length "output_size" here, but not necessarily containing a repeated value. | |
explanation <- explain( | |
model = model, | |
x_explain = x_explain, | |
x_train = X, | |
approach = "empirical", | |
prediction_zero = rep(Y[98,1], 12), | |
predict_model = pred_specific_var, | |
group = groups, | |
output_size = 12, | |
parallel = FALSE | |
) | |
# Print the explanation (plotting does not work yet). | |
print(explanation) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment