jonlachmann · November 30, 2022 10:03
diff --git a/var_shapr.R b/var_shapr.R
 # Simulate two noisy upward-trending series: column 1 runs over roughly 1-100,
 # column 2 over roughly 101-200 (Gaussian noise with sd 0.5).
 data <- matrix(rnorm(n = 200, mean = 1:200, sd = 0.5), nrow = 100, ncol = 2)

 p <- 2 # Lag order of model
 horizon <- 12 # Forecast horizon
 K <- ncol(data) # Number of variables in model

 # Build the regressor matrix X: embed() stacks the current observation and its
 # p lags side by side, the first K columns (the current observation) are
 # dropped, and a constant column of ones is appended for the intercept.
 # NOTE(review): embed() orders the remaining columns lag-major (x1 lag 1,
 # x2 lag 1, x1 lag 2, x2 lag 2) - confirm the labels below are meant to be read
 # in that order.
 X <- cbind(embed(data, dimension = p + 1)[, -seq_len(K)], 1)
 dimnames(X) <- list(NULL, c("x1_1", "x1_2", "x2_1", "x2_2", "intercept"))

 # The dependent variable Y is the data with the first p rows removed, so that
 # row i of Y lines up with row i of X.
 Y <- tail(data, n = -p)
 colnames(Y) <- c("x1", "x2")

 # Estimate a basic VAR model by least squares (one coefficient column per variable).
 coefs <- qr.solve(a = X, b = Y)

 #' Iterated multi-step forecast from a VAR model.
 #'
 #' Each step multiplies the current regressor vector by the coefficient matrix
 #' and then shifts the lags: the new prediction becomes lag 1, the oldest lag
 #' is dropped and the last element (the intercept term) stays in place.
 #' @param model The model to use; a list with `coefs` (coefficient matrix with
 #'   one row per element of `X`) and `k` (number of variables).
 #' @param X The vector of data to start the prediction from, i.e. the last
 #'   observation of the lagged data: `k` values per lag, most recent lag first,
 #'   followed by the intercept term as the last element.
 #' @param h The forecast horizon.
 #' @return A numeric matrix where each row contains subsequent steps ahead and
 #'   the columns are the different variables (`h` rows, `k` columns).
 pred_var <- function (model, X, h=1) {
   X <- as.numeric(X)
   k <- model$k
   n <- length(X)
   # Positions that survive the shift: everything except the oldest lag (the
   # last k lag entries) and the intercept, which is re-appended separately.
   # For k = 2 and two lags this is c(1, 2), with the intercept at position 5.
   keep <- seq_len(n - 1 - k)
   res <- matrix(NA_real_, h, k)
   for (i in seq_len(h)) {
     pred <- as.numeric(X %*% model$coefs)
     res[i, ] <- pred
     X <- c(pred, X[keep], X[n])
   }
   res
 }

 #' Predict a specific variable from a VAR model using multiple X vectors.
 #' @param model The model to use for predictions.
 #' Should have a property "step" to select forecast horizon and a property "variable" to select which variable to forecast.
 #' @param newdata A data.table (or matrix) where each row contains an X vector to use for prediction.
 #' @return A data.frame with one row per row of `newdata` and one column per
 #'   forecast step (`model$step` columns), holding the forecasts of the
 #'   selected variable.
 pred_specific_var <- function (model, newdata) {
   step <- model$step
   variable <- model$variable
   n_obs <- nrow(newdata)
   # Fill a plain numeric matrix and convert once at the end; assigning row by
   # row into a data.frame is considerably slower.
   res <- matrix(NA_real_, n_obs, step)
   for (i in seq_len(n_obs)) {
     res[i, ] <- pred_var(model, as.numeric(newdata[i, ]), step)[, variable]
   }
   as.data.frame(res)
 }

 # Assemble the "model" object: coefficients, number of variables, forecast
 # horizon, and the index of the variable whose forecast is explained.
 model <- list(coefs = coefs, k = K, step = horizon, variable = 1)
 class(model) <- "var"

 # Create a matrix of the last observation seen before the prediction as the X we want to explain:
 # the last row of Y becomes lag 1, the lag 1 entries of the last row of X become lag 2,
 # and the intercept is carried over.
 x_explain <- matrix(c(Y[nrow(Y), ], X[nrow(X), c(1, 2, 5)]), nrow = 1)
 # Reuse the training names so x_explain always lines up with x_train.
 colnames(x_explain) <- colnames(X)

 # Group X by variable. The lag columns of X are lag-major (x1 lag 1, x2 lag 1,
 # x1 lag 2, x2 lag 2), so x1 owns columns 1 and 3 and x2 owns columns 2 and 4.
 # Selecting by position keeps the grouping correct whatever the labels are.
 groups <- list(
   x1 = colnames(X)[c(1, 3)],
   x2 = colnames(X)[c(2, 4)],
   intercept = "intercept"
 )

 # Load the shapr package on the branch output_size (you should be inside the shapr folder when running this).
 devtools::load_all()

 # Create the explanation, note that prediction zero is of length "output_size" here, but not necessarily containing a repeated value.
 # Both are derived from the horizon and the data instead of being hard-coded.
 explanation <- explain(
   model = model,
   x_explain = x_explain,
   x_train = X,
   approach = "empirical",
   prediction_zero = rep(Y[nrow(Y), model$variable], horizon),
   predict_model = pred_specific_var,
   group = groups,
   output_size = horizon,
   parallel = FALSE
 )

 # Print the explanation (plotting does not work yet).
 print(explanation)
	# Simulate two noisy upward-trending series: column 1 runs over roughly 1-100,
	# column 2 over roughly 101-200 (Gaussian noise with sd 0.5).
	data <- matrix(rnorm(n = 200, mean = 1:200, sd = 0.5), nrow = 100, ncol = 2)

	p <- 2 # Lag order of model
	horizon <- 12 # Forecast horizon
	K <- ncol(data) # Number of variables in model

	# Build the regressor matrix X: embed() stacks the current observation and its
	# p lags side by side, the first K columns (the current observation) are
	# dropped, and a constant column of ones is appended for the intercept.
	# NOTE(review): embed() orders the remaining columns lag-major (x1 lag 1,
	# x2 lag 1, x1 lag 2, x2 lag 2) - confirm the labels below are meant to be read
	# in that order.
	X <- cbind(embed(data, dimension = p + 1)[, -seq_len(K)], 1)
	dimnames(X) <- list(NULL, c("x1_1", "x1_2", "x2_1", "x2_2", "intercept"))

	# The dependent variable Y is the data with the first p rows removed, so that
	# row i of Y lines up with row i of X.
	Y <- tail(data, n = -p)
	colnames(Y) <- c("x1", "x2")

	# Estimate a basic VAR model by least squares (one coefficient column per variable).
	coefs <- qr.solve(a = X, b = Y)

	#' Iterated multi-step forecast from a VAR model.
	#'
	#' Each step multiplies the current regressor vector by the coefficient matrix
	#' and then shifts the lags: the new prediction becomes lag 1, the oldest lag
	#' is dropped and the last element (the intercept term) stays in place.
	#' @param model The model to use; a list with `coefs` (coefficient matrix with
	#'   one row per element of `X`) and `k` (number of variables).
	#' @param X The vector of data to start the prediction from, i.e. the last
	#'   observation of the lagged data: `k` values per lag, most recent lag first,
	#'   followed by the intercept term as the last element.
	#' @param h The forecast horizon.
	#' @return A numeric matrix where each row contains subsequent steps ahead and
	#'   the columns are the different variables (`h` rows, `k` columns).
	pred_var <- function (model, X, h=1) {
	  X <- as.numeric(X)
	  k <- model$k
	  n <- length(X)
	  # Positions that survive the shift: everything except the oldest lag (the
	  # last k lag entries) and the intercept, which is re-appended separately.
	  # For k = 2 and two lags this is c(1, 2), with the intercept at position 5.
	  keep <- seq_len(n - 1 - k)
	  res <- matrix(NA_real_, h, k)
	  for (i in seq_len(h)) {
	    pred <- as.numeric(X %*% model$coefs)
	    res[i, ] <- pred
	    X <- c(pred, X[keep], X[n])
	  }
	  res
	}

	#' Predict a specific variable from a VAR model using multiple X vectors.
	#' @param model The model to use for predictions.
	#' Should have a property "step" to select forecast horizon and a property "variable" to select which variable to forecast.
	#' @param newdata A data.table (or matrix) where each row contains an X vector to use for prediction.
	#' @return A data.frame with one row per row of `newdata` and one column per
	#'   forecast step (`model$step` columns), holding the forecasts of the
	#'   selected variable.
	pred_specific_var <- function (model, newdata) {
	  step <- model$step
	  variable <- model$variable
	  n_obs <- nrow(newdata)
	  # Fill a plain numeric matrix and convert once at the end; assigning row by
	  # row into a data.frame is considerably slower.
	  res <- matrix(NA_real_, n_obs, step)
	  for (i in seq_len(n_obs)) {
	    res[i, ] <- pred_var(model, as.numeric(newdata[i, ]), step)[, variable]
	  }
	  as.data.frame(res)
	}

	# Assemble the "model" object: coefficients, number of variables, forecast
	# horizon, and the index of the variable whose forecast is explained.
	model <- list(coefs = coefs, k = K, step = horizon, variable = 1)
	class(model) <- "var"

	# Create a matrix of the last observation seen before the prediction as the X we want to explain:
	# the last row of Y becomes lag 1, the lag 1 entries of the last row of X become lag 2,
	# and the intercept is carried over.
	x_explain <- matrix(c(Y[nrow(Y), ], X[nrow(X), c(1, 2, 5)]), nrow = 1)
	# Reuse the training names so x_explain always lines up with x_train.
	colnames(x_explain) <- colnames(X)

	# Group X by variable. The lag columns of X are lag-major (x1 lag 1, x2 lag 1,
	# x1 lag 2, x2 lag 2), so x1 owns columns 1 and 3 and x2 owns columns 2 and 4.
	# Selecting by position keeps the grouping correct whatever the labels are.
	groups <- list(
	  x1 = colnames(X)[c(1, 3)],
	  x2 = colnames(X)[c(2, 4)],
	  intercept = "intercept"
	)

	# Load the shapr package on the branch output_size (you should be inside the shapr folder when running this).
	devtools::load_all()

	# Create the explanation, note that prediction zero is of length "output_size" here, but not necessarily containing a repeated value.
	# Both are derived from the horizon and the data instead of being hard-coded.
	explanation <- explain(
	  model = model,
	  x_explain = x_explain,
	  x_train = X,
	  approach = "empirical",
	  prediction_zero = rep(Y[nrow(Y), model$variable], horizon),
	  predict_model = pred_specific_var,
	  group = groups,
	  output_size = horizon,
	  parallel = FALSE
	)

	# Print the explanation (plotting does not work yet).
	print(explanation)