Created
August 1, 2018 15:39
-
-
Save noleto/88d39f92563d671c290f5b8aac66e1b2 to your computer and use it in GitHub Desktop.
LIME local prediction computed from weights of features
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
import lime.lime_tabular

### make data
## Synthetic regression problem: 100 samples, 5 features (3 informative),
## generated with a fixed seed so that every run is reproducible.
X, y = make_regression(
    n_samples=100,
    n_features=5,
    n_informative=3,
    random_state=0,
    noise=4.0,
    bias=10.0,
)
feature_names = ["x" + str(i + 1) for i in range(0, 5)]
data = pd.DataFrame(X, columns=feature_names)
data["target"] = y

## 70/30 train/test split, seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    data[feature_names],  ## predictors only
    data.target,
    test_size=0.30,
    random_state=0,
)

### create and fit model
estimator = xgb.XGBRegressor()
estimator.fit(X_train, y_train)
## The prediction function receives its data as a numpy array, which has no
## column names, so we put the feature names back before calling the model.
def xgb_predict(data_asarray):
    """Return the fitted model's predictions for an unlabeled array.

    Args:
        data_asarray: 2-D array-like with one row per sample and one column
            per entry of the module-level ``feature_names`` list, in the
            same order.

    Returns:
        The result of ``estimator.predict`` on the re-labeled data.
    """
    data_asframe = pd.DataFrame(data_asarray, columns=feature_names)
    return estimator.predict(data_asframe)
### explain one test instance with LIME
## The explainer must exist before it is used below. Continuous features are
## deliberately NOT discretized: the manual reconstruction at the end relies on
## the local linear model having been fitted on standardized (mean/scale)
## feature values, which is only the case when discretization is disabled.
lime_tabular_explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train.values,
    feature_names=feature_names,
    mode="regression",
    discretize_continuous=False,
)

instance = X_test.iloc[2]
## explain_instance expects a 1-D numpy array, so pass the raw values.
exp = lime_tabular_explainer.explain_instance(instance.values, xgb_predict, num_features=5)

## The instance has to be scaled because the coefficients are created from a
## scaled data set, see https://github.com/marcotcr/lime/issues/189
scaled_instance = (instance - lime_tabular_explainer.scaler.mean_) / lime_tabular_explainer.scaler.scale_

## Each entry of the local explanation is a (feature position, weight) pair;
## index 1 is used for both the explanation map and the intercept.
local_exp = exp.as_map()[1]
ids = [feature_id for feature_id, _ in local_exp]
coefs = np.array([weight for _, weight in local_exp])

## ids are integer positions while the Series is labelled "x1".."x5", so select
## by position explicitly instead of relying on pandas' deprecated fallback.
selected_values = scaled_instance.iloc[ids].to_numpy()
local_pred = (coefs * selected_values).sum() + exp.intercept[1]
print("Manual calculation of local pred:", local_pred)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment