Created
December 2, 2021 19:49
-
-
Save cnmoro/11eea791ed019d46968e11532d42dafa to your computer and use it in GitHub Desktop.
shap_feature_importance.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import shap | |
import numpy as np | |
import pandas as pd | |
# Names of the categorical columns as they were called BEFORE one-hot encoding.
# The values below are placeholders and must be replaced with real column names.
# avaliar_importancias_features reads this tuple to fold the one-hot encoded
# columns back into a single entry per original feature.
categoric_features = ('FEATURE1', 'FEATURE2', 'ETC')
def _pesos_por_coluna(shap_values, n_colunas):
    """Collapse raw SHAP output into one non-negative weight per column of X.

    The weight of a column is the mean over model outputs of ``|shap|``,
    summed over samples. Accepted layouts:

    * list/tuple of ``(n_samples, n_features)`` arrays, one per output;
    * a single ``(n_samples, n_features)`` array (single-output model);
    * a single ``(n_samples, n_features, n_outputs)`` array.

    Raises:
        ValueError: if the result cannot be aligned with ``n_colunas`` columns.
    """
    if isinstance(shap_values, (list, tuple)):
        # One array per output: stack them so outputs become the leading axis.
        magnitudes = np.abs(np.stack([np.asarray(v) for v in shap_values]))
    else:
        magnitudes = np.abs(np.asarray(shap_values))
        if magnitudes.ndim == 2:
            # Single output: add a length-1 outputs axis so the reduction
            # below is the same for every layout.
            magnitudes = magnitudes[np.newaxis]
        elif magnitudes.ndim == 3 and magnitudes.shape[1] == n_colunas:
            # Outputs-last layout: move outputs to the front.
            # NOTE(review): if n_samples == n_features an outputs-first array
            # is indistinguishable from this layout; outputs-last is assumed.
            magnitudes = np.moveaxis(magnitudes, -1, 0)

    if magnitudes.ndim != 3 or magnitudes.shape[-1] != n_colunas:
        raise ValueError(
            'Unexpected SHAP values shape {} for {} feature columns'.format(
                magnitudes.shape, n_colunas
            )
        )

    # (outputs, samples, features) -> mean over outputs -> sum over samples.
    return magnitudes.mean(axis=0).sum(axis=0)


def _nome_base(coluna, categoricas):
    """Return the categorical feature ``coluna`` was one-hot encoded from.

    A column belongs to a categorical feature when its name is that feature's
    name followed by an underscore. When several names match, the longest one
    wins. Columns that match nothing (or whose label is not a string) are
    returned unchanged.
    """
    if not isinstance(coluna, str):
        return coluna
    candidatos = [nome for nome in categoricas if coluna.startswith(nome + '_')]
    if not candidatos:
        return coluna
    return max(candidatos, key=len)


def avaliar_importancias_features(modelo_treinado, X):
    """Rank the features of ``X`` by aggregated absolute SHAP value.

    Columns produced by one-hot encoding a categorical feature (named
    ``<feature>_<category>``) have their weights summed back into a single
    ``<feature>`` entry. The categorical feature names are read from the
    module-level ``categoric_features`` tuple.

    Args:
        modelo_treinado: fitted model, passed to ``shap.Explainer``.
        X: data to explain; must expose ``.columns`` with one label per
            feature column (presumably a pandas DataFrame -- confirm with
            callers).

    Returns:
        dict mapping feature name to its weight (float), ordered from the
        most to the least important feature.

    Raises:
        ValueError: if the SHAP values cannot be aligned with ``X.columns``.
    """
    explainer = shap.Explainer(modelo_treinado)
    shap_values = explainer.shap_values(X)

    colunas = list(X.columns)
    pesos = _pesos_por_coluna(shap_values, len(colunas))

    # Sum the importances of one-hot encoded categorical features: the
    # encoding appends an underscore plus the category name to the original
    # feature name, so several columns map to the same source feature.
    agregado = {}
    for coluna, peso in zip(colunas, pesos):
        nome = _nome_base(coluna, categoric_features)
        agregado[nome] = agregado.get(nome, 0.0) + float(peso)

    # Most important features first.
    return dict(sorted(agregado.items(), key=lambda item: item[1], reverse=True))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment