Skip to content

Instantly share code, notes, and snippets.

@gvyshnya
Created July 11, 2020 13:37
Show Gist options
  • Save gvyshnya/513080f611491b8baa08cc1bf6987144 to your computer and use it in GitHub Desktop.
Save gvyshnya/513080f611491b8baa08cc1bf6987144 to your computer and use it in GitHub Desktop.
import pandas as pd
from sklearn.base import clone
def drop_col_feat_imp(model, X_train, y_train, random_state=42):
    """Compute drop-column feature importances for ``model``.

    A fresh clone of ``model`` is fitted on all columns of ``X_train`` to
    obtain a benchmark score.  Then, for every column, another clone is
    fitted with that column removed.  The importance of a column is the
    relative drop in score, ``(benchmark - dropped) / benchmark``, rounded
    to 4 decimal places.

    Scores come from the estimator's own ``score`` method and are computed
    on the same data that was used for fitting (``X_train`` / ``y_train``).

    Args:
        model: Estimator to evaluate.  It is only cloned, never fitted, so
            the caller's instance is left untouched.
        X_train: Feature table exposing ``.columns`` and
            ``.drop(col, axis=1)`` (e.g. a pandas DataFrame).
        y_train: Target values passed to ``fit`` and ``score``.
        random_state: Seed applied to every clone that exposes a
            ``random_state`` parameter, so all fits are comparable.

    Returns:
        A DataFrame with one row per column of ``X_train`` and two columns:
        ``'Feature'`` and ``'drop_col_importance'``.  If the benchmark score
        is exactly 0 the relative drop is undefined and every importance is
        NaN.
    """

    def _fit_and_score(features):
        # Clone so that each fit starts from the same unfitted specification.
        estimator = clone(model)
        # Seed through the parameter API, and only when the estimator really
        # has such a parameter, instead of attaching a stray attribute.
        if "random_state" in estimator.get_params(deep=False):
            estimator.set_params(random_state=random_state)
        estimator.fit(features, y_train)
        return estimator.score(features, y_train)

    # Benchmark: the model trained on the full set of columns.
    benchmark_score = _fit_and_score(X_train)

    importances = []
    for col in X_train.columns:
        # Build the reduced feature table once and reuse it for fit and score.
        reduced = X_train.drop(col, axis=1)
        drop_col_score = _fit_and_score(reduced)
        if benchmark_score == 0:
            # Relative change against a zero baseline is undefined; report
            # NaN rather than failing with a division by zero.
            importances.append(float("nan"))
        else:
            importances.append(
                round((benchmark_score - drop_col_score) / benchmark_score, 4)
            )

    importances_df = pd.DataFrame(X_train.columns, columns=['Feature'])
    importances_df['drop_col_importance'] = importances
    return importances_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment