Created
July 30, 2021 17:27
-
-
Save GastonMazzei/1f22c7af7cdb8daec9d5141b19551417 to your computer and use it in GitHub Desktop.
Importancia Relativa usando el algoritmo "Gradient Boosting Classifier" de Scikit-Learn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as plt | |
from sklearn.ensemble import GradientBoostingClassifier | |
# Synthetic data set for a feature-importance demo: three input columns
# (uniform noise, a sine wave, a constant) and a binary label that depends
# only on the sign of the sine column.

# Random seed (SEED) and number of rows (N)
SEED = 1234
N = 2500
np.random.seed(SEED)

# Noise column: N uniform samples in [0, 1)
X_noise = np.random.rand(N)

# Sine column: sin(2*pi*100*t) for t evenly spaced in [0, 1],
# i.e. 100 full periods across the N samples
X_sine = np.sin(2 * np.pi * np.linspace(0, 1, N) * 100)

# OPTIONAL: constant dummy column, kept as a hook for experimentation
X_dummy = np.ones(N)

# Input matrix of shape (N, 3); columns are [noise, sine, dummy]
X = np.vstack([X_noise, X_sine, X_dummy]).T

# Class label from a threshold on the sine column: 1 where X_sine >= 0,
# 0 otherwise. Computed in one vectorized step instead of a Python loop.
# OPTIONAL: to make the label depend on the dummy column as well, AND this
# mask with a condition on X_dummy, e.g. `(X_sine >= 0) & (X_dummy != 1)`.
Y = (X_sine >= 0).astype(int)
# Fit a Gradient Boosting classifier with 100 estimators on the synthetic
# data; the seed is reused so the fit is reproducible.
gb = GradientBoostingClassifier(random_state=SEED, n_estimators=100)
gb.fit(X, Y)

# Bar chart of the fitted feature importances, rescaled to percentages,
# with one bar per input column.
column_labels = ['ruido', 'sin(x)', 'dummy']
bar_positions = range(X.shape[1])
importance_pct = gb.feature_importances_ * 100
plt.bar(bar_positions, importance_pct)
plt.xticks(bar_positions, column_labels)
plt.title('Importancia relativa\n segun Gradient Boosting\n')
plt.xlabel('columnas')
plt.ylabel('Importancia relativa (%)')
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment