Feature selection by backward elimination using p-values
import numpy as np
import statsmodels.api as sm


def backward_elimination(X, y, sl):
    """
    X:  matrix of independent variables (predictors)
    y:  vector of the dependent variable (target)
    sl: significance level, e.g. 0.05 (5%)
    """
    # add a column of ones so the OLS model has an intercept term
    X = np.append(arr=np.ones((len(X), 1)).astype(int), values=X, axis=1)
    while True:
        regressor_OLS = sm.OLS(y, X).fit()
        # locate the predictor with the highest p-value
        ind = np.argmax(regressor_OLS.pvalues)
        max_pvalue = regressor_OLS.pvalues[ind]
        if max_pvalue > sl:
            # drop the least significant predictor and refit
            X = np.delete(X, ind, axis=1)
        else:
            # all remaining predictors are significant: report and stop
            print(regressor_OLS.summary())
            # remove the intercept column before returning (assumes it was not eliminated)
            X = np.delete(X, 0, axis=1)
            return X
# USAGE
# Suppose one has a matrix of features X and wants to use
# multiple linear regression to predict the values of a target y.
# To select the best features, they can apply the backward
# elimination method and save the selected features in a new
# matrix called X_opt (see the runnable sketch below):
#
# sl = 0.05
# X_opt = backward_elimination(X, y, sl)
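
# Illustrative example (not part of the original gist): synthetic data in which
# only the 1st and 3rd predictors actually drive the target, so the remaining
# columns should be eliminated. The data and variable names are hypothetical.
rng = np.random.RandomState(0)
X = rng.rand(100, 5)                             # 5 candidate predictors
y = 3*X[:, 0] - 2*X[:, 2] + 0.1*rng.randn(100)   # two true effects plus noise

X_opt = backward_elimination(X, y, sl=0.05)
print(X_opt.shape)  # expected to have fewer columns than X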