Feature selection by backward elimination using p-values
import numpy as np
import statsmodels.api as sm


def backward_elimination(X, y, sl):
    """
    X:  matrix of independent variables (predictors)
    y:  vector of the dependent variable (target)
    sl: significance level, e.g. 0.05 (5%)
    """
    # add a column of ones so the OLS model has an intercept term
    X = np.append(arr=np.ones((len(X), 1)).astype(int), values=X, axis=1)
    while True:
        regressor_OLS = sm.OLS(y, X).fit()
        # locate the predictor with the highest p-value
        ind = np.argmax(regressor_OLS.pvalues)
        max_pvalue = regressor_OLS.pvalues[ind]
        if max_pvalue > sl:
            # drop the least significant predictor and refit
            X = np.delete(X, ind, axis=1)
        else:
            # all remaining predictors are significant: report and stop
            print(regressor_OLS.summary())
            # remove the intercept column before returning (assumes it was not eliminated)
            X = np.delete(X, 0, axis=1)
            return X
# USAGE
# Suppose one has a matrix of features X and wants to use
# multiple linear regression to predict the values of a target y.
# To select the best features, they can apply the backward
# elimination method and save the selected features in a new
# matrix called X_opt (see the runnable sketch below):
#
# sl = 0.05
# X_opt = backward_elimination(X, y, sl)
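
# Illustrative example (not part of the original gist): synthetic data in which
# only the 1st and 3rd predictors actually drive the target, so the remaining
# columns should be eliminated. The data and variable names are hypothetical.
rng = np.random.RandomState(0)
X = rng.rand(100, 5)                             # 5 candidate predictors
y = 3*X[:, 0] - 2*X[:, 2] + 0.1*rng.randn(100)   # two true effects plus noise

X_opt = backward_elimination(X, y, sl=0.05)
print(X_opt.shape)  # expected to have fewer columns than X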