-
-
Save brentp/5355925 to your computer and use it in GitHub Desktop.
| from sklearn import linear_model | |
| from scipy import stats | |
| import numpy as np | |
class LinearRegression(linear_model.LinearRegression):
    """
    Ordinary least squares regression modelled after sklearn's
    ``LinearRegression``, extended with t-statistics and two-sided p-values
    for the model coefficients (betas).

    Additional attributes available after ``.fit()`` are ``t`` and ``p``.
    Both have the same shape as ``coef_``: ``(n_coefs,)`` when ``y`` is 1-D,
    otherwise ``(y.shape[1], X.shape[1])``, i.e. ``(n_targets, n_coefs)``.

    This class sets ``fit_intercept`` to ``False`` by default, since usually
    the intercept is included as a constant column in ``X``.
    """

    def __init__(self, *args, **kwargs):
        # Only inject the default when the caller supplied nothing at all
        # for it; with positional arguments present, adding the keyword could
        # collide with a positionally supplied value.
        if not args and "fit_intercept" not in kwargs:
            kwargs["fit_intercept"] = False
        super(LinearRegression, self).__init__(*args, **kwargs)

    @classmethod
    def _get_param_names(cls):
        # scikit-learn discovers estimator parameters by inspecting the
        # signature of ``__init__`` and rejects ``*args``.  The accepted
        # parameters are exactly the parent's, so report those instead; this
        # keeps ``get_params``/``clone``/``repr`` working.
        return linear_model.LinearRegression._get_param_names()

    def fit(self, X, y, n_jobs=1):
        """
        Fit the linear model, then compute ``self.t`` and ``self.p``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_coefs)
            Design matrix.  Include a constant column for the intercept
            unless the estimator was created with ``fit_intercept=True``.
        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Response values.
        n_jobs : int, optional
            Accepted for backward compatibility only and ignored.  It is not
            forwarded because the third positional argument of the parent's
            ``fit`` is ``sample_weight`` in current scikit-learn.

        Returns
        -------
        self
            The fitted estimator.  If there are no residual degrees of
            freedom (``n_samples <= number of estimated parameters``),
            ``t`` and ``p`` are filled with NaN.
        """
        super(LinearRegression, self).fit(X, y)

        design = np.asarray(X, dtype=float)
        target = np.asarray(y, dtype=float)
        residuals = np.asarray(self.predict(X)) - target

        # When sklearn estimates the intercept itself, the effective design
        # matrix has an extra constant column; it must be included for the
        # covariance and the degrees of freedom to be correct.
        if self.fit_intercept:
            design = np.column_stack([np.ones(design.shape[0]), design])

        n_obs, n_params = design.shape
        dof = n_obs - n_params
        if dof <= 0:
            # The residual variance is undefined; keep the fitted
            # coefficients usable and flag the statistics as unavailable.
            self.t = np.full(np.shape(self.coef_), np.nan)
            self.p = np.full(np.shape(self.coef_), np.nan)
            return self

        # Residual variance per target; atleast_1d makes the 1-D ``y`` case
        # (a scalar sum) behave like a single-target 2-D case.
        mse = np.atleast_1d(np.sum(residuals ** 2, axis=0) / float(dof))

        # diag((X'X)^-1) does not depend on the target, so compute it once.
        unscaled_var = np.diagonal(np.linalg.inv(np.dot(design.T, design)))
        if self.fit_intercept:
            # Drop the intercept entry so the result lines up with coef_.
            unscaled_var = unscaled_var[1:]

        se = np.sqrt(np.outer(mse, unscaled_var))
        if target.ndim == 1:
            # coef_ is 1-D for 1-D y; match its shape.
            se = se[0]

        self.t = self.coef_ / se
        # The survival function is more accurate than ``1 - cdf`` for the
        # very small p-values of strongly significant coefficients.
        self.p = 2 * stats.t.sf(np.abs(self.t), dof)
        return self
It seems this code doesn't work: neither the original, nor the version with the `__init__` method from above.
In the code below I am running time-series regression for the last 36 periods. I can find coefficients through 'model.coef_' but how do I find p-value for each of these coefficients?
# Rolling regression over a 36-row window ending just before row c.
# NOTE(review): indentation was lost in this paste; presumably every
# statement below belongs to the loop body -- confirm against the original.
for c in range(36,167):
# Dependent variable: column 3 of the current window.
Port = df2.iloc[range(c-36,c),3]
# Regressors: columns 0, 4 and 5 of the same window.
Var_x = df2.iloc[range(c-36,c),[0,4,5]]
y = Port.to_numpy()
x = Var_x.to_numpy().reshape(-1,3)
# The model is created and fitted here ...
model = LinearRegression()
model.fit(x, y)
# ... and then replaced by a second, freshly fitted instance on the same data.
model = LinearRegression().fit(x, y)
# score() is called on the same x, y that were used for fitting.
r_sq = model.score(x, y)
print(model.coef_)
In the code below I am running time-series regression for the last 36 periods. I can find coefficients through 'model.coef_' but how do I find p-value for each of these coefficients?
for c in range(36,167): Port = df2.iloc[range(c-36,c),3] Var_x = df2.iloc[range(c-36,c),[0,4,5]] y = Port.to_numpy() x = Var_x.to_numpy().reshape(-1,3) model = LinearRegression() model.fit(x, y) model = LinearRegression().fit(x, y) r_sq = model.score(x, y) print(model.coef_)
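You can try the following (note that `f_regression` runs a separate univariate test for each feature, so its p-values are not the same as the p-values of the coefficients in the joint multiple regression):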
from sklearn.feature_selection import f_regression
f_statistic, p_value = f_regression(X, y)
Maybe you can change the `__init__` method into: