-
-
Save brentp/5355925 to your computer and use it in GitHub Desktop.
from sklearn import linear_model | |
from scipy import stats | |
import numpy as np | |
class LinearRegression(linear_model.LinearRegression):
    """
    Ordinary least squares regression modelled after sklearn's, which also
    computes t-statistics and two-sided p-values for the model
    coefficients (betas).

    Additional attributes available after .fit() are `t` and `p`. Both have
    the same shape as `coef_`: (n_features,) for a 1-D `y`, or
    (n_targets, n_features) for a 2-D `y`.

    This class sets `fit_intercept` to False by default, since usually the
    intercept is included as a constant column in X. When `fit_intercept`
    is True the intercept is accounted for in the standard errors and the
    degrees of freedom, but no t/p value is reported for the intercept
    itself.
    """

    def __init__(self, fit_intercept=False, copy_X=True, n_jobs=None,
                 positive=False):
        # Parameters are spelled out instead of using *args/**kwargs:
        # scikit-learn inspects the __init__ signature to implement
        # get_params()/set_params()/clone() and refuses varargs signatures.
        # NOTE(review): `positive` requires a scikit-learn release whose
        # LinearRegression accepts it -- confirm the minimum supported version.
        super(LinearRegression, self).__init__(
            fit_intercept=fit_intercept,
            copy_X=copy_X,
            n_jobs=n_jobs,
            positive=positive,
        )

    def fit(self, X, y, n_jobs=1):
        """
        Fit the linear model, then derive `self.t` and `self.p`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, n_targets)
        n_jobs : int
            Kept for backward compatibility. A value other than the default
            is stored on the estimator as `n_jobs` before fitting; it is
            deliberately NOT forwarded positionally, because the third
            positional argument of the parent's fit() is `sample_weight`.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If there are not more samples than estimated parameters, so the
            residual variance cannot be estimated.
        """
        if n_jobs != 1:
            self.n_jobs = n_jobs
        super(LinearRegression, self).fit(X, y)

        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples = X.shape[0]

        # The design matrix must contain every estimated parameter, so add
        # the constant column when the model fitted its own intercept.
        if self.fit_intercept:
            design = np.column_stack([np.ones(n_samples), X])
        else:
            design = X

        dof = n_samples - design.shape[1]
        if dof <= 0:
            raise ValueError(
                "need more samples (%d) than estimated parameters (%d) to "
                "compute t-statistics" % (n_samples, design.shape[1])
            )

        # Residual variance per target; atleast_1d keeps a 1-D `y`
        # (scalar sum) on the same code path as a 2-D `y`.
        residuals = self.predict(X) - y
        sigma2 = np.atleast_1d(np.sum(residuals ** 2, axis=0)) / float(dof)

        # diag((Z'Z)^-1) is identical for every target: compute it once.
        unscaled_var = np.diagonal(np.linalg.inv(np.dot(design.T, design)))
        if self.fit_intercept:
            # Drop the intercept entry so the rest lines up with coef_.
            unscaled_var = unscaled_var[1:]

        se = np.sqrt(np.outer(sigma2, unscaled_var)).reshape(self.coef_.shape)

        self.t = self.coef_ / se
        # sf() is the survival function (1 - cdf) evaluated without the
        # cancellation error that 1 - cdf() suffers for large |t|.
        self.p = 2 * stats.t.sf(np.abs(self.t), dof)
        return self
It seems this code doesn't work: neither the original version, nor the one
with the __init__ method from above.
In the code below I am running a time-series regression over the last 36 periods. I can find the coefficients through 'model.coef_', but how do I find the p-value for each of these coefficients?
# Rolling regression: for every end position c, fit on the 36 rows before it.
for c in range(36, 167):
    # Column 3 is used as the response and columns 0, 4, 5 as regressors.
    Port = df2.iloc[c - 36:c, 3]
    Var_x = df2.iloc[c - 36:c, [0, 4, 5]]
    y = Port.to_numpy()
    x = Var_x.to_numpy().reshape(-1, 3)
    # One fit per window is enough; fit() returns the fitted estimator.
    model = LinearRegression().fit(x, y)
    r_sq = model.score(x, y)
    print(model.coef_)
In the code below I am running a time-series regression over the last 36 periods. I can find the coefficients through 'model.coef_', but how do I find the p-value for each of these coefficients?
# Rolling regression: for every end position c, fit on the 36 rows before it.
for c in range(36, 167):
    # Column 3 is used as the response and columns 0, 4, 5 as regressors.
    Port = df2.iloc[c - 36:c, 3]
    Var_x = df2.iloc[c - 36:c, [0, 4, 5]]
    y = Port.to_numpy()
    x = Var_x.to_numpy().reshape(-1, 3)
    # One fit per window is enough; fit() returns the fitted estimator.
    model = LinearRegression().fit(x, y)
    r_sq = model.score(x, y)
    print(model.coef_)
You can try the following:
# NOTE(review): f_regression appears to test each column of X against y on
# its own (univariate F-tests), so these p-values may differ from the
# per-coefficient p-values of a joint multi-feature fit -- confirm before use.
from sklearn.feature_selection import f_regression
f_statistic, p_value = f_regression(X, y)
maybe you can change the init method into