-
-
Save brentp/5355925 to your computer and use it in GitHub Desktop.
from sklearn import linear_model | |
from scipy import stats | |
import numpy as np | |
class LinearRegression(linear_model.LinearRegression): | |
""" | |
LinearRegression class after sklearn's, but calculate t-statistics | |
and p-values for model coefficients (betas). | |
Additional attributes available after .fit() | |
are `t` and `p` which are of the shape (y.shape[1], X.shape[1]) | |
which is (n_features, n_coefs) | |
This class sets the intercept to 0 by default, since usually we include it | |
in X. | |
""" | |
def __init__(self, *args, **kwargs): | |
if not "fit_intercept" in kwargs: | |
kwargs['fit_intercept'] = False | |
super(LinearRegression, self)\ | |
.__init__(*args, **kwargs) | |
def fit(self, X, y, n_jobs=1): | |
self = super(LinearRegression, self).fit(X, y, n_jobs) | |
sse = np.sum((self.predict(X) - y) ** 2, axis=0) / float(X.shape[0] - X.shape[1]) | |
se = np.array([ | |
np.sqrt(np.diagonal(sse[i] * np.linalg.inv(np.dot(X.T, X)))) | |
for i in range(sse.shape[0]) | |
]) | |
self.t = self.coef_ / se | |
self.p = 2 * (1 - stats.t.cdf(np.abs(self.t), y.shape[0] - X.shape[1])) | |
return self |
Would this manipulation also work for extending scikit's Lasso?
Sorry I know it's really stupid, but I try to run the following code in my notebook and it didn't work
from sklearn import linear_model
from scipy import stats
import numpy as np
def __init__(self, *args, **kwargs):
if not "fit_intercept" in kwargs:
kwargs['fit_intercept'] = False
super(LinearRegression, self)\
.__init__(*args, **kwargs)
def fit(self, X, y, n_jobs=1):
self = super(LinearRegression, self).fit(X, y, n_jobs)
sse = np.sum((self.predict(X) - y) ** 2, axis=0) / float(X.shape[0] - X.shape[1])
se = np.array([
np.sqrt(np.diagonal(sse[i] * np.linalg.inv(np.dot(X.T, X))))
for i in range(sse.shape[0])
])
self.t = self.coef_ / se
self.p = 2 * (1 - stats.t.cdf(np.abs(self.t), y.shape[0] - X.shape[1]))
return self
regr = LinearRegression()
regr.fit(X_train, y_train.to_frame())
It returns an error
RuntimeError: scikit-learn estimators should always specify their parameters in the signature of their __init__ (no varargs). <class '__main__.LinearRegression'> with constructor (self, *args, **kwargs) doesn't follow this convention.
>
I've tried to google the baseEstimator of sklearn but I couldn't understand at all. Can anyone point out what mistake I have made?
maybe you can change the init method into
def __init__(self, fit_intercept=True, normalize=False, copy_X=True,
n_jobs=1):
self.fit_intercept = fit_intercept
self.normalize = normalize
self.copy_X = copy_X
self.n_jobs = n_jobs
it seems this code doesn't work, neighter original, nor with __init__
method from above
In the code below I am running time-series regression for the last 36 periods. I can find coefficients through 'model.coef_' but how do I find p-value for each of these coefficients?
for c in range(36,167):
Port = df2.iloc[range(c-36,c),3]
Var_x = df2.iloc[range(c-36,c),[0,4,5]]
y = Port.to_numpy()
x = Var_x.to_numpy().reshape(-1,3)
model = LinearRegression()
model.fit(x, y)
model = LinearRegression().fit(x, y)
r_sq = model.score(x, y)
print(model.coef_)
In the code below I am running time-series regression for the last 36 periods. I can find coefficients through 'model.coef_' but how do I find p-value for each of these coefficients?
for c in range(36,167): Port = df2.iloc[range(c-36,c),3] Var_x = df2.iloc[range(c-36,c),[0,4,5]] y = Port.to_numpy() x = Var_x.to_numpy().reshape(-1,3) model = LinearRegression() model.fit(x, y) model = LinearRegression().fit(x, y) r_sq = model.score(x, y) print(model.coef_)
You can try the following:
from sklearn.feature_selection import f_regression
f_statistic, p_value = f_regression(X, y)
or do this : se = np.array([np.sqrt(np.diagonal(sse * np.linalg.inv(np.dot(X.T, X))))])