Created
November 5, 2023 17:21
-
-
Save apoorvalal/0011811861e8f701b1bb84593992676d to your computer and use it in GitHub Desktop.
lean implementation of OLS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from scipy.linalg import lstsq | |
np.random.seed(42) | |
# %% | |
def ols(X, y, vcov = 'HC1', driver = 'gelsy'): | |
""" | |
Fast, minimal implementation of least squares regression with robust SEs | |
Args: | |
X: n X p array of covariates | |
y: n X 1 array of outcomes | |
vcov: if not None, returns robust SEs | |
driver: lapack driver. Defaults to 'gelsy', avoids directly solving normal eqns. | |
Returns: | |
p X 1 coefficient vector, or tuple of coefficient vector and standard error | |
if vcov is not None. | |
""" | |
if int(X[:, 0].sum()) != len(y): | |
X = np.c_[np.repeat(1, len(y)), X] | |
n, k = X.shape | |
# coefficients | |
β = lstsq(X, y, lapack_driver = driver)[0] | |
if vcov is not None: | |
ε = y - X @ β | |
M = np.einsum("ij,i,ik->jk", X, ε**2, X) # yer a wizard harry | |
XtX = np.linalg.inv(X.T @ X) | |
Σ = XtX @ M @ XtX | |
return β, np.sqrt( (n / (n - k)) * np.diag(Σ)) | |
return β | |
# %% | |
n, p = 1000, 2 | |
X = np.c_[np.repeat(1, n), | |
np.random.normal(size = n*p).reshape((n, p)) | |
] | |
β = np.array([1, 3, 2]) | |
y = X @ β + np.random.normal(0, 0.5 + 0.1 * X[:, 1]>0, n) # heteroskedasticity | |
ols(X, y) | |
# (array([1.00393544, 3.02776697, 2.01715091]), | |
# array([0.03114339, 0.0340145 , 0.02988759])) | |
# %% just coefficients | |
ols(X, y, None) | |
# array([1.00393544, 3.02776697, 2.01715091]) | |
# %% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment