@apoorvalal
Created November 5, 2023 17:21
lean implementation of OLS
import numpy as np
from scipy.linalg import lstsq
np.random.seed(42)
# %%
def ols(X, y, vcov='HC1', driver='gelsy'):
    """
    Fast, minimal implementation of least-squares regression with robust SEs.

    Args:
        X: n × p array of covariates
        y: length-n array of outcomes
        vcov: if not None, also return heteroskedasticity-robust (HC1) standard errors
        driver: LAPACK driver. Defaults to 'gelsy', which avoids forming the normal equations directly.

    Returns:
        Coefficient vector, or a tuple of (coefficients, standard errors) if vcov is not None.
    """
    # prepend an intercept column unless the first column already looks like all ones
    if int(X[:, 0].sum()) != len(y):
        X = np.c_[np.repeat(1, len(y)), X]
    n, k = X.shape
    # coefficients via LAPACK least squares
    β = lstsq(X, y, lapack_driver=driver)[0]
    if vcov is not None:
        ε = y - X @ β
        # "meat" of the sandwich, X' diag(ε²) X, without materialising diag(ε²)  # yer a wizard harry
        M = np.einsum("ij,i,ik->jk", X, ε**2, X)
        # "bread": (X'X)^{-1}
        XtX_inv = np.linalg.inv(X.T @ X)
        Σ = XtX_inv @ M @ XtX_inv
        # HC1 small-sample correction n / (n - k)
        return β, np.sqrt((n / (n - k)) * np.diag(Σ))
    return β
# %%
n, p = 1000, 2
X = np.c_[np.repeat(1, n),
          np.random.normal(size=n * p).reshape((n, p))]
β = np.array([1, 3, 2])
y = X @ β + np.random.normal(0, 0.5 + 0.1 * (X[:, 1] > 0), n)  # heteroskedastic noise: sd is 0.5 or 0.6 depending on the sign of X[:, 1]
ols(X, y)
# returns (coefficients, HC1 standard errors); with this DGP the coefficients should be close to [1, 3, 2]
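# %% quick check of the driver claim (illustration only): the 'gelsy' least-squares
# route should agree with explicitly solving the normal equations X'X b = X'y,
# which is the computation the driver avoids forming directly
b_normal = np.linalg.solve(X.T @ X, X.T @ y)
np.allclose(ols(X, y, None), b_normal)  # expected: True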
# %% just coefficients
ols(X, y, None)
# returns just the coefficient vector, again close to [1, 3, 2]
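# %% the intercept check: passing X without its leading column of ones should give
# the same fit, since ols() prepends the intercept column itself
ols(X[:, 1:], y, None)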
# %%
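# optional cross-check against statsmodels, assuming it is installed: coefficients
# and HC1 standard errors should match the ols() output up to floating-point error
import statsmodels.api as sm
fit = sm.OLS(y, X).fit(cov_type="HC1")
print(fit.params, fit.bse)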