Created
July 31, 2024 10:26
-
-
Save sachinsdate/e413c378dcac12a93f37eefdba6f046e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import pandas as pd | |
from patsy import dmatrices | |
import numpy as np | |
import scipy.stats | |
import statsmodels.api as sm | |
from sklearn.model_selection import train_test_split | |
import matplotlib.pyplot as plt | |
#Read the autos dataset into a Pandas Dataframe (automobiles.data.csv can be downloaded from the Gists section) | |
filename = 'automobiles.data.csv' | |
df = pd.read_csv(filepath_or_buffer=filename, header=0) | |
#Remove rows with NaNs | |
df = df.dropna() | |
print(df) | |
#Create a patsy expression of a linear model that regresses city_mpg on curb_weight and horsepower without an intercept | |
expr = 'city_mpg ~ curb_weight + horsepower -1' | |
#Create training and testing datasets in a 80:20 format | |
df_train, df_test = train_test_split(df, test_size=0.2) | |
#Carve out the y and X matrices | |
y_train, X_train = dmatrices(expr, df_train, return_type='dataframe') | |
y_test, X_test = dmatrices(expr, df_test, return_type='dataframe') | |
#Print the correlation matrix of X with y | |
corr_df_auto = pd.concat([y_train, X_train], axis=1).corr() | |
print(corr_df_auto) | |
#Create the OLS model, train it, and print the training summary | |
olsr_model_auto = sm.OLS(endog=y_train, exog=X_train) | |
olsr_results_auto= olsr_model_auto.fit() | |
print(olsr_results_auto.summary()) | |
#Create the truncated model, train it, and print the training summary | |
olsr_model_auto_truncated = sm.OLS(endog=y_train, exog=X_train[['curb_weight']]) | |
olsr_results_auto_tuncated= olsr_model_auto_truncated.fit() | |
print(olsr_results_auto_tuncated.summary()) | |
#Calculate the omitted variable bias manually | |
#Get the variance-covariance matrix of X: | |
cov_mat = df_train[['curb_weight', 'horsepower']].cov(ddof=1) | |
print('cov(curb_weight, horsepower)=',cov_mat['curb_weight']['horsepower'], 'var(curb_weight)=',cov_mat['curb_weight']['curb_weight']) | |
#Coeffs of fitted full model | |
gamma_1_cap = olsr_results_auto.params.at['curb_weight'] | |
gamma_2_cap = olsr_results_auto.params.at['horsepower'] | |
print(gamma_1_cap, gamma_2_cap) | |
#Omitted Variable Bias on E(beta_1_cap) | |
bias_beta_1 = gamma_2_cap*(cov_mat['curb_weight']['horsepower'])/cov_mat['curb_weight']['curb_weight'] | |
print('bias_beta_1=',bias_beta_1) | |
#Print the biased expectation of beta_1_cap | |
print('Biased E(beta_1_cap)=',gamma_1_cap + bias_beta_1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment