Created
October 12, 2019 18:44
-
-
Save grisaitis/cf481034bb413a14d3ea851dab201d31 to your computer and use it in GitHub Desktop.
stackoverflow.com/questions/22381497
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"from sklearn import linear_model\n", | |
"randn = np.random.randn\n", | |
"\n", | |
"X = pd.DataFrame(randn(10,3), columns=['X1','X2','X3'])\n", | |
"y = pd.DataFrame(randn(10,1), columns=['Y']) \n", | |
"\n", | |
"model = linear_model.LinearRegression()\n", | |
"model.fit(X=X, y=y)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## show scikit-learn results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"from scikit-learn\n", | |
"[-0.28671532]\n", | |
"[[ 0.17501115 -0.6928708 0.22336584]]\n" | |
] | |
} | |
], | |
"source": [ | |
"print(\"from scikit-learn\")\n", | |
"print(model.intercept_)\n", | |
"print(model.coef_)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## reproduce scikit-learn results with linear algebra" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"N = len(X)\n", | |
"p = len(X.columns) + 1 # plus one because LinearRegression adds an intercept term" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"X_with_intercept = np.empty(shape=(N, p), dtype=np.float)\n", | |
"X_with_intercept[:, 0] = 1\n", | |
"X_with_intercept[:, 1:p] = X.values" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[[-0.28671532]\n", | |
" [ 0.17501115]\n", | |
" [-0.6928708 ]\n", | |
" [ 0.22336584]]\n" | |
] | |
} | |
], | |
"source": [ | |
"beta_hat = np.linalg.inv(X_with_intercept.T @ X_with_intercept) @ X_with_intercept.T @ y.values\n", | |
"print(beta_hat)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## compute standard errors of the parameter estimates" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"SE(beta_hat[0]): 0.2468580488280805\n", | |
"SE(beta_hat[1]): 0.2965501221823944\n", | |
"SE(beta_hat[2]): 0.3518847753610169\n", | |
"SE(beta_hat[3]): 0.3250760291745124\n" | |
] | |
} | |
], | |
"source": [ | |
"y_hat = model.predict(X)\n", | |
"residuals = y.values - y_hat\n", | |
"residual_sum_of_squares = residuals.T @ residuals\n", | |
"sigma_squared_hat = residual_sum_of_squares[0, 0] / (N - p)\n", | |
"var_beta_hat = np.linalg.inv(X_with_intercept.T @ X_with_intercept) * sigma_squared_hat\n", | |
"for p_ in range(p):\n", | |
" standard_error = var_beta_hat[p_, p_] ** 0.5\n", | |
" print(f\"SE(beta_hat[{p_}]): {standard_error}\")" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## confirm with `statsmodels`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" OLS Regression Results \n", | |
"==============================================================================\n", | |
"Dep. Variable: y R-squared: 0.428\n", | |
"Model: OLS Adj. R-squared: 0.143\n", | |
"Method: Least Squares F-statistic: 1.499\n", | |
"Date: Sat, 12 Oct 2019 Prob (F-statistic): 0.307\n", | |
"Time: 14:32:46 Log-Likelihood: -8.7045\n", | |
"No. Observations: 10 AIC: 25.41\n", | |
"Df Residuals: 6 BIC: 26.62\n", | |
"Df Model: 3 \n", | |
"Covariance Type: nonrobust \n", | |
"==============================================================================\n", | |
" coef std err t P>|t| [0.025 0.975]\n", | |
"------------------------------------------------------------------------------\n", | |
"const -0.2867 0.247 -1.161 0.290 -0.891 0.317\n", | |
"x1 0.1750 0.297 0.590 0.577 -0.551 0.901\n", | |
"x2 -0.6929 0.352 -1.969 0.096 -1.554 0.168\n", | |
"x3 0.2234 0.325 0.687 0.518 -0.572 1.019\n", | |
"==============================================================================\n", | |
"Omnibus: 1.786 Durbin-Watson: 2.439\n", | |
"Prob(Omnibus): 0.409 Jarque-Bera (JB): 0.888\n", | |
"Skew: -0.307 Prob(JB): 0.641\n", | |
"Kurtosis: 1.675 Cond. No. 1.91\n", | |
"==============================================================================\n", | |
"\n", | |
"Warnings:\n", | |
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" | |
] | |
} | |
], | |
"source": [ | |
"import statsmodels.api as sm\n", | |
"ols = sm.OLS(y.values, X_with_intercept)\n", | |
"ols_result = ols.fit()\n", | |
"print(ols_result.summary())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.2" | |
}, | |
"varInspector": { | |
"cols": { | |
"lenName": 16, | |
"lenType": 16, | |
"lenVar": 40 | |
}, | |
"kernels_config": { | |
"python": { | |
"delete_cmd_postfix": "", | |
"delete_cmd_prefix": "del ", | |
"library": "var_list.py", | |
"varRefreshCmd": "print(var_dic_list())" | |
}, | |
"r": { | |
"delete_cmd_postfix": ") ", | |
"delete_cmd_prefix": "rm(", | |
"library": "var_list.r", | |
"varRefreshCmd": "cat(var_dic_list()) " | |
} | |
}, | |
"types_to_exclude": [ | |
"module", | |
"function", | |
"builtin_function_or_method", | |
"instance", | |
"_Feature" | |
], | |
"window_display": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment