Skip to content

Instantly share code, notes, and snippets.

@igg
Created March 25, 2015 20:56
Show Gist options
  • Save igg/3f196cde84c9b3968e8c to your computer and use it in GitHub Desktop.
Save igg/3f196cde84c9b3968e8c to your computer and use it in GitHub Desktop.
Does numpy.linalg.lstsq throw warnings/errors for collinear independent variables?
{
"metadata": {
"name": "",
"signature": "sha256:c6dba3c0372f5d64d1306da6b7f9262071490693ce9037782a41a1c688fad155"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"\n",
"\n",
"def print_table(names, columns):\n",
"    \"\"\"Print equal-length columns as a table of right-aligned, 7-character fields.\n",
"\n",
"    names   -- header label for each column\n",
"    columns -- one sequence of numbers per column, all of the same length\n",
"    \"\"\"\n",
"    print(('{:>7s}' * len(names)).format(*names))\n",
"    for row in zip(*columns):\n",
"        print(('{:>7.2f}' * len(row)).format(*row))\n",
"\n",
"\n",
"def report_fit(y, x, x_names):\n",
"    \"\"\"Fit y on the predictors in x plus an intercept and print the results.\n",
"\n",
"    y       -- observed responses\n",
"    x       -- list of predictor columns, each as long as y\n",
"    x_names -- header label for each predictor column\n",
"\n",
"    Prints the data, the coefficients (intercept last), then the data again\n",
"    next to the fitted values. Returns the coefficient vector.\n",
"    \"\"\"\n",
"    print_table(['y'] + x_names, [y] + x)\n",
"    print()\n",
"    # One column per predictor, plus a trailing column of ones for the intercept.\n",
"    X = np.column_stack(x + [[1] * len(x[0])])\n",
"    # rcond=-1 requests the machine-precision cutoff that lstsq used when rcond\n",
"    # was omitted at the time this notebook was written; newer NumPy releases\n",
"    # warn about, or change, the implicit default.\n",
"    # lstsq does not warn about collinear columns; the rank it returns as its\n",
"    # third value is what would reveal them.\n",
"    beta_hat = np.linalg.lstsq(X, y, rcond=-1)[0]\n",
"    print(beta_hat)\n",
"    y_est = np.dot(X, beta_hat)\n",
"    print()\n",
"    print_table(['y', 'y_est'] + x_names, [y, y_est] + x)\n",
"    return beta_hat\n",
"\n",
"\n",
"print('Checking colinearity warnings in numpy.linalg.lstsq using example in:')\n",
"print('http://stackoverflow.com/questions/11479064/multivariate-linear-regression-in-python/26918329#26918329')\n",
"y = [-6, -5, -10, -5, -8, -3, -6, -8, -8]\n",
"x = [[-4.95, -4.55, -10.96, -1.08, -6.52, -0.81, -7.01, -4.46, -11.54],\n",
"     [-5.87, -4.52, -11.64, -3.36, -7.45, -2.36, -7.33, -7.65, -10.03],\n",
"     [-0.76, -0.71, -0.98, 0.75, -0.86, -0.50, -0.33, -0.94, -1.03],\n",
"     [14.73, 13.74, 15.49, 24.72, 16.59, 22.44, 13.93, 11.40, 18.18],\n",
"     [4.02, 4.47, 4.18, 4.96, 4.29, 4.81, 4.32, 4.43, 4.28],\n",
"     [0.20, 0.16, 0.19, 0.16, 0.10, 0.15, 0.21, 0.16, 0.21],\n",
"     [0.45, 0.50, 0.53, 0.60, 0.48, 0.53, 0.50, 0.49, 0.55]]\n",
"x_names = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7']\n",
"beta_hat = report_fit(y, x, x_names)\n",
"\n",
"print('------------------------------')\n",
"print('duplicating x3')\n",
"# Insert an exact copy of x3 right after it, so two predictors are perfectly collinear.\n",
"x_dup = x[:3] + [x[2]] + x[3:]\n",
"x_dup_names = x_names[:3] + ['x3c'] + x_names[3:]\n",
"beta_hat_dup = report_fit(y, x_dup, x_dup_names)\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Checking colinearity warnings in numpy.linalg.lstsq using example in:\n",
"http://stackoverflow.com/questions/11479064/multivariate-linear-regression-in-python/26918329#26918329\n",
" y x1 x2 x3 x4 x5 x6 x7\n",
" -6.00 -4.95 -5.87 -0.76 14.73 4.02 0.20 0.45\n",
" -5.00 -4.55 -4.52 -0.71 13.74 4.47 0.16 0.50\n",
" -10.00 -10.96 -11.64 -0.98 15.49 4.18 0.19 0.53\n",
" -5.00 -1.08 -3.36 0.75 24.72 4.96 0.16 0.60\n",
" -8.00 -6.52 -7.45 -0.86 16.59 4.29 0.10 0.48\n",
" -3.00 -0.81 -2.36 -0.50 22.44 4.81 0.15 0.53\n",
" -6.00 -7.01 -7.33 -0.33 13.93 4.32 0.21 0.50\n",
" -8.00 -4.46 -7.65 -0.94 11.40 4.43 0.16 0.49\n",
" -8.00 -11.54 -10.03 -1.03 18.18 4.28 0.21 0.55\n",
"\n",
"[ -0.49104607 0.83271938 0.0860167 0.1326091 6.85681762\n",
" 22.98163883 -41.08437805 -19.08085066]\n",
"\n",
" y y_est x1 x2 x3 x4 x5 x6 x7\n",
" -6.00 -5.98 -4.95 -5.87 -0.76 14.73 4.02 0.20 0.45\n",
" -5.00 -5.06 -4.55 -4.52 -0.71 13.74 4.47 0.16 0.50\n",
" -10.00 -10.17 -10.96 -11.64 -0.98 15.49 4.18 0.19 0.53\n",
" -5.00 -4.97 -1.08 -3.36 0.75 24.72 4.96 0.16 0.60\n",
" -8.00 -7.96 -6.52 -7.45 -0.86 16.59 4.29 0.10 0.48\n",
" -3.00 -3.06 -0.81 -2.36 -0.50 22.44 4.81 0.15 0.53\n",
" -6.00 -6.02 -7.01 -7.33 -0.33 13.93 4.32 0.21 0.50\n",
" -8.00 -7.91 -4.46 -7.65 -0.94 11.40 4.43 0.16 0.49\n",
" -8.00 -7.87 -11.54 -10.03 -1.03 18.18 4.28 0.21 0.55\n",
"------------------------------\n",
"duplicating x3\n",
" y x1 x2 x3 x3c x4 x5 x6 x7\n",
" -6.00 -4.95 -5.87 -0.76 -0.76 14.73 4.02 0.20 0.45\n",
" -5.00 -4.55 -4.52 -0.71 -0.71 13.74 4.47 0.16 0.50\n",
" -10.00 -10.96 -11.64 -0.98 -0.98 15.49 4.18 0.19 0.53\n",
" -5.00 -1.08 -3.36 0.75 0.75 24.72 4.96 0.16 0.60\n",
" -8.00 -6.52 -7.45 -0.86 -0.86 16.59 4.29 0.10 0.48\n",
" -3.00 -0.81 -2.36 -0.50 -0.50 22.44 4.81 0.15 0.53\n",
" -6.00 -7.01 -7.33 -0.33 -0.33 13.93 4.32 0.21 0.50\n",
" -8.00 -4.46 -7.65 -0.94 -0.94 11.40 4.43 0.16 0.49\n",
" -8.00 -11.54 -10.03 -1.03 -1.03 18.18 4.28 0.21 0.55\n",
"\n",
"[ -0.49104607 0.83271938 0.04300835 0.04300835 0.1326091\n",
" 6.85681762 22.98163883 -41.08437805 -19.08085066]\n",
"\n",
" y y_est x1 x2 x3 x3c x4 x5 x6 x7\n",
" -6.00 -5.98 -4.95 -5.87 -0.76 -0.76 14.73 4.02 0.20 0.45\n",
" -5.00 -5.06 -4.55 -4.52 -0.71 -0.71 13.74 4.47 0.16 0.50\n",
" -10.00 -10.17 -10.96 -11.64 -0.98 -0.98 15.49 4.18 0.19 0.53\n",
" -5.00 -4.97 -1.08 -3.36 0.75 0.75 24.72 4.96 0.16 0.60\n",
" -8.00 -7.96 -6.52 -7.45 -0.86 -0.86 16.59 4.29 0.10 0.48\n",
" -3.00 -3.06 -0.81 -2.36 -0.50 -0.50 22.44 4.81 0.15 0.53\n",
" -6.00 -6.02 -7.01 -7.33 -0.33 -0.33 13.93 4.32 0.21 0.50\n",
" -8.00 -7.91 -4.46 -7.65 -0.94 -0.94 11.40 4.43 0.16 0.49\n",
" -8.00 -7.87 -11.54 -10.03 -1.03 -1.03 18.18 4.28 0.21 0.55\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment