Last active
August 29, 2015 13:58
-
-
Save jason-curtis/10286720 to your computer and use it in GitHub Desktop.
Statsmodels Issue 987 - formula support in wls_prediction_std (2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:d95c278a4f5c235b14b85ab9c6888ff0e0b56e3637c67e326a84f15ed991a89e" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "heading", | |
"level": 1, | |
"metadata": {}, | |
"source": [ | |
"Lack of formula support in wls_prediction_std" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"https://github.com/statsmodels/statsmodels/issues/987\n", | |
"\n", | |
"Is it possible to generate exog such that wls_prediction_std will accept it? Or is this just a bug/missing feature?" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from scipy import stats\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import statsmodels.formula.api as sm\n", | |
"\n", | |
"# The important one\n", | |
"from statsmodels.sandbox.regression.predstd import wls_prediction_std\n", | |
"\n", | |
"d = pd.DataFrame({'x': np.random.randn(100) })\n", | |
"d['y'] = d.x + np.random.randn(100)\n", | |
"f = sm.ols('y~x', d).fit()\n", | |
"\n", | |
"new_exog=pd.DataFrame({'x':[0.5]})" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 42 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# This works, which makes me think that this format for new_exog should be sufficient\n", | |
"print f.predict(new_exog)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"[ 0.36164212]\n" | |
] | |
} | |
], | |
"prompt_number": 43 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# ...But this doesn't work\n", | |
"print wls_prediction_std(f, new_exog)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "ValueError", | |
"evalue": "wrong shape of exog", | |
"output_type": "pyerr", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", | |
"\u001b[1;32m<ipython-input-44-723c5af991d8>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# ...But this doesn't work\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mprint\u001b[0m \u001b[0mwls_prediction_std\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_exog\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[1;32m/usr/local/lib/python2.7/dist-packages/statsmodels/sandbox/regression/predstd.pyc\u001b[0m in \u001b[0;36mwls_prediction_std\u001b[1;34m(res, exog, weights, alpha)\u001b[0m\n\u001b[0;32m 79\u001b[0m \u001b[0mexog\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_2d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mexog\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 80\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcovb\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[0mexog\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 81\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'wrong shape of exog'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 82\u001b[0m \u001b[0mpredicted\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mres\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mres\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mparams\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexog\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 83\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | |
"\u001b[1;31mValueError\u001b[0m: wrong shape of exog" | |
] | |
} | |
], | |
"prompt_number": 44 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Workaround 1\n", | |
"=====\n", | |
"\n", | |
"Steal the exog transformation wrapper from model.predict()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def transform_exog_to_model(fit, exog):\n", | |
" transform=True\n", | |
" self=fit\n", | |
" \n", | |
" # The following is lifted straight from statsmodels.base.model.Results.predict()\n", | |
" if transform and hasattr(self.model, 'formula') and exog is not None:\n", | |
" from patsy import dmatrix\n", | |
" exog = dmatrix(self.model.data.orig_exog.design_info.builder,\n", | |
" exog)\n", | |
"\n", | |
" if exog is not None:\n", | |
" exog = np.asarray(exog)\n", | |
" if exog.ndim == 1 and (self.model.exog.ndim == 1 or\n", | |
" self.model.exog.shape[1] == 1):\n", | |
" exog = exog[:, None]\n", | |
" exog = np.atleast_2d(exog) # needed in count model shape[1]\n", | |
" \n", | |
" # end lifted code\n", | |
" return exog" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 45 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print new_exog" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" x\n", | |
"0 0.5\n", | |
"\n", | |
"[1 rows x 1 columns]\n" | |
] | |
} | |
], | |
"prompt_number": 46 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"transformed_exog = transform_exog_to_model(f, new_exog)\n", | |
"print transformed_exog" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"[[ 1. 0.5]]\n" | |
] | |
} | |
], | |
"prompt_number": 47 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"print wls_prediction_std(f, transformed_exog, weights=[1])" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(array([ 0.99865096]), array([-1.62014821]), array([ 2.34343245]))\n" | |
] | |
} | |
], | |
"prompt_number": 49 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Better fix\n", | |
"====\n", | |
"\n", | |
"wls_prediction_std should actually use Result.predict" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"def wls_prediction_std(res, exog=None, weights=None, alpha=0.05):\n", | |
" covb = res.cov_params()\n", | |
" if exog is None:\n", | |
" exog = res.model.exog\n", | |
" predicted = res.fittedvalues\n", | |
" if weights is None:\n", | |
" weights = res.model.weights\n", | |
" else:\n", | |
" # Using res.predict() is less hacky than using res.model.predict()\n", | |
" predicted = res.predict(exog)\n", | |
"\n", | |
" if covb.shape[1] != exog.shape[1]:\n", | |
" # raise ValueError('wrong shape of exog') # Nope\n", | |
" # Instead of panicking/raising ValueError in this case, transform \n", | |
" # the exog appropriately so that the interval calculations below can\n", | |
" # function\n", | |
" exog = transform_exog_to_model(res, exog)\n", | |
"\n", | |
" if weights is None:\n", | |
" weights = 1.\n", | |
" else:\n", | |
" weights = np.asarray(weights)\n", | |
" if len(weights) > 1 and len(weights) != exog.shape[0]:\n", | |
" raise ValueError('weights and exog do not have matching shape')\n", | |
"\n", | |
" # The rest of this is unaltered\n", | |
"\n", | |
" if weights is None:\n", | |
" weights = res.model.weights\n", | |
"\n", | |
"\n", | |
" # full covariance:\n", | |
" #predvar = res3.mse_resid + np.diag(np.dot(X2,np.dot(covb,X2.T)))\n", | |
" # predication variance only\n", | |
" predvar = res.mse_resid/weights + (exog * np.dot(covb, exog.T).T).sum(1)\n", | |
" predstd = np.sqrt(predvar)\n", | |
" tppf = stats.t.isf(alpha/2., res.df_resid)\n", | |
" interval_u = predicted + tppf * predstd\n", | |
" interval_l = predicted - tppf * predstd\n", | |
" return predstd, interval_l, interval_u" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 55 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# And now, does this work?\n", | |
"print wls_prediction_std(f, new_exog)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(array([ 0.99865096]), array([-1.62014821]), array([ 2.34343245]))\n" | |
] | |
} | |
], | |
"prompt_number": 58 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment