Skip to content

Instantly share code, notes, and snippets.

@akelleh
Created March 1, 2018 17:57
Show Gist options
  • Save akelleh/47ddf9b826dd5e825a5e826fe282cb33 to your computer and use it in GitHub Desktop.
Save akelleh/47ddf9b826dd5e825a5e826fe282cb33 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"from statsmodels.sandbox.regression.gmm import IV2SLS \n",
"from statsmodels.api import OLS, Logit\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"N = 5000\n",
"\n",
"u = np.random.normal(size=N)\n",
"\n",
"z = np.random.normal(size=N)\n",
"\n",
"p_d = 1. / (1. + np.exp(-(u+z)))\n",
"d = np.random.binomial(1, p=p_d)\n",
"\n",
"y0 = np.random.normal(size=N)\n",
"y1 = np.random.normal(u)\n",
"y = (d==1)* y1 + (d==0) * y0\n",
"X = pd.DataFrame({'d': d, 'y': y, 'z': z, 'y0': y0, 'y1': y1})\n"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"X['intercept'] = 1."
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.34807908945587035"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[X['d'] == 1]['y'].mean() - X[X['d'] == 0]['y'].mean()"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-0.007102631189169498"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(X['y1'] - X['y0']).mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Let's make sure our instrument is reasonably strong..."
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>z</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>z</th>\n",
" <td>1.000000</td>\n",
" <td>0.253473</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>0.253473</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" z d\n",
"z 1.000000 0.253473\n",
"d 0.253473 1.000000"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[['z', 'd']].corr() "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### First, let's do it in two stages manually, so we can see the process"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimization terminated successfully.\n",
" Current function value: 0.614431\n",
" Iterations 5\n"
]
},
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>y</td> <th> R-squared: </th> <td> 0.000</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> -0.000</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 0.4417</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Thu, 01 Mar 2018</td> <th> Prob (F-statistic):</th> <td> 0.506</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>12:40:30</td> <th> Log-Likelihood: </th> <td> -8034.4</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 5000</td> <th> AIC: </th> <td>1.607e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 4998</td> <th> BIC: </th> <td>1.609e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>d_expected</th> <td> 0.0591</td> <td> 0.089</td> <td> 0.665</td> <td> 0.506</td> <td> -0.115</td> <td> 0.233</td>\n",
"</tr>\n",
"<tr>\n",
" <th>intercept</th> <td> 0.1666</td> <td> 0.048</td> <td> 3.493</td> <td> 0.000</td> <td> 0.073</td> <td> 0.260</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>14.783</td> <th> Durbin-Watson: </th> <td> 2.046</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.001</td> <th> Jarque-Bera (JB): </th> <td> 15.132</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.115</td> <th> Prob(JB): </th> <td>0.000518</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 3.142</td> <th> Cond. No. </th> <td> 6.56</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: y R-squared: 0.000\n",
"Model: OLS Adj. R-squared: -0.000\n",
"Method: Least Squares F-statistic: 0.4417\n",
"Date: Thu, 01 Mar 2018 Prob (F-statistic): 0.506\n",
"Time: 12:40:30 Log-Likelihood: -8034.4\n",
"No. Observations: 5000 AIC: 1.607e+04\n",
"Df Residuals: 4998 BIC: 1.609e+04\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"d_expected 0.0591 0.089 0.665 0.506 -0.115 0.233\n",
"intercept 0.1666 0.048 3.493 0.000 0.073 0.260\n",
"==============================================================================\n",
"Omnibus: 14.783 Durbin-Watson: 2.046\n",
"Prob(Omnibus): 0.001 Jarque-Bera (JB): 15.132\n",
"Skew: 0.115 Prob(JB): 0.000518\n",
"Kurtosis: 3.142 Cond. No. 6.56\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"instrument_model = Logit(X['d'], X[['z', 'intercept']])\n",
"instrument_result = instrument_model.fit()\n",
"\n",
"X['d_expected'] = instrument_result.predict(X[['z', 'intercept']])\n",
"causal_model = OLS(X['y'], X[['d_expected', 'intercept']])\n",
"result = causal_model.fit()\n",
"result.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Now, using statsmodel's implementation for 2sls. \n",
"Note: their result.summary method is broken!!"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>-0.084862</td>\n",
" <td>0.08305</td>\n",
" </tr>\n",
" <tr>\n",
" <th>intercept</th>\n",
" <td>0.134165</td>\n",
" <td>0.22065</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1\n",
"d -0.084862 0.08305\n",
"intercept 0.134165 0.22065"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = IV2SLS(X['y'], X[['d', 'intercept']], instrument=X[['z', 'intercept']])\n",
"result = model.fit()\n",
"result.conf_int()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### This all works even if Z doesn't cause D! As long as they're associated (and D doesn't cause Z) then you're okay!"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"N = 5000\n",
"\n",
"u = np.random.normal(size=N)\n",
"\n",
"uz = np.random.normal(size=N)\n",
"\n",
"z = np.random.normal(uz, size=N)\n",
"\n",
"p_d = 1. / (1. + np.exp(-(u+uz)))\n",
"d = np.random.binomial(1, p=p_d)\n",
"\n",
"y0 = np.random.normal(size=N)\n",
"y1 = np.random.normal(u)\n",
"y = (d==1)* y1 + (d==0) * y0\n",
"X = pd.DataFrame({'d': d, 'y': y, 'z': z, 'y0': y0, 'y1': y1})"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Let's make sure the instrument is reasonably strong!"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>z</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>z</th>\n",
" <td>1.000000</td>\n",
" <td>0.260994</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>0.260994</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" z d\n",
"z 1.000000 0.260994\n",
"d 0.260994 1.000000"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[['z', 'd']].corr()"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"X['intercept'] = 1."
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>-0.294580</td>\n",
" <td>0.209399</td>\n",
" </tr>\n",
" <tr>\n",
" <th>intercept</th>\n",
" <td>0.060953</td>\n",
" <td>0.318652</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1\n",
"d -0.294580 0.209399\n",
"intercept 0.060953 0.318652"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = IV2SLS(X['y'], X[['d', 'intercept']], instrument=X[['z', 'intercept']])\n",
"result = model.fit()\n",
"result.conf_int()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Looks good!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### What if the assumption that Z only causes Y through D is violated?\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [],
"source": [
"N = 5000\n",
"\n",
"u = np.random.normal(size=N)\n",
"\n",
"z = np.random.normal(size=N)\n",
"\n",
"a = z + np.random.normal(size=N)\n",
"\n",
"p_d = 1. / (1. + np.exp(-(u+z)))\n",
"d = np.random.binomial(1, p=p_d)\n",
"\n",
"y0 = np.random.normal(size=N)\n",
"y1 = a + np.random.normal(u)\n",
"y = (d==1)* y1 + (d==0) * y0\n",
"X = pd.DataFrame({'d': d, 'y': y, 'z': z, 'a' :a, 'y0': y0, 'y1': y1})"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>d</th>\n",
" <th>y</th>\n",
" <th>y0</th>\n",
" <th>y1</th>\n",
" <th>z</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>1.000000</td>\n",
" <td>0.255894</td>\n",
" <td>0.474881</td>\n",
" <td>-0.021884</td>\n",
" <td>0.711883</td>\n",
" <td>0.704982</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>0.255894</td>\n",
" <td>1.000000</td>\n",
" <td>0.245038</td>\n",
" <td>-0.014144</td>\n",
" <td>0.373323</td>\n",
" <td>0.363579</td>\n",
" </tr>\n",
" <tr>\n",
" <th>y</th>\n",
" <td>0.474881</td>\n",
" <td>0.245038</td>\n",
" <td>1.000000</td>\n",
" <td>0.308260</td>\n",
" <td>0.656390</td>\n",
" <td>0.342872</td>\n",
" </tr>\n",
" <tr>\n",
" <th>y0</th>\n",
" <td>-0.021884</td>\n",
" <td>-0.014144</td>\n",
" <td>0.308260</td>\n",
" <td>1.000000</td>\n",
" <td>-0.011003</td>\n",
" <td>-0.031003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>y1</th>\n",
" <td>0.711883</td>\n",
" <td>0.373323</td>\n",
" <td>0.656390</td>\n",
" <td>-0.011003</td>\n",
" <td>1.000000</td>\n",
" <td>0.514429</td>\n",
" </tr>\n",
" <tr>\n",
" <th>z</th>\n",
" <td>0.704982</td>\n",
" <td>0.363579</td>\n",
" <td>0.342872</td>\n",
" <td>-0.031003</td>\n",
" <td>0.514429</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a d y y0 y1 z\n",
"a 1.000000 0.255894 0.474881 -0.021884 0.711883 0.704982\n",
"d 0.255894 1.000000 0.245038 -0.014144 0.373323 0.363579\n",
"y 0.474881 0.245038 1.000000 0.308260 0.656390 0.342872\n",
"y0 -0.021884 -0.014144 0.308260 1.000000 -0.011003 -0.031003\n",
"y1 0.711883 0.373323 0.656390 -0.011003 1.000000 0.514429\n",
"z 0.704982 0.363579 0.342872 -0.031003 0.514429 1.000000"
]
},
"execution_count": 146,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.corr()"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [],
"source": [
"X['intercept'] = 1"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>2.618198</td>\n",
" <td>3.178242</td>\n",
" </tr>\n",
" <tr>\n",
" <th>intercept</th>\n",
" <td>-1.168116</td>\n",
" <td>-0.875108</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1\n",
"d 2.618198 3.178242\n",
"intercept -1.168116 -0.875108"
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = IV2SLS(X['y'], X[['d', 'intercept']], instrument=X[['z', 'intercept']])\n",
"result = model.fit()\n",
"result.conf_int()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Lot's of bias!!!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can fix it!!!"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>-0.152977</td>\n",
" <td>0.424428</td>\n",
" </tr>\n",
" <tr>\n",
" <th>intercept</th>\n",
" <td>0.178235</td>\n",
" <td>0.470210</td>\n",
" </tr>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>0.470047</td>\n",
" <td>0.544819</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1\n",
"d -0.152977 0.424428\n",
"intercept 0.178235 0.470210\n",
"a 0.470047 0.544819"
]
},
"execution_count": 149,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# here, we have to include 'a' in the instrument definition as well, \n",
"# even though it's a control variable for the y regression\n",
"model = IV2SLS(X['y'], X[['d', 'intercept', 'a']], \n",
" instrument=X[['z', 'intercept', 'a']])\n",
"result = model.fit()\n",
"result.conf_int()"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Optimization terminated successfully.\n",
" Current function value: 0.622092\n",
" Iterations 5\n"
]
},
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>y</td> <th> R-squared: </th> <td> 0.226</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.225</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 727.9</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Thu, 01 Mar 2018</td> <th> Prob (F-statistic):</th> <td>3.63e-278</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>12:56:02</td> <th> Log-Likelihood: </th> <td> -8602.5</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 5000</td> <th> AIC: </th> <td>1.721e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 4997</td> <th> BIC: </th> <td>1.723e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 2</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>d_expected</th> <td> 0.1193</td> <td> 0.147</td> <td> 0.812</td> <td> 0.417</td> <td> -0.169</td> <td> 0.407</td>\n",
"</tr>\n",
"<tr>\n",
" <th>a</th> <td> 0.5089</td> <td> 0.019</td> <td> 26.639</td> <td> 0.000</td> <td> 0.471</td> <td> 0.546</td>\n",
"</tr>\n",
"<tr>\n",
" <th>intercept</th> <td> 0.3323</td> <td> 0.074</td> <td> 4.473</td> <td> 0.000</td> <td> 0.187</td> <td> 0.478</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>12.644</td> <th> Durbin-Watson: </th> <td> 2.056</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.002</td> <th> Jarque-Bera (JB): </th> <td> 12.644</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.121</td> <th> Prob(JB): </th> <td> 0.00180</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 3.045</td> <th> Cond. No. </th> <td> 12.1</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: y R-squared: 0.226\n",
"Model: OLS Adj. R-squared: 0.225\n",
"Method: Least Squares F-statistic: 727.9\n",
"Date: Thu, 01 Mar 2018 Prob (F-statistic): 3.63e-278\n",
"Time: 12:56:02 Log-Likelihood: -8602.5\n",
"No. Observations: 5000 AIC: 1.721e+04\n",
"Df Residuals: 4997 BIC: 1.723e+04\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"d_expected 0.1193 0.147 0.812 0.417 -0.169 0.407\n",
"a 0.5089 0.019 26.639 0.000 0.471 0.546\n",
"intercept 0.3323 0.074 4.473 0.000 0.187 0.478\n",
"==============================================================================\n",
"Omnibus: 12.644 Durbin-Watson: 2.056\n",
"Prob(Omnibus): 0.002 Jarque-Bera (JB): 12.644\n",
"Skew: 0.121 Prob(JB): 0.00180\n",
"Kurtosis: 3.045 Cond. No. 12.1\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"instrument_model = Logit(X['d'], X[['z', 'intercept']])\n",
"instrument_result = instrument_model.fit()\n",
"\n",
"X['d_expected'] = instrument_result.predict(X[['z', 'intercept']])\n",
"causal_model = OLS(X['y'], X[['d_expected', 'a', 'intercept']])\n",
"result = causal_model.fit()\n",
"result.summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment