Created
March 1, 2018 17:57
-
-
Save akelleh/47ddf9b826dd5e825a5e826fe282cb33 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 69, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from statsmodels.sandbox.regression.gmm import IV2SLS \n", | |
| "from statsmodels.api import OLS, Logit\n", | |
| "import numpy as np\n", | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 92, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "N = 5000\n", | |
| "\n", | |
| "u = np.random.normal(size=N)\n", | |
| "\n", | |
| "z = np.random.normal(size=N)\n", | |
| "\n", | |
| "p_d = 1. / (1. + np.exp(-(u+z)))\n", | |
| "d = np.random.binomial(1, p=p_d)\n", | |
| "\n", | |
| "y0 = np.random.normal(size=N)\n", | |
| "y1 = np.random.normal(u)\n", | |
| "y = (d==1)* y1 + (d==0) * y0\n", | |
| "X = pd.DataFrame({'d': d, 'y': y, 'z': z, 'y0': y0, 'y1': y1})\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 93, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "X['intercept'] = 1." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 96, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "0.34807908945587035" | |
| ] | |
| }, | |
| "execution_count": 96, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X[X['d'] == 1]['y'].mean() - X[X['d'] == 0]['y'].mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 97, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "-0.007102631189169498" | |
| ] | |
| }, | |
| "execution_count": 97, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(X['y1'] - X['y0']).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Let's make sure our instrument is reasonably strong..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 110, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>z</th>\n", | |
| " <th>d</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>z</th>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.253473</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td>0.253473</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " z d\n", | |
| "z 1.000000 0.253473\n", | |
| "d 0.253473 1.000000" | |
| ] | |
| }, | |
| "execution_count": 110, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X[['z', 'd']].corr() " | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### First, let's do it in two stages manually, so we can see the process" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 98, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.614431\n", | |
| " Iterations 5\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table class=\"simpletable\">\n", | |
| "<caption>OLS Regression Results</caption>\n", | |
| "<tr>\n", | |
| " <th>Dep. Variable:</th> <td>y</td> <th> R-squared: </th> <td> 0.000</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> -0.000</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 0.4417</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Date:</th> <td>Thu, 01 Mar 2018</td> <th> Prob (F-statistic):</th> <td> 0.506</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Time:</th> <td>12:40:30</td> <th> Log-Likelihood: </th> <td> -8034.4</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>No. Observations:</th> <td> 5000</td> <th> AIC: </th> <td>1.607e+04</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Residuals:</th> <td> 4998</td> <th> BIC: </th> <td>1.609e+04</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>d_expected</th> <td> 0.0591</td> <td> 0.089</td> <td> 0.665</td> <td> 0.506</td> <td> -0.115</td> <td> 0.233</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>intercept</th> <td> 0.1666</td> <td> 0.048</td> <td> 3.493</td> <td> 0.000</td> <td> 0.073</td> <td> 0.260</td>\n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <th>Omnibus:</th> <td>14.783</td> <th> Durbin-Watson: </th> <td> 2.046</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Prob(Omnibus):</th> <td> 0.001</td> <th> Jarque-Bera (JB): </th> <td> 15.132</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Skew:</th> <td> 0.115</td> <th> Prob(JB): </th> <td>0.000518</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Kurtosis:</th> <td> 3.142</td> <th> Cond. No. </th> <td> 6.56</td>\n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<class 'statsmodels.iolib.summary.Summary'>\n", | |
| "\"\"\"\n", | |
| " OLS Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: y R-squared: 0.000\n", | |
| "Model: OLS Adj. R-squared: -0.000\n", | |
| "Method: Least Squares F-statistic: 0.4417\n", | |
| "Date: Thu, 01 Mar 2018 Prob (F-statistic): 0.506\n", | |
| "Time: 12:40:30 Log-Likelihood: -8034.4\n", | |
| "No. Observations: 5000 AIC: 1.607e+04\n", | |
| "Df Residuals: 4998 BIC: 1.609e+04\n", | |
| "Df Model: 1 \n", | |
| "Covariance Type: nonrobust \n", | |
| "==============================================================================\n", | |
| " coef std err t P>|t| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "d_expected 0.0591 0.089 0.665 0.506 -0.115 0.233\n", | |
| "intercept 0.1666 0.048 3.493 0.000 0.073 0.260\n", | |
| "==============================================================================\n", | |
| "Omnibus: 14.783 Durbin-Watson: 2.046\n", | |
| "Prob(Omnibus): 0.001 Jarque-Bera (JB): 15.132\n", | |
| "Skew: 0.115 Prob(JB): 0.000518\n", | |
| "Kurtosis: 3.142 Cond. No. 6.56\n", | |
| "==============================================================================\n", | |
| "\n", | |
| "Warnings:\n", | |
| "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| "execution_count": 98, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "instrument_model = Logit(X['d'], X[['z', 'intercept']])\n", | |
| "instrument_result = instrument_model.fit()\n", | |
| "\n", | |
| "X['d_expected'] = instrument_result.predict(X[['z', 'intercept']])\n", | |
| "causal_model = OLS(X['y'], X[['d_expected', 'intercept']])\n", | |
| "result = causal_model.fit()\n", | |
| "result.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Now, using statsmodel's implementation for 2sls. \n", | |
| "Note: their result.summary method is broken!!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 108, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td>-0.084862</td>\n", | |
| " <td>0.08305</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>intercept</th>\n", | |
| " <td>0.134165</td>\n", | |
| " <td>0.22065</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0 1\n", | |
| "d -0.084862 0.08305\n", | |
| "intercept 0.134165 0.22065" | |
| ] | |
| }, | |
| "execution_count": 108, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model = IV2SLS(X['y'], X[['d', 'intercept']], instrument=X[['z', 'intercept']])\n", | |
| "result = model.fit()\n", | |
| "result.conf_int()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### This all works even if Z doesn't cause D! As long as they're associated (and D doesn't cause Z) then you're okay!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 113, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "N = 5000\n", | |
| "\n", | |
| "u = np.random.normal(size=N)\n", | |
| "\n", | |
| "uz = np.random.normal(size=N)\n", | |
| "\n", | |
| "z = np.random.normal(uz, size=N)\n", | |
| "\n", | |
| "p_d = 1. / (1. + np.exp(-(u+uz)))\n", | |
| "d = np.random.binomial(1, p=p_d)\n", | |
| "\n", | |
| "y0 = np.random.normal(size=N)\n", | |
| "y1 = np.random.normal(u)\n", | |
| "y = (d==1)* y1 + (d==0) * y0\n", | |
| "X = pd.DataFrame({'d': d, 'y': y, 'z': z, 'y0': y0, 'y1': y1})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Let's make sure the instrument is reasonably strong!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 114, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>z</th>\n", | |
| " <th>d</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>z</th>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.260994</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td>0.260994</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " z d\n", | |
| "z 1.000000 0.260994\n", | |
| "d 0.260994 1.000000" | |
| ] | |
| }, | |
| "execution_count": 114, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X[['z', 'd']].corr()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 115, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "X['intercept'] = 1." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 116, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td>-0.294580</td>\n", | |
| " <td>0.209399</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>intercept</th>\n", | |
| " <td>0.060953</td>\n", | |
| " <td>0.318652</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0 1\n", | |
| "d -0.294580 0.209399\n", | |
| "intercept 0.060953 0.318652" | |
| ] | |
| }, | |
| "execution_count": 116, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model = IV2SLS(X['y'], X[['d', 'intercept']], instrument=X[['z', 'intercept']])\n", | |
| "result = model.fit()\n", | |
| "result.conf_int()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Looks good!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### What if the assumption that Z only causes Y through D is violated?\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 145, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "N = 5000\n", | |
| "\n", | |
| "u = np.random.normal(size=N)\n", | |
| "\n", | |
| "z = np.random.normal(size=N)\n", | |
| "\n", | |
| "a = z + np.random.normal(size=N)\n", | |
| "\n", | |
| "p_d = 1. / (1. + np.exp(-(u+z)))\n", | |
| "d = np.random.binomial(1, p=p_d)\n", | |
| "\n", | |
| "y0 = np.random.normal(size=N)\n", | |
| "y1 = a + np.random.normal(u)\n", | |
| "y = (d==1)* y1 + (d==0) * y0\n", | |
| "X = pd.DataFrame({'d': d, 'y': y, 'z': z, 'a' :a, 'y0': y0, 'y1': y1})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 146, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>a</th>\n", | |
| " <th>d</th>\n", | |
| " <th>y</th>\n", | |
| " <th>y0</th>\n", | |
| " <th>y1</th>\n", | |
| " <th>z</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>a</th>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.255894</td>\n", | |
| " <td>0.474881</td>\n", | |
| " <td>-0.021884</td>\n", | |
| " <td>0.711883</td>\n", | |
| " <td>0.704982</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td>0.255894</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.245038</td>\n", | |
| " <td>-0.014144</td>\n", | |
| " <td>0.373323</td>\n", | |
| " <td>0.363579</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>y</th>\n", | |
| " <td>0.474881</td>\n", | |
| " <td>0.245038</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.308260</td>\n", | |
| " <td>0.656390</td>\n", | |
| " <td>0.342872</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>y0</th>\n", | |
| " <td>-0.021884</td>\n", | |
| " <td>-0.014144</td>\n", | |
| " <td>0.308260</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>-0.011003</td>\n", | |
| " <td>-0.031003</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>y1</th>\n", | |
| " <td>0.711883</td>\n", | |
| " <td>0.373323</td>\n", | |
| " <td>0.656390</td>\n", | |
| " <td>-0.011003</td>\n", | |
| " <td>1.000000</td>\n", | |
| " <td>0.514429</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>z</th>\n", | |
| " <td>0.704982</td>\n", | |
| " <td>0.363579</td>\n", | |
| " <td>0.342872</td>\n", | |
| " <td>-0.031003</td>\n", | |
| " <td>0.514429</td>\n", | |
| " <td>1.000000</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " a d y y0 y1 z\n", | |
| "a 1.000000 0.255894 0.474881 -0.021884 0.711883 0.704982\n", | |
| "d 0.255894 1.000000 0.245038 -0.014144 0.373323 0.363579\n", | |
| "y 0.474881 0.245038 1.000000 0.308260 0.656390 0.342872\n", | |
| "y0 -0.021884 -0.014144 0.308260 1.000000 -0.011003 -0.031003\n", | |
| "y1 0.711883 0.373323 0.656390 -0.011003 1.000000 0.514429\n", | |
| "z 0.704982 0.363579 0.342872 -0.031003 0.514429 1.000000" | |
| ] | |
| }, | |
| "execution_count": 146, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X.corr()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 147, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "X['intercept'] = 1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 148, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td>2.618198</td>\n", | |
| " <td>3.178242</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>intercept</th>\n", | |
| " <td>-1.168116</td>\n", | |
| " <td>-0.875108</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0 1\n", | |
| "d 2.618198 3.178242\n", | |
| "intercept -1.168116 -0.875108" | |
| ] | |
| }, | |
| "execution_count": 148, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model = IV2SLS(X['y'], X[['d', 'intercept']], instrument=X[['z', 'intercept']])\n", | |
| "result = model.fit()\n", | |
| "result.conf_int()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Lot's of bias!!!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "We can fix it!!!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 149, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>0</th>\n", | |
| " <th>1</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>d</th>\n", | |
| " <td>-0.152977</td>\n", | |
| " <td>0.424428</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>intercept</th>\n", | |
| " <td>0.178235</td>\n", | |
| " <td>0.470210</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>a</th>\n", | |
| " <td>0.470047</td>\n", | |
| " <td>0.544819</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 0 1\n", | |
| "d -0.152977 0.424428\n", | |
| "intercept 0.178235 0.470210\n", | |
| "a 0.470047 0.544819" | |
| ] | |
| }, | |
| "execution_count": 149, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# here, we have to include 'a' in the instrument definition as well, \n", | |
| "# even though it's a control variable for the y regression\n", | |
| "model = IV2SLS(X['y'], X[['d', 'intercept', 'a']], \n", | |
| " instrument=X[['z', 'intercept', 'a']])\n", | |
| "result = model.fit()\n", | |
| "result.conf_int()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 150, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Optimization terminated successfully.\n", | |
| " Current function value: 0.622092\n", | |
| " Iterations 5\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table class=\"simpletable\">\n", | |
| "<caption>OLS Regression Results</caption>\n", | |
| "<tr>\n", | |
| " <th>Dep. Variable:</th> <td>y</td> <th> R-squared: </th> <td> 0.226</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.225</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td> 727.9</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Date:</th> <td>Thu, 01 Mar 2018</td> <th> Prob (F-statistic):</th> <td>3.63e-278</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Time:</th> <td>12:56:02</td> <th> Log-Likelihood: </th> <td> -8602.5</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>No. Observations:</th> <td> 5000</td> <th> AIC: </th> <td>1.721e+04</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Residuals:</th> <td> 4997</td> <th> BIC: </th> <td>1.723e+04</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Model:</th> <td> 2</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>d_expected</th> <td> 0.1193</td> <td> 0.147</td> <td> 0.812</td> <td> 0.417</td> <td> -0.169</td> <td> 0.407</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>a</th> <td> 0.5089</td> <td> 0.019</td> <td> 26.639</td> <td> 0.000</td> <td> 0.471</td> <td> 0.546</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>intercept</th> <td> 0.3323</td> <td> 0.074</td> <td> 4.473</td> <td> 0.000</td> <td> 0.187</td> <td> 0.478</td>\n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <th>Omnibus:</th> <td>12.644</td> <th> Durbin-Watson: </th> <td> 2.056</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Prob(Omnibus):</th> <td> 0.002</td> <th> Jarque-Bera (JB): </th> <td> 12.644</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Skew:</th> <td> 0.121</td> <th> Prob(JB): </th> <td> 0.00180</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Kurtosis:</th> <td> 3.045</td> <th> Cond. No. </th> <td> 12.1</td>\n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<class 'statsmodels.iolib.summary.Summary'>\n", | |
| "\"\"\"\n", | |
| " OLS Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: y R-squared: 0.226\n", | |
| "Model: OLS Adj. R-squared: 0.225\n", | |
| "Method: Least Squares F-statistic: 727.9\n", | |
| "Date: Thu, 01 Mar 2018 Prob (F-statistic): 3.63e-278\n", | |
| "Time: 12:56:02 Log-Likelihood: -8602.5\n", | |
| "No. Observations: 5000 AIC: 1.721e+04\n", | |
| "Df Residuals: 4997 BIC: 1.723e+04\n", | |
| "Df Model: 2 \n", | |
| "Covariance Type: nonrobust \n", | |
| "==============================================================================\n", | |
| " coef std err t P>|t| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "d_expected 0.1193 0.147 0.812 0.417 -0.169 0.407\n", | |
| "a 0.5089 0.019 26.639 0.000 0.471 0.546\n", | |
| "intercept 0.3323 0.074 4.473 0.000 0.187 0.478\n", | |
| "==============================================================================\n", | |
| "Omnibus: 12.644 Durbin-Watson: 2.056\n", | |
| "Prob(Omnibus): 0.002 Jarque-Bera (JB): 12.644\n", | |
| "Skew: 0.121 Prob(JB): 0.00180\n", | |
| "Kurtosis: 3.045 Cond. No. 12.1\n", | |
| "==============================================================================\n", | |
| "\n", | |
| "Warnings:\n", | |
| "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| "execution_count": 150, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "instrument_model = Logit(X['d'], X[['z', 'intercept']])\n", | |
| "instrument_result = instrument_model.fit()\n", | |
| "\n", | |
| "X['d_expected'] = instrument_result.predict(X[['z', 'intercept']])\n", | |
| "causal_model = OLS(X['y'], X[['d_expected', 'a', 'intercept']])\n", | |
| "result = causal_model.fit()\n", | |
| "result.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.0" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment