Created
February 15, 2018 06:45
-
-
Save akelleh/37159668dc10b2f0653df29c29bb6c88 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 101, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import pandas as pd\n", | |
| "from statsmodels.api import WLS, OLS\n", | |
| "from sklearn.linear_model import LogisticRegression" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 223, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "N = 10000\n", | |
| "a = np.random.uniform(0.1, 1., size=N)\n", | |
| "b = np.random.uniform(0.1, 1., size=N)\n", | |
| "\n", | |
| "v0 = np.random.normal(0., 1., size=N)\n", | |
| "v1 = np.random.normal(0., 1., size=N)\n", | |
| "\n", | |
| "p_d = 1. / (1. + np.exp(-(a + b)))\n", | |
| "d = np.random.binomial(1., p=p_d)\n", | |
| "\n", | |
| "y0 = 100 + 3*a + 2*b + v0\n", | |
| "y1 = 102 + 6*a + 4*b + v1\n", | |
| "y = (d==1)*y1 + (d==0)*y0\n", | |
| "\n", | |
| "X = pd.DataFrame({'$D$': d, '$A$': a, '$B$': b, '$Y$': y })" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 224, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>$A$</th>\n", | |
| " <th>$B$</th>\n", | |
| " <th>$D$</th>\n", | |
| " <th>$Y$</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.298399</td>\n", | |
| " <td>0.157860</td>\n", | |
| " <td>0</td>\n", | |
| " <td>101.921423</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.948741</td>\n", | |
| " <td>0.774021</td>\n", | |
| " <td>1</td>\n", | |
| " <td>111.275426</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.284150</td>\n", | |
| " <td>0.522778</td>\n", | |
| " <td>1</td>\n", | |
| " <td>106.100454</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.299112</td>\n", | |
| " <td>0.771244</td>\n", | |
| " <td>0</td>\n", | |
| " <td>101.526627</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0.633202</td>\n", | |
| " <td>0.865644</td>\n", | |
| " <td>1</td>\n", | |
| " <td>109.408435</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " $A$ $B$ $D$ $Y$\n", | |
| "0 0.298399 0.157860 0 101.921423\n", | |
| "1 0.948741 0.774021 1 111.275426\n", | |
| "2 0.284150 0.522778 1 106.100454\n", | |
| "3 0.299112 0.771244 0 101.526627\n", | |
| "4 0.633202 0.865644 1 109.408435" | |
| ] | |
| }, | |
| "execution_count": 224, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 225, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.110342523326139" | |
| ] | |
| }, | |
| "execution_count": 225, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X[X['$D$'] == 1].mean()['$Y$'] - X[X['$D$'] == 0].mean()['$Y$']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 226, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "4.746104820767432" | |
| ] | |
| }, | |
| "execution_count": 226, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(y1 - y0).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 227, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "$D$\n", | |
| "0 102.545746\n", | |
| "1 102.821791\n", | |
| "Name: $Y_0$, dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 227, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# baseline bias?\n", | |
| "X = pd.DataFrame({'$D$': d, '$A$': a, '$B$': b, '$Y$': y, '$Y_0$': y0, '$Y_1$': y1 })\n", | |
| "X.groupby('$D$').mean()['$Y_0$']\n", | |
| "# looks like it!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 228, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "$D$\n", | |
| "0 4.482932\n", | |
| "1 4.834297\n", | |
| "Name: $\\delta$, dtype: float64" | |
| ] | |
| }, | |
| "execution_count": 228, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# differential treatment effect bias?\n", | |
| "X['$\\delta$'] = X['$Y_1$'] - X['$Y_0$'] \n", | |
| "X.groupby('$D$').mean()['$\\delta$']\n", | |
| "# looks like it!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## (1) Identify a set Z satisfying the back door criterion" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Looking at the data generating process, $D$ and $Y$ are both caused by $A$ and $B$. We need to control for both $A$ and $B$ to satisfy the back door criterion!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## (2) Compute propensity scores, $P(D=1|Z)$" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 229, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "propensity_model = LogisticRegression(C=1e9) # we don't want bias due to regularization!! use a large C.\n", | |
| "propensity_model = propensity_model.fit(X[['$A$', '$B$']], X['$D$'])\n", | |
| "X['$P(D=1|A,B)$'] = propensity_model.predict_proba(X[['$A$', '$B$']])[:,1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 230, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>$A$</th>\n", | |
| " <th>$B$</th>\n", | |
| " <th>$D$</th>\n", | |
| " <th>$Y$</th>\n", | |
| " <th>$Y_0$</th>\n", | |
| " <th>$Y_1$</th>\n", | |
| " <th>$\\delta$</th>\n", | |
| " <th>$P(D=1|A,B)$</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>0.298399</td>\n", | |
| " <td>0.157860</td>\n", | |
| " <td>0</td>\n", | |
| " <td>101.921423</td>\n", | |
| " <td>101.921423</td>\n", | |
| " <td>105.433990</td>\n", | |
| " <td>3.512567</td>\n", | |
| " <td>0.622326</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>0.948741</td>\n", | |
| " <td>0.774021</td>\n", | |
| " <td>1</td>\n", | |
| " <td>111.275426</td>\n", | |
| " <td>103.901519</td>\n", | |
| " <td>111.275426</td>\n", | |
| " <td>7.373907</td>\n", | |
| " <td>0.848982</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>0.284150</td>\n", | |
| " <td>0.522778</td>\n", | |
| " <td>1</td>\n", | |
| " <td>106.100454</td>\n", | |
| " <td>101.816137</td>\n", | |
| " <td>106.100454</td>\n", | |
| " <td>4.284317</td>\n", | |
| " <td>0.697258</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.299112</td>\n", | |
| " <td>0.771244</td>\n", | |
| " <td>0</td>\n", | |
| " <td>101.526627</td>\n", | |
| " <td>101.526627</td>\n", | |
| " <td>106.413867</td>\n", | |
| " <td>4.887240</td>\n", | |
| " <td>0.747716</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0.633202</td>\n", | |
| " <td>0.865644</td>\n", | |
| " <td>1</td>\n", | |
| " <td>109.408435</td>\n", | |
| " <td>104.252385</td>\n", | |
| " <td>109.408435</td>\n", | |
| " <td>5.156051</td>\n", | |
| " <td>0.818254</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " $A$ $B$ $D$ $Y$ $Y_0$ $Y_1$ $\\delta$ \\\n", | |
| "0 0.298399 0.157860 0 101.921423 101.921423 105.433990 3.512567 \n", | |
| "1 0.948741 0.774021 1 111.275426 103.901519 111.275426 7.373907 \n", | |
| "2 0.284150 0.522778 1 106.100454 101.816137 106.100454 4.284317 \n", | |
| "3 0.299112 0.771244 0 101.526627 101.526627 106.413867 4.887240 \n", | |
| "4 0.633202 0.865644 1 109.408435 104.252385 109.408435 5.156051 \n", | |
| "\n", | |
| " $P(D=1|A,B)$ \n", | |
| "0 0.622326 \n", | |
| "1 0.848982 \n", | |
| "2 0.697258 \n", | |
| "3 0.747716 \n", | |
| "4 0.818254 " | |
| ] | |
| }, | |
| "execution_count": 230, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## (3) Compute weights, $w_{i, ATE} = \\frac{1}{p_i}$ if $d=1$, and $w_{i, ATE} = \\frac{1}{1 - p_i}$ if $d=0$, " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 231, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "X['$W_{ATE}$'] = (X['$D$'] == 1)* 1. / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1. /(1. - X['$P(D=1|A,B)$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## (4) Compute the weighted regression estimate for the causal effect, with the specification $Y \\sim \\delta D + \\alpha + \\epsilon$" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 238, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table class=\"simpletable\">\n", | |
| "<caption>WLS Regression Results</caption>\n", | |
| "<tr>\n", | |
| " <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.632</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.632</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.540e+04</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Date:</th> <td>Thu, 15 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Time:</th> <td>00:53:31</td> <th> Log-Likelihood: </th> <td> -20755.</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>No. Observations:</th> <td> 10000</td> <th> AIC: </th> <td>4.151e+04</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Residuals:</th> <td> 9998</td> <th> BIC: </th> <td>4.153e+04</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Covariance Type:</th> <td>HC3</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$D$</th> <td> 4.7099</td> <td> 0.038</td> <td> 124.108</td> <td> 0.000</td> <td> 4.636</td> <td> 4.784</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>intercept</th> <td> 102.7817</td> <td> 0.028</td> <td> 3610.452</td> <td> 0.000</td> <td> 102.726</td> <td> 102.837</td>\n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <th>Omnibus:</th> <td>27.386</td> <th> Durbin-Watson: </th> <td> 1.959</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 21.660</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Skew:</th> <td> 0.003</td> <th> Prob(JB): </th> <td>1.98e-05</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Kurtosis:</th> <td> 2.772</td> <th> Cond. No. </th> <td> 2.62</td>\n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<class 'statsmodels.iolib.summary.Summary'>\n", | |
| "\"\"\"\n", | |
| " WLS Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: $Y$ R-squared: 0.632\n", | |
| "Model: WLS Adj. R-squared: 0.632\n", | |
| "Method: Least Squares F-statistic: 1.540e+04\n", | |
| "Date: Thu, 15 Feb 2018 Prob (F-statistic): 0.00\n", | |
| "Time: 00:53:31 Log-Likelihood: -20755.\n", | |
| "No. Observations: 10000 AIC: 4.151e+04\n", | |
| "Df Residuals: 9998 BIC: 4.153e+04\n", | |
| "Df Model: 1 \n", | |
| "Covariance Type: HC3 \n", | |
| "==============================================================================\n", | |
| " coef std err z P>|z| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "$D$ 4.7099 0.038 124.108 0.000 4.636 4.784\n", | |
| "intercept 102.7817 0.028 3610.452 0.000 102.726 102.837\n", | |
| "==============================================================================\n", | |
| "Omnibus: 27.386 Durbin-Watson: 1.959\n", | |
| "Prob(Omnibus): 0.000 Jarque-Bera (JB): 21.660\n", | |
| "Skew: 0.003 Prob(JB): 1.98e-05\n", | |
| "Kurtosis: 2.772 Cond. No. 2.62\n", | |
| "==============================================================================\n", | |
| "\n", | |
| "Warnings:\n", | |
| "[1] Standard Errors are heteroscedasticity robust (HC3)\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| "execution_count": 238, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X['intercept'] = 1.\n", | |
| "model = WLS(X['$Y$'], X[['$D$', 'intercept']], weights=X['$W_{ATE}$'])\n", | |
| "result = model.fit(cov_type='HC3')\n", | |
| "result.summary()\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Compare with an OLS regression that controls for $A$ and $B$" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 112, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table class=\"simpletable\">\n", | |
| "<caption>OLS Regression Results</caption>\n", | |
| "<tr>\n", | |
| " <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.869</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.869</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>2.205e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Time:</th> <td>22:47:55</td> <th> Log-Likelihood: </th> <td>-1.4947e+06</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>2.989e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Residuals:</th> <td>999996</td> <th> BIC: </th> <td>2.989e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Model:</th> <td> 3</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$D$</th> <td> 4.5907</td> <td> 0.003</td> <td> 1834.191</td> <td> 0.000</td> <td> 4.586</td> <td> 4.596</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$A$</th> <td> 5.2439</td> <td> 0.004</td> <td> 1255.978</td> <td> 0.000</td> <td> 5.236</td> <td> 5.252</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$B$</th> <td> 3.4957</td> <td> 0.004</td> <td> 836.955</td> <td> 0.000</td> <td> 3.488</td> <td> 3.504</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>intercept</th> <td> 98.1239</td> <td> 0.004</td> <td> 2.68e+04</td> <td> 0.000</td> <td> 98.117</td> <td> 98.131</td>\n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <th>Omnibus:</th> <td>160.154</td> <th> Durbin-Watson: </th> <td> 2.000</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 167.868</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Skew:</th> <td>-0.013</td> <th> Prob(JB): </th> <td>3.53e-37</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Kurtosis:</th> <td> 3.058</td> <th> Cond. No. </th> <td> 7.37</td>\n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<class 'statsmodels.iolib.summary.Summary'>\n", | |
| "\"\"\"\n", | |
| " OLS Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: $Y$ R-squared: 0.869\n", | |
| "Model: OLS Adj. R-squared: 0.869\n", | |
| "Method: Least Squares F-statistic: 2.205e+06\n", | |
| "Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n", | |
| "Time: 22:47:55 Log-Likelihood: -1.4947e+06\n", | |
| "No. Observations: 1000000 AIC: 2.989e+06\n", | |
| "Df Residuals: 999996 BIC: 2.989e+06\n", | |
| "Df Model: 3 \n", | |
| "Covariance Type: nonrobust \n", | |
| "==============================================================================\n", | |
| " coef std err t P>|t| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "$D$ 4.5907 0.003 1834.191 0.000 4.586 4.596\n", | |
| "$A$ 5.2439 0.004 1255.978 0.000 5.236 5.252\n", | |
| "$B$ 3.4957 0.004 836.955 0.000 3.488 3.504\n", | |
| "intercept 98.1239 0.004 2.68e+04 0.000 98.117 98.131\n", | |
| "==============================================================================\n", | |
| "Omnibus: 160.154 Durbin-Watson: 2.000\n", | |
| "Prob(Omnibus): 0.000 Jarque-Bera (JB): 167.868\n", | |
| "Skew: -0.013 Prob(JB): 3.53e-37\n", | |
| "Kurtosis: 3.058 Cond. No. 7.37\n", | |
| "==============================================================================\n", | |
| "\n", | |
| "Warnings:\n", | |
| "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| "execution_count": 112, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X['intercept'] = 1.\n", | |
| "model = OLS(X['$Y$'], X[['$D$', '$A$', '$B$', 'intercept']])\n", | |
| "result = model.fit()\n", | |
| "result.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Looks good! the interval for the $D$ coefficient contains the value estimated above for `(y1 - y0).mean()`. What happens if the propensity model was mis-specified?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 94, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "N = 1000000\n", | |
| "a = np.random.uniform(0.1, 1., size=N)\n", | |
| "b = np.random.uniform(0.1, 1., size=N)\n", | |
| "\n", | |
| "v0 = np.random.normal(0., 1., size=N)\n", | |
| "v1 = np.random.normal(0., 1., size=N)\n", | |
| "\n", | |
| "p_d = 1. / (1. + np.exp(-(a + b + a*b))) # now, the propensity model has the extra non-linearity from a*b!\n", | |
| "d = np.random.binomial(1., p=p_d)\n", | |
| "\n", | |
| "y1 = 102 + 3*a + 2*b + 6*a*b + v1\n", | |
| "y0 = 100 + 2*a + 1*b - 2*a*b + v0\n", | |
| "y = (d==1)*y1 + (d==0)*y0\n", | |
| "\n", | |
| "X = pd.DataFrame({'$D$': d, '$A$': a, '$B$': b, '$Y$': y })\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 95, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.821379950883355" | |
| ] | |
| }, | |
| "execution_count": 95, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X[X['$D$'] == 1].mean()['$Y$'] - X[X['$D$'] == 0].mean()['$Y$']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 96, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.5224362133970155" | |
| ] | |
| }, | |
| "execution_count": 96, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(y1 - y0).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 123, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "propensity_model = LogisticRegression(C=1e10) # we don't want bias due to regularization!! use a large C.\n", | |
| "propensity_model = propensity_model.fit(X[['$A$', '$B$']], X['$D$'])\n", | |
| "X['$P(D=1|A,B)$'] = propensity_model.predict_proba(X[['$A$', '$B$']])[:,1]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 124, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "X['$W_{ATE}$'] = (X['$D$'] == 1)* 1. / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1. /(1. - X['$P(D=1|A,B)$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 125, | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table class=\"simpletable\">\n", | |
| "<caption>WLS Regression Results</caption>\n", | |
| "<tr>\n", | |
| " <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.638</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.638</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.764e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Time:</th> <td>23:42:16</td> <th> Log-Likelihood: </th> <td>-2.0689e+06</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>4.138e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Residuals:</th> <td>999998</td> <th> BIC: </th> <td>4.138e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$D$</th> <td> 4.7517</td> <td> 0.004</td> <td> 1328.191</td> <td> 0.000</td> <td> 4.745</td> <td> 4.759</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>intercept</th> <td> 102.7486</td> <td> 0.003</td> <td> 4.06e+04</td> <td> 0.000</td> <td> 102.744</td> <td> 102.754</td>\n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <th>Omnibus:</th> <td>503.715</td> <th> Durbin-Watson: </th> <td> 2.001</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 452.324</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Skew:</th> <td> 0.001</td> <th> Prob(JB): </th> <td>6.01e-99</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Kurtosis:</th> <td> 2.896</td> <th> Cond. No. </th> <td> 2.62</td>\n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<class 'statsmodels.iolib.summary.Summary'>\n", | |
| "\"\"\"\n", | |
| " WLS Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: $Y$ R-squared: 0.638\n", | |
| "Model: WLS Adj. R-squared: 0.638\n", | |
| "Method: Least Squares F-statistic: 1.764e+06\n", | |
| "Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n", | |
| "Time: 23:42:16 Log-Likelihood: -2.0689e+06\n", | |
| "No. Observations: 1000000 AIC: 4.138e+06\n", | |
| "Df Residuals: 999998 BIC: 4.138e+06\n", | |
| "Df Model: 1 \n", | |
| "Covariance Type: nonrobust \n", | |
| "==============================================================================\n", | |
| " coef std err t P>|t| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "$D$ 4.7517 0.004 1328.191 0.000 4.745 4.759\n", | |
| "intercept 102.7486 0.003 4.06e+04 0.000 102.744 102.754\n", | |
| "==============================================================================\n", | |
| "Omnibus: 503.715 Durbin-Watson: 2.001\n", | |
| "Prob(Omnibus): 0.000 Jarque-Bera (JB): 452.324\n", | |
| "Skew: 0.001 Prob(JB): 6.01e-99\n", | |
| "Kurtosis: 2.896 Cond. No. 2.62\n", | |
| "==============================================================================\n", | |
| "\n", | |
| "Warnings:\n", | |
| "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| "execution_count": 125, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X['intercept'] = 1.\n", | |
| "model = WLS(X['$Y$'], X[['$D$', 'intercept']], weights=X['$W_{ATE}$'])\n", | |
| "result = model.fit()\n", | |
| "result.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Not bad! We're tolerant to a little mis-specification. That's probably partly a product of our specific data generating process. \n", | |
| "\n", | |
| "## Now let's try estimating the ATC and ATT\n", | |
| "\n", | |
| "Remember, $w_{i ,ATC} = \\frac{1-p_i}{p_i}$ if $d_i = 1$ and one if $d_i=0$. Also, $w_{i ,ATC} = \\frac{p_i}{1-p_i}$ if $d_i = 0$ and one if $d_i=1$\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 114, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "X['$W_{ATC}$'] = (X['$D$'] == 1)* (1. - X['$P(D=1|A,B)$']) / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 115, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "X['$W_{ATT}$'] = (X['$D$'] == 1)* 1. + (X['$D$'] == 0)* X['$P(D=1|A,B)$'] /(1. - X['$P(D=1|A,B)$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 116, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table class=\"simpletable\">\n", | |
| "<caption>WLS Regression Results</caption>\n", | |
| "<tr>\n", | |
| " <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.649</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.649</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.850e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Time:</th> <td>23:35:35</td> <th> Log-Likelihood: </th> <td>-2.0648e+06</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>4.130e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Residuals:</th> <td>999998</td> <th> BIC: </th> <td>4.130e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$D$</th> <td> 4.8352</td> <td> 0.004</td> <td> 1360.065</td> <td> 0.000</td> <td> 4.828</td> <td> 4.842</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>intercept</th> <td> 102.8326</td> <td> 0.003</td> <td> 4.09e+04</td> <td> 0.000</td> <td> 102.828</td> <td> 102.837</td>\n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <th>Omnibus:</th> <td>5440.296</td> <th> Durbin-Watson: </th> <td> 1.999</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td>5506.076</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Skew:</th> <td> 0.178</td> <th> Prob(JB): </th> <td> 0.00</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Kurtosis:</th> <td> 2.928</td> <th> Cond. No. </th> <td> 2.62</td>\n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<class 'statsmodels.iolib.summary.Summary'>\n", | |
| "\"\"\"\n", | |
| " WLS Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: $Y$ R-squared: 0.649\n", | |
| "Model: WLS Adj. R-squared: 0.649\n", | |
| "Method: Least Squares F-statistic: 1.850e+06\n", | |
| "Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n", | |
| "Time: 23:35:35 Log-Likelihood: -2.0648e+06\n", | |
| "No. Observations: 1000000 AIC: 4.130e+06\n", | |
| "Df Residuals: 999998 BIC: 4.130e+06\n", | |
| "Df Model: 1 \n", | |
| "Covariance Type: nonrobust \n", | |
| "==============================================================================\n", | |
| " coef std err t P>|t| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "$D$ 4.8352 0.004 1360.065 0.000 4.828 4.842\n", | |
| "intercept 102.8326 0.003 4.09e+04 0.000 102.828 102.837\n", | |
| "==============================================================================\n", | |
| "Omnibus: 5440.296 Durbin-Watson: 1.999\n", | |
| "Prob(Omnibus): 0.000 Jarque-Bera (JB): 5506.076\n", | |
| "Skew: 0.178 Prob(JB): 0.00\n", | |
| "Kurtosis: 2.928 Cond. No. 2.62\n", | |
| "==============================================================================\n", | |
| "\n", | |
| "Warnings:\n", | |
| "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| "execution_count": 116, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model = WLS(X['$Y$'], X[['$D$','intercept']], weights=X['$W_{ATT}$'])\n", | |
| "result = model.fit()\n", | |
| "result.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 117, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table class=\"simpletable\">\n", | |
| "<caption>WLS Regression Results</caption>\n", | |
| "<tr>\n", | |
| " <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.620</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.620</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.631e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Time:</th> <td>23:35:36</td> <th> Log-Likelihood: </th> <td>-2.0738e+06</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>4.148e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Residuals:</th> <td>999998</td> <th> BIC: </th> <td>4.148e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$D$</th> <td> 4.5086</td> <td> 0.004</td> <td> 1277.004</td> <td> 0.000</td> <td> 4.502</td> <td> 4.516</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>intercept</th> <td> 102.5042</td> <td> 0.002</td> <td> 4.11e+04</td> <td> 0.000</td> <td> 102.499</td> <td> 102.509</td>\n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <th>Omnibus:</th> <td>40037.598</td> <th> Durbin-Watson: </th> <td> 1.977</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td>45007.475</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Skew:</th> <td>-0.511</td> <th> Prob(JB): </th> <td> 0.00</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Kurtosis:</th> <td> 3.186</td> <th> Cond. No. </th> <td> 2.62</td> \n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<class 'statsmodels.iolib.summary.Summary'>\n", | |
| "\"\"\"\n", | |
| " WLS Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: $Y$ R-squared: 0.620\n", | |
| "Model: WLS Adj. R-squared: 0.620\n", | |
| "Method: Least Squares F-statistic: 1.631e+06\n", | |
| "Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n", | |
| "Time: 23:35:36 Log-Likelihood: -2.0738e+06\n", | |
| "No. Observations: 1000000 AIC: 4.148e+06\n", | |
| "Df Residuals: 999998 BIC: 4.148e+06\n", | |
| "Df Model: 1 \n", | |
| "Covariance Type: nonrobust \n", | |
| "==============================================================================\n", | |
| " coef std err t P>|t| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "$D$ 4.5086 0.004 1277.004 0.000 4.502 4.516\n", | |
| "intercept 102.5042 0.002 4.11e+04 0.000 102.499 102.509\n", | |
| "==============================================================================\n", | |
| "Omnibus: 40037.598 Durbin-Watson: 1.977\n", | |
| "Prob(Omnibus): 0.000 Jarque-Bera (JB): 45007.475\n", | |
| "Skew: -0.511 Prob(JB): 0.00\n", | |
| "Kurtosis: 3.186 Cond. No. 2.62\n", | |
| "==============================================================================\n", | |
| "\n", | |
| "Warnings:\n", | |
| "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| "execution_count": 117, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "model = WLS(X['$Y$'], X[['$D$','intercept']], weights=X['$W_{ATC}$'])\n", | |
| "result = model.fit()\n", | |
| "result.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## What are the true values?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 122, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "4.834500037380762" | |
| ] | |
| }, | |
| "execution_count": 122, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "treated = X[X['$D$'] == 1]\n", | |
| "ATT = (treated['$Y_1$'] - treated['$Y_0$']).mean()\n", | |
| "ATT" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 121, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "4.504568355874528" | |
| ] | |
| }, | |
| "execution_count": 121, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "control = X[X['$D$'] == 0]\n", | |
| "ATC = (control['$Y_1$'] - control['$Y_0$']).mean()\n", | |
| "ATC" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## So it looks good!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 188, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "N = 10000\n", | |
| "a = np.random.uniform(0.1, 1., size=N)\n", | |
| "b = np.random.uniform(0.1, 1., size=N)\n", | |
| "\n", | |
| "v0 = np.random.normal(0., 1., size=N)\n", | |
| "v1 = np.random.normal(0., 1., size=N)\n", | |
| "\n", | |
| "p_d = 1. / (1. + np.exp(-(a + b + a*b))) # now, the propensity model has the extra non-linearity from a*b!\n", | |
| "d = np.random.binomial(1., p=p_d)\n", | |
| "\n", | |
| "y1 = 102 + 3*a + 2*b + 6*a*b + v1\n", | |
| "y0 = 100 + 2*a + 1*b - 2*a*b + v0\n", | |
| "y = (d==1)*y1 + (d==0)*y0\n", | |
| "\n", | |
| "X = pd.DataFrame({'$D$': d, '$A$': a, '$B$': b, '$Y$': y })\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 189, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.523058110698841" | |
| ] | |
| }, | |
| "execution_count": 189, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(y1 - y0).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 190, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.820844123939224" | |
| ] | |
| }, | |
| "execution_count": 190, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X[X['$D$'] == 1].mean()['$Y$'] - X[X['$D$'] == 0].mean()['$Y$']" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 155, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "propensity_model = LogisticRegression(C=1e15) # we don't want bias due to regularization!! use a large C.\n", | |
| "propensity_model = propensity_model.fit(X[['$A$', '$B$']], X['$D$'])\n", | |
| "X['$P(D=1|A,B)$'] = propensity_model.predict_proba(X[['$A$', '$B$']])[:,1]\n", | |
| "\n", | |
| "X['$W_{ATE}$'] = (X['$D$'] == 1)* 1. / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1. /(1. - X['$P(D=1|A,B)$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 156, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<table class=\"simpletable\">\n", | |
| "<caption>WLS Regression Results</caption>\n", | |
| "<tr>\n", | |
| " <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.817</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.817</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.113e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Time:</th> <td>23:55:18</td> <th> Log-Likelihood: </th> <td>-1.8718e+06</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>3.744e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Residuals:</th> <td>999995</td> <th> BIC: </th> <td>3.744e+06</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Df Model:</th> <td> 4</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$D$</th> <td> 5.4912</td> <td> 0.003</td> <td> 1925.728</td> <td> 0.000</td> <td> 5.486</td> <td> 5.497</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$A$</th> <td> 2.5023</td> <td> 0.013</td> <td> 195.606</td> <td> 0.000</td> <td> 2.477</td> <td> 2.527</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$B$</th> <td> 1.5139</td> <td> 0.013</td> <td> 118.437</td> <td> 0.000</td> <td> 1.489</td> <td> 1.539</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>$AB$</th> <td> 2.1753</td> <td> 0.021</td> <td> 102.981</td> <td> 0.000</td> <td> 2.134</td> <td> 2.217</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>intercept</th> <td> 98.2113</td> <td> 0.008</td> <td> 1.24e+04</td> <td> 0.000</td> <td> 98.196</td> <td> 98.227</td>\n", | |
| "</tr>\n", | |
| "</table>\n", | |
| "<table class=\"simpletable\">\n", | |
| "<tr>\n", | |
| " <th>Omnibus:</th> <td>179967.067</td> <th> Durbin-Watson: </th> <td> 1.989</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td>917090.149</td>\n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Skew:</th> <td>-0.783</td> <th> Prob(JB): </th> <td> 0.00</td> \n", | |
| "</tr>\n", | |
| "<tr>\n", | |
| " <th>Kurtosis:</th> <td> 7.422</td> <th> Cond. No. </th> <td> 27.7</td> \n", | |
| "</tr>\n", | |
| "</table>" | |
| ], | |
| "text/plain": [ | |
| "<class 'statsmodels.iolib.summary.Summary'>\n", | |
| "\"\"\"\n", | |
| " WLS Regression Results \n", | |
| "==============================================================================\n", | |
| "Dep. Variable: $Y$ R-squared: 0.817\n", | |
| "Model: WLS Adj. R-squared: 0.817\n", | |
| "Method: Least Squares F-statistic: 1.113e+06\n", | |
| "Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n", | |
| "Time: 23:55:18 Log-Likelihood: -1.8718e+06\n", | |
| "No. Observations: 1000000 AIC: 3.744e+06\n", | |
| "Df Residuals: 999995 BIC: 3.744e+06\n", | |
| "Df Model: 4 \n", | |
| "Covariance Type: nonrobust \n", | |
| "==============================================================================\n", | |
| " coef std err t P>|t| [0.025 0.975]\n", | |
| "------------------------------------------------------------------------------\n", | |
| "$D$ 5.4912 0.003 1925.728 0.000 5.486 5.497\n", | |
| "$A$ 2.5023 0.013 195.606 0.000 2.477 2.527\n", | |
| "$B$ 1.5139 0.013 118.437 0.000 1.489 1.539\n", | |
| "$AB$ 2.1753 0.021 102.981 0.000 2.134 2.217\n", | |
| "intercept 98.2113 0.008 1.24e+04 0.000 98.196 98.227\n", | |
| "==============================================================================\n", | |
| "Omnibus: 179967.067 Durbin-Watson: 1.989\n", | |
| "Prob(Omnibus): 0.000 Jarque-Bera (JB): 917090.149\n", | |
| "Skew: -0.783 Prob(JB): 0.00\n", | |
| "Kurtosis: 7.422 Cond. No. 27.7\n", | |
| "==============================================================================\n", | |
| "\n", | |
| "Warnings:\n", | |
| "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", | |
| "\"\"\"" | |
| ] | |
| }, | |
| "execution_count": 156, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "X['intercept'] = 1.\n", | |
| "X['$A^2$'] = X['$A$'] ** 2\n", | |
| "X['$B^2$'] = X['$B$'] ** 2\n", | |
| "X['$AB$'] = X['$B$'] * X['$A$']\n", | |
| "\n", | |
| "model = WLS(X['$Y$'], X[['$D$', '$A$', '$B$', '$AB$', 'intercept']], weights=X['$W_{ATE}$'])\n", | |
| "result = model.fit()\n", | |
| "result.summary()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## So we have a doubly robust regression by including $A$ and $B$ in the regression model, as well as the propensity score model!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Can we use more general models with weighting and the g-formula?" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Check the naive estimate first:" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 165, | |
| "metadata": { | |
| "scrolled": true | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Epoch 1/1\n", | |
| "1000000/1000000 [==============================] - 18s 18us/step - loss: 54.4477\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<keras.callbacks.History at 0x7fd8a3c342e8>" | |
| ] | |
| }, | |
| "execution_count": 165, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from keras.models import Model\n", | |
| "from keras.layers import Input, Dense\n", | |
| "\n", | |
| "x_variables = ['$D$']\n", | |
| "\n", | |
| "x_in = Input(shape=(len(x_variables),))\n", | |
| "h1 = Dense(100, activation='relu')(x_in)\n", | |
| "h2 = Dense(100, activation='relu')(h1)\n", | |
| "y_out = Dense(1, activation='linear')(h2)\n", | |
| "\n", | |
| "model = Model(inputs=[x_in], outputs=[y_out])\n", | |
| "model.compile(loss='mean_squared_error', optimizer='RMSprop')\n", | |
| "\n", | |
| "model.fit(X[x_variables], X['$Y$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 166, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = X.copy()\n", | |
| "df['$D$'] = 1\n", | |
| "y1_hat = model.predict(df['$D$'])\n", | |
| "df['$D$'] = 0\n", | |
| "y0_hat = model.predict(df['$D$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 167, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.798075" | |
| ] | |
| }, | |
| "execution_count": 167, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(y1_hat - y0_hat).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "## close to the naive estimator! Now, let's try weighting..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 168, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Epoch 1/1\n", | |
| "1000000/1000000 [==============================] - 30s 30us/step - loss: 107.8438\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<keras.callbacks.History at 0x7fd8b73e01d0>" | |
| ] | |
| }, | |
| "execution_count": 168, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from keras.models import Model\n", | |
| "from keras.layers import Input, Dense\n", | |
| "\n", | |
| "x_variables = ['$D$']\n", | |
| "\n", | |
| "x_in = Input(shape=(len(x_variables),))\n", | |
| "h1 = Dense(100, activation='relu')(x_in)\n", | |
| "h2 = Dense(100, activation='relu')(h1)\n", | |
| "y_out = Dense(1, activation='linear')(h2)\n", | |
| "\n", | |
| "model = Model(inputs=[x_in], outputs=[y_out])\n", | |
| "model.compile(loss='mean_squared_error', optimizer='RMSprop')\n", | |
| "\n", | |
| "model.fit(X[x_variables], X['$Y$'], sample_weight=X['$W_{ATE}$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 169, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = X.copy()\n", | |
| "df['$D$'] = 1\n", | |
| "y1_hat = model.predict(df['$D$'])\n", | |
| "df['$D$'] = 0\n", | |
| "y0_hat = model.predict(df['$D$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 170, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.4365234" | |
| ] | |
| }, | |
| "execution_count": 170, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(y1_hat - y0_hat).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Much better! Now, let's try doubly-robust ..." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 172, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Epoch 1/1\n", | |
| "1000000/1000000 [==============================] - 30s 30us/step - loss: 63.2424\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<keras.callbacks.History at 0x7fd8b748b4e0>" | |
| ] | |
| }, | |
| "execution_count": 172, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from keras.models import Model\n", | |
| "from keras.layers import Input, Dense\n", | |
| "\n", | |
| "x_variables = ['$D$', '$A$', '$B$']\n", | |
| "\n", | |
| "x_in = Input(shape=(len(x_variables),))\n", | |
| "h1 = Dense(100, activation='relu')(x_in)\n", | |
| "h2 = Dense(100, activation='relu')(h1)\n", | |
| "y_out = Dense(1, activation='linear')(h2)\n", | |
| "\n", | |
| "model = Model(inputs=[x_in], outputs=[y_out])\n", | |
| "model.compile(loss='mean_squared_error', optimizer='RMSprop')\n", | |
| "\n", | |
| "model.fit(X[x_variables], X['$Y$'], sample_weight=X['$W_{ATE}$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 174, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = X.copy()\n", | |
| "df['$D$'] = 1\n", | |
| "y1_hat = model.predict(df[x_variables])\n", | |
| "df['$D$'] = 0\n", | |
| "y0_hat = model.predict(df[x_variables])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 175, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.416282" | |
| ] | |
| }, | |
| "execution_count": 175, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(y1_hat - y0_hat).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Let's go crazy -- feedforward nets for everything!" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 201, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Epoch 1/10\n", | |
| "10000/10000 [==============================] - 0s 42us/step - loss: 0.1710\n", | |
| "Epoch 2/10\n", | |
| "10000/10000 [==============================] - 0s 20us/step - loss: 0.1604\n", | |
| "Epoch 3/10\n", | |
| "10000/10000 [==============================] - 0s 20us/step - loss: 0.1604\n", | |
| "Epoch 4/10\n", | |
| "10000/10000 [==============================] - 0s 19us/step - loss: 0.1600\n", | |
| "Epoch 5/10\n", | |
| "10000/10000 [==============================] - 0s 20us/step - loss: 0.1602\n", | |
| "Epoch 6/10\n", | |
| "10000/10000 [==============================] - 0s 19us/step - loss: 0.1598\n", | |
| "Epoch 7/10\n", | |
| "10000/10000 [==============================] - 0s 19us/step - loss: 0.1599\n", | |
| "Epoch 8/10\n", | |
| "10000/10000 [==============================] - 0s 19us/step - loss: 0.1595\n", | |
| "Epoch 9/10\n", | |
| "10000/10000 [==============================] - 0s 19us/step - loss: 0.1596\n", | |
| "Epoch 10/10\n", | |
| "10000/10000 [==============================] - 0s 19us/step - loss: 0.1593\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "from keras.models import Model\n", | |
| "from keras.layers import Input, Dense\n", | |
| "\n", | |
| "x_variables = ['$A$', '$B$']\n", | |
| "\n", | |
| "x_in = Input(shape=(len(x_variables),))\n", | |
| "h1 = Dense(100, activation='relu')(x_in)\n", | |
| "h2 = Dense(100, activation='relu')(h1)\n", | |
| "y_out = Dense(1, activation='linear')(h2)\n", | |
| "\n", | |
| "propensity_model = Model(inputs=[x_in], outputs=[y_out])\n", | |
| "propensity_model.compile(loss='mean_squared_error', optimizer='RMSprop')\n", | |
| "\n", | |
| "propensity_model.fit(X[x_variables], X['$D$'], epochs=10)\n", | |
| "\n", | |
| "X['$P(D=1|A,B)$'] = propensity_model.predict(X[['$A$', '$B$']])\n", | |
| "\n", | |
| "X['$W_{ATE}$'] = (X['$D$'] == 1)* 1. / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1. /(1. - X['$P(D=1|A,B)$'])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 202, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Epoch 1/10\n", | |
| "10000/10000 [==============================] - 1s 54us/step - loss: 11955.0360\n", | |
| "Epoch 2/10\n", | |
| "10000/10000 [==============================] - 0s 31us/step - loss: 938.2027\n", | |
| "Epoch 3/10\n", | |
| "10000/10000 [==============================] - 0s 32us/step - loss: 7.6465\n", | |
| "Epoch 4/10\n", | |
| "10000/10000 [==============================] - 0s 31us/step - loss: 7.2900\n", | |
| "Epoch 5/10\n", | |
| "10000/10000 [==============================] - 0s 31us/step - loss: 7.5390\n", | |
| "Epoch 6/10\n", | |
| "10000/10000 [==============================] - 0s 31us/step - loss: 7.3276\n", | |
| "Epoch 7/10\n", | |
| "10000/10000 [==============================] - 0s 37us/step - loss: 7.2862\n", | |
| "Epoch 8/10\n", | |
| "10000/10000 [==============================] - 0s 36us/step - loss: 7.3980\n", | |
| "Epoch 9/10\n", | |
| "10000/10000 [==============================] - 0s 31us/step - loss: 7.2535\n", | |
| "Epoch 10/10\n", | |
| "10000/10000 [==============================] - 0s 32us/step - loss: 7.3800\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<keras.callbacks.History at 0x7fd8b5946e80>" | |
| ] | |
| }, | |
| "execution_count": 202, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "from keras.models import Model\n", | |
| "from keras.layers import Input, Dense\n", | |
| "\n", | |
| "x_variables = ['$D$']\n", | |
| "\n", | |
| "x_in = Input(shape=(len(x_variables),))\n", | |
| "h1 = Dense(100, activation='relu')(x_in)\n", | |
| "h2 = Dense(100, activation='relu')(h1)\n", | |
| "y_out = Dense(1, activation='linear')(h2)\n", | |
| "\n", | |
| "model = Model(inputs=[x_in], outputs=[y_out])\n", | |
| "model.compile(loss='mean_squared_error', optimizer='RMSprop')\n", | |
| "\n", | |
| "model.fit(X[x_variables], X['$Y$'], sample_weight=X['$W_{ATE}$'], epochs=10)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 203, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = X.copy()\n", | |
| "df['$D$'] = 1\n", | |
| "y1_hat = model.predict(df[x_variables])\n", | |
| "df['$D$'] = 0\n", | |
| "y0_hat = model.predict(df[x_variables])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 204, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.6454697" | |
| ] | |
| }, | |
| "execution_count": 204, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(y1_hat - y0_hat).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 205, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "5.523058110698841" | |
| ] | |
| }, | |
| "execution_count": 205, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "(y1 - y0).mean()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.2" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment