{
"cells": [
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from statsmodels.api import WLS, OLS\n",
"from sklearn.linear_model import LogisticRegression"
]
},
{
"cell_type": "code",
"execution_count": 223,
"metadata": {},
"outputs": [],
"source": [
"N = 10000\n",
"a = np.random.uniform(0.1, 1., size=N)\n",
"b = np.random.uniform(0.1, 1., size=N)\n",
"\n",
"v0 = np.random.normal(0., 1., size=N)\n",
"v1 = np.random.normal(0., 1., size=N)\n",
"\n",
"p_d = 1. / (1. + np.exp(-(a + b)))\n",
"d = np.random.binomial(1., p=p_d)\n",
"\n",
"y0 = 100 + 3*a + 2*b + v0\n",
"y1 = 102 + 6*a + 4*b + v1\n",
"y = (d==1)*y1 + (d==0)*y0\n",
"\n",
"X = pd.DataFrame({'$D$': d, '$A$': a, '$B$': b, '$Y$': y })"
]
},
{
"cell_type": "code",
"execution_count": 224,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>$A$</th>\n",
" <th>$B$</th>\n",
" <th>$D$</th>\n",
" <th>$Y$</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.298399</td>\n",
" <td>0.157860</td>\n",
" <td>0</td>\n",
" <td>101.921423</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.948741</td>\n",
" <td>0.774021</td>\n",
" <td>1</td>\n",
" <td>111.275426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.284150</td>\n",
" <td>0.522778</td>\n",
" <td>1</td>\n",
" <td>106.100454</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.299112</td>\n",
" <td>0.771244</td>\n",
" <td>0</td>\n",
" <td>101.526627</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.633202</td>\n",
" <td>0.865644</td>\n",
" <td>1</td>\n",
" <td>109.408435</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" $A$ $B$ $D$ $Y$\n",
"0 0.298399 0.157860 0 101.921423\n",
"1 0.948741 0.774021 1 111.275426\n",
"2 0.284150 0.522778 1 106.100454\n",
"3 0.299112 0.771244 0 101.526627\n",
"4 0.633202 0.865644 1 109.408435"
]
},
"execution_count": 224,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
]
},
{
"cell_type": "code",
"execution_count": 225,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.110342523326139"
]
},
"execution_count": 225,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[X['$D$'] == 1].mean()['$Y$'] - X[X['$D$'] == 0].mean()['$Y$']"
]
},
{
"cell_type": "code",
"execution_count": 226,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4.746104820767432"
]
},
"execution_count": 226,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(y1 - y0).mean()"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"$D$\n",
"0 102.545746\n",
"1 102.821791\n",
"Name: $Y_0$, dtype: float64"
]
},
"execution_count": 227,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# baseline bias?\n",
"X = pd.DataFrame({'$D$': d, '$A$': a, '$B$': b, '$Y$': y, '$Y_0$': y0, '$Y_1$': y1 })\n",
"X.groupby('$D$').mean()['$Y_0$']\n",
"# looks like it!"
]
},
{
"cell_type": "code",
"execution_count": 228,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"$D$\n",
"0 4.482932\n",
"1 4.834297\n",
"Name: $\\delta$, dtype: float64"
]
},
"execution_count": 228,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# differential treatment effect bias?\n",
"X['$\\delta$'] = X['$Y_1$'] - X['$Y_0$'] \n",
"X.groupby('$D$').mean()['$\\delta$']\n",
"# looks like it!"
]
},
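{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a quick sanity check, the naive difference in means should decompose exactly into the ATT plus the baseline bias: $E[Y|D=1]-E[Y|D=0] = E[Y_1-Y_0|D=1] + (E[Y_0|D=1]-E[Y_0|D=0])$. A minimal sketch (the variable names are just illustrative):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: verify naive difference = ATT + baseline bias in this sample\n",
"naive = X[X['$D$'] == 1]['$Y$'].mean() - X[X['$D$'] == 0]['$Y$'].mean()\n",
"att = X[X['$D$'] == 1]['$\\delta$'].mean()\n",
"baseline_bias = X[X['$D$'] == 1]['$Y_0$'].mean() - X[X['$D$'] == 0]['$Y_0$'].mean()\n",
"naive, att + baseline_bias  # the two numbers should agree exactly"
]
},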
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## (1) Identify a set Z satisfying the back door criterion"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Looking at the data generating process, $D$ and $Y$ are both caused by $A$ and $B$. We need to control for both $A$ and $B$ to satisfy the back door criterion!"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## (2) Compute propensity scores, $P(D=1|Z)$"
]
},
{
"cell_type": "code",
"execution_count": 229,
"metadata": {},
"outputs": [],
"source": [
"propensity_model = LogisticRegression(C=1e9) # we don't want bias due to regularization!! use a large C.\n",
"propensity_model = propensity_model.fit(X[['$A$', '$B$']], X['$D$'])\n",
"X['$P(D=1|A,B)$'] = propensity_model.predict_proba(X[['$A$', '$B$']])[:,1]"
]
},
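{
"cell_type": "markdown",
"metadata": {},
"source": [
"Since this is a simulation we also know the true propensities `p_d`, so we can sanity-check the fitted model directly. The logistic model is correctly specified here, so the fitted probabilities should sit very close to the truth. A minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: compare fitted propensities against the true simulation propensities p_d\n",
"errors = np.abs(X['$P(D=1|A,B)$'] - p_d)\n",
"errors.mean(), errors.max()"
]
},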
{
"cell_type": "code",
"execution_count": 230,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>$A$</th>\n",
" <th>$B$</th>\n",
" <th>$D$</th>\n",
" <th>$Y$</th>\n",
" <th>$Y_0$</th>\n",
" <th>$Y_1$</th>\n",
" <th>$\\delta$</th>\n",
" <th>$P(D=1|A,B)$</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.298399</td>\n",
" <td>0.157860</td>\n",
" <td>0</td>\n",
" <td>101.921423</td>\n",
" <td>101.921423</td>\n",
" <td>105.433990</td>\n",
" <td>3.512567</td>\n",
" <td>0.622326</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.948741</td>\n",
" <td>0.774021</td>\n",
" <td>1</td>\n",
" <td>111.275426</td>\n",
" <td>103.901519</td>\n",
" <td>111.275426</td>\n",
" <td>7.373907</td>\n",
" <td>0.848982</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.284150</td>\n",
" <td>0.522778</td>\n",
" <td>1</td>\n",
" <td>106.100454</td>\n",
" <td>101.816137</td>\n",
" <td>106.100454</td>\n",
" <td>4.284317</td>\n",
" <td>0.697258</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.299112</td>\n",
" <td>0.771244</td>\n",
" <td>0</td>\n",
" <td>101.526627</td>\n",
" <td>101.526627</td>\n",
" <td>106.413867</td>\n",
" <td>4.887240</td>\n",
" <td>0.747716</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.633202</td>\n",
" <td>0.865644</td>\n",
" <td>1</td>\n",
" <td>109.408435</td>\n",
" <td>104.252385</td>\n",
" <td>109.408435</td>\n",
" <td>5.156051</td>\n",
" <td>0.818254</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" $A$ $B$ $D$ $Y$ $Y_0$ $Y_1$ $\\delta$ \\\n",
"0 0.298399 0.157860 0 101.921423 101.921423 105.433990 3.512567 \n",
"1 0.948741 0.774021 1 111.275426 103.901519 111.275426 7.373907 \n",
"2 0.284150 0.522778 1 106.100454 101.816137 106.100454 4.284317 \n",
"3 0.299112 0.771244 0 101.526627 101.526627 106.413867 4.887240 \n",
"4 0.633202 0.865644 1 109.408435 104.252385 109.408435 5.156051 \n",
"\n",
" $P(D=1|A,B)$ \n",
"0 0.622326 \n",
"1 0.848982 \n",
"2 0.697258 \n",
"3 0.747716 \n",
"4 0.818254 "
]
},
"execution_count": 230,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## (3) Compute weights, $w_{i, ATE} = \\frac{1}{p_i}$ if $d=1$, and $w_{i, ATE} = \\frac{1}{1 - p_i}$ if $d=0$, "
]
},
{
"cell_type": "code",
"execution_count": 231,
"metadata": {},
"outputs": [],
"source": [
"X['$W_{ATE}$'] = (X['$D$'] == 1)* 1. / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1. /(1. - X['$P(D=1|A,B)$'])"
]
},
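{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before running the weighted regression, note that the IPW estimate of the ATE can be read off directly as a difference of weighted means (the Hajek form of the estimator). With a binary $D$ and an intercept, the WLS coefficient on $D$ below reproduces this number exactly. A minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: IPW (Hajek) ATE as a difference of weighted means;\n",
"# equals the WLS coefficient on $D$ in the next cell\n",
"treated = X[X['$D$'] == 1]\n",
"control = X[X['$D$'] == 0]\n",
"(np.average(treated['$Y$'], weights=treated['$W_{ATE}$'])\n",
" - np.average(control['$Y$'], weights=control['$W_{ATE}$']))"
]
},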
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## (4) Compute the weighted regression estimate for the causal effect, with the specification $Y \\sim \\delta D + \\alpha + \\epsilon$"
]
},
{
"cell_type": "code",
"execution_count": 238,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>WLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.632</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.632</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.540e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Thu, 15 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>00:53:31</td> <th> Log-Likelihood: </th> <td> -20755.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td> 10000</td> <th> AIC: </th> <td>4.151e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td> 9998</td> <th> BIC: </th> <td>4.153e+04</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>HC3</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>z</th> <th>P>|z|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>$D$</th> <td> 4.7099</td> <td> 0.038</td> <td> 124.108</td> <td> 0.000</td> <td> 4.636</td> <td> 4.784</td>\n",
"</tr>\n",
"<tr>\n",
" <th>intercept</th> <td> 102.7817</td> <td> 0.028</td> <td> 3610.452</td> <td> 0.000</td> <td> 102.726</td> <td> 102.837</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>27.386</td> <th> Durbin-Watson: </th> <td> 1.959</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 21.660</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.003</td> <th> Prob(JB): </th> <td>1.98e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 2.772</td> <th> Cond. No. </th> <td> 2.62</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" WLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: $Y$ R-squared: 0.632\n",
"Model: WLS Adj. R-squared: 0.632\n",
"Method: Least Squares F-statistic: 1.540e+04\n",
"Date: Thu, 15 Feb 2018 Prob (F-statistic): 0.00\n",
"Time: 00:53:31 Log-Likelihood: -20755.\n",
"No. Observations: 10000 AIC: 4.151e+04\n",
"Df Residuals: 9998 BIC: 4.153e+04\n",
"Df Model: 1 \n",
"Covariance Type: HC3 \n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"$D$ 4.7099 0.038 124.108 0.000 4.636 4.784\n",
"intercept 102.7817 0.028 3610.452 0.000 102.726 102.837\n",
"==============================================================================\n",
"Omnibus: 27.386 Durbin-Watson: 1.959\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 21.660\n",
"Skew: 0.003 Prob(JB): 1.98e-05\n",
"Kurtosis: 2.772 Cond. No. 2.62\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors are heteroscedasticity robust (HC3)\n",
"\"\"\""
]
},
"execution_count": 238,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X['intercept'] = 1.\n",
"model = WLS(X['$Y$'], X[['$D$', 'intercept']], weights=X['$W_{ATE}$'])\n",
"result = model.fit(cov_type='HC3')\n",
"result.summary()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compare with an OLS regression that controls for $A$ and $B$"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.869</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th> Adj. R-squared: </th> <td> 0.869</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>2.205e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>22:47:55</td> <th> Log-Likelihood: </th> <td>-1.4947e+06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>2.989e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td>999996</td> <th> BIC: </th> <td>2.989e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 3</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>$D$</th> <td> 4.5907</td> <td> 0.003</td> <td> 1834.191</td> <td> 0.000</td> <td> 4.586</td> <td> 4.596</td>\n",
"</tr>\n",
"<tr>\n",
" <th>$A$</th> <td> 5.2439</td> <td> 0.004</td> <td> 1255.978</td> <td> 0.000</td> <td> 5.236</td> <td> 5.252</td>\n",
"</tr>\n",
"<tr>\n",
" <th>$B$</th> <td> 3.4957</td> <td> 0.004</td> <td> 836.955</td> <td> 0.000</td> <td> 3.488</td> <td> 3.504</td>\n",
"</tr>\n",
"<tr>\n",
" <th>intercept</th> <td> 98.1239</td> <td> 0.004</td> <td> 2.68e+04</td> <td> 0.000</td> <td> 98.117</td> <td> 98.131</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>160.154</td> <th> Durbin-Watson: </th> <td> 2.000</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 167.868</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td>-0.013</td> <th> Prob(JB): </th> <td>3.53e-37</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 3.058</td> <th> Cond. No. </th> <td> 7.37</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: $Y$ R-squared: 0.869\n",
"Model: OLS Adj. R-squared: 0.869\n",
"Method: Least Squares F-statistic: 2.205e+06\n",
"Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n",
"Time: 22:47:55 Log-Likelihood: -1.4947e+06\n",
"No. Observations: 1000000 AIC: 2.989e+06\n",
"Df Residuals: 999996 BIC: 2.989e+06\n",
"Df Model: 3 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"$D$ 4.5907 0.003 1834.191 0.000 4.586 4.596\n",
"$A$ 5.2439 0.004 1255.978 0.000 5.236 5.252\n",
"$B$ 3.4957 0.004 836.955 0.000 3.488 3.504\n",
"intercept 98.1239 0.004 2.68e+04 0.000 98.117 98.131\n",
"==============================================================================\n",
"Omnibus: 160.154 Durbin-Watson: 2.000\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 167.868\n",
"Skew: -0.013 Prob(JB): 3.53e-37\n",
"Kurtosis: 3.058 Cond. No. 7.37\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X['intercept'] = 1.\n",
"model = OLS(X['$Y$'], X[['$D$', '$A$', '$B$', 'intercept']])\n",
"result = model.fit()\n",
"result.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Looks good! the interval for the $D$ coefficient contains the value estimated above for `(y1 - y0).mean()`. What happens if the propensity model was mis-specified?"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"N = 1000000\n",
"a = np.random.uniform(0.1, 1., size=N)\n",
"b = np.random.uniform(0.1, 1., size=N)\n",
"\n",
"v0 = np.random.normal(0., 1., size=N)\n",
"v1 = np.random.normal(0., 1., size=N)\n",
"\n",
"p_d = 1. / (1. + np.exp(-(a + b + a*b))) # now, the propensity model has the extra non-linearity from a*b!\n",
"d = np.random.binomial(1., p=p_d)\n",
"\n",
"y1 = 102 + 3*a + 2*b + 6*a*b + v1\n",
"y0 = 100 + 2*a + 1*b - 2*a*b + v0\n",
"y = (d==1)*y1 + (d==0)*y0\n",
"\n",
"X = pd.DataFrame({'$D$': d, '$A$': a, '$B$': b, '$Y$': y })\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.821379950883355"
]
},
"execution_count": 95,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[X['$D$'] == 1].mean()['$Y$'] - X[X['$D$'] == 0].mean()['$Y$']"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.5224362133970155"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(y1 - y0).mean()"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"propensity_model = LogisticRegression(C=1e10) # we don't want bias due to regularization!! use a large C.\n",
"propensity_model = propensity_model.fit(X[['$A$', '$B$']], X['$D$'])\n",
"X['$P(D=1|A,B)$'] = propensity_model.predict_proba(X[['$A$', '$B$']])[:,1]"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"X['$W_{ATE}$'] = (X['$D$'] == 1)* 1. / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1. /(1. - X['$P(D=1|A,B)$'])"
]
},
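{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick weight diagnostic before trusting the estimate: in each arm the $W_{ATE}$ weights should sum to roughly $N$, since inverse probability weighting reconstructs a pseudo-population of the full sample in each arm, and a very large maximum weight would flag propensity estimates near 0 or 1. A minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: weights in each arm should sum to roughly N = 1,000,000;\n",
"# an extreme max would flag near-degenerate propensity estimates\n",
"X.groupby('$D$')['$W_{ATE}$'].agg(['sum', 'mean', 'max'])"
]
},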
{
"cell_type": "code",
"execution_count": 125,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>WLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.638</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.638</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.764e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>23:42:16</td> <th> Log-Likelihood: </th> <td>-2.0689e+06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>4.138e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td>999998</td> <th> BIC: </th> <td>4.138e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>$D$</th> <td> 4.7517</td> <td> 0.004</td> <td> 1328.191</td> <td> 0.000</td> <td> 4.745</td> <td> 4.759</td>\n",
"</tr>\n",
"<tr>\n",
" <th>intercept</th> <td> 102.7486</td> <td> 0.003</td> <td> 4.06e+04</td> <td> 0.000</td> <td> 102.744</td> <td> 102.754</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>503.715</td> <th> Durbin-Watson: </th> <td> 2.001</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td> 452.324</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.001</td> <th> Prob(JB): </th> <td>6.01e-99</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 2.896</td> <th> Cond. No. </th> <td> 2.62</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" WLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: $Y$ R-squared: 0.638\n",
"Model: WLS Adj. R-squared: 0.638\n",
"Method: Least Squares F-statistic: 1.764e+06\n",
"Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n",
"Time: 23:42:16 Log-Likelihood: -2.0689e+06\n",
"No. Observations: 1000000 AIC: 4.138e+06\n",
"Df Residuals: 999998 BIC: 4.138e+06\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"$D$ 4.7517 0.004 1328.191 0.000 4.745 4.759\n",
"intercept 102.7486 0.003 4.06e+04 0.000 102.744 102.754\n",
"==============================================================================\n",
"Omnibus: 503.715 Durbin-Watson: 2.001\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 452.324\n",
"Skew: 0.001 Prob(JB): 6.01e-99\n",
"Kurtosis: 2.896 Cond. No. 2.62\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X['intercept'] = 1.\n",
"model = WLS(X['$Y$'], X[['$D$', 'intercept']], weights=X['$W_{ATE}$'])\n",
"result = model.fit()\n",
"result.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Not bad! We're tolerant to a little mis-specification. That's probably partly a product of our specific data generating process. \n",
"\n",
"## Now let's try estimating the ATC and ATT\n",
"\n",
"Remember, $w_{i ,ATC} = \\frac{1-p_i}{p_i}$ if $d_i = 1$ and one if $d_i=0$. Also, $w_{i ,ATC} = \\frac{p_i}{1-p_i}$ if $d_i = 0$ and one if $d_i=1$\n"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"X['$W_{ATC}$'] = (X['$D$'] == 1)* (1. - X['$P(D=1|A,B)$']) / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1."
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {},
"outputs": [],
"source": [
"X['$W_{ATT}$'] = (X['$D$'] == 1)* 1. + (X['$D$'] == 0)* X['$P(D=1|A,B)$'] /(1. - X['$P(D=1|A,B)$'])"
]
},
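{
"cell_type": "markdown",
"metadata": {},
"source": [
"As with the ATE, the ATT and ATC can be read off directly as differences of weighted means; the WLS coefficients on $D$ below reproduce these numbers exactly. A minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: ATT and ATC as differences of weighted means, matching the WLS fits below\n",
"t, c = X[X['$D$'] == 1], X[X['$D$'] == 0]\n",
"for w in ['$W_{ATT}$', '$W_{ATC}$']:\n",
"    print(w, np.average(t['$Y$'], weights=t[w]) - np.average(c['$Y$'], weights=c[w]))"
]
},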
{
"cell_type": "code",
"execution_count": 116,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>WLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.649</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.649</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.850e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>23:35:35</td> <th> Log-Likelihood: </th> <td>-2.0648e+06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>4.130e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td>999998</td> <th> BIC: </th> <td>4.130e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>$D$</th> <td> 4.8352</td> <td> 0.004</td> <td> 1360.065</td> <td> 0.000</td> <td> 4.828</td> <td> 4.842</td>\n",
"</tr>\n",
"<tr>\n",
" <th>intercept</th> <td> 102.8326</td> <td> 0.003</td> <td> 4.09e+04</td> <td> 0.000</td> <td> 102.828</td> <td> 102.837</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>5440.296</td> <th> Durbin-Watson: </th> <td> 1.999</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td>5506.076</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td> 0.178</td> <th> Prob(JB): </th> <td> 0.00</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 2.928</td> <th> Cond. No. </th> <td> 2.62</td>\n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" WLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: $Y$ R-squared: 0.649\n",
"Model: WLS Adj. R-squared: 0.649\n",
"Method: Least Squares F-statistic: 1.850e+06\n",
"Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n",
"Time: 23:35:35 Log-Likelihood: -2.0648e+06\n",
"No. Observations: 1000000 AIC: 4.130e+06\n",
"Df Residuals: 999998 BIC: 4.130e+06\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"$D$ 4.8352 0.004 1360.065 0.000 4.828 4.842\n",
"intercept 102.8326 0.003 4.09e+04 0.000 102.828 102.837\n",
"==============================================================================\n",
"Omnibus: 5440.296 Durbin-Watson: 1.999\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 5506.076\n",
"Skew: 0.178 Prob(JB): 0.00\n",
"Kurtosis: 2.928 Cond. No. 2.62\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = WLS(X['$Y$'], X[['$D$','intercept']], weights=X['$W_{ATT}$'])\n",
"result = model.fit()\n",
"result.summary()"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>WLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.620</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.620</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.631e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>23:35:36</td> <th> Log-Likelihood: </th> <td>-2.0738e+06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>4.148e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td>999998</td> <th> BIC: </th> <td>4.148e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 1</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>$D$</th> <td> 4.5086</td> <td> 0.004</td> <td> 1277.004</td> <td> 0.000</td> <td> 4.502</td> <td> 4.516</td>\n",
"</tr>\n",
"<tr>\n",
" <th>intercept</th> <td> 102.5042</td> <td> 0.002</td> <td> 4.11e+04</td> <td> 0.000</td> <td> 102.499</td> <td> 102.509</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>40037.598</td> <th> Durbin-Watson: </th> <td> 1.977</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td>45007.475</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td>-0.511</td> <th> Prob(JB): </th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 3.186</td> <th> Cond. No. </th> <td> 2.62</td> \n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" WLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: $Y$ R-squared: 0.620\n",
"Model: WLS Adj. R-squared: 0.620\n",
"Method: Least Squares F-statistic: 1.631e+06\n",
"Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n",
"Time: 23:35:36 Log-Likelihood: -2.0738e+06\n",
"No. Observations: 1000000 AIC: 4.148e+06\n",
"Df Residuals: 999998 BIC: 4.148e+06\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"$D$ 4.5086 0.004 1277.004 0.000 4.502 4.516\n",
"intercept 102.5042 0.002 4.11e+04 0.000 102.499 102.509\n",
"==============================================================================\n",
"Omnibus: 40037.598 Durbin-Watson: 1.977\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 45007.475\n",
"Skew: -0.511 Prob(JB): 0.00\n",
"Kurtosis: 3.186 Cond. No. 2.62\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = WLS(X['$Y$'], X[['$D$','intercept']], weights=X['$W_{ATC}$'])\n",
"result = model.fit()\n",
"result.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## What are the true values?"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4.834500037380762"
]
},
"execution_count": 122,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"treated = X[X['$D$'] == 1]\n",
"ATT = (treated['$Y_1$'] - treated['$Y_0$']).mean()\n",
"ATT"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4.504568355874528"
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"control = X[X['$D$'] == 0]\n",
"ATC = (control['$Y_1$'] - control['$Y_0$']).mean()\n",
"ATC"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## So it looks good!"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {},
"outputs": [],
"source": [
"N = 10000\n",
"a = np.random.uniform(0.1, 1., size=N)\n",
"b = np.random.uniform(0.1, 1., size=N)\n",
"\n",
"v0 = np.random.normal(0., 1., size=N)\n",
"v1 = np.random.normal(0., 1., size=N)\n",
"\n",
"p_d = 1. / (1. + np.exp(-(a + b + a*b))) # now, the propensity model has the extra non-linearity from a*b!\n",
"d = np.random.binomial(1., p=p_d)\n",
"\n",
"y1 = 102 + 3*a + 2*b + 6*a*b + v1\n",
"y0 = 100 + 2*a + 1*b - 2*a*b + v0\n",
"y = (d==1)*y1 + (d==0)*y0\n",
"\n",
"X = pd.DataFrame({'$D$': d, '$A$': a, '$B$': b, '$Y$': y })\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.523058110698841"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(y1 - y0).mean()"
]
},
{
"cell_type": "code",
"execution_count": 190,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.820844123939224"
]
},
"execution_count": 190,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X[X['$D$'] == 1].mean()['$Y$'] - X[X['$D$'] == 0].mean()['$Y$']"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {},
"outputs": [],
"source": [
"propensity_model = LogisticRegression(C=1e15) # we don't want bias due to regularization!! use a large C.\n",
"propensity_model = propensity_model.fit(X[['$A$', '$B$']], X['$D$'])\n",
"X['$P(D=1|A,B)$'] = propensity_model.predict_proba(X[['$A$', '$B$']])[:,1]\n",
"\n",
"X['$W_{ATE}$'] = (X['$D$'] == 1)* 1. / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1. /(1. - X['$P(D=1|A,B)$'])"
]
},
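{
"cell_type": "markdown",
"metadata": {},
"source": [
"A standard diagnostic here is covariate balance: after weighting, the weighted means of $A$ and $B$ should be (approximately) equal across arms. Since the logistic propensity model omits the $ab$ interaction, some residual imbalance is expected. A minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: weighted covariate balance check across treatment arms\n",
"t, c = X[X['$D$'] == 1], X[X['$D$'] == 0]\n",
"for col in ['$A$', '$B$']:\n",
"    print(col,\n",
"          np.average(t[col], weights=t['$W_{ATE}$']),\n",
"          np.average(c[col], weights=c['$W_{ATE}$']))"
]
},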
{
"cell_type": "code",
"execution_count": 156,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>WLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>$Y$</td> <th> R-squared: </th> <td> 0.817</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>WLS</td> <th> Adj. R-squared: </th> <td> 0.817</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th> F-statistic: </th> <td>1.113e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Wed, 14 Feb 2018</td> <th> Prob (F-statistic):</th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>23:55:18</td> <th> Log-Likelihood: </th> <td>-1.8718e+06</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td>1000000</td> <th> AIC: </th> <td>3.744e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td>999995</td> <th> BIC: </th> <td>3.744e+06</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td> 4</td> <th> </th> <td> </td> \n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th> </th> <td> </td> \n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P>|t|</th> <th>[0.025</th> <th>0.975]</th> \n",
"</tr>\n",
"<tr>\n",
" <th>$D$</th> <td> 5.4912</td> <td> 0.003</td> <td> 1925.728</td> <td> 0.000</td> <td> 5.486</td> <td> 5.497</td>\n",
"</tr>\n",
"<tr>\n",
" <th>$A$</th> <td> 2.5023</td> <td> 0.013</td> <td> 195.606</td> <td> 0.000</td> <td> 2.477</td> <td> 2.527</td>\n",
"</tr>\n",
"<tr>\n",
" <th>$B$</th> <td> 1.5139</td> <td> 0.013</td> <td> 118.437</td> <td> 0.000</td> <td> 1.489</td> <td> 1.539</td>\n",
"</tr>\n",
"<tr>\n",
" <th>$AB$</th> <td> 2.1753</td> <td> 0.021</td> <td> 102.981</td> <td> 0.000</td> <td> 2.134</td> <td> 2.217</td>\n",
"</tr>\n",
"<tr>\n",
" <th>intercept</th> <td> 98.2113</td> <td> 0.008</td> <td> 1.24e+04</td> <td> 0.000</td> <td> 98.196</td> <td> 98.227</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>179967.067</td> <th> Durbin-Watson: </th> <td> 1.989</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td> 0.000</td> <th> Jarque-Bera (JB): </th> <td>917090.149</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td>-0.783</td> <th> Prob(JB): </th> <td> 0.00</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td> 7.422</td> <th> Cond. No. </th> <td> 27.7</td> \n",
"</tr>\n",
"</table>"
],
"text/plain": [
"<class 'statsmodels.iolib.summary.Summary'>\n",
"\"\"\"\n",
" WLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: $Y$ R-squared: 0.817\n",
"Model: WLS Adj. R-squared: 0.817\n",
"Method: Least Squares F-statistic: 1.113e+06\n",
"Date: Wed, 14 Feb 2018 Prob (F-statistic): 0.00\n",
"Time: 23:55:18 Log-Likelihood: -1.8718e+06\n",
"No. Observations: 1000000 AIC: 3.744e+06\n",
"Df Residuals: 999995 BIC: 3.744e+06\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"$D$ 5.4912 0.003 1925.728 0.000 5.486 5.497\n",
"$A$ 2.5023 0.013 195.606 0.000 2.477 2.527\n",
"$B$ 1.5139 0.013 118.437 0.000 1.489 1.539\n",
"$AB$ 2.1753 0.021 102.981 0.000 2.134 2.217\n",
"intercept 98.2113 0.008 1.24e+04 0.000 98.196 98.227\n",
"==============================================================================\n",
"Omnibus: 179967.067 Durbin-Watson: 1.989\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 917090.149\n",
"Skew: -0.783 Prob(JB): 0.00\n",
"Kurtosis: 7.422 Cond. No. 27.7\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 156,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X['intercept'] = 1.\n",
"X['$A^2$'] = X['$A$'] ** 2\n",
"X['$B^2$'] = X['$B$'] ** 2\n",
"X['$AB$'] = X['$B$'] * X['$A$']\n",
"\n",
"model = WLS(X['$Y$'], X[['$D$', '$A$', '$B$', '$AB$', 'intercept']], weights=X['$W_{ATE}$'])\n",
"result = model.fit()\n",
"result.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## So we have a doubly robust regression by including $A$ and $B$ in the regression model, as well as the propensity score model!"
]
},
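{
"cell_type": "markdown",
"metadata": {},
"source": [
"For comparison, the textbook doubly robust estimator is AIPW (augmented IPW), which combines per-arm outcome-model predictions $\\hat{m}_1, \\hat{m}_0$ with inverse-probability-weighted residuals: $\\widehat{ATE} = \\frac{1}{N}\\sum_i \\left[\\frac{d_i (y_i - \\hat{m}_1(x_i))}{p_i} + \\hat{m}_1(x_i)\\right] - \\frac{1}{N}\\sum_i \\left[\\frac{(1-d_i)(y_i - \\hat{m}_0(x_i))}{1-p_i} + \\hat{m}_0(x_i)\\right]$. A minimal sketch using per-arm OLS outcome models (this is a separate estimator, not the weighted regression above):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: AIPW with per-arm OLS outcome models and the fitted propensities\n",
"p = X['$P(D=1|A,B)$']\n",
"d_, y_ = X['$D$'], X['$Y$']\n",
"feats = ['$A$', '$B$', '$AB$', 'intercept']\n",
"m1 = OLS(y_[d_ == 1], X.loc[d_ == 1, feats]).fit().predict(X[feats])\n",
"m0 = OLS(y_[d_ == 0], X.loc[d_ == 0, feats]).fit().predict(X[feats])\n",
"(d_ * (y_ - m1) / p + m1).mean() - ((1 - d_) * (y_ - m0) / (1 - p) + m0).mean()"
]
},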
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Can we use more general models with weighting and the g-formula?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Check the naive estimate first:"
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/1\n",
"1000000/1000000 [==============================] - 18s 18us/step - loss: 54.4477\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd8a3c342e8>"
]
},
"execution_count": 165,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from keras.models import Model\n",
"from keras.layers import Input, Dense\n",
"\n",
"x_variables = ['$D$']\n",
"\n",
"x_in = Input(shape=(len(x_variables),))\n",
"h1 = Dense(100, activation='relu')(x_in)\n",
"h2 = Dense(100, activation='relu')(h1)\n",
"y_out = Dense(1, activation='linear')(h2)\n",
"\n",
"model = Model(inputs=[x_in], outputs=[y_out])\n",
"model.compile(loss='mean_squared_error', optimizer='RMSprop')\n",
"\n",
"model.fit(X[x_variables], X['$Y$'])"
]
},
{
"cell_type": "code",
"execution_count": 166,
"metadata": {},
"outputs": [],
"source": [
"df = X.copy()\n",
"df['$D$'] = 1\n",
"y1_hat = model.predict(df['$D$'])\n",
"df['$D$'] = 0\n",
"y0_hat = model.predict(df['$D$'])"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.798075"
]
},
"execution_count": 167,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(y1_hat - y0_hat).mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## close to the naive estimator! Now, let's try weighting..."
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/1\n",
"1000000/1000000 [==============================] - 30s 30us/step - loss: 107.8438\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd8b73e01d0>"
]
},
"execution_count": 168,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from keras.models import Model\n",
"from keras.layers import Input, Dense\n",
"\n",
"x_variables = ['$D$']\n",
"\n",
"x_in = Input(shape=(len(x_variables),))\n",
"h1 = Dense(100, activation='relu')(x_in)\n",
"h2 = Dense(100, activation='relu')(h1)\n",
"y_out = Dense(1, activation='linear')(h2)\n",
"\n",
"model = Model(inputs=[x_in], outputs=[y_out])\n",
"model.compile(loss='mean_squared_error', optimizer='RMSprop')\n",
"\n",
"model.fit(X[x_variables], X['$Y$'], sample_weight=X['$W_{ATE}$'])"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [],
"source": [
"df = X.copy()\n",
"df['$D$'] = 1\n",
"y1_hat = model.predict(df['$D$'])\n",
"df['$D$'] = 0\n",
"y0_hat = model.predict(df['$D$'])"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.4365234"
]
},
"execution_count": 170,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(y1_hat - y0_hat).mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Much better! Now, let's try doubly-robust ..."
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/1\n",
"1000000/1000000 [==============================] - 30s 30us/step - loss: 63.2424\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd8b748b4e0>"
]
},
"execution_count": 172,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from keras.models import Model\n",
"from keras.layers import Input, Dense\n",
"\n",
"x_variables = ['$D$', '$A$', '$B$']\n",
"\n",
"x_in = Input(shape=(len(x_variables),))\n",
"h1 = Dense(100, activation='relu')(x_in)\n",
"h2 = Dense(100, activation='relu')(h1)\n",
"y_out = Dense(1, activation='linear')(h2)\n",
"\n",
"model = Model(inputs=[x_in], outputs=[y_out])\n",
"model.compile(loss='mean_squared_error', optimizer='RMSprop')\n",
"\n",
"model.fit(X[x_variables], X['$Y$'], sample_weight=X['$W_{ATE}$'])"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {},
"outputs": [],
"source": [
"df = X.copy()\n",
"df['$D$'] = 1\n",
"y1_hat = model.predict(df[x_variables])\n",
"df['$D$'] = 0\n",
"y0_hat = model.predict(df[x_variables])"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.416282"
]
},
"execution_count": 175,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(y1_hat - y0_hat).mean()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Let's go crazy -- feedforward nets for everything!"
]
},
{
"cell_type": "code",
"execution_count": 201,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"10000/10000 [==============================] - 0s 42us/step - loss: 0.1710\n",
"Epoch 2/10\n",
"10000/10000 [==============================] - 0s 20us/step - loss: 0.1604\n",
"Epoch 3/10\n",
"10000/10000 [==============================] - 0s 20us/step - loss: 0.1604\n",
"Epoch 4/10\n",
"10000/10000 [==============================] - 0s 19us/step - loss: 0.1600\n",
"Epoch 5/10\n",
"10000/10000 [==============================] - 0s 20us/step - loss: 0.1602\n",
"Epoch 6/10\n",
"10000/10000 [==============================] - 0s 19us/step - loss: 0.1598\n",
"Epoch 7/10\n",
"10000/10000 [==============================] - 0s 19us/step - loss: 0.1599\n",
"Epoch 8/10\n",
"10000/10000 [==============================] - 0s 19us/step - loss: 0.1595\n",
"Epoch 9/10\n",
"10000/10000 [==============================] - 0s 19us/step - loss: 0.1596\n",
"Epoch 10/10\n",
"10000/10000 [==============================] - 0s 19us/step - loss: 0.1593\n"
]
}
],
"source": [
"from keras.models import Model\n",
"from keras.layers import Input, Dense\n",
"\n",
"x_variables = ['$A$', '$B$']\n",
"\n",
"x_in = Input(shape=(len(x_variables),))\n",
"h1 = Dense(100, activation='relu')(x_in)\n",
"h2 = Dense(100, activation='relu')(h1)\n",
"y_out = Dense(1, activation='linear')(h2)\n",
"\n",
"propensity_model = Model(inputs=[x_in], outputs=[y_out])\n",
"propensity_model.compile(loss='mean_squared_error', optimizer='RMSprop')\n",
"\n",
"propensity_model.fit(X[x_variables], X['$D$'], epochs=10)\n",
"\n",
"X['$P(D=1|A,B)$'] = propensity_model.predict(X[['$A$', '$B$']])\n",
"\n",
"X['$W_{ATE}$'] = (X['$D$'] == 1)* 1. / X['$P(D=1|A,B)$'] + (X['$D$'] == 0)* 1. /(1. - X['$P(D=1|A,B)$'])"
]
},
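{
"cell_type": "markdown",
"metadata": {},
"source": [
"One caveat with this setup: nothing constrains the linear-output net's predictions to $(0,1)$, so it is worth checking the estimated propensity range before weighting. A minimal sketch:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# sketch: propensities outside (0,1) would make the IPW weights negative or explode\n",
"p_hat = X['$P(D=1|A,B)$']\n",
"print(p_hat.min(), p_hat.max())\n",
"print('outside (0,1):', ((p_hat <= 0) | (p_hat >= 1)).sum())"
]
},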
{
"cell_type": "code",
"execution_count": 202,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"10000/10000 [==============================] - 1s 54us/step - loss: 11955.0360\n",
"Epoch 2/10\n",
"10000/10000 [==============================] - 0s 31us/step - loss: 938.2027\n",
"Epoch 3/10\n",
"10000/10000 [==============================] - 0s 32us/step - loss: 7.6465\n",
"Epoch 4/10\n",
"10000/10000 [==============================] - 0s 31us/step - loss: 7.2900\n",
"Epoch 5/10\n",
"10000/10000 [==============================] - 0s 31us/step - loss: 7.5390\n",
"Epoch 6/10\n",
"10000/10000 [==============================] - 0s 31us/step - loss: 7.3276\n",
"Epoch 7/10\n",
"10000/10000 [==============================] - 0s 37us/step - loss: 7.2862\n",
"Epoch 8/10\n",
"10000/10000 [==============================] - 0s 36us/step - loss: 7.3980\n",
"Epoch 9/10\n",
"10000/10000 [==============================] - 0s 31us/step - loss: 7.2535\n",
"Epoch 10/10\n",
"10000/10000 [==============================] - 0s 32us/step - loss: 7.3800\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7fd8b5946e80>"
]
},
"execution_count": 202,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from keras.models import Model\n",
"from keras.layers import Input, Dense\n",
"\n",
"x_variables = ['$D$']\n",
"\n",
"x_in = Input(shape=(len(x_variables),))\n",
"h1 = Dense(100, activation='relu')(x_in)\n",
"h2 = Dense(100, activation='relu')(h1)\n",
"y_out = Dense(1, activation='linear')(h2)\n",
"\n",
"model = Model(inputs=[x_in], outputs=[y_out])\n",
"model.compile(loss='mean_squared_error', optimizer='RMSprop')\n",
"\n",
"model.fit(X[x_variables], X['$Y$'], sample_weight=X['$W_{ATE}$'], epochs=10)"
]
},
{
"cell_type": "code",
"execution_count": 203,
"metadata": {},
"outputs": [],
"source": [
"df = X.copy()\n",
"df['$D$'] = 1\n",
"y1_hat = model.predict(df[x_variables])\n",
"df['$D$'] = 0\n",
"y0_hat = model.predict(df[x_variables])"
]
},
{
"cell_type": "code",
"execution_count": 204,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.6454697"
]
},
"execution_count": 204,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(y1_hat - y0_hat).mean()"
]
},
{
"cell_type": "code",
"execution_count": 205,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5.523058110698841"
]
},
"execution_count": 205,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(y1 - y0).mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}