Created
April 2, 2018 17:29
-
-
Save akelleh/4f118e6aa0bd26c85c83c3f892141029 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 133, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# generate some binary data so we can just use logit\n", | |
"# regression everywhere\n", | |
"\n", | |
"N = 5000\n", | |
"\n", | |
"d = np.random.binomial(1, 0.5, size=N)\n", | |
"p_y = 1. / (1. + np.exp(-3.*d))\n", | |
"y = np.random.binomial(1, p_y)\n", | |
"\n", | |
"p_c = 1. / (1. + np.exp(-3.*y))\n", | |
"c = np.random.binomial(1, p_c)\n", | |
"\n", | |
"p_s = 1. / (1. + np.exp(-3.*(c+d)))\n", | |
"s = np.random.binomial(1, p=p_s)\n", | |
"\n", | |
"X = pd.DataFrame({'d': d, 'c': c, 'y': y, 's': s})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 134, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.axes._subplots.AxesSubplot at 0x12b301d50>" | |
] | |
}, | |
"execution_count": 134, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEGCAYAAACevtWaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAADVhJREFUeJzt3W2InXdeh/Hr2zwYcWMLk1HWTLIT\n6KwaH7Dr0F1YxJZWmnYxsSjSQlE37eaNEWVXMVKJ3fpm1wVthfgQcK1dsNl2QRlsbIS1S/Eha6bb\n3WJSUodYzUTbzsZakBLbmp8v5nR79uxM5szkJGfyz/WBgXPf959zfpTplXvu85SqQpLUlmuGPYAk\nafCMuyQ1yLhLUoOMuyQ1yLhLUoOMuyQ1yLhLUoOWjHuSzyZ5Nck/L3I8SX4/yUyS55N8YPBjSpKW\no58z90eAHRc4fjsw0fnZA/zhxY8lSboYa5daUFXPJBm/wJJdwKM1/1bXo0muS/LeqvrPC93vpk2b\nanz8QncrSer17LPPfr2qRpdat2Tc+7AZON21PdvZ9y1xT7KH+bN7tm7dyvT09AAeXpKuHkn+rZ91\nl/UJ1ao6WFWTVTU5OrrkPzySpBUaRNzPAFu6tsc6+yRJQzKIuE8BP9d51cyHgNeXut4uSbq0lrzm\nnuQx4CZgU5JZ4LeAdQBV9UfAYeAOYAZ4A/joSod56623mJ2d5dy5cyu9i0tuw4YNjI2NsW7dumGP\nIkmL6ufVMncvcbyAXxzEMLOzs2zcuJHx8XGSDOIuB6qqOHv2LLOzs2zbtm3Y40jSolbVO1TPnTvH\nyMjIqgw7QBJGRkZW9V8WkgSrLO7Aqg37O1b7fJIEqzDukqSLN4g3MV0y4/ueHOj9vfSpjwz0/qSh\neuDaYU/QlgdeH/YEA+WZuyQ1yLh32b9/Pw899NA3tu+//34efvjhIU4kSStj3Lvs3r2bRx99FIDz\n589z6NAh7rnnniFPJUnLt6qvuV9u4+PjjIyM8Nxzz/HKK69www03MDIyMuyxJGnZjHuP++67j0ce\neYSXX36Z3bt3D3scSVoRL8v0uPPOO3nqqac4duwYt91227DHkaQVWdVn7sN46eL69eu5+eabue66\n61izZs1lf3xJGoRVHfdhOH/+PEePHuWJJ54Y9iiStGJeluly4sQJrr/+em655RYmJiaGPY4krZhn\n7l22b9/OqVOnhj2GJF20VXfmPv8JwqvXap9PkmCVxX3Dhg2cPXt21Qb0nc9z37Bhw7BHkaQLWlWX\nZcbGxpidnWVubm7YoyzqnW9ikqTVbFXFfd26dX7DkSQNwKq6LCNJGgzjLkkNMu6S1CDjLkkNMu6S\n1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkNMu6S1CDjLkkN6ivuSXYk\nOZlkJsm+BY5vTfJ0kueSPJ/kjsGPKknq15JxT7IGOADcDmwH7k6yvWfZbwKPV9UNwF3AHwx6UElS\n//o5c78RmKmqU1X1JnAI2NWzpoDv7Ny+FviPwY0oSVqufr5DdTNwumt7Fvhgz5oHgL9J8kvAdwC3\nDmQ6SdKKDOoJ1buBR6pqDLgD+FySb7nvJHuSTCeZnpubG9BDS5J69RP3M8CWru2xzr5u9wKPA1TV\nPwIbgE29d1RVB6tqsqomR0dHVzaxJGlJ/cT9GDCRZFuS9cw/YTrVs+bfgVsAknw/83H31FyShmTJ\nuFfV28Be4AjwAvOvijme5MEkOzvLPgF8LMnXgMeAX6iqulRDS5IurJ8nVKmqw8Dhnn37u26fAD48\n2NEkSSvlO1QlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHG\nXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIa\nZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIa1Ffck+xIcjLJTJJ9\ni6z52SQnkhxP8ueDHVOStBxrl1qQZA1wAPgJYBY4lmSqqk50rZkAfgP4cFW9luS7LtXAkqSl9XPm\nfiMwU1WnqupN4BCwq2fNx4ADVfUaQFW9OtgxJUnL0U/cNwOnu7ZnO/u6vR94f5K/T3I0yY5BDShJ\nWr4lL8ss434mgJuAMeCZJD9UVf/dvSjJHmAPwNatWwf00JKkXv2cuZ8BtnRtj3X2dZsFpqrqrar6\nV+BF5mP/TarqYFVNVtXk6OjoSmeWJC2hn7gfAyaSbEuyHrgLmOpZ85fMn7WTZBPzl2lODXBOSdIy\nLBn3qnob2AscAV4AHq+q40keTLKzs+wIcDbJCeBp4Neq6uylGlqSdGF9XXOvqsPA4Z59+7tuF/Dx\nzo8kach8h6okNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNci4\nS1KDjLskNci4S1KDjLskNci4S1KDBvUF2c0a3/fksEdoykuf+siwR5CuCp65S1KDjLskNci4S1KD\njLskNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNci4S1KDjLskNci4S1KDjLsk\nNci4S1KDjLskNaivuCfZkeRkkpkk+y6w7qeTVJLJwY0oSVquJeOeZA1wALgd2A7cnWT7Aus2Ar8M\nfHnQQ0qSlqefM/cbgZmqOlVVbwKHgF0LrPtt4NPAuQHOJ0lagX7ivhk43bU929n3DUk+AGypqicv\ndEdJ9iSZTjI9Nze37GElSf256CdUk1wD/C7wiaXWVtXBqpqsqsnR0dGLfWhJ0iL6ifsZYEvX9lhn\n3zs2Aj8IfCnJS8CHgCmfVJWk4ekn7seAiSTbkqwH7gKm3jlYVa9X1aaqGq+qceAosLOqpi/JxJKk\nJS0Z96p6G9gLHAFeAB6vquNJHkyy81IPKElavrX9LKqqw8Dhnn37F1l708WPJUm6GL5DVZIaZNwl\nqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHG\nXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIa\nZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUHGXZIaZNwlqUF9xT3JjiQnk8wk2bfA8Y8nOZHk+SRfTPK+\nwY8qSerXknFPsgY4ANwObAfuTrK9Z9lzwGRV/TDwBeB3Bj2oJKl//Zy53wjMVNWpqnoTOATs6l5Q\nVU9X1RudzaPA2GDHlCQtRz9x3wyc7tqe7exbzL3AXy90IMmeJNNJpufm5vqfUpK0LAN9QjXJPcAk\n8JmFjlfVwaqarKrJ0dHRQT60JKnL2j7WnAG2dG2PdfZ9kyS3AvcDP15V/zuY8SRJK9HPmfsxYCLJ\ntiTrgbuAqe4FSW4A/hjYWVWvDn5MSdJyLBn3qnob2AscAV4AHq+q40keTLKzs+wzwHuAJ5J8NcnU\nIncnSboM+rksQ1UdBg737NvfdfvWAc8lSboIvkNVkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZd\nkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk\n3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWpQcZdkhpk3CWp\nQcZdkhpk3CWpQX3FPcmOJCeTzCTZt8Dxb0vy+c7xLycZH/SgkqT+LRn3JGuAA8DtwHbg7iTbe5bd\nC7xWVdcDvwd8etCDSpL618+Z+43ATFWdqqo3gUPArp41u4A/69z+AnBLkgxuTEnScqztY81m4HTX\n9izwwcXWVNXbSV4HRoCvdy9KsgfY09n8nyQnVzK0FrSJnv/eq1H8m+5qdEX8bvLJK+Z89H39LOon\n7gNTVQeBg5fzMa8WSaaranLYc0i9/N0cjn4uy5wBtnRtj3X2LbgmyVrgWuDsIAaUJC1fP3E/Bkwk\n2ZZkPXAXMNWzZgr4+c7tnwH+tqpqcGNKkpZjycsynWvoe4EjwBrgs1V1PMmDwHRVTQF/AnwuyQzw\nX8z/A6DLy8tdWq383RyCeIItSe3xHaqS1CDjLkkNMu6S1KDL+jp3DUaS72P+XcGbO7vOAFNV9cLw\nppK0mnjmfoVJ8uvMfwREgH/q/AR4bKEPdZN0dfLVMleYJC8CP1BVb/XsXw8cr6qJ4UwmXViSj1bV\nnw57jquFZ+5XnvPA9yyw/72dY9Jq9clhD3A18Zr7ledXgC8m+Rfe/UC3rcD1wN6hTSUBSZ5f7BDw\n3Zdzlqudl2WuQEmuYf6jmLufUD1WVf83vKkkSPIKcBvwWu8h4B+qaqG/OnUJeOZ+Baqq88DRYc8h\nLeCvgPdU1Vd7DyT50uUf5+rlmbskNcgnVCWpQcZdkhpk3KVFJHkgya8Oew5pJYy7JDXIuEtdktyf\n5MUkfwd877DnkVbKl0JKHUl+lPlvEfsR5v/f+Arw7FCHklbIuEvv+jHgL6rqDYAkvd8VLF0xvCwj\nSQ0y7tK7ngF+Ksm3J9kI/OSwB5JWyssyUkdVfSXJ54GvAa8Cx4Y8krRifvyAJDXIyzKS1CDjLkkN\nMu6S1CDjLkkNMu6S1CDjLkkNMu6S1KD/B75X2qYEFtSiAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x12b35f0d0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# True p(y|d)\n", | |
"X.groupby('d').mean().reset_index().plot(x='d', y='y', kind='bar')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 135, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"<matplotlib.axes._subplots.AxesSubplot at 0x12b3c4e90>" | |
] | |
}, | |
"execution_count": 135, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEGCAYAAACevtWaAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAADVZJREFUeJzt3X+o3fddx/Hnq/lhhMUWbq4yc7Pd\nQO+mUcTOSx0MsaWTph0mKw5poKhLu/xjRNkUI5XY1X82B9oK8UfEWTuwWTtQLjY2f8yO4Y/M3K5b\nMSmpl1jNjba9i6UgJbY1b/+4p+vx7Cb33JuTnJNPng+4cL7f74dz3oTbZ7/3e36lqpAkteW6YQ8g\nSRo84y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgZeOe5PNJXknyzxc4niS/n2QuyXNJPjD4MSVJ\nK9HPmfsjwPaLHL8DmOr87AH+8NLHkiRdirXLLaiqryaZvMiSncCjtfhW16NJbkjy7qr6z4vd76ZN\nm2py8mJ3K0nq9cwzz3yrqsaXW7ds3PuwGTjdtT3f2XfRuE9OTjI7OzuAh5eka0eSf+tn3RV9QjXJ\nniSzSWYXFhau5ENL0jVlEHE/A2zp2p7o7PsOVXWwqqaranp8fNm/KiRJqzSIuM8AP9d51cwHgdeW\nu94uSbq8lr3mnuQx4BZgU5J54LeAdQBV9UfAYeBOYA54Hfj4aod58803mZ+f59y5c6u9i8tuw4YN\nTExMsG7dumGPIkkX1M+rZXYtc7yAXxzEMPPz82zcuJHJyUmSDOIuB6qqOHv2LPPz82zdunXY40jS\nBY3UO1TPnTvH2NjYSIYdIAljY2Mj/ZeFJMGIxR0Y2bC/bdTnkyQYwbhLki7dIN7EdNlM7ntyoPf3\n4mc+MtD7k4bqgeuHPUFbHnht2BMMlGfuktQg495l//79PPTQQ9/evv/++3n44YeHOJEkrY5x77J7\n924effRRAM6fP8+hQ4e45557hjyVJK3cSF9zv9ImJycZGxvj2Wef5eWXX+amm25ibGxs2GNJ0ooZ\n9x733XcfjzzyCC+99BK7d+8e9jiStCpelulx11138dRTT3Hs2DFuv/32YY8jSasy0mfuw3jp4vr1\n67n11lu54YYbWLNmzRV/fEkahJGO+zCcP3+eo0eP8sQTTwx7FElaNS/LdDlx4gQ33ngjt912G1NT\nU8MeR5JWzTP3Ltu2bePUqVPDHkOSLtnInbkvfoLw6Br1+SQJRizuGzZs4OzZsyMb0Lc/z33Dhg3D\nHkWSLmqkLstMTEwwPz/PKH959tvfxCRJo2yk4r5u3Tq/4UiSBmCkLstIkgbDuEtSg4y7JDXIuEtS\ng4y7JDXIuEtSg4y7JDXIuEtSg4y7JDXIuEtSg4y7JDXIuEtSg4y7JDXIuEtSg4y7JDXIuEtSg4y7\nJDWor7gn2Z7kZJK5JPuWOP6eJE8neTbJc0nuHPyokqR+LRv3JGuAA8AdwDZgV5JtPct+E3i8qm4C\n7gb+YNCDSpL618+Z+83AXFWdqqo3gEPAzp41BXxP5/b1wH8MbkRJ0kr1E/fNwOmu7fnOvm4PAPck\nmQcOA7+01B0l2ZNkNsnswsLCKsaVJPVjUE+o7gIeqaoJ4E7gC0m+476r6mBVTVfV9Pj4+IAeWpLU\nq5+4nwG2dG1PdPZ1uxd4HKCq/hHYAGwaxICSpJXrJ+7HgKkkW5OsZ/EJ05meNf8O3AaQ5AdZjLvX\nXSRpSJaNe1W9BewFjgDPs/iqmONJHkyyo7PsU8AnknwTeAz4haqqyzW0JOni1vazqKoOs/hEafe+\n/V23TwAfGuxokqTV8h2qktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5J\nDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLu\nktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDeor7km2\nJzmZZC7Jvgus+dkkJ5IcT/IXgx1TkrQSa5dbkGQNcAD4KWAeOJZkpqpOdK2ZAn4D+FBVvZrkey/X\nwJKk5fVz5n4zMFdVp6rqDeAQsLNnzSeAA1X1KkBVvTLYMSVJK9FP3DcDp7u25zv7ur0PeF+Sv09y\nNMn2pe4oyZ4ks0lmFxYWVjexJGlZg3pCdS0wBdwC7AL+JMkNvYuq6mBVTVfV9Pj4+IAeWpLUq5+4\nnwG2dG1PdPZ1mwdmqurNqvpX4AUWYy9JGoJ+4n4MmEqyNcl64G5gpmfNX7F41k6STSxepjk1wDkl\nSSuwbNyr6i1gL3AEeB54vKqOJ3kwyY7OsiPA2SQngKeBX6uqs5draEnSxS37UkiAqjoMHO7Zt7/r\ndgGf7PxIkoasr7hfyyb3PTnsEZry4mc+MuwRpGuCHz8gSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLU\nIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMu\nSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y\n7pLUIOMuSQ3qK+5Jtic5mWQuyb6LrPuZJJVkenAjSpJWatm4J1kDHADuALYBu5JsW2LdRuCXga8N\nekhJ0sr0c+Z+MzBXVaeq6g3gELBziXW/DXwWODfA+SRJq9BP3DcDp7u25zv7vi3JB4AtVfXkxe4o\nyZ4ks0lmFxYWVjysJKk/l/yEapLrgN8FPrXc2qo6WFXTVTU9Pj5+qQ8tSbqAfuJ+BtjStT3R2fe2\njcAPA19J8iLwQWDGJ1UlaXj6ifsxYCrJ1iTrgbuBmbcPVtVrVbWpqiarahI4CuyoqtnLMrEkaVnL\nxr2q3gL2AkeA54HHq+p4kgeT7LjcA0qSVm5tP4uq6jBwuGff/gusveXSx5IkXQrfoSpJDTLuktQg\n4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5J\nDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLu\nktQg4y5JDTLuktQg4y5JDTLuktQg4y5JDTLuktSgvuKeZHuSk0nmkuxb4vgnk5xI8lySLyd57+BH\nlST1a9m4J1kDHADuALYBu5Js61n2LDBdVT8CfAn4nUEPKknqXz9n7jcDc1V1qqreAA4BO7sXVNXT\nVfV6Z/MoMDHYMSVJK9FP3DcDp7u25zv7LuRe4G+WOpBkT5LZJLMLCwv9TylJWpGBPqGa5B5gGvjc\nUser6mBVTVfV9Pj4+CAfWpLUZW0fa84AW7q2Jzr7/p8kHwbuB36yqv5nMONJklajnzP3Y8BUkq1J\n1gN3AzPdC5LcBPwxsKOqXhn8mJKklVg27lX1FrAXOAI8DzxeVceTPJhkR2fZ54B3AU8k+UaSmQvc\nnSTpCujnsgxVdRg43LNvf9ftDw94LknSJfAdqpLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLU\nIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMu\nSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y7pLUIOMuSQ0y\n7pLUIOMuSQ3qK+5Jtic5mWQuyb4ljn9Xki92jn8tyeSgB5Uk9W/ZuCdZAxwA7gC2AbuSbOtZdi/w\nalXdCPwe8NlBDypJ6l8/Z+43A3NVdaqq3gAOATt71uwE/rxz+0vAbUkyuDElSSuxto81m4HTXdvz\nwI9faE1VvZXkNWAM+Fb3oiR7gD2dzf9OcnI1Q2tJm+j59x5F8W+6a9FV8bvJp6+a89H39rOon7gP\nTFUdBA5eyce8ViSZrarpYc8h9fJ3czj6uSxzBtjStT3R2bfkmiRrgeuBs4MYUJK0cv3E/RgwlWRr\nkvXA3cBMz5oZ4Oc7tz8G/G1V1eDGlCStxLKXZTrX0PcCR4A1wOer6niSB4HZqpoB/hT4QpI54L9Y\n/B+Ariwvd2lU+bs5BPEEW5La4ztUJalBxl2SGmTcJalBV/R17hqMJD/A4ruCN3d2nQFmqur54U0l\naZR45n6VSfLrLH4ERIB/6vwEeGypD3WTdG3y1TJXmSQvAD9UVW/27F8PHK+qqeFMJl1cko9X1Z8N\ne45rhWfuV5/zwPcvsf/dnWPSqPr0sAe4lnjN/erzK8CXk/wL73yg23uAG4G9Q5tKApI8d6FDwPdd\nyVmudV6WuQoluY7Fj2LufkL1WFX97/CmkiDJy8DtwKu9h4B/qKql/urUZeCZ+1Woqs4DR4c9h7SE\nvwbeVVXf6D2Q5CtXfpxrl2fuktQgn1CVpAYZd0lqkHGXLiDJA0l+ddhzSKth3CWpQcZd6pLk/iQv\nJPk74P3DnkdaLV8KKXUk+TEWv0XsR1n8b+PrwDNDHUpaJeMuveMngL+sqtcBkvR+V7B01fCyjCQ1\nyLhL7/gq8NEk351kI/DTwx5IWi0vy0gdVfX1JF8Evgm8Ahwb8kjSqvnxA5LUIC/LSFKDjLskNci4\nS1KDjLskNci4S1KDjLskNci4S1KD/g+SH98dxVVpvgAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x12b442ed0>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# P(y|d) in our sample -- biased for the population distribution!\n", | |
"X[X['s'] == 1].groupby('d').mean().reset_index().plot(x='d', y='y', kind='bar')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 136, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>y</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.573028</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.959922</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" y\n", | |
"d \n", | |
"0 0.573028\n", | |
"1 0.959922" | |
] | |
}, | |
"execution_count": 136, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# same thing in numbers. Sampled estimate first\n", | |
"X[X['s'] == 1].groupby('d').mean()[['y']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 137, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>y</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>d</th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>0.500000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>0.958301</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" y\n", | |
"d \n", | |
"0 0.500000\n", | |
"1 0.958301" | |
] | |
}, | |
"execution_count": 137, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# a population level\n", | |
"X.groupby('d').mean()[['y']]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 138, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# we'll use eqn (4) from http://ftp.cs.ucla.edu/pub/stat_ser/r425.pdf\n", | |
"# need a sample level model of P(y|d, c, S=1), and a pop level model of P(c|d)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 139, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"from statsmodels.api import Logit\n", | |
"from collections import defaultdict\n", | |
"\n", | |
"def get_p_y_given_d(df, sampling_label='s', causal_state_label='d', outcome_label='y', control_variable='c'):\n", | |
" X = df.copy()\n", | |
" X['intercept'] = 1.\n", | |
" X_sampled = X[X[sampling_label] == 1].copy()\n", | |
" X_pop = X.copy()\n", | |
"\n", | |
" sample_model = Logit(endog=X_sampled[outcome_label], exog=X_sampled[[causal_state_label, control_variable, 'intercept']])\n", | |
" sample_result = sample_model.fit()\n", | |
"\n", | |
" pop_model = Logit(endog=X[control_variable], exog=X[[causal_state_label,'intercept']])\n", | |
" pop_result = pop_model.fit()\n", | |
"\n", | |
"\n", | |
" p_y_given_d = defaultdict(lambda: 0.)\n", | |
" for d in X[causal_state_label].unique():\n", | |
" for c in X[control_variable].unique():\n", | |
" p_y_given_d[d] += sample_result.predict(exog=[d, c, 1.]) * (c*pop_result.predict(exog=[d, 1.]) + (1.-c)*(1-pop_result.predict(exog=[d, 1.])))\n", | |
" return p_y_given_d" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 140, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Optimization terminated successfully.\n", | |
" Current function value: 0.337474\n", | |
" Iterations 7\n", | |
"Optimization terminated successfully.\n", | |
" Current function value: 0.414124\n", | |
" Iterations 7\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"defaultdict(<function __main__.<lambda>>,\n", | |
" {0: array([0.50655894]), 1: array([0.95874533])})" | |
] | |
}, | |
"execution_count": 140, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"get_p_y_given_d(X)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.11" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment