Last active
October 9, 2017 23:08
-
-
Save brianspiering/0b0b2574a79fed2a8786d1c56101f18a to your computer and use it in GitHub Desktop.
Explore permutation testing with different statistics
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Permutation Testing\n", | |
"-----\n", | |
"\n", | |
"Permutation testing is an example of [resampling in statistics](https://en.wikipedia.org/wiki/Resampling_(statistics).\n", | |
"\n", | |
"Instead of analytically calculating a [test statistic](https://en.wikipedia.org/wiki/Test_statistic), shuffle the labels for groups and see if the differences are still statistically significant.\n", | |
"\n", | |
"HT - [Raymond Hettinger](https://twitter.com/raymondh)'s [resampling code](https://github.com/rhettinger/modernpython/blob/master/resampling.py)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 84, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"reset -fs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from random import shuffle\n", | |
"from statistics import mean, median, harmonic_mean\n", | |
"\n", | |
"import seaborn as sns\n", | |
"\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 105, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# There are 2 groups: drugs and placebo\n", | |
"drug = [54, 53, 65, 50, 48, 52, 70, 65, 68]\n", | |
"placebo = [44, 51, 47, 52, 58, 54, 46, 58, 55, 42]\n", | |
"comb = drug + placebo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 106, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADxVJREFUeJzt3WuMnGd5xvH/VdvQcgzgbYkcL4YQgaAqSbpNQJFQRHoI\nNEpaNVEdVA4RyC1KWqJStSQfAo2EVKoWKghK5OYI5RwOdVMjmipUwAdc1q5J4hhUQyFZ4hITg0PK\nSYa7H/ZNO53MemZ3Zz2Zp/+fNNr3cPud+9GjXH73yTvjVBWSpLb8zKQbkCSNn+EuSQ0y3CWpQYa7\nJDXIcJekBhnuktQgw12SGmS4S1KDDHdJatD6Sb3xxo0ba8uWLZN6e0maSrt37/52Vc0Mq5tYuG/Z\nsoX5+flJvb0kTaUk3xilzmUZSWqQ4S5JDTLcJalBhrskNchwl6QGDQ33JD+b5F+TfCnJviR/PqDm\n8Uk+nORAkl1JtqxFs5Kk0Yxy5/4j4GVV9SLgVODcJC/uq3kd8J2qei7wTuDt421TkrQcQ8O9Fj3c\n7W7oXv3/Nt8FwC3d9q3AOUkyti4lScsy0pp7knVJ9gIPALdX1a6+kk3AfQBVdRQ4AjxjnI1KkkY3\n0idUq+onwKlJTgA+keQXq+runpJBd+mP+pe3k2wDtgHMzs6uoF31+sCueyfdwoq98szjOP/zNx2/\n9zqWuUsm3YH+H1nW0zJV9V3gX4Bz+04tAJsBkqwHngocHvDnt1fVXFXNzcwM/WoESdIKjfK0zEx3\nx06SnwN+FfhyX9kO4DXd9oXAHVX1qDt3SdLxMcqyzInALUnWsfiXwUeq6rYkVwPzVbUDuAF4X5ID\nLN6xb12zjiVJQw0N96q6EzhtwPGrerZ/CFw03tYkSSvlJ1QlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpk\nuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7\nJDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1aGi4J9mc5DNJ9ifZl+SNA2rO\nTnIkyd7uddXatCtJGsX6EWqOAm+qqj1JngzsTnJ7Vd3TV/e5qjpv/C1KkpZr6J17VR2sqj3d9veA\n/cCmtW5MkrRyy1pzT7IFOA3YNeD0S5J8KcmnkrxwiT+/Lcl8kvlDhw4tu1lJ0mhGDvckTwI+Blxe\nVQ/1nd4DPKuqXgS8G/jkoGtU1faqmququZmZmZX2LEkaYqRwT7KBxWB/f1V9vP98VT1UVQ932zuB\nDUk2jrVTSdLIRnlaJsANwP6qescSNc/s6khyRnfdB8fZqCRpdKM8LXMW8CrgriR7u2NXArMAVXUd\ncCHwhiRHgR8AW6uq1qBfSdIIhoZ7VX0eyJCaa4BrxtWUJGl1/ISqJDXIcJekBhnuktQgw12SGmS4\nS1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrsk\nNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVoaLgn2ZzkM0n2J9mX5I0DapLk\nXUkOJLkzyelr064kaRTrR6g5CrypqvYkeTKwO8ntVXVPT83LgVO615nAtd1PSdIEDL1zr6qDVbWn\n2/4esB/Y1Fd2AfDeWvQF4IQkJ469W0nSSJa15p5kC3AasKvv1Cbgvp79BR79F4Ak6TgZZVkGgCRP\nAj4GXF5VD/WfHvBHasA1tgHbAGZnZ5fRZp/5m1b+Zxty8r2H/8/+V2cvmlAnj3byvR895vld9x6n\nRtbImc9++qRbWJUPTPsETLlXnrmK/BvRSHfuSTawGOzvr6qPDyhZADb37J8E3N9fVFXbq2ququZm\nZmZW0q8kaQSjPC0T4AZgf1W9Y4myHcCru6dmXgwcqaqDY+xTkrQMoyzLnAW8Crgryd7u2JXALEBV\nXQfsBF4BHAC+D1wy/lYlSaMaGu5V9XkGr6n31hRw6biakiStjp9QlaQGGe6S1CDDXZIaZLhLUoMM\nd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCX\npAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KCh4Z7kxiQPJLl7ifNn\nJzmSZG/3umr8bUqSlmP9CDU3A9cA7z1Gzeeq6ryxdCRJWrWhd+5V9Vng8HHoRZI0JuNac39Jki8l\n+VSSFy5VlGRbkvkk84cOHRrTW0uS+o0j3PcAz6qqFwHvBj65VGFVba+quaqam5mZGcNbS5IGWXW4\nV9VDVfVwt70T2JBk46o7kySt2KrDPckzk6TbPqO75oOrva4kaeWGPi2T5IPA2cDGJAvAW4ANAFV1\nHXAh8IYkR4EfAFurqtasY0nSUEPDvaouHnL+GhYflZQkPUb4CVVJapDhLkkNMtwlqUGGuyQ1yHCX\npAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lq\nkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatDQcE9yY5IHkty9xPkkeVeSA0nu\nTHL6+NuUJC3HKHfuNwPnHuP8y4FTutc24NrVtyVJWo2h4V5VnwUOH6PkAuC9tegLwAlJThxXg5Kk\n5RvHmvsm4L6e/YXumCRpQtaP4RoZcKwGFibbWFy6YXZ2dgxvrV4n3/vRSbcg6TFiHHfuC8Dmnv2T\ngPsHFVbV9qqaq6q5mZmZMby1JGmQcYT7DuDV3VMzLwaOVNXBMVxXkrRCQ5dlknwQOBvYmGQBeAuw\nAaCqrgN2Aq8ADgDfBy5Zq2YlSaMZGu5VdfGQ8wVcOraOJEmr5idUJalBhrskNchwl6QGGe6S1CDD\nXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwl\nqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWikcE9ybpKvJDmQ5M0D\nzr82yaEke7vX68ffqiRpVOuHFSRZB7wH+DVgAfhikh1VdU9f6Yer6rI16FGStEyj3LmfARyoqq9V\n1Y+BDwEXrG1bkqTVGCXcNwH39ewvdMf6/U6SO5PcmmTzoAsl2ZZkPsn8oUOHVtCuJGkUo4R7Bhyr\nvv1/ALZU1S8B/wzcMuhCVbW9quaqam5mZmZ5nUqSRjZKuC8AvXfiJwH39xZU1YNV9aNu92+BXx5P\ne5KklRgl3L8InJLk2UkeB2wFdvQWJDmxZ/d8YP/4WpQkLdfQp2Wq6miSy4BPA+uAG6tqX5Krgfmq\n2gH8UZLzgaPAYeC1a9izJGmIoeEOUFU7gZ19x67q2b4CuGK8rUmSVspPqEpSgwx3SWqQ4S5JDTLc\nJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12S\nGmS4S1KDDHdJatB0hvttl7d73WXU/u7u56+gmfFd88x9V69J7WOhhzXx1qdO9v2nzJWfuGvSLaxZ\nD1ve/I9rct1e0xnukqRjMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkho0UrgnOTfJV5IcSPLmAecfn+TD\n3fldSbaMu1FJ0uiGhnuSdcB7gJcDLwAuTvKCvrLXAd+pqucC7wTePu5GJUmjG+XO/QzgQFV9rap+\nDHwIuKCv5gLglm77VuCcJBlfm5Kk5Rgl3DcB9/XsL3THBtZU1VHgCPCMcTQoSVq+VNWxC5KLgN+o\nqtd3+68CzqiqP+yp2dfVLHT7X+1qHuy71jZgW7f7POAr4xrIBGwEvj3pJtZAq+OCdsfmuKbPasb2\nrKqaGVa0foQLLQCbe/ZPAu5fomYhyXrgqcDh/gtV1XZg+wjv+ZiXZL6q5ibdx7i1Oi5od2yOa/oc\nj7GNsizzReCUJM9O8jhgK7Cjr2YH8Jpu+0Lgjhr2K4Ekac0MvXOvqqNJLgM+DawDbqyqfUmuBuar\nagdwA/C+JAdYvGPfupZNS5KObZRlGapqJ7Cz79hVPds/BC4ab2uPeU0sLw3Q6rig3bE5rumz5mMb\n+j9UJUnTx68fkKQGGe4jSrIuyb8lua3bvznJfyTZ271OnXSPK5Hk60nu6sYw3x17epLbk/x79/Np\nk+5zuZYY11uTfLNnzl4x6T5XIskJSW5N8uUk+5O8pJE5GzSuqZ6zJM/r6X1vkoeSXH485stlmREl\n+WNgDnhKVZ2X5Gbgtqq6dbKdrU6SrwNzVfXtnmN/CRyuqr/ovkvoaVX1Z5PqcSWWGNdbgYer6q8m\n1dc4JLkF+FxVXd89wfYE4Eqmf84GjetyGpgz+J+vcvkmcCZwKWs8X965jyDJScBvAtdPupfjpPfr\nJG4BfmuCvahHkqcAL2XxCTWq6sdV9V2mfM6OMa6WnAN8taq+wXGYL8N9NH8D/Cnw077jb0tyZ5J3\nJnn8BPoahwL+Kcnu7hPEAL9QVQcBup8/P7HuVm7QuAAu6+bsxmlcugCeAxwCbuqWCa9P8kSmf86W\nGhdM/5w9YivwwW57zefLcB8iyXnAA1W1u+/UFcDzgV8Bng5M1a/APc6qqtNZ/NbPS5O8dNINjcmg\ncV0LnAycChwE/nqC/a3UeuB04NqqOg34L+BRX8M9hZYaVwtzRrfMdD7w0eP1nob7cGcB53druB8C\nXpbk76rqYC36EXATi9+eOXWq6v7u5wPAJ1gcx7eSnAjQ/Xxgch2uzKBxVdW3quonVfVT4G+Zzjlb\nABaqale3fyuLoTjtczZwXI3MGSzeZOypqm91+2s+X4b7EFV1RVWdVFVbWPy16o6q+r2eiQmL62V3\nT7DNFUnyxCRPfmQb+HUWx9H7dRKvAf5+Mh2uzFLjemTOOr/NFM5ZVf0ncF+S53WHzgHuYcrnbKlx\ntTBnnYv53yUZOA7z5dMyy5DkbOBPuqdl7gBmgAB7gT+oqocn2d9yJXkOi3e1sPhr8Qeq6m1JngF8\nBJgF7gUuqqpHfRHcY9UxxvU+Fn+9L+DrwO8/su45TbrHbq8HHgd8DbiExRu1qZ0zWHJc72LK5yzJ\nE1j8SvTnVNWR7tia/zdmuEtSg1yWkaQGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXo\nvwG6MGjkr4YTOAAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<matplotlib.figure.Figure at 0x10a4d8a90>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Visualize the groups and see how much overlap there is\n", | |
"sns.distplot(drug, bins=5, kde=False, rug=True);\n", | |
"sns.distplot(placebo, bins=5, kde=False, rug=True);" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 112, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Let's see how using different statistics effects hypothesis testing\n", | |
"\n", | |
"====================\n", | |
"Statistic: Mean\n", | |
"The difference between groups: 7.633\n", | |
"p value: 0.0212\n", | |
"\n", | |
"====================\n", | |
"Statistic: Median\n", | |
"The difference between groups: 2.5\n", | |
"p value: 0.2272\n", | |
"\n", | |
"====================\n", | |
"Statistic: Harmonic Mean\n", | |
"The difference between groups: 7.141\n", | |
"p value: 0.0216\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"print(f\"Let's see how using different statistics effects hypothesis testing\")\n", | |
"print()\n", | |
"\n", | |
"comparison_types = [mean, median, harmonic_mean]\n", | |
"\n", | |
"for comparison_type in comparison_types:\n", | |
" print(\"=\"*20)\n", | |
" print(f\"Statistic: {comparison_type.__name__.title().replace('_', ' ')}\")\n", | |
" \n", | |
" obs_diff = comparison_type(drug) - comparison_type(placebo)\n", | |
" print(f\"The difference between groups: {obs_diff:.4}\")\n", | |
"\n", | |
" cases = 0\n", | |
" n_cases = 10_000\n", | |
" for i in range(n_cases):\n", | |
" shuffle(comb) \n", | |
" new_diff = comparison_type(comb[:len(drug)]) - comparison_type(comb[len(drug):])\n", | |
" cases += new_diff >= obs_diff\n", | |
" \n", | |
" p_value = cases / n_cases\n", | |
" print(\"p value: \", p_value)\n", | |
" print()\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"<br>\n", | |
"<br> \n", | |
"<br>\n", | |
"\n", | |
"----" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment