Skip to content

Instantly share code, notes, and snippets.

@viniciusmss
Created October 10, 2018 11:59
Show Gist options
  • Save viniciusmss/a659e31b157855386356f3d12003d949 to your computer and use it in GitHub Desktop.
Save viniciusmss/a659e31b157855386356f3d12003d949 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from scipy import stats\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>father</th>\n",
" <th>son</th>\n",
" <th>count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>farm</td>\n",
" <td>farm</td>\n",
" <td>703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>farm</td>\n",
" <td>unskilled</td>\n",
" <td>1478</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>farm</td>\n",
" <td>skilled</td>\n",
" <td>1430</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>farm</td>\n",
" <td>professional</td>\n",
" <td>1109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>unskilled</td>\n",
" <td>farm</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>unskilled</td>\n",
" <td>unskilled</td>\n",
" <td>1756</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>unskilled</td>\n",
" <td>skilled</td>\n",
" <td>1630</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>unskilled</td>\n",
" <td>professional</td>\n",
" <td>1568</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>skilled</td>\n",
" <td>farm</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>skilled</td>\n",
" <td>unskilled</td>\n",
" <td>1453</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>skilled</td>\n",
" <td>skilled</td>\n",
" <td>2068</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>skilled</td>\n",
" <td>professional</td>\n",
" <td>2483</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>professional</td>\n",
" <td>farm</td>\n",
" <td>61</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>professional</td>\n",
" <td>unskilled</td>\n",
" <td>749</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>professional</td>\n",
" <td>skilled</td>\n",
" <td>1183</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>professional</td>\n",
" <td>professional</td>\n",
" <td>3315</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" father son count\n",
"0 farm farm 703\n",
"1 farm unskilled 1478\n",
"2 farm skilled 1430\n",
"3 farm professional 1109\n",
"4 unskilled farm 58\n",
"5 unskilled unskilled 1756\n",
"6 unskilled skilled 1630\n",
"7 unskilled professional 1568\n",
"8 skilled farm 63\n",
"9 skilled unskilled 1453\n",
"10 skilled skilled 2068\n",
"11 skilled professional 2483\n",
"12 professional farm 61\n",
"13 professional unskilled 749\n",
"14 professional skilled 1183\n",
"15 professional professional 3315"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the father/son occupation counts used by every cell below.\n",
"df = pd.read_csv(\"socialmobility.csv\")\n",
"\n",
"# Fail fast with a clear message if the file does not have the columns the\n",
"# analysis cells select on, instead of a KeyError further down.\n",
"missing_columns = {\"father\", \"son\", \"count\"} - set(df.columns)\n",
"assert not missing_columns, \"socialmobility.csv is missing columns: {}\".format(sorted(missing_columns))\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The 95% CI for the probability that a son will be come a skilled laborer \n",
"if his father was an unskilled laborer is: [0.31231278 0.33818487]\n"
]
}
],
"source": [
"# Posterior over the son's occupation given an unskilled father:\n",
"# a flat Dirichlet(1, ..., 1) prior updated with the observed counts.\n",
"df_subset = df[df.father == \"unskilled\"]\n",
"alpha_priors = np.ones(len(df_subset), dtype=int)\n",
"alpha_posteriors = alpha_priors + df_subset[\"count\"].to_numpy()\n",
"dist = stats.dirichlet(alpha_posteriors)\n",
"\n",
"# Fixed seed so that Restart & Run All reproduces the printed interval.\n",
"samples = dist.rvs(size=100000, random_state=0)\n",
"\n",
"# Columns of `samples` follow the row order of df_subset, so find the\n",
"# \"skilled\" column by label rather than assuming its position in the CSV.\n",
"# Raises ValueError if the category is absent.\n",
"skilled_col = df_subset[\"son\"].tolist().index(\"skilled\")\n",
"\n",
"print(\n",
"    \"The 95% CI for the probability that a son will become a skilled laborer \\n\"\n",
"    \"if his father was an unskilled laborer is:\",\n",
"    np.percentile(samples[:, skilled_col], [2.5, 97.5]),\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The 95% CI for the probability that a father works on a farm \n",
"if his son works as a professional is: [0.1238308 0.13820177]\n"
]
}
],
"source": [
"# Posterior over the father's occupation given a professional son:\n",
"# a flat Dirichlet(1, ..., 1) prior updated with the observed counts.\n",
"df_subset = df[df.son == \"professional\"]\n",
"alpha_priors = np.ones(len(df_subset), dtype=int)\n",
"alpha_posteriors = alpha_priors + df_subset[\"count\"].to_numpy()\n",
"dist = stats.dirichlet(alpha_posteriors)\n",
"\n",
"# Fixed seed so that Restart & Run All reproduces the printed interval.\n",
"samples = dist.rvs(size=100000, random_state=0)\n",
"\n",
"# Columns of `samples` follow the row order of df_subset, so find the\n",
"# \"farm\" column by label rather than assuming its position in the CSV.\n",
"# Raises ValueError if the category is absent.\n",
"farm_col = df_subset[\"father\"].tolist().index(\"farm\")\n",
"\n",
"print(\n",
"    \"The 95% CI for the probability that a father works on a farm \\n\"\n",
"    \"if his son works as a professional is:\",\n",
"    np.percentile(samples[:, farm_col], [2.5, 97.5]),\n",
")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment