viniciusmss · October 10, 2018 11:59
diff --git a/PCW5-2.ipynb b/PCW5-2.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy import stats\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>father</th>\n",
       "      <th>son</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>farm</td>\n",
       "      <td>farm</td>\n",
       "      <td>703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>farm</td>\n",
       "      <td>unskilled</td>\n",
       "      <td>1478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>farm</td>\n",
       "      <td>skilled</td>\n",
       "      <td>1430</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>farm</td>\n",
       "      <td>professional</td>\n",
       "      <td>1109</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>unskilled</td>\n",
       "      <td>farm</td>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>unskilled</td>\n",
       "      <td>unskilled</td>\n",
       "      <td>1756</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>unskilled</td>\n",
       "      <td>skilled</td>\n",
       "      <td>1630</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>unskilled</td>\n",
       "      <td>professional</td>\n",
       "      <td>1568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>skilled</td>\n",
       "      <td>farm</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>skilled</td>\n",
       "      <td>unskilled</td>\n",
       "      <td>1453</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>skilled</td>\n",
       "      <td>skilled</td>\n",
       "      <td>2068</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>skilled</td>\n",
       "      <td>professional</td>\n",
       "      <td>2483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>professional</td>\n",
       "      <td>farm</td>\n",
       "      <td>61</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>professional</td>\n",
       "      <td>unskilled</td>\n",
       "      <td>749</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>professional</td>\n",
       "      <td>skilled</td>\n",
       "      <td>1183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>professional</td>\n",
       "      <td>professional</td>\n",
       "      <td>3315</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          father           son  count\n",
       "0           farm          farm    703\n",
       "1           farm     unskilled   1478\n",
       "2           farm       skilled   1430\n",
       "3           farm  professional   1109\n",
       "4      unskilled          farm     58\n",
       "5      unskilled     unskilled   1756\n",
       "6      unskilled       skilled   1630\n",
       "7      unskilled  professional   1568\n",
       "8        skilled          farm     63\n",
       "9        skilled     unskilled   1453\n",
       "10       skilled       skilled   2068\n",
       "11       skilled  professional   2483\n",
       "12  professional          farm     61\n",
       "13  professional     unskilled    749\n",
       "14  professional       skilled   1183\n",
       "15  professional  professional   3315"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Father/son occupation counts: one row per (father, son) pair.\n",
    "df = pd.read_csv(\"socialmobility.csv\")\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The 95% CI for the probability that a son will become a skilled laborer \n",
      "if his father was an unskilled laborer is: [0.31231278 0.33818487]\n"
     ]
    }
   ],
   "source": [
    "# Posterior over the son's occupation for unskilled fathers: a uniform\n",
    "# Dirichlet(1, ..., 1) prior updated with the observed counts.\n",
    "df_subset = df[df.father == \"unskilled\"]\n",
    "alpha_priors = np.ones(len(df_subset), dtype=int)\n",
    "alpha_posteriors = alpha_priors + df_subset['count'].values\n",
    "dist = stats.dirichlet(alpha_posteriors)\n",
    "samples = dist.rvs(size=100000)\n",
    "# Find the 'skilled' column by label rather than hard-coding index 2, so the\n",
    "# interval does not silently depend on the row order of the CSV.\n",
    "skilled_idx = df_subset['son'].tolist().index(\"skilled\")\n",
    "print(\"The 95% CI for the probability that a son will become a skilled laborer \\nif his father \\\n",
    "was an unskilled laborer is:\", np.percentile(samples[:, skilled_idx], [2.5, 97.5]))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The 95% CI for the probability that a father works on a farm \n",
      "if his son works as a professional is: [0.1238308  0.13820177]\n"
     ]
    }
   ],
   "source": [
    "# Posterior over the father's occupation for professional sons: a uniform\n",
    "# Dirichlet(1, ..., 1) prior updated with the observed counts.\n",
    "df_subset = df[df.son == \"professional\"]\n",
    "alpha_priors = np.ones(len(df_subset), dtype=int)\n",
    "alpha_posteriors = alpha_priors + df_subset['count'].values\n",
    "dist = stats.dirichlet(alpha_posteriors)\n",
    "samples = dist.rvs(size=100000)\n",
    "# Find the 'farm' column by label rather than hard-coding index 0, so the\n",
    "# interval does not silently depend on the row order of the CSV.\n",
    "farm_idx = df_subset['father'].tolist().index(\"farm\")\n",
    "print(\"The 95% CI for the probability that a father works on a farm \\nif his son \\\n",
    "works as a professional is:\", np.percentile(samples[:, farm_idx], [2.5, 97.5]))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"from scipy import stats\n",
	"import numpy as np\n",
	"import pandas as pd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>father</th>\n",
	" <th>son</th>\n",
	" <th>count</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>farm</td>\n",
	" <td>farm</td>\n",
	" <td>703</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>farm</td>\n",
	" <td>unskilled</td>\n",
	" <td>1478</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>farm</td>\n",
	" <td>skilled</td>\n",
	" <td>1430</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>farm</td>\n",
	" <td>professional</td>\n",
	" <td>1109</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>unskilled</td>\n",
	" <td>farm</td>\n",
	" <td>58</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>5</th>\n",
	" <td>unskilled</td>\n",
	" <td>unskilled</td>\n",
	" <td>1756</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>6</th>\n",
	" <td>unskilled</td>\n",
	" <td>skilled</td>\n",
	" <td>1630</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>7</th>\n",
	" <td>unskilled</td>\n",
	" <td>professional</td>\n",
	" <td>1568</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>8</th>\n",
	" <td>skilled</td>\n",
	" <td>farm</td>\n",
	" <td>63</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>9</th>\n",
	" <td>skilled</td>\n",
	" <td>unskilled</td>\n",
	" <td>1453</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>10</th>\n",
	" <td>skilled</td>\n",
	" <td>skilled</td>\n",
	" <td>2068</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>11</th>\n",
	" <td>skilled</td>\n",
	" <td>professional</td>\n",
	" <td>2483</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>12</th>\n",
	" <td>professional</td>\n",
	" <td>farm</td>\n",
	" <td>61</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>13</th>\n",
	" <td>professional</td>\n",
	" <td>unskilled</td>\n",
	" <td>749</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>14</th>\n",
	" <td>professional</td>\n",
	" <td>skilled</td>\n",
	" <td>1183</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>15</th>\n",
	" <td>professional</td>\n",
	" <td>professional</td>\n",
	" <td>3315</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" father son count\n",
	"0 farm farm 703\n",
	"1 farm unskilled 1478\n",
	"2 farm skilled 1430\n",
	"3 farm professional 1109\n",
	"4 unskilled farm 58\n",
	"5 unskilled unskilled 1756\n",
	"6 unskilled skilled 1630\n",
	"7 unskilled professional 1568\n",
	"8 skilled farm 63\n",
	"9 skilled unskilled 1453\n",
	"10 skilled skilled 2068\n",
	"11 skilled professional 2483\n",
	"12 professional farm 61\n",
	"13 professional unskilled 749\n",
	"14 professional skilled 1183\n",
	"15 professional professional 3315"
	]
	},
	"execution_count": 31,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Father/son occupation counts: one row per (father, son) pair.\n",
	"df = pd.read_csv(\"socialmobility.csv\")\n",
	"df"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 66,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"The 95% CI for the probability that a son will become a skilled laborer \n",
	"if his father was an unskilled laborer is: [0.31231278 0.33818487]\n"
	]
	}
	],
	"source": [
	"# Posterior over the son's occupation for unskilled fathers: a uniform\n",
	"# Dirichlet(1, ..., 1) prior updated with the observed counts.\n",
	"df_subset = df[df.father == \"unskilled\"]\n",
	"alpha_priors = np.ones(len(df_subset), dtype=int)\n",
	"alpha_posteriors = alpha_priors + df_subset['count'].values\n",
	"dist = stats.dirichlet(alpha_posteriors)\n",
	"samples = dist.rvs(size=100000)\n",
	"# Find the 'skilled' column by label rather than hard-coding index 2, so the\n",
	"# interval does not silently depend on the row order of the CSV.\n",
	"skilled_idx = df_subset['son'].tolist().index(\"skilled\")\n",
	"print(\"The 95% CI for the probability that a son will become a skilled laborer \\nif his father \\\n",
	"was an unskilled laborer is:\", np.percentile(samples[:, skilled_idx], [2.5, 97.5]))\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 68,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"The 95% CI for the probability that a father works on a farm \n",
	"if his son works as a professional is: [0.1238308 0.13820177]\n"
	]
	}
	],
	"source": [
	"# Posterior over the father's occupation for professional sons: a uniform\n",
	"# Dirichlet(1, ..., 1) prior updated with the observed counts.\n",
	"df_subset = df[df.son == \"professional\"]\n",
	"alpha_priors = np.ones(len(df_subset), dtype=int)\n",
	"alpha_posteriors = alpha_priors + df_subset['count'].values\n",
	"dist = stats.dirichlet(alpha_posteriors)\n",
	"samples = dist.rvs(size=100000)\n",
	"# Find the 'farm' column by label rather than hard-coding index 0, so the\n",
	"# interval does not silently depend on the row order of the CSV.\n",
	"farm_idx = df_subset['father'].tolist().index(\"farm\")\n",
	"print(\"The 95% CI for the probability that a father works on a farm \\nif his son \\\n",
	"works as a professional is:\", np.percentile(samples[:, farm_idx], [2.5, 97.5]))\n"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}