NaimKabir · March 18, 2019 17:21
diff --git a/rct.ipynb b/rct.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from itertools import product"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Participant simulation\n",
    "\n",
    "# size of the simulated experiment: how many participants we generate\n",
    "\n",
    "n = 1_000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# let's create those participant partitions we discussed above\n",
    "\n",
    "compliance_partitions = ['always_taker', 'complier', 'defier', 'never_taker']\n",
    "response_partitions = ['always_better', 'helped', 'hurt', 'never_better']\n",
    "\n",
    "# every (compliance, response) pairing, one row per pairing\n",
    "partition_types = np.array(list(product(compliance_partitions, response_partitions)))\n",
    "\n",
    "# fix the seed before the first random draw so that re-running the notebook\n",
    "# from a fresh kernel reproduces the same simulated population\n",
    "np.random.seed(0)\n",
    "\n",
    "# we can also simulate probabilities that our participants will belong to one of the\n",
    "# possible behavior combinations (4 x 4 = 16 with the lists above)\n",
    "\n",
    "partition_probabilities = np.random.random(len(partition_types))\n",
    "partition_probabilities = partition_probabilities / partition_probabilities.sum()\n",
    "\n",
    "# to be a true set of probabilities, the vector sum needs to be 1.\n",
    "# floating-point rounding can leave the sum a hair away from exactly 1,\n",
    "# so we compare with a tolerance instead of using ==\n",
    "assert np.isclose(partition_probabilities.sum(), 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# drawing participant compliance and response behaviors according to the\n",
    "# specified distribution\n",
    "\n",
    "participant_partition = np.random.choice(len(partition_types), n, p=partition_probabilities)\n",
    "\n",
    "# each row of partition_types is a (compliance, response) pair, so transposing the\n",
    "# selected rows gives one array per column\n",
    "compliance_type, response_type = partition_types[participant_partition].T\n",
    "\n",
    "# splitting participants evenly between the Control and Treatment groups.\n",
    "# the first half goes to Control and the rest to Treatment; participant types were\n",
    "# drawn independently above, so the order carries no information about them.\n",
    "# when n is odd the extra participant goes to Treatment, so the assignment vector\n",
    "# always has exactly n entries.\n",
    "\n",
    "assignments = np.array(['control', 'treatment'])\n",
    "participant_assignment = assignments[np.repeat([0, 1], [n // 2, n - n // 2])]\n",
    "\n",
    "# compiling all information into our dataframe\n",
    "\n",
    "df = pd.DataFrame({'assignment': participant_assignment,\n",
    "                   'compliance_type': compliance_type,\n",
    "                   'response_type': response_type})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate outcomes\n",
    "\n",
    "# depending on assignment and compliance type, did the participant take the treatment?\n",
    "\n",
    "# if the participant is an always_taker, they'll always take the treatment.\n",
    "# if they're a complier, they'll take the treatment as long as they're in the treatment condition.\n",
    "# if they're a defier, they'll only take the treatment if they were in the control condition.\n",
    "# a never_taker matches none of the conditions below, so they never take the treatment.\n",
    "df['took_treatment'] = (\n",
    "    (df.compliance_type == 'always_taker')\n",
    "    | ((df.compliance_type == 'complier') & (df.assignment == 'treatment'))\n",
    "    | ((df.compliance_type == 'defier') & (df.assignment == 'control'))\n",
    ")\n",
    "\n",
    "# depending on whether they took the treatment and their response_type, \n",
    "# what was the participant's outcome?\n",
    "\n",
    "# if the participant is of the always_better type, they'll definitely have a good outcome.\n",
    "# if the participant is of the 'helped' type, they'll have a good outcome as long as they\n",
    "# took treatment.\n",
    "# if the participant is of the 'hurt' type, they'll have a good outcome as long as they\n",
    "# did NOT take treatment (without this branch 'hurt' would behave exactly like 'never_better').\n",
    "# a never_better participant matches none of the conditions below, so their outcome is never good.\n",
    "df['good_outcome'] = (\n",
    "    (df.response_type == 'always_better')\n",
    "    | ((df.response_type == 'helped') & df.took_treatment)\n",
    "    | ((df.response_type == 'hurt') & ~df.took_treatment)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>probabilities</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>treatment/untreated/bad</th>\n",
       "      <td>0.278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>treatment/untreated/good</th>\n",
       "      <td>0.174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>treatment/treated/bad</th>\n",
       "      <td>0.344</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>treatment/treated/good</th>\n",
       "      <td>0.204</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>control/untreated/bad</th>\n",
       "      <td>0.336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>control/untreated/good</th>\n",
       "      <td>0.182</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>control/treated/bad</th>\n",
       "      <td>0.316</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>control/treated/good</th>\n",
       "      <td>0.166</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          probabilities\n",
       "treatment/untreated/bad           0.278\n",
       "treatment/untreated/good          0.174\n",
       "treatment/treated/bad             0.344\n",
       "treatment/treated/good            0.204\n",
       "control/untreated/bad             0.336\n",
       "control/untreated/good            0.182\n",
       "control/treated/bad               0.316\n",
       "control/treated/good              0.166"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# we can observe the probabilities of each Treatment / Outcome combination\n",
    "# that would emerge within each Assignment group\n",
    "\n",
    "# get all the probabilities we need: p(x,y|z) for each z,x,y combination\n",
    "# (each value is a share of one assignment group, so a group's four entries sum to 1)\n",
    "\n",
    "# we can get all states by taking the cartesian product of the different\n",
    "# binary possibilities: treatment vs. control group, took_treatment=True vs. False, etc.\n",
    "# product() returns a one-shot iterator, so we store it as a list to keep `states`\n",
    "# usable after the loop below has gone through it\n",
    "states = list(product(['treatment','control'],[False, True], [False, True]))\n",
    "\n",
    "# this loop calculates the probability of each of the states we generated above,\n",
    "# filling the dict in the same order as `states`\n",
    "p_states = {}\n",
    "for assignment, treated, outcome in states:\n",
    "    # restrict to the participants assigned to this group (computed once per state)\n",
    "    group = df[df.assignment == assignment]\n",
    "    label = f\"{assignment}/{'treated' if treated else 'untreated'}/{'good' if outcome else 'bad'}\"\n",
    "    p_states[label] = ((group.took_treatment == treated) & (group.good_outcome == outcome)).mean()\n",
    "\n",
    "# display:\n",
    "pd.DataFrame(p_states, index=['probabilities']).T"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"import numpy as np\n",
	"import pandas as pd\n",
	"from itertools import product"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Participant simulation\n",
	"\n",
	"# size of the simulated experiment: how many participants we generate\n",
	"\n",
	"n = 1_000"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"# let's create those participant partitions we discussed above\n",
	"\n",
	"compliance_partitions = ['always_taker', 'complier', 'defier', 'never_taker']\n",
	"response_partitions = ['always_better', 'helped', 'hurt', 'never_better']\n",
	"\n",
	"# every (compliance, response) pairing, one row per pairing\n",
	"partition_types = np.array(list(product(compliance_partitions, response_partitions)))\n",
	"\n",
	"# fix the seed before the first random draw so that re-running the notebook\n",
	"# from a fresh kernel reproduces the same simulated population\n",
	"np.random.seed(0)\n",
	"\n",
	"# we can also simulate probabilities that our participants will belong to one of the\n",
	"# possible behavior combinations (4 x 4 = 16 with the lists above)\n",
	"\n",
	"partition_probabilities = np.random.random(len(partition_types))\n",
	"partition_probabilities = partition_probabilities / partition_probabilities.sum()\n",
	"\n",
	"# to be a true set of probabilities, the vector sum needs to be 1.\n",
	"# floating-point rounding can leave the sum a hair away from exactly 1,\n",
	"# so we compare with a tolerance instead of using ==\n",
	"assert np.isclose(partition_probabilities.sum(), 1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [],
	"source": [
	"# drawing participant compliance and response behaviors according to the\n",
	"# specified distribution\n",
	"\n",
	"participant_partition = np.random.choice(len(partition_types), n, p=partition_probabilities)\n",
	"\n",
	"# each row of partition_types is a (compliance, response) pair, so transposing the\n",
	"# selected rows gives one array per column\n",
	"compliance_type, response_type = partition_types[participant_partition].T\n",
	"\n",
	"# splitting participants evenly between the Control and Treatment groups.\n",
	"# the first half goes to Control and the rest to Treatment; participant types were\n",
	"# drawn independently above, so the order carries no information about them.\n",
	"# when n is odd the extra participant goes to Treatment, so the assignment vector\n",
	"# always has exactly n entries.\n",
	"\n",
	"assignments = np.array(['control', 'treatment'])\n",
	"participant_assignment = assignments[np.repeat([0, 1], [n // 2, n - n // 2])]\n",
	"\n",
	"# compiling all information into our dataframe\n",
	"\n",
	"df = pd.DataFrame({'assignment': participant_assignment,\n",
	"                   'compliance_type': compliance_type,\n",
	"                   'response_type': response_type})"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Simulate outcomes\n",
	"\n",
	"# depending on assignment and compliance type, did the participant take the treatment?\n",
	"\n",
	"# if the participant is an always_taker, they'll always take the treatment.\n",
	"# if they're a complier, they'll take the treatment as long as they're in the treatment condition.\n",
	"# if they're a defier, they'll only take the treatment if they were in the control condition.\n",
	"# a never_taker matches none of the conditions below, so they never take the treatment.\n",
	"df['took_treatment'] = (\n",
	"    (df.compliance_type == 'always_taker')\n",
	"    | ((df.compliance_type == 'complier') & (df.assignment == 'treatment'))\n",
	"    | ((df.compliance_type == 'defier') & (df.assignment == 'control'))\n",
	")\n",
	"\n",
	"# depending on whether they took the treatment and their response_type, \n",
	"# what was the participant's outcome?\n",
	"\n",
	"# if the participant is of the always_better type, they'll definitely have a good outcome.\n",
	"# if the participant is of the 'helped' type, they'll have a good outcome as long as they\n",
	"# took treatment.\n",
	"# if the participant is of the 'hurt' type, they'll have a good outcome as long as they\n",
	"# did NOT take treatment (without this branch 'hurt' would behave exactly like 'never_better').\n",
	"# a never_better participant matches none of the conditions below, so their outcome is never good.\n",
	"df['good_outcome'] = (\n",
	"    (df.response_type == 'always_better')\n",
	"    | ((df.response_type == 'helped') & df.took_treatment)\n",
	"    | ((df.response_type == 'hurt') & ~df.took_treatment)\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>probabilities</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>treatment/untreated/bad</th>\n",
	" <td>0.278</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>treatment/untreated/good</th>\n",
	" <td>0.174</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>treatment/treated/bad</th>\n",
	" <td>0.344</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>treatment/treated/good</th>\n",
	" <td>0.204</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>control/untreated/bad</th>\n",
	" <td>0.336</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>control/untreated/good</th>\n",
	" <td>0.182</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>control/treated/bad</th>\n",
	" <td>0.316</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>control/treated/good</th>\n",
	" <td>0.166</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" probabilities\n",
	"treatment/untreated/bad 0.278\n",
	"treatment/untreated/good 0.174\n",
	"treatment/treated/bad 0.344\n",
	"treatment/treated/good 0.204\n",
	"control/untreated/bad 0.336\n",
	"control/untreated/good 0.182\n",
	"control/treated/bad 0.316\n",
	"control/treated/good 0.166"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# we can observe the probabilities of each Treatment / Outcome combination\n",
	"# that would emerge within each Assignment group\n",
	"\n",
	"# get all the probabilities we need: p(x,y|z) for each z,x,y combination\n",
	"# (each value is a share of one assignment group, so a group's four entries sum to 1)\n",
	"\n",
	"# we can get all states by taking the cartesian product of the different\n",
	"# binary possibilities: treatment vs. control group, took_treatment=True vs. False, etc.\n",
	"# product() returns a one-shot iterator, so we store it as a list to keep `states`\n",
	"# usable after the loop below has gone through it\n",
	"states = list(product(['treatment','control'],[False, True], [False, True]))\n",
	"\n",
	"# this loop calculates the probability of each of the states we generated above,\n",
	"# filling the dict in the same order as `states`\n",
	"p_states = {}\n",
	"for assignment, treated, outcome in states:\n",
	"    # restrict to the participants assigned to this group (computed once per state)\n",
	"    group = df[df.assignment == assignment]\n",
	"    label = f\"{assignment}/{'treated' if treated else 'untreated'}/{'good' if outcome else 'bad'}\"\n",
	"    p_states[label] = ((group.took_treatment == treated) & (group.good_outcome == outcome)).mean()\n",
	"\n",
	"# display:\n",
	"pd.DataFrame(p_states, index=['probabilities']).T"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}