Skip to content

Instantly share code, notes, and snippets.

@ischurov
Created February 9, 2025 21:09
Show Gist options
  • Save ischurov/8181c056ba02bad2ab161dd890693bd9 to your computer and use it in GitHub Desktop.
Save ischurov/8181c056ba02bad2ab161dd890693bd9 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"$$P(A)=1/2$$\n",
"\n",
"$P(B)=p$ is unknown.\n",
"\n",
"$A$ and $B$ are independent."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from scipy.stats import chi2_contingency\n",
"from tqdm.auto import tqdm"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"def p_value(df, correction):\n",
" chi2, p, dof, expected = chi2_contingency(df, correction=correction)\n",
" return p"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e31f1ef79ae546628ab9026d4b41bbcd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7af83b53e01d4594a4792c9dcf08fc24",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/3 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "46eba5e423604b69ab20d318d3509267",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "533f52c6a4dc43e98448a699ce4c933a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "c3ac532b8af4491c928c200db73b3d35",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "77f4aa1fc7d240b8a7c8a3bcb846dd70",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/3 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b1921634526345108341baf61e7fceb2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6b0a30fa806f4c0995c4c248ffa41408",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "13114d4501d54d9d889c7abd870c8230",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1000 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Monte Carlo simulation of the chi-square test under the null hypothesis.\n",
"# A and B are independent by construction, so every rejection is a false positive.\n",
"#\n",
"# A is chosen exactly q * sample_size times\n",
"# B is chosen randomly with probability p\n",
"\n",
"sample_size = 100\n",
"q = 0.5\n",
"n_iterations = 1000\n",
"seed = 0  # fixed so that Restart & Run All reproduces the same numbers\n",
"\n",
"rng = np.random.default_rng(seed)\n",
"\n",
"# round() instead of int(): int() truncates float error (100 * 0.29 -> 28), and\n",
"# taking the remainder guarantees len(a) == sample_size for any q.\n",
"n_a = round(sample_size * q)\n",
"# `a` does not depend on the loop variables, so build it once.\n",
"a = np.repeat([0, 1], [sample_size - n_a, n_a])\n",
"\n",
"output = []\n",
"for correction in tqdm([True, False], desc=\"correction\"):\n",
"    for p in tqdm([0.3, 0.5, 0.9], desc=\"p\", leave=False):\n",
"        for it in tqdm(range(n_iterations), desc=\"iterations\", leave=False):\n",
"            b = rng.choice([0, 1], p=[1 - p, p], size=sample_size)\n",
"            # fill_value=0: an (a, b) combination that never occurs must be a\n",
"            # count of 0 in the contingency table, not NaN.\n",
"            null_df = (\n",
"                pd.DataFrame({\"a\": a, \"b\": b})\n",
"                .groupby([\"a\", \"b\"])\n",
"                .size()\n",
"                .unstack(fill_value=0)\n",
"            )\n",
"            output.append(\n",
"                {\n",
"                    \"p\": p,\n",
"                    \"correction\": correction,\n",
"                    \"p_value\": p_value(null_df, correction),\n",
"                }\n",
"            )\n",
"result = pd.DataFrame(output)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>correction</th>\n",
" <th>False</th>\n",
" <th>True</th>\n",
" </tr>\n",
" <tr>\n",
" <th>p</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0.3</th>\n",
" <td>0.056</td>\n",
" <td>0.028</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.5</th>\n",
" <td>0.051</td>\n",
" <td>0.034</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0.9</th>\n",
" <td>0.034</td>\n",
" <td>0.012</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"correction False True \n",
"p \n",
"0.3 0.056 0.028\n",
"0.5 0.051 0.034\n",
"0.9 0.034 0.012"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of simulated p-values below 0.05 for each (p, correction) setting.\n",
"false_positive_rates = (\n",
"    result[\"p_value\"]\n",
"    .lt(0.05)\n",
"    .rename(\"is_significant\")\n",
"    .groupby([result[\"p\"], result[\"correction\"]])\n",
"    .mean()\n",
")\n",
"# Pivot `correction` into columns: one row per p.\n",
"false_positive_rates.unstack()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment