MikeTrizna · November 12, 2020 20:23
diff --git a/environment.yml b/environment.yml
 channels:
 - conda-forge
 dependencies:
 - plotly
 - pandas
 - matplotlib
diff --git a/reproducing_duke.ipynb b/reproducing_duke.ipynb
 {
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Trying to directly replicate http://covid-exposure-modeler-data-devils.cloud.duke.edu/, using code posted at https://github.com/johnpfay/Covid-Exposure-Model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:51.712132Z",
     "start_time": "2020-11-12T17:48:51.034697Z"
    }
   },
   "outputs": [],
   "source": [
    "import plotly.express as px\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:51.717721Z",
     "start_time": "2020-11-12T17:48:51.713635Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_random(var,n=10000):\n",
    "    return np.random.uniform(*var+[n])\n",
    "\n",
    "def get_normal(var,n=10000):\n",
    "    return np.random.normal(*var+[n])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:51.777931Z",
     "start_time": "2020-11-12T17:48:51.719754Z"
    }
   },
   "outputs": [],
   "source": [
    "def update_df(surface_area = 900,\n",
    "              height = 10,\n",
    "              num_faculty = 1,\n",
    "              num_students = 10,\n",
    "              duration = 75,\n",
    "              num_class_periods = 26,\n",
    "              breathing_rate_faculty = [0.027,0.029],\n",
    "              breathing_rate_student = [0.012,0.012],\n",
    "              ventilation_w_outside_air = [1,4],\n",
    "              decay_rate_of_virus = [0,1.0],\n",
    "              deposition_to_surface = [0.3,1.5],\n",
    "              additional_control_measures = [0,0],\n",
    "              quanta_emission_rate_faculty = [1.5,0.71],\n",
    "              quanta_emission_rate_student = [0.69,0.71],\n",
    "              exhalation_mask_efficiency = [0.4,0.6],\n",
    "              inhalation_mask_efficiency = [0.3,0.5],\n",
    "              background_infection_rate_faculty = [0.0070,0.0140],\n",
    "              background_infection_rate_student = [0.0070,0.0140]):\n",
    "    #Create dataframe of 10,000 runs\n",
    "    num_runs = 10000\n",
    "    df = pd.DataFrame(index=np.arange(num_runs))\n",
    "    df['VENT']  = get_random(ventilation_w_outside_air,num_runs)\n",
    "    df['DECAY'] = get_random(decay_rate_of_virus,num_runs)\n",
    "    df['DEP']   = get_random(deposition_to_surface,num_runs)\n",
    "    df['OTHER'] = get_random(additional_control_measures,num_runs)\n",
    "    df['L']     = df['VENT'] + df['DECAY'] + df['DEP'] + df['OTHER']\n",
    "    df['LDUR'] = df['L'] * duration / 60\n",
    "    df['VOL']   = surface_area * height*0.305**3\n",
    "    df['EFFOUT'] = get_random(exhalation_mask_efficiency,num_runs)\n",
    "    df['EMMFx']  = get_normal(quanta_emission_rate_faculty,num_runs)\n",
    "    df['EMMSx']  = get_normal(quanta_emission_rate_student,num_runs)\n",
    "    df['EMMF'] = 10**df['EMMFx']\n",
    "    df['EMMS'] = 10**df['EMMSx']\n",
    "    df['INFRATEF'] = get_random(background_infection_rate_faculty,num_runs)\n",
    "    df['INFRATES'] = get_random(background_infection_rate_student,num_runs)\n",
    "    df['CONCF'] = df['EMMF']*(1-df['EFFOUT'])/(df['L']*df['VOL'])*(1-1/df['LDUR']*(1-np.exp(-df['LDUR'])))\n",
    "    df['CONCS'] = df['EMMS']*(1-df['EFFOUT'])/(df['L']*df['VOL'])*(1-1/df['LDUR']*(1-np.exp(-df['LDUR'])))\n",
    "    df['EFFIN'] = get_random(inhalation_mask_efficiency,num_runs)\n",
    "    df['BRFx']   = get_random(breathing_rate_faculty,num_runs)\n",
    "    df['BRSx']   = get_random(breathing_rate_student,num_runs)\n",
    "    df['BRF']   = 60 * df['BRFx']\n",
    "    df['BRS']   = 60 * df['BRSx']\n",
    "    df['INF_S'] = df['CONCS'] * df['BRF'] * duration/60 * (1-df['EFFIN'])\n",
    "    df['INS_F'] = df['CONCF'] * df['BRS'] * duration/60 * (1-df['EFFIN'])\n",
    "    df['INS_S'] = df['CONCS'] * df['BRS'] * duration/60 * (1-df['EFFIN']) \n",
    "    # INECTION PROBABILITIES FOR FACULTY/STUDENT INFECTION\n",
    "    df['PF_S']  = df['INFRATES'] * (1 - np.exp(-df['INF_S']))\n",
    "    df['PS_F']  = df['INFRATEF'] * (1 - np.exp(-df['INS_F']))\n",
    "    df['PS_S']  = df['INFRATES'] * (1 - np.exp(-df['INS_S']))\n",
    "    # INFECTION PROBABILITIES FOR 1 CLASS SESSION\n",
    "    df['PF'] = 1 - ((1-df['PF_S'])**(num_students))\n",
    "    df['PS'] = 1 - (((1-df['PS_S'])**(num_students-1))*((1-df['PS_F'])**(num_faculty)))\n",
    "    # INFECTION PROBABILITIES FOR SEMESTER\n",
    "    df['nPF'] = 1 - df['PF']\n",
    "    df['nPFsemester'] = df['nPF']**num_class_periods\n",
    "    df['PFsemester']  = 1 - df['nPFsemester']\n",
    "    df['nPS'] = 1 - df['PS']\n",
    "    df['nPSsemester'] = df['nPS']**num_class_periods\n",
    "    df['PSsemester']  = 1 - df['nPSsemester']\n",
    "    return(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:51.836661Z",
     "start_time": "2020-11-12T17:48:51.779473Z"
    }
   },
   "outputs": [],
   "source": [
    "covid_df = update_df()\n",
    "covid_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:51.847883Z",
     "start_time": "2020-11-12T17:48:51.838984Z"
    }
   },
   "outputs": [],
   "source": [
    "def update_figure(df,faculty=True):\n",
    "    if faculty: fld = 'PFsemester'; txt = 'Faculty'\n",
    "    else: fld = 'PSsemester'; txt = 'Student'\n",
    "    #Get the max x value\n",
    "    # x_maxf = df['PFsemester'].max()\n",
    "    # x_maxs = df['PSsemester'].max()\n",
    "    x_maxf = df['PFsemester'].quantile(0.99)\n",
    "    x_maxs = df['PSsemester'].quantile(0.99)\n",
    "    x_max = max(x_maxf, x_maxs)\n",
    "    #Update the figure\n",
    "    fig = px.histogram(df,x=fld,nbins=40,histnorm='percent',\n",
    "                       title=f'Calculated Distribution of {txt} Infection Probabilities for Semester<br>from 10,000 Monte Carlo Simulations')\n",
    "    fig.update_xaxes(title_text = 'Probability of infection (%)',\n",
    "                     range=[0,x_max])\n",
    "    fig.update_yaxes(title_text = 'Percentage of 10,000 Monte Carlo cases')\n",
    "    fig.update_layout(xaxis_tickformat = \"%\",\n",
    "                      font_size=10)\n",
    "    #fig.update_layout(transition_duration=500)\n",
    "    return(fig)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T20:01:49.428458Z",
     "start_time": "2020-11-12T20:01:49.214200Z"
    }
   },
   "outputs": [],
   "source": [
    "hist = covid_df['PFsemester'].hist(bins=40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:52.703151Z",
     "start_time": "2020-11-12T17:48:51.849616Z"
    }
   },
   "outputs": [],
   "source": [
    "faculty_fig = update_figure(covid_df, faculty=True)\n",
    "faculty_fig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:52.834965Z",
     "start_time": "2020-11-12T17:48:52.704874Z"
    }
   },
   "outputs": [],
   "source": [
    "student_fig = update_figure(covid_df, faculty=False)\n",
    "student_fig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:52.842284Z",
     "start_time": "2020-11-12T17:48:52.836950Z"
    }
   },
   "outputs": [],
   "source": [
    "def summarize_output(df,faculty=True):\n",
    "    if faculty: fld = 'PFsemester'; txt = 'FOR FACULTY MEMBER TEACHING THE COURSE'\n",
    "    else: fld = 'PSsemester'; txt = 'FOR A STUDENT TAKING THE COURSE'\n",
    "    #Create markdown from values\n",
    "    the_mean = df[fld].mean()\n",
    "    the_quants = [df[fld].quantile(x) for x in (0.05,0.25,0.5,0.75,0.95)]\n",
    "    #Create Markdown\n",
    "    md_text=f'''  \n",
    "**{txt}**\n",
    "| Best Estimate of Infection Probability | {the_mean:0.2%} |\n",
    "| --: | --- |\n",
    "| 5% chance that infection probability will be less than | {the_quants[0]:0.2%} |\n",
    "| 25% chance that infection probability will be less than | {the_quants[1]:0.2%} |\n",
    "| 50% chance that infection probability will be less than | {the_quants[2]:0.2%} |\n",
    "| 75% chance that infection probability will be less than | {the_quants[3]:0.2%} |\n",
    "| 95% chance that infection probability will be less than | {the_quants[4]:0.2%} |\n",
    "'''\n",
    "    return md_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:52.853493Z",
     "start_time": "2020-11-12T17:48:52.843962Z"
    }
   },
   "outputs": [],
   "source": [
    "print(summarize_output(covid_df, faculty=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-11-12T17:48:52.864247Z",
     "start_time": "2020-11-12T17:48:52.855609Z"
    }
   },
   "outputs": [],
   "source": [
    "print(summarize_output(covid_df, faculty=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "gist": {
   "data": {
    "description": "Reproducing Duke model",
    "public": false
   },
   "id": ""
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
	channels:
	- conda-forge
	dependencies:
	- plotly
	- pandas
	- matplotlib
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Trying to directly replicate http://covid-exposure-modeler-data-devils.cloud.duke.edu/, using code posted at https://github.com/johnpfay/Covid-Exposure-Model."
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:51.712132Z",
	"start_time": "2020-11-12T17:48:51.034697Z"
	}
	},
	"outputs": [],
	"source": [
	"import plotly.express as px\n",
	"\n",
	"import pandas as pd\n",
	"import numpy as np"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:51.717721Z",
	"start_time": "2020-11-12T17:48:51.713635Z"
	}
	},
	"outputs": [],
	"source": [
	"def get_random(var,n=10000):\n",
	" return np.random.uniform(*var+[n])\n",
	"\n",
	"def get_normal(var,n=10000):\n",
	" return np.random.normal(*var+[n])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:51.777931Z",
	"start_time": "2020-11-12T17:48:51.719754Z"
	}
	},
	"outputs": [],
	"source": [
	"def update_df(surface_area = 900,\n",
	" height = 10,\n",
	" num_faculty = 1,\n",
	" num_students = 10,\n",
	" duration = 75,\n",
	" num_class_periods = 26,\n",
	" breathing_rate_faculty = [0.027,0.029],\n",
	" breathing_rate_student = [0.012,0.012],\n",
	" ventilation_w_outside_air = [1,4],\n",
	" decay_rate_of_virus = [0,1.0],\n",
	" deposition_to_surface = [0.3,1.5],\n",
	" additional_control_measures = [0,0],\n",
	" quanta_emission_rate_faculty = [1.5,0.71],\n",
	" quanta_emission_rate_student = [0.69,0.71],\n",
	" exhalation_mask_efficiency = [0.4,0.6],\n",
	" inhalation_mask_efficiency = [0.3,0.5],\n",
	" background_infection_rate_faculty = [0.0070,0.0140],\n",
	" background_infection_rate_student = [0.0070,0.0140]):\n",
	" #Create dataframe of 10,000 runs\n",
	" num_runs = 10000\n",
	" df = pd.DataFrame(index=np.arange(num_runs))\n",
	" df['VENT'] = get_random(ventilation_w_outside_air,num_runs)\n",
	" df['DECAY'] = get_random(decay_rate_of_virus,num_runs)\n",
	" df['DEP'] = get_random(deposition_to_surface,num_runs)\n",
	" df['OTHER'] = get_random(additional_control_measures,num_runs)\n",
	" df['L'] = df['VENT'] + df['DECAY'] + df['DEP'] + df['OTHER']\n",
	" df['LDUR'] = df['L'] * duration / 60\n",
	" df['VOL'] = surface_area * height0.305*3\n",
	" df['EFFOUT'] = get_random(exhalation_mask_efficiency,num_runs)\n",
	" df['EMMFx'] = get_normal(quanta_emission_rate_faculty,num_runs)\n",
	" df['EMMSx'] = get_normal(quanta_emission_rate_student,num_runs)\n",
	" df['EMMF'] = 10**df['EMMFx']\n",
	" df['EMMS'] = 10**df['EMMSx']\n",
	" df['INFRATEF'] = get_random(background_infection_rate_faculty,num_runs)\n",
	" df['INFRATES'] = get_random(background_infection_rate_student,num_runs)\n",
	" df['CONCF'] = df['EMMF'](1-df['EFFOUT'])/(df['L']df['VOL'])(1-1/df['LDUR'](1-np.exp(-df['LDUR'])))\n",
	" df['CONCS'] = df['EMMS'](1-df['EFFOUT'])/(df['L']df['VOL'])(1-1/df['LDUR'](1-np.exp(-df['LDUR'])))\n",
	" df['EFFIN'] = get_random(inhalation_mask_efficiency,num_runs)\n",
	" df['BRFx'] = get_random(breathing_rate_faculty,num_runs)\n",
	" df['BRSx'] = get_random(breathing_rate_student,num_runs)\n",
	" df['BRF'] = 60 * df['BRFx']\n",
	" df['BRS'] = 60 * df['BRSx']\n",
	" df['INF_S'] = df['CONCS'] * df['BRF'] * duration/60 * (1-df['EFFIN'])\n",
	" df['INS_F'] = df['CONCF'] * df['BRS'] * duration/60 * (1-df['EFFIN'])\n",
	" df['INS_S'] = df['CONCS'] * df['BRS'] * duration/60 * (1-df['EFFIN']) \n",
	" # INECTION PROBABILITIES FOR FACULTY/STUDENT INFECTION\n",
	" df['PF_S'] = df['INFRATES'] * (1 - np.exp(-df['INF_S']))\n",
	" df['PS_F'] = df['INFRATEF'] * (1 - np.exp(-df['INS_F']))\n",
	" df['PS_S'] = df['INFRATES'] * (1 - np.exp(-df['INS_S']))\n",
	" # INFECTION PROBABILITIES FOR 1 CLASS SESSION\n",
	" df['PF'] = 1 - ((1-df['PF_S'])**(num_students))\n",
	" df['PS'] = 1 - (((1-df['PS_S'])*(num_students-1))((1-df['PS_F'])**(num_faculty)))\n",
	" # INFECTION PROBABILITIES FOR SEMESTER\n",
	" df['nPF'] = 1 - df['PF']\n",
	" df['nPFsemester'] = df['nPF']**num_class_periods\n",
	" df['PFsemester'] = 1 - df['nPFsemester']\n",
	" df['nPS'] = 1 - df['PS']\n",
	" df['nPSsemester'] = df['nPS']**num_class_periods\n",
	" df['PSsemester'] = 1 - df['nPSsemester']\n",
	" return(df)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:51.836661Z",
	"start_time": "2020-11-12T17:48:51.779473Z"
	}
	},
	"outputs": [],
	"source": [
	"covid_df = update_df()\n",
	"covid_df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:51.847883Z",
	"start_time": "2020-11-12T17:48:51.838984Z"
	}
	},
	"outputs": [],
	"source": [
	"def update_figure(df,faculty=True):\n",
	" if faculty: fld = 'PFsemester'; txt = 'Faculty'\n",
	" else: fld = 'PSsemester'; txt = 'Student'\n",
	" #Get the max x value\n",
	" # x_maxf = df['PFsemester'].max()\n",
	" # x_maxs = df['PSsemester'].max()\n",
	" x_maxf = df['PFsemester'].quantile(0.99)\n",
	" x_maxs = df['PSsemester'].quantile(0.99)\n",
	" x_max = max(x_maxf, x_maxs)\n",
	" #Update the figure\n",
	" fig = px.histogram(df,x=fld,nbins=40,histnorm='percent',\n",
	" title=f'Calculated Distribution of {txt} Infection Probabilities for Semester<br>from 10,000 Monte Carlo Simulations')\n",
	" fig.update_xaxes(title_text = 'Probability of infection (%)',\n",
	" range=[0,x_max])\n",
	" fig.update_yaxes(title_text = 'Percentage of 10,000 Monte Carlo cases')\n",
	" fig.update_layout(xaxis_tickformat = \"%\",\n",
	" font_size=10)\n",
	" #fig.update_layout(transition_duration=500)\n",
	" return(fig)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T20:01:49.428458Z",
	"start_time": "2020-11-12T20:01:49.214200Z"
	}
	},
	"outputs": [],
	"source": [
	"hist = covid_df['PFsemester'].hist(bins=40)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:52.703151Z",
	"start_time": "2020-11-12T17:48:51.849616Z"
	}
	},
	"outputs": [],
	"source": [
	"faculty_fig = update_figure(covid_df, faculty=True)\n",
	"faculty_fig"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:52.834965Z",
	"start_time": "2020-11-12T17:48:52.704874Z"
	}
	},
	"outputs": [],
	"source": [
	"student_fig = update_figure(covid_df, faculty=False)\n",
	"student_fig"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:52.842284Z",
	"start_time": "2020-11-12T17:48:52.836950Z"
	}
	},
	"outputs": [],
	"source": [
	"def summarize_output(df,faculty=True):\n",
	" if faculty: fld = 'PFsemester'; txt = 'FOR FACULTY MEMBER TEACHING THE COURSE'\n",
	" else: fld = 'PSsemester'; txt = 'FOR A STUDENT TAKING THE COURSE'\n",
	" #Create markdown from values\n",
	" the_mean = df[fld].mean()\n",
	" the_quants = [df[fld].quantile(x) for x in (0.05,0.25,0.5,0.75,0.95)]\n",
	" #Create Markdown\n",
	" md_text=f''' \n",
	"{txt}\n",
	"\| Best Estimate of Infection Probability \| {the_mean:0.2%} \|\n",
	"\| --: \| --- \|\n",
	"\| 5% chance that infection probability will be less than \| {the_quants[0]:0.2%} \|\n",
	"\| 25% chance that infection probability will be less than \| {the_quants[1]:0.2%} \|\n",
	"\| 50% chance that infection probability will be less than \| {the_quants[2]:0.2%} \|\n",
	"\| 75% chance that infection probability will be less than \| {the_quants[3]:0.2%} \|\n",
	"\| 95% chance that infection probability will be less than \| {the_quants[4]:0.2%} \|\n",
	"'''\n",
	" return md_text"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:52.853493Z",
	"start_time": "2020-11-12T17:48:52.843962Z"
	}
	},
	"outputs": [],
	"source": [
	"print(summarize_output(covid_df, faculty=True))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2020-11-12T17:48:52.864247Z",
	"start_time": "2020-11-12T17:48:52.855609Z"
	}
	},
	"outputs": [],
	"source": [
	"print(summarize_output(covid_df, faculty=False))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"gist": {
	"data": {
	"description": "Reproducing Duke model",
	"public": false
	},
	"id": ""
	},
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.8"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}