Skip to content

Instantly share code, notes, and snippets.

@charrismatic
Created July 29, 2020 04:26
Show Gist options
  • Save charrismatic/c37858485a2581644bd3ba02f09fc4d4 to your computer and use it in GitHub Desktop.
Save charrismatic/c37858485a2581644bd3ba02f09fc4d4 to your computer and use it in GitHub Desktop.
test-notebook
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Slope Comparisons"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import altair as alt\n",
"from scipy import stats\n",
"from altair import datum\n",
"from ipywidgets import interact\n",
"\n",
"# Set up some global config and variables\n",
"alt.renderers.enable('default')\n",
"# Turn off pandas' chained-assignment warning and NumPy's floating-point warnings notebook-wide.\n",
"pd.options.mode.chained_assignment = None\n",
"np.seterr(all='ignore')\n",
"\n",
"df = pd.read_csv('https://raw.githubusercontent.com/ironhacks/COVID-19-notebook-demo-1/master/jhu-daily-reports.csv')\n",
"df['Active'] = df.Confirmed - (df.Deaths + df.Recovered)\n",
"# Keep only the dates on which more than one country reported.\n",
"samples = df[['Date', 'Country']].groupby('Date').Country.nunique()\n",
"# Order the dates chronologically: days[-1] is used as the latest date, and the plain string\n",
"# order of 'MM-DD-YYYY' dates stops being chronological once the data spans more than one year.\n",
"days = sorted(samples[samples > 1].index, key=pd.to_datetime)\n",
"df = df[df['Date'].isin(days)]\n",
"\n",
"# numeric_only=True sums just the numeric columns, so text columns such as State and County\n",
"# are left out on every pandas version instead of being concatenated or raising.\n",
"country_level = df.groupby(['Country', 'Date'], as_index=False).sum(numeric_only=True)\n",
"\n",
"\n",
"def state_data(country):\n",
"    \"\"\"Return the numeric columns of ``df`` summed per (State, Date) for one country.\"\"\"\n",
"    in_country = df[df['Country'] == country]\n",
"    return in_country.groupby(['State', 'Date'], as_index=False).sum(numeric_only=True)\n",
"\n",
"\n",
"def county_data(state, country='US'):\n",
"    \"\"\"Return the numeric columns of ``df`` summed per (County, Date) for one state.\n",
"\n",
"    ``country`` defaults to 'US', the only country this notebook requests county data for.\n",
"    \"\"\"\n",
"    in_state = df[(df['Country'] == country) & (df['State'] == state)]\n",
"    return in_state.groupby(['County', 'Date'], as_index=False).sum(numeric_only=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def slope_chart(data, by, offset, xscale='linear', limit=400, scale=1, value='Confirmed_New', window=7, today=days[-1]):\n",
"    \"\"\"Plot each group's recent trend slope against its total confirmed cases.\n",
"\n",
"    For every `by` group that has a row on `today`, `value` is linearly regressed on\n",
"    'Confirmed' over the group's last `window` rows up to `today`. The fitted slope goes on\n",
"    the y axis, 'Confirmed' on `today` on the x axis, and point size follows `value`.\n",
"\n",
"    Parameters:\n",
"        data: DataFrame with 'Date', 'Confirmed', `by` and `value` columns.\n",
"        by: name of the grouping column; also used for colour, labels and tooltip.\n",
"        offset: offset of the x axis; the red zero line is only layered in when it is 0.\n",
"        xscale: scale type of the x axis, e.g. 'linear' or 'log'.\n",
"        limit: only groups whose `value` on `today` exceeds this are drawn; text labels\n",
"            are shown above twice this limit.\n",
"        scale: multiplier for the point size range.\n",
"        value: column regressed on 'Confirmed'; also drives the point size.\n",
"        window: number of most recent rows per group used for the regression.\n",
"        today: 'Date' value of the snapshot to plot; defaults to the last entry of `days`\n",
"            at the time this function is defined.\n",
"\n",
"    Returns:\n",
"        A layered Altair chart (points, labels, trend line and, for offset 0, a zero line).\n",
"    \"\"\"\n",
"    # Filter and order rows by parsed dates; the string form of 'MM-DD-YYYY' dates does not\n",
"    # sort chronologically across years.\n",
"    data = data.assign(_when=pd.to_datetime(data['Date']))\n",
"    data = data[data['_when'] <= pd.to_datetime(today)]\n",
"    # Copy the snapshot so the Slope column is written to its own frame, not to a slice of `data`.\n",
"    source = data[data['Date'] == today].drop(columns='_when').copy()\n",
"    source['Slope'] = 0.0\n",
"    for var in source[by].unique():\n",
"        values = data[data[by] == var].sort_values('_when').tail(window)[['Confirmed', value]]\n",
"        # No line can be fitted through fewer than two distinct x values; leave the slope at 0.\n",
"        if values['Confirmed'].nunique() < 2:\n",
"            continue\n",
"        source.loc[source[by] == var, 'Slope'] = stats.linregress(values['Confirmed'], values[value]).slope\n",
"    # Undefined slopes and any other missing values are treated as 0.\n",
"    source = source.fillna(0)\n",
"    source = source[source[value] > limit]\n",
"\n",
"    title = 'Slope of %s in last %d days since %s vs. Total Confirmed' % (' '.join(value.split('_')), window, today)\n",
"    base = alt.Chart(source, title=title).mark_point(filled=True, stroke='grey').encode(\n",
"        alt.X('Confirmed:Q', scale=alt.Scale(type=xscale), axis=alt.Axis(offset=offset)),\n",
"        alt.Y('Slope:Q', axis=alt.Axis(title='Slope')),\n",
"        color=alt.Color(by+':N', scale=alt.Scale(scheme='category20'), legend=alt.Legend(columns=2, clipHeight=20, padding=10)),\n",
"        # The size channel shows `value`, so its scale domain is taken from that same column.\n",
"        size=alt.Size(value+':Q', scale=alt.Scale(domain=[source[value].min(), source[value].max()], range=[100*scale, 3000*scale])),\n",
"        tooltip=[by, 'Confirmed', 'Slope', value]\n",
"    )\n",
"    text = base.mark_text().encode(\n",
"        text=by+':N',\n",
"        size=alt.value(12),\n",
"        color=alt.value('black')\n",
"    ).transform_filter(datum[value] > limit*2)\n",
"    regression = base.transform_regression('Confirmed', 'Slope', method=\"poly\", order=1).mark_line(strokeDash=[6,8]).encode(color=alt.value('grey'), size=alt.value(2))\n",
"    hline = alt.Chart().mark_rule(color='red', strokeDash=[6,3]).encode(alt.Y('a:Q', axis=alt.Axis(title=''))).transform_calculate(a=\"0\")\n",
"\n",
"    return (base+text+regression+hline) if offset == 0 else (base+text+regression)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Country Level"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Country-level slope chart: log x axis, limit of 450 on the default value column, 7-row window.\n",
"slope_chart(\n",
"    country_level,\n",
"    'Country',\n",
"    0,\n",
"    xscale='log',\n",
"    limit=450,\n",
"    scale=3,\n",
"    window=7,\n",
").properties(width=1200, height=800).interactive()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Daily new cases for the US over its last 60 rows, with a linear trend line in red.\n",
"us_recent = country_level[country_level['Country'] == 'US'].sort_values('Date').tail(60)\n",
"us_line = alt.Chart(us_recent).mark_line().encode(\n",
"    # x encodes the Date column, so the axis is titled accordingly.\n",
"    alt.X('Date:T', axis=alt.Axis(title='Date')),\n",
"    alt.Y('Confirmed_New:Q', axis=alt.Axis(title='New Cases'))\n",
")\n",
"us_trend = us_line.transform_regression(\"Date\", \"Confirmed_New\", method=\"linear\").mark_line(color='red', strokeDash=[6,3])\n",
"(us_line+us_trend).interactive()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# State Level"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"@interact(window=(2, 21, 1))\n",
"def chart(window=7):\n",
"    \"\"\"Draw the US state-level slope chart for the regression window picked on the slider.\"\"\"\n",
"    us_states = state_data('US')\n",
"    figure = slope_chart(us_states, 'State', 0, limit=100, xscale='log', scale=3, window=window)\n",
"    return figure.properties(width=1200, height=800).interactive()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# New cases against cumulative confirmed cases for state 'OK' (last 60 rows), plus a polynomial fit.\n",
"data = state_data('US')\n",
"recent = data.loc[data['State'] == 'OK'].sort_values('Date').tail(60)\n",
"state = alt.Chart(recent).mark_line().encode(x='Confirmed:Q', y='Confirmed_New:Q')\n",
"reg = (\n",
"    state\n",
"    .transform_regression(\"Confirmed\", \"Confirmed_New\", method=\"poly\")\n",
"    .mark_line(color='red', strokeDash=[6,3])\n",
")\n",
"(state+reg).interactive()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# US County Level"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"@interact(window=(2, 21, 1))\n",
"def chart(window=7):\n",
"    \"\"\"Draw the county-level slope chart for state 'CA' for the window picked on the slider.\"\"\"\n",
"    ca_counties = county_data('CA')\n",
"    figure = slope_chart(ca_counties, 'County', 0, xscale='log', limit=15, scale=5, window=window)\n",
"    return figure.properties(width=1100, height=600).interactive()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"outputs": [],
"source": [
"# New cases against cumulative confirmed cases for county 'Los Angeles' (last 60 rows), plus a polynomial fit.\n",
"data = county_data('CA')\n",
"recent = data.loc[data['County'] == 'Los Angeles'].sort_values('Date').tail(60)\n",
"state = alt.Chart(recent).mark_line().encode(x='Confirmed:Q', y='Confirmed_New:Q')\n",
"reg = (\n",
"    state\n",
"    .transform_regression(\"Confirmed\", \"Confirmed_New\", method=\"poly\")\n",
"    .mark_line(color='red', strokeDash=[6,3])\n",
")\n",
"(state+reg).interactive()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# US Hospitalizations"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "e741db7d90ba48129e8bc8f0ceeb2dee",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"interactive(children=(IntSlider(value=7, description='window', max=21, min=2), Output()), _dom_classes=('widge…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Daily state figures from covidtracking.com. Dates are reformatted to 'MM-DD-YYYY' strings\n",
"# so the frame can be joined with the state data on ['Date', 'State'].\n",
"dfh = pd.read_csv('https://covidtracking.com/api/v1/states/daily.csv')\n",
"dfh['date'] = pd.to_datetime(dfh['date'], format='%Y%m%d').dt.strftime('%m-%d-%Y')\n",
"dfh = dfh.rename(columns={'date': 'Date', 'state': 'State', 'hospitalizedCurrently': 'Hospitalized'})\n",
"data = state_data('US').merge(dfh, on=['Date', 'State'], how='outer')\n",
"\n",
"\n",
"@interact(window=(2, 21, 1))\n",
"def chart(window=7):\n",
"    \"\"\"Draw the state-level slope chart of 'Hospitalized' for the window picked on the slider.\"\"\"\n",
"    figure = slope_chart(data, 'State', 0, xscale='log', limit=200, scale=2, value='Hospitalized', window=window, today=days[-1])\n",
"    return figure.properties(width=1100, height=800).interactive()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 'Hospitalized' by date for state 'CA' over its last 7 rows, plus a polynomial fit.\n",
"# `data` is the frame left by the preceding cell.\n",
"recent = data.loc[data['State'] == 'CA'].sort_values('Date').tail(7)\n",
"state = alt.Chart(recent).mark_line().encode(x='Date:T', y='Hospitalized:Q')\n",
"reg = (\n",
"    state\n",
"    .transform_regression(\"Date\", \"Hospitalized\", method=\"poly\")\n",
"    .mark_line(color='red', strokeDash=[6,3])\n",
")\n",
"(state+reg).interactive()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Daily new cases for India with a linear trend line, drawn over shaded date ranges labelled Phase 1-4.\n",
"data = country_level\n",
"india_rows = data.loc[data['Country'] == 'India']\n",
"state = alt.Chart(india_rows).mark_line().encode(\n",
"    x=alt.X('Date:T', axis=alt.Axis(title='Date')),\n",
"    y=alt.Y('Confirmed_New:Q', axis=alt.Axis(title='New Cases')),\n",
")\n",
"reg = (\n",
"    state\n",
"    .transform_regression(\"Date\", \"Confirmed_New\", method=\"linear\")\n",
"    .mark_line(color='grey', strokeDash=[6,3])\n",
")\n",
"\n",
"# One row per shaded band; each band ends on the date the next one starts.\n",
"marks = pd.DataFrame({\n",
"    \"Phase\": [\"1\", \"2\", \"3\", \"4\"],\n",
"    \"start\": [\"03-25-2020\", \"04-14-2020\", \"05-03-2020\", \"05-17-2020\"],\n",
"    \"end\": [\"04-14-2020\", \"05-03-2020\", \"05-17-2020\", \"05-31-2020\"],\n",
"})\n",
"rect = alt.Chart(marks).mark_rect(opacity=0.3).encode(x='start:T', x2='end:T', color='Phase:N')\n",
"\n",
"layered = rect+state+reg\n",
"layered.properties(width=800, height=500).interactive()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment