Created
July 29, 2020 04:26
-
-
Save charrismatic/c37858485a2581644bd3ba02f09fc4d4 to your computer and use it in GitHub Desktop.
test-notebook
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Slope Comparisons" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import altair as alt\n", | |
"from scipy import stats\n", | |
"from altair import datum\n", | |
"from ipywidgets import interact\n", | |
"\n", | |
"# Set up some global config and variables\n", | |
"alt.renderers.enable('default')\n", | |
"pd.options.mode.chained_assignment = None\n", | |
"np.seterr(all='ignore')\n", | |
"\n", | |
"df = pd.read_csv('https://raw.githubusercontent.com/ironhacks/COVID-19-notebook-demo-1/master/jhu-daily-reports.csv')\n", | |
"df['Active'] = df.Confirmed - (df.Deaths + df.Recovered)\n", | |
"samples = df[['Date', 'Country']].groupby('Date').Country.nunique()\n", | |
"days = samples[samples > 1].index.tolist()\n", | |
"df = df[df['Date'].isin(days)]\n", | |
"\n", | |
"country_level = df.groupby(['Country', 'Date'], as_index=False).sum()\n", | |
"def state_data(country):\n", | |
" return df[df['Country'] == country].groupby(['State', 'Date'], as_index=False).sum()\n", | |
"def county_data(state):\n", | |
" return df[(df['Country'] == 'US') & (df['State'] == state)].groupby(['County', 'Date'], as_index=False).sum()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def slope_chart(data, by, offset, xscale='linear', limit=400, scale=1, value='Confirmed_New', window=7, today=days[-1]):\n", | |
" data = data[data['Date']<=today]\n", | |
" source = data[data['Date'] == today]\n", | |
" for var in source[by].unique():\n", | |
" values = data[data[by] == var].sort_values('Date').tail(window)[['Confirmed', value]]\n", | |
" slope, intercept, r_value, p_value, std_err = stats.linregress(values.Confirmed, values[value])\n", | |
" source.loc[source[by] == var, 'Slope'] = slope\n", | |
" source.fillna(0, inplace=True)\n", | |
" source = source[source[value] > limit]\n", | |
"\n", | |
" title = 'Slope of %s in last %d days since %s vs. Total Confirmed' % (' '.join(value.split('_')), window, today)\n", | |
" base = alt.Chart(source, title=title).mark_point(filled=True, stroke='grey').encode(\n", | |
" alt.X('Confirmed:Q', scale=alt.Scale(type=xscale), axis=alt.Axis(offset=offset)),\n", | |
" alt.Y('Slope:Q', axis=alt.Axis(title='Slope')),\n", | |
" color=alt.Color(by+':N', scale=alt.Scale(scheme='category20'), legend=alt.Legend(columns=2, clipHeight=20, padding=10)),\n", | |
" size=alt.Size(value+':Q', scale=alt.Scale(domain=[source.Confirmed_New.min(), source.Confirmed_New.max()], range=[100*scale, 3000*scale])),\n", | |
" tooltip=[by, 'Confirmed', 'Slope', value]\n", | |
" )\n", | |
" text = base.mark_text().encode(\n", | |
" text=by+':N',\n", | |
" size=alt.value(12),\n", | |
" color=alt.value('black')\n", | |
" ).transform_filter(datum[value] > limit*2)\n", | |
" regression = base.transform_regression('Confirmed', 'Slope', method=\"poly\", order=1).mark_line(strokeDash=[6,8]).encode(color=alt.value('grey'), size=alt.value(2))\n", | |
" hline = alt.Chart().mark_rule(color='red', strokeDash=[6,3]).encode(alt.Y('a:Q', axis=alt.Axis(title=''))).transform_calculate(a=\"0\")\n", | |
"\n", | |
" return (base+text+regression+hline) if offset == 0 else (base+text+regression)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Country Level" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"slope_chart(country_level, 'Country', 0, xscale='log', limit=450, scale=3, window=7).properties(\n", | |
" width=1200,\n", | |
" height=800\n", | |
").interactive()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = country_level\n", | |
"state = alt.Chart(data[data['Country'] == 'US'].sort_values('Date').tail(60)).mark_line().encode(\n", | |
" alt.X('Date:T', axis=alt.Axis(title='Cumulative Cases')),\n", | |
" alt.Y('Confirmed_New:Q', axis=alt.Axis(title='New Cases'))\n", | |
")\n", | |
"reg = state.transform_regression(\"Date\", \"Confirmed_New\", method=\"linear\").mark_line(color='red', strokeDash=[6,3])\n", | |
"(state+reg).interactive()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# State Level" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@interact(window=(2, 21, 1))\n", | |
"def chart(window=7):\n", | |
" return slope_chart(state_data('US'), 'State', 0, limit=100, xscale='log', scale=3, window=window).properties(\n", | |
" width=1200,\n", | |
" height=800\n", | |
" ).interactive()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = state_data('US')\n", | |
"state = alt.Chart(data[data['State'] == 'OK'].sort_values('Date').tail(60)).mark_line().encode(\n", | |
" x='Confirmed:Q',\n", | |
" y='Confirmed_New:Q'\n", | |
")\n", | |
"reg = state.transform_regression(\"Confirmed\", \"Confirmed_New\", method=\"poly\").mark_line(color='red', strokeDash=[6,3])\n", | |
"(state+reg).interactive()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# US County Level" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"@interact(window=(2, 21, 1))\n", | |
"def chart(window=7):\n", | |
" return slope_chart(county_data('CA'), 'County', 0, xscale='log', limit=15, scale=5, window=window).properties(\n", | |
" width=1100,\n", | |
" height=600\n", | |
" ).interactive()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"jupyter": { | |
"source_hidden": true | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"data = county_data('CA')\n", | |
"state = alt.Chart(data[data['County'] == 'Los Angeles'].sort_values('Date').tail(60)).mark_line().encode(\n", | |
" x='Confirmed:Q',\n", | |
" y='Confirmed_New:Q'\n", | |
")\n", | |
"reg = state.transform_regression(\"Confirmed\", \"Confirmed_New\", method=\"poly\").mark_line(color='red', strokeDash=[6,3])\n", | |
"(state+reg).interactive()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# US Hospitalizations" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "e741db7d90ba48129e8bc8f0ceeb2dee", | |
"version_major": 2, | |
"version_minor": 0 | |
}, | |
"text/plain": [ | |
"interactive(children=(IntSlider(value=7, description='window', max=21, min=2), Output()), _dom_classes=('widge…" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"dfh = pd.read_csv('https://covidtracking.com/api/v1/states/daily.csv')\n", | |
"dfh.date = pd.to_datetime(dfh.date, format='%Y%m%d')\n", | |
"dfh.date = dfh.date.dt.strftime('%m-%d-%Y')\n", | |
"dfh = dfh.rename({'date': 'Date', 'state':'State', 'hospitalizedCurrently': 'Hospitalized'}, axis=1)\n", | |
"data = state_data('US')\n", | |
"data = data.merge(dfh, on=['Date', 'State'], how='outer')\n", | |
"@interact(window=(2, 21, 1))\n", | |
"def chart(window=7):\n", | |
" return slope_chart(\n", | |
" data, \n", | |
" 'State', \n", | |
" 0, \n", | |
" xscale='log', \n", | |
" limit=200, \n", | |
" scale=2, \n", | |
" value='Hospitalized', \n", | |
" window=window, \n", | |
" today=days[-1]\n", | |
" ).properties(\n", | |
" width=1100,\n", | |
" height=800\n", | |
" ).interactive()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"state = alt.Chart(data[data['State'] == 'CA'].sort_values('Date').tail(7)).mark_line().encode(\n", | |
" x='Date:T',\n", | |
" y='Hospitalized:Q'\n", | |
")\n", | |
"reg = state.transform_regression(\"Date\", \"Hospitalized\", method=\"poly\").mark_line(color='red', strokeDash=[6,3])\n", | |
"(state+reg).interactive()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"data = country_level\n", | |
"state = alt.Chart(data[(data['Country'] == 'India')]).mark_line().encode(\n", | |
" alt.X('Date:T', axis=alt.Axis(title='Date')),\n", | |
" alt.Y('Confirmed_New:Q', axis=alt.Axis(title='New Cases'))\n", | |
")\n", | |
"reg = state.transform_regression(\"Date\", \"Confirmed_New\", method=\"linear\").mark_line(color='grey', strokeDash=[6,3])\n", | |
"\n", | |
"marks = pd.DataFrame([\n", | |
" {\"Phase\": \"1\", \"start\": \"03-25-2020\", \"end\": \"04-14-2020\"},\n", | |
" {\"Phase\": \"2\", \"start\": \"04-14-2020\", \"end\": \"05-03-2020\"},\n", | |
" {\"Phase\": \"3\", \"start\": \"05-03-2020\", \"end\": \"05-17-2020\"},\n", | |
" {\"Phase\": \"4\", \"start\": \"05-17-2020\", \"end\": \"05-31-2020\"},\n", | |
"])\n", | |
"rect = alt.Chart(marks).mark_rect(opacity=0.3).encode(x='start:T', x2='end:T', color='Phase:N')\n", | |
"\n", | |
"(rect+state+reg).properties(\n", | |
" width=800,\n", | |
" height=500\n", | |
").interactive()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment